# SPDX-FileCopyrightText: 2023 Karlsruher Institut für Technologie
#
# SPDX-License-Identifier: CC0-1.0
from typing import Union
from urllib import parse as urlparse
from urllib.parse import quote_plus
import pystac
import requests
from . import utils
from .statics import constants
from .thumbnails import Thumbnails
[docs]
class Assets:
"""
This class is tasked with the responsibility of incorporating
assets into STAC-Collections and STAC-Items.
"""
[docs]
def item(
self,
harvesting_vars: dict,
item: pystac.Item,
url: str,
Recognizer_output: Union[str, None] = None,
aggregated_dataset_url: Union[str, None] = None,
asset_properties: Union[dict, None] = dict(),
logger_properties: dict = dict(),
):
"""
This is a function for adding assets to STAC-Items.
Args:
harvesting_vars (dict): A dictionary containing the variables required for harvesting.
item (pystac.Item): A STAC-Item.
Recognizer_output (dict): A dictionary containing the scenario output of the :class:`~tds2stac.Recognizer`.
aggregated_dataset_url (str): The URL of the aggregated dataset.
url (str): The URL of the catalog. It will be used for thumbnails
asset_properties (dict): A dictionary containing the properties of the assets for more information refer to :class:`~tds2stac.TDS2STACIntegrator.asset_properties`.
logger_properties (dict): A dictionary containing the properties of the logger for more information refer to :class:`~tds2stac.TDS2STACIntegrator.logger_properties`.
"""
# Adding web services as assets into items
if logger_properties is not None:
self.logger_properties = logger_properties
media_type_: Union[str, pystac.MediaType] = ""
# for service in harvesting_vars["services"]:
# print(harvesting_vars["services_tuples"])
# if (
# service.get("serviceType") == "Compound"
# or service.get("serviceType") == "compound"
# ):
if (
asset_properties is not None
and asset_properties.get("jupyter_notebook") is True
):
catalog_path = urlparse.urlsplit(url).path.replace(
"/thredds/catalog/", ""
)
additional = "/thredds/notebook/"
service_url_notebook = utils.references_urls(
url, additional
) + harvesting_vars["dataset"].get("ID")
item.add_asset(
key="jupyter_notebook",
asset=pystac.Asset(
href=service_url_notebook
+ "?catalog=%s&filename=%s"
% (catalog_path, "default_viewer.ipynb"),
title="Jupyter Notebook",
media_type=pystac.MediaType.HTML,
),
)
for s in harvesting_vars["services_tuples"]:
service_url = utils.references_urls(url, s[0]) + harvesting_vars[
"dataset"
].get("urlPath")
if s[1] is not None:
service_url += s[1]
name_str: str = str(s[2])
if s[2] in [
"iso",
"ncml",
"uddc",
]:
service_url += "?dataset=%s&&catalog=%s" % (
harvesting_vars["catalog_id"],
quote_plus(harvesting_vars["catalog_url"]),
)
elif s[2] in [
"wms",
]:
if (
asset_properties is not None
and asset_properties.get("explore_data") is True
and asset_properties.get("verify_explore_data") is not True
):
item.add_asset(
key="explore_data",
asset=pystac.Asset(
href=utils.references_urls(
url, "/thredds/Godiva.html"
)
+ "?server="
+ service_url,
title="Explore Data",
media_type=pystac.MediaType.HTML,
),
)
elif (
asset_properties is not None
and asset_properties.get("explore_data") is True
and asset_properties.get("verify_explore_data") is True
):
head = requests.head(
service_url + "?request=GetMetadata&item=menu"
)
if head.status_code == 200:
item.add_asset(
key="explore_data",
asset=pystac.Asset(
href=utils.references_urls(
url, "/thredds/Godiva.html"
)
+ "?server="
+ service_url,
title="Explore Data",
media_type=pystac.MediaType.HTML,
),
)
service_url += (
"?service=WMS&version=1.3.0&request=GetCapabilities"
)
elif (
s[2]
in [
"http",
]
and "Seventh Scenario" in str(Recognizer_output)
and aggregated_dataset_url is not None
):
service_url = aggregated_dataset_url
media_type_ = pystac.MediaType.HTML
elif (
s[2]
in [
"http",
]
and "Seventh Scenario" in str(Recognizer_output)
and aggregated_dataset_url is None
):
# service_url += "?service=WMS&version=1.3.0&request=GetCapabilities"
media_type_ = pystac.MediaType.HTML
if s[2] in ["odap"]:
service_url += ".html"
# Determinatio of Media Type
if s[2] in [
"iso",
"ncml",
"wms",
"wcs",
"wfs",
"sos",
]:
media_type_ = pystac.MediaType.XML
elif s[2] in ["http"] and "Seventh Scenario" not in str(
Recognizer_output
):
media_type_ = "application/netcdf"
elif s[2] in [
"dap4",
"odap",
"uddc",
]:
media_type_ = pystac.MediaType.HTML
else:
media_type_ = pystac.MediaType.TEXT
if asset_properties is None or (
(
asset_properties.get("assets_list_allowed") is None
or asset_properties.get("assets_list_allowed") == []
or isinstance(
asset_properties.get("assets_list_allowed"),
str,
)
)
and (
asset_properties.get("assets_list_avoided") is None
or asset_properties.get("assets_list_avoided") == []
or isinstance(
asset_properties.get("assets_list_avoided"),
str,
)
)
):
item.add_asset(
key=s[2],
asset=pystac.Asset(
href=service_url,
# title=without_slash,
media_type=media_type_,
),
)
elif (
asset_properties is not None
and (
asset_properties.get("assets_list_allowed") is not None
and isinstance(
asset_properties.get("assets_list_allowed"),
list,
)
)
and (
asset_properties.get("assets_list_avoided") is not None
and isinstance(
asset_properties.get("assets_list_avoided"),
list,
)
)
):
if name_str in ( # type: ignore
asset_properties.get("assets_list_allowed")
) and name_str not in ( # type: ignore
asset_properties.get("assets_list_avoided")
):
item.add_asset(
key=s[2],
asset=pystac.Asset(
href=service_url,
# title=without_slash,
media_type=media_type_,
),
)
elif (
asset_properties is not None
and (
asset_properties.get("assets_list_allowed") is None
or asset_properties.get("assets_list_allowed") == []
or isinstance(
asset_properties.get("assets_list_allowed"),
str,
)
)
and (
asset_properties.get("assets_list_avoided") is not None
and isinstance(
asset_properties.get("assets_list_avoided"),
list,
)
)
):
if name_str not in (asset_properties.get("assets_list_avoided")): # type: ignore
item.add_asset(
key=s[2],
asset=pystac.Asset(
href=service_url,
# title=without_slash,
media_type=media_type_,
),
)
elif (
asset_properties is not None
and (
asset_properties.get("assets_list_allowed") is not None
and isinstance(
asset_properties.get("assets_list_allowed"),
list,
)
)
and (
asset_properties.get("assets_list_avoided") is None
or asset_properties.get("assets_list_avoided") == []
or isinstance(
asset_properties.get("assets_list_avoided"),
str,
)
)
):
if name_str in (asset_properties.get("assets_list_allowed")): # type: ignore
item.add_asset(
key=s[2],
asset=pystac.Asset(
href=service_url,
# title=without_slash,
media_type=media_type_,
),
)
if asset_properties is not None:
if asset_properties.get("item_thumbnail") is not None:
thumbnail = Thumbnails()
thumbnail.item(
harvesting_vars["services"][0],
harvesting_vars["dataset"],
harvesting_vars,
url,
item,
asset_properties["item_thumbnail"],
asset_properties["item_overview"],
asset_properties["item_getminmax_thumbnail"],
logger_properties=self.logger_properties,
)
if asset_properties.get("item_custom_asset") is not None:
for asset in asset_properties["item_custom_asset"]:
item.add_asset(
key=asset.get("key"),
asset=pystac.Asset(
href=asset.get("href"),
title=asset.get("title")
if asset.get("title") is not None
else asset.get("key"),
roles=asset["roles"]
if asset.get("roles") is not None
else ["data"],
media_type=asset["media_type"]
if asset.get("media_type") is not None
else pystac.MediaType.TEXT,
),
)
# print(s.get("base"), s.get("suffix"), s.get("name"),service_url)
# s.clear()
# service.clear()
[docs]
def collection(
self,
harvesting_vars: dict,
collection_dict: dict,
stac_catalog: pystac.Catalog,
asset_properties: dict = dict(),
logger_properties: dict = dict(),
):
"""
This is a function for adding assets to STAC-Collections.
Args:
asset_properties (dict): A dictionary containing the properties of the assets for more information refer to :class:`~tds2stac.TDS2STACIntegrator.asset_properties`.
harvesting_vars (dict): A dictionary containing the variables required for harvesting.
collection_dict (dict): A dictionary containing the properties of the collection.
stac_catalog (pystac.Catalog): A STAC-Catalog.
logger_properties (dict): A dictionary containing the properties of the logger for more information refer to :class:`~tds2stac.TDS2STACIntegrator.logger_properties`.
"""
if logger_properties is not None:
self.logger_properties = logger_properties
if asset_properties is not None:
if (
asset_properties.get("collection_thumbnail") is not None
or asset_properties.get("collection_overview") is not None
):
thumbnail = Thumbnails()
thumbnail.collection(
asset_properties["collection_thumbnail"],
asset_properties["collection_overview"],
dict(harvesting_vars)["services"],
dict(harvesting_vars)["dataset"],
dict(harvesting_vars),
dict(collection_dict)["collection_id"],
dict(harvesting_vars)["main_dataset_url"],
stac_catalog,
str(asset_properties.get("collection_thumbnail_link")),
str(asset_properties.get("collection_overview_link")),
logger_properties=self.logger_properties,
)
if asset_properties.get("collection_custom_asset") is not None:
collection: pystac.Collection = constants.empty_collection
collection = stac_catalog.get_child(dict(collection_dict)["collection_id"]) # type: ignore
for asset in asset_properties["collection_custom_asset"]:
collection.add_asset(
key=asset.get("key"),
asset=pystac.Asset(
href=asset.get("href"),
title=asset.get("title")
if asset.get("title") is not None
else asset.get("key"),
roles=asset.get("roles")
if asset.get("roles") is not None
else ["data"],
media_type=asset.get("media_type")
if asset.get("media_type") is not None
else pystac.MediaType.TEXT,
),
)