Source code for tds2stac.assets

# SPDX-FileCopyrightText: 2023 Karlsruher Institut für Technologie
#
# SPDX-License-Identifier: CC0-1.0


from typing import Union
from urllib import parse as urlparse
from urllib.parse import quote_plus

import pystac
import requests

from . import utils
from .statics import constants
from .thumbnails import Thumbnails


class Assets:
    """
    Incorporate assets into STAC-Collections and STAC-Items.

    :meth:`item` turns the web services advertised by a TDS dataset into
    assets of a STAC-Item, and :meth:`collection` delegates the creation of
    collection-level thumbnail/overview assets to
    :class:`~tds2stac.thumbnails.Thumbnails`.
    """

    # Seconds to wait for the optional "verify_explore_data" HEAD probe, so
    # that an unresponsive server cannot block harvesting indefinitely.
    _PROBE_TIMEOUT = 10

    # Marker searched for in the stringified Recognizer output.
    _SEVENTH_SCENARIO = "Seventh Scenario"

    def item(
        self,
        harvesting_vars: dict,
        item: "pystac.Item",
        url: str,
        Recognizer_output: Union[str, None] = None,
        aggregated_dataset_url: Union[str, None] = None,
        asset_properties: Union[dict, None] = None,
        logger_properties: Union[dict, None] = None,
    ):
        """
        Add the web services of a dataset as assets to a STAC-Item.

        Only services whose ``serviceType`` is ``Compound``/``compound`` are
        processed; every nested ``service`` element of such a service becomes
        one asset keyed by the service name.

        Args:
            harvesting_vars (dict): A dictionary containing the variables
                required for harvesting. The keys ``services``, ``dataset``,
                ``catalog_id`` and ``catalog_url`` are read here.
            item (pystac.Item): The STAC-Item the assets are added to.
            url (str): The URL of the catalog. It is used to build the asset
                URLs and is forwarded to the thumbnail creation.
            Recognizer_output (str): The scenario output of the
                :class:`~tds2stac.Recognizer`. It is converted with ``str()``
                and searched for ``"Seventh Scenario"``.
            aggregated_dataset_url (str): The URL of the aggregated dataset.
                In the seventh scenario it replaces the ``http`` asset URL.
            asset_properties (dict): A dictionary containing the properties
                of the assets. For more information refer to
                :class:`~tds2stac.TDS2STACIntegrator.asset_properties`.
                ``None`` (the default) adds every service without filtering.
            logger_properties (dict): A dictionary containing the properties
                of the logger. For more information refer to
                :class:`~tds2stac.TDS2STACIntegrator.logger_properties`.
                ``None`` (the default) is treated as an empty dictionary.
        """
        # Always define the attribute; it is forwarded to Thumbnails below.
        self.logger_properties = (
            {} if logger_properties is None else logger_properties
        )
        seventh_scenario = self._SEVENTH_SCENARIO in str(Recognizer_output)

        for service in harvesting_vars["services"]:
            if service.get("serviceType") not in ("Compound", "compound"):
                continue

            if (
                asset_properties is not None
                and asset_properties.get("jupyter_notebook") is True
            ):
                self._add_notebook_asset(harvesting_vars, item, url)

            for s in service.findall("{%s}service" % constants.unidata):
                name = s.get("name")
                service_url = utils.references_urls(
                    url, s.get("base")
                ) + harvesting_vars["dataset"].get("urlPath")
                suffix = s.get("suffix")
                if suffix is not None:
                    service_url += suffix

                if name in ("iso", "ncml", "uddc"):
                    # The query string (including the doubled "&") is kept
                    # exactly as the harvester has always emitted it.
                    service_url += "?dataset=%s&&catalog=%s" % (
                        harvesting_vars["catalog_id"],
                        quote_plus(harvesting_vars["catalog_url"]),
                    )
                elif name == "wms":
                    # The Godiva link needs the bare WMS endpoint, so it is
                    # built before the GetCapabilities query is appended.
                    if self._explore_data_wanted(
                        asset_properties, service_url
                    ):
                        item.add_asset(
                            key="explore_data",
                            asset=pystac.Asset(
                                href=utils.references_urls(
                                    url, "/thredds/Godiva.html"
                                )
                                + "?server="
                                + service_url,
                                title="Explore Data",
                                media_type=pystac.MediaType.HTML,
                            ),
                        )
                    service_url += (
                        "?service=WMS&version=1.3.0&request=GetCapabilities"
                    )
                elif (
                    name == "http"
                    and seventh_scenario
                    and aggregated_dataset_url is not None
                ):
                    service_url = aggregated_dataset_url

                if name == "odap":
                    service_url += ".html"

                if self._is_service_allowed(name, asset_properties):
                    item.add_asset(
                        key=name,
                        asset=pystac.Asset(
                            href=service_url,
                            media_type=self._media_type_for(
                                name, seventh_scenario
                            ),
                        ),
                    )

            # NOTE(review): the original nesting of this call could not be
            # recovered from the flattened source; it is issued once per
            # compound service because the compound ``service`` element is
            # what gets passed on - confirm against the upstream file.
            if (
                asset_properties is not None
                and asset_properties.get("item_thumbnail") is not None
            ):
                # Missing optional keys are forwarded as None instead of
                # raising KeyError.
                # NOTE(review): confirm Thumbnails.item accepts None here.
                Thumbnails().item(
                    service,
                    harvesting_vars["dataset"],
                    harvesting_vars,
                    url,
                    item,
                    asset_properties["item_thumbnail"],
                    asset_properties.get("item_overview"),
                    asset_properties.get("item_getminmax_thumbnail"),
                    logger_properties=self.logger_properties,
                )

    def collection(
        self,
        harvesting_vars: dict,
        collection_dict: dict,
        stac_catalog: "pystac.Catalog",
        asset_properties: Union[dict, None] = None,
        logger_properties: Union[dict, None] = None,
    ):
        """
        Add thumbnail/overview assets to a STAC-Collection.

        The work is delegated to
        :meth:`tds2stac.thumbnails.Thumbnails.collection`.

        Args:
            harvesting_vars (dict): A dictionary containing the variables
                required for harvesting. The keys ``services``, ``dataset``
                and ``main_dataset_url`` are read here.
            collection_dict (dict): A dictionary containing the properties of
                the collection. The key ``collection_id`` is read here.
            stac_catalog (pystac.Catalog): A STAC-Catalog.
            asset_properties (dict): A dictionary containing the properties
                of the assets. For more information refer to
                :class:`~tds2stac.TDS2STACIntegrator.asset_properties`.
                The keys ``collection_thumbnail``, ``collection_overview``
                and ``collection_link`` are read; absent keys (or ``None``
                for the whole dictionary) are forwarded as ``None``.
            logger_properties (dict): A dictionary containing the properties
                of the logger. For more information refer to
                :class:`~tds2stac.TDS2STACIntegrator.logger_properties`.
                ``None`` (the default) is treated as an empty dictionary.
        """
        self.logger_properties = (
            {} if logger_properties is None else logger_properties
        )
        properties = {} if asset_properties is None else asset_properties

        # NOTE(review): confirm Thumbnails.collection accepts None for the
        # three optional asset properties.
        Thumbnails().collection(
            properties.get("collection_thumbnail"),
            properties.get("collection_overview"),
            harvesting_vars["services"],
            harvesting_vars["dataset"],
            # A shallow copy is handed over, as before.
            dict(harvesting_vars),
            collection_dict["collection_id"],
            harvesting_vars["main_dataset_url"],
            stac_catalog,
            properties.get("collection_link"),
            logger_properties=self.logger_properties,
        )

    def _add_notebook_asset(
        self, harvesting_vars: dict, item: "pystac.Item", url: str
    ) -> None:
        """Attach the THREDDS Jupyter-notebook viewer link to ``item``."""
        catalog_path = urlparse.urlsplit(url).path.replace(
            "/thredds/catalog/", ""
        )
        notebook_url = utils.references_urls(
            url, "/thredds/notebook/"
        ) + harvesting_vars["dataset"].get("ID")
        item.add_asset(
            key="jupyter_notebook",
            asset=pystac.Asset(
                href=notebook_url
                + "?catalog=%s&filename=%s"
                % (catalog_path, "default_viewer.ipynb"),
                title="Jupyter Notebook",
                media_type=pystac.MediaType.HTML,
            ),
        )

    def _explore_data_wanted(
        self, asset_properties: Union[dict, None], wms_url: str
    ) -> bool:
        """Decide whether the Godiva "Explore Data" asset should be added."""
        if (
            asset_properties is None
            or asset_properties.get("explore_data") is not True
        ):
            return False
        if asset_properties.get("verify_explore_data") is not True:
            return True
        return self._wms_menu_reachable(wms_url)

    def _wms_menu_reachable(self, wms_url: str) -> bool:
        """Return True when the WMS ``GetMetadata`` menu answers HTTP 200."""
        try:
            response = requests.head(
                wms_url + "?request=GetMetadata&item=menu",
                timeout=self._PROBE_TIMEOUT,
            )
        except requests.RequestException:
            # An unreachable server means "not verified"; it must not abort
            # the harvesting of the remaining assets.
            return False
        return response.status_code == 200

    @staticmethod
    def _is_service_allowed(
        name: Union[str, None], asset_properties: Union[dict, None]
    ) -> bool:
        """
        Apply the ``assets_list_allowed``/``assets_list_avoided`` filters.

        A list that is absent or ``None`` imposes no restriction, so either
        list can be configured on its own.
        """
        if asset_properties is None:
            return True
        allowed = asset_properties.get("assets_list_allowed")
        avoided = asset_properties.get("assets_list_avoided")
        if allowed is not None and name not in allowed:
            return False
        if avoided is not None and name in avoided:
            return False
        return True

    @staticmethod
    def _media_type_for(
        name: Union[str, None], seventh_scenario: bool
    ) -> "Union[str, pystac.MediaType]":
        """Map a TDS service name to the media type of its asset."""
        if name in ("iso", "ncml", "wms", "wcs", "wfs", "sos"):
            return pystac.MediaType.XML
        if name == "http":
            # In the seventh scenario the http link is advertised as HTML;
            # otherwise it is a direct NetCDF download.
            if seventh_scenario:
                return pystac.MediaType.HTML
            return "application/netcdf"
        if name in ("dap4", "odap", "uddc"):
            return pystac.MediaType.HTML
        return pystac.MediaType.TEXT