Source code for tds2stac.utils

# SPDX-FileCopyrightText: 2023 Karlsruher Institut für Technologie
#
# SPDX-License-Identifier: CC0-1.0

import os
from urllib import parse as urlparse

import requests


# TODO: we need to think about data with extensions other than .nc,
# like .tar in Musica. Also, it would be great to first split the
# name based on the extension and then start changing the name.

[docs]
def replacement_func(url):
    """Build a title-cased, space-separated ID from a catalog URL.

    Only the path and the query string of ``url`` are used. Percent signs,
    ``/catalog.xml`` and ``/thredds/`` are dropped from the path, and the
    remaining slashes become spaces. When a query string is present, its
    ``=``, ``?`` and ``/`` characters become spaces and it is appended to
    the path. Finally ``.xml`` / ``.nc`` are removed, ``-``, ``_`` and ``.``
    become spaces, and the result is passed through ``str.title()``.
    """
    # TODO (carried over from the original author): a wrong address, e.g. an
    # NcML address instead of a data address, has been reported to end in
    # "TypeError: a bytes-like object is required, not 'str'" while the path
    # is being rewritten.
    parts = urlparse.urlsplit(url)

    # The order is significant: the multi-character markers have to be
    # removed before the leftover slashes are turned into spaces.
    path_rules = (("%", ""), ("/catalog.xml", ""), ("/thredds/", ""), ("/", " "))
    cleaned_path = parts.path
    for old, new in path_rules:
        cleaned_path = cleaned_path.replace(old, new)

    if parts.query == "":
        label = cleaned_path
        final_rules = ((".xml", ""), (".nc", ""), ("-", " "), ("_", " "))
    else:
        cleaned_query = parts.query
        for separator in ("=", "?", "/"):
            cleaned_query = cleaned_query.replace(separator, " ")
        # ".xml" is stripped from the path part only, not from the query.
        label = cleaned_path.replace(".xml", "") + " " + cleaned_query
        final_rules = ((".nc", ""), ("-", " "), ("_", " "))

    for old, new in final_rules:
        label = label.replace(old, new)

    return label.replace(".", " ").title()




[docs]
def replacement_func_collection_item_id(url):
    """Build a lowercase, underscore-separated ID from a catalog URL.

    Only the path and the query string of ``url`` are used. Percent signs,
    ``/catalog.xml`` and ``/thredds/`` are dropped from the path, and the
    remaining slashes become spaces. When a query string is present, its
    ``=``, ``?`` and ``/`` characters become spaces and it is appended to
    the path. Then ``.xml`` / ``.nc`` are removed, ``-`` and ``_`` become
    spaces, the text is lowercased, and every space and dot is replaced by
    an underscore.
    """
    # TODO (carried over from the original author): a wrong address, e.g. an
    # NcML address instead of a data address, has been reported to end in
    # "TypeError: a bytes-like object is required, not 'str'" while the path
    # is being rewritten.
    pieces = urlparse.urlsplit(url)

    # The order is significant: the multi-character markers have to be
    # removed before the leftover slashes are turned into spaces.
    tidy_path = pieces.path
    for marker, substitute in (
        ("%", ""),
        ("/catalog.xml", ""),
        ("/thredds/", ""),
        ("/", " "),
    ):
        tidy_path = tidy_path.replace(marker, substitute)

    has_query = pieces.query != ""
    if has_query:
        tidy_query = pieces.query
        for marker in ("=", "?", "/"):
            tidy_query = tidy_query.replace(marker, " ")
        # ".xml" is stripped from the path part only, not from the query.
        identifier = tidy_path.replace(".xml", "") + " " + tidy_query
        cleanup = ((".nc", ""), ("-", " "), ("_", " "))
    else:
        identifier = tidy_path
        cleanup = ((".xml", ""), (".nc", ""), ("-", " "), ("_", " "))

    for marker, substitute in cleanup:
        identifier = identifier.replace(marker, substitute)

    return identifier.lower().replace(" ", "_").replace(".", "_")




[docs]
def html2xml(url):
    """Return ``url`` with a ``.html`` path extension changed to ``.xml``.

    Only the extension of the URL *path* is rewritten. Other occurrences of
    ``.html`` (in the host name, in intermediate directories or in the query
    string) are left untouched. URLs whose path does not end in ``.html``
    are returned as re-assembled by ``urlsplit(...).geturl()``.

    :param url: The URL to convert.
    :return: The converted URL.
    """
    parts = urlparse.urlsplit(url)
    stem, extension = os.path.splitext(parts.path)
    if extension == ".html":
        # Swap the extension on the path component only; a blanket
        # ``url.replace`` would also rewrite ".html" elsewhere in the URL.
        parts = parts._replace(path=stem + ".xml")
    return parts.geturl()




[docs]
def xml2html(url):
    """Return ``url`` with a ``.xml`` path extension changed to ``.html``.

    Only the extension of the URL *path* is rewritten. Other occurrences of
    ``.xml`` (in the host name, in intermediate directories or in the query
    string) are left untouched. URLs whose path does not end in ``.xml``
    are returned as re-assembled by ``urlsplit(...).geturl()``.

    :param url: The URL to convert.
    :return: The converted URL.
    """
    parts = urlparse.urlsplit(url)
    stem, extension = os.path.splitext(parts.path)
    if extension == ".xml":
        # Swap the extension on the path component only; a blanket
        # ``url.replace`` would also rewrite ".xml" elsewhere in the URL.
        parts = parts._replace(path=stem + ".html")
    return parts.geturl()




[docs]
def get_xml(url, auth):
    """Fetch ``url`` and return the response body as UTF-8 encoded bytes.

    This is a best-effort helper: any ordinary error raised while fetching
    or encoding makes it return ``None`` instead of propagating. The HTTP
    status code is not inspected, so an error page is returned like any
    other body.

    :param url: The address to request.
    :param auth: Passed through unchanged as the ``auth`` argument of
        ``requests.get``.
    :return: The response text encoded as UTF-8 ``bytes``, or ``None`` when
        the request failed.
    """
    try:
        # NOTE(security): ``verify=False`` disables TLS certificate
        # verification. It is kept for backward compatibility with servers
        # using self-signed certificates; reconsider before using this on
        # untrusted networks.
        response = requests.get(url, None, auth=auth, verify=False)
        return response.text.encode("utf-8")
    except Exception:
        # ``Exception`` rather than ``BaseException``: KeyboardInterrupt and
        # SystemExit must still be able to stop the program.
        return None




[docs]
def references_urls(url, additional):
    """Resolve the reference ``additional`` against the catalog URL ``url``.

    :param url: URL of the catalog the reference was found in.
    :param additional: The reference to resolve. It may be empty/``None``,
        a full ``http://`` or ``https://`` URL, a path starting with ``/``,
        or a path relative to the directory of ``url``.
    :return: ``url`` itself when ``additional`` is empty; ``additional``
        unchanged when it is already a full HTTP(S) URL; otherwise a URL
        built from the scheme and host of ``url`` plus the reference.
    """
    parts = urlparse.urlsplit(url)
    common_url = str(parts.scheme) + "://" + str(parts.netloc)

    if not additional:
        return url
    # Require the full scheme prefix: checking only for a leading "http"
    # would treat relative names such as "httpdata/catalog.xml" as absolute.
    if additional.startswith(("http://", "https://")):
        return additional
    if additional.startswith("/"):
        # Absolute path on the same server.
        return urlparse.urljoin(common_url, additional)

    # Relative path: resolve against the directory that contains the catalog.
    catalog_dir = urlparse.urljoin(common_url, os.path.split(parts.path)[0])
    return catalog_dir + "/" + additional




[docs]
def xml_processing(catalog, auth):
    """Collect the XML address, the ID and the XML content of a catalog.

    The catalog address is first passed through ``html2xml``. The resulting
    address is then used both to derive an ID (``replacement_func``) and to
    download the document (``get_xml``).

    :return: A tuple ``(xml_address, catalog_id, xml_content)``.
    """
    xml_address = html2xml(catalog)
    return (
        xml_address,
        replacement_func(xml_address),
        get_xml(xml_address, auth),
    )




[docs]
def xml_tag_name_ncml(input_xml, var_name):
    """Compose the lookup name of an NcML element for ``var_name``.

    For the variable names listed below, the element's tag is used as the
    prefix; for every other name the prefix is the string form of the
    element's ``name`` attribute (``"None"`` when the attribute is absent).
    The prefix and ``var_name`` are joined with an underscore.
    """
    # Variable names that share the same `input_xml.tag` and are therefore
    # keyed by the tag instead of the `name` attribute.
    tag_keyed_names = ("var_lists", "var_dims", "var_descs", "keyword")

    prefix = (
        input_xml.tag
        if var_name in tag_keyed_names
        else str(input_xml.get("name"))
    )
    return prefix + "_" + var_name




[docs]
def xml_tag_finder(input_xml, web_service, var_name):
    """Return the lookup name of an element for the given TDS web service.

    ``"iso"`` and ``"wms"`` use ``<tag>_<var_name>``; ``"ncml"`` delegates
    to ``xml_tag_name_ncml``. Any other ``web_service`` yields ``None``.
    All candidate names are computed up front, regardless of which service
    is requested.
    """
    tag_based_name = input_xml.tag + "_" + var_name
    ncml_name = xml_tag_name_ncml(input_xml, var_name)

    names_by_service = dict.fromkeys(("iso", "wms"), tag_based_name)
    names_by_service["ncml"] = ncml_name

    return names_by_service.get(web_service)