Source code for pygeohydro.pygeohydro

"""Accessing data from the supported databases through their APIs."""
import io
import zipfile
from collections import OrderedDict
from typing import Any, Dict, List, Optional, Tuple, Union

import folium
import geopandas as gpd
import numpy as np
import pandas as pd
import pygeoogc as ogc
import pygeoutils as geoutils
import rasterio as rio
import xarray as xr
from pygeoogc import WMS, RetrySession, ServiceURL
from pynhd import NLDI, WaterData
from shapely.geometry import MultiPolygon, Point, Polygon

from . import helpers
from .exceptions import InvalidInputRange, InvalidInputType, InvalidInputValue

DEF_CRS = "epsg:4326"


[docs]class NID:
    """Retrieve data from the National Inventory of Dams."""

    def __init__(self) -> None:
        self.session = RetrySession()
        self.base_url = "https://nid.sec.usace.army.mil/ords"
        self.headers = {
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
            "Accept-Encoding": "gzip, deflate, br",
            "Connection": "keep-alive",
            "Upgrade-Insecure-Requests": "1",
            "DNT": "1",
        }

[docs]    def get_xlsx(self) -> io.BytesIO:
        """Get the excel file that containes the dam data."""
        self.headers.update({"Cookie": "ORA_WWV_APP_105=ORA_WWV-QM2rrHvxwzROBqNYVD0WIlg2"})
        payload = {"InFileName": "NID2019_U.xlsx"}
        r = self.session.get(
            f"{self.base_url}/NID_R.DOWNLOADFILE", payload=payload, headers=self.headers
        )
        return io.BytesIO(r.content)

[docs]    def get_attrs(self, variables: List[str]) -> Dict[str, str]:
        """Get descriptions of the NID variables."""
        self.headers.update({"Cookie": "ORA_WWV_APP_105=ORA_WWV-iaBJjzLW1v3a1s1mXEub0S7R"})

        desc: Dict[str, str] = {}
        for v in variables:
            payload = {"p": f"105:10:10326760693796::NO::P10_COLUMN_NAME:{v}"}
            page = self.session.get(f"{self.base_url}/f", payload=payload, headers=self.headers)
            tables = pd.read_html(page.text)
            desc[v] = tables[0]["Field Definition"].values[0]

        return desc

[docs]    def get_codes(self) -> str:
        """Get the definitions of letter codes in NID database."""
        self.headers.update({"Cookie": "ORA_WWV_APP_105=ORA_WWV-Bk16kg_4BwSK2anC36B4XBQn"})
        payload = {"p": "105:21:16137342922753::NO:::"}
        page = self.session.get(f"{self.base_url}/f", payload=payload, headers=self.headers)
        return page.text


[docs]def get_nid() -> gpd.GeoDataFrame:
    """Get all dams in the US (over 91K) from National Inventory of Dams 2019.

    Notes
    -----
    This function downloads a 25 MB `xlsx` file and convert it into a
    GeoDataFrame. So, your net speed might be a bottleneck. Another
    bottleneck is data loading since the dataset has more than 91K rows,
    it might take sometime for Pandas to load the data into memory.

    Returns
    -------
    geopandas.GeoDataFrame
        A GeoDataFrame containing all the available dams in the database. This dataframe
        has an ``attrs`` property that contains definitions of all the NID variables including
        their units. You can access this dictionary by, for example, ``nid.attrs`` assuming
        that ``nid`` is the dataframe. For example, ``nli.attrs["VOLUME"]`` returns the definition
        of the ``VOLUME`` column in NID.
    """
    service = NID()
    nid = pd.read_excel(service.get_xlsx(), engine="openpyxl", index_col=0)
    attrs = service.get_attrs(nid.columns)

    nid["INSPECTION_DATE"] = nid.INSPECTION_DATE.str.replace("3018", "2018")
    for c in ["INSPECTION_DATE", "SUBMIT_DATE", "EAP_LAST_REV_DATE"]:
        nid[c] = pd.to_datetime(nid[c])

    for c in ["NUMBER_OF_LOCKS", "YEAR_COMPLETED"]:
        nid[c] = nid[c].astype("Int16")

    nid["geometry"] = [Point(x, y) for x, y in zip(nid.LONGITUDE, nid.LATITUDE)]
    nid = gpd.GeoDataFrame(nid, crs="epsg:4326").drop(columns=["LONGITUDE", "LATITUDE"])
    nid.loc[~nid.is_valid, "geometry"] = pd.NA
    nid.attrs = attrs

    return nid


[docs]def get_nid_codes() -> pd.DataFrame:
    """Get the definitions of letter codes in NID database.

    Returns
    -------
    pandas.DataFrame
        A multi-index dataframe where the first index is code categories and the second one is
        letter codes. For example, ``tables.loc[('Core Type',  'A')]`` returns Bituminous Concrete.
    """
    tables_txt = NID().get_codes()

    summary = [
        s.split("summary=")[-1].split('"')[1::2][0]
        for s in tables_txt.split("\n")
        if "summary" in s
    ]

    tables = [
        pd.read_html(tables_txt, attrs={"class": "t-Report-report", "summary": s})[0]
        for s in summary[1:]
    ]
    tables = [
        tb.rename(columns={"CODE": "Code", "VALUE": "Value"}).set_index("Code") for tb in tables
    ]
    return pd.concat(tables, keys=summary[1:])


[docs]def ssebopeta_byloc(
    coords: Tuple[float, float],
    dates: Union[Tuple[str, str], Union[int, List[int]]],
) -> pd.DataFrame:
    """Daily actual ET for a location from SSEBop database in mm/day.

    Parameters
    ----------
    coords : tuple
        Longitude and latitude of the location of interest as a tuple (lon, lat)
    dates : tuple or list, optional
        Start and end dates as a tuple (start, end) or a list of years [2001, 2010, ...].

    Returns
    -------
    pandas.DataFrame
        Daily actual ET for a location
    """
    if not (isinstance(coords, tuple) and len(coords) == 2):
        raise InvalidInputType("coords", "tuple", "(lon, lat)")

    lon, lat = coords

    f_list = _get_ssebopeta_urls(dates)
    session = RetrySession()

    with session.onlyipv4():

        def _ssebop(urls: Tuple[str, str]) -> Dict[str, Union[str, List[float]]]:
            dt, url = urls
            r = session.get(url)
            z = zipfile.ZipFile(io.BytesIO(r.content))

            with rio.MemoryFile() as memfile:
                memfile.write(z.read(z.filelist[0].filename))
                with memfile.open() as src:
                    return {
                        "dt": dt,
                        "eta": [e[0] for e in src.sample([(lon, lat)])][0],
                    }

        eta_list = ogc.utils.threading(_ssebop, f_list, max_workers=4)
    eta = pd.DataFrame.from_records(eta_list)
    eta.columns = ["datetime", "eta (mm/day)"]
    eta = eta.set_index("datetime")
    return eta * 1e-3


[docs]def ssebopeta_bygeom(
    geometry: Union[Polygon, Tuple[float, float, float, float]],
    dates: Union[Tuple[str, str], Union[int, List[int]]],
    geo_crs: str = DEF_CRS,
) -> xr.DataArray:
    """Get daily actual ET for a region from SSEBop database.

    Notes
    -----
    Since there's still no web service available for subsetting SSEBop, the data first
    needs to be downloaded for the requested period then it is masked by the
    region of interest locally. Therefore, it's not as fast as other functions and
    the bottleneck could be the download speed.

    Parameters
    ----------
    geometry : shapely.geometry.Polygon or tuple
        The geometry for downloading clipping the data. For a tuple bbox,
        the order should be (west, south, east, north).
    dates : tuple or list, optional
        Start and end dates as a tuple (start, end) or a list of years [2001, 2010, ...].
    geo_crs : str, optional
        The CRS of the input geometry, defaults to epsg:4326.

    Returns
    -------
    xarray.DataArray
        Daily actual ET within a geometry in mm/day at 1 km resolution
    """
    _geometry = geoutils.geo2polygon(geometry, geo_crs, DEF_CRS)

    f_list = _get_ssebopeta_urls(dates)

    session = RetrySession()

    with session.onlyipv4():

        def _ssebop(url_stamped: Tuple[str, str]) -> Tuple[str, xr.DataArray]:
            dt, url = url_stamped
            resp = session.get(url)
            zfile = zipfile.ZipFile(io.BytesIO(resp.content))
            content = zfile.read(zfile.filelist[0].filename)
            ds = geoutils.gtiff2xarray({"eta": content}, _geometry, DEF_CRS)
            return dt, ds.expand_dims({"time": [dt]})

        resp_list = ogc.utils.threading(_ssebop, f_list, max_workers=4)
        data = xr.merge(OrderedDict(sorted(resp_list, key=lambda x: x[0])).values())

    eta = data.eta.copy()
    eta *= 1e-3
    eta.attrs.update({"units": "mm/day", "nodatavals": (np.nan,)})
    return eta


def _get_ssebopeta_urls(
    dates: Union[Tuple[str, str], Union[int, List[int]]]
) -> List[Tuple[pd.DatetimeIndex, str]]:
    """Get list of URLs for SSEBop dataset within a period or years."""
    if not isinstance(dates, (tuple, list, int)):
        raise InvalidInputType(
            "dates", "tuple, list, or int", "(start, end), year, or [years, ...]"
        )

    if isinstance(dates, tuple):
        if len(dates) != 2:
            raise InvalidInputType(
                "dates", "Start and end should be passed as a tuple of length 2."
            )
        start = pd.to_datetime(dates[0])
        end = pd.to_datetime(dates[1])
        if start < pd.to_datetime("2000-01-01") or end > pd.to_datetime("2020-12-31"):
            raise InvalidInputRange("SSEBop database ranges from 2000 to 2020.")
        date_range = pd.date_range(start, end)
    else:
        years = dates if isinstance(dates, list) else [dates]
        seebop_yrs = np.arange(2000, 2020)

        if any(y not in seebop_yrs for y in years):
            raise InvalidInputRange("SSEBop database ranges from 2000 to 2018.")

        d_list = [pd.date_range(f"{y}0101", f"{y}1231") for y in years]
        date_range = d_list[0] if len(d_list) == 1 else d_list[0].union_many(d_list[1:])

    base_url = ServiceURL().http.ssebopeta
    f_list = [
        (d, f"{base_url}/det{d.strftime('%Y%j')}.modisSSEBopETactual.zip") for d in date_range
    ]

    return f_list


[docs]def nlcd(
    geometry: Union[Polygon, MultiPolygon, Tuple[float, float, float, float]],
    resolution: float,
    years: Optional[Dict[str, Optional[int]]] = None,
    geo_crs: str = DEF_CRS,
    crs: str = DEF_CRS,
) -> xr.Dataset:
    """Get data from NLCD database (2016).

    Download land use/land cover data from NLCD (2016) database within
    a given geometry in epsg:4326.

    Parameters
    ----------
    geometry : Polygon, MultiPolygon, or tuple of length 4
        The geometry or bounding box (west, south, east, north) for extracting the data.
    resolution : float
        The data resolution in meters. The width and height of the output are computed in pixel
        based on the geometry bounds and the given resolution.
    years : dict, optional
        The years for NLCD data as a dictionary, defaults to
        {'impervious': 2016, 'cover': 2016, 'canopy': 2016}. Set the value of a layer to None,
        to ignore it.
    geo_crs : str, optional
        The CRS of the input geometry, defaults to epsg:4326.
    crs : str, optional
        The spatial reference system to be used for requesting the data, defaults to
        epsg:4326.

    Returns
    -------
     xarray.DataArray
         NLCD within a geometry
    """
    years = {"impervious": 2016, "cover": 2016, "canopy": 2016} if years is None else years
    layers = _nlcd_layers(years)

    _geometry = geoutils.geo2polygon(geometry, geo_crs, crs)

    wms = WMS(ServiceURL().wms.mrlc, layers=layers, outformat="image/geotiff", crs=crs)
    r_dict = wms.getmap_bybox(_geometry.bounds, resolution, box_crs=crs)

    ds = geoutils.gtiff2xarray(r_dict, _geometry, crs)

    if isinstance(ds, xr.DataArray):
        ds = ds.to_dataset()

    for n in ds.keys():
        if "cover" in n.lower():
            ds = ds.rename({n: "cover"})
            ds.cover.attrs["units"] = "classes"
        elif "canopy" in n.lower():
            ds = ds.rename({n: "canopy"})
            ds.canopy.attrs["units"] = "%"
        elif "impervious" in n.lower():
            ds = ds.rename({n: "impervious"})
            ds.impervious.attrs["units"] = "%"

    return ds


def _nlcd_layers(years: Dict[str, Optional[int]]) -> List[str]:
    """Get NLCD layers for the provided years dictionary."""
    nlcd_meta = helpers.nlcd_helper()

    names = ["impervious", "cover", "canopy"]
    avail_years = {n: nlcd_meta[f"{n}_years"] + [None] for n in names}

    if not isinstance(years, dict):
        raise InvalidInputType(
            "years", "dict", "{'impervious': 2016, 'cover': 2016, 'canopy': 2016}"  # noqa: FS003
        )

    if any(yr not in avail_years[lyr] for lyr, yr in years.items()):
        vals = [f"\n{lyr}: {', '.join(str(y) for y in yr)}" for lyr, yr in avail_years.items()]
        raise InvalidInputValue("years", vals)

    layers = [
        f'NLCD_{years["canopy"]}_Tree_Canopy_L48',
        f'NLCD_{years["cover"]}_Land_Cover_Science_product_L48',
        f'NLCD_{years["impervious"]}_Impervious_L48',
    ]

    nones = [lyr for lyr in layers if "None" in lyr]
    for lyr in nones:
        layers.remove(lyr)

    if len(layers) == 0:
        raise InvalidInputRange("At least one of the layers should have a non-None year.")

    return layers


[docs]class NWIS:
    """Access NWIS web service."""

    def __init__(self) -> None:
        self.session = RetrySession()
        self.url = ServiceURL().restful.nwis

[docs]    @staticmethod
    def query_byid(ids: Union[str, List[str]]) -> Dict[str, str]:
        """Generate the geometry keys and values of an ArcGISRESTful query."""
        if not isinstance(ids, (str, list)):
            raise InvalidInputType("ids", "str or list")

        ids = [str(i) for i in ids] if isinstance(ids, list) else [str(ids)]
        query = {"sites": ",".join(ids)}

        return query

[docs]    @staticmethod
    def query_bybox(bbox: Tuple[float, float, float, float]) -> Dict[str, str]:
        """Generate the geometry keys and values of an ArcGISRESTful query."""
        geoutils.check_bbox(bbox)
        query = {"bBox": ",".join(f"{b:.06f}" for b in bbox)}

        return query

[docs]    def get_info(self, query: Dict[str, str], expanded: bool = False) -> pd.DataFrame:
        """Get NWIS stations by a list of IDs or within a bounding box.

        Only stations that record(ed) daily streamflow data are returned.
        The following columns are included in the dataframe with expanded
        set to False:

        ==================  ==================================
        Name                Description
        ==================  ==================================
        site_no             Site identification number
        station_nm          Site name
        site_tp_cd          Site type
        dec_lat_va          Decimal latitude
        dec_long_va         Decimal longitude
        coord_acy_cd        Latitude-longitude accuracy
        dec_coord_datum_cd  Decimal Latitude-longitude datum
        alt_va              Altitude of Gage/land surface
        alt_acy_va          Altitude accuracy
        alt_datum_cd        Altitude datum
        huc_cd              Hydrologic unit code
        parm_cd             Parameter code
        stat_cd             Statistical code
        ts_id               Internal timeseries ID
        loc_web_ds          Additional measurement description
        medium_grp_cd       Medium group code
        parm_grp_cd         Parameter group code
        srs_id              SRS ID
        access_cd           Access code
        begin_date          Begin date
        end_date            End date
        count_nu            Record count
        hcdn_2009           Whether is in HCDN-2009 stations
        ==================  ==================================

        Parameters
        ----------
        query : dict
            A dictionary containing query by IDs or BBOX. Use ``query_byid`` or ``query_bbox``
            class methods to generate the queries.
        expanded : bool, optional
            Whether to get expanded sit information for example drainage area.

        Returns
        -------
        pandas.DataFrame
            NWIS stations
        """
        if not isinstance(query, dict):
            raise InvalidInputType("query", "dict")

        output_type = [{"outputDataTypeCd": "dv"}]
        if expanded:
            output_type.append({"siteOutput": "expanded"})

        site_list = []
        for t in output_type:
            payload = {
                **query,
                **t,
                "format": "rdb",
                "parameterCd": "00060",
                "siteStatus": "all",
                "hasDataTypeCd": "dv",
            }

            resp = self.session.post(f"{self.url}/site", payload).text.split("\n")

            r_list = [txt.split("\t") for txt in resp if "#" not in txt]
            r_dict = [dict(zip(r_list[0], st)) for st in r_list[2:]]

            site_list.append(pd.DataFrame.from_dict(r_dict).dropna())

        if expanded:
            sites = pd.merge(
                *site_list, on="site_no", how="outer", suffixes=("", "_overlap")
            ).filter(regex="^(?!.*_overlap)")
        else:
            sites = site_list[0]

        sites.loc[sites.alt_va == "", "alt_va"] = pd.NA
        try:
            sites = sites.drop(sites[sites.parm_cd != "00060"].index)
            sites["begin_date"] = pd.to_datetime(sites["begin_date"])
            sites["end_date"] = pd.to_datetime(sites["end_date"])
        except AttributeError:
            pass

        float_cols = ["dec_lat_va", "dec_long_va", "alt_va", "alt_acy_va"]
        if expanded:
            float_cols += ["drain_area_va", "contrib_drain_area_va"]

        sites[float_cols] = sites[float_cols].apply(lambda x: pd.to_numeric(x, errors="coerce"))

        sites = sites[sites.site_no.apply(len) == 8]

        site_ids = sites.site_no.tolist()
        gii = WaterData("gagesii", DEF_CRS)
        hcdn_dict: Dict[str, str] = {}
        try:
            hcdn = gii.byid("staid", site_ids)
            hcdn_dict.update(hcdn[["staid", "hcdn_2009"]].set_index("staid").hcdn_2009.to_dict())
        except AttributeError:
            for sid in site_ids:
                try:
                    hcdn = gii.byid("staid", sid)
                    hcdn_dict.update(
                        hcdn[["staid", "hcdn_2009"]].set_index("staid").hcdn_2009.to_dict()
                    )
                except AttributeError:
                    hcdn_dict.update({sid: None})

        def hcdn_2009(x: str) -> Optional[bool]:
            _hcdn = hcdn_dict.get(x)
            if _hcdn is not None:
                return len(_hcdn) > 0
            return None

        sites["hcdn_2009"] = sites.site_no.apply(hcdn_2009)

        return sites

[docs]    def get_streamflow(
        self, station_ids: Union[List[str], str], dates: Tuple[str, str], mmd: bool = False
    ) -> pd.DataFrame:
        """Get daily streamflow observations from USGS.

        Parameters
        ----------
        station_ids : str, list
            The gage ID(s)  of the USGS station.
        dates : tuple
            Start and end dates as a tuple (start, end).
        mmd : bool
            Convert cms to mm/day based on the contributing drainage area of the stations.

        Returns
        -------
        pandas.DataFrame
            Streamflow data observations in cubic meter per second (cms)
        """
        if not isinstance(station_ids, (str, list)):
            raise InvalidInputType("ids", "str or list")

        station_ids = station_ids if isinstance(station_ids, list) else [station_ids]

        if not isinstance(dates, tuple) or len(dates) != 2:
            raise InvalidInputType("dates", "tuple", "(start, end)")

        start = pd.to_datetime(dates[0])
        end = pd.to_datetime(dates[1])

        siteinfo = self.get_info(self.query_byid(station_ids))
        check_dates = siteinfo.loc[
            (
                (siteinfo.stat_cd == "00003")
                & (start < siteinfo.begin_date)
                & (end > siteinfo.end_date)
            ),
            "site_no",
        ].tolist()
        nas = [s for s in station_ids if s in check_dates]
        if len(nas) > 0:
            raise InvalidInputRange(
                "Daily Mean data unavailable for the specified time "
                + "period for the following stations:\n"
                + ", ".join(nas)
            )

        payload = {
            "format": "json",
            "sites": ",".join(station_ids),
            "startDT": start.strftime("%Y-%m-%d"),
            "endDT": end.strftime("%Y-%m-%d"),
            "parameterCd": "00060",
            "statCd": "00003",
            "siteStatus": "all",
        }

        resp = self.session.post(f"{self.url}/dv", payload)

        time_series = resp.json()["value"]["timeSeries"]
        r_ts = {
            t["sourceInfo"]["siteCode"][0]["value"]: t["values"][0]["value"] for t in time_series
        }

        def to_df(col: str, dic: Dict[str, Any]) -> pd.DataFrame:
            discharge = pd.DataFrame.from_records(dic, exclude=["qualifiers"], index=["dateTime"])
            discharge.index = pd.to_datetime(discharge.index)
            discharge.columns = [col]
            return discharge

        qobs = pd.concat([to_df(f"USGS-{s}", t) for s, t in r_ts.items()], axis=1)

        # Convert cfs to cms
        qobs = qobs.astype("float64") * 0.028316846592

        if mmd:
            nldi = NLDI()
            basins = nldi.get_basins(station_ids)
            eck4 = "+proj=eck4 +lon_0=0 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs"
            area = basins.to_crs(eck4).area
            ms2mmd = 1000.0 * 24.0 * 3600.0
            qobs = qobs.apply(lambda x: x / area.loc[x.name.split("-")[-1]] * ms2mmd)
        return qobs


[docs]def interactive_map(bbox: Tuple[float, float, float, float]) -> folium.Map:
    """Generate an interactive map including all USGS stations within a bounding box.

    Notes
    -----
    Only stations that record(ed) daily streamflow data are included.

    Parameters
    ----------
    bbox : tuple
        List of corners in this order (west, south, east, north)

    Returns
    -------
    folium.Map
        Interactive map within a bounding box.
    """
    geoutils.check_bbox(bbox)

    nwis = NWIS()
    query = nwis.query_bybox(bbox)
    sites = nwis.get_info(query, expanded=True)
    sites["coords"] = list(sites[["dec_lat_va", "dec_long_va"]].itertuples(name=None, index=False))
    sites["altitude"] = (
        sites["alt_va"].astype("str") + " ft above " + sites["alt_datum_cd"].astype("str")
    )

    sites["drain_area_va"] = sites["drain_area_va"].astype("str") + " square miles"
    sites["contrib_drain_area_va"] = sites["contrib_drain_area_va"].astype("str") + " square miles"

    cols_old = [
        "site_no",
        "station_nm",
        "coords",
        "altitude",
        "huc_cd",
        "drain_area_va",
        "contrib_drain_area_va",
        "begin_date",
        "end_date",
        "hcdn_2009",
    ]

    cols_new = [
        "Site No.",
        "Station Name",
        "Coordinate",
        "Altitude",
        "HUC8",
        "Drainage Area",
        "Contributing Drainage Area",
        "Begin date",
        "End data",
        "HCDN 2009",
    ]
    sites = sites.rename(columns=dict(zip(cols_old, cols_new)))[cols_new]

    msgs = []
    for row in sites.itertuples(index=False):
        msg = ""
        for col in sites:
            msg += "".join(
                ["<strong>", col, "</strong> : ", f"{row[sites.columns.get_loc(col)]}<br>"]
            )
        msgs.append(msg[:-4])

    sites["msg"] = msgs

    west, south, east, north = bbox
    lon = (west + east) * 0.5
    lat = (south + north) * 0.5

    imap = folium.Map(location=(lat, lon), tiles="Stamen Terrain", zoom_start=10)

    for coords, msg in sites[["Coordinate", "msg"]].itertuples(name=None, index=False):
        folium.Marker(
            location=coords, popup=folium.Popup(msg, max_width=250), icon=folium.Icon()
        ).add_to(imap)

    return imap


[docs]def cover_statistics(ds: xr.Dataset) -> Dict[str, Union[np.ndarray, Dict[str, float]]]:
    """Percentages of the categorical NLCD cover data.

    Parameters
    ----------
    ds : xarray.Dataset

    Returns
    -------
    dict
        Statistics of NLCD cover data
    """
    nlcd_meta = helpers.nlcd_helper()
    cover_arr = ds.values
    total_pix = np.count_nonzero(~np.isnan(cover_arr))

    class_percentage = dict(
        zip(
            list(nlcd_meta["classes"].values()),
            [
                cover_arr[cover_arr == int(cat)].shape[0] / total_pix * 100.0
                for cat in list(nlcd_meta["classes"].keys())
            ],
        )
    )

    cov = np.floor_divide(cover_arr[~np.isnan(cover_arr)], 10).astype("int")
    cat_list = (
        np.array([np.count_nonzero(cov == c) for c in range(10) if c != 6]) / total_pix * 100.0
    )

    category_percentage = dict(zip(list(nlcd_meta["categories"].keys()), cat_list))

    return {"classes": class_percentage, "categories": category_percentage}