Source code for firebench.tools.check_data_quality
import numpy as np
from pint import Quantity
from .rate_of_spread_model import RateOfSpreadModel
from .input_info import ParameterType
from .logging_config import logger
[docs]
def check_input_completeness(input_data: dict, metadata_dict: dict) -> None:
    """
    Check the completeness of the input data against the metadata dictionary.

    Every metadata entry typed as `ParameterType.input` must have its standard
    name present in `input_data`. Entries typed as `ParameterType.optional`
    may be absent; their absence is only reported at info level. Entries of
    any other type are ignored.

    Parameters
    ----------
    input_data : dict
        Dictionary containing the input data, keyed by standard name.
    metadata_dict : dict
        Dictionary containing metadata. Only the values are used; each value is a
        dictionary with at least the keys "type" (compared against `ParameterType`
        members) and, for input/optional entries, "std_name" (the standard name of
        the data item).

    Raises
    ------
    KeyError
        If the standard name of a mandatory (`ParameterType.input`) entry is missing
        from `input_data`, or if a metadata entry lacks a required key.
    """
    # The metadata keys are never used, so iterate over the values only.
    for item in metadata_dict.values():
        param_type = item["type"]
        if param_type == ParameterType.input:
            # Mandatory input: absence is an error, logged before raising.
            std_name = item["std_name"]
            if std_name not in input_data:
                logger.error("The data %s is missing in the input dict", std_name)
                raise KeyError(f"The data '{std_name}' is missing in the input dict")
        elif param_type == ParameterType.optional:
            # Optional input: absence is only reported, never fatal.
            std_name = item["std_name"]
            if std_name not in input_data:
                logger.info(
                    "The optional data %s is missing in the input dict. Default value will be used.",
                    std_name,
                )
[docs]
def convert_input_data_units(input_data: dict, metadata_dict: dict) -> dict:
    """
    Convert the units of input data based on the metadata dictionary.

    Parameters
    ----------
    input_data : dict
        Dictionary containing the input data with units, keyed by standard name.
        Each value used here must provide a ``to(units)`` method (as `pint.Quantity` does).
    metadata_dict : dict
        Dictionary containing metadata. Only the values are used; each value is a
        dictionary with at least the keys "type", "std_name" (standard name of the
        data item) and "units" (target units).

    Returns
    -------
    dict
        A new dictionary keyed by standard name whose values are the corresponding
        entries of `input_data` converted to the target units.

    Notes
    -----
    - Metadata entries typed as `ParameterType.output` are skipped.
    - A standard name that is absent from `input_data` is skipped silently; no
      exception is raised for it here.
    - Entries of `input_data` that are not described by a non-output metadata
      entry do not appear in the result.
    - Any exception raised by the ``to`` call (e.g. for incompatible units)
      propagates unchanged.
    """
    output_dict = {}
    # The metadata keys are never used, so iterate over the values only.
    for item in metadata_dict.values():
        if item["type"] == ParameterType.output:
            continue
        std_name = item["std_name"]
        if std_name in input_data:
            output_dict[std_name] = input_data[std_name].to(item["units"])
    return output_dict
[docs]
def check_validity_range(input_data: dict, metadata_dict: dict) -> None:
    """
    Check that the input data values fall within the validity range given by the metadata.

    Parameters
    ----------
    input_data : dict
        Dictionary containing the input data with units, keyed by standard name.
        Each value checked here must expose a ``magnitude`` attribute (as `pint.Quantity` does).
        The magnitudes are compared to the range bounds as-is; no unit conversion is done here.
    metadata_dict : dict
        Dictionary containing metadata. Only the values are used; each value is a
        dictionary with at least the keys "type", "std_name" (standard name of the
        data item), "units" (only used in the error message) and "range"
        (valid range as a (min, max) pair).

    Raises
    ------
    ValueError
        If the smallest value of an input is below the lower bound, or the largest
        value is above the upper bound, of its validity range.

    Notes
    -----
    - Metadata entries typed as `ParameterType.output` and standard names absent
      from `input_data` are skipped.
    - Extremes are computed with `np.nanmin` / `np.nanmax`, so NaN entries are ignored.
    - Zero-size data is treated as valid: it holds no value that could violate the range.
    """
    # The metadata keys are never used, so iterate over the values only.
    for item in metadata_dict.values():
        if item["type"] == ParameterType.output:
            continue
        std_name = item["std_name"]
        if std_name not in input_data:
            continue

        magnitude = input_data[std_name].magnitude
        if np.size(magnitude) == 0:
            # np.nanmin/np.nanmax raise their own ValueError on zero-size input, which
            # callers could not tell apart from a genuine out-of-range error.
            continue

        data_min = np.nanmin(magnitude)
        if data_min < item["range"][0]:
            raise ValueError(
                f"min value of input variable {std_name}: {data_min:.2e} {item['units']} "
                f"lower than lower bound of validity range {item['range'][0]:.2e}."
            )

        data_max = np.nanmax(magnitude)
        if data_max > item["range"][1]:
            raise ValueError(
                f"max value of input variable {std_name}: {data_max:.2e} {item['units']} "
                f"greater than upper bound of validity range {item['range'][1]:.2e}."
            )
[docs]
def extract_magnitudes(input_dict):
    """
    Build a dictionary of magnitudes from a dictionary of quantities.

    Parameters
    ----------
    input_dict : dict
        A dictionary whose values are expected to expose a 'magnitude' attribute
        (as `pint.Quantity` does).

    Returns
    -------
    dict
        A new dictionary with the same keys as `input_dict`. Each value is the
        'magnitude' attribute of the matching value in `input_dict` when it can be
        read, otherwise the original value itself.

    Notes
    -----
    If reading 'value.magnitude' raises AttributeError, TypeError or ValueError,
    the failure is logged at info level and the original value is kept unchanged
    under the same key.
    """
    stripped = {}
    for name, quantity in input_dict.items():
        try:
            magnitude = quantity.magnitude
        except (AttributeError, TypeError, ValueError) as err:
            # Not a quantity (or magnitude unavailable): pass the value through as-is.
            logger.info("Failed to get magnitude for key '%s': %s", name, err)
            stripped[name] = quantity
        else:
            stripped[name] = magnitude
    return stripped
[docs]
def check_data_quality_ros_model(input_dict: dict[str, Quantity], ros_model: RateOfSpreadModel) -> dict:
    """
    Check and process the input data quality for a Rate of Spread (ROS) model.

    The input data goes through three steps, all driven by ``ros_model.metadata``:

    1. Completeness: `check_input_completeness` verifies the inputs required by
       the metadata are present in `input_dict`.
    2. Unit conversion: `convert_input_data_units` converts the inputs to the
       units specified in the metadata.
    3. Validity range: `check_validity_range` verifies the converted values lie
       within the ranges specified in the metadata.

    Parameters
    ----------
    input_dict : dict
        Dictionary containing the input data for the ROS model. The keys are the
        standard names of the variables, and the values are quantities with units.
    ros_model : RateOfSpreadModel
        Object providing the ROS model metadata through its ``metadata`` attribute.

    Returns
    -------
    dict
        The dictionary produced by `convert_input_data_units`: inputs keyed by
        standard name and converted to the metadata units.
        NOTE(review): the values are returned as converted quantities; magnitudes
        are not extracted here even though earlier documentation said so. Apply
        `extract_magnitudes` to the result if unitless values are needed.

    Raises
    ------
    KeyError, ValueError
        Propagated from the completeness and validity-range checks respectively.
    """
    metadata = ros_model.metadata

    # Step 1: every mandatory input must be present.
    check_input_completeness(input_dict, metadata)

    # Step 2: bring the inputs to the units declared by the model metadata.
    converted = convert_input_data_units(input_dict, metadata)

    # Step 3: range check is done on the converted values, not the raw inputs.
    check_validity_range(converted, metadata)

    return converted