Source code for petab.lint

"""Integrity checks and tests for specific features used"""

import copy
import logging
import numbers
import re
from collections import Counter
from typing import Any, Iterable, Optional

import numpy as np
import pandas as pd
import sympy as sp
from sympy.abc import _clash

import petab

from . import core, measurements, parameters
from .C import *  # noqa: F403
from .models import Model

logger = logging.getLogger(__name__)
__all__ = [
    "assert_all_parameters_present_in_parameter_df",
    "assert_measured_observables_defined",
    "assert_measurement_conditions_present_in_condition_table",
    "assert_measurements_not_null",
    "assert_measurements_numeric",
    "assert_model_parameters_in_condition_or_parameter_table",
    "assert_no_leading_trailing_whitespace",
    "assert_noise_distributions_valid",
    "assert_parameter_bounds_are_numeric",
    "assert_parameter_estimate_is_boolean",
    "assert_parameter_id_is_string",
    "assert_parameter_prior_parameters_are_valid",
    "assert_parameter_prior_type_is_valid",
    "assert_parameter_scale_is_valid",
    "assert_unique_observable_ids",
    "assert_unique_parameter_ids",
    "check_condition_df",
    "check_ids",
    "check_measurement_df",
    "check_observable_df",
    "check_parameter_bounds",
    "check_parameter_df",
    "condition_table_is_parameter_free",
    "get_non_unique",
    "is_scalar_float",
    "is_valid_identifier",
    "lint_problem",
    "measurement_table_has_observable_parameter_numeric_overrides",
    "measurement_table_has_timepoint_specific_mappings",
    "observable_table_has_nontrivial_noise_formula",
]


def _check_df(df: pd.DataFrame, req_cols: Iterable, name: str) -> None:
    """Check if given columns are present in DataFrame

    Arguments:
        df: Dataframe to check
        req_cols: Column names which have to be present
        name: Name of the DataFrame to be included in error message

    Raises:
        AssertionError: if a column is missing
    """
    if missing_cols := set(req_cols) - set(df.columns.values):
        raise AssertionError(
            f"DataFrame {name} requires the columns {missing_cols}."
        )


def assert_no_leading_trailing_whitespace(
    names_list: Iterable[str], name: str
) -> None:
    """Check that there is no leading or trailing whitespace in elements of
    Iterable

    Arguments:
        names_list: strings to check for whitespace
        name: name of `names_list` for error messages

    Raises:
        AssertionError: if there is leading or trailing whitespace
    """
    r = re.compile(r"(?:^\s)|(?:\s$)")
    for i, x in enumerate(names_list):
        if isinstance(x, str) and r.search(x):
            raise AssertionError(f"Whitespace around {name}[{i}] = '{x}'.")
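

# Example: a minimal doctest-style sketch of the whitespace check above;
# the sample values are hypothetical.
#
# >>> assert_no_leading_trailing_whitespace(["a", "b"], "ids")   # passes
# >>> assert_no_leading_trailing_whitespace(["a", "b "], "ids")
# Traceback (most recent call last):
#     ...
# AssertionError: Whitespace around ids[1] = 'b '.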


def check_condition_df(
    df: pd.DataFrame,
    model: Optional[Model] = None,
    observable_df: Optional[pd.DataFrame] = None,
    mapping_df: Optional[pd.DataFrame] = None,
) -> None:
    """Run sanity checks on PEtab condition table

    Arguments:
        df: PEtab condition DataFrame
        model: Model for additional checking of parameter IDs
        observable_df: PEtab observables DataFrame
        mapping_df: PEtab mapping DataFrame

    Raises:
        AssertionError: in case of problems
    """
    # Check required columns are present
    req_cols = []
    _check_df(df, req_cols, "condition")

    # Check for correct index
    if df.index.name != CONDITION_ID:
        raise AssertionError(
            f"Condition table has wrong index {df.index.name}. "
            f"Expected {CONDITION_ID}."
        )

    check_ids(df.index.values, kind="condition")

    if not df.index.is_unique:
        raise AssertionError(
            "Non-unique condition IDs: "
            f"{df.index.values[df.index.duplicated()]}"
        )

    for column_name in req_cols:
        if not np.issubdtype(df[column_name].dtype, np.number):
            assert_no_leading_trailing_whitespace(
                df[column_name].values, column_name
            )

    if model is not None:
        allowed_cols = set(model.get_valid_ids_for_condition_table())
        if observable_df is not None:
            allowed_cols |= set(
                petab.get_output_parameters(
                    model=model,
                    observable_df=observable_df,
                    mapping_df=mapping_df,
                )
            )
        if mapping_df is not None:
            allowed_cols |= set(mapping_df.index.values)
        for column_name in df.columns:
            if (
                column_name != CONDITION_NAME
                and column_name not in allowed_cols
            ):
                raise AssertionError(
                    "Condition table contains column for unknown entity '"
                    f"{column_name}'."
                )


def check_measurement_df(
    df: pd.DataFrame, observable_df: Optional[pd.DataFrame] = None
) -> None:
    """Run sanity checks on PEtab measurement table

    Arguments:
        df: PEtab measurement DataFrame
        observable_df: PEtab observable DataFrame for checking if measurements
            are compatible with observable transformations.

    Raises:
        AssertionError, ValueError: in case of problems
    """
    _check_df(df, MEASUREMENT_DF_REQUIRED_COLS, "measurement")

    for column_name in MEASUREMENT_DF_REQUIRED_COLS:
        if not np.issubdtype(df[column_name].dtype, np.number):
            assert_no_leading_trailing_whitespace(
                df[column_name].values, column_name
            )

    for column_name in MEASUREMENT_DF_OPTIONAL_COLS:
        if column_name in df and not np.issubdtype(
            df[column_name].dtype, np.number
        ):
            assert_no_leading_trailing_whitespace(
                df[column_name].values, column_name
            )

    if observable_df is not None:
        assert_measured_observables_defined(df, observable_df)
        measurements.assert_overrides_match_parameter_count(df, observable_df)

        if OBSERVABLE_TRANSFORMATION in observable_df:
            # Check for positivity of measurements in case of
            # log-transformation
            assert_unique_observable_ids(observable_df)
            # If the above is not checked, in the following loop
            # trafo may become a pandas Series
            for measurement, obs_id in zip(df[MEASUREMENT], df[OBSERVABLE_ID]):
                trafo = observable_df.loc[obs_id, OBSERVABLE_TRANSFORMATION]
                if measurement <= 0.0 and trafo in [LOG, LOG10]:
                    raise ValueError(
                        "Measurements with observable "
                        f"transformation {trafo} must be "
                        f"positive, but {measurement} <= 0."
                    )

    assert_measurements_not_null(df)
    assert_measurements_numeric(df)


def check_parameter_df(
    df: pd.DataFrame,
    model: Optional[Model] = None,
    observable_df: Optional[pd.DataFrame] = None,
    measurement_df: Optional[pd.DataFrame] = None,
    condition_df: Optional[pd.DataFrame] = None,
    mapping_df: Optional[pd.DataFrame] = None,
) -> None:
    """Run sanity checks on PEtab parameter table

    Arguments:
        df: PEtab parameter DataFrame
        model: Model for additional checking of parameter IDs
        observable_df: PEtab observable table for additional checks
        measurement_df: PEtab measurement table for additional checks
        condition_df: PEtab condition table for additional checks
        mapping_df: PEtab mapping table for additional checks

    Raises:
        AssertionError: in case of problems
    """
    _check_df(df, PARAMETER_DF_REQUIRED_COLS[1:], "parameter")

    if df.index.name != PARAMETER_ID:
        raise AssertionError(
            f"Parameter table has wrong index {df.index.name}. "
            f"Expected {PARAMETER_ID}."
        )

    check_ids(df.index.values, kind="parameter")

    for column_name in PARAMETER_DF_REQUIRED_COLS[1:]:  # 0 is PARAMETER_ID
        if not np.issubdtype(df[column_name].dtype, np.number):
            assert_no_leading_trailing_whitespace(
                df[column_name].values, column_name
            )

    # nominal value is generally optional, but required for any parameter
    # with estimate != 1
    non_estimated_par_ids = list(
        df.index[
            (df[ESTIMATE] != 1)
            | (
                pd.api.types.is_string_dtype(df[ESTIMATE])
                and df[ESTIMATE] != "1"
            )
        ]
    )
    if non_estimated_par_ids:
        if NOMINAL_VALUE not in df:
            raise AssertionError(
                "Parameter table contains parameters "
                f"{non_estimated_par_ids} that are not "
                "specified to be estimated, "
                f"but column {NOMINAL_VALUE} is missing."
            )
        try:
            df.loc[non_estimated_par_ids, NOMINAL_VALUE].apply(float)
        except ValueError as e:
            raise AssertionError(
                f"Expected numeric values for `{NOMINAL_VALUE}` in parameter "
                "table for all non-estimated parameters."
            ) from e

    assert_parameter_id_is_string(df)
    assert_parameter_scale_is_valid(df)
    assert_parameter_bounds_are_numeric(df)
    assert_parameter_estimate_is_boolean(df)
    assert_unique_parameter_ids(df)
    check_parameter_bounds(df)
    assert_parameter_prior_type_is_valid(df)

    if model and measurement_df is not None and condition_df is not None:
        assert_all_parameters_present_in_parameter_df(
            df, model, observable_df, measurement_df, condition_df, mapping_df
        )


def check_observable_df(observable_df: pd.DataFrame) -> None:
    """Check validity of observable table

    Arguments:
        observable_df: PEtab observable DataFrame

    Raises:
        AssertionError: in case of problems
    """
    _check_df(observable_df, OBSERVABLE_DF_REQUIRED_COLS[1:], "observable")

    check_ids(observable_df.index.values, kind="observable")

    for column_name in OBSERVABLE_DF_REQUIRED_COLS[1:]:
        if not np.issubdtype(observable_df[column_name].dtype, np.number):
            assert_no_leading_trailing_whitespace(
                observable_df[column_name].values, column_name
            )

    for column_name in OBSERVABLE_DF_OPTIONAL_COLS:
        if column_name in observable_df and not np.issubdtype(
            observable_df[column_name].dtype, np.number
        ):
            assert_no_leading_trailing_whitespace(
                observable_df[column_name].values, column_name
            )

    assert_noise_distributions_valid(observable_df)
    assert_unique_observable_ids(observable_df)

    # Check that formulas are parsable
    for row in observable_df.itertuples():
        obs = getattr(row, OBSERVABLE_FORMULA)
        try:
            sp.sympify(obs, locals=_clash)
        except sp.SympifyError as e:
            raise AssertionError(
                f"Cannot parse expression '{obs}' "
                f"for observable {row.Index}: {e}"
            ) from e

        noise = getattr(row, NOISE_FORMULA)
        try:
            sympified_noise = sp.sympify(noise, locals=_clash)
            if sympified_noise is None or (
                sympified_noise.is_Number and not sympified_noise.is_finite
            ):
                raise AssertionError(
                    f"No or non-finite {NOISE_FORMULA} "
                    f"given for observable {row.Index}."
                )
        except sp.SympifyError as e:
            raise AssertionError(
                f"Cannot parse expression '{noise}' "
                f"for noise model for observable "
                f"{row.Index}: {e}"
            ) from e


def assert_all_parameters_present_in_parameter_df(
    parameter_df: pd.DataFrame,
    model: Model,
    observable_df: pd.DataFrame,
    measurement_df: pd.DataFrame,
    condition_df: pd.DataFrame,
    mapping_df: pd.DataFrame = None,
) -> None:
    """Ensure all required parameters are contained in the parameter table
    with no additional ones

    Arguments:
        parameter_df: PEtab parameter DataFrame
        model: model
        observable_df: PEtab observable table
        measurement_df: PEtab measurement table
        condition_df: PEtab condition table
        mapping_df: PEtab mapping table for additional checks

    Raises:
        AssertionError: in case of problems
    """
    required = parameters.get_required_parameters_for_parameter_table(
        model=model,
        condition_df=condition_df,
        observable_df=observable_df,
        measurement_df=measurement_df,
        mapping_df=mapping_df,
    )

    allowed = parameters.get_valid_parameters_for_parameter_table(
        model=model,
        condition_df=condition_df,
        observable_df=observable_df,
        measurement_df=measurement_df,
        mapping_df=mapping_df,
    )

    actual = set(parameter_df.index)
    missing = required - actual
    extraneous = actual - allowed

    # missing parameters might be present under a different name based on
    # the mapping table
    if missing and mapping_df is not None:
        model_to_petab_mapping = {}
        for map_from, map_to in zip(
            mapping_df.index.values, mapping_df[MODEL_ENTITY_ID]
        ):
            if map_to in model_to_petab_mapping:
                model_to_petab_mapping[map_to].append(map_from)
            else:
                model_to_petab_mapping[map_to] = [map_from]
        missing = {
            missing_id
            for missing_id in missing
            if missing_id not in model_to_petab_mapping
            or all(
                mapping_parameter not in actual
                for mapping_parameter in model_to_petab_mapping[missing_id]
            )
        }

    if missing:
        raise AssertionError(
            "Missing parameter(s) in the model or the "
            "parameters table: " + str(missing)
        )

    if extraneous:
        raise AssertionError(
            "Extraneous parameter(s) in parameter table: " + str(extraneous)
        )


def assert_measured_observables_defined(
    measurement_df: pd.DataFrame, observable_df: pd.DataFrame
) -> None:
    """Check if all observables in the measurement table have been defined in
    the observable table

    Arguments:
        measurement_df: PEtab measurement table
        observable_df: PEtab observable table

    Raises:
        AssertionError: in case of problems
    """
    used_observables = set(measurement_df[OBSERVABLE_ID].values)
    defined_observables = set(observable_df.index.values)

    if undefined_observables := (used_observables - defined_observables):
        raise AssertionError(
            f"Observables {undefined_observables} used in "
            "measurement table but not defined in observables table."
        )


def condition_table_is_parameter_free(condition_df: pd.DataFrame) -> bool:
    """Check if all entries in the condition table are numeric
    (no parameter IDs)

    Arguments:
        condition_df: PEtab condition table

    Returns:
        ``True`` if there are no parameter overrides in the condition table,
        ``False`` otherwise.
    """
    return len(petab.get_parametric_overrides(condition_df)) == 0
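

# Example: a minimal sketch with a hypothetical condition table; a purely
# numeric column means there are no parametric overrides.
#
# >>> cond = pd.DataFrame(
# ...     {"k1": [1.0, 2.0]},
# ...     index=pd.Index(["c0", "c1"], name=CONDITION_ID),
# ... )
# >>> condition_table_is_parameter_free(cond)
# True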


def assert_parameter_id_is_string(parameter_df: pd.DataFrame) -> None:
    """
    Check if all entries in the parameterId column of the parameter table
    are string and not empty.

    Arguments:
        parameter_df: PEtab parameter DataFrame

    Raises:
        AssertionError: in case of problems
    """
    # the parameterId column is the index of the parameter table
    for parameter_id in parameter_df.index:
        if isinstance(parameter_id, str):
            if parameter_id[0].isdigit():
                raise AssertionError(
                    f"{PARAMETER_ID} {parameter_id} starts with a digit."
                )
        else:
            raise AssertionError(f"Empty {PARAMETER_ID} found.")


def assert_unique_parameter_ids(parameter_df: pd.DataFrame) -> None:
    """
    Check if the parameterId column of the parameter table is unique.

    Arguments:
        parameter_df: PEtab parameter DataFrame

    Raises:
        AssertionError: in case of problems
    """
    non_unique_ids = get_non_unique(parameter_df.index)
    if len(non_unique_ids) > 0:
        raise AssertionError(
            f"Non-unique values found in the {PARAMETER_ID} column"
            " of the parameter table: " + str(non_unique_ids)
        )


def assert_parameter_scale_is_valid(parameter_df: pd.DataFrame) -> None:
    """
    Check if all entries in the parameterScale column of the parameter table
    are 'lin' for linear, 'log' for natural logarithm, or 'log10' for base-10
    logarithm.

    Arguments:
        parameter_df: PEtab parameter DataFrame

    Raises:
        AssertionError: in case of problems
    """
    for parameter_scale in parameter_df[PARAMETER_SCALE]:
        if parameter_scale not in [LIN, LOG, LOG10]:
            raise AssertionError(
                f"Expected {LIN}, {LOG}, or {LOG10}, but "
                f"got {parameter_scale}."
            )
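

# Example: a doctest-style sketch with a hypothetical parameter table;
# "log2" is not a valid PEtab parameter scale.
#
# >>> par = pd.DataFrame({PARAMETER_SCALE: [LIN, "log2"]})
# >>> assert_parameter_scale_is_valid(par)
# Traceback (most recent call last):
#     ...
# AssertionError: Expected lin, log, or log10, but got log2.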


def assert_parameter_bounds_are_numeric(parameter_df: pd.DataFrame) -> None:
    """
    Check if all entries in the lowerBound and upperBound columns of the
    parameter table are numeric.

    Arguments:
        parameter_df: PEtab parameter DataFrame

    Raises:
        AssertionError: in case of problems
    """
    # ``apply(float)`` raises for non-numeric entries
    parameter_df[LOWER_BOUND].apply(float).all()
    parameter_df[UPPER_BOUND].apply(float).all()


def check_parameter_bounds(parameter_df: pd.DataFrame) -> None:
    """
    Check that all lowerBound entries in the parameter table are smaller
    than or equal to the corresponding upperBound entries, and that bounds
    are positive for parameterScale log|log10.

    Arguments:
        parameter_df: PEtab parameter DataFrame

    Raises:
        AssertionError: in case of problems
    """
    for _, row in parameter_df.iterrows():
        if int(row[ESTIMATE]):
            if not row[LOWER_BOUND] <= row[UPPER_BOUND]:
                raise AssertionError(
                    f"{LOWER_BOUND} greater than {UPPER_BOUND} for "
                    f"{PARAMETER_ID} {row.name}."
                )
            if (row[LOWER_BOUND] < 0.0 or row[UPPER_BOUND] < 0.0) and row[
                PARAMETER_SCALE
            ] in [LOG, LOG10]:
                raise AssertionError(
                    f"Bounds for {row[PARAMETER_SCALE]} scaled parameter "
                    f"{row.name} must be positive."
                )
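

# Example: a doctest-style sketch with a hypothetical single-row parameter
# table whose bounds are inverted.
#
# >>> par = pd.DataFrame(
# ...     {
# ...         ESTIMATE: [1],
# ...         LOWER_BOUND: [10.0],
# ...         UPPER_BOUND: [1.0],
# ...         PARAMETER_SCALE: [LIN],
# ...     },
# ...     index=pd.Index(["k1"], name=PARAMETER_ID),
# ... )
# >>> check_parameter_bounds(par)
# Traceback (most recent call last):
#     ...
# AssertionError: lowerBound greater than upperBound for parameterId k1.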


def assert_parameter_prior_type_is_valid(parameter_df: pd.DataFrame) -> None:
    """Check that valid prior types have been selected

    Arguments:
        parameter_df: PEtab parameter table

    Raises:
        AssertionError: in case of invalid prior
    """
    for col in [INITIALIZATION_PRIOR_TYPE, OBJECTIVE_PRIOR_TYPE]:
        if col not in parameter_df.columns:
            continue
        for _, row in parameter_df.iterrows():
            if row[col] not in PRIOR_TYPES and not core.is_empty(row[col]):
                raise AssertionError(
                    f"{col} must be one of {PRIOR_TYPES} but is "
                    f"'{row[col]}'."
                )


def assert_parameter_prior_parameters_are_valid(
    parameter_df: pd.DataFrame,
) -> None:
    """Check that the prior parameters are valid.

    Arguments:
        parameter_df: PEtab parameter table

    Raises:
        AssertionError: in case of invalid prior parameters
    """
    prior_type_cols = [INITIALIZATION_PRIOR_TYPE, OBJECTIVE_PRIOR_TYPE]
    prior_par_cols = [
        INITIALIZATION_PRIOR_PARAMETERS,
        OBJECTIVE_PRIOR_PARAMETERS,
    ]

    # perform test for both priors
    for type_col, par_col in zip(prior_type_cols, prior_par_cols):
        # iterate over rows
        for _, row in parameter_df.iterrows():
            # get type
            if type_col not in row or core.is_empty(row[type_col]):
                type_ = PARAMETER_SCALE_UNIFORM
            else:
                type_ = row[type_col]
            # get parameters
            pars_str = row.get(par_col, "")
            with_default_parameters = [PARAMETER_SCALE_UNIFORM]
            # check if parameters are empty
            if core.is_empty(pars_str):
                if type_ not in with_default_parameters:
                    raise AssertionError(
                        f"An empty {par_col} is only permitted with "
                        f"{type_col} in {with_default_parameters}."
                    )
                # empty parameters fine
                continue
            # parse parameters
            try:
                pars = tuple(
                    float(val) for val in pars_str.split(PARAMETER_SEPARATOR)
                )
            except ValueError as e:
                raise AssertionError(
                    f"Could not parse prior parameters '{pars_str}'."
                ) from e
            # all distributions take 2 parameters
            if len(pars) != 2:
                raise AssertionError(
                    f"The prior parameters '{pars}' do not contain the "
                    "expected number of entries (currently 'par1"
                    f"{PARAMETER_SEPARATOR}par2' for all prior types)."
                )


def assert_parameter_estimate_is_boolean(parameter_df: pd.DataFrame) -> None:
    """
    Check if all entries in the estimate column of the parameter table are
    0 or 1.

    Arguments:
        parameter_df: PEtab parameter DataFrame

    Raises:
        AssertionError: in case of problems
    """
    for estimate in parameter_df[ESTIMATE]:
        if int(estimate) not in [True, False]:
            raise AssertionError(
                f"Expected 0 or 1 but got {estimate} in {ESTIMATE} column."
            )
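

# Example: a doctest-style sketch; 0/1 passes, anything else fails.
#
# >>> assert_parameter_estimate_is_boolean(pd.DataFrame({ESTIMATE: [0, 1]}))
# >>> assert_parameter_estimate_is_boolean(pd.DataFrame({ESTIMATE: [2]}))
# Traceback (most recent call last):
#     ...
# AssertionError: Expected 0 or 1 but got 2 in estimate column.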


def is_scalar_float(x: Any) -> bool:
    """
    Checks whether input is a number or can be transformed into a number
    via float

    :param x: input
    :return: ``True`` if is or can be converted to number, ``False``
        otherwise.
    """
    if isinstance(x, numbers.Number):
        return True
    try:
        float(x)
        return True
    except (ValueError, TypeError):
        return False
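

# Example: a doctest-style sketch covering numbers, numeric strings, and
# non-numeric strings.
#
# >>> is_scalar_float(1.0), is_scalar_float("2.5e-3"), is_scalar_float("k1")
# (True, True, False)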


def measurement_table_has_timepoint_specific_mappings(
    measurement_df: Optional[pd.DataFrame],
    allow_scalar_numeric_noise_parameters: bool = False,
    allow_scalar_numeric_observable_parameters: bool = False,
) -> bool:
    """
    Are there time-point or replicate specific parameter assignments in the
    measurement table?

    Arguments:
        measurement_df: PEtab measurement table
        allow_scalar_numeric_noise_parameters: ignore scalar numeric
            assignments to noiseParameter placeholders
        allow_scalar_numeric_observable_parameters: ignore scalar numeric
            assignments to observableParameter placeholders

    Returns:
        True if there are time-point or replicate specific (non-numeric)
        parameter assignments in the measurement table, False otherwise.
    """
    if measurement_df is None:
        return False

    # since we edit it, copy it first
    measurement_df = copy.deepcopy(measurement_df)

    # mask numeric values
    for col, allow_scalar_numeric in [
        (OBSERVABLE_PARAMETERS, allow_scalar_numeric_observable_parameters),
        (NOISE_PARAMETERS, allow_scalar_numeric_noise_parameters),
    ]:
        if col not in measurement_df:
            continue
        measurement_df[col] = measurement_df[col].apply(str)
        if allow_scalar_numeric:
            measurement_df.loc[
                measurement_df[col].apply(is_scalar_float), col
            ] = np.nan

    grouping_cols = core.get_notnull_columns(
        measurement_df,
        [
            OBSERVABLE_ID,
            SIMULATION_CONDITION_ID,
            PREEQUILIBRATION_CONDITION_ID,
            OBSERVABLE_PARAMETERS,
            NOISE_PARAMETERS,
        ],
    )
    grouped_df = measurement_df.groupby(grouping_cols, dropna=False)

    grouping_cols = core.get_notnull_columns(
        measurement_df,
        [
            OBSERVABLE_ID,
            SIMULATION_CONDITION_ID,
            PREEQUILIBRATION_CONDITION_ID,
        ],
    )
    grouped_df2 = measurement_df.groupby(grouping_cols)

    # data frame has timepoint specific overrides if grouping by noise
    # parameters and observable parameters in addition to observable,
    # condition and preeq id yields more groups
    return len(grouped_df) != len(grouped_df2)


def observable_table_has_nontrivial_noise_formula(
    observable_df: Optional[pd.DataFrame],
) -> bool:
    """
    Does any observable have a noise formula that is not just a single
    parameter?

    Arguments:
        observable_df: PEtab observable table

    Returns:
        ``True`` if any noise formula does not consist of a single
        identifier, ``False`` otherwise.
    """
    if observable_df is None:
        return False

    return (
        not observable_df[NOISE_FORMULA]
        .apply(
            lambda x: is_scalar_float(x)
            or re.match(r"^[\w]+$", str(x)) is not None
        )
        .all()
    )
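

# Example: a sketch with a hypothetical observable table; a bare identifier
# or number is trivial, an arithmetic expression is not.
#
# >>> obs = pd.DataFrame({NOISE_FORMULA: ["sigma1", "0.1 * obs1"]})
# >>> observable_table_has_nontrivial_noise_formula(obs)
# True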


def measurement_table_has_observable_parameter_numeric_overrides(
    measurement_df: pd.DataFrame,
) -> bool:
    """Are there any numbers to override observable parameters?

    Arguments:
        measurement_df: PEtab measurement table

    Returns:
        ``True`` if there are any numbers to override observable/noise
        parameters, ``False`` otherwise.
    """
    if OBSERVABLE_PARAMETERS not in measurement_df:
        return False

    for _, row in measurement_df.iterrows():
        for override in measurements.split_parameter_replacement_list(
            row.get(OBSERVABLE_PARAMETERS, None)
        ):
            if isinstance(override, numbers.Number):
                return True

    return False


def assert_noise_distributions_valid(observable_df: pd.DataFrame) -> None:
    """
    Ensure that noise distributions and transformations for observables are
    valid.

    Arguments:
        observable_df: PEtab observable table

    Raises:
        ValueError: in case of problems
    """
    if OBSERVABLE_TRANSFORMATION in observable_df:
        # check for valid values
        for trafo in observable_df[OBSERVABLE_TRANSFORMATION]:
            if trafo not in ["", *OBSERVABLE_TRANSFORMATIONS] and not (
                isinstance(trafo, numbers.Number) and np.isnan(trafo)
            ):
                raise ValueError(
                    f"Unrecognized observable transformation in observable "
                    f"table: {trafo}."
                )

    if NOISE_DISTRIBUTION in observable_df:
        for distr in observable_df[NOISE_DISTRIBUTION]:
            if distr not in ["", *NOISE_MODELS] and not (
                isinstance(distr, numbers.Number) and np.isnan(distr)
            ):
                raise ValueError(
                    f"Unrecognized noise distribution in observable "
                    f"table: {distr}."
                )
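

# Example: a doctest-style sketch; "cauchy" is not a recognized PEtab noise
# distribution.
#
# >>> obs = pd.DataFrame({NOISE_DISTRIBUTION: ["normal", "cauchy"]})
# >>> assert_noise_distributions_valid(obs)
# Traceback (most recent call last):
#     ...
# ValueError: Unrecognized noise distribution in observable table: cauchy.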


def assert_unique_observable_ids(observable_df: pd.DataFrame) -> None:
    """
    Check if the observableId column of the observable table is unique.

    Arguments:
        observable_df: PEtab observable DataFrame

    Raises:
        AssertionError: in case of problems
    """
    non_unique_ids = get_non_unique(observable_df.index)
    if len(non_unique_ids) > 0:
        raise AssertionError(
            f"Non-unique values found in the {OBSERVABLE_ID} column"
            " of the observable table: " + str(non_unique_ids)
        )


def get_non_unique(values) -> list:
    """Get the values that occur more than once in the given iterable."""
    counter = Counter(values)
    return [value for (value, count) in counter.items() if count > 1]
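

# Example: a doctest-style sketch of the duplicate finder.
#
# >>> get_non_unique(["a", "b", "a"])
# ['a']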


def lint_problem(problem: "petab.Problem") -> bool:
    """Run PEtab validation on problem

    Arguments:
        problem: PEtab problem to check

    Returns:
        ``True`` if errors occurred, ``False`` otherwise
    """
    # pylint: disable=too-many-statements
    errors_occurred = False

    if problem.extensions_config:
        logger.warning(
            "Validation of PEtab extensions is not yet implemented, "
            "but the given problem uses the following extensions: "
            f"{', '.join(problem.extensions_config.keys())}"
        )

    # Run checks on individual files
    if problem.model is not None:
        logger.info("Checking model...")
        errors_occurred |= not problem.model.is_valid()
    else:
        logger.warning("Model not available. Skipping.")

    if problem.measurement_df is not None:
        logger.info("Checking measurement table...")
        try:
            check_measurement_df(problem.measurement_df, problem.observable_df)
            if problem.condition_df is not None:
                assert_measurement_conditions_present_in_condition_table(
                    problem.measurement_df, problem.condition_df
                )
        except AssertionError as e:
            logger.error(e)
            errors_occurred = True
    else:
        logger.warning("Measurement table not available. Skipping.")

    if problem.condition_df is not None:
        logger.info("Checking condition table...")
        try:
            check_condition_df(
                problem.condition_df,
                model=problem.model,
                observable_df=problem.observable_df,
                mapping_df=problem.mapping_df,
            )
        except AssertionError as e:
            logger.error(e)
            errors_occurred = True
    else:
        logger.warning("Condition table not available. Skipping.")

    if problem.observable_df is not None:
        logger.info("Checking observable table...")
        try:
            check_observable_df(problem.observable_df)
        except AssertionError as e:
            logger.error(e)
            errors_occurred = True
        if problem.model is not None:
            for obs_id in problem.observable_df.index:
                if problem.model.has_entity_with_id(obs_id):
                    logger.error(
                        f"Observable ID {obs_id} shadows model entity."
                    )
                    errors_occurred = True
    else:
        logger.warning("Observable table not available. Skipping.")

    if problem.parameter_df is not None:
        logger.info("Checking parameter table...")
        try:
            check_parameter_df(
                problem.parameter_df,
                problem.model,
                problem.observable_df,
                problem.measurement_df,
                problem.condition_df,
                problem.mapping_df,
            )
        except AssertionError as e:
            logger.error(e)
            errors_occurred = True
    else:
        logger.warning("Parameter table not available. Skipping.")

    if (
        problem.model is not None
        and problem.condition_df is not None
        and problem.parameter_df is not None
    ):
        try:
            assert_model_parameters_in_condition_or_parameter_table(
                problem.model,
                problem.condition_df,
                problem.parameter_df,
                problem.mapping_df,
            )
        except AssertionError as e:
            logger.error(e)
            errors_occurred = True

    if problem.visualization_df is not None:
        logger.info("Checking visualization table...")
        from petab.visualize.lint import validate_visualization_df

        errors_occurred |= validate_visualization_df(problem)
    else:
        logger.warning("Visualization table not available. Skipping.")

    if errors_occurred:
        logger.error("Not OK")
    elif (
        problem.measurement_df is None
        or problem.condition_df is None
        or problem.model is None
        or problem.parameter_df is None
        or problem.observable_df is None
    ):
        logger.warning(
            "Not all files of the PEtab problem definition could "
            "be checked."
        )
    else:
        logger.info("PEtab format check completed successfully.")

    return errors_occurred
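

# Example: a minimal sketch of driving the linter from user code; the YAML
# path is hypothetical.
#
# >>> problem = petab.Problem.from_yaml("my_problem.yaml")
# >>> if lint_problem(problem):
# ...     raise SystemExit("PEtab problem contains errors")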


def assert_model_parameters_in_condition_or_parameter_table(
    model: Model,
    condition_df: pd.DataFrame,
    parameter_df: pd.DataFrame,
    mapping_df: pd.DataFrame = None,
    observable_df: pd.DataFrame = None,
    measurement_df: pd.DataFrame = None,
) -> None:
    """Model parameters that are rule targets must not be present in the
    parameter table. Other parameters must only be present in either the
    parameter table or the condition table columns. Check that.

    Arguments:
        parameter_df: PEtab parameter DataFrame
        model: PEtab model
        condition_df: PEtab condition table
        mapping_df: PEtab mapping table
        observable_df: PEtab observable table
        measurement_df: PEtab measurement table

    Raises:
        AssertionError: in case of problems
    """
    allowed_in_condition_cols = set(model.get_valid_ids_for_condition_table())
    if mapping_df is not None:
        allowed_in_condition_cols |= {
            from_id
            for from_id, to_id in zip(
                mapping_df.index.values, mapping_df[MODEL_ENTITY_ID]
            )
            # mapping table entities mapping to already allowed parameters
            if to_id in allowed_in_condition_cols
            # mapping table entities mapping to species
            or model.is_state_variable(to_id)
        }

    allowed_in_parameter_table = (
        parameters.get_valid_parameters_for_parameter_table(
            model=model,
            condition_df=condition_df,
            observable_df=observable_df,
            measurement_df=measurement_df,
            mapping_df=mapping_df,
        )
    )

    entities_in_condition_table = set(condition_df.columns) - {CONDITION_NAME}
    entities_in_parameter_table = set(parameter_df.index.values)

    disallowed_in_condition = {
        x
        for x in (entities_in_condition_table - allowed_in_condition_cols)
        # we only check model entities here, not output parameters
        if model.has_entity_with_id(x)
    }
    if disallowed_in_condition:
        is_or_are = "is" if len(disallowed_in_condition) == 1 else "are"
        raise AssertionError(
            f"{disallowed_in_condition} {is_or_are} not "
            "allowed to occur in condition table "
            "columns."
        )

    disallowed_in_parameters = {
        x
        for x in (entities_in_parameter_table - allowed_in_parameter_table)
        # we only check model entities here, not output parameters
        if model.has_entity_with_id(x)
    }
    if disallowed_in_parameters:
        is_or_are = "is" if len(disallowed_in_parameters) == 1 else "are"
        raise AssertionError(
            f"{disallowed_in_parameters} {is_or_are} not "
            "allowed to occur in the parameters table."
        )

    in_both = entities_in_condition_table & entities_in_parameter_table
    if in_both:
        is_or_are = "is" if len(in_both) == 1 else "are"
        raise AssertionError(
            f"{in_both} {is_or_are} present in both "
            "the condition table and the parameter table."
        )


def assert_measurement_conditions_present_in_condition_table(
    measurement_df: pd.DataFrame, condition_df: pd.DataFrame
) -> None:
    """Ensure that all entries from measurement_df.simulationConditionId and
    measurement_df.preequilibrationConditionId are present in
    condition_df.index.

    Arguments:
        measurement_df: PEtab measurement table
        condition_df: PEtab condition table

    Raises:
        AssertionError: in case of problems
    """
    used_conditions = set(measurement_df[SIMULATION_CONDITION_ID].values)
    if PREEQUILIBRATION_CONDITION_ID in measurement_df:
        used_conditions |= set(
            measurement_df[PREEQUILIBRATION_CONDITION_ID].dropna().values
        )
    available_conditions = set(condition_df.index.values)
    if missing_conditions := (used_conditions - available_conditions):
        raise AssertionError(
            "Measurement table references conditions that "
            "are not specified in the condition table: "
            + str(missing_conditions)
        )
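

# Example: a sketch with hypothetical tables; condition "c1" is referenced
# by a measurement but missing from the condition table.
#
# >>> meas = pd.DataFrame({SIMULATION_CONDITION_ID: ["c0", "c1"]})
# >>> cond = pd.DataFrame(index=pd.Index(["c0"], name=CONDITION_ID))
# >>> assert_measurement_conditions_present_in_condition_table(meas, cond)
# Traceback (most recent call last):
#     ...
# AssertionError: Measurement table references conditions that are not
# specified in the condition table: {'c1'}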


def assert_measurements_not_null(
    measurement_df: pd.DataFrame,
) -> None:
    """Check whether all measurements are not null.

    Arguments:
        measurement_df: PEtab measurement table.

    Raises:
        AssertionError: Some measurement value(s) are null (missing).
    """
    if measurement_df[MEASUREMENT].isnull().any():
        raise AssertionError("Some measurement(s) are null (missing).")


def assert_measurements_numeric(
    measurement_df: pd.DataFrame,
) -> None:
    """Check whether all measurements are numeric.

    Note that null (missing) measurements are ignored.

    Arguments:
        measurement_df: PEtab measurement table.

    Raises:
        AssertionError: Some measurement value(s) are not numeric.
    """
    not_null_measurement_values = measurement_df[MEASUREMENT].dropna()
    all_measurements_are_numeric = (
        pd.to_numeric(not_null_measurement_values, errors="coerce")
        .notnull()
        .all()
    )
    if not all_measurements_are_numeric:
        raise AssertionError(
            "Some values in the `petab.C.MEASUREMENT` column of the PEtab "
            "measurements table are not numeric."
        )
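

# Example: a doctest-style sketch; the string "high" is not a numeric
# measurement.
#
# >>> meas = pd.DataFrame({MEASUREMENT: [1.0, "high"]})
# >>> assert_measurements_numeric(meas)
# Traceback (most recent call last):
#     ...
# AssertionError: Some values in the `petab.C.MEASUREMENT` column of the
# PEtab measurements table are not numeric.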


def is_valid_identifier(x: str) -> bool:
    """Check whether `x` is a valid identifier

    Check whether `x` is a valid identifier for conditions, parameters,
    observables, etc. Identifiers may contain upper and lower case letters,
    digits and underscores, but must not start with a digit.

    Arguments:
        x: string to check

    Returns:
        ``True`` if valid, ``False`` otherwise
    """
    if pd.isna(x):
        return False

    return re.match(r"^[a-zA-Z_]\w*$", x) is not None
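

# Example: a doctest-style sketch of the identifier rules.
#
# >>> is_valid_identifier("k_1"), is_valid_identifier("1k")
# (True, False)
# >>> is_valid_identifier("k.1")
# False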


def check_ids(ids: Iterable[str], kind: str = "") -> None:
    """Check IDs are valid

    Arguments:
        ids: Iterable of IDs to check
        kind: Kind of IDs, for more informative error message

    Raises:
        ValueError: in case of invalid IDs
    """
    invalids = [
        (index, _id)
        for index, _id in enumerate(ids)
        if not is_valid_identifier(_id)
    ]

    if invalids:
        # The first row is the header row, and Python lists are
        # zero-indexed, hence need to add 2 for the correct line number.
        offset = 2
        error_output = "\n".join(
            [
                f"Line {index + offset}: "
                + ("Missing ID" if pd.isna(_id) else _id)
                for index, _id in invalids
            ]
        )
        raise ValueError(f"Invalid {kind} ID(s):\n{error_output}")
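

# Example: a doctest-style sketch; "1obs" is reported with its table line
# number (header row plus zero-based index).
#
# >>> check_ids(["obs1", "1obs"], kind="observable")
# Traceback (most recent call last):
#     ...
# ValueError: Invalid observable ID(s):
# Line 3: 1obs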