Source code for petab.v1.parameters

"""Functions operating on the PEtab parameter table"""

import numbers
import warnings
from collections import OrderedDict
from collections.abc import Iterable, Sequence, Set
from pathlib import Path
from typing import (
    Literal,
)

import libsbml
import numpy as np
import pandas as pd

from . import conditions, core, lint, measurements, observables
from .C import *  # noqa: F403
from .models import Model

# Public API of this module, i.e. the names exported by a star-import.
# NOTE(review): ``get_required_parameters_for_parameter_table`` is defined
# in this module but is not listed here - confirm that this is intentional.
__all__ = [
    "create_parameter_df",
    "get_optimization_parameter_scaling",
    "get_optimization_parameters",
    "get_parameter_df",
    "get_priors_from_df",
    "get_valid_parameters_for_parameter_table",
    "map_scale",
    "map_unscale",
    "normalize_parameter_df",
    "scale",
    "unscale",
    "write_parameter_df",
]

# Type of the ``scale_str`` arguments accepted by :func:`scale` and
# :func:`unscale`; the empty string is handled like ``"lin"`` there.
PARAMETER_SCALE_ARGS = Literal["", "lin", "log", "log10"]


def get_parameter_df(
    parameter_file: str
    | Path
    | pd.DataFrame
    | Iterable[str | Path | pd.DataFrame]
    | None,
) -> pd.DataFrame | None:
    """
    Read the provided parameter file into a ``pandas.Dataframe``.

    Arguments:
        parameter_file: Name of the file to read from or pandas.Dataframe,
            or an Iterable of those. Entries of an iterable that are
            ``None`` or an empty string are skipped.

    Returns:
        Parameter ``DataFrame`` indexed by ``PARAMETER_ID``, or ``None`` if
        ``None`` was passed or if an iterable without usable entries was
        passed.

    Raises:
        KeyError: If the table has no ``PARAMETER_ID`` column.
        ValueError: If the same parameter ID occurs more than once.
        TypeError: If ``parameter_file`` is of an unsupported type.

    Note:
        A ``DataFrame`` passed directly is not copied; its index is
        (re)set in place.
    """
    if parameter_file is None:
        return None

    if isinstance(parameter_file, pd.DataFrame):
        parameter_df = parameter_file
    elif isinstance(parameter_file, str | Path):
        parameter_df = pd.read_csv(
            parameter_file, sep="\t", float_precision="round_trip"
        )
    elif isinstance(parameter_file, Iterable):
        dfs = [
            get_parameter_df(x)
            for x in parameter_file
            # The truth value of a DataFrame is ambiguous (``bool(df)``
            # raises ValueError), so DataFrames are accepted without a
            # truthiness test; ``None`` and "" entries are still skipped.
            if isinstance(x, pd.DataFrame) or x
        ]

        if not dfs:
            return None

        parameter_df = pd.concat(dfs)
        # Check for contradicting parameter definitions
        _check_for_contradicting_parameter_definitions(parameter_df)

        return parameter_df
    else:
        raise TypeError(
            "Cannot read a parameter table from an object of type "
            f"{type(parameter_file).__name__}."
        )

    lint.assert_no_leading_trailing_whitespace(
        parameter_df.columns.values, "parameter"
    )

    if not isinstance(parameter_df.index, pd.RangeIndex):
        # Keep the current index as a column only if it already holds the
        # parameter IDs. Inspect ``parameter_df`` (not ``parameter_file``,
        # which may be a str/Path without a DataFrame index).
        parameter_df.reset_index(
            drop=parameter_df.index.name != PARAMETER_ID,
            inplace=True,
        )

    try:
        parameter_df.set_index([PARAMETER_ID], inplace=True)
    except KeyError as e:
        raise KeyError(
            f"Parameter table missing mandatory field {PARAMETER_ID}."
        ) from e
    _check_for_contradicting_parameter_definitions(parameter_df)

    return parameter_df
def _check_for_contradicting_parameter_definitions(parameter_df: pd.DataFrame): """ Raises a ValueError for non-unique parameter IDs """ parameter_duplicates = set( parameter_df.index.values[parameter_df.index.duplicated()] ) if parameter_duplicates: raise ValueError( f"The values of `{PARAMETER_ID}` must be unique. The " f"following duplicates were found:\n{parameter_duplicates}" )
def write_parameter_df(df: pd.DataFrame, filename: str | Path) -> None:
    """Write PEtab parameter table

    The table is first passed through :func:`get_parameter_df` and is then
    written as tab-separated file including its index.

    Arguments:
        df: PEtab parameter table
        filename: Destination file name. The parent directory will be created
            if necessary.
    """
    table = get_parameter_df(df)
    parent_dir = Path(filename).parent
    parent_dir.mkdir(parents=True, exist_ok=True)
    table.to_csv(filename, sep="\t", index=True)
def get_optimization_parameters(parameter_df: pd.DataFrame) -> list[str]:
    """
    Get list of optimization parameter IDs from parameter table.

    Arguments:
        parameter_df: PEtab parameter DataFrame

    Returns:
        Index entries of all rows whose ``ESTIMATE`` column equals 1, in
        table order.
    """
    is_estimated = parameter_df[ESTIMATE] == 1
    return [*parameter_df.index[is_estimated]]
def get_optimization_parameter_scaling(
    parameter_df: pd.DataFrame,
) -> dict[str, str]:
    """
    Map the IDs of the estimated parameters to their parameter scale.

    Arguments:
        parameter_df: PEtab parameter DataFrame

    Returns:
        Dictionary with one entry per row whose ``ESTIMATE`` column equals
        1: index entry -> value of the ``PARAMETER_SCALE`` column.
    """
    is_estimated = parameter_df[ESTIMATE] == 1
    estimated = parameter_df.loc[is_estimated]
    return {
        par_id: par_scale
        for par_id, par_scale in zip(
            estimated.index, estimated[PARAMETER_SCALE], strict=True
        )
    }
def create_parameter_df(
    sbml_model: libsbml.Model | None = None,
    condition_df: pd.DataFrame | None = None,
    observable_df: pd.DataFrame | None = None,
    measurement_df: pd.DataFrame | None = None,
    model: Model | None = None,
    include_optional: bool = False,
    parameter_scale: str = LOG10,
    lower_bound: Iterable = None,
    upper_bound: Iterable = None,
    mapping_df: pd.DataFrame | None = None,
) -> pd.DataFrame:
    """Create a new PEtab parameter table

    All table entries can be provided as string or list-like with length
    matching the number of parameters

    Arguments:
        sbml_model: SBML Model (deprecated, mutually exclusive with ``model``)
        model: PEtab model (mutually exclusive with ``sbml_model``)
        condition_df: PEtab condition DataFrame
        observable_df: PEtab observable DataFrame
        measurement_df: PEtab measurement DataFrame
        include_optional: By default this only returns parameters that are
            required to be present in the parameter table. If set to ``True``,
            this returns all parameters that are allowed to be present in the
            parameter table (i.e. also including parameters specified in the
            model).
        parameter_scale: parameter scaling
        lower_bound: lower bound for parameter value
        upper_bound: upper bound for parameter value
        mapping_df: PEtab mapping DataFrame.
            NOTE(review): only forwarded when ``include_optional`` is
            ``False`` - confirm that ignoring it otherwise is intended.

    Returns:
        The created parameter DataFrame, indexed by parameter ID. All
        parameters are marked as estimated and get empty prior columns.

    Raises:
        ValueError: If both ``model`` and ``sbml_model`` are provided.
    """
    if sbml_model:
        warnings.warn(
            "Passing a model via the `sbml_model` argument is "
            "deprecated, use `model=petab.models.sbml_model."
            "SbmlModel(...)` instead.",
            DeprecationWarning,
            stacklevel=2,
        )
        from .models.sbml_model import SbmlModel

        if model:
            raise ValueError(
                "Arguments `model` and `sbml_model` are mutually exclusive."
            )
        model = SbmlModel(sbml_model=sbml_model)

    # inputs shared by both ways of collecting the parameter IDs
    shared_inputs = {
        "model": model,
        "condition_df": condition_df,
        "observable_df": observable_df,
        "measurement_df": measurement_df,
    }
    if include_optional:
        collected_ids = get_valid_parameters_for_parameter_table(
            **shared_inputs
        )
    else:
        collected_ids = get_required_parameters_for_parameter_table(
            **shared_inputs, mapping_df=mapping_df
        )
    parameter_ids = list(collected_ids)

    columns = {
        PARAMETER_ID: parameter_ids,
        PARAMETER_NAME: parameter_ids,
        PARAMETER_SCALE: parameter_scale,
        LOWER_BOUND: lower_bound,
        UPPER_BOUND: upper_bound,
        NOMINAL_VALUE: np.nan,
        ESTIMATE: 1,
        INITIALIZATION_PRIOR_TYPE: "",
        INITIALIZATION_PRIOR_PARAMETERS: "",
        OBJECTIVE_PRIOR_TYPE: "",
        OBJECTIVE_PRIOR_PARAMETERS: "",
    }
    df = pd.DataFrame(data=columns).set_index([PARAMETER_ID])

    # For model parameters, set nominal values as defined in the model
    for parameter_id in df.index:
        try:
            df.loc[parameter_id, NOMINAL_VALUE] = model.get_parameter_value(
                parameter_id
            )
        except ValueError:
            # parameter was introduced as condition-specific override and
            # is potentially not present in the model
            continue

    return df
def get_required_parameters_for_parameter_table(
    model: Model,
    condition_df: pd.DataFrame | None,
    observable_df: pd.DataFrame | None,
    measurement_df: pd.DataFrame | None,
    mapping_df: pd.DataFrame | None = None,
) -> Set[str]:
    """
    Get set of parameters which need to go into the parameter table

    Arguments:
        model: PEtab model
        condition_df: PEtab condition table
        observable_df: PEtab observable table
        measurement_df: PEtab measurement table
        mapping_df: PEtab mapping table

    Any of the tables may be ``None``, in which case it does not contribute
    any parameters (``create_parameter_df`` passes ``None`` by default).

    Returns:
        Set of parameter IDs which PEtab requires to be present in the
        parameter table. That is all {observable,noise}Parameters from the
        measurement table as well as all parametric condition table overrides
        that are not defined in the model. The returned keys view iterates
        in the order in which the IDs were collected.
    """
    # use ordered dict as proxy for ordered set
    parameter_ids = OrderedDict()

    # parameters that are overridden via the condition table are not allowed
    condition_columns = (
        set() if condition_df is None else set(condition_df.columns)
    )

    # Add parameters from measurement table, unless they are fixed parameters
    def append_overrides(overrides):
        for p in overrides:
            if isinstance(p, str) and p not in condition_columns:
                parameter_ids[p] = None

    if measurement_df is not None:
        for _, row in measurement_df.iterrows():
            # we trust that the number of overrides matches
            for column in (OBSERVABLE_PARAMETERS, NOISE_PARAMETERS):
                append_overrides(
                    measurements.split_parameter_replacement_list(
                        row.get(column, None)
                    )
                )

    # Add output parameters except for placeholders
    if observable_df is not None:
        for formula_type, placeholder_sources in (
            (
                # Observable formulae
                {"observables": True, "noise": False},
                # can only contain observable placeholders
                {"noise": False, "observables": True},
            ),
            (
                # Noise formulae
                {"observables": False, "noise": True},
                # can contain noise and observable placeholders
                {"noise": True, "observables": True},
            ),
        ):
            output_parameters = observables.get_output_parameters(
                observable_df,
                model,
                mapping_df=mapping_df,
                **formula_type,
            )
            placeholders = observables.get_placeholders(
                observable_df,
                **placeholder_sources,
            )
            for p in output_parameters:
                if p not in placeholders:
                    parameter_ids[p] = None

    # Add condition table parametric overrides unless already defined in the
    # model
    if condition_df is not None:
        for p in conditions.get_parametric_overrides(condition_df):
            if not model.has_entity_with_id(p):
                parameter_ids[p] = None

    # drop everything that has its own condition table column; IDs that were
    # never collected are ignored
    for p in condition_columns:
        parameter_ids.pop(p, None)

    return parameter_ids.keys()
def get_valid_parameters_for_parameter_table(
    model: Model,
    condition_df: pd.DataFrame,
    observable_df: pd.DataFrame,
    measurement_df: pd.DataFrame,
    mapping_df: pd.DataFrame = None,
) -> set[str]:
    """
    Get set of parameters which may be present inside the parameter table

    Arguments:
        model: PEtab model
        condition_df: PEtab condition table
        observable_df: PEtab observable table
        measurement_df: PEtab measurement table
        mapping_df: PEtab mapping table for additional checks

    Each of the tables is skipped if it is ``None``.

    Returns:
        Set of parameter IDs which PEtab allows to be present in the
        parameter table. The returned keys view iterates in the order in
        which the IDs were collected.
    """
    # Collected, in this order:
    # - all allowed model parameters
    # - the corresponding names from the mapping table
    # - all output parameters defined in {observable,noise}Formula
    # - all parameters from the measurement table
    # - all parametric overrides from the condition table
    # Skipped on the way (except for the condition table overrides):
    # - parameters for which condition table columns exist
    # - placeholder parameters

    # IDs that must not go into the parameter table
    excluded_ids = set()
    if observable_df is not None:
        excluded_ids.update(observables.get_placeholders(observable_df))
    if condition_df is not None:
        excluded_ids.update(
            column
            for column in condition_df.columns.values
            if column != CONDITION_NAME
        )

    # an ordered dict instead of a set keeps the ordering deterministic,
    # e.g. for creating parameter tables
    parameter_ids = OrderedDict()
    for model_par in model.get_valid_parameters_for_parameter_table():
        if model_par not in excluded_ids:
            parameter_ids[model_par] = None

    if mapping_df is not None:
        mapped_pairs = zip(
            mapping_df.index.values, mapping_df[MODEL_ENTITY_ID], strict=True
        )
        for petab_id, model_id in mapped_pairs:
            # IDs that map to an already accepted ID are accepted as well
            if model_id in parameter_ids:
                parameter_ids[petab_id] = None

    if observable_df is not None:
        # add output parameters from observables table
        for output_par in observables.get_output_parameters(
            observable_df=observable_df, model=model
        ):
            if output_par not in excluded_ids:
                parameter_ids[output_par] = None

    # Append parameters from measurement table, unless they occur as condition
    # table columns
    if measurement_df is not None:
        for _, row in measurement_df.iterrows():
            # we trust that the number of overrides matches
            for column in (OBSERVABLE_PARAMETERS, NOISE_PARAMETERS):
                overrides = measurements.split_parameter_replacement_list(
                    row.get(column, None)
                )
                for override in overrides:
                    if (
                        isinstance(override, str)
                        and override not in excluded_ids
                    ):
                        parameter_ids[override] = None

    # Append parameter overrides from condition table
    if condition_df is not None:
        for override in conditions.get_parametric_overrides(condition_df):
            parameter_ids[override] = None

    return parameter_ids.keys()
def get_priors_from_df(
    parameter_df: pd.DataFrame,
    mode: Literal["initialization", "objective"],
    parameter_ids: Sequence[str] = None,
) -> list[tuple]:
    """Create list with information about the parameter priors

    Arguments:
        parameter_df: PEtab parameter table
        mode: ``'initialization'`` or ``'objective'``
        parameter_ids: A sequence of parameter IDs for which to sample starting
            points. For subsetting or reordering the parameters.
            Defaults to all estimated parameters.

    Returns:
        List with one tuple ``(prior type, prior parameters, parameter
        scale, (lower bound, upper bound))`` per selected parameter.

    Raises:
        KeyError: If ``parameter_ids`` contains IDs that are not among the
            estimated parameters of the table.
    """
    # only rows of estimated parameters are considered
    estimated = parameter_df.loc[parameter_df[ESTIMATE] == 1]

    if parameter_ids is not None:
        try:
            estimated = estimated.loc[parameter_ids, :]
        except KeyError as e:
            missing_ids = set(parameter_ids) - set(estimated.index)
            raise KeyError(
                "Parameter table does not contain estimated parameter(s) "
                f"{missing_ids}."
            ) from e

    type_column = f"{mode}PriorType"
    pars_column = f"{mode}PriorParameters"

    priors = []
    for _, row in estimated.iterrows():
        # prior type, falling back to a uniform prior on parameter scale
        prior_type = str(row.get(type_column, ""))
        if core.is_empty(prior_type):
            prior_type = PARAMETER_SCALE_UNIFORM

        # prior parameters as tuple of floats, falling back to the scaled
        # parameter bounds
        raw_pars = str(row.get(pars_column, ""))
        if core.is_empty(raw_pars):
            scaled_lb, scaled_ub = map_scale(
                [row[LOWER_BOUND], row[UPPER_BOUND]],
                [row[PARAMETER_SCALE]] * 2,
            )
            raw_pars = f"{scaled_lb}{PARAMETER_SEPARATOR}{scaled_ub}"
        prior_pars = tuple(map(float, raw_pars.split(PARAMETER_SEPARATOR)))

        # parameter scale and bounds are passed along, as they may be needed
        par_scale = row[PARAMETER_SCALE]
        par_bounds = (row[LOWER_BOUND], row[UPPER_BOUND])

        # a prior type that stringifies to "nan" (e.g. a NaN cell) means that
        # no prior was specified: assume a non-informative (uniform) one
        if prior_type == "nan":
            prior_type = PARAMETER_SCALE_UNIFORM
            prior_pars = (
                scale(row[LOWER_BOUND], par_scale),
                scale(row[UPPER_BOUND], par_scale),
            )

        priors.append((prior_type, prior_pars, par_scale, par_bounds))

    return priors
def scale(
    parameter: numbers.Number,
    scale_str: PARAMETER_SCALE_ARGS,
) -> numbers.Number:
    """Scale parameter according to ``scale_str``.

    Arguments:
        parameter:
            Parameter to be scaled.
        scale_str:
            One of ``'lin'`` (synonymous with ``''``), ``'log'``, ``'log10'``.

    Returns:
        The scaled parameter. A value of zero is mapped to ``-inf`` on both
        logarithmic scales without emitting a divide-by-zero warning.

    Raises:
        ValueError: If ``scale_str`` is not one of the supported scales.
    """
    if scale_str == LIN or not scale_str:
        return parameter

    if scale_str == LOG:
        # same zero handling as for log10 below
        with np.errstate(divide="ignore"):
            return np.log(parameter)

    if scale_str == LOG10:
        with np.errstate(divide="ignore"):
            return np.log10(parameter)

    raise ValueError(f"Invalid parameter scaling: {scale_str}")
def unscale(
    parameter: numbers.Number,
    scale_str: PARAMETER_SCALE_ARGS,
) -> numbers.Number:
    """Unscale parameter according to ``scale_str``.

    Arguments:
        parameter:
            Parameter to be unscaled.
        scale_str:
            One of ``'lin'`` (synonymous with ``''``), ``'log'``, ``'log10'``.

    Returns:
        The unscaled parameter.

    Raises:
        ValueError: If ``scale_str`` is not one of the supported scales.
    """
    if scale_str == LIN or not scale_str:
        # linear scale: nothing to undo
        unscaled = parameter
    elif scale_str == LOG:
        unscaled = np.exp(parameter)
    elif scale_str == LOG10:
        unscaled = 10**parameter
    else:
        raise ValueError(f"Invalid parameter scaling: {scale_str}")

    return unscaled
def map_scale(
    parameters: Sequence[numbers.Number],
    scale_strs: Iterable[PARAMETER_SCALE_ARGS] | PARAMETER_SCALE_ARGS,
) -> Iterable[numbers.Number]:
    """Scale the parameters, i.e. as :func:`scale`, but for Sequences.

    Arguments:
        parameters:
            Parameters to be scaled.
        scale_strs:
            Scales to apply. Broadcast if a single string.

    Returns:
        The scaled parameters, as a lazily evaluated generator. Parameters
        and scales are paired strictly, i.e. a length mismatch raises a
        ``ValueError`` while iterating.
    """
    # a single scale applies to all parameters
    scales = (
        [scale_strs] * len(parameters)
        if isinstance(scale_strs, str)
        else scale_strs
    )
    pairs = zip(parameters, scales, strict=True)
    return (scale(value, how) for value, how in pairs)
def map_unscale(
    parameters: Sequence[numbers.Number],
    scale_strs: Iterable[PARAMETER_SCALE_ARGS] | PARAMETER_SCALE_ARGS,
) -> Iterable[numbers.Number]:
    """Unscale the parameters, i.e. as :func:`unscale`, but for Sequences.

    Arguments:
        parameters:
            Parameters to be unscaled.
        scale_strs:
            Scales that the parameters are currently on.
            Broadcast if a single string.

    Returns:
        The unscaled parameters, as a lazily evaluated generator.
        Parameters and scales are paired strictly, i.e. a length mismatch
        raises a ``ValueError`` while iterating.
    """
    # a single scale applies to all parameters
    scales = (
        [scale_strs] * len(parameters)
        if isinstance(scale_strs, str)
        else scale_strs
    )
    pairs = zip(parameters, scales, strict=True)
    return (unscale(value, how) for value, how in pairs)
def normalize_parameter_df(parameter_df: pd.DataFrame) -> pd.DataFrame:
    """Add missing columns and fill in default values.

    Arguments:
        parameter_df: PEtab parameter table. It is not modified.

    Returns:
        A deep copy of ``parameter_df`` in which

        * a missing ``PARAMETER_NAME`` column is filled with the parameter
          IDs,
        * missing or empty prior types are set to
          ``PARAMETER_SCALE_UNIFORM``,
        * missing or empty prior parameters of such uniform priors are set
          to the scaled lower and upper parameter bounds.
    """
    df = parameter_df.copy(deep=True)

    if PARAMETER_NAME not in df:
        # Assign the raw values: the Series from ``reset_index()`` carries a
        # fresh RangeIndex, so assigning it as is to a table that is indexed
        # by parameter ID would be aligned on the index labels and yield
        # only NaN.
        df[PARAMETER_NAME] = df.reset_index()[PARAMETER_ID].to_numpy()

    prior_type_cols = [INITIALIZATION_PRIOR_TYPE, OBJECTIVE_PRIOR_TYPE]
    prior_par_cols = [
        INITIALIZATION_PRIOR_PARAMETERS,
        OBJECTIVE_PRIOR_PARAMETERS,
    ]
    # iterate over initialization and objective priors
    for prior_type_col, prior_par_col in zip(
        prior_type_cols, prior_par_cols, strict=True
    ):
        # fill in default values for prior type
        if prior_type_col not in df:
            df[prior_type_col] = PARAMETER_SCALE_UNIFORM
        else:
            for irow, row in df.iterrows():
                if core.is_empty(row[prior_type_col]):
                    df.loc[irow, prior_type_col] = PARAMETER_SCALE_UNIFORM

        # fill in default values for the prior parameters; iterate anew so
        # that the rows reflect the prior types that were just filled in
        if prior_par_col not in df:
            df[prior_par_col] = None
        for irow, row in df.iterrows():
            if (
                core.is_empty(row[prior_par_col])
                and row[prior_type_col] == PARAMETER_SCALE_UNIFORM
            ):
                lb, ub = map_scale(
                    [row[LOWER_BOUND], row[UPPER_BOUND]],
                    [row[PARAMETER_SCALE]] * 2,
                )
                df.loc[irow, prior_par_col] = f"{lb}{PARAMETER_SEPARATOR}{ub}"

    return df