Python

scenario_vetting_criteria

Definitions of scenario validation criteria.

Use the load_criteria function to load definitions from raw definition files.

load_criteria

load_criteria(
    components=None,
    load_all=False,
    csv_engine="pandas",
    criteria_types=None,
    reference_subset=None,
    edition=None,
)

Load and return the criteria definitions contained in the package.

Parameters:

Name	Type	Description	Default
`components`	`str \| list[str] \| tuple[str]`	A single component name, or a list/tuple of component names. The return type changes depending on whether a list/tuple or a single string is provided.	`None`
`load_all`	`bool`	Alternatively to providing the names of individual components, the loading of all components can be instructed with the key-word argument `load_all=True`.	`False`
`csv_engine`	`str = 'pandas'`	The method for loading CSV files if these are supposed to be loaded. Must be one of `pandas` or `python`. Defaults to `pandas`. The output changes accordingly.	`'pandas'`
`criteria_types`	`str \| list[str] \| tuple[str]`	When loading the components `thresholds` and `descriptions`, by default all criteria types are loaded. Alternatively, a single string or a list or tuple of strings can be provided as argument `criteria_types` to load only a subset of criteria of corresponding type(s).	`None`
`reference_subset`	`str \| list[str] \| tuple[str]`	When loading the component `reference-data`, by default all sources are loaded. Alternatively, a single string or a list or tuple of strings can be provided as argument `reference_subset` to load only a subset of sources.	`None`
`edition`	`str`	Define the edition of the criteria definition to load. If not provided, the latest edition will be used.	`None`

Returns:

Type	Description
`DataFrame \| dict[str, str] \| dict[str, DataFrame \| dict[str, str]]`	Returns the loaded data. This data can be a dataframe or a dictionary. If multiple data components are requested, then the components are returned inside a dictionary keyed by component name.

Source code in python/scenario_vetting_criteria/__init__.py

def load_criteria(
    components: str | list[str] | tuple[str] | None = None,
    load_all: bool = False,
    csv_engine: Literal["pandas", "python"] = "pandas",
    criteria_types: str | list[str] | None = None,
    reference_subset: str | list[str] | tuple[str] | None = None,
    edition: str | None = None,
):
    """Load and return the criteria definitions contained in the package.

    Parameters
    ----------
    components : str | list[str] | tuple[str], optional
        A string or list/vector of strings. The return type changes depending
        on whether a list/vector or a single string is provided.
    load_all : bool, optional
        Alternatively to providing the names of individual components, the
        loading of all components can be instructed with the key-word argument
        `load_all=True`.
    csv_engine : str = 'pandas', optional
        The method for loading CSV files if these are supposed to be loaded.
        Must be one of `pandas` or `python`. Defaults to `pandas`. The output
        changes accordingly.
    criteria_types : str | list[str] | tuple[str], optional
        When loading the components `thresholds` and `descriptions`, by default
        all criteria types are loaded. Alternatively, a single string or a
        list or tuple of strings can be provided as argument `criteria_types`
        to load only a subset of criteria of corresponding type(s).
    reference_subset : str | list[str] | tuple[str], optional
        When loading the component `reference-data`, by default all sources
        are loaded. Alternatively, a single string or a list or tuple of
        strings can be provided as argument `reference_subset` to load only
        a subset of sources.
    edition : str, optional
        Define the edition of the criteria definition to load. If not
        provided, the latest edition will be used.

    Returns
    -------
    pd.DataFrame | dict[str, str] | dict[str, pd.DataFrame | dict[str, str]]
        Returns the loaded data. This data can be a dataframe or a nested
        list. If multiple data components are requested, then the components
        are returned inside a keyworded list.

    """
    if components is None and not load_all:
        raise Exception(
            "At least one component must be provided as function argument."
        )
    if components is not None and load_all:
        raise Exception(
            "Component name(s) and `load_all` cannot be provided as arguments "
            "at the same time."
        )
    if load_all:
        components = COMPONENTS
    if edition is None:
        edition = sorted(list(editions))[-1]
    elif edition not in editions:
        raise Exception(
            f"Edition '{edition}' not known. Choose from: "
            f"{', '.join(editions)}"
        )
    edition_path = editions[edition]
    if criteria_types is not None:
        if isinstance(criteria_types, str):
            criteria_types = [criteria_types]
        elif not isinstance(criteria_types, tuple):
            criteria_types = list(criteria_types)
    if reference_subset is not None:
        if isinstance(reference_subset, str):
            reference_subset = [reference_subset]
        elif isinstance(reference_subset, tuple):
            reference_subset = list(reference_subset)
    if isinstance(components, str):
        return _load_criteria_file(
            component=components,
            csv_engine=csv_engine,
            criteria_types=criteria_types,
            reference_subset=reference_subset,
            edition_path=edition_path,
        )
    elif (
        isinstance(components, list) and
        all(isinstance(c, str) for c in components)
    ):
        return {
            component: _load_criteria_file(
                component=component,
                csv_engine=csv_engine,
                criteria_types=criteria_types,
                reference_subset=reference_subset,
                edition_path=edition_path,
            )
            for component in components
        }
    else:
        raise Exception(
            "Argument `components` must be string or list of strings."
        )

scenario_vetting_criteria.formatting

Format bibliographic information on sources.

format_sources

format_sources(
    bib_data,
    style="alpha",
    target="plaintext",
    exclude_fields=None,
)

Convert sources to specific format.

Takes a citation style, a citation format, and (optionally) excluded fields, and returns a formatted list of sources based on the specified style and format. The sources are loaded from 'sources.bib' file.

When two or more entries share the same first author and year, a lower-case letter suffix is appended to the year to disambiguate them (e.g. "IAEA, 2024a" and "IAEA, 2024b"). Entries within each collision group are sorted alphabetically by their BibTeX key to ensure a deterministic assignment of letters.

Parameters:

Name	Type	Description	Default
`bib_data`	`BibliographyData`	Bibliography data loaded from BibTeX file.	required
`style`	`str`	Specifies the formatting style for the bibliography entries.	`'alpha'`
`target`	`str`	Specifies the format in which the citation should be rendered. It determines how the citation information will be displayed or structured in the final output. This can be 'plaintext' or 'html'.	`'plaintext'`
`exclude_fields`	`Optional[list]`	Specifies a list of fields that should be excluded from the final output. These fields will be removed from the entries before formatting and returning the citation data.	`None`

Returns:

Type	Description
`dict[str, dict]`	A dictionary keyed by entry identifier. Each value is a dictionary containing the citation labels, the formatted bibliography entry, and the DOI/URL/PDF information for that entry, formatted according to the specified style and target, with any excluded fields removed.

Source code in python/scenario_vetting_criteria/formatting.py

def format_sources(
    bib_data: BibliographyData,
    style: str = "alpha",
    target: str = "plaintext",
    exclude_fields: Optional[list] = None,
) -> dict[str, dict[str, str | None]]:
    """Convert sources to specific format.

    Takes bibliography data, a citation style, a target format, and
    (optionally) excluded fields, and returns the formatted sources keyed
    by their identifier.

    When two or more entries share the same first author and year, a
    lower-case letter suffix is appended to the year to disambiguate them
    (e.g. "IAEA, 2024a" and "IAEA, 2024b"). Entries within each collision
    group are sorted alphabetically by their BibTeX key to ensure a
    deterministic assignment of letters.

    The entries of `bib_data` are deep-copied before any field is patched
    or removed, so the caller's bibliography is left untouched and the
    function can be called repeatedly on the same data.

    Parameters
    ----------
    bib_data
        Bibliography data loaded from BibTeX file.
    style
        Specifies the formatting style for the bibliography entries.
    target
        Specifies the format in which the citation should be rendered.
        It determines how the citation information will be displayed or
        structured in the final output. This can be 'plaintext' or 'html'.
    exclude_fields
        Specifies a list of fields that should be excluded from the
        final output. These fields will be removed from the entries
        before formatting and returning the citation data.

    Returns
    -------
        dict[str, dict]
            A dictionary keyed by entry identifier. Each value holds the
            keys ``cite_auth``, ``cite_year``, ``cite``, ``citep``,
            ``bib``, ``doi``, ``url_doi``, ``url`` and ``pdf``; the
            DOI/URL/PDF values are ``None`` when the entry does not
            provide them.

    Raises
    ------
        ValueError
            If an entry has neither an author nor an editor.
        Exception
            If formatting an entry fails; the message names the entry.

    """
    import copy
    from collections import defaultdict

    exclude_fields = exclude_fields or []

    # Deep-copy the entries in a single call: all patching and deleting
    # below must not leak into the caller's `bib_data` (otherwise a second
    # call would find the doi/url/pdf fields already removed).
    entries = dict(copy.deepcopy(list(bib_data.entries.items())))

    # --- Pass 1: collect (cite_auth, cite_year) for every entry ----------
    auth_year: dict[str, tuple[str, str]] = {}
    for identifier, entry in entries.items():
        # Fall back to the editor for entries without an author.
        persons = entry.persons.get("author") or entry.persons.get("editor")
        if not persons:
            raise ValueError(
                f"Entry '{identifier}' has neither an author nor an editor; "
                "cannot build its citation label."
            )
        # Strip BibTeX protection braces from the name.
        cite_auth = re.sub("[{}]", "", " ".join(persons[0].last_names))
        cite_year = entry.fields.get("year", "n.d.")
        auth_year[identifier] = (cite_auth, str(cite_year))

    # --- Assign disambiguation suffixes for (auth, year) collisions ------
    groups: dict[tuple[str, str], list[str]] = defaultdict(list)
    for identifier, key in auth_year.items():
        groups[key].append(identifier)

    suffixes: dict[str, str] = {}
    for ids in groups.values():
        if len(ids) > 1:
            # Sorting by BibTeX key makes the letter assignment stable.
            for i, identifier in enumerate(sorted(ids)):
                suffixes[identifier] = chr(ord("a") + i)

    # --- Patch year fields (on the copies) before formatting -------------
    for identifier, suffix in suffixes.items():
        entries[identifier].fields["year"] = auth_year[identifier][1] + suffix

    # --- Exclude undesired fields ----------------------------------------
    # Use the public mapping API so the field container stays consistent.
    for entry in entries.values():
        for ef in exclude_fields:
            if ef in entry.fields:
                del entry.fields[ef]

    # --- Pass 2: format each entry ---------------------------------------
    pyb_style = find_plugin("pybtex.style.formatting", style)()
    pyb_format = find_plugin("pybtex.backends", target)()

    ret = {}
    for identifier, entry in entries.items():
        try:
            cite_auth, base_year = auth_year[identifier]
            cite_year = base_year + suffixes.get(identifier, "")

            doi = entry.fields.get("doi", None)
            url = entry.fields.get("url", None)
            pdf = entry.fields.get("pdf", None)
            url_doi = f"https://doi.org/{doi}" if doi else None

            # Links are returned separately, so keep them out of the
            # rendered bibliography text.
            for name, value in (("doi", doi), ("url", url), ("pdf", pdf)):
                if value:
                    del entry.fields[name]

            bib = next(pyb_style.format_entries([entry])).text.render(
                pyb_format
            )

            ret[identifier] = {
                "cite_auth": cite_auth,
                "cite_year": cite_year,
                "cite": f"{cite_auth} ({cite_year})",
                "citep": f"({cite_auth}, {cite_year})",
                "bib": bib,
                "doi": doi,
                "url_doi": url_doi,
                "url": url or url_doi,
                "pdf": pdf,
            }
        except Exception as ex:
            raise Exception(
                f"Error occurred while parsing '{identifier}':\n{ex}"
            ) from ex

    return ret

insert_citations

insert_citations(text, citations, link=None)

Insert citations into placeholders in a text.

Parameters:

Name	Type	Description	Default
`text`	`str`	Text that contains replacement patterns for citations.	required
`citations`	`dict[str, dict[str, str]]`	Formatted citations for each identifier.	required
`link`	`str \| None`	Top-level page address for all citations.	`None`

Returns:

Type	Description
`str`	The updated text, which has the patterns replaced with citations.

Source code in python/scenario_vetting_criteria/formatting.py

def insert_citations(
    text: str,
    citations: dict[str, dict[str, str]],
    link: str | None = None,
) -> str:
    """Insert citations into placeholders in a text.

    Parameters
    ----------
    text
        Text that contains replacement patterns for citations.
    citations
        Formatted citations for each identifier.
    link
        Top-level page address for all citations.

    Returns
    -------
        str
            The updated text, which has the patterns replaced with citations.

    """
    return re.sub(
        r"{{(cite|citep):([^}]+)}}",
        lambda m: (
            (f'<a href="{link}#{m.group(2)}">' if link is not None else "")
            + citations.get(m.group(2), {}).get(m.group(1), m.group(0))
            + ("</a>" if link is not None else "")
        ),
        text,
    )