Skip to content

Python

scenario_vetting_criteria

load_criteria

load_criteria(
    components=None,
    load_all=False,
    csv_engine="pandas",
    reference_subset=None,
)

Loads and returns the criteria definitions contained in the package.

Parameters:

Name Type Description Default
components str | list[str] | tuple[str] | None

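A single component name as a string, or a list/tuple of component names. The return type changes depending on which is provided: a single string returns that component directly, whereas a list or tuple returns a dictionary keyed by component name.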

None
load_all bool

Alternatively to providing the names of individual components, the loading of all components can be instructed with the key-word argument load_all=True.

False
csv_engine str = 'pandas'

The method for loading CSV files if these are supposed to be loaded. Defaults to 'pandas'. (The options read.csv, readr, and data.table previously listed here are R CSV readers and do not match the Python default.)

'pandas'
reference_subset str | list[str] | tuple[str] | None

When loading the component reference-data, by default all sources are loaded. Alternatively, a single string or a list or tuple of strings can be provided as argument reference_subset to load only a subset of sources.

None

Returns:

Type Description
pd.DataFrame | dict[str, str] | dict[str, pd.DataFrame | dict[str, str]]

Returns the loaded data. This data can be a dataframe or a dictionary. If multiple data components are requested, then the components are returned inside a dictionary keyed by component name.

Source code in python/scenario_vetting_criteria/loading.py
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
def load_criteria(components: str | list[str] | tuple[str] | None = None,
                  load_all: bool = False,
                  csv_engine: str = 'pandas',
                  reference_subset: str | list[str] | tuple[str] | None = None):
    """
    Loads and returns the criteria definitions contained in the package.

    Parameters
    ----------
    components : str | list[str] | tuple[str] | None
        A string or list/vector of strings. The return type changes depending 
        on whether a list/vector or a single string is provided.
    load_all : bool
        Alternatively to providing the names of individual components, the 
        loading of all components can be instructed with the key-word argument 
        `load_all=True`.
    csv_engine : str = 'pandas'
        The method for loading CSV files if these are supposed to be loaded. Must 
        be one of `read.csv`, `readr`, and `data.table`. Defaults to `read.csv`.
    reference_subset : str | list[str] | tuple[str] | None
        When loading the component `reference-data`, by default all sources are 
        loaded. Alternatively, a single string or a list or tuple of strings can 
        be provided as argument `reference_subset` to load only a subset of sources.

    Returns
    -------
        pd.DataFrame | dict[str, str] | dict[str, pd.DataFrame | dict[str, str]]
            Returns the loaded data. This data can be a dataframe or a nested list. 
            If multiple data components are requested, then the components are 
            returned inside a keyworded list.
    """
    if components is None and not load_all:
        raise Exception('At least one component must be provided as function argument.')
    if components is not None and load_all:
        raise Exception('Component name(s) and `load_all` cannot be provided as arguments at the same time.')
    if load_all:
        components = list(file_paths) + ['reference-data', 'reference-metadata']
    if reference_subset is not None:
        if isinstance(reference_subset, str):
            reference_subset = [reference_subset]
        elif isinstance(reference_subset, tuple):
            reference_subset = list(reference_subset)
    if isinstance(components, str):
        return _load_criteria_file(
            component=components,
            csv_engine=csv_engine,
            reference_subset=reference_subset,
        )
    else:
        return {
            component: _load_criteria_file(
                component=component,
                csv_engine=csv_engine,
                reference_subset=reference_subset,
            )
            for component in components
        }

scenario_vetting_criteria.formatting

format_sources

format_sources(
    bib_data,
    style="alpha",
    form="plaintext",
    exclude_fields=None,
)

Takes bibliography data, a citation style, a citation format, and (optionally) excluded fields, and returns the formatted sources based on the specified style and format. The sources are read from the `bib_data` argument (for example, data parsed from the 'references-data.bib' file).

Parameters:

Name Type Description Default
style str

Specifies the formatting style for the bibliography entries.

'alpha'
form str

Specifies the format in which the citation should be rendered. It determines how the citation information will be displayed or structured in the final output. This can be 'plaintext' or 'html'.

'plaintext'
exclude_fields Optional[list]

Specifies a list of fields that should be excluded from the final output. These fields will be removed from the entries before formatting and returning the citation data.

None

Returns:

Type Description
dict[str, dict]

A dictionary that maps each entry identifier in the bibliography data to a dictionary containing the citation author, citation year, in-text citations, formatted bibliography entry, DOI, and link for that entry, formatted according to the specified style and form, with any excluded fields removed.

Source code in python/scenario_vetting_criteria/formatting.py
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
def format_sources(
        bib_data: BibliographyData,
        style: str = 'alpha',
        form: str = 'plaintext',
        exclude_fields: Optional[list] = None) -> dict:
    """
    Format every entry of a bibliography into citation strings.

    Parameters
    ----------
    bib_data
        The bibliography whose entries are formatted. If `exclude_fields`
        is given, the listed fields are deleted from the entries of this
        object in place.
    style
        Specifies the formatting style for the bibliography entries.
        Looked up as a plugin in the 'pybtex.style.formatting' group.
    form
        Specifies the format in which the citation should be rendered.
        It determines how the citation information will be displayed or
        structured in the final output. This can be 'plaintext' or 'html'.
        Looked up as a plugin in the 'pybtex.backends' group.
    exclude_fields
        Specifies a list of fields that should be excluded from the
        final output. These fields will be removed from the entries
        before formatting and returning the citation data.

    Returns
    -------
        dict[str, dict]
            A dictionary keyed by entry identifier. Each value is a
            dictionary with the keys 'cite_auth', 'cite_year', 'cite',
            'citep', 'bib', 'doi' and 'link'. 'link' is the DOI URL when a
            DOI is present and the entry's 'url' field (or None) otherwise.
    """
    # set exclude_fields to an empty list if provided as None
    exclude_fields = exclude_fields or []

    # load pybtex styles and formats based on arguments
    pyb_style = find_plugin('pybtex.style.formatting', style)()
    pyb_format = find_plugin('pybtex.backends', form)()

    # exclude undesired fields
    if exclude_fields:
        for entry in bib_data.entries.values():
            # NOTE(review): this reaches into the private `_dict` storage of
            # the fields mapping, presumably because deletion is not available
            # through its public interface -- confirm against the pybtex
            # version in use before replacing it with `del entry.fields[...]`.
            raw_fields = entry.fields.__dict__['_dict']
            for ef in exclude_fields:
                if ef in raw_fields:
                    del raw_fields[ef]

    # loop over entries and format accordingly
    ret = {}
    for identifier, entry in bib_data.entries.items():
        # Entries without an author (e.g. edited volumes) previously raised
        # IndexError; fall back to the editors, then to a 'n.a.' marker that
        # mirrors the 'n.d.' marker used for a missing year.
        persons = entry.persons.get("author") or entry.persons.get("editor") or []
        if persons:
            # only the first person is cited; strip BibTeX protection braces
            cite_auth = ' '.join(persons[0].last_names).replace('{', '').replace('}', '')
        else:
            cite_auth = 'n.a.'
        cite_year = entry.fields.get("year", "n.d.")

        doi = entry.fields.get("doi", None)
        url = entry.fields.get("url", None)

        ret[identifier] = {
            'cite_auth': cite_auth,
            'cite_year': cite_year,
            'cite': f"{cite_auth} ({cite_year})",
            'citep': f"({cite_auth}, {cite_year})",
            'bib': next(pyb_style.format_entries([entry])).text.render(pyb_format),
            'doi': doi,
            'link': f"https://doi.org/{doi}" if doi else url,
        }

    return ret

insert_citations

insert_citations(text, citations, link=None)

Inserts citations into a text passed as a string.

Parameters:

Name Type Description Default
text str

Text that contains replacement patterns for citations.

required
citations dict[str]

Formatted citations for each identifier.

required

Returns:

Type Description
str

The updated text, which has the patterns replaced with citations.

Source code in python/scenario_vetting_criteria/formatting.py
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
def insert_citations(text: str, citations: dict[str], link: None | str = None):
    """
    Inserts citations into a text passed as a string.

    Parameters
    ----------
    text
        Text that contains replacement patterns for citations.
    citations
        Formatted citations for each identifier.

    Returns
    -------
        str
            The updated text, which has the patterns replaced with citations.
    """
    return re.sub(
        r'{{(cite|citep):([^}]+)}}',
        lambda m: (
            (f"<a href=\"{link}#{m.group(2)}\">" if link else '') + 
            citations.get(m.group(2), {}).get(m.group(1), m.group(0)) + 
            ('</a>' if link else '')
        ),
        text,
    )