Skip to content

tedf

TEBase

Base Class for Technoeconomic Data

Parameters:

Name Type Description Default
parent_variable str

Variable from which Data should be collected

required
Source code in python/posted/tedf.py
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
class TEBase:
    """
    Common base for classes that handle Technoeconomic Data

    Parameters
    ----------
    parent_variable: str
        Variable from which Data should be collected
    """
    def __init__(self, parent_variable: str):
        """Store the parent variable and collect the specs of all variables whose ID starts with it"""
        self._parent_variable: str = parent_variable

        # keep only those entries of the variable definitions whose key begins with the parent variable
        selected = {}
        for var_id, specs in variables.items():
            if var_id.startswith(self._parent_variable):
                selected[var_id] = specs
        self._var_specs: dict = selected

    @property
    def parent_variable(self) -> str:
        """Parent variable this object was created for (read-only)"""
        return self._parent_variable

parent_variable: str property

Get parent variable

__init__(parent_variable)

Set parent variable and technology specifications (var_specs) from input

Source code in python/posted/tedf.py
72
73
74
75
def __init__(self, parent_variable: str):
    """Store the parent variable and collect the specs of all variables whose ID starts with it"""
    self._parent_variable: str = parent_variable

    # keep only those entries of the variable definitions whose key begins with the parent variable
    selected = {}
    for var_id, specs in variables.items():
        if var_id.startswith(self._parent_variable):
            selected[var_id] = specs
    self._var_specs: dict = selected

TEDF

Bases: TEBase

Class to store Technoeconomic DataFiles

Parameters:

Name Type Description Default
parent_variable str

Variable from which Data should be collected

required
database_id str

Database from which to load data

'public'
file_path Optional[Path]

File Path from which to load file

None
data Optional[DataFrame]

Specific Technoeconomic data

None

Methods:

Name Description
load

Load TEDataFile if it has not been read yet

read

Read TEDF from CSV file

write

Write TEDF to CSV file

check

Check if TEDF is consistent

check_row

Check that row in TEDF is consistent and return all inconsistencies found for row

Source code in python/posted/tedf.py
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
class TEDF(TEBase):
    """
    Class to store Technoeconomic DataFiles

    Parameters
    ----------
    parent_variable: str
        Variable from which Data should be collected
    database_id: str, default: public
        Database from which to load data
    file_path: Path, optional
        File Path from which to load file
    data: pd.DataFrame, optional
        Specific Technoeconomic data

    Methods
    ----------
    load
        Load TEDataFile if it has not been read yet
    read
        Read TEDF from CSV file
    write
        Write TEDF to CSV file
    check
        Check if TEDF is consistent
    check_row
        Check that row in TEDF is consistent and return all inconsistencies found for row
    """

    # typed declarations
    _df: None | pd.DataFrame
    _inconsistencies: dict
    _file_path: None | Path
    _fields: dict[str, AbstractFieldDefinition]
    _columns: dict[str, AbstractColumnDefinition]

    def __init__(self,
                 parent_variable: str,
                 database_id: str = 'public',
                 file_path: Optional[Path] = None,
                 data: Optional[pd.DataFrame] = None,
                 ):
        """
        Initialise parent class and object fields

        The file path is resolved in this order: it is None if `data` is given, otherwise `file_path` if given,
        otherwise a path inside the `tedfs` directory of the chosen database that is derived from the parent
        variable.
        """
        TEBase.__init__(self, parent_variable)

        self._df = data
        self._inconsistencies = {}
        self._file_path = (
            None if data is not None else
            file_path if file_path is not None else
            databases[database_id] / 'tedfs' / ('/'.join(self._parent_variable.split('|')) + '.csv')
        )
        self._fields, comments = read_fields(self._parent_variable)
        self._columns = self._fields | base_columns | comments

    @property
    def file_path(self) -> None | Path:
        """ Get or set the file path (None if the object was created from a dataframe and no path was set)"""
        return self._file_path

    @file_path.setter
    def file_path(self, file_path: Path):
        self._file_path = file_path

    def load(self):
        """
        load TEDataFile (only if it has not been read yet)

        Warns
        ----------
        warning
            Warns if TEDF is already loaded
        Returns
        --------
            TEDF
                Returns the TEDF object it is called on
        """
        if self._df is None:
            self.read()
        else:
            warnings.warn('TEDF is already loaded. Please execute .read() if you want to load from file again.')

        return self

    def read(self):
        """
        read TEDF from CSV file

        Raises
        ------
        Exception
            If there is no file path from which to read, or if the CSV file contains a column ID that is not part
            of the columns definition
        """

        if self._file_path is None:
            raise Exception('Cannot read from file, as this TEDF object has been created from a dataframe.')

        # read CSV file
        self._df = pd.read_csv(
            self._file_path,
            sep=',',
            quotechar='"',
            encoding='utf-8',
        )

        # check column IDs match base columns and fields
        if not all(c in self._columns for c in self._df.columns):
            raise Exception(f"Column IDs used in CSV file do not match columns definition: {self._df.columns.tolist()}")

        # adjust row index to start at 1 instead of 0
        self._df.index += 1

        # insert missing columns and reorder via reindexing, then update dtypes
        df_new = self._df.reindex(columns=list(self._columns.keys()))
        for col_id, col in self._columns.items():
            # columns that were present in the file are kept as read
            if col_id in self._df:
                continue
            df_new[col_id] = df_new[col_id].astype(col.dtype)
            df_new[col_id] = col.default
        self._df = df_new

    def write(self):
        """
        Write TEDF to CSV file

        Raises
        ------
        Exception
            If there is no file path that specifies where to write
        """
        if self._file_path is None:
            raise Exception('Cannot write to file, as this TEDataFile object has been created from a dataframe. Please '
                            'first set a file path on this object.')

        self._df.to_csv(
            self._file_path,
            index=False,
            sep=',',
            quotechar='"',
            encoding='utf-8',
            na_rep='',
        )

    @property
    def data(self) -> pd.DataFrame:
        """Get data, i.e. access dataframe"""
        return self._df

    @property
    def inconsistencies(self) -> dict[int, TEDFInconsistencyException]:
        """Get inconsistencies"""
        return self._inconsistencies

    def check(self, raise_exception: bool = True):
        """
        Check that TEDF is consistent and add inconsistencies to internal parameter

        Parameters
        ----------
        raise_exception: bool, default: True
            If exception is to be raised
        """
        self._inconsistencies = {}

        # check row consistency for each row individually
        for row_id in self._df.index:
            self._inconsistencies[row_id] = self.check_row(row_id, raise_exception=raise_exception)

    def check_row(self, row_id: int, raise_exception: bool) -> list[TEDFInconsistencyException]:
        """
        Check that row in TEDF is consistent and return all inconsistencies found for row

        Parameters
        ----------
        row_id: int
            Index of the row to check
        raise_exception: bool
            If exception is to be raised

        Returns
        -------
            list
                List of inconsistencies
        """
        row = self._df.loc[row_id]
        ikwargs = {'row_id': row_id, 'file_path': self._file_path, 'raise_exception': raise_exception}
        ret = []

        # check whether fields are among those defined in the technology specs
        for col_id, col in self._columns.items():
            cell = row[col_id]
            if col.col_type == 'variable':
                # variable cells are given relative to the parent variable
                cell = cell if pd.isnull(cell) else self.parent_variable + '|' + cell
            if not col.is_allowed(cell):
                ret.append(new_inconsistency(
                    message=f"Invalid cell for column of type '{col.col_type}': {cell}", col_id=col_id, **ikwargs,
                ))

        # check that reported and reference units match variable definition
        for col_prefix in ['', 'reference_']:
            raw_variable = row[col_prefix + 'variable']
            col_id = col_prefix + 'unit'
            unit = row[col_id]
            if pd.isnull(raw_variable) and pd.isnull(unit):
                continue
            if pd.isnull(raw_variable) or pd.isnull(unit):
                ret.append(new_inconsistency(
                    message=f"Variable and unit must either both be set or both be unset': {raw_variable} -- {unit}",
                    col_id=col_id, **ikwargs,
                ))
                # without a variable there is no definition to compare the unit against, and building the
                # variable name below would fail on the missing value
                if pd.isnull(raw_variable):
                    continue
            variable = self.parent_variable + '|' + raw_variable
            var_specs = variables[variable]
            if 'dimension' not in var_specs:
                # use pd.isnull, since a missing value is not guaranteed to be the np.nan object itself
                if not pd.isnull(unit):
                    ret.append(new_inconsistency(
                        message=f"Unexpected unit '{unit}' for {col_id}.", col_id=col_id, **ikwargs,
                    ))
                continue
            dimension = var_specs['dimension']

            flow_id = var_specs['flow_id'] if 'flow_id' in var_specs else None
            allowed, message = unit_allowed(unit=unit, flow_id=flow_id, dimension=dimension)
            if not allowed:
                ret.append(new_inconsistency(message=message, col_id=col_id, **ikwargs))

        return ret

data: pd.DataFrame property

Get data, i.e. access dataframe

file_path: Path property writable

Get or set the file path

inconsistencies: dict[int, TEDFInconsistencyException] property

Get inconsistencies

__init__(parent_variable, database_id='public', file_path=None, data=None)

Initialise parent class and object fields

Source code in python/posted/tedf.py
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
def __init__(self,
             parent_variable: str,
             database_id: str = 'public',
             file_path: Optional[Path] = None,
             data: Optional[pd.DataFrame] = None,
             ):
    """Set up the parent class, then the dataframe, file path and column definitions of this object"""
    TEBase.__init__(self, parent_variable)

    self._df = data
    self._inconsistencies = {}

    # resolve the file path: none when built from data, else the explicit path, else the database default
    if data is not None:
        self._file_path = None
    elif file_path is not None:
        self._file_path = file_path
    else:
        tedf_dir = databases[database_id] / 'tedfs'
        self._file_path = tedf_dir / (self._parent_variable.replace('|', '/') + '.csv')

    fields, comments = read_fields(self._parent_variable)
    self._fields = fields
    self._columns = self._fields | base_columns | comments

check(raise_exception=True)

Check that TEDF is consistent and add inconsistencies to internal parameter

Parameters:

Name Type Description Default
raise_exception bool

If exception is to be raised

True
Source code in python/posted/tedf.py
239
240
241
242
243
244
245
246
247
248
249
250
251
252
def check(self, raise_exception: bool = True):
    """
    Run the row check on every row of the TEDF and store the results per row index

    Parameters
    ----------
    raise_exception: bool, default: True
        If exception is to be raised
    """
    # start from an empty record, so results of earlier checks are discarded
    found = self._inconsistencies = {}

    # each row is checked on its own
    for idx in self._df.index:
        found[idx] = self.check_row(idx, raise_exception=raise_exception)

check_row(row_id, raise_exception)

Check that row in TEDF is consistent and return all inconsistencies found for row

Parameters:

Name Type Description Default
row_id int

Index of the row to check

required
raise_exception bool

If exception is to be raised

required

Returns:

Type Description
list

List of inconsistencies

Source code in python/posted/tedf.py
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
def check_row(self, row_id: int, raise_exception: bool) -> list[TEDFInconsistencyException]:
    """
    Check that row in TEDF is consistent and return all inconsistencies found for row

    Parameters
    ----------
    row_id: int
        Index of the row to check
    raise_exception: bool
        If exception is to be raised

    Returns
    -------
        list
            List of inconsistencies
    """
    row = self._df.loc[row_id]
    ikwargs = {'row_id': row_id, 'file_path': self._file_path, 'raise_exception': raise_exception}
    ret = []

    # check whether fields are among those defined in the technology specs
    for col_id, col in self._columns.items():
        cell = row[col_id]
        if col.col_type == 'variable':
            # variable cells are given relative to the parent variable
            cell = cell if pd.isnull(cell) else self.parent_variable + '|' + cell
        if not col.is_allowed(cell):
            ret.append(new_inconsistency(
                message=f"Invalid cell for column of type '{col.col_type}': {cell}", col_id=col_id, **ikwargs,
            ))

    # check that reported and reference units match variable definition
    for col_prefix in ['', 'reference_']:
        raw_variable = row[col_prefix + 'variable']
        col_id = col_prefix + 'unit'
        unit = row[col_id]
        if pd.isnull(raw_variable) and pd.isnull(unit):
            continue
        if pd.isnull(raw_variable) or pd.isnull(unit):
            ret.append(new_inconsistency(
                message=f"Variable and unit must either both be set or both be unset': {raw_variable} -- {unit}",
                col_id=col_id, **ikwargs,
            ))
            # without a variable there is no definition to compare the unit against, and building the
            # variable name below would fail on the missing value
            if pd.isnull(raw_variable):
                continue
        variable = self.parent_variable + '|' + raw_variable
        var_specs = variables[variable]
        if 'dimension' not in var_specs:
            # use pd.isnull, since a missing value is not guaranteed to be the np.nan object itself
            if not pd.isnull(unit):
                ret.append(new_inconsistency(
                    message=f"Unexpected unit '{unit}' for {col_id}.", col_id=col_id, **ikwargs,
                ))
            continue
        dimension = var_specs['dimension']

        flow_id = var_specs['flow_id'] if 'flow_id' in var_specs else None
        allowed, message = unit_allowed(unit=unit, flow_id=flow_id, dimension=dimension)
        if not allowed:
            ret.append(new_inconsistency(message=message, col_id=col_id, **ikwargs))

    return ret

load()

load TEDataFile (only if it has not been read yet)

Warns:

Type Description
warning

Warns if TEDF is already loaded

Returns:

Type Description
TEDF

Returns the TEDF object it is called on

Source code in python/posted/tedf.py
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
def load(self):
    """
    Read the TEDataFile from file unless data is already present

    Warns
    ----------
    warning
        Warns if TEDF is already loaded
    Returns
    --------
        TEDF
            Returns the TEDF object it is called on
    """
    # data already present: only warn, never re-read implicitly
    if self._df is not None:
        warnings.warn('TEDF is already loaded. Please execute .read() if you want to load from file again.')
        return self

    self.read()
    return self

read()

read TEDF from CSV file

Raises:

Type Description
Exception

If there is no file path from which to read

Source code in python/posted/tedf.py
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
def read(self):
    """
    Load the TEDF from its CSV file into the internal dataframe

    Raises
    ------
    Exception
        If there is no file path from which to read, or if the CSV file contains a column ID that is not part
        of the columns definition
    """
    source = self._file_path
    if source is None:
        raise Exception('Cannot read from file, as this TEDF object has been created from a dataframe.')

    # parse the CSV file
    csv_options = {'sep': ',', 'quotechar': '"', 'encoding': 'utf-8'}
    self._df = pd.read_csv(source, **csv_options)

    # every column ID found in the file has to be a known base column or field
    if any(c not in self._columns for c in self._df.columns):
        raise Exception(f"Column IDs used in CSV file do not match columns definition: {self._df.columns.tolist()}")

    # row index counts from 1 instead of 0
    self._df.index += 1

    # reindexing brings the columns into definition order and creates those absent from the file
    loaded = self._df
    completed = loaded.reindex(columns=list(self._columns.keys()))
    for col_id, col in self._columns.items():
        if col_id not in loaded:
            completed[col_id] = completed[col_id].astype(col.dtype)
            completed[col_id] = col.default
    self._df = completed

write()

Write TEDF to CSV file

Raises:

Type Description
Exception

If there is no file path that specifies where to write

Source code in python/posted/tedf.py
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
def write(self):
    """
    Store the dataframe of the TEDF in its CSV file

    Raises
    ------
    Exception
        If there is no file path that specifies where to write
    """
    target = self._file_path
    if target is None:
        raise Exception('Cannot write to file, as this TEDataFile object has been created from a dataframe. Please '
                        'first set a file path on this object.')

    # the row index is not written; missing values become empty cells
    csv_options = {
        'index': False,
        'sep': ',',
        'quotechar': '"',
        'encoding': 'utf-8',
        'na_rep': '',
    }
    self._df.to_csv(target, **csv_options)

TEDFInconsistencyException

Bases: Exception

Exception raised for inconsistencies in TEDFs.

Attributes: message -- message explaining the inconsistency row_id -- row where the inconsistency occurs col_id -- column where the inconsistency occurs file_path -- path to the file where the inconsistency occurs

Source code in python/posted/tedf.py
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
class TEDFInconsistencyException(Exception):
    """Exception raised for inconsistencies in TEDFs.

    Attributes:
        message -- message explaining the inconsistency
        row_id -- row where the inconsistency occurs
        col_id -- column where the inconsistency occurs
        file_path -- path to the file where the inconsistency occurs
    """
    def __init__(self, message: str = "Inconsistency detected", row_id: None | int = None,
                 col_id: None | str = None, file_path: None | Path = None):
        self.message: str = message
        self.row_id: None | int = row_id
        self.col_id: None | str = col_id
        self.file_path: None | Path = file_path

        # add tokens at the end of the error message
        message_tokens = []
        if file_path is not None:
            message_tokens.append(f"file \"{file_path}\"")
        if row_id is not None:
            message_tokens.append(f"line {row_id}")
        if col_id is not None:
            message_tokens.append(f"in column \"{col_id}\"")

        # compose error message from tokens
        exception_message: str = message
        if message_tokens:
            exception_message += f"\n    " + (", ".join(message_tokens)).capitalize()

        super().__init__(exception_message)

new_inconsistency(raise_exception, **kwargs)

Create new inconsistency object based on kwargs

Source code in python/posted/tedf.py
46
47
48
49
50
51
52
53
54
55
56
57
58
def new_inconsistency(raise_exception: bool, **kwargs) -> TEDFInconsistencyException:
    """
    Build an inconsistency exception from the keyword arguments and either raise or return it

    Parameters
    ----------
    raise_exception: bool
        If True, the new exception is raised; otherwise it is returned
    **kwargs
        Passed on unchanged to the constructor of TEDFInconsistencyException

    Returns
    -------
        TEDFInconsistencyException
            The new exception (only if raise_exception is False)
    """
    inconsistency = TEDFInconsistencyException(**kwargs)
    if not raise_exception:
        return inconsistency
    raise inconsistency