TUBAF data
The data from several experimental campaigns of the TU Bergakademie Freiberg (TUBAF) was curated within a dissertation thesis and is available in differing data structures from the zenodo publication 10.5281/zenodo.6654150.
One structure holds the relevant data in tabular form, within three tables
Units, methods and concepts were manually assigned
A pipeline was manually created to transform the data in a machine readable data structure
[5]:
from dataset_loader import *
import matplotlib.pyplot as plt
[6]:
TUBAF = load_sp_datapackage({"sourcedir": "example_datasets/TUBAF/"},'TUBAF')
TUBAF
[6]:
{'resources': [{'name': 'meta',
'type': 'table',
'path': 'tables/meta.csv',
'scheme': 'file',
'format': 'csv',
'mediatype': 'text/csv',
'encoding': 'utf-8',
'schema': {'fields': [{'name': 'soilloss',
'type': 'number',
'unit': 'kg',
'method': 'cumulative soil loss of '
'whole experiment',
'concept': 'http://aims.fao.org/aos/agrovoc/c_8dab65bc'},
{'name': 'endinfil',
'type': 'number',
'concept': '',
'unit': 'mm/min',
'method': 'own:DiffRainfall-Runoffatsteadystate'},
{'name': 'duration',
'type': 'integer',
'concept': 'http://aims.fao.org/aos/agrovoc/c_7778',
'unit': 's',
'method': 'own:stopwatch'},
{'name': 'cumq',
'type': 'number',
'concept': 'http://aims.fao.org/aos/agrovoc/c_6697',
'unit': '',
'method': 'own:SumOfRunoffTimeseries'},
{'name': 'rough',
'type': 'number',
'concept': '',
'unit': '',
'method': 'own:derivedOfRunofVelocitySlopeAndRunoffVolume'},
{'name': 'bulk',
'type': 'integer',
'concept': 'http://aims.fao.org/aos/agrovoc/c_7167',
'unit': 'kg/m^3',
'method': 'own:SoilCoreSamplingAndLabAnalysis'},
{'name': 'corg',
'type': 'number',
'concept': 'http://aims.fao.org/aos/agrovoc/c_389fe908',
'unit': 'M-%',
'method': 'own:CombustionDINXXXX'},
{'name': 'cSa',
'type': 'integer',
'concept': 'http://aims.fao.org/aos/agrovoc/c_c03927e1',
'unit': 'M-%',
'method': 'own:SoilCoreSamplingAndLabAnalysisWithoutDispergation'},
{'name': 'mSa',
'type': 'integer',
'concept': 'http://aims.fao.org/aos/agrovoc/c_7b64842d',
'unit': 'M-%',
'method': 'own:SoilCoreSamplingAndLabAnalysisWithoutDispergation'},
{'name': 'fSa',
'type': 'integer',
'concept': 'http://aims.fao.org/aos/agrovoc/c_a91f25d6',
'unit': 'M-%',
'method': 'own:SoilCoreSamplingAndLabAnalysisWithoutDispergation'},
{'name': 'cSi',
'type': 'integer',
'concept': 'http://aims.fao.org/aos/agrovoc/c_c1b90f01',
'unit': 'M-%',
'method': 'own:SoilCoreSamplingAndLabAnalysisWithoutDispergation'},
{'name': 'mSi',
'type': 'integer',
'concept': 'http://aims.fao.org/aos/agrovoc/c_090f427c',
'unit': 'M-%',
'method': 'own:SoilCoreSamplingAndLabAnalysisWithoutDispergation'},
{'name': 'fSi',
'type': 'integer',
'concept': 'http://aims.fao.org/aos/agrovoc/c_0f15e6e6',
'unit': 'M-%',
'method': 'own:SoilCoreSamplingAndLabAnalysisWithoutDispergation'},
{'name': 'slope',
'type': 'number',
'concept': 'http://aims.fao.org/aos/agrovoc/c_fa256eab',
'unit': 'degree',
'method': 'own:waterscale'},
{'name': 'plotwidth',
'type': 'integer',
'concept': ['http://aims.fao.org/aos/agrovoc/c_36811',
'http://aims.fao.org/aos/agrovoc/c_8387'],
'unit': 'm',
'method': 'own:SetByDesign'},
{'name': 'plotlength',
'type': 'integer',
'concept': ['http://aims.fao.org/aos/agrovoc/c_36811',
'http://aims.fao.org/aos/agrovoc/c_4260'],
'unit': 'm',
'method': 'own:SetByDesign'},
{'name': 'BBCH', 'type': 'integer'},
{'name': 'crop', 'type': 'string'},
{'name': 'treat', 'type': 'string'},
{'name': 'cover', 'type': 'integer'},
{'name': 'moist',
'type': 'number',
'concept': 'http://aims.fao.org/aos/agrovoc/c_4886',
'unit': 'percent',
'method': 'own:SoilCoreSamplingWetWeightingDryWeighting'},
{'name': 'runtype', 'type': 'integer'},
{'name': 'project', 'type': 'string'},
{'name': 'No',
'type': 'string',
'concept': 'own:primaryID',
'unit': '',
'method': 'own:defined'},
{'name': 'plot', 'type': 'integer'},
{'name': 'bulk.run', 'type': 'integer'},
{'name': 'CLAY', 'type': 'integer'},
{'name': 'SILT', 'type': 'integer'},
{'name': 'SAND', 'type': 'integer'},
{'name': 'KA5class', 'type': 'string'},
{'name': 'KA5main', 'type': 'string'},
{'name': 'D',
'type': 'number',
'concept': 'mean grain size diameter',
'unit': 'mm',
'method': 'log normalized average '
'(Campbell 1985)'},
{'name': 'SigP',
'type': 'number',
'concept': 'Standard deviation of mean '
'grain size diameter',
'unit': 'mm',
'method': 'Standard deviation of log '
'normalized average (Campbell '
'1985)'},
{'name': 'crop_original',
'type': 'string',
'concept': 'own:ForeignKey',
'unit': '',
'method': 'own:defined'},
{'name': 'lat', 'type': 'number'},
{'name': 'lon', 'type': 'number'}],
'missingValues': ['NA', ''],
'primaryKey': ['No'],
'foreignKeys': []}},
{'name': 'time',
'type': 'table',
'path': 'tables/time.csv',
'scheme': 'file',
'format': 'csv',
'mediatype': 'text/csv',
'encoding': 'utf-8',
'schema': {'fields': [{'name': 'No',
'type': 'string',
'concept': 'own:ForeignKey',
'unit': '',
'method': 'own:defined'},
{'name': 'time',
'type': 'integer',
'concept': 'time, timedelta',
'unit': 's',
'method': 'stopwatch'},
{'name': 'runoffrate', 'type': 'number'},
{'name': 'sedconc',
'type': 'number',
'concept': '',
'unit': 'g/l',
'method': 'sediment concentration '
'measured in runoff samples'},
{'name': 'rainfall',
'type': 'number',
'concept': 'http://aims.fao.org/aos/agrovoc/c_36860',
'unit': 'm*m*m/s',
'method': 'own:rainfall intensity valid '
'in the preceding time '
'interval'},
{'name': 'plotwidth',
'type': 'integer',
'concept': ['http://aims.fao.org/aos/agrovoc/c_36811',
'http://aims.fao.org/aos/agrovoc/c_8387'],
'unit': 'm',
'method': 'own:SetByDesign'},
{'name': 'plotlength',
'type': 'integer',
'concept': ['http://aims.fao.org/aos/agrovoc/c_36811',
'http://aims.fao.org/aos/agrovoc/c_4260'],
'unit': 'm',
'method': 'own:SetByDesign'},
{'name': 'overflow',
'type': 'boolean',
'concept': '',
'unit': '',
'method': 'Boolean indicator for '
'identification of '
'measurements taken in runoff '
'feeding phase.'}],
'missingValues': ['NA', ''],
'primaryKey': ['No', 'time'],
'foreignKeys': [{'fields': ['No'],
'reference': {'resource': 'meta',
'fields': ['No']}}]}}]}
[7]:
templates = {
'runoff_coefficient': {
'field':{
'concept': 'https://dbpedia.org/resource/Runoff_curve_number', # concept for
'unit': '[%]',
'method': 'df["runoff_coefficient"] = round(df.Q_OF_mean_selected/ df.P_mean_selected * 100, 1)'},
'requires':
[
{'alternative_concept': 'https://dbpedia.org/resource/Runoff',
'concept': 'http://aims.fao.org/aos/agrovoc/c_35388',
'name': 'runoff_volume',
'unit': '[mm]',
'method': 'runoff_volume'
},
{'alternative_concept': 'https://dbpedia.org/resource/Rain',
'concept': 'http://aims.fao.org/aos/agrovoc/c_36860',
'name': 'rainfall_volume',
'unit': '[mm]',
'method': 'rainfall_volume'
}
]
},
'runoff_volume': {
'target_concept': 'https://dbpedia.org/resource/Runoff',
'target_unit': '[mm]',
'target_method': 'df_target = df.pivot_table(values=["P_mean_selected","Q_OF_mean_selected"], columns=["Site_number","Experiment_numbe"], aggfunc="sum", dropna=True)',
'requires':
[
{'concept': 'https://dbpedia.org/resource/Runoff',
'alternative_concept': 'http://aims.fao.org/aos/agrovoc/c_35388',
'name': 'runoff_rate',
'unit': '[mm]',
},
{'concept': 'http://aims.fao.org/aos/agrovoc/c_7778',
'name': 'duration',
'unit': '[mm]'
}
]
},
'runoff_volume': {
'target_concept': 'https://dbpedia.org/resource/Runoff',
'target_unit': '[l]',
'target_method': 'runoff_volume = [sum_timesteps("ruoff_rate * duration") for row in data.rows "]', # we need to aggregate timesteps here
'requires':
[
{'concept': 'https://dbpedia.org/resource/Runoff',
'alternative_concept': 'http://aims.fao.org/aos/agrovoc/c_35388',
'name': 'runoff_rate',
'unit': '[l/s]',
},
{'concept': 'http://aims.fao.org/aos/agrovoc/c_7778',
'name': 'duration',
'unit': '[s]'
}
]
},
'rainfall_volume': {
'target_concept': 'https://dbpedia.org/resource/Rain',
'target_unit': '[l]',
'target_method': 'ruoff_volume = [sum_timesteps("rainfall_rate * duration", id) for id in data.ids "]', # we need to aggregate timesteps here
'requires':
[
{'concept': 'https://dbpedia.org/resource/Rain',
'alternative_concept': 'http://aims.fao.org/aos/agrovoc/c_36860',
'name': 'rainfall_rate',
'unit': '[l/s]'
},
{'concept': 'http://aims.fao.org/aos/agrovoc/c_7778',
'name': 'duration',
'unit': '[s]'
}
]
}
}
#def sum_timesteps(operation, data):
# ... see Jonas dissertation functions...{
# rainfall https://github.com/jonaslenz/diss/blob/gitbook/database/hydraulic_func.R#L45-L59
# runoff https://github.com/jonaslenz/diss/blob/gitbook/database/hydraulic_func.R#L29-L43
[8]:
a = templates['runoff_coefficient']
print(a)
b = get_sp_data(TUBAF,
fielddefinition=[
a['field']
]
)
if b.resources == []:
print('going deeper')
b = get_sp_data(TUBAF,
fielddefinition=[{'concept': x['concept']} for x in a['requires']]
)
b
{'field': {'concept': 'https://dbpedia.org/resource/Runoff_curve_number', 'unit': '[%]', 'method': 'df["runoff_coefficient"] = round(df.Q_OF_mean_selected/ df.P_mean_selected * 100, 1)'}, 'requires': [{'alternative_concept': 'https://dbpedia.org/resource/Runoff', 'concept': 'http://aims.fao.org/aos/agrovoc/c_35388', 'name': 'runoff_volume', 'unit': '[mm]', 'method': 'runoff_volume'}, {'alternative_concept': 'https://dbpedia.org/resource/Rain', 'concept': 'http://aims.fao.org/aos/agrovoc/c_36860', 'name': 'rainfall_volume', 'unit': '[mm]', 'method': 'rainfall_volume'}]}
going deeper
[8]:
{'resources': [{'name': 'time',
'type': 'table',
'data': [],
'scheme': '',
'format': 'inline',
'mediatype': 'text/csv',
'extrapaths': [],
'schema': {'fields': [{'name': 'No',
'type': 'string',
'concept': 'own:ForeignKey',
'unit': '',
'method': 'own:defined'},
{'name': 'time',
'type': 'integer',
'concept': 'time, timedelta',
'unit': 's',
'method': 'stopwatch'},
{'name': 'rainfall',
'type': 'number',
'concept': 'http://aims.fao.org/aos/agrovoc/c_36860',
'unit': 'm*m*m/s',
'method': 'own:rainfall intensity valid '
'in the preceding time '
'interval'}],
'missingValues': ['NA', ''],
'primaryKey': ['No', 'time'],
'foreignKeys': [{'fields': ['No'],
'reference': {'resource': 'meta',
'fields': ['No']}}]}}]}
[11]:
a = get_sp_data(TUBAF,
fielddefinition=[{'name': 'soilloss'}, {'name': 'rainfall'}]
)
view = merge_foreign_keys(a)
for z in view.resources:
z.open()
z.close()
b = view.resources[1].to_pandas()
[12]:
mod = b.rainfall*b.soilloss
mod.head()
[12]:
No time
102 1440 0.000002
1500 0.000002
1560 0.000002
1620 0.000002
1680 0.000002
dtype: float64
[13]:
b.plot()
[13]:
<AxesSubplot: xlabel='No,time'>
[17]:
TUBAF = load_sp_datapackage({"sourcedir": "example_datasets/TUBAF/"}, 'TUBAF')
view_sp_resource(TUBAF.resources[0], fields = ['SigP'], row_filters = ['SigP >9', 'SigP < 12']).to_pandas()
get_dataset_concepts(TUBAF)
get_sp_data(TUBAF, fielddefinition=[{'name': 'SigP'}])
view = merge_foreign_keys(
get_sp_data(
TUBAF,
fielddefinition=[
{'name': 'SigP',
'row_filters': ['No == "8"']
},
{'unit': 'g/l',
'row_filters': ['not sedconc == None', 'No == "8"']
}
]
)
)
view = get_sp_data(
TUBAF,
fielddefinition=[
{'name': 'SigP',
'row_filters': ['SigP > 10']
},
{'unit': 'g/l',
'row_filters': ['not sedconc == None and sedconc >=300']
}
]
)
view = merge_foreign_keys(
get_sp_data(
TUBAF,
fielddefinition=[
{'name': 'SigP',
'row_filters': ['No == "8"']
},
{'unit': 'g/l',
'row_filters': ['not sedconc == None', 'No == "8"']
}
]
)
)
try:
view.extract()
except:
view.extract()
# model requirements can be defined by row constraints
#from frictionless import validate, checks, transform
#validate(TUBAF.resources[0],
# checks=[
# checks.row_constraint(formula="fSi + mSi + cSi == SILT"),
# checks.row_constraint(formula="fSa + mSa + cSa == SAND"),
# checks.row_constraint(formula="SILT + SAND + CLAY == 100")
# ]
# )
TUBAF.resources[1].to_pandas().head()
[17]:
| runoffrate | sedconc | rainfall | plotwidth | plotlength | overflow | ||
|---|---|---|---|---|---|---|---|
| No | time | ||||||
| 1 | 0 | 0.000000 | NaN | 0.000000 | 2 | 22 | False |
| 420 | 0.000005 | NaN | 0.000011 | 2 | 22 | False | |
| 480 | 0.000010 | NaN | 0.000011 | 2 | 22 | False | |
| 540 | 0.000010 | NaN | 0.000011 | 2 | 22 | False | |
| 600 | 0.000011 | NaN | 0.000011 | 2 | 22 | False |
[ ]: