{ "cells": [ { "cell_type": "markdown", "id": "f3bd6a1d-ed51-489a-8432-b1e8c40fc156", "metadata": {}, "source": [ "# Get started with SoilPulse" ] }, { "cell_type": "markdown", "id": "242b9c77-a35b-402c-a675-2a922296a2f3", "metadata": {}, "source": [ "## step 1 - ingest data into soilpulsecore\n", "\n", "Here we show how to provide your data (and any existing metadata) to create a soilpulsecore project, either from a DOI or from a direct URL." ] }, { "cell_type": "code", "execution_count": 1, "id": "530379ab-3ec1-48fd-aa9d-e529037e8eba", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "> Crawler type 'zero' registered.\n", "Container type 'filesystem' registered\n", "* Keywords database soilpulse\\databases\\keywords_filesystem registered as 'filesystem'\n", "Container type 'file' registered\n", "Container type 'directory' registered\n", "Container type 'archive' registered\n", "> Crawler type 'filesystem' registered.\n", "> Crawler type 'csv' registered.\n", "> Crawler type 'txt' registered.\n", "Container type 'mysql' registered\n", "* Keywords database soilpulse\\databases\\keywords_mysql registered as 'mysql'\n", "Container type 'xml' registered\n", "* Keywords database soilpulse\\databases\\keywords_xml registered as 'xml'\n", "Container type 'table' registered\n", "> Crawler type 'table' registered.\n", "Container type 'column' registered\n", "> Crawler type 'column' registered.\n", "Container type 'json' registered\n", "* Keywords database soilpulse\\databases\\keywords_json registered as 'json'\n", "> Crawler type 'json' registered.\n", "Publisher 'Zenodo' registered\n" ] } ], "source": [ "# first we import all relevant soilpulsecore functionality\n", "\n", "from soilpulsecore.project_management import *\n", "from soilpulsecore.resource_managers.filesystem import *\n", "from soilpulsecore.resource_managers.mysql import *\n", "from soilpulsecore.resource_managers.xml import *\n", "from soilpulsecore.resource_managers.data_structures import *\n", "from 
soilpulsecore.resource_managers.json import *\n", "from soilpulsecore.data_publishers import *\n", "from soilpulsecore.metadata_scheme import *\n", "from soilpulsecore.db_access import EntityKeywordsDB, NullConnector" ] }, { "cell_type": "code", "execution_count": 2, "id": "4228e21e-ac3f-41a5-b4da-ab3b48fbe0f5", "metadata": {}, "outputs": [], "source": [ "# then we define some example source records (identified by DOI, by URL, or by file upload) that can be used\n", "example_doi = {\"name\": \"Soil erosion data of TUBAF rainsimlators in Lenz, 2022\",\n", " \"doi\": \"10.5281/zenodo.6654150\"}\n", "example_doi_url = {\"name\": \"Rainfall simulation data Ries et al. 2019\",\n", " \"doi\": \"10.6094/unifr/151460\",\n", " \"url\": \"https://freidok.uni-freiburg.de/files/151460/twflMtwtvn01bDCC/Extreme_rainfall_experiment_data_06122019.zip\"}\n", "example_url = {\"name\": \"Soil erosion data in Punjab, India, Lenz et. al\",\n", " \"url\": \"https://www.mdpi.com/2076-3263/8/11/396/s1\"}\n", "example_file_upload = {\"name\": \"CTU soil erosion data example\"}\n", "example_reload_soilpulse_project = {\"\": \"\"}" ] }, { "cell_type": "markdown", "id": "bbea6b41-9503-4cab-bbf1-1e5cb1dd025d", "metadata": {}, "source": [ "### by DOI" ] }, { "cell_type": "code", "execution_count": 3, "id": "8a544aa0-dfb3-4123-839b-d7c7ced0380c", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "failed to load concept vocabulary 'AGROVOC' from 'vocabularies\\agrovoc.json'\n", "failed to load concept vocabulary 'TestConceptVocabulary' from 'vocabularies\\_concepts_vocabulary_1.json'\n", "failed to load method vocabulary 'TestMethodsVocabulary' from 'vocabularies\\_methods_vocabulary_1.json'\n", "loaded methods vocabularies: \n", "failed to load units vocabulary 'TestUnitsVocabulary' from 'vocabularies\\_units_vocabulary_1.json'\n", "loaded units vocabularies: \n", "doi: '10.5281/zenodo.6654150'\n", "\n", "Obtaining metadata from DOI registration agency ...\n", " ... 
successful\n", "\n", "File 'DOI_metadata.json' successfuly saved.\n", "File 'Publisher_metadata.json' successfuly saved.\n", "downloading remote files to SoilPulse storage ...\n", "\t10-toolboxvignette.Rmd - unsupported Crawler subclass special type 'Rmd' (registered types are: 'zero','filesystem','csv','txt','table','column','json') - 'zero crawler' will be used instead.\n", "No content analysis procedure defined for crawler type 'zero'\n", "\t06-lookout.Rmd - unsupported Crawler subclass special type 'Rmd' (registered types are: 'zero','filesystem','csv','txt','table','column','json') - 'zero crawler' will be used instead.\n", "No content analysis procedure defined for crawler type 'zero'\n", "\tindex.Rmd - unsupported Crawler subclass special type 'Rmd' (registered types are: 'zero','filesystem','csv','txt','table','column','json') - 'zero crawler' will be used instead.\n", "No content analysis procedure defined for crawler type 'zero'\n", "Extracting 'lenz2022.zip' to 'C:\\Users\\JL\\SoilPulse\\project_files\\temp_1\\lenz2022_zip'\n", "\t01-intro.Rmd - unsupported Crawler subclass special type 'Rmd' (registered types are: 'zero','filesystem','csv','txt','table','column','json') - 'zero crawler' will be used instead.\n", "No content analysis procedure defined for crawler type 'zero'\n", "\t02-state_know.Rmd - unsupported Crawler subclass special type 'Rmd' (registered types are: 'zero','filesystem','csv','txt','table','column','json') - 'zero crawler' will be used instead.\n", "No content analysis procedure defined for crawler type 'zero'\n", "\t03-database.Rmd - unsupported Crawler subclass special type 'Rmd' (registered types are: 'zero','filesystem','csv','txt','table','column','json') - 'zero crawler' will be used instead.\n", "No content analysis procedure defined for crawler type 'zero'\n", "\t03a-code.Rmd - unsupported Crawler subclass special type 'Rmd' (registered types are: 'zero','filesystem','csv','txt','table','column','json') - 'zero crawler' will 
be used instead.\n", "No content analysis procedure defined for crawler type 'zero'\n", "\t04-results.Rmd - unsupported Crawler subclass special type 'Rmd' (registered types are: 'zero','filesystem','csv','txt','table','column','json') - 'zero crawler' will be used instead.\n", "No content analysis procedure defined for crawler type 'zero'\n", "\t04a-results_PO.Rmd - unsupported Crawler subclass special type 'Rmd' (registered types are: 'zero','filesystem','csv','txt','table','column','json') - 'zero crawler' will be used instead.\n", "No content analysis procedure defined for crawler type 'zero'\n", "\t04b-results-statis.Rmd - unsupported Crawler subclass special type 'Rmd' (registered types are: 'zero','filesystem','csv','txt','table','column','json') - 'zero crawler' will be used instead.\n", "No content analysis procedure defined for crawler type 'zero'\n", "\t05-discussion.Rmd - unsupported Crawler subclass special type 'Rmd' (registered types are: 'zero','filesystem','csv','txt','table','column','json') - 'zero crawler' will be used instead.\n", "No content analysis procedure defined for crawler type 'zero'\n", "\t05a-reallookout.Rmd - unsupported Crawler subclass special type 'Rmd' (registered types are: 'zero','filesystem','csv','txt','table','column','json') - 'zero crawler' will be used instead.\n", "No content analysis procedure defined for crawler type 'zero'\n", "\t06-lookout.Rmd - unsupported Crawler subclass special type 'Rmd' (registered types are: 'zero','filesystem','csv','txt','table','column','json') - 'zero crawler' will be used instead.\n", "No content analysis procedure defined for crawler type 'zero'\n", "\t07-references.Rmd - unsupported Crawler subclass special type 'Rmd' (registered types are: 'zero','filesystem','csv','txt','table','column','json') - 'zero crawler' will be used instead.\n", "No content analysis procedure defined for crawler type 'zero'\n", "\t08-E3DIssues.Rmd - unsupported Crawler subclass special type 'Rmd' (registered 
types are: 'zero','filesystem','csv','txt','table','column','json') - 'zero crawler' will be used instead.\n", "No content analysis procedure defined for crawler type 'zero'\n", "\t09-database.Rmd - unsupported Crawler subclass special type 'Rmd' (registered types are: 'zero','filesystem','csv','txt','table','column','json') - 'zero crawler' will be used instead.\n", "No content analysis procedure defined for crawler type 'zero'\n", "\t10-toolboxvignette.Rmd - unsupported Crawler subclass special type 'Rmd' (registered types are: 'zero','filesystem','csv','txt','table','column','json') - 'zero crawler' will be used instead.\n", "No content analysis procedure defined for crawler type 'zero'\n", "\t11-varrain.Rmd - unsupported Crawler subclass special type 'Rmd' (registered types are: 'zero','filesystem','csv','txt','table','column','json') - 'zero crawler' will be used instead.\n", "No content analysis procedure defined for crawler type 'zero'\n", "\tcomp_E3D_landlab.Rmd - unsupported Crawler subclass special type 'Rmd' (registered types are: 'zero','filesystem','csv','txt','table','column','json') - 'zero crawler' will be used instead.\n", "No content analysis procedure defined for crawler type 'zero'\n", "\tcomp_infil.R - unsupported Crawler subclass special type 'R' (registered types are: 'zero','filesystem','csv','txt','table','column','json') - 'zero crawler' will be used instead.\n", "No content analysis procedure defined for crawler type 'zero'\n", "\timplicite_GA.R - unsupported Crawler subclass special type 'R' (registered types are: 'zero','filesystem','csv','txt','table','column','json') - 'zero crawler' will be used instead.\n", "No content analysis procedure defined for crawler type 'zero'\n", "\tGA_comparison.ipynb - unsupported Crawler subclass special type 'ipynb' (registered types are: 'zero','filesystem','csv','txt','table','column','json') - 'zero crawler' will be used instead.\n", "No content analysis procedure defined for crawler type 'zero'\n", 
"Searching for delimiters in container log.txt failed.\n", "\tfunctions_for_DC.R - unsupported Crawler subclass special type 'R' (registered types are: 'zero','filesystem','csv','txt','table','column','json') - 'zero crawler' will be used instead.\n", "No content analysis procedure defined for crawler type 'zero'\n", "\tfunctions_for_visualization.R - unsupported Crawler subclass special type 'R' (registered types are: 'zero','filesystem','csv','txt','table','column','json') - 'zero crawler' will be used instead.\n", "No content analysis procedure defined for crawler type 'zero'\n", "\thydraulic_func.R - unsupported Crawler subclass special type 'R' (registered types are: 'zero','filesystem','csv','txt','table','column','json') - 'zero crawler' will be used instead.\n", "No content analysis procedure defined for crawler type 'zero'\n", "Content of container 'Diss_michael_anlage.csv' couldn't be analyzed due to encoding issues.\n", "Searching for delimiters in container remarks_on_AnneRuns.txt failed.\n", "\tsingle_file.R - unsupported Crawler subclass special type 'R' (registered types are: 'zero','filesystem','csv','txt','table','column','json') - 'zero crawler' will be used instead.\n", "No content analysis procedure defined for crawler type 'zero'\n", "\tDatensatz2.Rmd - unsupported Crawler subclass special type 'Rmd' (registered types are: 'zero','filesystem','csv','txt','table','column','json') - 'zero crawler' will be used instead.\n", "No content analysis procedure defined for crawler type 'zero'\n", "\tprep.R - unsupported Crawler subclass special type 'R' (registered types are: 'zero','filesystem','csv','txt','table','column','json') - 'zero crawler' will be used instead.\n", "No content analysis procedure defined for crawler type 'zero'\n", "\tindex.Rmd - unsupported Crawler subclass special type 'Rmd' (registered types are: 'zero','filesystem','csv','txt','table','column','json') - 'zero crawler' will be used instead.\n", "No content analysis procedure 
defined for crawler type 'zero'\n", "\taMC_in_R.R - unsupported Crawler subclass special type 'R' (registered types are: 'zero','filesystem','csv','txt','table','column','json') - 'zero crawler' will be used instead.\n", "No content analysis procedure defined for crawler type 'zero'\n", "\taMC_in_R_ewid.R - unsupported Crawler subclass special type 'R' (registered types are: 'zero','filesystem','csv','txt','table','column','json') - 'zero crawler' will be used instead.\n", "No content analysis procedure defined for crawler type 'zero'\n", "\tavisualization.R - unsupported Crawler subclass special type 'R' (registered types are: 'zero','filesystem','csv','txt','table','column','json') - 'zero crawler' will be used instead.\n", "No content analysis procedure defined for crawler type 'zero'\n", "\tavisualization2.R - unsupported Crawler subclass special type 'R' (registered types are: 'zero','filesystem','csv','txt','table','column','json') - 'zero crawler' will be used instead.\n", "No content analysis procedure defined for crawler type 'zero'\n", "\tpreamble.tex - unsupported Crawler subclass special type 'tex' (registered types are: 'zero','filesystem','csv','txt','table','column','json') - 'zero crawler' will be used instead.\n", "No content analysis procedure defined for crawler type 'zero'\n", "\t_output.yml - unsupported Crawler subclass special type 'yml' (registered types are: 'zero','filesystem','csv','txt','table','column','json') - 'zero crawler' will be used instead.\n", "No content analysis procedure defined for crawler type 'zero'\n", "\t09-database.Rmd - unsupported Crawler subclass special type 'Rmd' (registered types are: 'zero','filesystem','csv','txt','table','column','json') - 'zero crawler' will be used instead.\n", "No content analysis procedure defined for crawler type 'zero'\n", "\t11-varrain.Rmd - unsupported Crawler subclass special type 'Rmd' (registered types are: 'zero','filesystem','csv','txt','table','column','json') - 'zero crawler' 
will be used instead.\n", "No content analysis procedure defined for crawler type 'zero'\n", "\t08-E3DIssues.Rmd - unsupported Crawler subclass special type 'Rmd' (registered types are: 'zero','filesystem','csv','txt','table','column','json') - 'zero crawler' will be used instead.\n", "No content analysis procedure defined for crawler type 'zero'\n", "\t_output.yml - unsupported Crawler subclass special type 'yml' (registered types are: 'zero','filesystem','csv','txt','table','column','json') - 'zero crawler' will be used instead.\n", "No content analysis procedure defined for crawler type 'zero'\n", "\t07-references.Rmd - unsupported Crawler subclass special type 'Rmd' (registered types are: 'zero','filesystem','csv','txt','table','column','json') - 'zero crawler' will be used instead.\n", "No content analysis procedure defined for crawler type 'zero'\n", "\tpreamble.tex - unsupported Crawler subclass special type 'tex' (registered types are: 'zero','filesystem','csv','txt','table','column','json') - 'zero crawler' will be used instead.\n", "No content analysis procedure defined for crawler type 'zero'\n", "\t03a-code.Rmd - unsupported Crawler subclass special type 'Rmd' (registered types are: 'zero','filesystem','csv','txt','table','column','json') - 'zero crawler' will be used instead.\n", "No content analysis procedure defined for crawler type 'zero'\n", " ... 
successful\n", "\n" ] }, { "data": { "text/plain": [ "['C:\\\\Users\\\\JL\\\\SoilPulse\\\\project_files\\\\temp_1\\\\10-toolboxvignette.Rmd',\n", " 'C:\\\\Users\\\\JL\\\\SoilPulse\\\\project_files\\\\temp_1\\\\06-lookout.Rmd',\n", " 'C:\\\\Users\\\\JL\\\\SoilPulse\\\\project_files\\\\temp_1\\\\index.Rmd',\n", " 'C:\\\\Users\\\\JL\\\\SoilPulse\\\\project_files\\\\temp_1\\\\lenz2022.zip',\n", " 'C:\\\\Users\\\\JL\\\\SoilPulse\\\\project_files\\\\temp_1\\\\09-database.Rmd',\n", " 'C:\\\\Users\\\\JL\\\\SoilPulse\\\\project_files\\\\temp_1\\\\11-varrain.Rmd',\n", " 'C:\\\\Users\\\\JL\\\\SoilPulse\\\\project_files\\\\temp_1\\\\08-E3DIssues.Rmd',\n", " 'C:\\\\Users\\\\JL\\\\SoilPulse\\\\project_files\\\\temp_1\\\\_output.yml',\n", " 'C:\\\\Users\\\\JL\\\\SoilPulse\\\\project_files\\\\temp_1\\\\07-references.Rmd',\n", " 'C:\\\\Users\\\\JL\\\\SoilPulse\\\\project_files\\\\temp_1\\\\preamble.tex',\n", " 'C:\\\\Users\\\\JL\\\\SoilPulse\\\\project_files\\\\temp_1\\\\03a-code.Rmd']" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "\n", "# then we establish a new soilpulsecore project from the example DOI record and download its published files\n", "dbcon = NullConnector()\n", "user_id = 1\n", "project_doi = ProjectManager(dbcon, user_id, **example_doi)\n", "\n", "project_doi.downloadPublishedFiles()\n" ] }, { "cell_type": "markdown", "id": "341527de-a0f3-4936-bd6f-909a394a31d9", "metadata": {}, "source": [ "### by URL" ] }, { "cell_type": "code", "execution_count": 4, "id": "d5accb44-dd65-4c47-bd53-f923a2a19d38", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "failed to load concept vocabulary 'AGROVOC' from 'vocabularies\\agrovoc.json'\n", "failed to load concept vocabulary 'TestConceptVocabulary' from 'vocabularies\\_concepts_vocabulary_1.json'\n", "failed to load method vocabulary 'TestMethodsVocabulary' from 'vocabularies\\_methods_vocabulary_1.json'\n", "loaded methods vocabularies: \n", "failed to load units vocabulary 
'TestUnitsVocabulary' from 'vocabularies\\_units_vocabulary_1.json'\n", "loaded units vocabularies: \n", "doi: 'None'\n", "Empty DOI provided. DOI metadata were not retrieved.\n", "The list of published files is empty.\n", "\n", "\n", "================================================================================\n", "Soil erosion data in Punjab, India, Lenz et. al\n", "container tree:\n", "--------------------------------------------------------------------------------\n", "================================================================================\n", "\n", "\n", "\n", "Saving project \"Soil erosion data in Punjab, India, Lenz et. al\" with ID 2 ... \n", "\t(no containers to save)\n", "\t(no datasets to save)\n", "\tconcepts vocabulary saved\n", "\tmethods vocabulary saved\n", "\tunits vocabulary saved\n", " ... successful.\n" ] } ], "source": [ "dbcon = NullConnector()\n", "user_id = 1\n", "project_url = ProjectManager(dbcon, user_id, **example_url)\n", "\n", "project_url.downloadPublishedFiles()\n", "\n", "project_url.showContainerTree()\n", "project_url.updateDBrecord()\n" ] }, { "cell_type": "markdown", "id": "2a3202a0-37ca-41d8-8256-1dfb44d32753", "metadata": {}, "source": [ "## step 2 - analyze file system structure" ] }, { "cell_type": "code", "execution_count": null, "id": "5108cf92-d345-471c-b9b7-c42c4c615dfb", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.7" } }, "nbformat": 4, "nbformat_minor": 5 }