From 04ae420c2400f6fbd4b5da720e0e72378f4be9d3 Mon Sep 17 00:00:00 2001 From: SimonTaurus Date: Sat, 27 Apr 2024 12:20:48 +0200 Subject: [PATCH] feat: implement serialization Refs: #2 --- pyproject.toml | 1 + src/ontopint/__init__.py | 46 +++++++++++++++++- ...on_test.py => 020_deserialization_test.py} | 38 +-------------- tests/030_serialization_test.py | 47 +++++++++++++++++++ tests/040_roundtrip_test.py | 24 ++++++++++ tests/common.py | 39 +++++++++++++++ 6 files changed, 157 insertions(+), 38 deletions(-) rename tests/{deserialization_test.py => 020_deserialization_test.py} (58%) create mode 100644 tests/030_serialization_test.py create mode 100644 tests/040_roundtrip_test.py create mode 100644 tests/common.py diff --git a/pyproject.toml b/pyproject.toml index 23cd397..0434cd8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,6 +37,7 @@ dependencies = [ [project.optional-dependencies] dev = [ "pytest", + "deepdiff", ] [project.license] diff --git a/src/ontopint/__init__.py b/src/ontopint/__init__.py index 3b7889d..0712e95 100644 --- a/src/ontopint/__init__.py +++ b/src/ontopint/__init__.py @@ -6,6 +6,7 @@ # from pint import UnitRegistry from ucumvert import PintUcumRegistry +import pint # ureg = UnitRegistry() ureg = PintUcumRegistry() @@ -100,6 +101,33 @@ def _replace_units(obj, context, original_key_lookup_dict): ] else: return obj + +def _serialize_units(obj, context, original_key_lookup_dict): + if isinstance(obj, dict): + for key in list(obj.keys()): # make a list copy in order to delete keys while iterating + value = obj[key] + if (isinstance(value, pint.Quantity)): + # see https://pint.readthedocs.io/en/stable/user/formatting.html + quantity_value = float(format(value, 'f#~').split(' ')[0]) + unit_code = format(value.u, '~') + # ToDo: use ucum code + unit_iri = get_qunit_iri_from_unit_code(unit_code) + # note: "urn:ontopint:iri" is just any iri not existing in the input data + unit_compact_iri = jsonld.compact( + {"@context": {**context, "urn:ontopint:iri": {"@type": "@id"}}, "urn:ontopint:iri": unit_iri}, + {**context, "urn:ontopint:iri": {"@type": "@id"}} + )["urn:ontopint:iri"] + obj[original_key_lookup_dict['value']] = quantity_value + obj[original_key_lookup_dict['unit']] = unit_compact_iri + + else: obj[key] = _serialize_units(value, context, original_key_lookup_dict) + return obj + elif isinstance(obj, list): + return [ + _serialize_units(value, context, original_key_lookup_dict) for value in obj + ] + else: + return obj def parse_units(json_ld: dict) -> dict: @@ -113,5 +141,21 @@ def parse_units(json_ld: dict) -> dict: # reverse the dict original_key_lookup_dict = {v: k for k, v in compacted.items()} parsed_json = _replace_units(json_ld, original_context, original_key_lookup_dict) - parsed_json['@context'] = original_context + parsed_json = {'@context': original_context, **parsed_json} + json_ld['@context'] = original_context # restore context + return parsed_json + +def export_units(json_ld: dict, context = processing_context) -> dict: + original_context = json_ld.pop('@context', context) + key_dict = {'@context': processing_context, 'unit': 'unit', 'value': 'value'} + # inverse expand-reverse cycle + expanded = jsonld.expand(key_dict, processing_context) + compacted = jsonld.compact(expanded, original_context) + # remove the context + del compacted['@context'] + # reverse the dict + original_key_lookup_dict = {v: k for k, v in compacted.items()} + parsed_json = _serialize_units(json_ld, original_context, original_key_lookup_dict) + parsed_json = {'@context': original_context, **parsed_json} + json_ld['@context'] = original_context # restore context return parsed_json diff --git a/tests/deserialization_test.py b/tests/020_deserialization_test.py similarity index 58% rename from tests/deserialization_test.py rename to tests/020_deserialization_test.py index 9f92d69..a401932 100644 --- a/tests/deserialization_test.py +++ b/tests/020_deserialization_test.py @@ -1,43 +1,7 @@ -import os import ontopint -import json import pint -def _load_test_data(file_name: str) -> dict: - """loads a json file from the test data folder - - Parameters - ---------- - file_name - the name of the json file - - Returns - ------- - the content of the file as dict - """ - data = {} - with open(os.path.join(os.path.dirname(__file__), "data", file_name)) as f: - data = json.load(f) - return data - -def _recursive_items(dictionary: dict): - """Returns a generator of tuples for every key-value pair in the dict - - Parameters - ---------- - dictionary - any (nested) dict - - Yields - ------ - iterator for key-value tuples of the dict - """ - for key, value in dictionary.items(): - if type(value) is dict: - yield (key, value) - yield from _recursive_items(value) - else: - yield (key, value) +from common import _load_test_data, _recursive_items def test_default_keys(): """test input data with default keys 'value' and 'unit' diff --git a/tests/030_serialization_test.py b/tests/030_serialization_test.py new file mode 100644 index 0000000..e5d9bca --- /dev/null +++ b/tests/030_serialization_test.py @@ -0,0 +1,47 @@ +import ontopint +import deepdiff + +def test_default_keys(): + """test input data with default keys 'value' and 'unit' + """ + + test = { + "value": ontopint.ureg.Quantity( + 1.123, ontopint.ureg.from_ucum("eV") + ) + } + expected = { + "value": 1.123, + "unit": "qunit:EV" + } + result = ontopint.export_units(test) + del result["@context"] + assert (len(deepdiff.DeepDiff(expected, result).keys()) == 0) # no diff + +def test_custom_keys(): + """test input data with custom keys 'my_value' and 'my_unit' + """ + test = { + "@context": { + "qudt": "http://qudt.org/schema/qudt/", + "qunit": "http://qudt.org/vocab/unit/", + "qkind": "http://qudt.org/vocab/quantkind/", + "my_unit": { + "@id": "qudt:hasUnit", + "@type": "@id" + }, + "my_value": "qudt:value", + }, + "my_value": ontopint.ureg.Quantity( + 1.123, ontopint.ureg.from_ucum("eV") + ) + } + expected = { + "my_value": 1.123, + "my_unit": "qunit:EV" + } + result = ontopint.export_units(test) + del result["@context"] + assert (len(deepdiff.DeepDiff(expected, result).keys()) == 0) # no diff + + \ No newline at end of file diff --git a/tests/040_roundtrip_test.py b/tests/040_roundtrip_test.py new file mode 100644 index 0000000..9dbbde0 --- /dev/null +++ b/tests/040_roundtrip_test.py @@ -0,0 +1,24 @@ +import ontopint +import deepdiff + +from common import _load_test_data, _recursive_items + +def test_default_keys(): + """test input data with default keys 'value' and 'unit' + """ + input_jsonld = _load_test_data("test_data_default_keys.jsonld") + parsed_jsonld = ontopint.parse_units(input_jsonld) + result = ontopint.export_units(parsed_jsonld) + assert (len(deepdiff.DeepDiff(input_jsonld, result).keys()) == 0) # no diff + +def test_custom_keys(): + """test input data with custom keys 'my_value' and 'my_unit' + """ + input_jsonld = _load_test_data("test_data_custom_keys.jsonld") + parsed_jsonld = ontopint.parse_units(input_jsonld) + result = ontopint.export_units(parsed_jsonld) + diff = deepdiff.DeepDiff(input_jsonld, result) + print(deepdiff.DeepDiff(input_jsonld, result)) + assert (len(deepdiff.DeepDiff(input_jsonld, result).keys()) == 0) # no diff + +test_custom_keys() \ No newline at end of file diff --git a/tests/common.py b/tests/common.py new file mode 100644 index 0000000..874bf6d --- /dev/null +++ b/tests/common.py @@ -0,0 +1,39 @@ +import json +import os + + +def _load_test_data(file_name: str) -> dict: + """loads a json file from the test data folder + + Parameters + ---------- + file_name + the name of the json file + + Returns + ------- + the content of the file as dict + """ + data = {} + with open(os.path.join(os.path.dirname(__file__), "data", file_name)) as f: + data = json.load(f) + return data + +def _recursive_items(dictionary: dict): + """Returns a generator of tuples for every key-value pair in the dict + + Parameters + ---------- + dictionary + any (nested) dict + + Yields + ------ + iterator for key-value tuples of the dict + """ + for key, value in dictionary.items(): + if type(value) is dict: + yield (key, value) + yield from _recursive_items(value) + else: + yield (key, value) \ No newline at end of file