I want to add semantic enrichment to the JSON schema generated by pydantic BaseModels. The problem occurs when I want an element of the model to be an enumerated type: I can't figure out the correct way to add semantic enrichment to the new Enum or to the values specified in the Enum's definition. Below is code I hacked together. It generates the output I am looking for, but it definitely isn't the correct solution. Please offer a better solution, or let me know if there is simply a better way to document my data objects with semantic information.
I am using Python 3.11.3 and the following packages:
- annotated-types==0.6.0
- packaging==23.2
- pydantic==2.5.2
- pydantic_core==2.14.5
- typing_extensions==4.8.0
import json
from enum import Enum
from typing import Dict, Any

from pydantic import BaseModel, ConfigDict, Field
def clean_dictionary(base: dict):
    """Strip the taxonomy wrappers out of a JSON-like dictionary.

    A "wrapper" is a dict whose keys are exactly ``"value"`` and
    ``"taxonomy"``; every such dict is replaced by its ``"value"`` entry.
    The structure is walked recursively through nested dicts and lists
    (including lists nested inside lists).

    Args:
        base: The dictionary to clean. It is modified in place.

    Returns:
        ``base["value"]`` when ``base`` itself is a wrapper, otherwise
        ``base`` (the same object) with all nested wrappers collapsed.
    """
    # Recursion exit: the dict is exactly a {"value", "taxonomy"} wrapper.
    if base.keys() == {"value", "taxonomy"}:
        return base["value"]

    # Snapshot the items so values can be reassigned while looping.
    for key, item in list(base.items()):
        if isinstance(item, dict):
            base[key] = clean_dictionary(item)
        elif isinstance(item, list):
            _clean_list(item)
    return base


def _clean_list(items: list) -> None:
    """Collapse taxonomy wrappers inside ``items`` in place, at any depth."""
    for index, sub in enumerate(items):
        if isinstance(sub, dict):
            items[index] = clean_dictionary(sub)
        elif isinstance(sub, list):
            _clean_list(sub)
class OntologyModel(BaseModel):
    """A model sub-class that cleans the Enums when it generates JSON"""

    # Both dump methods route through the JSON serialization so that the
    # {"value", "taxonomy"} wrappers can be collapsed by clean_dictionary.

    def model_dump(
        self,
        *,
        mode: str = "python",
        include=None,
        exclude=None,
        by_alias: bool = False,
        exclude_unset: bool = False,
        exclude_defaults: bool = False,
        exclude_none: bool = False,
        round_trip: bool = False,
        warnings: bool = True
    ) -> dict[str, Any]:
        """Return the cleaned model as a dict by parsing its JSON dump.

        NOTE: ``mode`` is accepted for signature compatibility but is not
        forwarded; the result is always decoded from ``model_dump_json``.
        """
        options = dict(
            include=include,
            exclude=exclude,
            by_alias=by_alias,
            exclude_unset=exclude_unset,
            exclude_defaults=exclude_defaults,
            exclude_none=exclude_none,
            round_trip=round_trip,
            warnings=warnings,
        )
        return json.loads(self.model_dump_json(**options))

    def model_dump_json(
        self,
        indent: int | None = None,
        include=None,
        exclude=None,
        by_alias: bool = False,
        exclude_unset: bool = False,
        exclude_defaults: bool = False,
        exclude_none: bool = False,
        round_trip: bool = False,
        warnings: bool = True,
    ):
        """Return the model as JSON text with taxonomy wrappers removed.

        The parent class produces the raw JSON, which is decoded, passed
        through ``clean_dictionary`` and re-encoded with the same ``indent``.
        """
        raw_json = super().model_dump_json(
            indent=indent,
            include=include,
            exclude=exclude,
            by_alias=by_alias,
            exclude_unset=exclude_unset,
            exclude_defaults=exclude_defaults,
            exclude_none=exclude_none,
            round_trip=round_trip,
            warnings=warnings,
        )
        cleaned = clean_dictionary(json.loads(raw_json))
        return json.dumps(cleaned, indent=indent)
class FlowerEnum(Enum):
    """taxonomy: //example.com/flowers/F000021"""

    # Each member's value pairs the plain value with its taxonomy reference;
    # clean_dictionary collapses this shape back to the plain value on dump.
    DAN = dict(value="dandelion", taxonomy="//example.com/flowers#D00012")
    ORC = dict(value="ochid", taxonomy="//example.com/flowers#O00032")
class ColorEnum(Enum):
    """taxonomy: https://example.com/colors/C000000"""

    # Each member's value pairs the plain value with its taxonomy reference;
    # clean_dictionary collapses this shape back to the plain value on dump.
    RED = dict(value="red", taxonomy="//example.com/colors#C000001")
    PUR = dict(value="purple", taxonomy="//example.com/colors#C000002")
class Flower(OntologyModel):
    """An instance of a specific flower"""

    # Model-level schema enrichment. pydantic v2 deprecates the nested
    # `class Config`; `model_config` is the supported replacement and adds
    # the same "taxonomy" key to the generated JSON schema.
    model_config = ConfigDict(
        json_schema_extra={"taxonomy": "//example.com/flowers#F000003"},
    )

    # Field-level enrichment: each field carries its own taxonomy reference
    # alongside the human-readable description.
    variety: FlowerEnum = Field(
        ...,
        description="The type of flower",
        json_schema_extra={"taxonomy": "//example.com/flowers#F000004"},
    )
    color: ColorEnum = Field(
        ...,
        description="The flower's color",
        json_schema_extra={"taxonomy": "//example.com/colors#C000005"},
    )
if __name__ == "__main__":
    from pprint import pprint

    # Demo: build one flower, then show its JSON schema and its cleaned dump,
    # each framed by a row of asterisks.
    separator = "*" * 80
    flower = Flower(variety=FlowerEnum.ORC, color=ColorEnum.PUR)

    print("\n", separator, "\n")
    pprint(Flower.model_json_schema())
    print("\n", separator, "\n")
    pprint(flower.model_dump())
    print("\n", separator, "\n")
The code generates something semi-suitable for my purposes, but I would rather create schemas that are more in line with best practices and that don't have the goofy hidden requirement that the Enum values be specified in this peculiar way. Below is the output of the code as written:
{'$defs': {'ColorEnum': {'description': 'taxonomy: /example.com/colors/C000000',
'enum': [{'taxonomy': '/example.com/colors#C000001',
'value': 'red'},
{'taxonomy': '/example.com/colors#C000002',
'value': 'purple'}],
'title': 'ColorEnum'},
'FlowerEnum': {'description': 'taxonomy: '
'/example.com/flowers/F000021',
'enum': [{'taxonomy': '/example.com/flowers#D00012',
'value': 'dandelion'},
{'taxonomy': '/example.com/flowers#O00032',
'value': 'ochid'}],
'title': 'FlowerEnum'}},
'description': 'An instance of a specific flower',
'properties': {'color': {'allOf': [{'$ref': '#/$defs/ColorEnum'}],
'description': "The flower's color",
'taxonomy': '/example.com/colors#C000005'},
'variety': {'allOf': [{'$ref': '#/$defs/FlowerEnum'}],
'description': 'The type of flower',
'taxonomy': '/example.com/flowers#F000004'}},
'required': ['variety', 'color'],
'taxonomy': '/example.com/flowers#F000003',
'title': 'Flower',
'type': 'object'}
********************************************************************************
{'color': 'purple', 'variety': 'ochid'}
********************************************************************************