Source code for load_atoms.database.database_entry
# explicitly not using future annotations since this is not supported
# by pydantic for python versions we want to target
from pathlib import Path
from typing import Dict, Literal, Optional, Union

import yaml
from pydantic import BaseModel, field_validator

from load_atoms.utils import BASE_REMOTE_URL

# Maps every accepted license identifier to the URL of its license text.
LICENSE_URLS = {
    "CC BY-NC-SA 4.0": "https://creativecommons.org/licenses/by-nc-sa/4.0/deed.en",
    "CC BY-NC 4.0": "https://creativecommons.org/licenses/by-nc/4.0/deed.en",
    "CC BY 4.0": "https://creativecommons.org/licenses/by/4.0/deed.en",
    "CC0": "https://creativecommons.org/publicdomain/zero/1.0/",
    "MIT": "https://opensource.org/licenses/MIT",
    "GPLv3": "https://www.gnu.org/licenses/gpl-3.0.html",
}

# License identifiers, in the order they are declared in LICENSE_URLS.
VALID_LICENSES = list(LICENSE_URLS)

# The closed set of categories a dataset may be filed under.
VALID_CATEGORIES = ["Benchmarks", "Potential Fitting", "Synthetic Data"]


class PropertyDescription(BaseModel):
    """
    Holds a description of a property, such that it can be automatically
    validated upon creation.
    """

    desc: str
    """A description of the property"""

    units: Optional[str] = None
    """The units of the property"""
class DatabaseEntry(BaseModel):
    """
    Holds all the required metadata for a named dataset, such that it can be
    automatically downloaded using :func:`~load_atoms.load_dataset`, and so
    that documentation can be automatically generated.
    """

    name: str
    """The name of the dataset"""

    year: int
    """The year the dataset was created"""

    description: str
    """A description of the dataset (in ``.rst`` format)"""

    category: str
    """
    The category of the dataset (e.g. ``"Potential Fitting"``,
    ``"Benchmarks"``)
    """

    format: Literal["lmdb", "memory"] = "memory"
    """The format of the dataset"""

    minimum_load_atoms_version: Union[str, None] = None
    """
    The minimum version of load-atoms that is required to load the dataset.
    """

    citation: Optional[str] = None
    """A citation for the dataset (in BibTeX format)"""

    license: Optional[str] = None
    """The license identifier of the dataset (e.g. ``"CC BY-NC-SA 4.0"``)"""

    representative_structure: Optional[int] = None
    """The index of a representative structure (for visualisation purposes)"""

    per_atom_properties: Optional[Dict[str, PropertyDescription]] = None
    """A mapping from per-atom properties to their descriptions"""

    per_structure_properties: Optional[Dict[str, PropertyDescription]] = None
    """A mapping from per-structure properties to their descriptions"""

    @field_validator("category")
    @classmethod
    def validate_category(cls, v):
        """
        Ensure the category is one of ``VALID_CATEGORIES``.

        Raises
        ------
        ValueError
            If ``v`` is not a known category.
        """
        if v not in VALID_CATEGORIES:
            raise ValueError(
                f"Invalid category: {v}. Must be one of {VALID_CATEGORIES}"
            )
        return v

    @field_validator("license")
    @classmethod
    def validate_license(cls, v):
        """
        Ensure the license, when given, is one of ``VALID_LICENSES``.

        Raises
        ------
        ValueError
            If ``v`` is a string that is not a known license identifier.
        """
        # The field is optional: an explicit ``None`` (e.g. ``license: null``
        # in the yaml file) must be treated the same as omitting the field.
        if v is None:
            return None
        if v not in VALID_LICENSES:
            raise ValueError(
                f"Invalid license: {v}. Must be one of {VALID_LICENSES}"
            )
        return v

    @field_validator("citation")
    @classmethod
    def validate_citation(cls, v):
        """
        Strip surrounding whitespace from the citation and perform a cheap
        sanity check that it looks like a BibTeX entry (starts with ``@``
        and ends with ``}``).

        Raises
        ------
        ValueError
            If the stripped citation does not have that shape.
        """
        # The field is optional: pass an explicit ``None`` through instead
        # of failing with an ``AttributeError`` on ``None.strip()``.
        if v is None:
            return None
        v = v.strip()
        if v.startswith("@") and v.endswith("}"):
            return v
        raise ValueError(f"Invalid BibTeX: {v}")

    @field_validator("minimum_load_atoms_version", mode="before")
    @classmethod
    def convert_minimum_version_to_str(cls, v):
        """
        Coerce the raw version value to ``str`` before type validation.

        Runs in ``"before"`` mode so that a non-string value (presumably a
        yaml scalar such as ``0.2`` that was parsed as a number) is accepted
        by the ``str`` field. ``None`` is left untouched.
        """
        if v is None:
            return None
        return str(v)

    @classmethod
    def from_yaml_file(cls, path: Union[Path, str]) -> "DatabaseEntry":
        """
        Build an entry from the yaml file at ``path``.

        Parameters
        ----------
        path
            Location of the yaml description; resolved to an absolute path
            so that the error message below is unambiguous.

        Raises
        ------
        ValueError
            If the parsed content cannot be turned into a valid entry. The
            original exception is chained as the cause.
        """
        path = Path(path).resolve()
        # Decode explicitly as UTF-8 rather than with the platform's locale
        # encoding, so that non-ASCII metadata loads identically everywhere.
        with open(path, encoding="utf-8") as f:
            data = yaml.safe_load(f)

        try:
            return cls(**data)
        except Exception as e:
            # Deliberately broad: any failure here (validation error, empty
            # or malformed file, ...) is reported with the same actionable
            # hint, and the root cause stays available via ``__cause__``.
            raise ValueError(
                f"Error loading dataset description from {path}. It may be "
                "that you have a stale version of this dataset's yaml file on "
                "disk. Please delete the file and try again:\n"
                f' $ rm "{path}"'
            ) from e

    @classmethod
    def remote_url_for_yaml(cls, dataset_id: str) -> str:
        """
        Return the remote URL of the yaml description for ``dataset_id``,
        i.e. ``BASE_REMOTE_URL`` followed by ``<id>/<id>.yaml``.
        """
        return BASE_REMOTE_URL + f"{dataset_id}/{dataset_id}.yaml"

    @classmethod
    def importer_file_stem(cls, dataset_id: str) -> str:
        """
        Return the importer module's file stem for ``dataset_id``: the id
        lower-cased, with every ``-`` replaced by ``_``.
        """
        return dataset_id.lower().replace("-", "_")

    @classmethod
    def remote_url_for_importer(cls, dataset_id: str) -> str:
        """
        Return the remote URL of the importer source file for
        ``dataset_id``.
        """
        # Go through ``cls`` so that a subclass overriding
        # ``importer_file_stem`` is respected.
        fname = cls.importer_file_stem(dataset_id)
        return BASE_REMOTE_URL + f"src/load_atoms/database/importers/{fname}.py"