class ASEDatabase(Sequence[ase.Atoms]):
    """
    A class that wraps an ASE database file, allowing for indexing into the
    database to obtain :class:`ase.Atoms` objects.

    We assume that each row contains labels in the ``data`` attribute,
    as a mapping from property names to values, and that units are "standard"
    ASE units, e.g. ``eV``, ``eV/Å``, etc.

    Fully compatible with `SchNetPack Dataset Files <https://schnetpack.readthedocs.io/en/latest/tutorials/tutorial_01_preparing_data.html>`__.

    See the `ASE documentation <https://wiki.fysik.dtu.dk/ase/ase/db/db.html>`__
    for more details about this file format.

    Indexing is 0-based: item ``i`` is read from the database row with id
    ``i + 1``. Negative indices count from the end, slices return a list of
    structures, and out-of-range indices raise :class:`IndexError`.

    .. warning::

        This dataset indexes into a database, performing many random
        access reads from disk. This can be very slow! If you are using a
        distributed compute cluster, ensure you copy your database file
        to somewhere with fast local storage (as opposed to network-attached
        storage).

        Similarly, consider using several workers when loading the dataset,
        e.g. ``fitting/loader_kwargs/num_workers=8``.

    Parameters
    ----------
    path: str | pathlib.Path
        The path to the database.

    Raises
    ------
    FileNotFoundError
        If ``path`` does not exist.
    """  # noqa: E501

    def __init__(self, path: str | pathlib.Path):
        path = pathlib.Path(path)
        if not path.exists():
            raise FileNotFoundError(f"Database file {path} does not exist")
        self.path = path
        self.db = ase.db.connect(path, use_lock_file=False)

    @overload
    def __getitem__(self, index: int) -> ase.Atoms: ...

    @overload
    def __getitem__(self, index: slice) -> Sequence[ase.Atoms]: ...

    def __getitem__(self, index: int | slice) -> ase.Atoms | Sequence[ase.Atoms]:
        """
        Read one structure (``int`` index) or a list of structures (``slice``).

        Entries of the row's ``data`` mapping that are numpy arrays whose
        first dimension equals the number of atoms are stored in
        ``atoms.arrays``; every other entry is stored in ``atoms.info``.

        Raises
        ------
        IndexError
            If an integer ``index`` does not correspond to a row.
        """
        if isinstance(index, slice):
            indices = slice_to_range(index, len(self))
            return [self[i] for i in indices]

        # Only negative indices need the length, so the common non-negative
        # path does not pay for an extra count query.
        position = index + len(self) if index < 0 else index
        if position < 0:
            raise IndexError(f"Index {index} is out of range for {self.path}")

        # NOTE(review): row ids are taken to be ``position + 1``, i.e. ids are
        # assumed to run contiguously from 1 — confirm for databases that
        # have had rows deleted.
        try:
            atoms = self.db.get_atoms(position + 1, add_additional_information=True)
        except KeyError as err:
            # ASE signals "no row with this id" with a KeyError. The Sequence
            # mixins (``__iter__``, ``__contains__``, ...) only stop on
            # IndexError, so translate it.
            raise IndexError(
                f"Index {index} is out of range for {self.path}"
            ) from err

        data = atoms.info.pop("data", {})
        n_atoms = len(atoms)
        # ``ndim > 0`` keeps 0-d arrays (which have no first dimension) out of
        # the per-atom check; they are treated as structure-level info.
        arrays = {
            key: value
            for key, value in data.items()
            if isinstance(value, np.ndarray)
            and value.ndim > 0
            and value.shape[0] == n_atoms
        }
        info = {key: value for key, value in data.items() if key not in arrays}
        atoms.arrays.update(arrays)
        atoms.info.update(info)
        return atoms

    def __len__(self) -> int:
        """Return the number of rows reported by the database."""
        return self.db.count()