Source code for graph_pes.utils.shift_and_scale

from __future__ import annotations

from typing import Sequence

import ase
import numpy as np
import torch
from ase.data import chemical_symbols
from sklearn.linear_model import Ridge

from graph_pes.atomic_graph import (
    AtomicGraph,
    number_of_structures,
    sum_per_structure,
    to_batch,
)
from graph_pes.graph_pes_model import GraphPESModel
from graph_pes.utils.nn import PerElementParameter

from .logger import logger


def guess_per_element_mean_and_var(
    per_structure_quantity: torch.Tensor,
    batch: AtomicGraph,
    min_variance: float = 0.01,
) -> tuple[dict[int, float], dict[int, float]]:
    r"""
    Guess the per-element mean (:math:`\mu_Z`) and variance
    (:math:`\sigma_Z^2`) of a per-structure quantity using ridge regression
    under the following assumptions:

    1. the per-structure property, :math:`P`, is a summation over local
       properties of its component atoms:
       :math:`P = \sum_{i=1}^{N} p_{Z_i}`.
    2. the per-atom properties, :math:`p_{Z_i}`, are independent and
       identically distributed (i.i.d.) for each atom of type :math:`Z_i`
       according to a normal distribution:
       :math:`p_{Z_i} \sim \mathcal{N}(\mu_{Z_i}, \sigma_{Z_i}^2)`.

    Parameters
    ----------
    per_structure_quantity
        The per-structure quantity to guess the per-element mean and
        variance of.
    batch
        The batch of graphs to use for guessing the per-element mean and
        variance.
    min_variance
        The minimum value that any per-element variance is clipped to.

    Returns
    -------
    means
        A dictionary mapping atomic numbers to per-element means.
    variances
        A dictionary mapping atomic numbers to per-element variances.
    """  # noqa: E501

    # build the composition matrix N such that N[structure, i] is the
    # number of atoms with atomic number unique_Zs[i] in that structure
    unique_Zs = torch.unique(batch.Z)  # (n_Z,)
    N = torch.zeros(number_of_structures(batch), len(unique_Zs))  # (batch, n_Z)
    for i, Z in enumerate(unique_Zs):
        N[:, i] = sum_per_structure((batch.Z == Z).float(), batch)

    if np.linalg.matrix_rank(N.numpy()) < N.shape[1]:
        logger.warning(
            """\
We are attempting to guess the mean per-element contribution for a
per-structure quantity (usually the total energy). However, the composition
of the training set is such that no unique solution is possible. This is
probably because you are training on structures that all have the same
composition (e.g. all structures are of the form n H2O).
Consider explicitly setting the per-element contributions if you know them,
or including a variety of structures of different compositions in the
training set.
"""
        )

    # calculate the per-element mean
    # use Ridge rather than LinearRegression to avoid singular matrices
    # when e.g. only one structure contains an atom of a given type...
    ridge = Ridge(fit_intercept=False, alpha=0.00001)
    ridge.fit(N.numpy(), per_structure_quantity)
    mu_Z = torch.tensor(ridge.coef_)
    means = {int(Z): float(mu) for Z, mu in zip(unique_Zs, mu_Z)}

    # calculate the per-element variance
    residuals = per_structure_quantity - N @ mu_Z
    # assuming that the squared residuals are a sum of the independent
    # variances for each atom, we can estimate these variances again
    # using Ridge regression
    ridge.fit(N.numpy(), residuals**2)
    var_Z = ridge.coef_
    # avoid negative variances by clipping to the minimum value
    variances = {
        int(Z): max(float(var), min_variance)
        for Z, var in zip(unique_Zs, var_Z)
    }

    logger.debug(f"Per-element means: {means}")
    logger.debug(f"Per-element variances: {variances}")

    return means, variances
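

# A minimal usage sketch (illustrative only, not part of this module):
# `structures` is a hypothetical list of ase.Atoms objects labelled with an
# "energy" property, and 5.0 is a hypothetical cutoff radius. Under those
# assumptions, per-element energy statistics could be estimated as:
#
#     graphs = [AtomicGraph.from_ase(s, 5.0) for s in structures]
#     energies = torch.tensor([g.properties["energy"] for g in graphs])
#     means, variances = guess_per_element_mean_and_var(
#         energies, to_batch(graphs)
#     )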
@torch.no_grad()
def add_auto_offset(
    model: GraphPESModel,
    structures: Sequence[AtomicGraph] | Sequence[ase.Atoms],
) -> GraphPESModel:
    from graph_pes.models import AdditionModel, LearnableOffset

    logger.info("""\
Attempting to automatically detect the offset energy for each element.
We do this by first generating predictions for each training structure
(up to `config.fitting.max_n_pre_fit` if specified). This is a slow process!

If you already know the reference energies (or the difference in reference
energies if you are fine-tuning an existing model to a different level of
theory), we recommend setting `config.fitting.auto_fit_reference_energies`
to `False` and manually specifying a `LearnableOffset` component of your
model. See the "Fine-tuning" tutorial in the docs for more information:
https://jla-gardner.github.io/graph-pes/quickstart/fine-tuning.html""")

    graphs = [
        AtomicGraph.from_ase(s, model.cutoff.item())
        if isinstance(s, ase.Atoms)
        else s
        for s in structures
    ]

    model.eval()
    predictions = torch.tensor([model.predict_energy(g) for g in graphs])
    references = torch.tensor([g.properties["energy"] for g in graphs])
    diffs = references - predictions

    means, _ = guess_per_element_mean_and_var(diffs, to_batch(graphs))

    if all(abs(mu) < 1e-4 for mu in means.values()):
        logger.info(
            "When attempting to automatically fit the offset energy, "
            "no offset was detected, so skipping."
        )
        return model

    offset_param = PerElementParameter.from_dict(
        **{chemical_symbols[Z]: mu for Z, mu in means.items()},
        requires_grad=True,
    )
    offset_model = LearnableOffset()
    offset_model._offsets = offset_param
    offset_model._has_been_pre_fit.fill_(1)

    if isinstance(model, AdditionModel) and "auto_offset" not in model.models:
        model.models["auto_offset"] = offset_model
    else:
        model = AdditionModel(base=model, auto_offset=offset_model)

    model.train()
    return model
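

# Illustrative usage (an assumption, not part of this module): `model` and
# `train_structures` are hypothetical names for an existing GraphPESModel and
# its labelled training structures. The returned model either gains an
# "auto_offset" LearnableOffset component (if it is already an AdditionModel)
# or is wrapped in a new AdditionModel containing one:
#
#     model = add_auto_offset(model, train_structures)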