Source code for dfa_recommender.df_class

'''
A set of utility functions for density fitting.
Some Credits to: https://gitlab.com/jmargraf/kdf
'''

import numpy as np
import psi4
import os
from typing import Tuple


[docs]def get_molecule(xyzfile: str, charge: int, spin: int, sym: str = 'c1') -> Tuple[psi4.core.Molecule, list]: ''' Assemble a molecule object from xyzfile, charge and spin. Parameters ---------- xyzfile: str, path to the xyz file of the input molecule. charge: int, charge of the input molecule. spin: int, spin multiplicity (2*S + 1) for the input molecule sym: str, Optional, default: c1 point group symmetry of the input molecule Returns ---------- mol: psi4.geometry object psi4.geometry object for the input molecule symbols: list list of atom symbols ''' wholetext = "%s %s\n" % (charge, spin) symbols = [] if os.path.isfile(xyzfile): with open(xyzfile, "r") as fo: natoms = int(fo.readline().split()[0]) fo.readline() for ii in range(natoms): line = fo.readline() wholetext += line symbols.append(line.split()[0]) else: raise FileNotFoundError("No file named : ", xyzfile) wholetext += "\nsymmetry %s\nnoreorient\nnocom\n" % sym mol = psi4.geometry("""%s""" % wholetext) return mol, symbols
[docs]class DensityFitting: ''' Density fitting class to project the electron density onto auxiliary basis sets. ''' def __init__(self, wfnpath: str, xyzfile: str, basis: str, charge: int = 0, spin: int = 1, wfnpath2: str = 'NA') -> None: ''' Calculate a set of variables used in the final density fitting stage Parameters ---------- wfnpath: str, path to the wavefunction file of the input molecule. xyzfile: str, path to the xyz file of the input molecule. basis: str, name of the auxiliary basis set for density fitting. charge: int, charge of the input molecule. spin: int, spin multiplicity (2*S + 1) for the input molecule. ''' self.basis = basis self.charge = charge self.spin = spin self.wfnpath = wfnpath self.wfnpath2 = wfnpath2 self.xyzfile = xyzfile self.construct_aux() self.get_dab() self.calc_utilities() def __str__(self) -> None: return f'wfnpath: {self.wfnpath}\nxyzfile: {self.xyzfile}\nbasis: {self.basis}' @property def wfnpath(self) -> None: return self._wfnpath @wfnpath.setter def wfnpath(self, wfnpath: str) -> None: ''' set wfn and orb attribute ''' self._wfnpath = wfnpath self.wfn = psi4.core.Wavefunction.from_file(self._wfnpath) assert isinstance(self.wfn, psi4.core.Wavefunction) self.orb = self.wfn.basisset() @property def wfnpath2(self) -> None: return self._wfnpath2 @wfnpath2.setter def wfnpath2(self, wfnpath2: str) -> None: ''' set wfn2 attribute. wfn2 is the wfn of the other electronic state at the same geometry ''' self._wfnpath2 = wfnpath2 if wfnpath2 == "NA": pass elif os.path.isfile(wfnpath2): self.wfn2 = psi4.core.Wavefunction.from_file(self._wfnpath2) assert isinstance(self.wfn2, psi4.core.Wavefunction) else: raise FileNotFoundError("wfn file is not availbale: ", wfnpath2) @property def xyzfile(self) -> None: return self._mol @xyzfile.setter def xyzfile(self, xyzfile: str) -> None: ''' set psi4 molecule and symbols attribute ''' self._xyzfile = xyzfile self.mol, self.symbols = get_molecule(self._xyzfile, self.charge, self.spin) assert isinstance(self.mol, psi4.core.Molecule)
[docs] def construct_aux(self) -> None: ''' Load files, transform them to utility varibles, and check data types ''' #psi4.core.set_global_option('df_basis_scf', "def2-universal-JFIT") self.aux = psi4.core.BasisSet.build(self.mol, "DF_BASIS_SCF", "", "JKFIT", self.basis)
[docs] def get_dab(self) -> None: ''' Build dab_P tensor as tensor before contracting to aux coeffiecients (np.ndarray) ''' zero_bas = psi4.core.BasisSet.zero_ao_basis_set() mints = psi4.core.MintsHelper(self.orb) #abQ = mints.ao_eri(self.orb, self.orb, self.aux, zero_bas) #Jinv = mints.ao_eri(zero_bas, self.aux, zero_bas, self.aux) abQ = mints.ao_eri(self.aux, zero_bas, self.orb, self.orb) Jinv = mints.ao_eri(self.aux, zero_bas, self.aux, zero_bas) Jinv.power(-1.0, 1.e-14) abQ = np.squeeze(abQ) self.Jinv = np.squeeze(Jinv) self.dab_P = np.einsum('Qab,QP->abP', abQ, self.Jinv, optimize=True)
[docs] def calc_utilities(self) -> None: ''' Calculate the shell numbers and number of basis functions in each shell. ''' self.numfuncatom = np.zeros(self.mol.natom()) shells = [] for func in range(0, self.aux.nbf()): current = self.aux.function_to_center(func) shell = self.aux.function_to_shell(func) shells.append(shell) self.numfuncatom[current] += 1 self.shellmap = [] ii = 0 tmp, tmp_count = [], 0 for shell in range(self.aux.nshell()): count = shells.count(shell) tmp_count += count tmp.append((count-1)//2) if tmp_count == self.numfuncatom[ii]: ii += 1 self.shellmap.append(tmp) tmp = [] tmp_count = 0
[docs] def get_df_coeffs(self, D : psi4.core.Matrix) -> None: ''' Calculate the raw density fitting coefficients. ''' self.C_P = np.einsum('abP,ab->P', self.dab_P, D, optimize=True)
[docs] def compensate_charges(self): ''' Compensate charges in the density fitting. NOTE that currently only work for alpha. ''' q = [] counter = 0 shellmap = np.concatenate(self.shellmap) for i in range(self.mol.natom()): for j in range(counter, counter + int(self.numfuncatom[i])): shell_num = self.aux.function_to_shell(j) shell = self.aux.shell(shell_num) # assumes that each shell only has 1 primitive. true for a2 basis normalization = shell.coef(0) exponent = shell.exp(0) if shellmap[shell_num] == 0: integral = (1/(4*exponent))*np.sqrt(np.pi/exponent) q.append(4*np.pi*normalization*integral) else: q.append(0.0) counter += 1 q = np.array(q)*0.5 bigQ = self.wfn.nalpha() # compute lambda numer = bigQ - np.dot(q, self.C_P*2) # print(bigQ, np.dot(q, self.C_P)) denom = np.dot(np.dot(q, self.Jinv),q) lambchop = numer/denom self.C_P += np.dot(self.Jinv, lambchop*q)*0.5
# print("sum q: ", np.sum(q)) # print("bigQ: ", bigQ) # print(numer, denom, lambchop)
[docs] def calc_powerspec(self) -> np.array: ''' Calculates powerspectrum to yeild a invariant representation from density fitting coefficients Returns ---------- powerspec: np.ndarray powerspectrum derived from density fitting coefficients. ''' shells = [] shells_to_at = [] preshell = -1 currshell = [] for i_bf in range(self.aux.nbf()): f2s = self.aux.function_to_shell(i_bf) f2c = int(self.aux.function_to_center(i_bf)) if f2s == preshell: currshell.append(i_bf) else: shells.append(currshell) currshell = [i_bf] preshell = f2s shells_to_at.append(f2c) shells.append(currshell) powerspec = [] preat = -1 currat = [] for i_s, shell in enumerate(shells[1:]): p = np.sum(self.C_P[shell]**2) if shells_to_at[i_s] == preat: currat.append(p) else: powerspec.append(currat) currat = [p] preat = shells_to_at[i_s] powerspec.append(currat) return np.array(powerspec[1:])
[docs] def pad_df_coeffs(self) -> None: ''' Convert self.C_P (a 1D array) to self.self.C_P_pad (N_atoms x M), where M corresponds to the largest dim of coeffs of all atoms. For example, H2O at def2-universal-jkfit basis has 113 coeffs. H -> [0, 0, 1, 1, 2, 2] -> 18 coeffs O -> [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 4] -> 77 coeffs Then self.self.C_P_pad is a (3, 77) np.array with zero-padding at corresponding irreps ''' self.max_shell = np.array(self.shellmap[np.argmax(self.numfuncatom)]) self.n_coeff_atom = int(np.max(self.numfuncatom)) self.max_tmplate = [0] self.irreps = [] for ii in range(np.max(self.max_shell) + 1): self.max_tmplate.append((self.max_shell == ii).sum()*(2*ii + 1) + self.max_tmplate[-1]) evenodd = "e" if ii%2==0 else "o" self.irreps += [str((self.max_shell == ii).sum()) + "x" + str(ii) + evenodd] self.irreps = "+".join(self.irreps) tmplates = [] for jj in range(len(self.shellmap)): tmplate = [0] _shell = np.array(self.shellmap[jj]) for ii in range(np.max(_shell)): tmplate.append((_shell == ii).sum()*(2*ii + 1) + tmplate[-1]) tmplate.append(int(self.numfuncatom[jj])) tmplates.append(tmplate) self.C_P_pad = np.zeros(shape=(self.numfuncatom.shape[0], self.n_coeff_atom)) count = 0 for ii, tmplate in enumerate(tmplates): for jj in range(np.max(self.shellmap[ii])+1): _end = min(self.max_tmplate[jj] + tmplate[jj+1] - tmplate[jj], self.max_tmplate[jj+1]) self.C_P_pad[ii, self.max_tmplate[jj]: _end] = self.C_P[(count + tmplate[jj]): (count +tmplate[jj+1])] count += int(self.numfuncatom[ii])
[docs] def convert_CP2e3nn(self) -> None: ''' match m between psi4 and e3nn convension within the same l. For example, for l=1, psi4 m: [0, 1, -1] e3nn m: [-1, 0, 1] For example, for l=2, psi4 m: [0, 1, -1, 2, -2] e3nn m: [-2, -1, 0, 1, 2] ''' psi4_2_e3nn = [[0],[2,0,1],[4,2,0,1,3],[6,4,2,0,1,3,5],[8,6,4,2,0,1,3,5,7]] self.C_P_pad_e3nn = np.zeros(shape=self.C_P_pad.shape) for ii in range(self.C_P_pad.shape[0]): for jj, ele in enumerate(self.irreps.split("+")): num, l = int(ele.split("x")[0]), int(ele.split("x")[1][0]) coeffs = self.C_P_pad[ii][self.max_tmplate[jj]: self.max_tmplate[jj+1]] coeffs = np.array_split(coeffs, num) coeffs_trans = [] for coeff in coeffs: for k in psi4_2_e3nn[l]: coeffs_trans.append(coeff[k]) self.C_P_pad_e3nn[ii][self.max_tmplate[jj]: self.max_tmplate[jj+1]] = coeffs_trans mat = np.where(np.abs(self.C_P_pad_e3nn)< 1e-8) self.C_P_pad_e3nn[mat[0], mat[1]] = 0