Skip to content

Commit

Permalink
make one_hot optional (#158)
Browse files Browse the repository at this point in the history
  • Loading branch information
pzhanggit authored Jan 17, 2023
1 parent a417f61 commit 9fae102
Showing 1 changed file with 85 additions and 37 deletions.
122 changes: 85 additions & 37 deletions hydragnn/utils/atomicdescriptors.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ def __init__(
embeddingfilename,
overwritten=True,
element_types=["C", "H", "O", "N", "F", "S"],
one_hot=False,
):
if os.path.exists(embeddingfilename) and not overwritten:
print("loading from existing file: ", embeddingfilename)
Expand All @@ -31,7 +32,8 @@ def __init__(
for ele in mendeleev.get_all_elements():
if ele.symbol in element_types:
self.element_types.append(ele.symbol)
self.element_types = element_types
self.one_hot = one_hot
type_id = self.get_type_ids()
group_id = self.get_group_ids()
period = self.get_period()
covalent_radius = self.get_covalent_radius()
Expand All @@ -43,10 +45,41 @@ def __init__(
electronegativity = self.get_electronegativity()
valenceelectrons = self.get_valence_electrons()
ionenergies = self.get_ionenergies()
if self.one_hot:
# properties with integer values
group_id = self.convert_integerproperty_onehot(group_id, num_classes=-1)
period = self.convert_integerproperty_onehot(period, num_classes=-1)
atomic_number = self.convert_integerproperty_onehot(
atomic_number, num_classes=-1
)
valenceelectrons = self.convert_integerproperty_onehot(
valenceelectrons, num_classes=-1
)
# properties with real values
covalent_radius = self.convert_realproperty_onehot(
covalent_radius, num_classes=10
)
electron_affinity = self.convert_realproperty_onehot(
electron_affinity, num_classes=10
)
atomic_volume = self.convert_realproperty_onehot(
atomic_volume, num_classes=10
)
atomic_weight = self.convert_realproperty_onehot(
atomic_weight, num_classes=10
)
electronegativity = self.convert_realproperty_onehot(
electronegativity, num_classes=10
)
ionenergies = self.convert_realproperty_onehot(
ionenergies, num_classes=10
)

for iele, ele in enumerate(self.element_types):
nfeatures = 0
self.atom_embeddings[str(mendeleev.element(ele).atomic_number)] = []
for var in [
type_id,
group_id,
period,
covalent_radius,
Expand All @@ -66,12 +99,15 @@ def __init__(
with open(embeddingfilename, "w") as f:
json.dump(self.atom_embeddings, f)

def get_group_ids(self):
def get_type_ids(self):
type_id = F.one_hot(torch.arange(len(self.element_types)), num_classes=-1)
return type_id

def get_group_ids(self, num_classes=-1):
group_id = []
for ele in self.element_types:
group_id.append(self.get_group_id(ele) - 1)
group_id = F.one_hot(torch.Tensor(group_id).long(), num_classes=-1)
return group_id
return torch.Tensor(group_id).reshape(len(self.element_types), -1)

def get_group_id(self, element):
if mendeleev.element(element).group_id is not None:
Expand All @@ -81,14 +117,13 @@ def get_group_id(self, element):
f"None is returned by Mendeleev for group_id of element: {element}"
)

def get_period(self):
def get_period(self, num_classes=-1):
period = []
for ele in self.element_types:
period.append(mendeleev.element(ele).period - 1)
period = F.one_hot(torch.Tensor(period).long(), num_classes=-1)
return period
return torch.Tensor(period).reshape(len(self.element_types), -1)

def __listocategorical__(self, prop_list, prop_name, num_classes=10):
def __propertynormalize__(self, prop_list, prop_name):

None_elements = [
ele for ele, item in zip(self.element_types, prop_list) if item is None
Expand All @@ -100,43 +135,42 @@ def __listocategorical__(self, prop_list, prop_name, num_classes=10):

minval = min(prop_list)
maxval = max(prop_list)
delval = (maxval - minval) / num_classes
categories = [
min(int((item - minval) / delval), num_classes - 1) for item in prop_list
]
return [(item - minval) / (maxval - minval) for item in prop_list]

def __realtocategorical__(self, prop_tensor, num_classes=10):

delval = (prop_tensor.max() - prop_tensor.min()) / num_classes
categories = torch.minimum(
(prop_tensor - prop_tensor.min()) / delval, torch.tensor([num_classes - 1])
)
return categories

def get_covalent_radius(self):
cr = []
for ele in self.element_types:
cr.append(mendeleev.element(ele).covalent_radius)
cr = self.__listocategorical__(cr, "covalent_radius")
cr = F.one_hot(torch.Tensor(cr).long(), num_classes=10)
return cr
cr = self.__propertynormalize__(cr, "covalent_radius")
return torch.Tensor(cr).reshape(len(self.element_types), -1)

def get_atomic_number(self):
an = []
for ele in self.element_types:
an.append(mendeleev.element(ele).atomic_number)
an = self.__listocategorical__(an, "atomic_number")
an = F.one_hot(torch.Tensor(an).long(), num_classes=10)
return an
return torch.Tensor(an).reshape(len(self.element_types), -1)

def get_atomic_weight(self):
aw = []
for ele in self.element_types:
aw.append(mendeleev.element(ele).atomic_weight)
aw = self.__listocategorical__(aw, "atomic_weight")
aw = F.one_hot(torch.Tensor(aw).long(), num_classes=10)
return aw
aw = self.__propertynormalize__(aw, "atomic_weight")
return torch.Tensor(aw).reshape(len(self.element_types), -1)

def get_electron_affinity(self):
ea = []
for ele in self.element_types:
ea.append(mendeleev.element(ele).electron_affinity)
ea = self.__listocategorical__(ea, "electron_affinity")
ea = F.one_hot(torch.Tensor(ea).long(), num_classes=10)
return ea
ea = self.__propertynormalize__(ea, "electron_affinity")
return torch.Tensor(ea).reshape(len(self.element_types), -1)

def get_block(self):
blocklist = ["s", "p", "d", "f"]
Expand All @@ -150,24 +184,21 @@ def get_atomic_volume(self):
av = []
for ele in self.element_types:
av.append(mendeleev.element(ele).atomic_volume)
av = self.__listocategorical__(av, "atomic_volume")
av = F.one_hot(torch.Tensor(av).long(), num_classes=10)
return av
av = self.__propertynormalize__(av, "atomic_volume")
return torch.Tensor(av).reshape(len(self.element_types), -1)

def get_electronegativity(self):
en = []
for ele in self.element_types:
en.append(mendeleev.element(ele).en_pauling)
en = self.__listocategorical__(en, "en_pauling")
en = F.one_hot(torch.Tensor(en).long(), num_classes=10)
return en
en = self.__propertynormalize__(en, "en_pauling")
return torch.Tensor(en).reshape(len(self.element_types), -1)

def get_valence_electrons(self):
ve = []
for ele in self.element_types:
ve.append(mendeleev.element(ele).nvalence() - 1)
ve = F.one_hot(torch.Tensor(ve).long(), num_classes=-1)
return ve
ve.append(mendeleev.element(ele).nvalence())
return torch.Tensor(ve).reshape(len(self.element_types), -1)

def get_ionenergies(self):
ie = []
Expand All @@ -178,9 +209,17 @@ def get_ionenergies(self):
else:
degree = min(degrees)
ie.append(mendeleev.element(ele).ionenergies[degree])
ie = self.__listocategorical__(ie, "ionenergies")
ie = F.one_hot(torch.Tensor(ie).long(), num_classes=10)
return ie
ie = self.__propertynormalize__(ie, "ionenergies")
return torch.Tensor(ie).reshape(len(self.element_types), -1)

def convert_integerproperty_onehot(self, prop, num_classes=-1):
prop = F.one_hot(prop.to(torch.int64).squeeze(), num_classes=num_classes)
return prop

def convert_realproperty_onehot(self, prop, num_classes=10):
prop = self.__realtocategorical__(prop, num_classes=num_classes)
prop = F.one_hot(prop.to(torch.int64).squeeze(), num_classes=num_classes)
return prop

def get_atom_features(self, atomtype):
if isinstance(atomtype, str):
Expand All @@ -190,6 +229,15 @@ def get_atom_features(self, atomtype):

if __name__ == "__main__":
atomicdescriptor = atomicdescriptors(
"./embedding.json", overwritten=True, element_types=["C", "H"]
"./embedding.json", overwritten=True, element_types=["C", "H", "S"]
)
print(atomicdescriptor.get_atom_features("C"))
print(len(atomicdescriptor.get_atom_features("C")))
atomicdescriptor_onehot = atomicdescriptors(
"./embedding_onehot.json",
overwritten=True,
element_types=["C", "H", "S"],
one_hot=True,
)
print(atomicdescriptor_onehot.get_atom_features("C"))
print(len(atomicdescriptor_onehot.get_atom_features("C")))

0 comments on commit 9fae102

Please sign in to comment.