diff --git a/docs/src/conf.py b/docs/src/conf.py index 58ba2a1a8..6dbdd4bfe 100644 --- a/docs/src/conf.py +++ b/docs/src/conf.py @@ -6,6 +6,7 @@ import toml + ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")) sys.path.append(os.path.join(ROOT, "python", "src")) @@ -86,8 +87,8 @@ def setup(app): "gallery_dirs": ["examples"], "min_reported_time": 60, # Make the code snippet for equistore functions clickable - #"reference_url": {"equistore": None}, - #"prefer_full_module": ["equistore"], + "reference_url": {"equistore": None}, + "prefer_full_module": ["equistore"], } breathe_projects = { diff --git a/docs/src/explanations/data.rst b/docs/src/explanations/data.rst index 4062a08c4..9cd3233ab 100644 --- a/docs/src/explanations/data.rst +++ b/docs/src/explanations/data.rst @@ -1,10 +1,12 @@ Where does the actual data come from? ===================================== -Equistore manages the metadata, where does the data come from. How does -equistore deal with it and how to register new data origins in the python -wrapper +TBD -(Python automagically transforms data to numpy.ndarray or a torch.tensor) -Equistore has no idea about the data itself (only knows the pointer to data -and operations you can perform on it - create, destroy move and reshape data) +.. Equistore manages the metadata, where does the data come from. How does +.. equistore deal with it and how to register new data origins in the python +.. wrapper + +.. (Python automagically transforms data to numpy.ndarray or a torch.tensor) +.. Equistore has no idea about the data itself (only knows the pointer to data +.. 
and operations you can perform on it - create, destroy move and reshape data) diff --git a/docs/src/explanations/gradients.rst b/docs/src/explanations/gradients.rst index 3a7cf75aa..e5656822b 100644 --- a/docs/src/explanations/gradients.rst +++ b/docs/src/explanations/gradients.rst @@ -1,14 +1,17 @@ Gradients and how we manage them ================================ -Gradient samples - "special" format +TBD -first sample of gradients is "sample" that refers to the row in block.values -that we are taking the gradient of. -the other samples - what we are taking the gradient with respect to. -Write what this entails -- block.gradients.sample (i A j) (pair feature i j A k) -Cell gradients - Sample (i) -components [[x y z ] [x y z]] (displacement matrix) +.. Gradient samples - "special" format -Gradient wrt hypers +.. first sample of gradients is "sample" that refers to the row in block.values +.. that we are taking the gradient of. +.. the other samples - what we are taking the gradient with respect to. +.. Write what this entails -- block.gradients.sample (i A j) (pair feature i j A k) + +.. Cell gradients - Sample (i) +.. components [[x y z ] [x y z]] (displacement matrix) + +.. Gradient wrt hypers diff --git a/docs/src/explanations/index.rst b/docs/src/explanations/index.rst index 9f8408ff7..690e1ea38 100644 --- a/docs/src/explanations/index.rst +++ b/docs/src/explanations/index.rst @@ -3,10 +3,11 @@ Explanations ============ -The explanation section discusses topics that broaden your knowledge of -equistore. The theory behind the calculators and additional useful information -are found here to give you more clarity and understanding of what equistore is -all about. +The explanation section discusses key topics and concepts at a fairly high level +and provides useful explanations to expand your knowledge of equistore. 
It +requires at least basic to intermediate knowledge of equistore. If you are an +absolute beginner, we recommend you start from the :ref:`userdoc-get-started` +section of the documentation. .. toctree:: :maxdepth: 2 diff --git a/docs/src/get-started/equistore.rst b/docs/src/get-started/equistore.rst deleted file mode 100644 index 69640b275..000000000 --- a/docs/src/get-started/equistore.rst +++ /dev/null @@ -1,28 +0,0 @@ -What is equistore -================= - -Equistore is a specialized data storage format suited to all your atomistic -simulation needs and more. Equistore provides an accessible and understandable -storage format for the data one comes across in atomistic machine learning. - -When working with large amounts of data, especially relating to atomistic -simulations, one often needs access to the metadata such as the nature of -the atomic scale objects being represented, various components seprated by -symmetry, and .. to name a few. This metadata is implicit when storing this -data as an array and it becomes increasingly painstaking to locate entries -in the data corresponding to a specific selection of metadata (for example, -imagine locating the gradients of the (nlm) component of the representation -of atom *i* in structure *A* with respect to another atom *j*) with the size -of the data (or the atomic entity). - -Another example arises when using equistore -to compute atom-centered density correlation (ACDC) features, we can divide the -descriptor data into blocks indexed by the chemical nature of the centers, -behavior under symmetry operations (rotational and inversion), and the correlation -order of the representation. Higher order features (in terms of correlations -around the same center or including higher number of centers) can be computed -by combining these blocks, a process that helps highlight their roles in model -performance and tracks the information flow completely. 
-This data that has been unraveled and stored into different blocks can be -reassembled to a contiguous storage format, if desired, with a step-by-step -control of data recombination. diff --git a/docs/src/get-started/index.rst b/docs/src/get-started/index.rst index e00e53431..d68cd324c 100644 --- a/docs/src/get-started/index.rst +++ b/docs/src/get-started/index.rst @@ -8,7 +8,5 @@ The following sections describes how to install and start with using equistore. .. toctree:: :maxdepth: 2 - equistore concepts installation - tutorials/index diff --git a/docs/src/get-started/tutorials/index.rst b/docs/src/get-started/tutorials/index.rst deleted file mode 100644 index d54bfd50d..000000000 --- a/docs/src/get-started/tutorials/index.rst +++ /dev/null @@ -1,11 +0,0 @@ -.. _userdoc-tutorials: - -Tutorials: using equistore from Python -====================================== - -The presented tutorials allow you to perform basic calculations in equistore. - -.. toctree:: - :maxdepth: 1 - - ../../examples/first-tensormap diff --git a/docs/src/how-to/index.rst b/docs/src/how-to/index.rst deleted file mode 100644 index cecaeffa9..000000000 --- a/docs/src/how-to/index.rst +++ /dev/null @@ -1,11 +0,0 @@ -.. _userdoc-how-to: - -How-to guides -============= - -.. 
toctree:: - :maxdepth: 2 - - linear-model - operations - torch diff --git a/docs/src/how-to/operations.rst b/docs/src/how-to/operations.rst deleted file mode 100644 index bf836b0bc..000000000 --- a/docs/src/how-to/operations.rst +++ /dev/null @@ -1,4 +0,0 @@ -Performing operations ---------------------- - -TBD diff --git a/docs/src/how-to/torch.rst b/docs/src/how-to/torch.rst deleted file mode 100644 index 6dbe9b0aa..000000000 --- a/docs/src/how-to/torch.rst +++ /dev/null @@ -1,3 +0,0 @@ -Torch ------ -Using torch tensors as data (gpus, autograd) diff --git a/docs/src/index.rst b/docs/src/index.rst index df1e383e6..4553ad16d 100644 --- a/docs/src/index.rst +++ b/docs/src/index.rst @@ -1,65 +1,50 @@ -Overview of Equistore's Documentation -===================================== - -This documentation covers everything you need to know about equistore. -It comprises of the following five broad sections: - -- :ref:`userdoc-get-started` -- :ref:`userdoc-how-to` -- :ref:`userdoc-references` -- :ref:`userdoc-explanations` -- :ref:`devdoc` - -If you are new to equistore we recommend starting with the -:ref:`userdoc-get-started` section. If you want to contribute to the development -of the library please have a look at our :ref:`developer documentation -`. +Equistore: data storage for atomistic machine learning +====================================================== +Equistore is a specialized data storage format suited to all your atomistic +machine learning needs and more. You can think of it like ``numpy.ndarray`` or +``torch.Tensor``, but carrying extra metadata together with the data. 
-Getting started ---------------- +This metadata can be about the nature of the **objects** being described, about +**how** this object is being described, about **symmetry** properties of the +data (this is especially relevant for equivariant machine learning), different +**sparsity** linked to one-hot encoding of species or **components** of +gradients of the above with respect to various parameters. -If you are an absolute beginner, we recommend you to start with the get started -pages to familiarize yourself with equistore and the equistore ecosystem. +For example, the object being described could be "one atom in a structure", or +"a pair of atoms", while the how could be "using SOAP power spectrum features" +or "Hamiltonian matrix elements". -How-to guides -------------- +Equistore's main concern is representing and manipulating this metadata, while +letting other well-established libraries handle the data itself. We currently +support using arbitrary CPU arrays created by any language (including numpy +arrays), as well as PyTorch tensors --- including full support for GPU and +automatic differentiation. -This section comprises of guides that will take you through series of steps -involved in addressing key problems and use-cases in equistore. It requires -intermediate to advanced knowledge of how equistore works. If you are an -absolute beginner, it is recommended you start from the -:ref:`userdoc-get-started` section before going to the How to Guides. +.. TODO: the end goal is to create an ecosystem of inter-operable libraries for atomistic ML +.. TODO: equistore does not create data, other libraries do +.. TODO: add a figure -Reference guides ----------------- +-------------------------------------------------------------------------------- -The Reference Guide contains technical references for equistore's APIs. -It describes the various functionalities -provided by equistore. 
You can always refer to this section to learn more about -classes, functions, modules, and other aspects of equistore's machinery you come -across. - -Explanations ------------- - -The explanation section discusses key topics and concepts at a fairly high level -and provides useful explanations to expand your knowledge of equistore. It -requires at least basic to intermediate knowledge of equistore If you are an -absolute beginner, we recommend you start from the :ref:`userdoc-get-started` -section of the documentation. - -Developer documentation ------------------------ +This documentation covers everything you need to know about equistore. +It comprises the following five broad sections: -The developer guide introduces the aspects of how contributing to the code base -or the documentation of equistore. +- :ref:`userdoc-get-started`: familiarize yourself with equistore and its + ecosystem; +- :ref:`userdoc-tutorials`: step-by-step tutorials addressing key problems and + use-cases for equistore; +- :ref:`userdoc-references`: technical description of all the functionalities + provided by equistore; +- :ref:`userdoc-explanations`: high-level explanation of more advanced + functionalities; +- :ref:`devdoc`: how to contribute to the code or the documentation of equistore. .. toctree:: :hidden: get-started/index - how-to/index + tutorials/index reference/index explanations/index devdoc/index diff --git a/docs/src/reference/index.rst b/docs/src/reference/index.rst index 94eefd371..7ee54b8bf 100644 --- a/docs/src/reference/index.rst +++ b/docs/src/reference/index.rst @@ -3,8 +3,10 @@ Reference guides ---------------- -The reference guides describe how the equistore API -can be used from each language. +The reference guides contain technical references for equistore's APIs. They +describe the various functionalities provided by equistore. 
You can always +refer to this section to learn more about classes, functions, modules, and other +aspects of equistore's machinery you come across. .. toctree:: diff --git a/docs/src/tutorials/first-tensormap.rst b/docs/src/tutorials/first-tensormap.rst new file mode 100644 index 000000000..a5e1fe4c7 --- /dev/null +++ b/docs/src/tutorials/first-tensormap.rst @@ -0,0 +1,17 @@ +Creating TensorMap manually +=========================== + +.. container:: sphx-glr-footer sphx-glr-footer-example + + .. container:: sphx-glr-download sphx-glr-download-python + + :download:`Download Python source code for this example: first-tensormap.py <../examples/first-tensormap.py>` + + .. container:: sphx-glr-download sphx-glr-download-jupyter + + :download:`Download Jupyter notebook for this example: first-tensormap.ipynb <../examples/first-tensormap.ipynb>` + + +.. include:: ../examples/first-tensormap.rst + :start-after: start-body + :end-before: end-body diff --git a/docs/src/tutorials/index.rst b/docs/src/tutorials/index.rst new file mode 100644 index 000000000..40fc89b7b --- /dev/null +++ b/docs/src/tutorials/index.rst @@ -0,0 +1,18 @@ +.. _userdoc-tutorials: + +Tutorials +========= + +This section consists of guides that will take you through a series of steps +involved in addressing key problems and use-cases in equistore. It requires +intermediate to advanced knowledge of how equistore works. If you are an +absolute beginner, it is recommended you start from the +:ref:`userdoc-get-started` section before going through the tutorials. + +.. 
toctree:: + :maxdepth: 2 + + first-tensormap + linear-model + operations + torch diff --git a/docs/src/how-to/linear-model.rst b/docs/src/tutorials/linear-model.rst similarity index 100% rename from docs/src/how-to/linear-model.rst rename to docs/src/tutorials/linear-model.rst diff --git a/docs/src/tutorials/operations.rst b/docs/src/tutorials/operations.rst new file mode 100644 index 000000000..d7973c392 --- /dev/null +++ b/docs/src/tutorials/operations.rst @@ -0,0 +1,4 @@ +Operating on equistore data +--------------------------- + +TBD diff --git a/docs/src/tutorials/torch.rst b/docs/src/tutorials/torch.rst new file mode 100644 index 000000000..83ec02693 --- /dev/null +++ b/docs/src/tutorials/torch.rst @@ -0,0 +1,6 @@ +Integration with PyTorch +------------------------ + +TBD + +.. Using torch tensors as data (gpus, autograd) diff --git a/python/examples/first-tensormap.py b/python/examples/first-tensormap.py index 8e53cc7f1..c64682626 100644 --- a/python/examples/first-tensormap.py +++ b/python/examples/first-tensormap.py @@ -1,40 +1,52 @@ """ -.. _userdoc-tutorials-first-tensormap: - Getting your first Tensormap ============================ -We will start by importing all the required packages: the classic numpy; -``ase`` to load the data, and of course equistore. +.. start-body + +Most of the time, users of equistore will not have to create +:py:class:`equistore.TensorMap` themselves, but rather manipulate and use +TensorMaps created by other software (for example atomic-scale representations +computed by `rascaline `_). However, for +the sake of keeping the examples in equistore simple and only about equistore, +we will show here how to create a TensorMap manually. This will also be useful +if you would like to integrate your data with equistore to get access to all the +corresponding models and operations. 
+ + +This example is written in Python, however the concepts are going to be the same +in every programming language, with some small changes in API to adapt to each +language's capabilities. Interested users should refer to the :ref:`API +reference <userdoc-references>` for the language they would like to use. + +------------------ + +TODO: link to dataset.xyz + +------------------ """ +# %% +# +# We will start by importing all the required packages: the classic numpy; +# ``ase`` to load the data, and of course equistore. + from itertools import product +import ase.io import numpy as np -from ase.io import read from equistore import Labels, TensorBlock, TensorMap -frames = [] # Load the dataset - -# frames=[] -# with Trajectory('dataset.xyz') as dataset: -# frames = [f for f in dataset] - -frames = read("dataset.xyz", ":10") +frames = ase.io.read("dataset.xyz", ":10") # %% # -# Equistore -# --------- -# -# In this tutorial, we are going to use a new storage format, Equistore, -# https://github.com/lab_cosmo/equistore. -# %% # Creating your first TensorMap -# -------------------------------- +# ----------------------------- +# # Let us start with the example of storing bond lengths as a TensorMap. We can think of # categorizing the data based on the chemical nature (atomic species) of the two atoms # involved in the pair. As not all species might be present in all the frames, this @@ -55,15 +67,16 @@ # %% # -# For each species pairs, find the relevant samples, i.e. the list of all frames and -# index of atoms in the frame that correspond to the species pair in block_samples +# For each species pair, find the relevant samples, i.e. 
the list of all frames +# and index of atoms in the frame that correspond to the species pair in +# block_samples block_samples = [] for (a1, a2) in species_pairs: frame_samples = [] for idx_frame, f in enumerate(frames): - # create tuples of the form (idx_frame, idx_i, idx_j) - # where idx_i is the index of atoms in the frame such that they have species =a1 - # and idx_j is the index of atoms in the frame such that they have species =a2 + # create tuples of the form (idx_frame, idx_i, idx_j) where idx_i is the + # index of atoms in the frame such that they have species =a1 and idx_j + # is the index of atoms in the frame such that they have species =a2 idx_i, idx_j = np.where(f.numbers == a1)[0], np.where(f.numbers == a2)[0] frame_samples.append(list(product([idx_frame], idx_i, idx_j))) @@ -77,19 +90,21 @@ ) # %% # -# Equistore uses Labels that describe or enumerate each column of the values being -# considered. For example in the code snippet above, we used labels to specify that -# the array of sample indices has three columns the first column always holds the -# structure index, whereas the two following columns have info about the atoms +# Equistore uses Labels that describe or enumerate each column of the values +# being considered. 
For example in the code snippet above, we used labels to +# specify that the array of sample indices has three columns the first column +# always holds the structure index, whereas the two following columns have info +# about the atoms # Labels((name1, name2, name3), [(value1, value2, value3), # (value1, value2, value3), # (value1, value2, value3)]) # %% +# # For this particular case, each row describes the corresponding row of -# "TensorMap.values" that are called samples Now we need to find the corresponding -# values of the bond length for each sample +# "TensorMap.values" that are called samples Now we need to find the +# corresponding values of the bond length for each sample block_values = [] for (a1, a2) in species_pairs: @@ -100,9 +115,9 @@ block_values.append(np.vstack(frame_values)) # %% # -# We could have easily merged this operation with the loops above but for clarity we -# are repeating them here. We use ASE's get_all_distances() function to calculate the -# bond lengths +# We could have easily merged this operation with the loops above but for +# clarity we are repeating them here. We use ASE's get_all_distances() function +# to calculate the bond lengths block_components = [Labels(["spherical_symmetry"], np.asarray([[0]], dtype=np.int32))] # spherical_symmetry has just one value = 0 to specify that this quantity is a scalar @@ -115,12 +130,13 @@ # number of **components** = (2 x *lambda* + 1) where lambda tags the behaviour # under the irreducible SO(3) group action. block_properties = Labels(["Angstrom"], np.asarray([(0,)], dtype=np.int32)) + # TODO # %% # -# We have collected all the necessary ingredients to create our first TensorMap. Since -# a TensorMap is a container that holds blocks of data - namely TensorBlocks, let us -# transform our data to TensorBlock format +# We have collected all the necessary ingredients to create our first TensorMap. 
+# Since a TensorMap is a container that holds blocks of data - namely +# TensorBlocks, let us transform our data to TensorBlock format blocks = [] for block_idx, samples in enumerate(block_samples): blocks.append( @@ -136,19 +152,20 @@ # %% # -# A TensorBlock is the fundamental constituent of a TensorMap. Each Tensorblock is -# associated with "values" or data array with n-dimensions (here 3 dimensions), each -# identified by a Label. The first dimension refers to the *samples*. +# A TensorBlock is the fundamental constituent of a TensorMap. Each Tensorblock +# is associated with "values" or data array with n-dimensions (here 3 +# dimensions), each identified by a Label. The first dimension refers to the +# *samples*. # -# The last dimension of the n-dimensional array is the one indexing the "properties" or -# features of what we are describing in the TensorBlock. These also usually correspond -# to all the entries in the basis or :math: ``. -# For the given example, the property dimension is a dummy variable since we are just -# storing one number corresponding to the bondlength(A). But for instance, we could have -# chosen to project these values on a radial basis , then the properties -# dimension would correspond to the radial channels or *n* going from 0 up to -# :math:`n_max`. +# The last dimension of the n-dimensional array is the one indexing the +# "properties" or features of what we are describing in the TensorBlock. These +# also usually correspond to all the entries in the basis or :math: ``. +# For the given example, the property dimension is a dummy variable since we are +# just storing one number corresponding to the bondlength(A). But for instance, +# we could have chosen to project these values on a radial basis , +# then the properties dimension would correspond to the radial channels or *n* +# going from 0 up to :math:`n_max`. 
# # All intermediate dimensions of the array are referred to as *components* and # are used to describe vectorial or tensorial components of the data. @@ -166,14 +183,16 @@ # labels (this special class of labels for blocks are also called "keys") # %% +# # Storing potential targets as TensorMaps -# ------------------------------------------- -# Before we use the bond lengths with models to predict the energies of the structure, -# lets also briefly look at how potential targets such as energies or forces would be -# stored as Equistore TensorMaps. +# --------------------------------------- +# +# Before we use the bond lengths with models to predict the energies of the +# structure, lets also briefly look at how potential targets such as energies or +# forces would be stored as Equistore TensorMaps. # -# Just like we did for the bond-lengths, we can create a TensorMap for energies of the -# structures +# Just like we did for the bond-lengths, we can create a TensorMap for energies +# of the structures energies = np.array([f.info["energy"] for f in frames]) energy_tmap = TensorMap( @@ -191,8 +210,9 @@ ], ) # %% -# we created a dummy index to address our block of the energy_tmap that just has one -# tensorblock. +# +# we created a dummy index to address our block of the energy_tmap that just has +# one tensorblock. force_values = [] force_samples = [] for idx_frame, f in enumerate(frames): @@ -221,13 +241,15 @@ ) # %% +# # Summary to building Tensormaps -# --------------------------------------------- +# ------------------------------ # -# A TensorMap is simply obtained by collecting some Tensorblocks, each addressed by a -# key value, into a common container. The TensorBlocks contain within them the actual -# values (some n-dimensional array or tensor) you might be interested in working with, -# but carry along the Labels specifying what each dimension corresponds to. 
+# A TensorMap is simply obtained by collecting some Tensorblocks, each addressed +# by a key value, into a common container. The TensorBlocks contain within them +# the actual values (some n-dimensional array or tensor) you might be interested +# in working with, but carry along the Labels specifying what each dimension +# corresponds to. # list_of_blocks = [] # list_of_blocks.append( TensorBlock(block.values = values, @@ -240,41 +262,46 @@ # %% # # Accessing different Blocks of the Tensormap -# ------------------------------------------------ +# ------------------------------------------- # -# There are multiple ways to access blocks on the TensorMap, either by specifying the -# index value corresponding to the absolute position of the block in the TensorMap, -# or by specifying the values of one or multiple keys of the TensorMap. -# For instance, the first tensorblock can be accessed using TensorMap.block(0) -# In the example above, +# There are multiple ways to access blocks on the TensorMap, either by +# specifying the index value corresponding to the absolute position of the block +# in the TensorMap, or by specifying the values of one or multiple keys of the +# TensorMap. For instance, the first tensorblock can be accessed using +# TensorMap.block(0) In the example above, energy_tmap.block(0) # %% +# # just returns the only block in the energy tensormap, whereas bond_lengths.block(1) # %% -# returns the block corresponding to key = bond_lengths.keys[1] (that happens to be the -# H-C block, i.e. species_1 = 1 and species_2 = 6) # -# The second method involves specifying the values of the keys of the TensorBlock -# directly, for instance if we are interested in the bond length block between H and C, -# we can also get them +# returns the block corresponding to key = bond_lengths.keys[1] (that happens to +# be the H-C block, i.e. 
species_1 = 1 and species_2 = 6) +# +# The second method involves specifying the values of the keys of the +# TensorBlock directly, for instance if we are interested in the bond length +# block between H and C, we can also get them bond_lengths.block(species_1=1, species_2=6) # %% -# If we are just interested in blocks that have the first atom as H irrespective of the -# species of the second atom, +# +# If we are just interested in blocks that have the first atom as H irrespective +# of the species of the second atom, bond_lengths.blocks(species_1=1) + # %% -# Notice that we use TensorMap.block**s** as more than one block satisfies the selection -# criteria. This returns the list of relevant block. If one is interested in identifying -# the -# indices of these blocks in the TensorMap, +# +# Notice that we use TensorMap.block**s** as more than one block satisfies the +# selection criteria. This returns the list of relevant block. If one is +# interested in identifying the indices of these blocks in the TensorMap, bond_lengths.blocks_matching(species_1=1) # %% +# # precisely returns the list of indices of all the blocks where species_1 = 1 # (namely 0,1,2,3) and one can then use these indices to also identify the # corresponding keys @@ -283,7 +310,7 @@ # %% # # Simple operations on TensorMaps -# ------------------------------------------- +# ------------------------------- # # 1. Reshaping Blocks # 2. 
Reindexing Blocks @@ -298,21 +325,22 @@ # %% # # Training your first model using Equistore -# ---------------------------------------------- -# To demonstrate the accessibility and flexiblity of Equistore, we are going to use -# the polynomial features of the bond lengths, with a cutoff based on the +# ----------------------------------------- +# +# To demonstrate the accessibility and flexibility of Equistore, we are going to +# use the polynomial features of the bond lengths, with a cutoff based on the # atomic number of the species involved, to predict the energy of the system. Cutoff = {1: 2, 6: 3, 7: 4, 8: 4} # %% # -# As Equistore indexes features based on their metadata, it facillitates the -# implementation of customizable feature engineering for different feature subsets. -# In the following code block, we will build the polynomial features of the bond -# lengths, with its corresponding cutoff. For instance, -# the bond length of C-H will have its cutoff at :math:`3 + 2 = 5`, meaning that -# we will take polynomial features of C-H up to degree 5. +# As Equistore indexes features based on their metadata, it facilitates the +# implementation of customizable feature engineering for different feature +# subsets. In the following code block, we will build the polynomial features of +# the bond lengths, with its corresponding cutoff. For instance, the bond length +# of C-H will have its cutoff at :math:`3 + 2 = 5`, meaning that we will take +# polynomial features of C-H up to degree 5. training_features = [] @@ -336,8 +364,8 @@ # %% # # Using these features, we can now build our model to predict the energy of the -# system. For the sake of simplicity, we are going to use Sklearn's implementation -# for Linear Regression. +# system. For the sake of simplicity, we are going to use Sklearn's +# implementation for Linear Regression. 
from sklearn.linear_model import LinearRegression # noqa @@ -346,3 +374,7 @@ print( "The R2 score for our model is {}".format(model.score(training_features, energies)) ) + +# %% +# +# .. end-body