aiidateam · gbrunin · Jul 29, 2024 · Jul 30, 2024 · Aug 7, 2024 · Aug 28, 2024
diff --git a/.gitignore b/.gitignore
@@ -25,6 +25,9 @@ wheels/
 .installed.cfg
 *.egg
 
+# Pycharm
+.idea/*
+
 # PyInstaller
 #  Usually these files are written by a python script from a template
 #  before PyInstaller builds the exe, so as to inject date/other infos into it.
@@ -106,3 +109,4 @@ ENV/
 *.pyc
 *~
 .DS_Store
+tmp.md
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -2,6 +2,7 @@ repos:
 -   repo: https://github.com/pre-commit/pre-commit-hooks
     rev: v4.2.0
     hooks:
+    -   id: double-quote-string-fixer
     -   id: end-of-file-fixer
     -   id: fix-encoding-pragma
     -   id: mixed-line-ending

diff --git a/docs/Makefile b/docs/Makefile
@@ -0,0 +1,20 @@
+# Minimal makefile for Sphinx documentation
+# Every target is forwarded to $(SPHINXBUILD) in "make mode" (-M <target>).
+
+# You can set these variables from the command line, and also
+# from the environment for the first two.
+SPHINXOPTS    ?=
+SPHINXBUILD   ?= sphinx-build
+SOURCEDIR     = .
+BUILDDIR      = _build
+
+# Put it first so that "make" without argument is like "make help".
+help:
+	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+.PHONY: help Makefile
+
+# Catch-all target: route all unknown targets to Sphinx using "make mode"; $(O) is meant as a shortcut for $(SPHINXOPTS).
+# Makefile is declared phony above, so as a prerequisite it makes this recipe run on every invocation and is itself never matched by "%".
+%: Makefile
+	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
diff --git a/docs/conf.py b/docs/conf.py
@@ -0,0 +1,36 @@
+# -*- coding: utf-8 -*-
+# Configuration file for the Sphinx documentation builder.
+# Configures the qe-tools docs: MyST parser, sphinx_design, sphinx_copybutton and the sphinx_book_theme HTML theme.
+# For the full list of built-in configuration values, see the documentation:
+# https://www.sphinx-doc.org/en/master/usage/configuration.html
+
+# -- Project information -----------------------------------------------------
+# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
+
+project = 'qe-tools'
+copyright = '2024, Marnik Bercx'
+author = 'Marnik Bercx'
+
+# -- General configuration ---------------------------------------------------
+# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
+
+extensions = [
+    'myst_parser',
+    # "autodoc2",
+    # "sphinx.ext.intersphinx",
+    # "sphinx.ext.viewcode",
+    # "sphinx.ext.autodoc",
+    # "sphinx.ext.autosummary",
+    'sphinx_design',
+    'sphinx_copybutton',
+]
+
+templates_path = ['_templates']
+exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
+
+
+# -- Options for HTML output -------------------------------------------------
+# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
+
+html_theme = 'sphinx_book_theme'
+html_static_path = ['_static']
diff --git a/docs/design/quicknotes.md b/docs/design/quicknotes.md
@@ -0,0 +1,61 @@
+# Quick notes
+
+A place to write down quick notes on the design decisions made for the `qe-tools` package.
+
+## Main goal
+
+The main goal of this package should be to develop **easy to use** tools to deal with Quantum ESPRESSO that **just work**.
+
+* **Easy to use**: Tools should be easy to find, intuitive and require as few steps as possible to get the desired outcome.
+* **Just work**: Tools should be robust and clever, working as the user intends with a variety of input options without having to understand their functionality in too much detail.
+
+## One input per code
+
+Typically, each executable in the Quantum ESPRESSO suite will have a single input file.
+Technical exceptions are restart files such as the charge density, but those are full (sometimes binary system-specific) files that don't require a Python object to represent.
+The input class should allow for two use cases:
+
+1. Generate a Quantum ESPRESSO input file from various Python types.
+2. Parse an existing input file into various Python types.
+
+## One output object for each calculation
+
+Although QE can provide the output of a calculation distributed over various files, it would be useful to gather all of these into a single "output" object from which the user can access all data they are interested in.
+
+Question: would it instead not be useful to have one object that has both inputs and outputs? Reasons could be:
+
+1. The user might want to just load both input/output from the directory in one fell swoop, since they might want to work with the output of the calculation differently depending on the input.
+2. Some parsing functionality might be easier to implement if the inputs are known. I think it may even be necessary for some outputs to know e.g. what the number of k-points is. Some of the inputs are also in the XML output though...
+
+## One output file, one parser
+
+Even though the user might want to obtain all outputs from a single object, it still is sensible to have a separate parser tool for each output file.
+The output class can then rely on these nicely separated parsers to combine all outputs into one based on preference.
+
+## ASE/`pymatgen`/AiiDA/... support
+
+Most users will want to provide e.g. the input structure or output data in the flavour of their choosing.
+We should provide tools for converting:
+
+1. The flavored Python types (`Structure`, `Atoms`, ...) into the Quantum ESPRESSO input file.
+2. The Quantum ESPRESSO raw parsed output into the flavour's Python type.
+
+---
+
+Notes from Guillaume:
+
+I have changed the structure of the code to better reflect the distinction between input and output parsers.
+
+For the outputs, I created objects such as PwOutput, that inherit from an abstract BaseOutput, that for now can be instantiated with a from_dir method (a from_files method could/should be added as well).
+
+A user with a job that ran in a given directory could get the outputs easily using this classmethod. In these from_dir methods, specific XML and/or standard output Parsers would be used to get the results.
+
+Each Parser would parse a single file, and the logic of parsing and extracting the outputs from the different codes would have to be implemented in each from_dir method.
+
+For instance, a NebOutput.from_dir would parse the standard output of the global computation and probably the standard outputs and/or XML files for each image. The extracted outputs would be stored as a simple dictionary and these objects would not rely on any external package.
+
+Then, in qe_tools.extractors, ASE and pymatgen objects could be constructed (e.g., ase.Atoms/pymatgen.Structure, band structures,...), allowing each to be optional dependencies.
+
+This is only the base logic of the new structure and many things remain to be implemented.
+
+The idea now is to see what breaks with this in `aiida-quantumespresso` and how the parsing could be moved from there to here. Then, more will be added depending on the needs.
diff --git a/docs/index.md b/docs/index.md
@@ -0,0 +1,23 @@
+# Welcome to `qe-tools`'s documentation!
+
+## 💾 Installation
+
+To install the package from [PyPI](https://pypi.org/), simply use `pip`:
+
+```
+pip install qe-tools
+```
+
+## 💡 Tutorials
+
+```{toctree}
+tutorials/getting_started.md
+tutorials/inputs_desired_usage.md
+tutorials/outputs_desired_usage.md
+```
+
+## 🤔 Design Documents
+
+```{toctree}
+design/quicknotes.md
+```
diff --git a/docs/make.bat b/docs/make.bat
@@ -0,0 +1,35 @@
+@ECHO OFF
+REM Work from the directory containing this script; popd at :end restores the caller's directory.
+pushd %~dp0
+
+REM Command file for Sphinx documentation
+REM Default SPHINXBUILD to sphinx-build unless the caller already set it.
+if "%SPHINXBUILD%" == "" (
+	set SPHINXBUILD=sphinx-build
+)
+set SOURCEDIR=.
+set BUILDDIR=_build
+REM Probe the build command with all output discarded; errorlevel 9009 or above is reported below as "command not found".
+%SPHINXBUILD% >NUL 2>NUL
+if errorlevel 9009 (
+	echo.
+	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
+	echo.installed, then set the SPHINXBUILD environment variable to point
+	echo.to the full path of the 'sphinx-build' executable. Alternatively you
+	echo.may add the Sphinx directory to PATH.
+	echo.
+	echo.If you don't have Sphinx installed, grab it from
+	echo.https://www.sphinx-doc.org/
+	exit /b 1
+)
+REM With no argument show the Sphinx help; otherwise forward the requested target to "%SPHINXBUILD% -M".
+if "%1" == "" goto help
+
+%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+goto end
+
+:help
+%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+
+:end
+popd
diff --git a/docs/tutorials/getting_started.md b/docs/tutorials/getting_started.md
@@ -0,0 +1,34 @@
+# Getting started
+
+## Parsing `pw.x` outputs
+
+Say we have just run a `pw.x` calculation in the `qe_dir` directory:
+
+```
+from qe_tools.outputs.pw import PwOutput
+
+pw_out = PwOutput.from_dir("qe_dir")
+pw_out.outputs
+```
+
+## Parsing a single output file
+
+If you only want to parse the `stdout` of the `pw.x` calculation, you can load the parser class directly:
+
+```
+from qe_tools.outputs.parsers.pw import PwStdoutParser
+pw_out = PwStdoutParser.from_file('qe_dir/pw.out')
+```
+
+## Parsing an already existing input file
+
+Currently the input class `PwInputFile` only supports parsing an already existing input file:
+
+```
+from qe_tools.inputs.pw import PwInputFile
+from pathlib import Path
+
+pw_input = PwInputFile(Path('qe_dir/pw.in').read_text())
+```
+
+This will also only really parse the structure and k-points.
diff --git a/docs/tutorials/inputs_desired_usage.md b/docs/tutorials/inputs_desired_usage.md
@@ -0,0 +1,40 @@
+# Inputs: Desired usage
+
+## Producing a `pw.x` input file
+
+A user might want to generate a Quantum ESPRESSO input file from other Python types, e.g.:
+
+```
+PwInput(
+    structure=
+    parameters=
+    kpoints=
+    magmom=
+    ...
+)
+```
+
+It will definitely be challenging to define a complete API that doesn't get overly big or convoluted.
+
+Another approach for constructing a class that defines the inputs of a Quantum ESPRESSO calculation can be found in:
+
+[https://github.com/elinscott/qe_input_prototype](https://github.com/elinscott/qe_input_prototype)
+
+This defines the input files as `pydantic` models, allows you to complete the namelists with tab-completion, and also offers validation features based on the Quantum ESPRESSO files that define the inputs.
+However, at first glance it requires a lot of Quantum ESPRESSO knowledge to populate the inputs.
+We could use these classes under the hood, having a wrapper class that knows how to convert commonly used formats from ASE, `pymatgen`, etc into the corresponding namelists.
+
+One challenge here will always be that Quantum ESPRESSO expects properties etc to be defined in terms of the _kinds_, whereas most other structure classes have a site-based approach.
+This, in combination with the 3-character limit for the kind names, means that we'll typically have to use a threshold to combine sites into one kind even if they don't have exactly the same value for all properties.
+Otherwise we would create too many kinds for the 3-character naming limit to accommodate.
+
+## Parsing an existing `pw.x` input file
+
+It would also be nice to be able to directly parse from a file, e.g.:
+
+```
+PwInput.from_file('path/to/file')
+```
+
+These features are already present in the package, but seem to be a bit limited.
+Moreover, I would not call the class `PwInputFile`, but rather `PwInput`, to be in line with `PwOutput`, _and_ the fact that it doesn't just represent the file.
diff --git a/docs/tutorials/outputs_desired_usage.md b/docs/tutorials/outputs_desired_usage.md
@@ -0,0 +1,25 @@
+# Outputs: Desired usage
+
+## One class to rule both input/output
+
+If you want to parse all the inputs and outputs from a `pw.x` run in the `pw_run` directory:
+
+```
+from qe_tools.parsers import PwParser
+
+parser = PwParser.from_dir('pw_run')
+```
+
+Then you can obtain the outputs as
+
+```
+parser.outputs['structure']
+```
+
+Similarly, the inputs can then be obtained from the `inputs` attribute:
+
+```
+parser.inputs['structure']
+```
+
+(Maybe this should not be a "parser", but a "calculation". E.g. `PwCalc` that has `inputs` and `outputs`.)
diff --git a/pyproject.toml b/pyproject.toml
@@ -26,6 +26,7 @@ dependencies = [
     'numpy',
     'scipy',
     'packaging',
+    'xmlschema',
 ]
 
 [project.urls]
@@ -41,12 +42,18 @@ dev = [
     'pytest~=7.0',
     'pytest-cov',
     'pytest-cases~=3.2',
+    'pytest-regressions',
     'ruamel.yaml',
 ]
 docs = [
     'Sphinx',
     'docutils',
     'sphinx_rtd_theme',
+    'myst-parser',
+    # Needed by docs/conf.py: html_theme = 'sphinx_book_theme' plus the sphinx_design and sphinx_copybutton extensions.
+    'sphinx-book-theme',
+    'sphinx-design',
+    'sphinx-copybutton',
 ]
 
 [tool.flit.module]

diff --git a/src/qe_tools/__init__.py b/src/qe_tools/__init__.py
@@ -2,8 +2,10 @@
 """A set of useful tools to manage Quantum ESPRESSO files."""
 
 from ._constants import DEFAULT as CONSTANTS  # isort:skip
-from . import converters, exceptions, parsers, extractors  # isort:skip
+from ._elements import ELEMENTS
+
+from . import converters, exceptions, extractors  # isort:skip
 
 __version__ = '2.3.0'
 
-__all__ = ('CONSTANTS', 'parsers', 'converters', 'exceptions', 'extractors')
+__all__ = ('CONSTANTS', 'ELEMENTS', 'converters', 'exceptions', 'extractors')