"""
Suite of tests that go through the entire process of creating a new fktable
from a empty folder.
The target theory is 400 and the relevant `.toml`, theory runcard and eko template
are downloaded from https://github.com/NNPDF/theories during this test so this tests
has the double effect of ensuring compatibility between both repositories.
"""
import itertools
from pathlib import Path
from subprocess import run
from urllib.request import urlretrieve

import numpy as np
import pytest
from eko.interpolation import XGrid
from eko.io.runcards import OperatorCard
from pineappl.fk_table import FkTable
from yaml import dump, safe_load

THEORIES_REPO = "https://raw.githubusercontent.com/NNPDF/theories/main"
THEORYID = 400
REGRESSION_ROOT = Path(__file__).parent / "regression_data"
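# Precomputed regression results are stored in REGRESSION_ROOT; `benchmark_regression`
# below regenerates a dataset's `.npy` file whenever it is missing.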

def _download_resources(filename, tmp_path):
    """Download a resource (``filename``) from the theories repo and save it under
    ``tmp_path`` at the same path it has relative to THEORIES_REPO.
    """
    output_file = tmp_path / filename
    # `parents=True` since resources may live in nested, not-yet-created directories
    output_file.parent.mkdir(parents=True, exist_ok=True)
    urlretrieve(f"{THEORIES_REPO}/{filename}", output_file)
    return output_file

def _download_dataset(dataset, theoryid, tmp_path):
    """Download both the yaml file and all grids of a given dataset for a given theory."""
    yaml_file = f"data/yamldb/{theoryid}/{dataset}.yaml"
    _download_resources(yaml_file, tmp_path)
    # However, the yaml file belongs in the root of the yamldb directory, move it there!
    right_yaml = tmp_path / "data" / "yamldb" / f"{dataset}.yaml"
    (tmp_path / yaml_file).rename(right_yaml)
    # Download the relevant grids for this dataset
    res = safe_load(right_yaml.read_text(encoding="utf-8"))
    grids = list(itertools.chain(*res["operands"]))
    for grid_name in grids:
        _download_resources(f"data/grids/{theoryid}/{grid_name}.pineappl.lz4", tmp_path)
    return grids

class _FakePDF:
    """A fake lhapdf-like PDF to convolve the produced FK tables with."""

    def __init__(self):
        # PDG ids: quarks from -6 to 6, with the gluon (21) in place of 0
        # and the photon (22) in the last slot
        pids = np.arange(-6, 8)
        pids[6] = 21
        pids[-1] = 22
        alphas = np.linspace(1.2, 1.8, len(pids))
        betas = np.linspace(1.2, 3.8, len(pids))
        self._alphas = dict(zip(pids, alphas))
        self._betas = dict(zip(pids, betas))

    def xfxQ2(self, pid, x, q2):
        """Compute x^alpha * (1-x)^beta, with (alpha, beta) depending only on the pid."""
        alpha = self._alphas[pid]
        beta = self._betas[pid]
        return np.power(x, alpha) * np.power(1 - x, beta)
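# Illustrative usage: `_FakePDF().xfxQ2(21, 0.5, 1e4)` returns 0.5**alpha * 0.5**beta,
# with (alpha, beta) the pair assigned to the gluon above, independently of q2.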

def _trim_template(template_card, take_points=10):
    """Trim the template card so that the number of x-values to compute is much smaller."""
    raw_card = safe_load(template_card.read_text(encoding="utf-8"))
    # Replace the legacy `mu0` key with the `init` entry (initial scale and nf=4)
    # that eko's OperatorCard expects
    raw_card["init"] = (raw_card["mu0"], 4)
    del raw_card["mu0"]
    card_info = OperatorCard.from_dict(raw_card)
    # Keep only roughly `take_points` points of the original xgrid
    original_x = card_info.xgrid
    size = len(original_x.raw)
    skip = int(size / take_points)
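    # e.g. a 50-point grid gives skip = 5, so only x[0], x[5], ..., x[45] survive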
    card_info.xgrid = XGrid(original_x.raw[:size:skip])
    template_card.write_text(dump(card_info.raw), encoding="utf-8")

@pytest.mark.parametrize("dataset", ["LHCBWZMU8TEV", "INTEGXT3"])
def benchmark_regression(tmp_path, dataset):
    """Run pineko through subprocess to ensure that the command line interface is
    working exactly as intended.

    If the regression data do not exist, they are created and the test fails, i.e.,
    in order to regenerate the data just remove the previous `<dataset>.npy` file.
    """
    # We start by downloading pineko.toml in order to generate the folder structure
    _download_resources("pineko.toml", tmp_path)
    # Which we create... now!
    run(["pineko", "scaffold", "new"], cwd=tmp_path, check=True)
    # Now download the other necessary objects
    _download_resources(f"data/theory_cards/{THEORYID}.yaml", tmp_path)
    template_card = _download_resources(
        f"data/operator_cards/{THEORYID}/_template.yaml", tmp_path
    )
    _trim_template(template_card)
    # And use some small (but not trivial!) dataset to test
    gridnames = _download_dataset(dataset, THEORYID, tmp_path)
    # Now go, first with the operator cards and the eko creation
    run(
        ["pineko", "theory", "opcards", str(THEORYID), dataset],
        cwd=tmp_path,
        check=True,
    )
    run(["pineko", "theory", "ekos", str(THEORYID), dataset], cwd=tmp_path, check=True)
    # Then FK Table production!
    run(["pineko", "theory", "fks", str(THEORYID), dataset], cwd=tmp_path, check=True)
    # Finally loop over the grids and check the results of the convolution with the PDF
    pdf = _FakePDF()
    regression_path = REGRESSION_ROOT / f"{dataset}.npy"
    result = []
    for grid_name in gridnames:
        fkt = FkTable.read(
            tmp_path / "data" / "fktables" / str(THEORYID) / f"{grid_name}.pineappl.lz4"
        )
        # 2212 is the PDG id of the proton
        result.append(fkt.convolve_with_one(2212, pdf.xfxQ2))
    result = np.concatenate(result)
    if not regression_path.exists():
        np.save(regression_path, result)
        raise FileNotFoundError("Regression data did not exist and have been regenerated")
    regression_data = np.load(regression_path)
    np.testing.assert_allclose(regression_data, result, rtol=4e-6)