Skip to content

Commit 122048f

Browse files
committed
git merge --stats -- add options for binning (resolution shells
1 parent c699e5f commit 122048f

File tree

3 files changed

+84
-27
lines changed

3 files changed

+84
-27
lines changed

docs/merge-help.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ Options:
1717
--stats[=N] Print data metrics in N resol. shells (default: 10).
1818
Add U (e.g. =10U or =U) for unweighted metrics.
1919
Add X for XDS-like weighted metrics (details in docs).
20+
Add s or e for different binning (more in docs).
2021
--compare Compare unmerged and merged data (no output file).
2122
--print-all Print all compared reflections.
2223

docs/program.rst

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -666,6 +666,9 @@ or calculate quality metrics for unmerged data.
666666
.. literalinclude:: merge-help.txt
667667
:language: console
668668

669+
Quality metrics
670+
---------------
671+
669672
Here is an example output of the quality metrics::
670673

671674
$ gemmi merge --stats=10 mdm2_unmerged.mtz
@@ -722,7 +725,30 @@ Gemmi can calculate all three variants:
722725
**u**\ nweighted formulas.
723726
* With added `X` (e.g. `--stats=10X`) it is compatible with XDS and cctbx.
724727

725-
TBC (CC1/2, sigma-tau, `--anom`, binning, filtering/misfits, unit cell in MTZ)
728+
CC:sub:`1/2` is calculated using the σ-τ method, proposed by
729+
`Assmann et al (2016) <https://doi.org/10.1107/S1600576716005471>`_ in 2016
730+
and described in detail on
731+
`this XDSwiki page <https://wiki.uni-konstanz.de/xds/index.php?title=CC1/2>`_.
732+
(TBC: weighting)
733+
734+
With the `--anom` option, I+ and I- values are treated separately.
735+
Centric reflections are counted in and treated as I+ (unlike, for instance,
736+
in MRFANA "within I+/I-" values, which contain only actual I+/I- -- the
737+
difference is negligible).
738+
739+
Three ways of setting up resolution shells are supported:
740+
741+
* default -- shells with equal volumes (equispaced in d*³),
742+
* 's' -- shells with increasing volumes (equispaced in d*²),
743+
* 'e' -- shells with an equal number of observations.
744+
745+
Minor detail: MTZ files may store different unit cell parameters for different
746+
frames (batches). Additionally, they store global and per-dataset cell
747+
parameters. These differences don't really matter, but are annoying when
748+
comparing values obtained from different programs. So we have two options
749+
here: ...
750+
751+
Filtering/misfits - TBC
726752

727753
ecalc
728754
=====

prog/merge.cpp

Lines changed: 56 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,8 @@ const option::Descriptor Usage[] = {
4747
{ Stats, 0, "", "stats", Arg::Optional,
4848
" --stats[=N] \tPrint data metrics in N resol. shells (default: 10)."
4949
"\n\tAdd U (e.g. =10U or =U) for unweighted metrics."
50-
"\n\tAdd X for XDS-like weighted metrics (details in docs)." },
50+
"\n\tAdd X for XDS-like weighted metrics (details in docs)."
51+
"\n\tAdd s or e for different binning (more in docs)." },
5152
{ Compare, 0, "", "compare", Arg::None,
5253
" --compare \tCompare unmerged and merged data (no output file)." },
5354
{ PrintAll, 0, "", "print-all", Arg::None,
@@ -257,22 +258,15 @@ double take_average(const std::vector<MergingStats>& stats,
257258
return sum / count;
258259
}
259260

260-
void print_merging_statistics(const Intensities& intensities, int nbins, char use_weights) {
261-
gemmi::Binner binner;
262-
const gemmi::Binner* binner_ptr = nullptr;
263-
if (nbins > 1) {
264-
auto method = gemmi::Binner::Method::Dstar3;
265-
binner.setup(nbins, method, gemmi::IntensitiesDataProxy{intensities});
266-
binner_ptr = &binner;
267-
}
268-
auto stats = intensities.calculate_merging_stats(binner_ptr, use_weights);
269-
if (nbins > 1) {
261+
void print_merging_statistics(const std::vector<MergingStats>& stats,
262+
const gemmi::Binner* binner) {
263+
if (binner) {
270264
printf("In resolution shells:\n"
271265
" d_max d_min #obs #uniq #used Rmerge Rmeas Rpim CC1/2\n");
272-
for (int i = 0; i < nbins; ++i) {
266+
for (size_t i = 0; i < binner->size(); ++i) {
273267
const MergingStats& ms = stats[i];
274268
printf("%7.3f %5.3f %7d %6d %6d %7.3f %7.3f %7.3f %8.4f\n",
275-
binner.dmax_of_bin(i), binner.dmin_of_bin(i),
269+
binner->dmax_of_bin(i), binner->dmin_of_bin(i),
276270
ms.all_refl, ms.unique_refl, ms.stats_refl,
277271
ms.r_merge(), ms.r_meas(), ms.r_pim(), ms.cc_half());
278272
}
@@ -284,16 +278,23 @@ void print_merging_statistics(const Intensities& intensities, int nbins, char us
284278
printf("\nObservations (all reflections): %d\n", total.all_refl);
285279
printf("Unique reflections: %d\n", total.unique_refl);
286280
printf("Used refl. (those with multiplicity 2+): %d\n", total.stats_refl);
287-
printf(" Overall Avg of %d shells weighted by #used\n", nbins);
288-
printf("R-merge: %7.4f %7.4f\n",
289-
total.r_merge(), take_average(stats, &MergingStats::r_merge));
290-
printf("R-meas: %7.4f %7.4f\n",
291-
total.r_meas(), take_average(stats, &MergingStats::r_meas));
292-
printf("R-pim: %7.4f %7.4f\n",
293-
total.r_pim(), take_average(stats, &MergingStats::r_pim));
281+
printf(" Overall");
282+
if (binner)
283+
printf(" Avg of %zu shells weighted by #used", binner->size());
284+
printf("\nR-merge: %7.4f", total.r_merge());
285+
if (binner)
286+
printf(" %7.4f", take_average(stats, &MergingStats::r_merge));
287+
printf("\nR-meas: %7.4f", total.r_meas());
288+
if (binner)
289+
printf(" %7.4f", take_average(stats, &MergingStats::r_meas));
290+
printf("\nR-pim: %7.4f", total.r_pim());
291+
if (binner)
292+
printf(" %7.4f", take_average(stats, &MergingStats::r_pim));
294293
// xdscc12 prints CC1/2 with 5 significant digits, do the same for easy comparison
295-
printf("CC1/2: %8.5f %8.5f\n",
296-
total.cc_half(), take_average(stats, &MergingStats::cc_half));
294+
printf("\nCC1/2: %8.5f", total.cc_half());
295+
if (binner)
296+
printf(" %8.5f", take_average(stats, &MergingStats::cc_half));
297+
printf("\n");
297298
}
298299

299300
} // anonymous namespace
@@ -340,6 +341,7 @@ int GEMMI_MAIN(int argc, char **argv) {
340341
if (verbose)
341342
output_intensity_statistics(intensities);
342343

344+
auto binning_method = gemmi::Binner::Method::Dstar3;
343345
if (p.options[Stats]) {
344346
int nbins = 10;
345347
char use_weights = 'Y';
@@ -354,19 +356,47 @@ int GEMMI_MAIN(int argc, char **argv) {
354356
}
355357
}
356358
for (; *endptr != '\0'; ++endptr) {
357-
if (*endptr == 'U' || *endptr == 'X') {
358-
use_weights = *endptr;
359+
char c = *endptr;
360+
if (c == 'U' || c == 'X') {
361+
use_weights = c;
362+
} else if (c == 'e') {
363+
binning_method = gemmi::Binner::Method::EqualCount;
364+
} else if (c == 's') {
365+
binning_method = gemmi::Binner::Method::Dstar2;
359366
} else {
360-
fprintf(stderr, "Wrong argument for option --stats: %s\n", arg);
367+
fprintf(stderr, "Unexpected character '%c' in option --stats: %s\n", c, arg);
361368
return 1;
362369
}
363370
}
364371
}
372+
if (verbose) {
373+
if (use_weights == 'Y')
374+
printf("Using weights for R-metrics like Aimless\n");
375+
else if (use_weights == 'X')
376+
printf("Using weights for R-metrics like XDS\n");
377+
else if (use_weights == 'U')
378+
printf("NOT using weights for R-metrics.\n");
379+
}
365380
try {
366381
if (to_anom)
367382
intensities.set_isigns(DataType::Anomalous);
368383
intensities.sort();
369-
print_merging_statistics(intensities, nbins, use_weights);
384+
gemmi::Binner binner;
385+
const gemmi::Binner* binner_ptr = nullptr;
386+
if (nbins > 1) {
387+
if (verbose) {
388+
const char* binning_desc = "equal volumes (equispaced in d*^3)";
389+
if (binning_method == gemmi::Binner::Method::Dstar2)
390+
binning_desc = "increasing volumes (equispaced in d*^2)";
391+
else if (binning_method == gemmi::Binner::Method::EqualCount)
392+
binning_desc = "equal reflection count";
393+
printf("Setting up resolution shells with %s.\n", binning_desc);
394+
}
395+
binner.setup(nbins, binning_method, gemmi::IntensitiesDataProxy{intensities});
396+
binner_ptr = &binner;
397+
}
398+
auto stats = intensities.calculate_merging_stats(binner_ptr, use_weights);
399+
print_merging_statistics(stats, binner_ptr);
370400
} catch (std::exception& e) {
371401
std::fprintf(stderr, "ERROR: %s\n", e.what());
372402
return 1;

0 commit comments

Comments
 (0)