Rebased this branch onto master.

Added code from SymPy's output checker for handling floating point values in output. This is similar to my previous approach but takes it further, resolving most of the shortcomings of my original approach. I merged our existing OutputChecker subclass with the code from SymPy into a new AstropyOutputChecker. As roughly 100 lines of code were borrowed from the SymPy project, I included a copy of their license.
mdboom · Jun 25, 2014 · 7eb2506 · 7eb2506
1 parent 55cd727
commit 7eb2506
Show file tree

Hide file tree

Showing 3 changed files with 196 additions and 118 deletions.
diff --git a/astropy/tests/output_checker.py b/astropy/tests/output_checker.py
@@ -0,0 +1,166 @@
+"""
+Implements a replacement for `doctest.OutputChecker` that handles certain
+normalizations of Python expression output.  See the docstring on
+`AstropyOutputChecker` for more details.
+"""
+
+from __future__ import (absolute_import, division, print_function,
+                        unicode_literals)
+
+import doctest
+import re
+
+# Much of this code, particularly the parts of floating point handling, is
+# borrowed from the SymPy project with permission.  See licenses/SYMPY.rst
+# for the full SymPy license.
+
+# Custom doctest option flags, registered with doctest under the names
+# 'FIX' and 'FLOAT_COMPARISON'.
+#
+# FIX: AstropyOutputChecker runs do_fixes() on both the expected and the
+# actual output before comparing them or reporting their difference.
+FIX = doctest.register_optionflag('FIX')
+# FLOAT_COMPARISON: AstropyOutputChecker.check_output() delegates the whole
+# comparison to normalize_floats(), which compares floats by numeric value.
+FLOAT_COMPARISON = doctest.register_optionflag('FLOAT_COMPARISON')
+
+
+class AstropyOutputChecker(doctest.OutputChecker):
+    """
+    A `doctest.OutputChecker` that normalizes the expected ("want") and
+    actual ("got") output of a doctest example before comparing them.
+
+    When the ``FIX`` option flag is set:
+
+    - Removes u'' prefixes on string literals
+    - Ignores the 'L' suffix on long integers
+    - In Numpy dtype strings, removes the leading pipe or byte-order
+      character, i.e. '|S9' -> 'S9'.  Numpy 1.7 no longer includes the
+      pipe in display.
+    - In quoted integer dtype strings, removes a 4 or 8 item size, i.e.
+      'i4' and 'i8' both become 'i'.
+
+    When the ``FLOAT_COMPARISON`` option flag is set:
+
+    - Parses floating point values out of the output and compares their
+      numerical values rather than their string representation.  This
+      naturally supports complex numbers as well (simply by comparing
+      their real and imaginary parts separately).
+    """
+
+    # Keep a reference to the stock checker taken at class-creation time;
+    # check_output and output_difference delegate to it explicitly because
+    # super() cannot be used with an old-style base class.
+    _original_output_checker = doctest.OutputChecker
+
+    # Largest absolute difference at which two parsed floats still count
+    # as equal under FLOAT_COMPARISON.
+    _float_atol = 1e-5
+
+    _str_literal_re = re.compile(
+        r"(\W|^)[uU]([rR]?[\'\"])", re.UNICODE)
+    _byteorder_re = re.compile(
+        r"([\'\"])[|<>]([biufcSaUV][0-9]+)([\'\"])", re.UNICODE)
+    _fix_32bit_re = re.compile(
+        r"([\'\"])([iu])[48]([\'\"])", re.UNICODE)
+    _long_int_re = re.compile(
+        r"([0-9]+)L", re.UNICODE)
+
+    def __init__(self):
+        """
+        Compile the patterns used to locate floats in 'got' and 'want'.
+        """
+
+        # NOTE OutputChecker is an old-style class with no __init__ method,
+        # so we can't call the base class version of __init__ here
+
+        # A float needs a decimal point: '1.', '1.5' or '.5'.  Signs and
+        # exponents are deliberately left out; they are separators below.
+        got_floats = r'(\d+\.\d*|\.\d+)'
+
+        # floats in the 'want' string may contain ellipses
+        want_floats = got_floats + r'(\.{3})?'
+
+        # Characters allowed directly before / after a float.  'j' and 'e'
+        # may follow so that the parts of complex numbers and the mantissa
+        # of exponent notation are recognized.
+        front_sep = r'\s|\+|\-|\*|,'
+        back_sep = front_sep + r'|j|e'
+
+        def compile_float_rgx(floats):
+            # Matches a float at the very start of the string, or one that
+            # directly follows a front separator; either way it must be
+            # followed by a back separator or the end of the string.
+            fbeg = r'^%s(?=%s|$)' % (floats, back_sep)
+            fmidend = r'(?<=%s)%s(?=%s|$)' % (front_sep, floats, back_sep)
+            return re.compile(r'(%s|%s)' % (fbeg, fmidend))
+
+        self.num_got_rgx = compile_float_rgx(got_floats)
+        self.num_want_rgx = compile_float_rgx(want_floats)
+
+    def do_fixes(self, want, got):
+        """
+        Apply the ``FIX`` normalizations to both strings.
+
+        Parameters
+        ----------
+        want : str
+            The expected output.
+        got : str
+            The actual output.
+
+        Returns
+        -------
+        want, got : tuple of str
+            The two strings with u'' prefixes, dtype byte-order markers,
+            4/8 integer dtype sizes and long-integer 'L' suffixes removed.
+        """
+
+        fixes = (
+            (self._str_literal_re, r'\1\2'),
+            (self._byteorder_re, r'\1\2\3'),
+            (self._fix_32bit_re, r'\1\2\3'),
+            (self._long_int_re, r'\1'),
+        )
+
+        for regex, replacement in fixes:
+            want = regex.sub(replacement, want)
+            got = regex.sub(replacement, got)
+
+        return want, got
+
+    def normalize_floats(self, want, got, flags):
+        """
+        Alternative to the built-in check_output that also handles parsing
+        float values and comparing their numeric values rather than their
+        string representations.
+
+        This requires rewriting enough of the basic check_output that, when
+        FLOAT_COMPARISON is enabled, it totally takes over for check_output.
+
+        Parameters
+        ----------
+        want : str
+            The expected output.
+        got : str
+            The actual output.
+        flags : int
+            The doctest option flags in effect for the example.
+
+        Returns
+        -------
+        matches : bool
+            `True` if 'got' is an acceptable match for 'want'.
+        """
+
+        # Handle the common case first, for efficiency:
+        # if they're string-identical, always return true.
+        if got == want:
+            return True
+
+        # TODO parse integers as well ?
+        # Parse floats and compare them. If some of the parsed floats contain
+        # ellipses, skip the comparison.
+        numbers_got = [m.group(1) for m in self.num_got_rgx.finditer(got)]
+        numbers_want = [m.group(1) for m in self.num_want_rgx.finditer(want)]
+        if len(numbers_got) != len(numbers_want):
+            return False
+
+        if numbers_got:
+            replacements = []
+            for ng, nw in zip(numbers_got, numbers_want):
+                if '...' in nw:
+                    # Leave this float untouched in 'got'; the ELLIPSIS
+                    # handling below decides whether it matches.
+                    replacements.append(ng)
+                    continue
+
+                if abs(float(ng) - float(nw)) > self._float_atol:
+                    return False
+
+                replacements.append(nw)
+
+            # Rewrite each float in 'got' with the spelling used in 'want'
+            # so that the string comparisons below can succeed.  A callable
+            # is used rather than '%' formatting so that literal '%'
+            # characters elsewhere in the output are left alone.
+            pending = iter(replacements)
+            got = self.num_got_rgx.sub(lambda match: next(pending), got)
+
+            # Re-check now that the floats agree; without this a match
+            # would be missed when none of the flag-dependent comparisons
+            # below are enabled.
+            if got == want:
+                return True
+
+        # <BLANKLINE> can be used as a special sequence to signify a
+        # blank line, unless the DONT_ACCEPT_BLANKLINE flag is used.
+        if not (flags & doctest.DONT_ACCEPT_BLANKLINE):
+            # Replace <BLANKLINE> in want with a blank line.
+            want = re.sub(
+                r'(?m)^%s\s*?$' % re.escape(doctest.BLANKLINE_MARKER),
+                '', want)
+            # If a line in got contains only spaces, then remove the
+            # spaces.
+            got = re.sub(r'(?m)^\s*?$', '', got)
+            if got == want:
+                return True
+
+        # This flag causes doctest to ignore any differences in the
+        # contents of whitespace strings. Note that this can be used
+        # in conjunction with the ELLIPSIS flag.
+        if flags & doctest.NORMALIZE_WHITESPACE:
+            got = ' '.join(got.split())
+            want = ' '.join(want.split())
+            if got == want:
+                return True
+
+        # The ELLIPSIS flag says to let the sequence "..." in `want`
+        # match any substring in `got`.
+        if flags & doctest.ELLIPSIS:
+            if doctest._ellipsis_match(want, got):
+                return True
+
+        # We didn't find any match; return false.
+        return False
+
+    def check_output(self, want, got, flags):
+        """
+        Return `True` if 'got' matches 'want' under the given flags.
+
+        The ``FIX`` normalizations are applied first when requested.  With
+        ``FLOAT_COMPARISON`` the comparison is done by `normalize_floats`;
+        otherwise it is delegated to the standard doctest checker.
+        """
+
+        if flags & FIX:
+            want, got = self.do_fixes(want, got)
+
+        if flags & FLOAT_COMPARISON:
+            return self.normalize_floats(want, got, flags)
+
+        # Can't use super here because doctest.OutputChecker is not a
+        # new-style class.
+        return self._original_output_checker.check_output(
+            self, want, got, flags)
+
+    def output_difference(self, want, got, flags):
+        """
+        Return the standard doctest description of how 'got' differs from
+        'want', computed after the ``FIX`` normalizations when requested.
+        """
+
+        if flags & FIX:
+            want, got = self.do_fixes(want, got)
+
+        # Can't use super here because doctest.OutputChecker is not a
+        # new-style class.
+        return self._original_output_checker.output_difference(
+            self, want, got, flags)
diff --git a/astropy/tests/pytest_plugins.py b/astropy/tests/pytest_plugins.py
@@ -29,6 +29,7 @@
 from .helper import (
     pytest, treat_deprecations_as_exceptions, enable_deprecations_as_exceptions)
 from .disable_internet import turn_off_internet, turn_on_internet
+from .output_checker import AstropyOutputChecker, FIX, FLOAT_COMPARISON
 
 # these pytest hooks allow us to mark tests and run the marked tests with
 # specific command line options.
@@ -60,127 +61,10 @@ def pytest_addoption(parser):
                   default=False)
 
 
-class OutputCheckerFix(doctest.OutputChecker):
-    """
-    A special doctest OutputChecker that normalizes a number of things common
-    to astropy doctests.
-
-    - Removes u'' prefixes on string literals
-    - In Numpy dtype strings, removes the leading pipe, i.e. '|S9' ->
-      'S9'.  Numpy 1.7 no longer includes it in display.
-    - Supports the FLOAT_COMPARISON flag, which parses floating point values
-      out of the output and compares their numerical values rather than their
-      string representation.  This naturally supports complex numbers as well
-      (simply by comparing their real and imaginary parts separately).  Note,
-      +FLOAT_COMPARISON is currently incompatible with +ELLIPSIS, so tests
-      with the +FLOAT_COMPARISON flag automatically disable +ELLIPSIS
-    """
-
-    _literal_re = re.compile(
-        r"(\W|^)[uU]([rR]?[\'\"])", re.UNICODE)
-    _remove_byteorder = re.compile(
-        r"([\'\"])[|<>]([biufcSaUV][0-9]+)([\'\"])", re.UNICODE)
-    _fix_32bit = re.compile(
-        r"([\'\"])([iu])[48]([\'\"])", re.UNICODE)
-    _ignore_long_int = re.compile(
-        r"([0-9]+)L", re.UNICODE)
-
-    # Translated to a regexp right from the Python language grammar, but
-    # including the option for a leading sign
-    # http://docs.python.org/2/reference/lexical_analysis.html#floating-point-literals
-    _point_float_re = r'\d*(?:\.\d+|\d+\.)'
-    _float_re = re.compile(r'[+-]?(?:(?:\d+|{0})[eE][+-]?\d+|{0})'.format(
-        _point_float_re))
-
-    _original_output_checker = doctest.OutputChecker
-
-    def do_fixes(self, want, got):
-        want = re.sub(self._literal_re, r'\1\2', want)
-        want = re.sub(self._remove_byteorder, r'\1\2\3', want)
-        want = re.sub(self._fix_32bit, r'\1\2\3', want)
-        want = re.sub(self._ignore_long_int, r'\1', want)
-
-        got = re.sub(self._literal_re, r'\1\2', got)
-        got = re.sub(self._remove_byteorder, r'\1\2\3', got)
-        got = re.sub(self._fix_32bit, r'\1\2\3', got)
-        got = re.sub(self._ignore_long_int, r'\1', got)
-
-        return want, got
-
-    def normalize_floats(self, want, got):
-        """
-        Find all floating point values in the 'want' (expected) string, and
-        replace corresponding floats in the 'got' string with the string
-        representation from the expected string *if* the numerical values
-        compare equal.
-
-        Obviously if the two strings do not have the same number of floating
-        point values the output will not compare equal overall.  It should also
-        be noted that this could hide subtle float representation bugs; there
-        should be separate regression tests in cases where we need to test for
-        that though--doctests ought to be more flexible.
-        """
-
-        want = want.splitlines()
-        got = got.splitlines()
-
-        if len(want) != len(got):
-            # Don't bother
-            return
-
-        def repl(m):
-            want = want_floats.pop(0)
-            got = m.group(0)
-
-            if float(want) == float(got):
-                return want
-            else:
-                return got
-
-        for idx in range(len(want)):
-            wline = want[idx]
-            gline = got[idx]
-
-            want_floats = self._float_re.findall(wline)
-            gline = self._float_re.sub(repl, gline)
-
-            # If the 'got' line did not contain the same number of floats then
-            # want_floats will be left non-empty, in which case we can assume
-            # the results are "not equal"
-            if not want_floats:
-                got[idx] = gline
-            else:
-                break
-
-        return '\n'.join(want), '\n'.join(got)
-
-    def check_output(self, want, got, flags):
-        if flags & FIX:
-            want, got = self.do_fixes(want, got)
-
-        if flags & FLOAT_COMPARISON:
-            flags = flags & ~doctest.ELLIPSIS
-            want, got = self.normalize_floats(want, got)
-        # Can't use super here because doctest.OutputChecker is not a
-        # new-style class.
-        return self._original_output_checker.check_output(
-            self, want, got, flags)
-
-    def output_difference(self, want, got, flags):
-        if flags & FIX:
-            want, got = self.do_fixes(want, got)
-        # Can't use super here because doctest.OutputChecker is not a
-        # new-style class.
-        return self._original_output_checker.output_difference(
-            self, want, got, flags)
-
-
 # We monkey-patch in our replacement doctest OutputChecker.  Not
 # great, but there isn't really an API to replace the checker when
 # using doctest.testfile, unfortunately.
-FIX = doctest.register_optionflag('FIX')
-FLOAT_COMPARISON = doctest.register_optionflag('FLOAT_COMPARISON')
-doctest.OutputChecker = OutputCheckerFix
+doctest.OutputChecker = AstropyOutputChecker
 
 
 REMOTE_DATA = doctest.register_optionflag('REMOTE_DATA')

diff --git a/licenses/SYMPY.rst b/licenses/SYMPY.rst
@@ -0,0 +1,28 @@
+Copyright (c) 2006-2014 SymPy Development Team
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+  a. Redistributions of source code must retain the above copyright notice,
+     this list of conditions and the following disclaimer.
+  b. Redistributions in binary form must reproduce the above copyright
+     notice, this list of conditions and the following disclaimer in the
+     documentation and/or other materials provided with the distribution.
+  c. Neither the name of SymPy nor the names of its contributors
+     may be used to endorse or promote products derived from this software
+     without specific prior written permission.
+
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGE.