Skip to content

BREAKING: Support typesetting apostrophe (') and backtick (`) #3105

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 21 commits into from
Feb 20, 2025
Merged
Show file tree
Hide file tree
Changes from 19 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 56 additions & 14 deletions pygmt/helpers/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,41 @@ def _is_printable_ascii(argstr: str) -> bool:
return all(32 <= ord(c) <= 126 for c in argstr)


def _has_apostrophe_or_backtick(argstr: str) -> bool:
"""
Check if a string contains apostrophe (') or backtick (`).

For typographical reasons, apostrophe (') and backtick (`) are mapped to left and
right single quotation marks (‘ and ’) in Adobe ISOLatin1+ encoding. To ensure what
you type is what you get (https://github.com/GenericMappingTools/pygmt/issues/3476),
they need special handling in the ``_check_encoding`` and ``non_ascii_to_octal``
functions. More specifically, a string that contains printable ASCII characters with
apostrophe (') and backtick (`) will not be considered as "ascii" encoding.

Parameters
----------
argstr
The string to be checked.

Returns
-------
``True`` if the string contains apostrophe (') or backtick (`). Otherwise, return
``False``.

Examples
--------
>>> _has_apostrophe_or_backtick("12AB±β①②")
False
>>> _has_apostrophe_or_backtick("12AB`")
True
>>> _has_apostrophe_or_backtick("12AB'")
True
>>> _has_apostrophe_or_backtick("12AB'`")
True
""" # noqa: RUF002
return "'" in argstr or "`" in argstr


def _check_encoding(argstr: str) -> Encoding:
"""
Check the charset encoding of a string.
Expand Down Expand Up @@ -206,8 +241,9 @@ def _check_encoding(argstr: str) -> Encoding:
>>> _check_encoding("123AB中文") # Characters not in any charset encoding
'ISOLatin1+'
"""
# Return "ascii" if the string only contains printable ASCII characters.
if _is_printable_ascii(argstr):
# Return "ascii" if the string only contains printable ASCII characters, excluding
# apostrophe (') and backtick (`).
if _is_printable_ascii(argstr) and not _has_apostrophe_or_backtick(argstr):
return "ascii"
# Loop through all supported encodings and check if all characters in the string
# are in the charset of the encoding. If all characters are in the charset, return
Expand Down Expand Up @@ -402,9 +438,14 @@ def non_ascii_to_octal(argstr: str, encoding: Encoding = "ISOLatin1+") -> str:
'ABC \\261120\\260 DEF @~\\141@~ @%34%\\252@%%'
>>> non_ascii_to_octal("12ABāáâãäåβ①②", encoding="ISO-8859-4")
'12AB\\340\\341\\342\\343\\344\\345@~\\142@~@%34%\\254@%%@%34%\\255@%%'
>>> non_ascii_to_octal("'‘’\"“”")
'\\234\\140\\047"\\216\\217'
""" # noqa: RUF002
# Return the input string if it only contains printable ASCII characters.
if encoding == "ascii" or _is_printable_ascii(argstr):
# Return the input string if it only contains printable ASCII characters, excluding
# apostrophe (') and backtick (`).
if encoding == "ascii" or (
_is_printable_ascii(argstr) and not _has_apostrophe_or_backtick(argstr)
):
return argstr

# Dictionary mapping non-ASCII characters to octal codes
Expand All @@ -420,6 +461,11 @@ def non_ascii_to_octal(argstr: str, encoding: Encoding = "ISOLatin1+") -> str:

# Remove any printable characters.
mapping = {k: v for k, v in mapping.items() if k not in string.printable}

if encoding == "ISOLatin1+":
# Map apostrophe (') and backtick (`) to correct octal codes.
# See _has_apostrophe_or_backtick() for explanations.
mapping.update({"'": "\\234", "`": "\\221"})
return argstr.translate(str.maketrans(mapping))


Expand Down Expand Up @@ -465,16 +511,12 @@ def build_arg_list( # noqa: PLR0912
['-A', '-D0', '-E200', '-F', '-G1/2/3/4']
>>> build_arg_list(dict(A="1/2/3/4", B=["xaf", "yaf", "WSen"], C=("1p", "2p")))
['-A1/2/3/4', '-BWSen', '-Bxaf', '-Byaf', '-C1p', '-C2p']
>>> print(
... build_arg_list(
... dict(
... B=["af", "WSne+tBlank Space"],
... F='+t"Empty Spaces"',
... l="'Void Space'",
... )
... )
... )
['-BWSne+tBlank Space', '-Baf', '-F+t"Empty Spaces"', "-l'Void Space'"]
>>> build_arg_list(dict(B=["af", "WSne+tBlank Space"]))
['-BWSne+tBlank Space', '-Baf']
>>> build_arg_list(dict(F='+t"Empty Spaces"'))
['-F+t"Empty Spaces"']
>>> build_arg_list(dict(l="'Void Space'"))
['-l\\234Void Space\\234', '--PS_CHAR_ENCODING=ISOLatin1+']
>>> print(
... build_arg_list(
... dict(A="0", B=True, C="rainbow"),
Expand Down
4 changes: 2 additions & 2 deletions pygmt/tests/baseline/test_text_quotation_marks.png.dvc
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
outs:
- md5: 90d08c5a11c606abed51b84eafcdea04
size: 1662
- md5: f3ddc9b50f3da1facdbcd32261db3bd6
size: 2965
hash: md5
path: test_text_quotation_marks.png
12 changes: 8 additions & 4 deletions pygmt/tests/test_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -466,13 +466,17 @@ def test_text_nonascii(encoding):
@pytest.mark.mpl_image_compare
def test_text_quotation_marks():
"""
Test typesetting quotation marks.
Test typesetting single and double quotation marks.

See https://github.com/GenericMappingTools/pygmt/issues/3104.
See https://github.com/GenericMappingTools/pygmt/issues/3104 and
https://github.com/GenericMappingTools/pygmt/issues/3476.
"""
quotations = "` ' ‘ ’ \" “ ”" # noqa: RUF001
fig = Figure()
fig.basemap(projection="X4c/2c", region=[0, 4, 0, 2], frame=0)
fig.text(x=2, y=1, text='\\234 ‘ ’ " “ ”', font="20p") # noqa: RUF001
fig.basemap(
projection="X4c/2c", region=[0, 4, 0, 2], frame=["S", f"x+l{quotations}"]
)
fig.text(x=2, y=1, text=quotations, font="20p")
return fig


Expand Down
Loading