diff --git a/toml.abnf b/toml.abnf index 0446f8b6..580a1f13 100644 --- a/toml.abnf +++ b/toml.abnf @@ -58,8 +58,11 @@ unquoted-key-char =/ %xB2 / %xB3 / %xB9 / %xBC-BE ; superscript digits, fr unquoted-key-char =/ %xC0-D6 / %xD8-F6 / %xF8-37D ; non-symbol chars in Latin block unquoted-key-char =/ %x37F-1FFF ; exclude GREEK QUESTION MARK, which is basically a semi-colon unquoted-key-char =/ %x200C-200D / %x203F-2040 ; from General Punctuation Block, include the two tie symbols and ZWNJ, ZWJ -unquoted-key-char =/ %x2070-218F / %x2460-24FF ; include super-/subscripts, letterlike/numberlike forms, enclosed alphanumerics -unquoted-key-char =/ %x2C00-2FEF / %x3001-D7FF ; skip arrows, math, box drawing etc, skip 2FF0-3000 ideographic up/down markers and spaces +unquoted-key-char =/ %x2070-21FF / %x2300-24FF ; skip math operators +unquoted-key-char =/ %x25A0-268B / %x2690-2757 ; skip box drawing, block elements, and some yin-yang symbols +unquoted-key-char =/ %x2762-2767 / %x2776-27E5 ; skip some Dingbat punctuation +unquoted-key-char =/ %x2801-297F ; skip some math brackets and arrows, and braille blank +unquoted-key-char =/ %x2B00-2FFF / %x3001-D7FF ; skip various math operators and symbols, and ideographic space unquoted-key-char =/ %xF900-FDCF / %xFDF0-FFFD ; skip D800-DFFF surrogate block, E000-F8FF Private Use area, FDD0-FDEF intended for process-internal use (unicode) unquoted-key-char =/ %x10000-EFFFF ; all chars outside BMP range, excluding Private Use planes (F0000-10FFFF) diff --git a/toml.md b/toml.md index 8862f0ea..33bde080 100644 --- a/toml.md +++ b/toml.md @@ -104,10 +104,15 @@ first = "Tom" last = "Preston-Werner" # INVALID A key may be either bare, quoted, or dotted. **Bare keys** may contain any letter-like or number-like Unicode character from -any Unicode script, as well as ASCII digits, dashes and underscores. -Punctuation, spaces, arrows, box drawing and private use characters are not -allowed. 
Note that bare keys are allowed to be composed of only ASCII digits, -e.g. 1234, but are always interpreted as strings. +any Unicode script, as well as digits, dashes and underscores. Various symbols +(such as emojis or arrows) and punctuation marks from outside the ASCII range +are also allowed, while whitespace and private use characters are forbidden. It +is advisable to use only words (in arbitrary languages) as bare keys, as they +are valid in bare keys, while not all symbols and punctuation marks are. If you +want to use a bare key made up of several words, use a suitable separator +character (such as an underscore or hyphen) between the words, as spaces are not +allowed. Note that bare keys are allowed to be composed of only digits, e.g. +1234, but are always interpreted as strings. ℹ️ The exact ranges of allowed code points can be found in the [ABNF grammar file][abnf]. @@ -118,7 +123,6 @@ bare_key = "value" bare-key = "value" 1234 = "value" Fuß = "value" -😂 = "value" 汉语大字典 = "value" 辭源 = "value" பெண்டிரேம் = "value"