Skip to content

Commit 1dd67d5

Browse files
committed
Add CJK extension
1 parent 95efaa1 commit 1dd67d5

File tree

6 files changed

+352
-52
lines changed

6 files changed

+352
-52
lines changed

README.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,8 @@ Parser and Renderer options
180180
- [PHP Markdown Extra: Footnotes](https://michelf.ca/projects/php-markdown/extra/#footnotes)
181181
- `extension.Typographer`
182182
- This extension substitutes punctuation with typographic entities, like [smartypants](https://daringfireball.net/projects/smartypants/).
183+
- `extension.CJK`
184+
- This extension is a shortcut for CJK-related functionality.
183185

184186
### Attributes
185187
The `parser.WithAttribute` option allows you to define attributes on some elements.
@@ -369,6 +371,17 @@ footnote-prefix: article1
369371
# My article
370372

371373
```
374+
375+
### CJK extension
376+
CommonMark gives compatibility a high priority, and the original Markdown was designed by Westerners, so CommonMark lacks consideration for languages such as Chinese, Japanese, and Korean (CJK).
377+
378+
This extension provides additional options for CJK users.
379+
380+
| Functional option | Type | Description |
381+
| ----------------- | ---- | ----------- |
382+
| `extension.WithEastAsianLineBreaks` | `-` | Soft line breaks are rendered as a newline, which some East Asian readers see as an unnecessary space. With this option, soft line breaks between East Asian wide characters are ignored. |
383+
| `extension.WithEscapedSpace` | `-` | Without spaces around it, an emphasis that starts with East Asian punctuation is not interpreted as an emphasis (as defined in the CommonMark spec). With this option, you can avoid this inconvenient behavior by putting 'not rendered' spaces around the emphasis, like `太郎は\ **「こんにちわ」**\ といった`. |
384+
372385

373386
Security
374387
--------------------

ast/inline.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ func (n *Text) SetSoftLineBreak(v bool) {
9191
if v {
9292
n.flags |= textSoftLineBreak
9393
} else {
94-
n.flags = n.flags &^ textHardLineBreak
94+
n.flags = n.flags &^ textSoftLineBreak
9595
}
9696
}
9797

extension/cjk.go

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
package extension
2+
3+
import (
4+
"github.com/yuin/goldmark"
5+
"github.com/yuin/goldmark/renderer/html"
6+
)
7+
8+
// A CJKOption is a functional option for the CJK extension: a function that
// receives the extension's internal cjk settings and modifies them. The
// options are mostly aimed at HTML based renderers.
9+
type CJKOption func(*cjk)
10+
11+
// WithEastAsianLineBreaks is a functional option that indicates whether softline breaks
12+
// between east asian wide characters should be ignored.
13+
func WithEastAsianLineBreaks() CJKOption {
14+
return func(c *cjk) {
15+
c.EastAsianLineBreaks = true
16+
}
17+
}
18+
19+
// WithEscapedSpace is a functional option that indicates that a '\' escaped half-space(0x20) should not be rendered.
20+
func WithEscapedSpace() CJKOption {
21+
return func(c *cjk) {
22+
c.EscapedSpace = true
23+
}
24+
}
25+
26+
// cjk holds the settings of the CJK extension. Its fields are set through
// CJKOption functions and read by Extend.
type cjk struct {
27+
// EastAsianLineBreaks, when true, makes Extend add
// html.WithEastAsianLineBreaks to the renderer options.
EastAsianLineBreaks bool
28+
// EscapedSpace, when true, makes Extend install an HTML writer created
// with html.WithEscapedSpace.
EscapedSpace bool
29+
}
30+
31+
// CJK is a ready-to-use CJK extension, built by NewCJK with both
// WithEastAsianLineBreaks and WithEscapedSpace enabled.
var CJK = NewCJK(WithEastAsianLineBreaks(), WithEscapedSpace())
32+
33+
// NewCJK returns a new extension with given options.
34+
func NewCJK(opts ...CJKOption) goldmark.Extender {
35+
e := &cjk{}
36+
for _, opt := range opts {
37+
opt(e)
38+
}
39+
return e
40+
}
41+
42+
func (e *cjk) Extend(m goldmark.Markdown) {
43+
if e.EastAsianLineBreaks {
44+
m.Renderer().AddOptions(html.WithEastAsianLineBreaks())
45+
}
46+
if e.EscapedSpace {
47+
m.Renderer().AddOptions(html.WithWriter(html.NewWriter(html.WithEscapedSpace())))
48+
}
49+
}

extension/cjk_test.go

Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,154 @@
1+
package extension
2+
3+
import (
4+
"testing"
5+
6+
"github.com/yuin/goldmark"
7+
"github.com/yuin/goldmark/renderer/html"
8+
"github.com/yuin/goldmark/testutil"
9+
)
10+
11+
func TestEscapedSpace(t *testing.T) {
12+
markdown := goldmark.New(goldmark.WithRendererOptions(
13+
html.WithXHTML(),
14+
html.WithUnsafe(),
15+
))
16+
no := 1
17+
testutil.DoTestCase(
18+
markdown,
19+
testutil.MarkdownTestCase{
20+
No: no,
21+
Description: "Without spaces around an emphasis started with east asian punctuations, it is not interpreted as an emphasis(as defined in CommonMark spec)",
22+
Markdown: "太郎は**「こんにちわ」**と言った\nんです",
23+
Expected: "<p>太郎は**「こんにちわ」**と言った\nんです</p>",
24+
},
25+
t,
26+
)
27+
28+
no = 2
29+
testutil.DoTestCase(
30+
markdown,
31+
testutil.MarkdownTestCase{
32+
No: no,
33+
Description: "With spaces around an emphasis started with east asian punctuations, it is interpreted as an emphasis(but remains unnecessary spaces)",
34+
Markdown: "太郎は **「こんにちわ」** と言った\nんです",
35+
Expected: "<p>太郎は <strong>「こんにちわ」</strong> と言った\nんです</p>",
36+
},
37+
t,
38+
)
39+
40+
// Enables EscapedSpace
41+
markdown = goldmark.New(goldmark.WithRendererOptions(
42+
html.WithXHTML(),
43+
html.WithUnsafe(),
44+
),
45+
goldmark.WithExtensions(NewCJK(WithEscapedSpace())),
46+
)
47+
48+
no = 3
49+
testutil.DoTestCase(
50+
markdown,
51+
testutil.MarkdownTestCase{
52+
No: no,
53+
Description: "With spaces around an emphasis started with east asian punctuations,it is interpreted as an emphasis",
54+
Markdown: "太郎は\\ **「こんにちわ」**\\ と言った\nんです",
55+
Expected: "<p>太郎は<strong>「こんにちわ」</strong>と言った\nんです</p>",
56+
},
57+
t,
58+
)
59+
}
60+
61+
func TestEastAsianLineBreaks(t *testing.T) {
62+
markdown := goldmark.New(goldmark.WithRendererOptions(
63+
html.WithXHTML(),
64+
html.WithUnsafe(),
65+
))
66+
no := 1
67+
testutil.DoTestCase(
68+
markdown,
69+
testutil.MarkdownTestCase{
70+
No: no,
71+
Description: "Soft line breaks are rendered as a newline, so some asian users will see it as an unnecessary space",
72+
Markdown: "太郎は\\ **「こんにちわ」**\\ と言った\nんです",
73+
Expected: "<p>太郎は\\ <strong>「こんにちわ」</strong>\\ と言った\nんです</p>",
74+
},
75+
t,
76+
)
77+
78+
// Enables EastAsianLineBreaks
79+
80+
markdown = goldmark.New(goldmark.WithRendererOptions(
81+
html.WithXHTML(),
82+
html.WithUnsafe(),
83+
),
84+
goldmark.WithExtensions(NewCJK(WithEastAsianLineBreaks())),
85+
)
86+
87+
no = 2
88+
testutil.DoTestCase(
89+
markdown,
90+
testutil.MarkdownTestCase{
91+
No: no,
92+
Description: "Soft line breaks between east asian wide characters are ignored",
93+
Markdown: "太郎は\\ **「こんにちわ」**\\ と言った\nんです",
94+
Expected: "<p>太郎は\\ <strong>「こんにちわ」</strong>\\ と言ったんです</p>",
95+
},
96+
t,
97+
)
98+
99+
no = 3
100+
testutil.DoTestCase(
101+
markdown,
102+
testutil.MarkdownTestCase{
103+
No: no,
104+
Description: "Soft line breaks between western characters are rendered as a newline",
105+
Markdown: "太郎は\\ **「こんにちわ」**\\ と言ったa\nbんです",
106+
Expected: "<p>太郎は\\ <strong>「こんにちわ」</strong>\\ と言ったa\nbんです</p>",
107+
},
108+
t,
109+
)
110+
111+
no = 4
112+
testutil.DoTestCase(
113+
markdown,
114+
testutil.MarkdownTestCase{
115+
No: no,
116+
Description: "Soft line breaks between a western character and an east asian wide character are rendered as a newline",
117+
Markdown: "太郎は\\ **「こんにちわ」**\\ と言ったa\nんです",
118+
Expected: "<p>太郎は\\ <strong>「こんにちわ」</strong>\\ と言ったa\nんです</p>",
119+
},
120+
t,
121+
)
122+
123+
no = 5
124+
testutil.DoTestCase(
125+
markdown,
126+
testutil.MarkdownTestCase{
127+
No: no,
128+
Description: "Soft line breaks between an east asian wide character and a western character are rendered as a newline",
129+
Markdown: "太郎は\\ **「こんにちわ」**\\ と言った\nbんです",
130+
Expected: "<p>太郎は\\ <strong>「こんにちわ」</strong>\\ と言った\nbんです</p>",
131+
},
132+
t,
133+
)
134+
135+
// WithHardWraps take precedence over WithEastAsianLineBreaks
136+
markdown = goldmark.New(goldmark.WithRendererOptions(
137+
html.WithHardWraps(),
138+
html.WithXHTML(),
139+
html.WithUnsafe(),
140+
),
141+
goldmark.WithExtensions(NewCJK(WithEastAsianLineBreaks())),
142+
)
143+
no = 6
144+
testutil.DoTestCase(
145+
markdown,
146+
testutil.MarkdownTestCase{
147+
No: no,
148+
Description: "WithHardWraps take precedence over WithEastAsianLineBreaks",
149+
Markdown: "太郎は\\ **「こんにちわ」**\\ と言った\nんです",
150+
Expected: "<p>太郎は\\ <strong>「こんにちわ」</strong>\\ と言った<br />\nんです</p>",
151+
},
152+
t,
153+
)
154+
}

0 commit comments

Comments
 (0)