|
7 | 7 | , '_toCodePointArray'/1]).
|
8 | 8 |
|
%% Returns a fun that yields the first code point of Str.
%%
%% Raises error(badarg) when string:next_codepoint/1 does not produce a
%% leading code point, i.e. it returned [] (no input left) or {error, _}
%% (bytes that do not decode as UTF-8).
'_unsafeCodePointAt0'(_Fallback) ->
  fun (Str) ->
    case string:next_codepoint(Str) of
      [CP | _Rest] -> CP;
      _ -> error(badarg) % empty input or malformed utf-8
    end
  end.
|
15 | 15 |
|
%% Looks up the code point at zero-based code point position Index in Str.
%%
%% Returns Just(CP) when that position exists, and Nothing when Index is not
%% a non-negative integer, when Str has fewer than Index+1 code points, or
%% when undecodable bytes are reached before the position is found.
%%
%% Index counts code points, not bytes: slicing the binary at byte offset
%% Index would land in the middle of any multi-byte character that precedes
%% the requested position.
'_codePointAt'(_Fallback, Just, Nothing, _unsafeCodePointAt0, Index, Str)
    when is_integer(Index), Index >= 0 ->
  code_point_at(Index, Str, Just, Nothing);
'_codePointAt'(_Fallback, _Just, Nothing, _unsafeCodePointAt0, _Index, _Str) ->
  Nothing.

%% Steps over Index code points, then wraps the next one with Just.
code_point_at(Index, Str, Just, Nothing) ->
  case string:next_codepoint(Str) of
    [CP | _Rest] when Index =:= 0 -> Just(CP);
    [_Skipped | Rest] -> code_point_at(Index - 1, Rest, Just, Nothing);
    _ -> Nothing % ran out of input, or malformed utf-8
  end.
23 | 25 |
|
24 | 26 | '_fromCodePointArray'(_Fallback, Array) ->
|
25 | 27 | List = array:to_list(Array),
|
|
%% Returns a fun that builds a one-code-point string from CP by passing
%% [CP] to unicode:characters_to_binary/2.
'_singleton'(_Fallback) ->
  fun (CP) ->
    unicode:characters_to_binary([CP], utf8)
  end.
|
| 33 | + |
%% Returns a curried fun: given N and then S, it yields a binary holding the
%% first N code points of S (all of S when it has fewer than N code points,
%% and an empty binary when N =< 0).
%%
%% Raises error(badarg) if undecodable bytes are reached before N code
%% points have been collected.
'_take'(_Fallback) ->
  fun (N) ->
    fun (S) -> take(N, S, <<>>) end
  end.

%% Moves up to N code points from the front of S onto the accumulator Acc.
take(N, _S, Acc) when N =< 0 -> Acc;
take(N, S, Acc) ->
  %% note that right-appending to a binary in a loop is efficient
  case string:next_codepoint(S) of
    [CP | Rest] -> take(N - 1, Rest, <<Acc/binary, CP/utf8>>);
    [] -> Acc; % input exhausted before N code points were taken
    {error, _} -> error(badarg) % malformed utf-8
  end.
| 47 | + |
37 | 48 | '_toCodePointArray'(_Fallback) -> fun (_UnsafeCodePointAt0) ->
|
38 | 49 | fun (Str) ->
|
39 | 50 | array:from_list(unicode:characters_to_list(Str, utf8))
|
|
0 commit comments