Skip to content

Commit aec8fe1

Browse files
committed
Merge branch 'string-codepoint-fixes' of github.com:richcarl/purerl-strings into richcarl-string-codepoint-fixes
2 parents d0368d1 + b1b3bf0 commit aec8fe1

File tree

1 file changed

+23
-12
lines changed

1 file changed

+23
-12
lines changed

src/Data/String/CodePoints.erl

+23-12
Original file line numberDiff line numberDiff line change
@@ -7,19 +7,21 @@
77
, '_toCodePointArray'/1]).
88

99
%% Returns a fun that yields the first code point of Str.
%% The fun raises error:badarg when string:next_codepoint/1 does not
%% produce a code point, i.e. for empty input ([]) or invalid UTF-8
%% ({error, _}).
'_unsafeCodePointAt0'(_Fallback) ->
    fun(Str) ->
        case string:next_codepoint(Str) of
            [First | _] ->
                First;
            _NoCodePoint ->
                %% empty input or malformed utf-8
                error(badarg)
        end
    end.
1515

1616
%% Looks up the code point at position Index (zero-based, counted in
%% code points, not bytes) of the UTF-8 binary Str.
%%
%% Returns Just(CP) when such a code point exists. Returns Nothing when
%% Index is not a non-negative integer, when Str has fewer than Index+1
%% code points, or when malformed UTF-8 is hit before reaching Index.
%%
%% Slicing the binary at byte offset Index would land in the middle of a
%% character (or on the wrong character) as soon as any multi-byte
%% character precedes the position, so the string is walked one code
%% point at a time instead. Cost is O(Index).
'_codePointAt'(_Fallback, Just, Nothing, _unsafeCodePointAt0, Index, Str) ->
    case nth_codepoint(Index, Str) of
        {ok, CP} -> Just(CP);
        error -> Nothing
    end.

%% Returns {ok, CP} for the code point at zero-based position Index of
%% Str, or error if there is no such code point.
nth_codepoint(Index, Str) when is_integer(Index), Index >= 0 ->
    case string:next_codepoint(Str) of
        [CP | _Rest] when Index =:= 0 -> {ok, CP};
        [_CP | Rest] -> nth_codepoint(Index - 1, Rest);
        _ -> error % end of string ([]) or malformed utf-8 ({error, _})
    end;
nth_codepoint(_Index, _Str) ->
    error.
2325

2426
'_fromCodePointArray'(_Fallback, Array) ->
2527
List = array:to_list(Array),
@@ -28,12 +30,21 @@ end.
2830
%% Returns a fun that encodes a single code point as a UTF-8 binary
%% via unicode:characters_to_binary/2.
'_singleton'(_Fallback) ->
    fun(CodePoint) ->
        Chars = [CodePoint],
        unicode:characters_to_binary(Chars, utf8)
    end.
33+
3134
%% Curried entry point: '_take'(Fallback) returns a fun taking the
%% count, which returns a fun taking the string. The work is delegated
%% to take/3, starting from an empty accumulator.
'_take'(_Fallback) ->
    fun(Count) ->
        fun(Str) ->
            Empty = <<>>,
            take(Count, Str, Empty)
        end
    end.
38+
39+
%% take(N, Str, Acc) appends the first N code points of the UTF-8
%% binary Str to the binary Acc and returns the result. If Str holds
%% fewer than N code points, everything in Str is appended. N =< 0
%% returns Acc unchanged. Raises error:badarg on malformed UTF-8.
take(N, _Str, Acc) when N =< 0 -> Acc;
take(_N, <<>>, Acc) -> Acc;
take(N, Str, Acc) ->
    %% note that right-appending to a binary in a loop is efficient
    case string:next_codepoint(Str) of
        [CP | Rest] -> take(N - 1, Rest, <<Acc/binary, CP/utf8>>);
        _ -> error(badarg) % malformed utf-8
    end.
47+
3748
'_toCodePointArray'(_Fallback) -> fun (_UnsafeCodePointAt0) ->
3849
fun (Str) ->
3950
array:from_list(unicode:characters_to_list(Str, utf8))

0 commit comments

Comments
 (0)