✨ class String

ArcletProject · Nov 15, 2024 · ba072d7 · ba072d7
1 parent 2bde377
commit ba072d7
Show file tree

Hide file tree

Showing 5 changed files with 119 additions and 14 deletions.
diff --git a/src/tarina/_string_c.pxd b/src/tarina/_string_c.pxd
@@ -0,0 +1 @@
+# Declaration of the index-only splitter implemented in _string_c.pyx.
+# It returns a 2-tuple of integers (an index into `text` and a count of
+# separator characters seen while no quotation is open) rather than substrings.
+cpdef inline tuple split_once_index_only(str text, str separator, Py_ssize_t offset, bint crlf=True)
diff --git a/src/tarina/_string_c.pyi b/src/tarina/_string_c.pyi
@@ -51,3 +51,22 @@ def split_once_index_only(text: str, separator: str, offset: int, crlf: bool = T
         Tuple[str, str]: 切割后的字符串, 可能含有空格
     """
     ...
+
+
+class String:
+    # Stub for the Cython `String` class: a cursor over `text` that is read
+    # one separator-delimited piece at a time via step() / val() / apply().
+
+    # Start of the current slice of `text`.
+    left_index: int
+    # End (exclusive) of the current slice of `text`.
+    right_index: int
+    # Index produced by the last step(); apply() copies it into `left_index`.
+    next_index: int
+    # Length of `text`, captured at construction.
+    _len: int
+    # The full string being scanned.
+    text: str
+
+    # Store `text` and its length; all three indices start at 0.
+    def __init__(self, text: str): ...
+
+    # Call split_once_index_only from `left_index` and update `next_index` and
+    # `right_index` from its result. When `crlf` is true, CR and LF are added
+    # to the separator characters.
+    def step(self, separator: str, crlf: bool = True) -> None: ...
+
+    # Return text[left_index:right_index].
+    def val(self) -> str: ...
+
+    # Move `left_index` to `next_index` and reset `right_index` to the length
+    # of `text`.
+    def apply(self) -> None: ...
+
+    # True when `left_index` equals the length of `text`.
+    @property
+    def complete(self) -> bool: ...
diff --git a/src/tarina/_string_c.pyx b/src/tarina/_string_c.pyx
@@ -28,7 +28,7 @@ cdef extern from "_op.h":
 cdef dict QUOTES = {'"': '"', "'": "'"}
 cdef unicode CRLF = "\n\r"
 
-cpdef inline list split(str text, str separator, bint crlf=True):
+def split(str text, str separator, bint crlf=True):
     if crlf:
         separator = PyUnicode_Concat(separator, CRLF)
     text = str_strip(text, BOTHSTRIP, separator)
@@ -81,7 +81,7 @@ cpdef inline list split(str text, str separator, bint crlf=True):
     return PyUnicode_Split(PyUnicode_Join('', result), '\1', -1)
 
 
-cpdef inline tuple split_once(str text, str separator, bint crlf=True):
+def split_once(str text, str separator, bint crlf=True):
     if crlf:
         separator = PyUnicode_Concat(separator, CRLF)
     text = str_strip(text, LEFTSTRIP, separator)
@@ -129,7 +129,9 @@ cpdef inline tuple split_once(str text, str separator, bint crlf=True):
     return PyUnicode_Join('', out_text), PyUnicode_Substring(text, index, PY_SSIZE_T_MAX)
 
 
-cpdef inline tuple split_once_without_escape(str text, str separator, bint crlf=True):
+def split_once_without_escape(str text, str separator, bint crlf=True):
+    if crlf:
+        separator = PyUnicode_Concat(separator, CRLF)
     text = str_strip(text, LEFTSTRIP, separator)
     cdef:
         Py_ssize_t index = 0
@@ -141,7 +143,7 @@ cpdef inline tuple split_once_without_escape(str text, str separator, bint crlf=
     while index < length:
         ch = PyUnicode_READ_CHAR(text, index)
         index += 1
-        if str_contains(separator, ch) or (crlf and str_contains(CRLF, ch)):
+        if str_contains(separator, ch):
             if quotation == 0:
                 break
             if first_quoted_sep_index == -1:
@@ -161,6 +163,8 @@ cpdef inline tuple split_once_without_escape(str text, str separator, bint crlf=
 
 
 cpdef inline tuple split_once_index_only(str text, str separator, Py_ssize_t offset, bint crlf=True):
+    if crlf:
+        separator = PyUnicode_Concat(separator, CRLF)
     cdef:
         Py_ssize_t index = offset
         Py_UCS4 quotation = 0
@@ -172,9 +176,9 @@ cpdef inline tuple split_once_index_only(str text, str separator, Py_ssize_t off
     while index < length:
         ch = PyUnicode_READ_CHAR(text, index)
         index += 1
-        if str_contains(separator, ch) or (crlf and str_contains(CRLF, ch)):
+        if str_contains(separator, ch):
             if quotation == 0:
-                sep = sep + 1
+                sep += 1
                 continue
             if first_quoted_sep_index == -1:
                 first_quoted_sep_index = index
@@ -191,3 +195,40 @@ cpdef inline tuple split_once_index_only(str text, str separator, Py_ssize_t off
     if index == length and first_quoted_sep_index != -1:
         return first_quoted_sep_index, sep
     return index, sep
+
+
+cdef class String:
+    """Cursor over ``text`` that is read one separator-delimited piece at a time.
+
+    ``step`` finds where the next piece ends, ``val`` returns the current
+    slice ``text[left_index:right_index]`` and ``apply`` moves the start of
+    the slice to the index found by the last ``step``.
+    """
+
+    # Declared ``public`` so the fields are readable and writable from Python,
+    # matching the attributes listed in _string_c.pyi and the pure-Python
+    # String class; plain ``cdef`` fields are only visible to Cython code.
+    cdef public Py_ssize_t left_index   # start of the current slice
+    cdef public Py_ssize_t right_index  # end (exclusive) of the current slice
+    cdef public Py_ssize_t next_index   # index produced by the last step()
+    cdef public Py_ssize_t _len         # length of ``text``
+    cdef public str text                # the full string being scanned
+
+    def __init__(self, str text):
+        """Store ``text`` and its length; all indices start at 0."""
+        self.text = text
+        self._len = PyUnicode_GET_LENGTH(text)
+        self.left_index = 0
+        self.right_index = 0
+        self.next_index = 0
+
+    def step(self, str separator, bint crlf=True):
+        """Locate the end of the next piece, scanning from ``left_index``.
+
+        Args:
+            separator: characters treated as separators.
+            crlf: when true, CR and LF are treated as separators as well.
+        """
+        cdef Py_ssize_t sep_count
+        self.next_index, sep_count = split_once_index_only(self.text, separator, self.left_index, crlf)
+        # The helper also reports how many separator characters it counted;
+        # subtracting them keeps those characters out of the slice val() returns.
+        self.right_index = self.next_index - sep_count
+
+    def val(self):
+        """Return the current slice ``text[left_index:right_index]``."""
+        return PyUnicode_Substring(self.text, self.left_index, self.right_index)
+
+    def apply(self):
+        """Move ``left_index`` to ``next_index`` and reset ``right_index`` to the end of ``text``."""
+        self.left_index = self.next_index
+        self.right_index = self._len
+
+    @property
+    def complete(self):
+        """True when ``left_index`` equals the length of ``text``."""
+        return self.left_index == self._len
+
+    def __repr__(self):
+        return f"String({self.text!r}[{self.left_index}:{self.right_index}])"
+
+    def __str__(self):
+        return self.val()
diff --git a/src/tarina/_string_py.py b/src/tarina/_string_py.py
@@ -65,16 +65,17 @@ def split_once_without_escape(text: str, separator: str, crlf: bool = True):
     Returns:
         Tuple[str, str]: 切割后的字符串, 可能含有空格
     """
+    if crlf:
+        separator += CRLF
     index, quotation = 0, ""
     text = text.lstrip()
     first_quoted_sep_index = -1
     last_quote_index = 0
     tlen = len(text)
     for char in text:
         index += 1
-        if char in separator or (crlf and char in CRLF):
+        if char in separator:
             if not quotation:
-                #index -= 1
                 break
             if first_quoted_sep_index == -1:
                 first_quoted_sep_index = index
@@ -104,6 +105,8 @@ def split_once_index_only(text: str, separator: str, offset: int, crlf: bool = T
     Returns:
         Tuple[str, str]: 切割后的字符串, 可能含有空格
     """
+    if crlf:
+        separator += CRLF
     index = offset
     quotation = ""
     sep = 0
@@ -113,7 +116,7 @@ def split_once_index_only(text: str, separator: str, offset: int, crlf: bool = T
     tlen = len(text)
     for char in text:
         index += 1
-        if char in separator or (crlf and char in CRLF):
+        if char in separator:
             if not quotation:
                 sep += 1
                 continue
@@ -136,7 +139,6 @@ def split_once_index_only(text: str, separator: str, offset: int, crlf: bool = T
     return index, sep
 
 
-
 def split(text: str, separator: str, crlf: bool = True):
     """尊重引号与转义的字符串切分
 
@@ -186,3 +188,39 @@ def split(text: str, separator: str, crlf: bool = True):
         for i in quoted_sep_index:
             result[i] = "\0"
     return str.join("", result).split("\0")
+
+
+class String:
+    """Cursor over ``text`` that is read one separator-delimited piece at a time.
+
+    ``step`` finds where the next piece ends, ``val`` returns the current
+    slice ``text[left_index:right_index]`` and ``apply`` moves the start of
+    the slice to the index found by the last ``step``.
+    """
+
+    left_index: int
+    right_index: int
+    next_index: int
+    _len: int
+    text: str
+
+    def __init__(self, text: str):
+        """Store ``text`` and its length; all indices start at 0."""
+        self.text = text
+        self._len = len(text)
+        self.left_index = self.right_index = self.next_index = 0
+
+    def step(self, separator: str, crlf: bool = True):
+        """Locate the end of the next piece, scanning from ``left_index``.
+
+        Args:
+            separator: characters treated as separators.
+            crlf: forwarded to ``split_once_index_only``.
+        """
+        end, sep_count = split_once_index_only(self.text, separator, self.left_index, crlf)
+        self.next_index = end
+        # Keep the separator characters counted by the helper out of the slice.
+        self.right_index = end - sep_count
+
+    def val(self):
+        """Return the current slice ``text[left_index:right_index]``."""
+        start, stop = self.left_index, self.right_index
+        return self.text[start:stop]
+
+    def apply(self):
+        """Move ``left_index`` to ``next_index`` and reset ``right_index`` to the end of ``text``."""
+        self.left_index, self.right_index = self.next_index, self._len
+
+    @property
+    def complete(self):
+        """True when ``left_index`` equals the length of ``text``."""
+        return self.left_index == self._len
+
+    def __repr__(self):
+        shown = f"String({self.text!r}[{self.left_index}:{self.right_index}])"
+        return shown
+
+    def __str__(self):
+        return self.val()
diff --git a/src/tarina/string.py b/src/tarina/string.py
@@ -1,7 +1,7 @@
 import os
 import sys
 
-__all__ = ("split", "split_once")
+__all__ = ("split", "split_once", "split_once_without_escape", "split_once_index_only", "String")
 
 
 NO_EXTENSIONS = bool(os.environ.get("TARINA_NO_EXTENSIONS"))  # type: bool
@@ -13,12 +13,18 @@
     try:
         from ._string_c import split as split  # type: ignore[misc]
         from ._string_c import split_once as split_once  # type: ignore[misc]
-        from ._string_c import split_once_without_escape as split_once_without_escape  # type: ignore[misc
+        from ._string_c import split_once_without_escape as split_once_without_escape  # type: ignore[misc]
+        from ._string_c import split_once_index_only as split_once_index_only  # type: ignore[misc]
+        from ._string_c import String as String  # type: ignore[misc]
     except ImportError:  # pragma: no cover
         from ._string_py import split as split  # type: ignore[misc]
         from ._string_py import split_once as split_once  # type: ignore[misc]
-        from ._string_py import split_once_without_escape as split_once_without_escape
+        from ._string_py import split_once_without_escape as split_once_without_escape  # type: ignore[misc]
+        from ._string_py import split_once_index_only as split_once_index_only  # type: ignore[misc]
+        from ._string_py import String as String  # type: ignore[misc]
 else:
     from ._string_py import split as split  # type: ignore[misc]
     from ._string_py import split_once as split_once  # type: ignore[misc]
-    from ._string_py import split_once_without_escape as split_once_without_escape
+    from ._string_py import split_once_without_escape as split_once_without_escape  # type: ignore[misc]
+    from ._string_py import split_once_index_only as split_once_index_only  # type: ignore[misc]
+    from ._string_py import String as String  # type: ignore[misc]
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		cpdef inline tuple split_once_index_only(str text, str separator, Py_ssize_t offset, bint crlf=True)