Skip to content

Commit

Permalink
✨ class String
Browse files Browse the repository at this point in the history
  • Loading branch information
RF-Tar-Railt committed Nov 15, 2024
1 parent 2bde377 commit ba072d7
Show file tree
Hide file tree
Showing 5 changed files with 119 additions and 14 deletions.
1 change: 1 addition & 0 deletions src/tarina/_string_c.pxd
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
cpdef inline tuple split_once_index_only(str text, str separator, Py_ssize_t offset, bint crlf=True)
19 changes: 19 additions & 0 deletions src/tarina/_string_c.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,22 @@ def split_once_index_only(text: str, separator: str, offset: int, crlf: bool = T
Tuple[str, str]: 切割后的字符串, 可能含有空格
"""
...


class String:
    """Cursor over ``text`` that exposes one separator-delimited segment at a time.

    Typical use is ``step()`` to locate the next segment, ``val()`` to read
    it, and ``apply()`` to move the cursor past it.
    """

    # Start of the current segment; also the offset the next ``step`` scans from.
    left_index: int
    # End (exclusive) of the current segment, as used by ``val``.
    right_index: int
    # Index reported by the last ``step``; ``apply`` moves ``left_index`` here.
    next_index: int
    # Length of ``text``, captured once at construction.
    _len: int
    # The full string being scanned.
    text: str

    def __init__(self, text: str):
        """Store ``text`` and its length, and reset all three indices to 0."""
        ...

    def step(self, separator: str, crlf: bool = True) -> None:
        """Locate the next segment starting at ``left_index``.

        Calls ``split_once_index_only(text, separator, left_index, crlf)``,
        stores the returned index in ``next_index`` and sets ``right_index``
        to that index minus the returned separator count.

        Args:
            separator: characters treated as separators.
            crlf: forwarded to ``split_once_index_only``; when true, CR and LF
                are treated as separators as well.
        """
        ...

    def val(self) -> str:
        """Return ``text[left_index:right_index]``."""
        ...

    def apply(self) -> None:
        """Move ``left_index`` to ``next_index`` and reset ``right_index`` to the text length."""
        ...

    @property
    def complete(self) -> bool:
        """True when ``left_index`` equals the length of ``text``."""
        ...
53 changes: 47 additions & 6 deletions src/tarina/_string_c.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ cdef extern from "_op.h":
cdef dict QUOTES = {'"': '"', "'": "'"}
cdef unicode CRLF = "\n\r"

cpdef inline list split(str text, str separator, bint crlf=True):
def split(str text, str separator, bint crlf=True):
if crlf:
separator = PyUnicode_Concat(separator, CRLF)
text = str_strip(text, BOTHSTRIP, separator)
Expand Down Expand Up @@ -81,7 +81,7 @@ cpdef inline list split(str text, str separator, bint crlf=True):
return PyUnicode_Split(PyUnicode_Join('', result), '\1', -1)


cpdef inline tuple split_once(str text, str separator, bint crlf=True):
def split_once(str text, str separator, bint crlf=True):
if crlf:
separator = PyUnicode_Concat(separator, CRLF)
text = str_strip(text, LEFTSTRIP, separator)
Expand Down Expand Up @@ -129,7 +129,9 @@ cpdef inline tuple split_once(str text, str separator, bint crlf=True):
return PyUnicode_Join('', out_text), PyUnicode_Substring(text, index, PY_SSIZE_T_MAX)


cpdef inline tuple split_once_without_escape(str text, str separator, bint crlf=True):
def split_once_without_escape(str text, str separator, bint crlf=True):
if crlf:
separator = PyUnicode_Concat(separator, CRLF)
text = str_strip(text, LEFTSTRIP, separator)
cdef:
Py_ssize_t index = 0
Expand All @@ -141,7 +143,7 @@ cpdef inline tuple split_once_without_escape(str text, str separator, bint crlf=
while index < length:
ch = PyUnicode_READ_CHAR(text, index)
index += 1
if str_contains(separator, ch) or (crlf and str_contains(CRLF, ch)):
if str_contains(separator, ch):
if quotation == 0:
break
if first_quoted_sep_index == -1:
Expand All @@ -161,6 +163,8 @@ cpdef inline tuple split_once_without_escape(str text, str separator, bint crlf=


cpdef inline tuple split_once_index_only(str text, str separator, Py_ssize_t offset, bint crlf=True):
if crlf:
separator = PyUnicode_Concat(separator, CRLF)
cdef:
Py_ssize_t index = offset
Py_UCS4 quotation = 0
Expand All @@ -172,9 +176,9 @@ cpdef inline tuple split_once_index_only(str text, str separator, Py_ssize_t off
while index < length:
ch = PyUnicode_READ_CHAR(text, index)
index += 1
if str_contains(separator, ch) or (crlf and str_contains(CRLF, ch)):
if str_contains(separator, ch):
if quotation == 0:
sep = sep + 1
sep += 1
continue
if first_quoted_sep_index == -1:
first_quoted_sep_index = index
Expand All @@ -191,3 +195,40 @@ cpdef inline tuple split_once_index_only(str text, str separator, Py_ssize_t off
if index == length and first_quoted_sep_index != -1:
return first_quoted_sep_index, sep
return index, sep


cdef class String:
    """Cursor over ``text`` that exposes one separator-delimited segment at a time.

    Typical use is ``step()`` to locate the next segment, ``val()`` to read
    it, and ``apply()`` to move the cursor past it.

    The attributes are exported to Python so that this extension type matches
    the ``.pyi`` stub and the pure-Python fallback class; plain ``cdef``
    attributes are only reachable from Cython code.
    """
    # Index attributes are read/write from Python, like in the fallback class.
    cdef public Py_ssize_t left_index
    cdef public Py_ssize_t right_index
    cdef public Py_ssize_t next_index
    # ``text`` and its cached length are read-only from Python: rebinding
    # ``text`` (e.g. to None) would desynchronise ``_len`` and feed a non-str
    # object to the unchecked C-API calls below.
    cdef readonly Py_ssize_t _len
    cdef readonly str text

    def __init__(self, str text not None):
        """Store ``text`` and its length, and reset all three indices to 0.

        ``not None`` makes a ``None`` argument raise ``TypeError`` instead of
        reaching ``PyUnicode_GET_LENGTH``, which performs no type check.
        """
        self.text = text
        self._len = PyUnicode_GET_LENGTH(text)
        self.left_index = 0
        self.right_index = 0
        self.next_index = 0

    def step(self, str separator not None, bint crlf=True):
        """Locate the next segment starting at ``left_index``.

        Stores the index returned by ``split_once_index_only`` in
        ``next_index`` and sets ``right_index`` to that index minus the
        returned separator count.

        Args:
            separator: characters treated as separators.
            crlf: forwarded to ``split_once_index_only``; when true, CR and LF
                are treated as separators as well.
        """
        # Typed as a C integer; a bare ``cdef offset`` declares a Python object.
        cdef Py_ssize_t offset
        self.next_index, offset = split_once_index_only(self.text, separator, self.left_index, crlf)
        self.right_index = self.next_index - offset

    def val(self):
        """Return ``text[left_index:right_index]``."""
        return PyUnicode_Substring(self.text, self.left_index, self.right_index)

    def apply(self):
        """Move ``left_index`` to ``next_index`` and reset ``right_index`` to the text length."""
        self.left_index = self.next_index
        self.right_index = self._len

    @property
    def complete(self):
        """True when ``left_index`` equals the length of ``text``."""
        return self.left_index == self._len

    def __repr__(self):
        return f"String({self.text!r}[{self.left_index}:{self.right_index}])"

    def __str__(self):
        return self.val()
46 changes: 42 additions & 4 deletions src/tarina/_string_py.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,16 +65,17 @@ def split_once_without_escape(text: str, separator: str, crlf: bool = True):
Returns:
Tuple[str, str]: 切割后的字符串, 可能含有空格
"""
if crlf:
separator += CRLF
index, quotation = 0, ""
text = text.lstrip()
first_quoted_sep_index = -1
last_quote_index = 0
tlen = len(text)
for char in text:
index += 1
if char in separator or (crlf and char in CRLF):
if char in separator:
if not quotation:
#index -= 1
break
if first_quoted_sep_index == -1:
first_quoted_sep_index = index
Expand Down Expand Up @@ -104,6 +105,8 @@ def split_once_index_only(text: str, separator: str, offset: int, crlf: bool = T
Returns:
Tuple[str, str]: 切割后的字符串, 可能含有空格
"""
if crlf:
separator += CRLF
index = offset
quotation = ""
sep = 0
Expand All @@ -113,7 +116,7 @@ def split_once_index_only(text: str, separator: str, offset: int, crlf: bool = T
tlen = len(text)
for char in text:
index += 1
if char in separator or (crlf and char in CRLF):
if char in separator:
if not quotation:
sep += 1
continue
Expand All @@ -136,7 +139,6 @@ def split_once_index_only(text: str, separator: str, offset: int, crlf: bool = T
return index, sep



def split(text: str, separator: str, crlf: bool = True):
"""尊重引号与转义的字符串切分
Expand Down Expand Up @@ -186,3 +188,39 @@ def split(text: str, separator: str, crlf: bool = True):
for i in quoted_sep_index:
result[i] = "\0"
return str.join("", result).split("\0")


class String:
    """Cursor over ``text`` that exposes one separator-delimited segment at a time.

    Typical use is ``step()`` to locate the next segment, ``val()`` to read
    it, and ``apply()`` to move the cursor past it.
    """

    left_index: int
    right_index: int
    next_index: int
    _len: int
    text: str

    def __init__(self, text: str):
        """Remember ``text`` and its length; every index starts at 0."""
        self.left_index = self.right_index = self.next_index = 0
        self._len = len(text)
        self.text = text

    def step(self, separator: str, crlf: bool = True):
        """Locate the next segment, scanning from ``left_index``.

        ``next_index`` receives the index returned by
        ``split_once_index_only``; ``right_index`` is that index minus the
        returned separator count.
        """
        scan_end, sep_count = split_once_index_only(
            self.text, separator, self.left_index, crlf
        )
        self.next_index = scan_end
        self.right_index = scan_end - sep_count

    def val(self):
        """Return ``text[left_index:right_index]``."""
        start, stop = self.left_index, self.right_index
        return self.text[start:stop]

    def apply(self):
        """Move ``left_index`` to ``next_index`` and reset ``right_index`` to the text length."""
        self.left_index, self.right_index = self.next_index, self._len

    @property
    def complete(self):
        """True when ``left_index`` equals the length of ``text``."""
        return self._len == self.left_index

    def __repr__(self):
        return f"String({self.text!r}[{self.left_index}:{self.right_index}])"

    def __str__(self):
        return self.val()
14 changes: 10 additions & 4 deletions src/tarina/string.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import os
import sys

__all__ = ("split", "split_once")
__all__ = ("split", "split_once", "split_once_without_escape", "split_once_index_only", "String")


NO_EXTENSIONS = bool(os.environ.get("TARINA_NO_EXTENSIONS")) # type: bool
Expand All @@ -13,12 +13,18 @@
try:
from ._string_c import split as split # type: ignore[misc]
from ._string_c import split_once as split_once # type: ignore[misc]
from ._string_c import split_once_without_escape as split_once_without_escape # type: ignore[misc
from ._string_c import split_once_without_escape as split_once_without_escape # type: ignore[misc]
from ._string_c import split_once_index_only as split_once_index_only # type: ignore[misc]
from ._string_c import String as String # type: ignore[misc]
except ImportError: # pragma: no cover
from ._string_py import split as split # type: ignore[misc]
from ._string_py import split_once as split_once # type: ignore[misc]
from ._string_py import split_once_without_escape as split_once_without_escape
from ._string_py import split_once_without_escape as split_once_without_escape # type: ignore[misc]
from ._string_py import split_once_index_only as split_once_index_only # type: ignore[misc]
from ._string_py import String as String # type: ignore[misc]
else:
from ._string_py import split as split # type: ignore[misc]
from ._string_py import split_once as split_once # type: ignore[misc]
from ._string_py import split_once_without_escape as split_once_without_escape
from ._string_py import split_once_without_escape as split_once_without_escape # type: ignore[misc]
from ._string_py import split_once_index_only as split_once_index_only # type: ignore[misc]
from ._string_py import String as String # type: ignore[misc]

0 comments on commit ba072d7

Please sign in to comment.