diff --git a/unix-ffi/re/re.py b/unix-ffi/re/re.py index d37584320..bd9566cb9 100644 --- a/unix-ffi/re/re.py +++ b/unix-ffi/re/re.py @@ -1,36 +1,55 @@ import sys import ffilib import array +import uctypes +pcre2 = ffilib.open("libpcre2-8") -pcre = ffilib.open("libpcre") +# pcre2_code *pcre2_compile(PCRE2_SPTR pattern, PCRE2_SIZE length, +# uint32_t options, int *errorcode, PCRE2_SIZE *erroroffset, +# pcre2_compile_context *ccontext); +pcre2_compile = pcre2.func("p", "pcre2_compile_8", "siippp") -# pcre *pcre_compile(const char *pattern, int options, -# const char **errptr, int *erroffset, -# const unsigned char *tableptr); -pcre_compile = pcre.func("p", "pcre_compile", "sipps") +# int pcre2_match(const pcre2_code *code, PCRE2_SPTR subject, +# PCRE2_SIZE length, PCRE2_SIZE startoffset, uint32_t options, +# pcre2_match_data *match_data, pcre2_match_context *mcontext); +pcre2_match = pcre2.func("i", "pcre2_match_8", "Psiiipp") -# int pcre_exec(const pcre *code, const pcre_extra *extra, -# const char *subject, int length, int startoffset, -# int options, int *ovector, int ovecsize); -pcre_exec = pcre.func("i", "pcre_exec", "PPsiiipi") +# int pcre2_pattern_info(const pcre2_code *code, uint32_t what, +# void *where); +pcre2_pattern_info = pcre2.func("i", "pcre2_pattern_info_8", "Pip") -# int pcre_fullinfo(const pcre *code, const pcre_extra *extra, -# int what, void *where); -pcre_fullinfo = pcre.func("i", "pcre_fullinfo", "PPip") +# PCRE2_SIZE *pcre2_get_ovector_pointer(pcre2_match_data *match_data); +pcre2_get_ovector_pointer = pcre2.func("p", "pcre2_get_ovector_pointer_8", "p") +# pcre2_match_data *pcre2_match_data_create_from_pattern(const pcre2_code *code, +# pcre2_general_context *gcontext); +pcre2_match_data_create_from_pattern = pcre2.func( + "p", "pcre2_match_data_create_from_pattern_8", "Pp" +) -IGNORECASE = I = 1 -MULTILINE = M = 2 -DOTALL = S = 4 -VERBOSE = X = 8 -PCRE_ANCHORED = 0x10 +# PCRE2_SIZE that is of type size_t. +# Use ULONG as type to support both 32bit and 64bit. +PCRE2_SIZE_SIZE = uctypes.sizeof({"field": 0 | uctypes.ULONG}) +PCRE2_SIZE_TYPE = "L" + +# Real value in pcre2.h is 0xFFFFFFFF for 32bit and +# 0x0xFFFFFFFFFFFFFFFF for 64bit that is equivalent +# to -1 +PCRE2_ZERO_TERMINATED = -1 + + +IGNORECASE = I = 0x8 +MULTILINE = M = 0x400 +DOTALL = S = 0x20 +VERBOSE = X = 0x80 +PCRE2_ANCHORED = 0x80000000 # TODO. Note that Python3 has unicode by default ASCII = A = 0 UNICODE = U = 0 -PCRE_INFO_CAPTURECOUNT = 2 +PCRE2_INFO_CAPTURECOUNT = 0x4 class PCREMatch: @@ -67,19 +86,23 @@ def __init__(self, compiled_ptn): def search(self, s, pos=0, endpos=-1, _flags=0): assert endpos == -1, "pos: %d, endpos: %d" % (pos, endpos) buf = array.array("i", [0]) - pcre_fullinfo(self.obj, None, PCRE_INFO_CAPTURECOUNT, buf) + pcre2_pattern_info(self.obj, PCRE2_INFO_CAPTURECOUNT, buf) cap_count = buf[0] - ov = array.array("i", [0, 0, 0] * (cap_count + 1)) - num = pcre_exec(self.obj, None, s, len(s), pos, _flags, ov, len(ov)) + match_data = pcre2_match_data_create_from_pattern(self.obj, None) + num = pcre2_match(self.obj, s, len(s), pos, _flags, match_data, None) if num == -1: # No match return None + ov_ptr = pcre2_get_ovector_pointer(match_data) + # pcre2_get_ovector_pointer return PCRE2_SIZE + ov_buf = uctypes.bytearray_at(ov_ptr, PCRE2_SIZE_SIZE * (cap_count + 1) * 2) + ov = array.array(PCRE2_SIZE_TYPE, ov_buf) # We don't care how many matching subexpressions we got, we # care only about total # of capturing ones (including empty) return PCREMatch(s, cap_count + 1, ov) def match(self, s, pos=0, endpos=-1): - return self.search(s, pos, endpos, PCRE_ANCHORED) + return self.search(s, pos, endpos, PCRE2_ANCHORED) def sub(self, repl, s, count=0): if not callable(repl): @@ -141,9 +164,9 @@ def findall(self, s): def compile(pattern, flags=0): - errptr = bytes(4) + errcode = bytes(4) erroffset = bytes(4) - regex = pcre_compile(pattern, flags, errptr, erroffset, None) + regex = pcre2_compile(pattern, PCRE2_ZERO_TERMINATED, flags, errcode, erroffset, None) assert regex return PCREPattern(regex) @@ -154,7 +177,7 @@ def search(pattern, string, flags=0): def match(pattern, string, flags=0): - r = compile(pattern, flags | PCRE_ANCHORED) + r = compile(pattern, flags | PCRE2_ANCHORED) return r.search(string)