1
1
import sys
2
- import ffilib
3
2
import array
3
+ import ffilib
4
+ import uctypes
4
5
6
# Handle to the PCRE2 shared library (the "-8" library, whose exported
# symbols carry the "_8" suffix used in the lookups below).
pcre2 = ffilib.open("libpcre2-8")

# Each binding is func(return_type, symbol_name, argument_types). The type
# and symbol strings must not contain whitespace, or the lookup fails.

# pcre2_code *pcre2_compile(PCRE2_SPTR pattern, PCRE2_SIZE length,
#       uint32_t options, int *errorcode, PCRE2_SIZE *erroroffset,
#       pcre2_compile_context *ccontext);
pcre2_compile = pcre2.func("p", "pcre2_compile_8", "siippp")

# int pcre2_match(const pcre2_code *code, PCRE2_SPTR subject,
#       PCRE2_SIZE length, PCRE2_SIZE startoffset, uint32_t options,
#       pcre2_match_data *match_data, pcre2_match_context *mcontext);
pcre2_match = pcre2.func("i", "pcre2_match_8", "Psiiipp")

# int pcre2_pattern_info(const pcre2_code *code, uint32_t what,
#       void *where);
pcre2_pattern_info = pcre2.func("i", "pcre2_pattern_info_8", "Pip")

# PCRE2_SIZE *pcre2_get_ovector_pointer(pcre2_match_data *match_data);
pcre2_get_ovector_pointer = pcre2.func("p", "pcre2_get_ovector_pointer_8", "p")

# pcre2_match_data *pcre2_match_data_create_from_pattern(const pcre2_code *code,
#       pcre2_general_context *gcontext);
pcre2_match_data_create_from_pattern = pcre2.func(
    "p", "pcre2_match_data_create_from_pattern_8", "Pp"
)
22
30
23
# Compile option bits. The values mirror the PCRE2_* option constants in
# pcre2.h so that they can be passed unchanged to pcre2_compile().
IGNORECASE = I = 0x8  # PCRE2_CASELESS
MULTILINE = M = 0x400  # PCRE2_MULTILINE
DOTALL = S = 0x20  # PCRE2_DOTALL
# PCRE2_EXTENDED. The old libpcre value (8) is PCRE2_CASELESS in PCRE2, so
# keeping it would make VERBOSE silently behave like IGNORECASE.
VERBOSE = X = 0x80
PCRE2_ANCHORED = 0x80000000

# The real value in pcre2.h is 0xFFFFFFFF on 32-bit and
# 0xFFFFFFFFFFFFFFFF on 64-bit, which is equivalent to -1.
PCRE2_ZERO_TERMINATED = -1

# TODO: Python 3 strings are Unicode by default; these flags are
# currently accepted but have no effect.
ASCII = A = 0
UNICODE = U = 0

# "what" selector for pcre2_pattern_info(): highest capture group number.
PCRE2_INFO_CAPTURECOUNT = 4
34
47
35
48
36
49
class PCREMatch :
@@ -67,19 +80,26 @@ def __init__(self, compiled_ptn):
67
80
def search(self, s, pos=0, endpos=-1, _flags=0):
    """Run the compiled pattern against *s*, starting at offset *pos*.

    Args:
        s: subject string handed to pcre2_match().
        pos: start offset within *s*.
        endpos: only the default (-1) is supported.
        _flags: extra PCRE2 match options (match() passes PCRE2_ANCHORED).

    Returns:
        A PCREMatch on success, or None when the pattern does not match.

    Raises:
        MemoryError: if PCRE2 cannot allocate the match data block.
        ValueError: if pcre2_match() reports an error other than "no match".
    """
    assert endpos == -1, "pos: %d, endpos: %d" % (pos, endpos)
    buf = array.array("i", [0])
    pcre2_pattern_info(self.obj, PCRE2_INFO_CAPTURECOUNT, buf)
    cap_count = buf[0]
    # NOTE(review): match_data is never released; this needs a
    # pcre2_match_data_free binding, which is not available in this scope.
    match_data = pcre2_match_data_create_from_pattern(self.obj, None)
    if not match_data:
        raise MemoryError("pcre2_match_data_create_from_pattern failed")
    num = pcre2_match(self.obj, s, len(s), pos, _flags, match_data, None)
    if num == -1:
        # No match
        return None
    if num < 0:
        # Any other negative code is a real error; the ovector contents are
        # not meaningful, so do not build a match object from them.
        raise ValueError("pcre2_match error %d" % num)
    ov_ptr = pcre2_get_ovector_pointer(match_data)
    # pcre2_get_ovector_pointer returns a pointer to PCRE2_SIZE values, which
    # are of type size_t. Use ULONG as type to support both 32bit and 64bit.
    ov_buf = uctypes.bytearray_at(
        ov_ptr, uctypes.sizeof({"field": 0 | uctypes.ULONG}) * (cap_count + 1) * 2
    )
    ov = array.array("L", ov_buf)
    # We don't care how many matching subexpressions we got, we
    # care only about total # of capturing ones (including empty)
    return PCREMatch(s, cap_count + 1, ov)
80
100
81
101
def match(self, s, pos=0, endpos=-1):
    """Like search(), but with the PCRE2_ANCHORED option set.

    Returns whatever search() returns for the same arguments.
    """
    anchored_result = self.search(s, pos, endpos, PCRE2_ANCHORED)
    return anchored_result
83
103
84
104
def sub (self , repl , s , count = 0 ):
85
105
if not callable (repl ):
@@ -141,9 +161,9 @@ def findall(self, s):
141
161
142
162
143
163
def compile(pattern, flags=0):
    """Compile *pattern* with PCRE2 and wrap the result in a PCREPattern.

    Args:
        pattern: regular expression source, passed as a zero-terminated
            string (PCRE2_ZERO_TERMINATED).
        flags: PCRE2 compile option bits (e.g. IGNORECASE | MULTILINE).

    Returns:
        A PCREPattern wrapping the compiled code pointer.

    Raises:
        AssertionError: if pcre2_compile() returns NULL; the message carries
            the PCRE2 error code and the offset reported by the library.
    """
    # pcre2_compile() writes through both pointers on failure, so the buffers
    # must be mutable and large enough: errorcode is an int, erroroffset is a
    # PCRE2_SIZE (size_t), which is wider than 4 bytes on 64-bit targets.
    errcode = array.array("i", [0])
    erroffset = array.array("L", [0])
    regex = pcre2_compile(pattern, PCRE2_ZERO_TERMINATED, flags, errcode, erroffset, None)
    assert regex, "pcre2_compile error %d at offset %d" % (errcode[0], erroffset[0])
    return PCREPattern(regex)
149
169
@@ -154,7 +174,7 @@ def search(pattern, string, flags=0):
154
174
155
175
156
176
def match(pattern, string, flags=0):
    """Compile *pattern* with PCRE2_ANCHORED added to *flags* and search *string*.

    Returns the result of the compiled pattern's search() on *string*.
    """
    anchored_flags = flags | PCRE2_ANCHORED
    return compile(pattern, anchored_flags).search(string)
159
179
160
180
0 commit comments