-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathstring-processing.py
112 lines (80 loc) · 2.82 KB
/
string-processing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
## @knitr py-basics
## "+" glues the pieces together with no separator between them.
print('My' + 'name' + 'is' + 'Chris' + '.')
## Joining on a single space puts one blank between consecutive pieces.
out = " ".join(["My", "name", "is", "Chris", "."])
print(out)
print(len(out))
## Splitting on the same separator recovers the original pieces.
print(out.split(" "))
## @knitr py-substrings
var = '13:47:00'
## Take the two characters starting at index 3 (the end index is exclusive).
print(var[3:3 + 2])
## @knitr py-lists
## Unpack the string into a list of single characters so it can be edited
## in place (strings themselves are immutable).
var = [*"13:47:00"]
print(var)
## Slice assignment overwrites the first two characters.
var[:2] = ["0", "1"]
print("".join(var))
## @knitr py-find-substrings
## Named `dates` rather than `vars` so the builtin vars() is not shadowed.
dates = ["08-03-2016", "09-05-2007", "01-02-2016"]
## str.find returns the index of the first occurrence, or -1 when absent.
print(dates[0].find("2016"))
print(dates[1].find("2016"))
## Same lookup applied to every element.
print([d.find("2016") for d in dates])
## @knitr py-search
import re
text = [
    "Here's my number: 919-543-3300.",
    "hi John, good to meet you",
    "They bought 731 bananas",
    "Please call 919.554.3800",
]
## Raw string: the backslash reaches the regex engine untouched. A plain "\d"
## is an invalid string escape and triggers a warning on current Pythons.
## re.search returns a match object for the first run of digits only.
m = re.search(r"\d+", text[0])
print(m.group())  ## the matched text
print(m.start())  ## index where the match begins
print(m.end())    ## index just past the end of the match
print(m.span())   ## (start, end) as a tuple
## @knitr py-search2
import re
text = [
    "Here's my number: 919-543-3300.",
    "hi John, good to meet you",
    "They bought 731 bananas",
    "Please call 919.554.3800",
]
## re.findall returns every non-overlapping match as a list of strings.
## Raw string avoids the invalid "\d" string escape.
print(re.findall(r"\d+", text[0]))
## @knitr py-ignore-case
import re
## Named `phrase` rather than `str` so the builtin str type is not shadowed
## for the rest of the session.
phrase = "That cat in the Hat"
## re.IGNORECASE makes "hat" match regardless of letter case.
print(re.findall("hat", phrase, re.IGNORECASE))
## @knitr py-list-comp
import re
text = [
    "Here's my number: 919-543-3300.",
    "hi John, good to meet you",
    "They bought 731 bananas",
    "Please call 919.554.3800",
]


def return_group(pattern, txt):
    """Return the text of the first match of `pattern` in `txt`.

    Returns None when `pattern` does not match anywhere in `txt`.
    """
    m = re.search(pattern, txt)
    ## re.search yields None on failure, so guard before calling .group().
    return m.group() if m else None


## The loop variable is `line`, not `str`, to avoid reusing the builtin's name;
## the raw string avoids the invalid "\d" string escape.
print([return_group(r"\d+", line) for line in text])
## @knitr py-replace1
import re
text = [
    "Here's my number: 919-543-3300.",
    "hi John, good to meet you",
    "They bought 731 bananas",
    "Please call 919.554.3800",
]
## Replace every single digit with "Z". Raw string avoids the invalid "\d"
## string escape.
print(re.sub(r"\d", "Z", text[0]))
## @knitr py-replace2
import re
text = '"H4NY07011","ACKERMAN, GARY L.","H","$13,242",,,'
## Drop any comma that directly follows a character that is neither a double
## quote nor a comma; the captured character is put back via \1.
print(re.sub(pattern="([^\",]),", repl="\\1", string=text))
## @knitr py-greedy
import re
text = "Do an internship <b> in place </b> of <b> one </b> course."
## First pattern is greedy (".*" runs to the last ">"); the second is
## non-greedy (".*?" stops at the nearest ">"), so each tag is removed alone.
for tag_pattern in ("<.*>", "<.*?>"):
    print(re.sub(tag_pattern, "", text))
## @knitr py-compile
import re
text = [
    "Here's my number: 919-543-3300.",
    "hi John, good to meet you",
    "They bought 731 bananas",
    "Please call 919.554.3800",
]
## Compile the pattern once so the resulting object can be reused for several
## searches. Raw string avoids the invalid "\d" string escape.
p = re.compile(r"\d+")
m = p.search(text[0])
print(m.group())
## @knitr py-escape
import re
strings = ["Hello", "Hello.", "Hello\nthere", "Hello\\nthere"]
print(strings[2])  ## holds a real newline character, so it prints on two lines
print(strings[3])  ## holds a backslash followed by the letter n
print(re.search(".", strings[0]))     ## . means any character
## Raw strings below: a plain "\." is an invalid string escape and warns.
print(re.search(r"\.", strings[0]))   ## \. matches a literal period: none here
print(re.search(r"\.", strings[1]))   ## \. matches the literal period at the end
print(re.search("\n", strings[2]))    ## "\n" is a newline character: found
print(re.search("\n", strings[3]))    ## no newline character in this string
print(re.search("\\\\", strings[3]))  ## string parser turns the four \ into \\;
## the regex \\ then matches one literal backslash