Skip to content

Commit 8a30323

Browse files
authored
Merge pull request #9 from jenojp/develop
Spacy 3.3 support
2 parents ce7edbb + 95c6a0a commit 8a30323

File tree

4 files changed, with 40 additions and 23 deletions.

azure-pipelines.yml

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -12,34 +12,34 @@ trigger:
1212

1313
strategy:
1414
matrix:
15-
Python36Linux:
16-
imageName: 'ubuntu-18.04'
17-
python.version: '3.6'
1815
Python37Linux:
19-
imageName: 'ubuntu-18.04'
16+
imageName: 'ubuntu-20.04'
2017
python.version: '3.7'
2118
Python38Linux:
22-
imageName: 'ubuntu-18.04'
19+
imageName: 'ubuntu-20.04'
2320
python.version: '3.8'
24-
Python36Mac:
25-
imageName: 'macos-10.15'
26-
python.version: '3.6'
21+
Python39Linux:
22+
imageName: 'ubuntu-20.04'
23+
python.version: '3.9'
2724
Python37Mac:
28-
imageName: 'macos-10.15'
25+
imageName: 'macos-11'
2926
python.version: '3.7'
3027
Python38Mac:
31-
imageName: 'macos-10.15'
28+
imageName: 'macos-11'
3229
python.version: '3.8'
33-
Python36Windows:
34-
imageName: 'vs2017-win2016'
35-
python.version: '3.6'
30+
Python39Mac:
31+
imageName: 'macos-11'
32+
python.version: '3.9'
3633
Python37Windows:
37-
imageName: 'vs2017-win2016'
34+
imageName: 'windows-2019'
3835
python.version: '3.7'
3936
Python38Windows:
40-
imageName: 'vs2017-win2016'
37+
imageName: 'windows-2019'
4138
python.version: '3.8'
42-
maxParallel: 4
39+
Python39Windows:
40+
imageName: 'windows-2019'
41+
python.version: '3.9'
42+
maxParallel: 9
4343

4444
pool:
4545
vmImage: $(imageName)

extractacy/extract.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@ def __call__(self, doc):
2323
in the pipeline, if available.
2424
"""
2525
matches = self.matcher(doc)
26+
for match_id, start, end in matches:
27+
print(self.nlp.vocab.strings[match_id], start, end)
2628
for e in doc.ents:
2729
if e.label_ not in self.ent_patterns.keys():
2830
e._.value_extract = []

extractacy/test.py

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,12 @@ def build_docs():
1111
"Discharge Date: 11/15/2008. Patient had temp reading of 102.6 degrees. Insurance claim sent to patient's account on file: 1112223. 12/31/2008: Payment received.",
1212
[
1313
("Discharge Date", ["11/15/2008"]),
14-
("11/15/2008", []),
14+
# ("11/15/2008", []),
1515
("temp", ["102.6 degrees"]),
1616
("102.6 degrees", []),
1717
("account", ["1112223"]),
1818
("1112223", []),
19-
("12/31/2008", []),
19+
# ("12/31/2008", []),
2020
("Payment received", ["12/31/2008"]),
2121
],
2222
)
@@ -60,6 +60,7 @@ def build_docs():
6060
(
6161
"We believe 01/01/1980 is his date of birth but it could also be 01/02/1980",
6262
[
63+
("01/01/1980",[]),
6364
("date of birth", ["01/01/1980", "01/02/1980"]),
6465
("01/02/1980",[])
6566
],
@@ -78,14 +79,17 @@ def build_docs():
7879
docs.append(
7980
(
8081
"We believe 01/01/1980 is his date of birth",
81-
[("date of birth", ["01/01/1980"])],
82+
[
83+
("01/01/1980", []),
84+
("date of birth", ["01/01/1980"])
85+
],
8286
)
8387
)
8488
# test outside boundary
8589
docs.append(
8690
(
8791
"Discharge date unknown. 12/12/1999 date of confirmation.",
88-
[("Discharge date", []), ("12/12/1999 date", [])],
92+
[("Discharge date", []), ("12/12/1999", [])],
8993
)
9094
)
9195

@@ -164,9 +168,20 @@ def test():
164168
docs = build_docs()
165169
for d in docs:
166170
doc = nlp(d[0])
171+
print()
172+
print()
173+
print("sentences...")
174+
for s in doc.sents:
175+
print(s)
176+
print("tokens....")
177+
print([t.text for t in doc])
178+
print("entities....")
179+
for e in doc.ents:
180+
print(e.text, e.start, e.end)
181+
print("value extracts....")
167182
for i, e in enumerate(doc.ents):
168183
print(e.text, e._.value_extract)
169-
print([t.text for t in doc])
184+
for i, e in enumerate(doc.ents):
170185
assert (e.text, e._.value_extract) == d[1][i]
171186

172187

setup.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
setup(
55
name = 'extractacy',
6-
version = 'v1.0.1',
6+
version = 'v1.0.2',
77
url = 'https://github.com/jenojp/extractacy',
88
author = 'Jeno Pizarro',
99
author_email = '[email protected]',
@@ -24,7 +24,7 @@
2424
packages = find_packages(exclude=["*.tests", "*.tests.*", "tests.*", "tests"]),
2525
license="MIT",
2626
install_requires=[
27-
"spacy>=3.0.1,<3.2.0",
27+
"spacy>=3.0.1,<4.0.0",
2828
],
2929
tests_require=[
3030
"pytest",

0 commit comments