-
Notifications
You must be signed in to change notification settings - Fork 0
/
pytesser.py
25 lines (20 loc) · 717 Bytes
/
pytesser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
"""OCR in Python using the Tesseract engine from Google
https://github.com/baituhuangyu/pytesser
by baituhuangyu
V 0.0.1, 2016/01/01"""
# Requires the tesseract 3.03 executable to be available on PATH.
import io
import StringIO
import subprocess
import traceback
def image_to_string(tmp_img, language="eng", psm="3"):
    """Run Tesseract OCR on an in-memory image and return its output.

    The image is serialized to BMP in a memory buffer and piped to the
    ``tesseract`` executable on stdin; the recognized text is read back from
    the process's stdout.

    Args:
        tmp_img: Object exposing ``save(fileobj, format)`` -- presumably a
            PIL/Pillow ``Image``; confirm against callers.
        language: Value passed to tesseract's ``-l`` option.
        psm: Value passed to tesseract's ``-psm`` option. Non-string values
            (e.g. an int) are converted with ``str``.

    Returns:
        The raw stdout of the tesseract process (a byte string), or ``None``
        if any step raised an exception. In the failure case the traceback
        is printed to stderr instead of being propagated.
    """
    try:
        # BMP is binary data, so it needs a bytes buffer rather than a text one.
        buf = io.BytesIO()
        tmp_img.save(buf, "BMP")

        cmd = ["tesseract", "stdin", "stdout", "-l", language, "-psm", str(psm)]
        proc = subprocess.Popen(
            cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE
        )
        # stderr is not piped, so the second element is always None.
        stdoutput, _ = proc.communicate(buf.getvalue())
        return stdoutput
    except Exception:
        # Best-effort API: report the failure but do not raise to the caller.
        # print_exc() takes no exception argument; its first parameter is
        # the traceback depth limit.
        traceback.print_exc()
        return None