Skip to content

Response contains notranslate #37

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 31 additions & 34 deletions GoogleTranslate/gtranslate.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,40 +174,37 @@
# the html request
def translate(to_translate, to_language="auto", language="auto"):
    """Translate *to_translate* by scraping Google Translate's mobile page.

    Args:
        to_translate: Text to translate.
        to_language: Target language code sent as ``tl`` ("auto" by default).
        language: Source language code sent as ``sl`` ("auto" by default).

    Returns:
        The translated text with HTML entities unescaped and every single
        quote backslash-escaped. If the result container cannot be located
        in the response, *to_translate* is returned unchanged.
    """
    # send request
    # Passing the values through ``params`` lets requests URL-encode them, so
    # characters such as '&', '#', '+' or '%' in the text no longer corrupt
    # the query string (spaces are still sent as '+').
    r = requests.get(
        "https://translate.google.com/m",
        params={
            "sl": language,
            "tl": to_language,
            "q": to_translate,
            "op": "translate",
        },
    )

    # Compare the charset declared inside the HTML with the one requests
    # detected. Only warn when a declaration was actually found; a missing
    # marker previously produced a bogus "conflict" built from a -1 slice.
    declared_charset = _between(r.text, 'charset=', '" http-equiv')
    if declared_charset is not None and declared_charset != r.encoding:
        print('\x1b[1;31;40m' + 'Warning: Potential Charset conflict')
        print(" Encoding as extracted by SELF : " + declared_charset)
        # r.encoding may be None, so convert explicitly before concatenating.
        print(" Encoding as detected by REQUESTS : " + str(r.encoding) + '\x1b[0m')

    # Work around an AGE OLD Python bug in case of windows-874 encoding
    # https://bugs.python.org/issue854511
    if r.encoding == 'windows-874' and os.name == 'posix':
        print(
            '\x1b[1;31;40m' + "Alert: Working around age old Python bug (https://bugs.python.org/issue854511)\nOn Linux, charset windows-874 must be labeled as charset cp874" + '\x1b[0m')
        r.encoding = 'cp874'

    # Read the body only after the encoding fix above so it is decoded with
    # the corrected codec, then cut out the translation.
    translation = _between(r.text, 'class="result-container">', '</div>')
    if translation is None:
        # Unexpected page layout or error page: hand back the input instead
        # of returning an arbitrary slice of HTML.
        return to_translate

    # fix parameter strings: undo the spaces inserted into printf-style
    # placeholders, e.g. "% s" -> " %s" and "% 1 $ s" -> " %1$s".
    translation = re.sub(r'% ([ds])', r' %\1', translation)
    translation = re.sub(r'% ([\d]) \$ ([ds])', r' %\1$\2', translation).strip()
    return html.unescape(translation).replace("'", r"\'")


def _between(text, start, end):
    """Return the part of *text* between the first *start* and the next *end*.

    Returns None when either marker is missing, so callers never slice on the
    -1 that ``str.find`` reports for "not found".
    """
    _, found_start, tail = text.partition(start)
    if not found_start:
        return None
    value, found_end, _ = tail.partition(end)
    return value if found_end else None



Expand Down