-
Notifications
You must be signed in to change notification settings - Fork 17
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Achraf KHAZRI
authored and
Achraf KHAZRI
committed
Oct 3, 2019
1 parent
b382bd2
commit c8fbc74
Showing
3 changed files
with
43 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
## Translate API | ||
## :books: Documentation links | ||
- Translate Python library: this library is not free; it can only be used for a limited number of translations per day. [Link](https://pypi.org/project/translate/) | |
## Licence | ||
GuideMeGlasses | ||
:eyeglasses: |
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
from translate import Translator | ||
from nltk.tokenize import sent_tokenize | ||
import unicodedata | ||
|
||
|
||
class TranslateEngine():
    """Translate text sentence by sentence and strip accents from the result.

    The engine wraps a ``Translator`` from the ``translate`` package that is
    configured once, at construction time, with the target language.
    """

    def __init__(self, language):
        """Create the engine.

        Args:
            language: Target language; forwarded to ``Translator`` as
                its ``to_lang`` argument.
        """
        # Translator class instance
        self.translator = Translator(to_lang=language)

    def strip_accents(self, text):
        """Return ``text`` with accents (combining marks) removed.

        The text is decomposed with NFKD normalization, which separates base
        characters from their combining marks, and every character whose
        Unicode category is ``Mn`` (nonspacing mark) is then dropped.

        Args:
            text: The string to clean.

        Returns:
            The NFKD-normalized string without nonspacing marks.
        """
        return "".join(char for char in
                       unicodedata.normalize('NFKD', text)
                       if unicodedata.category(char) != 'Mn')

    def translate(self, text):
        """Translate ``text`` and return the result without accents.

        Args:
            text: The text to translate; it may contain several sentences.

        Returns:
            The translated sentences joined by single spaces, with accents
            removed. The result has no leading space.
        """
        # Subdivide the text into sentences because the translator can only
        # translate 500 characters per step.
        # NOTE(review): a single sentence longer than that limit is still
        # sent as-is -- confirm how the translator reacts.
        sentences = sent_tokenize(text)

        # Translate every sentence on its own, then join the pieces with a
        # single space. Joining (rather than prepending " " to each piece)
        # avoids a stray leading space in the output.
        translations = [self.translator.translate(s) for s in sentences]
        translated = " ".join(translations)

        # Remove accents (relevant e.g. for French output)
        return self.strip_accents(translated)