Skip to content

Commit

Permalink
Encodings
Browse files: browse the repository at this point in the history
  • Loading branch information
Nils Reimers committed Jul 11, 2017
1 parent 506142b commit cd78a2d
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 28 deletions.
14 changes: 4 additions & 10 deletions 2017-07_Seminar/Session 1 - SENNA/code for POS/preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,18 +191,12 @@ def getCasing(word, caseLookup):
else:
print(embeddingsPath, "does not exist. Please provide pre-trained embeddings")
exit()

if embeddingsPath.endswith('.gz'):
try:
fEmbeddings = gzip.open(embeddingsPath, "rt", encoding="utf8")
except ValueError:
# Workaround for Python 2.7 under Windows
fEmbeddings = gzip.open(embeddingsPath, "r", encoding="utf8")
else:
fEmbeddings = open(embeddingsPath, encoding="utf8")

# :: Load the pre-trained embeddings file ::
fEmbeddings = gzip.open(embeddingsPath, "r") if embeddingsPath.endswith('.gz') else open(embeddingsPath, encoding="utf8")

for line in fEmbeddings:
split = line.strip().split(" ")
split = line.decode("utf-8").strip().split(" ")
word = split[0]

if len(word2Idx) == 0: #Add padding+unknown
Expand Down
11 changes: 2 additions & 9 deletions 2017-07_Seminar/Session 2 - Sentence CNN/code/preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,18 +124,11 @@ def readFile(filepath):
exit()

# :: Load the pre-trained embeddings file ::
if embeddingsPath.endswith('.gz'):
try:
fEmbeddings = gzip.open(embeddingsPath, "rt", encoding="utf8")
except ValueError:
# Workaround for Python 2.7 under Windows
fEmbeddings = gzip.open(embeddingsPath, "r", encoding="utf8")
else:
fEmbeddings = open(embeddingsPath, encoding="utf8")
fEmbeddings = gzip.open(embeddingsPath, "r") if embeddingsPath.endswith('.gz') else open(embeddingsPath, encoding="utf8")

print("Load pre-trained embeddings file")
for line in fEmbeddings:
split = line.strip().split(" ")
split = line.decode("utf-8").strip().split(" ")
word = split[0]

if len(word2Idx) == 0: #Add padding+unknown
Expand Down
11 changes: 2 additions & 9 deletions 2017-07_Seminar/Session 3 - Relation CNN/code/preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,18 +151,11 @@ def getWordIdx(token, word2Idx):
exit()

# :: Load the pre-trained embeddings file ::
if embeddingsPath.endswith('.gz'):
try:
fEmbeddings = gzip.open(embeddingsPath, "rt", encoding="utf8")
except ValueError:
# Workaround for Python 2.7 under Windows
fEmbeddings = gzip.open(embeddingsPath, "r", encoding="utf8")
else:
fEmbeddings = open(embeddingsPath, encoding="utf8")
fEmbeddings = gzip.open(embeddingsPath, "r") if embeddingsPath.endswith('.gz') else open(embeddingsPath, encoding="utf8")

print("Load pre-trained embeddings file")
for line in fEmbeddings:
split = line.strip().split(" ")
split = line.decode('utf-8').strip().split(" ")
word = split[0]

if len(word2Idx) == 0: #Add padding+unknown
Expand Down

0 comments on commit cd78a2d

Please sign in to comment.