newer lib, eval
suchanv committed Nov 26, 2017
1 parent 2fad361 commit 929411e
Showing 2 changed files with 53 additions and 14 deletions.
7 changes: 6 additions & 1 deletion eval.py
@@ -30,6 +30,8 @@ def build_parser():
help='Get all NNs (not just top k) to output all rankings')
parser.add_argument('-w', '--overwrite', dest='overwrite', action='store_true',
help='overwrite inference results')
parser.add_argument('-f', '--filepath_word_embedding', dest='filepath_word_embedding',
help='filepath of word embedding model')
return parser


@@ -50,9 +52,12 @@ def build_parser():
with open(args.synset_list, "r") as testing_synsets:
testing_wnids = testing_synsets.read().split()

main_word2vec = load_pre_trained_vector(args.filepath_word_embedding)

count_correct, count_total = accuracy(args.threshold, args.top_t, testing_wnids, args.cnn_results_dir, args.output_dir,\
label_pool, args.error_log_file, args.output_log_file,\
debug=args.debug, overwrite=args.overwrite, get_all_nns=args.get_all_nns, top_k=args.top_k)
debug=args.debug, overwrite=args.overwrite, get_all_nns=args.get_all_nns, top_k=args.top_k, \
main_word2vec = main_word2vec)

if count_total != 0:
accuracy = 1.0 * count_correct / count_total
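Note (not part of the diff): a minimal sketch of the new eval.py flow, assuming eval.py imports load_pre_trained_vector and accuracy from library.py; the import line is an assumption, everything else mirrors the call shown above.

# Illustrative sketch only: the embedding now comes from the new
# -f / --filepath_word_embedding flag and is passed through to accuracy().
from library import load_pre_trained_vector, accuracy  # assumed import

main_word2vec = load_pre_trained_vector(args.filepath_word_embedding)
count_correct, count_total = accuracy(
    args.threshold, args.top_t, testing_wnids, args.cnn_results_dir, args.output_dir,
    label_pool, args.error_log_file, args.output_log_file,
    debug=args.debug, overwrite=args.overwrite,
    get_all_nns=args.get_all_nns, top_k=args.top_k,
    main_word2vec=main_word2vec)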
60 changes: 47 additions & 13 deletions library.py
@@ -1,7 +1,5 @@
import numpy as np
import sys
sys.path.insert(0, '../models/research/slim/')
from datasets import imagenet
import heapq
import os
import timeit
@@ -34,7 +32,8 @@ def add_child(parent, child, dic_parent = {}, dic_child = {}):
# ====================================================================
# loading dictionary mapping word to vector
print "Loading Glove..."
MAIN_WORD2VEC = load_pre_trained_vector("glove.840B.300d.txt")
# MAIN_WORD2VEC = load_pre_trained_vector("../glove.6B.50d.txt")
MAIN_WORD2VEC = None
print "Finished loading Glove"

# create dictionary mapping synset to list of labels
@@ -102,29 +101,29 @@ def is_one_word(synset, main_word2vec = MAIN_WORD2VEC, id_labels = ID_LABELS):
labels = id_labels[synset]
res = 1
for label in labels:
if valid_one_word(label, MAIN_WORD2VEC):
if valid_one_word(label, main_word2vec):
res *= 0
else:
res *= 1
return not res

# return synset which is the most recent ancestor that has one-word label
# ancestor including itself
def get_recent_oneword_ancestor(synset, dic_parent = DIC_PARENT, main_word2vec = MAIN_WORD2VEC):
def get_recent_oneword_ancestor(synset, dic_parent = DIC_PARENT, main_word2vec = MAIN_WORD2VEC, id_labels = ID_LABELS):
level = 0
while level >= 0:
for ancestor in find_ancestor(synset, level, dic_parent):
if is_one_word(ancestor, main_word2vec):
if is_one_word(ancestor, main_word2vec, id_labels):
return ancestor
level += 1

# return any one-word label from the most recent ancestor (including itself)
def get_one_word(synset, dic_parent = DIC_PARENT, main_word2vec = MAIN_WORD2VEC, id_labels = ID_LABELS):
curr = get_recent_oneword_ancestor(synset, dic_parent, main_word2vec)
curr = get_recent_oneword_ancestor(synset, dic_parent, main_word2vec, id_labels)
labels = id_labels[curr]
for label in labels:
if valid_one_word(label, main_word2vec):
return label, is_one_word(synset, main_word2vec)
return label, is_one_word(synset, main_word2vec, id_labels)

# return set of words having distance dist from node
def hop_dist(node, dist, start_at_1 = False, dic_parent = DIC_PARENT, dic_child = DIC_CHILD):
@@ -178,9 +177,46 @@ def max_similarity_score_vec_id(vec, label_id, id_labels = ID_LABELS, main_word2
similarity = similarity_score(vec, word_embed_name)
max_similarity = max(similarity, max_similarity)
if max_similarity == -1000:
max_similarity = similarity_score(get_vec(get_one_word(label_id, dic_parent, main_word2vec, id_labels)[0]), vec)
max_similarity = similarity_score(get_vec(get_one_word(label_id, dic_parent, main_word2vec, id_labels)[0], main_word2vec), vec)
return max_similarity

def create_mini_glove(glove_filename, mini_glove_output_path, id_labels = ID_LABELS):
'''
big_word2vec = load_pre_trained_vector(glove_filename)
mini_dict = dict()
for index in range(1000):
word_1k = get_one_word(index_to_1k_id(index))
mini_dict[word_1k] = big_word2vec[word_1k]
synset_pool = list()
with open(available_hop_filename) as f:
for line in f:
synset_pool.append(line[:-1])
for synset in synset_pool:
all_synset_labels = id_labels[synset]
for word in all_synset_labels:
if word in big_word2vec:
mini_dict[word] = big_word2vec[word]
'''
big_word2vec = load_pre_trained_vector(glove_filename)
mini_dict = dict()
for synset_id in id_labels:
described_words = id_labels[synset_id]
for word in described_words:
if word in big_word2vec:
mini_dict[word] = big_word2vec[word]

with open(mini_glove_output_path, 'w') as f:
for word in mini_dict:
word_embed = mini_dict[word]
line_to_be_printed = word
for value in word_embed:
line_to_be_printed += " {0:.5f}".format(value)
f.write(line_to_be_printed + "\n")





def get_synthetic_vec(probability_distribution, T, all_ids = ALL_IDS, dic_parent = DIC_PARENT, main_word2vec = MAIN_WORD2VEC, id_labels = ID_LABELS):
@@ -265,7 +301,7 @@ def nearest_neighbor_with_threshold(probability_distribution, top_k, label_pool,

def accuracy_one_synset(threshold, T, testing_wnid, probs_result_dir, words_result_dir,\
label_pool, overwrite=False, get_all_nns=False, top_k = 100, all_ids = ALL_IDS, dic_parent = DIC_PARENT,\
main_word2vec = MAIN_WORD2VEC, id_labels = ID_LABELS, max_images_to_run=100):
main_word2vec = MAIN_WORD2VEC, id_labels = ID_LABELS):
"""
probs_result_dir - contains n*/n*_*.txt, which contains 1k lines of
probabilities from CNN inference
@@ -277,9 +313,7 @@ def accuracy_one_synset(threshold, T, testing_wnid, probs_result_dir, words_resu

count_total = 0
count_correct = 0
if len(os.listdir(probs_result_dir_synset)) < max_images_to_run:
return (0, 0)
for probs_file in os.listdir(probs_result_dir_synset)[:max_images_to_run]:
for probs_file in os.listdir(probs_result_dir_synset):
# print "Processing %s" % probs_file
probability_distribution = np.loadtxt(os.path.join(probs_result_dir_synset, probs_file))

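Note (not part of the diff): a hedged usage sketch for the new create_mini_glove helper. It loads a full GloVe file, keeps only the words that appear in ID_LABELS, and writes a smaller embedding file that eval.py can then load via the new -f flag; the file names below are illustrative.

# Illustrative only: shrink the full GloVe file to the vocabulary covered by the synset labels.
create_mini_glove("glove.840B.300d.txt", "mini_glove.300d.txt")
# The mini file can then be passed to eval.py, e.g.
#   python eval.py ... -f mini_glove.300d.txt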
