import warnings
warnings.filterwarnings("ignore")
import cv2, spacy, numpy as np
import joblib  # sklearn.externals.joblib was removed in scikit-learn 0.23
from keras.models import model_from_json, Model
from keras.optimizers import SGD
from keras import backend as K

# VGG16 features below assume (channels, height, width) input ordering.
K.set_image_data_format('channels_first')

# Pre-trained VQA model architecture, weights, and answer label encoder.
VQA_model_file_name = 'VQA/VQA_MODEL.json'
VQA_weights_file_name = 'VQA/VQA_MODEL_WEIGHTS.hdf5'
label_encoder_file_name = 'VQA/FULL_labelencoder_trainval.pkl'

def get_image_model():
    """Return VGG16 truncated at its penultimate (4096-d fc2) layer."""
    from keras.applications.vgg16 import VGG16
    model = VGG16(weights='imagenet')
    # Re-wire the network so its output is the fc2 activations.
    new_input = model.input
    hidden_layer = model.layers[-2].output
    model_new = Model(new_input, hidden_layer)
    # The model is only used for inference; compiling just finalizes it.
    sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
    model_new.compile(optimizer=sgd, loss='categorical_crossentropy')
    return model_new

model_vgg = get_image_model()

def get_image_features(image_file_name):
    """Extract a (1, 4096) VGG16 fc2 feature vector from an image file."""
    image_features = np.zeros((1, 4096))
    im = cv2.resize(cv2.imread(image_file_name), (224, 224))
    im = im.transpose((2, 0, 1))     # HWC -> CHW for channels_first
    im = np.expand_dims(im, axis=0)  # add the batch dimension
    image_features[0, :] = model_vgg.predict(im)[0]
    return image_features

# Load the spaCy word vectors once at import time instead of on every call.
word_embeddings = spacy.load('en_vectors_web_lg')

def get_question_features(question):
    """Embed a question as a (1, 30, 300) tensor of word vectors."""
    tokens = word_embeddings(question)
    question_tensor = np.zeros((1, 30, 300))
    # Truncate questions longer than 30 tokens to avoid an IndexError.
    for j in range(min(len(tokens), 30)):
        question_tensor[0, j, :] = tokens[j].vector
    return question_tensor

# Windows-only text-to-speech via SAPI.
import win32com.client as wincl
speak = wincl.Dispatch("SAPI.SpVoice")

def get_VQA_model(VQA_model_file_name, VQA_weights_file_name):
    """Rebuild the VQA model from its JSON architecture and saved weights."""
    vqa_model = model_from_json(open(VQA_model_file_name).read())
    vqa_model.load_weights(VQA_weights_file_name)
    vqa_model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
    return vqa_model

model_vqa = get_VQA_model(VQA_model_file_name, VQA_weights_file_name)
labelencoder = joblib.load(label_encoder_file_name)

from flask import Flask, request, jsonify

app = Flask(__name__)

@app.route('/hello', methods=['POST'])
def hello():
    message = request.get_json(force=True)
    # question = message['name']

    # Capture the question by voice rather than from the request body.
    import speech_recognition as sr
    r = sr.Recognizer()
    with sr.Microphone() as source:
        print("Ask:")
        audio = r.listen(source)
    try:
        # Recognize once and reuse the result (the original transcribed twice).
        question = r.recognize_google(audio) + " ?"
        print("You asked " + question)
    except sr.UnknownValueError:
        # Return early so `question` is never used while undefined.
        print("Could not understand the question")
        return jsonify({'greeting': 'Could not understand the question.'})
    except sr.RequestError as e:
        print("Could not request results; {0}".format(e))
        return jsonify({'greeting': 'Could not reach the speech recognition service.'})

    # Show a live camera preview (device 1 here; use 0 for the default webcam)
    # and answer the question about the frame captured when 'c' is pressed.
    cap = cv2.VideoCapture(1)
    response = {'greeting': 'No frame was captured.'}
    while True:
        ret, frame = cap.read()
        if not ret:
            break  # camera unavailable or stream ended
        # question = u'what is there in the picture?'
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        cv2.imshow('frame', gray)
        if cv2.waitKey(1) & 0xFF == ord('c'):
            # VGG16 features for the captured frame (same steps as
            # get_image_features, but on an in-memory frame).
            image_features = np.zeros((1, 4096))
            im = cv2.resize(frame, (224, 224))
            im = im.transpose((2, 0, 1))
            im = np.expand_dims(im, axis=0)
            image_features[0, :] = model_vgg.predict(im)[0]
            question_features = get_question_features(question)
            y_output = model_vqa.predict([question_features, image_features])
            # Keep only the top-ranked answer: the loop breaks after the
            # first (most probable) of the top-5 labels.
            for label in reversed(np.argsort(y_output)[0, -5:]):
                ans = "According to me the answer is " + str(labelencoder.inverse_transform([label])[0])
                speak.Speak(ans)
                break
            response = {'greeting': question + '\n' + ans + '.'}
            break
    cap.release()
    cv2.destroyAllWindows()
    return jsonify(response)
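
# Minimal entry point, a sketch only: the original file does not show how the
# server is started, so the host/port below are assumptions. Run with
#
#   python hello_app.py
#
# and trigger a question/answer round from another terminal with, e.g.:
#
#   curl -X POST http://127.0.0.1:5000/hello -H "Content-Type: application/json" -d "{}"
#
if __name__ == '__main__':
    app.run(host='127.0.0.1', port=5000)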