New wakeword! (#62)

samgiles · web-flow · commit 72bff6d810ca · 2016-08-04T14:08:34.000+01:00
* Ignore Mac file system rubbish

* Add kws library and copy files over from dist directory

* Use new recogniser rather than JsSpeechRecogniser

* Use the AMD bundle

* Tweak threshold

* Remove debug

* Unused dependency

* No need for a closure

* Style changes

* Remove unused wakeword model

* Style changes

* Bump

* Rewrite URLs

* Use https explicitly..
diff --git a/.gitignore b/.gitignore
@@ -3,3 +3,4 @@
 /dist
 /.publish
 npm-debug.log
+.DS_Store
diff --git a/.travis.yml b/.travis.yml
@@ -12,3 +12,6 @@ before_install:
   - sh -e /etc/init.d/xvfb start
   - sleep 3 # give xvfb some time to start
   - export CHROME_BIN=google-chrome
+# Rewrite git:// URLs that git uses to https:// (for clones in submodules
+# of npm dependencies).
+  - git config --global url."https://".insteadOf git://
diff --git a/app/data/wakeword_model.json b/app/data/wakeword_model.json
diff --git a/app/js/lib/speech-controller.js b/app/js/lib/speech-controller.js
@@ -9,13 +9,11 @@ import IntentParser from './intent-parser';
 const p = Object.freeze({
   // Properties
   wakewordRecogniser: Symbol('wakewordRecogniser'),
-  wakewordModelUrl: Symbol('wakewordModelUrl'),
   speechRecogniser: Symbol('speechRecogniser'),
   speechSynthesis: Symbol('speechSynthesis'),
   idle: Symbol('idle'),
 
   // Methods
-  initialiseSpeechRecognition: Symbol('initialiseSpeechRecognition'),
   startListeningForWakeword: Symbol('startListeningForWakeword'),
   stopListeningForWakeword: Symbol('stopListeningForWakeword'),
   listenForUtterance: Symbol('listenForUtterance'),
@@ -52,7 +50,6 @@ export default class SpeechController extends EventDispatcher {
     super(EVENT_INTERFACE);
 
     this[p.idle] = true;
-    this[p.wakewordModelUrl] = 'data/wakeword_model.json';
 
     this[p.speechRecogniser] = new SpeechRecogniser();
     this[p.speechSynthesis] = new SpeechSynthesis();
@@ -73,8 +70,7 @@ export default class SpeechController extends EventDispatcher {
   }
 
   start() {
-    return this[p.initialiseSpeechRecognition]()
-      .then(this[p.startListeningForWakeword].bind(this));
+    return this[p.startListeningForWakeword]();
   }
 
   startSpeechRecognition() {
@@ -105,14 +101,6 @@ export default class SpeechController extends EventDispatcher {
     this[p.speechSynthesis].speak(text);
   }
 
-  [p.initialiseSpeechRecognition]() {
-    return fetch(this[p.wakewordModelUrl])
-      .then((response) => response.json())
-      .then((model) => {
-        this[p.wakewordRecogniser].loadModel(model);
-      });
-  }
-
   [p.startListeningForWakeword]() {
     this.emit(EVENT_INTERFACE[0], { type: EVENT_INTERFACE[0] });
     this[p.idle] = true;
diff --git a/app/js/lib/wakeword/recogniser.js b/app/js/lib/wakeword/recogniser.js
@@ -1,60 +1,65 @@
 'use strict';
 
-import JsSpeechRecognizer from 'components/jsspeechrecognizer';
+import PocketSphinx from 'components/webaudiokws';
 
 export default class WakeWordRecogniser {
-  constructor(options = {}) {
-    const minimumConfidence = options.minimumConfidence || 0.35;
-    const bufferCount = options.bufferCount || 80;
-    const maxVoiceActivityGap = options.maxVoiceActivityGap || 300;
-    const numGroups = options.numGroups || 60;
-    const groupSize = options.groupSize || 5;
+  constructor() {
+    this.audioContext = new AudioContext();
 
-    this.recogniser = new JsSpeechRecognizer();
+    this.audioSource = navigator.mediaDevices.getUserMedia({
+      audio: true,
+    })
+      .then((stream) => {
+        return this.audioContext.createMediaStreamSource(stream);
+      })
+      .catch((error) => {
+        console.error(`Could not getUserMedia: ${error}`);
+        throw error;
+      });
 
-    this.recogniser.keywordSpottingMinimumConfidence = minimumConfidence;
-    this.recogniser.keywordSpottingBufferCount = bufferCount;
-    this.recogniser.keywordSpottingMaxVoiceActivityGap = maxVoiceActivityGap;
-    this.recogniser.numGroups = numGroups;
-    this.recogniser.groupSize = groupSize;
+    this.recogniser = new PocketSphinx(this.audioContext, {
+      pocketSphinxUrl: '/js/components/pocketsphinx.js',
+      workerUrl: '/js/components/ps-worker.js',
+      args: [['-kws_threshold', '2']],
+    });
+
+    const dictionary = {
+      'MAKE': ['M EY K'],
+      'A': ['AH'],
+      'NOTE': ['N OW T'],
+    };
+
+    const keywordReady = this.recogniser.addDictionary(dictionary)
+      .then(() => this.recogniser.addKeyword('MAKE A NOTE'));
 
+    this.ready = Promise.all([keywordReady, this.audioSource]);
     Object.seal(this);
   }
 
   startListening() {
-    return new Promise((resolve) => {
-      this.recogniser.closeMic(); // Make sure we don't start another instance
-      this.recogniser.openMic();
-      if (!this.recogniser.isRecording()) {
-        this.recogniser.startKeywordSpottingRecording();
-      }
-
-      resolve();
-    });
+    return this.ready
+      .then(() => {
+        return this.audioSource;
+      })
+      .then((source) => {
+        source.connect(this.recogniser);
+        this.recogniser.connect(this.audioContext.destination);
+        return;
+      });
   }
 
   stopListening() {
-    return new Promise((resolve) => {
-      if (this.recogniser.isRecording()) {
-        this.recogniser.stopRecording();
-      }
-
-      this.recogniser.closeMic();
-
-      resolve();
-    });
-  }
-
-  loadModel(modelData) {
-    if (this.recogniser.isRecording()) {
-      throw new Error(
-        'Load the model data before listening for wakeword');
-    }
-
-    this.recogniser.model = modelData;
+    return this.ready
+      .then(() => {
+        return this.audioSource;
+      })
+      .then((source) => {
+        source.disconnect();
+        this.recogniser.disconnect();
+      });
   }
 
   setOnKeywordSpottedCallback(fn) {
-    this.recogniser.keywordSpottedCallback = fn;
+    this.recogniser.on('keywordspotted', fn);
   }
 }
diff --git a/gulpfile.js b/gulpfile.js
@@ -82,8 +82,6 @@ gulp.task('copy-app-common', () => {
       .pipe(rename('js/alameda.js')),
 
     // Components.
-    gulp.src('./node_modules/jsspeechrecognizer/dist/JsSpeechRecognizer.js')
-      .pipe(rename('js/components/jsspeechrecognizer.js')),
     gulp.src('./node_modules/lodash/lodash.min.js')
       .pipe(rename('js/components/lodash.js')),
     gulp.src('./node_modules/moment/min/moment-with-locales.min.js')
@@ -94,6 +92,12 @@ gulp.task('copy-app-common', () => {
       .pipe(rename('js/components/cldr/core.js')),
     gulp.src('./node_modules/twitter_cldr/min/en.min.js')
       .pipe(rename('js/components/cldr/en.js')),
+    gulp.src('./node_modules/webaudio-kws-node/dist/amd-library.js')
+      .pipe(rename('js/components/webaudiokws.js')),
+    gulp.src('./node_modules/webaudio-kws-node/dist/ps-worker.js')
+      .pipe(rename('js/components/ps-worker.js')),
+    gulp.src('./node_modules/webaudio-kws-node/dist/pocketsphinx.js')
+      .pipe(rename('js/components/pocketsphinx.js')),
 
     // Polyfills.
     gulp.src('./node_modules/whatwg-fetch/fetch.js')
diff --git a/package.json b/package.json
@@ -22,13 +22,13 @@
   "dependencies": {
     "alameda": "^1.0.0",
     "chrono-node": "^1.2.3",
-    "jsspeechrecognizer": "fxbox/JsSpeechRecognizer",
     "lodash": "^4.13.1",
     "moment": "^2.13.0",
     "react": "^15.1.0",
     "react-dom": "^15.1.0",
     "twitter_cldr": "^2.1.1",
     "url-search-params": "^0.5.0",
+    "webaudio-kws-node": "https://github.com/fxbox/webaudio-kws-node",
     "webrtc-adapter": "^1.4.0",
     "whatwg-fetch": "^1.0.0"
   },