Skip to content

Commit 5fb002f

Browse files
Add wordforms/search_suggest
Results of lexemes/search_suggest are wrapped in 'lexeme' for consistency.
1 parent 1c31cf8 commit 5fb002f

File tree

8 files changed

+222
-40
lines changed

8 files changed

+222
-40
lines changed

README.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,16 @@ Run all tests with `npm test`.
3939
Run an individual testsuite with `npx mocha --exit test/schema.js` or use the `--grep` flag.
4040
To stop on first failure, use `--bail`
4141

42+
### Using test data
43+
44+
1. Set the DB URL in `server-config.js` to point at a test database, e.g. `...gabra-test` (or something else)
45+
2. ```
46+
node scripts/node/populate.js test/data/*.json
47+
node scripts/node/resolve-lexeme-ids.js
48+
node scripts/node/create-indexes.js
49+
(cd scripts/node && ./run.js update-glosses-collection.js)
50+
```
51+
4252
## Repository
4353

4454
- `master` branch is used for development.

package-lock.json

Lines changed: 20 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "gabra-api",
3-
"version": "2.12.0",
3+
"version": "2.13.0",
44
"description": "Ġabra: an open lexicon for Maltese",
55
"author": "John J. Camilleri <[email protected]> (http://johnjcamilleri.com/)",
66
"license": "MIT",

public/markdown/api.md

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -96,12 +96,14 @@ The results are sorted by part of speech and derived form, and will not include
9696
|:-----------------|:------------|:---------------------------|
9797
| `:id` (required) | Lexeme ID | `5200a366e36f237975000f26` |
9898

99-
### Search suggest
99+
### Search suggest <small>Changed in v2.13</small>
100100

101-
List variations in spelling (diacritics, character case) of a search term, from lemmas:
101+
Find words that match the search term but vary from it in spelling (diacritics, character case), from lemmas or wordforms:
102102

103103
> [/lexemes/search_suggest?s=Hareg](#{baseURL}/lexemes/search_suggest?s=Hareg)
104104
105+
> [/wordforms/search_suggest?s=ohorgu](#{baseURL}/wordforms/search_suggest?s=ohorgu)
106+
105107
| Argument | Description | Example |
106108
|:---------------|:-------------|:--------|
107109
| `s` (required) | Search query | `Hareg` |

routes/lexemes.js

Lines changed: 31 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -178,24 +178,24 @@ router.get('/search', function (req, res) {
178178
collection.find(conds_l, opts),
179179
collection.count(conds_l)
180180
])
181-
.then(values => {
182-
var docs = values[0]
183-
var count = values[1]
184-
queryObj.result_count = count
185-
res.json({
186-
'results': docs.map(doc => {
187-
return {
188-
'lexeme': doc
189-
}
190-
}),
191-
'query': queryObj
192-
})
193-
})
194-
.catch(err => {
195-
console.error(err)
196-
res.status(500).end()
181+
.then(values => {
182+
var docs = values[0]
183+
var count = values[1]
184+
queryObj.result_count = count
185+
res.json({
186+
'results': docs.map(doc => {
187+
return {
188+
'lexeme': doc
189+
}
190+
}),
191+
'query': queryObj
192+
})
193+
})
194+
.catch(err => {
195+
console.error(err)
196+
res.status(500).end()
197+
})
197198
})
198-
})
199199
})
200200

201201
/*
@@ -312,17 +312,16 @@ router.get('/search_suggest', function (req, res) {
312312
// s = s.replace(/^([^\[])/, function (m,c,o,s) { return '[' + c.toUpperCase() + ']'})
313313

314314
// Handle diacritics
315-
s = s.replace(/^\^/, '')
316-
s = s.replace(/\$$/, '')
317315
s = s.replace(/c/g, 'ċ')
318316
s = s.replace(/g/g, '[gġ]')
319317
s = s.replace(/h/g, '[hħ]')
320318
s = s.replace(/z/g, '[zż]')
321319

322320
// No substrings
321+
s = s.replace(/^\^/, '')
322+
s = s.replace(/\$$/, '')
323323
s = '^' + s + '$'
324324

325-
var collection = db.get('lexemes')
326325
var query = {
327326
'$or': [
328327
{
@@ -334,23 +333,24 @@ router.get('/search_suggest', function (req, res) {
334333
],
335334
'pending': {'$ne': true}
336335
}
336+
337337
var opts = {
338338
'projection': {'lemma': true}
339339
}
340-
collection.find(query, opts, function (err, docs) {
341-
if (err) {
340+
db.get('lexemes').find(query, opts)
341+
.catch(function (err) {
342342
console.error(err)
343343
res.status(500).end()
344-
return
345-
}
346-
res.json({
347-
'results': docs,
348-
'query': {
349-
'term': orig,
350-
'result_count': docs.length
351-
}
352344
})
353-
})
345+
.then(function (data) {
346+
res.json({
347+
'results': data.map((l) => { return {'lexeme': l} }),
348+
'query': {
349+
'term': orig,
350+
'result_count': data.length
351+
}
352+
})
353+
})
354354
})
355355

356356
/*

routes/wordforms.js

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ var express = require('express')
22
var router = express.Router()
33
var passport = require('passport')
44
var async = require('async')
5+
var regexquote = require('regexp-quote')
56
var monk = require('monk')
67

78
var log = require('./helpers/logger').makeLogger('wordforms')
@@ -154,6 +155,57 @@ router.post('/replace/:lexeme_id',
154155
})
155156
})
156157

158+
/*
 * GET search suggest
 *
 * Suggests wordforms whose surface form matches the search term `s` once
 * character case and diacritics (ċ, ġ, ħ, ż) are relaxed, excluding forms
 * identical to the term itself.
 *
 * Query arguments:
 *   s (required)  search term
 *
 * Responses:
 *   200  {results: [{wordform: {...}}, ...], query: {term, result_count}}
 *   400  `s` is absent or not a string
 *   500  database error
 */
router.get('/search_suggest', function (req, res) {
  var db = req.db

  var orig = req.query.s
  // Without a string term no pattern can be built; reject the request
  // instead of letting the handler throw.
  if (typeof orig !== 'string') {
    res.status(400).end()
    return
  }

  // Escape regex metacharacters so the term is matched literally
  var s = regexquote(orig)

  // Handle capitalisation
  s = s.toLowerCase()

  // Handle diacritics: g/h/z match with or without the diacritic,
  // while a plain 'c' is mapped to 'ċ' only
  s = s.replace(/c/g, 'ċ')
  s = s.replace(/g/g, '[gġ]')
  s = s.replace(/h/g, '[hħ]')
  s = s.replace(/z/g, '[zż]')

  // No substrings: anchor the pattern at both ends
  s = s.replace(/^\^/, '')
  s = s.replace(/\$$/, '')
  s = '^' + s + '$'

  var query = {
    // Match the relaxed pattern, but skip forms identical to the term
    'surface_form': {'$regex': s, '$ne': orig},
    'pending': {'$ne': true}
  }
  var opts = {
    'projection': {
      'surface_form': true,
      'lexeme_id': true
    }
  }
  // The success handler comes first and .catch() last, so a failed query
  // sends the 500 and never reaches the code that reads `data`.
  db.get('wordforms').find(query, opts)
    .then(function (data) {
      res.json({
        'results': data.map((l) => { return {'wordform': l} }),
        'query': {
          'term': orig,
          'result_count': data.length
        }
      })
    })
    .catch(function (err) {
      console.error(err)
      res.status(500).end()
    })
})
208+
157209
/*
158210
* GET count
159211
*/

test/data/wordforms.json

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,5 +123,79 @@
123123
"Camilleri2013"
124124
],
125125
"pending" : true
126+
},
127+
{
128+
"aspect": "perf",
129+
"dir_obj": null,
130+
"generated": true,
131+
"ind_obj": null,
132+
"lexeme": {
133+
"lemma": "ħareġ",
134+
"pos": "VERB"
135+
},
136+
"phonetic": "hrɪʧt",
137+
"polarity": "pos",
138+
"sources": ["Camilleri2013"],
139+
"subject": {
140+
"person": "p1",
141+
"number": "sg"
142+
},
143+
"surface_form": "ħriġt"
144+
},
145+
{
146+
"aspect": "perf",
147+
"dir_obj": null,
148+
"generated": true,
149+
"ind_obj": null,
150+
"lexeme": {
151+
"lemma": "ħareġ",
152+
"pos": "VERB"
153+
},
154+
"phonetic": "hrɪʧt",
155+
"polarity": "pos",
156+
"sources": ["Camilleri2013"],
157+
"subject": {
158+
"person": "p2",
159+
"number": "sg"
160+
},
161+
"surface_form": "ħriġt"
162+
},
163+
{
164+
"aspect": "perf",
165+
"dir_obj": null,
166+
"generated": true,
167+
"ind_obj": null,
168+
"lexeme": {
169+
"lemma": "ħareġ",
170+
"pos": "VERB"
171+
},
172+
"phonetic": "hɐrɛʧ",
173+
"polarity": "pos",
174+
"sources": ["Camilleri2013"],
175+
"subject": {
176+
"person": "p3",
177+
"number": "sg",
178+
"gender": "m"
179+
},
180+
"surface_form": "ħareġ"
181+
},
182+
{
183+
"aspect": "perf",
184+
"dir_obj": null,
185+
"generated": true,
186+
"ind_obj": null,
187+
"lexeme": {
188+
"lemma": "ħareġ",
189+
"pos": "VERB"
190+
},
191+
"phonetic": "hɐrʤɛt",
192+
"polarity": "pos",
193+
"sources": ["Camilleri2013"],
194+
"subject": {
195+
"person": "p3",
196+
"number": "sg",
197+
"gender": "f"
198+
},
199+
"surface_form": "ħarġet"
126200
}
127201
]

test/search.js

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,23 @@ describe('Search', function () {
2222
res.body.query.result_count.should.be.greaterThanOrEqual(opts.result_count)
2323
}
2424

25-
// Results should contain these lemmas (in any order)
25+
// Lexeme results should contain these lemmas (in any order)
2626
if (opts.lemmas) {
2727
for (let i in opts.lemmas) {
2828
let lemma = opts.lemmas[i]
2929
res.body.results.should.matchAny(function (value) {
3030
value.lexeme.lemma.should.equal(lemma)
31-
}, 'lemma "' + lemma + '" not found in results')
31+
}, `lemma "${lemma}" not found in results`)
32+
}
33+
}
34+
35+
// Wordform results should contain these surface forms (in any order)
36+
if (opts.surface_forms) {
37+
for (let i in opts.surface_forms) {
38+
let sf = opts.surface_forms[i]
39+
res.body.results.should.matchAny(function (value) {
40+
value.wordform.surface_form.should.equal(sf)
41+
}, `surface form "${sf}" not found in results`)
3242
}
3343
}
3444

@@ -93,6 +103,24 @@ describe('Search', function () {
93103

94104
// -------------------------------------------------------------------------
95105

106+
describe('Search suggest', function () {
  // One row per endpoint: [test title, request path, expectations for checkResponse]
  var cases = [
    ['suggest lexeme', '/lexemes/search_suggest?s=Hareg', {lemmas: ['ħareġ']}],
    ['suggest wordform', '/wordforms/search_suggest?s=harget', {surface_forms: ['ħarġet']}]
  ]

  cases.forEach(function (entry) {
    var title = entry[0]
    var path = entry[1]
    var expected = entry[2]

    // Each endpoint must answer 200 and include the expected suggestion
    it(title, function (done) {
      request(server)
        .get(path)
        .expect(200)
        .end(checkResponse(expected, done))
    })
  })
})
121+
122+
// -------------------------------------------------------------------------
123+
96124
describe('Load stuff', function () {
97125
var lexeme_id
98126

0 commit comments

Comments
 (0)