@@ -170,7 +170,7 @@ export function create<T extends AnyOrama, TSchema extends T['schema']>(
170
170
index . vectorIndexes [ path ] = {
171
171
type : 'Vector' ,
172
172
node : new VectorIndex ( getVectorSize ( type ) ) ,
173
- isArray : false ,
173
+ isArray : false
174
174
}
175
175
} else {
176
176
const isArray = / \[ / . test ( type as string )
@@ -273,7 +273,16 @@ export function insert(
273
273
return insertVector ( index , prop , value as number [ ] | Float32Array , id , internalId )
274
274
}
275
275
276
- const insertScalar = insertScalarBuilder ( implementation , index , prop , internalId , language , tokenizer , docsCount , options )
276
+ const insertScalar = insertScalarBuilder (
277
+ implementation ,
278
+ index ,
279
+ prop ,
280
+ internalId ,
281
+ language ,
282
+ tokenizer ,
283
+ docsCount ,
284
+ options
285
+ )
277
286
278
287
if ( ! isArrayType ( schemaType ) ) {
279
288
return insertScalar ( value )
@@ -286,7 +295,13 @@ export function insert(
286
295
}
287
296
}
288
297
289
- export function insertVector ( index : AnyIndexStore , prop : string , value : number [ ] | VectorType , id : DocumentID , internalDocumentId : InternalDocumentID ) : void {
298
+ export function insertVector (
299
+ index : AnyIndexStore ,
300
+ prop : string ,
301
+ value : number [ ] | VectorType ,
302
+ id : DocumentID ,
303
+ internalDocumentId : InternalDocumentID
304
+ ) : void {
290
305
index . vectorIndexes [ prop ] . node . add ( internalDocumentId , value )
291
306
}
292
307
@@ -372,7 +387,18 @@ export function remove(
372
387
const elements = value as Array < string | number | boolean >
373
388
const elementsLength = elements . length
374
389
for ( let i = 0 ; i < elementsLength ; i ++ ) {
375
- removeScalar ( implementation , index , prop , id , internalId , elements [ i ] , innerSchemaType , language , tokenizer , docsCount )
390
+ removeScalar (
391
+ implementation ,
392
+ index ,
393
+ prop ,
394
+ id ,
395
+ internalId ,
396
+ elements [ i ] ,
397
+ innerSchemaType ,
398
+ language ,
399
+ tokenizer ,
400
+ docsCount
401
+ )
376
402
}
377
403
378
404
return true
@@ -396,7 +422,7 @@ export function calculateResultScores(
396
422
const fieldLengths = index . fieldLengths [ prop ]
397
423
const oramaOccurrences = index . tokenOccurrences [ prop ]
398
424
const oramaFrequencies = index . frequencies [ prop ]
399
-
425
+
400
426
// oramaOccurrences[term] can be undefined, 0, string, or { [k: string]: number }
401
427
const termOccurrences = typeof oramaOccurrences [ term ] === 'number' ? oramaOccurrences [ term ] ?? 0 : 0
402
428
@@ -417,14 +443,7 @@ export function calculateResultScores(
417
443
418
444
const tf = oramaFrequencies ?. [ internalId ] ?. [ term ] ?? 0
419
445
420
- const bm25 = BM25 (
421
- tf ,
422
- termOccurrences ,
423
- docsCount ,
424
- fieldLengths [ internalId ] ! ,
425
- avgFieldLength ,
426
- bm25Relevance ,
427
- )
446
+ const bm25 = BM25 ( tf , termOccurrences , docsCount , fieldLengths [ internalId ] ! , avgFieldLength , bm25Relevance )
428
447
429
448
if ( resultsMap . has ( internalId ) ) {
430
449
resultsMap . set ( internalId , resultsMap . get ( internalId ) ! + bm25 * boostPerProperty )
@@ -434,46 +453,6 @@ export function calculateResultScores(
434
453
}
435
454
}
436
455
437
- function searchInProperty (
438
- index : Index ,
439
- tree : RadixTree ,
440
- prop : string ,
441
- tokens : string [ ] ,
442
- exact : boolean ,
443
- tolerance : number ,
444
- resultsMap : Map < number , number > ,
445
- boostPerProperty : number ,
446
- bm25Relevance : Required < BM25Params > ,
447
- docsCount : number ,
448
- whereFiltersIDs : Set < InternalDocumentID > | undefined ,
449
- keywordMatchesMap : Map < InternalDocumentID , Map < string , number > >
450
- ) {
451
- const tokenLength = tokens . length ;
452
- for ( let i = 0 ; i < tokenLength ; i ++ ) {
453
- const term = tokens [ i ] ;
454
- const searchResult = tree . find ( { term, exact, tolerance } )
455
-
456
- const termsFound = Object . keys ( searchResult )
457
- const termsFoundLength = termsFound . length ;
458
- for ( let j = 0 ; j < termsFoundLength ; j ++ ) {
459
- const word = termsFound [ j ]
460
- const ids = searchResult [ word ]
461
- calculateResultScores (
462
- index ,
463
- prop ,
464
- word ,
465
- ids ,
466
- docsCount ,
467
- bm25Relevance ,
468
- resultsMap ,
469
- boostPerProperty ,
470
- whereFiltersIDs ,
471
- keywordMatchesMap ,
472
- )
473
- }
474
- }
475
- }
476
-
477
456
export function search (
478
457
index : Index ,
479
458
term : string ,
@@ -486,13 +465,15 @@ export function search(
486
465
relevance : Required < BM25Params > ,
487
466
docsCount : number ,
488
467
whereFiltersIDs : Set < InternalDocumentID > | undefined ,
489
- threshold = 0 ,
468
+ threshold = 0
490
469
) : TokenScore [ ] {
491
470
const tokens = tokenizer . tokenize ( term , language )
492
471
const keywordsCount = tokens . length || 1
493
472
494
473
// Track keyword matches per document and property
495
474
const keywordMatchesMap = new Map < InternalDocumentID , Map < string , number > > ( )
475
+ // Track which tokens were found in the search
476
+ const tokenFoundMap = new Map < string , boolean > ( )
496
477
const resultsMap = new Map < number , number > ( )
497
478
498
479
for ( const prop of propertiesToSearch ) {
@@ -515,20 +496,37 @@ export function search(
515
496
tokens . push ( '' )
516
497
}
517
498
518
- searchInProperty (
519
- index ,
520
- tree . node ,
521
- prop ,
522
- tokens ,
523
- exact ,
524
- tolerance ,
525
- resultsMap ,
526
- boostPerProperty ,
527
- relevance ,
528
- docsCount ,
529
- whereFiltersIDs ,
530
- keywordMatchesMap
531
- )
499
+ // Process each token in the search term
500
+ const tokenLength = tokens . length
501
+ for ( let i = 0 ; i < tokenLength ; i ++ ) {
502
+ const token = tokens [ i ]
503
+ const searchResult = tree . node . find ( { term : token , exact, tolerance } )
504
+
505
+ // See if this token was found (for threshold=0 filtering)
506
+ const termsFound = Object . keys ( searchResult )
507
+ if ( termsFound . length > 0 ) {
508
+ tokenFoundMap . set ( token , true )
509
+ }
510
+
511
+ // Process each matching term
512
+ const termsFoundLength = termsFound . length
513
+ for ( let j = 0 ; j < termsFoundLength ; j ++ ) {
514
+ const word = termsFound [ j ]
515
+ const ids = searchResult [ word ]
516
+ calculateResultScores (
517
+ index ,
518
+ prop ,
519
+ word ,
520
+ ids ,
521
+ docsCount ,
522
+ relevance ,
523
+ resultsMap ,
524
+ boostPerProperty ,
525
+ whereFiltersIDs ,
526
+ keywordMatchesMap
527
+ )
528
+ }
529
+ }
532
530
}
533
531
534
532
// Convert to array and sort by score
@@ -545,20 +543,42 @@ export function search(
545
543
return results
546
544
}
547
545
546
+ // For threshold=0, check if all tokens were found
547
+ if ( threshold === 0 ) {
548
+ // Quick return for single tokens - already validated
549
+ if ( keywordsCount === 1 ) {
550
+ return results
551
+ }
552
+
553
+ // For multiple tokens, verify that ALL tokens were found
554
+ // If any token wasn't found, return an empty result
555
+ for ( const token of tokens ) {
556
+ if ( ! tokenFoundMap . get ( token ) ) {
557
+ return [ ]
558
+ }
559
+ }
560
+
561
+ // Find documents that have all keywords in at least one property
562
+ const fullMatches = results . filter ( ( [ id ] ) => {
563
+ const propertyMatches = keywordMatchesMap . get ( id )
564
+ if ( ! propertyMatches ) return false
565
+
566
+ // Check if any property has all keywords
567
+ return Array . from ( propertyMatches . values ( ) ) . some ( ( matches ) => matches === keywordsCount )
568
+ } )
569
+
570
+ return fullMatches
571
+ }
572
+
548
573
// Find documents that have all keywords in at least one property
549
574
const fullMatches = results . filter ( ( [ id ] ) => {
550
575
const propertyMatches = keywordMatchesMap . get ( id )
551
576
if ( ! propertyMatches ) return false
552
-
577
+
553
578
// Check if any property has all keywords
554
- return Array . from ( propertyMatches . values ( ) ) . some ( matches => matches === keywordsCount )
579
+ return Array . from ( propertyMatches . values ( ) ) . some ( ( matches ) => matches === keywordsCount )
555
580
} )
556
581
557
- // If threshold is 0, return only full matches
558
- if ( threshold === 0 ) {
559
- return fullMatches
560
- }
561
-
562
582
// If we have full matches and threshold < 1, return full matches plus a percentage of partial matches
563
583
if ( fullMatches . length > 0 ) {
564
584
const remainingResults = results . filter ( ( [ id ] ) => ! fullMatches . some ( ( [ fid ] ) => fid === id ) )
@@ -656,9 +676,11 @@ export function searchByWhereClause<T extends AnyOrama>(
656
676
}
657
677
658
678
if ( type === 'Flat' ) {
659
- const results = new Set ( isArray
660
- ? node . filterArr ( operation as EnumArrComparisonOperator )
661
- : node . filter ( operation as EnumComparisonOperator ) )
679
+ const results = new Set (
680
+ isArray
681
+ ? node . filterArr ( operation as EnumArrComparisonOperator )
682
+ : node . filter ( operation as EnumComparisonOperator )
683
+ )
662
684
663
685
filtersMap [ param ] = setUnion ( filtersMap [ param ] , results )
664
686
@@ -668,7 +690,7 @@ export function searchByWhereClause<T extends AnyOrama>(
668
690
if ( type === 'AVL' ) {
669
691
const operationOpt = operationKeys [ 0 ] as keyof ComparisonOperator
670
692
const operationValue = ( operation as ComparisonOperator ) [ operationOpt ]
671
- let filteredIDs : Set < InternalDocumentID >
693
+ let filteredIDs : Set < InternalDocumentID >
672
694
673
695
switch ( operationOpt ) {
674
696
case 'gt' : {
@@ -818,12 +840,7 @@ export function save<R = unknown>(index: Index): R {
818
840
const savedIndexes : any = { }
819
841
for ( const name of Object . keys ( indexes ) ) {
820
842
const { type, node, isArray } = indexes [ name ]
821
- if ( type === 'Flat'
822
- || type === 'Radix'
823
- || type === 'AVL'
824
- || type === 'BKD'
825
- || type === 'Bool'
826
- ) {
843
+ if ( type === 'Flat' || type === 'Radix' || type === 'AVL' || type === 'BKD' || type === 'Bool' ) {
827
844
savedIndexes [ name ] = {
828
845
type,
829
846
node : node . toJSON ( ) ,
@@ -866,7 +883,10 @@ export function createIndex(): IIndex<Index> {
866
883
}
867
884
}
868
885
869
- function addGeoResult ( set : Set < InternalDocumentID > | undefined , ids : Array < { docIDs : InternalDocumentID [ ] } > ) : Set < InternalDocumentID > {
886
+ function addGeoResult (
887
+ set : Set < InternalDocumentID > | undefined ,
888
+ ids : Array < { docIDs : InternalDocumentID [ ] } >
889
+ ) : Set < InternalDocumentID > {
870
890
if ( ! set ) {
871
891
set = new Set ( )
872
892
}
@@ -883,7 +903,10 @@ function addGeoResult(set: Set<InternalDocumentID> | undefined, ids: Array<{ doc
883
903
return set
884
904
}
885
905
886
- function addFindResult ( set : Set < InternalDocumentID > | undefined , filteredIDsResults : FindResult ) : Set < InternalDocumentID > {
906
+ function addFindResult (
907
+ set : Set < InternalDocumentID > | undefined ,
908
+ filteredIDsResults : FindResult
909
+ ) : Set < InternalDocumentID > {
887
910
if ( ! set ) {
888
911
set = new Set ( )
889
912
}
0 commit comments