Skip to content

Commit

Permalink
Proper Arabic analyzer
Browse files — browse the repository at this point in the history
  • Loading branch information
Yugi committed Feb 22, 2024
1 parent 86673e9 commit 378a833
Showing 1 changed file with 16 additions and 6 deletions.
22 changes: 16 additions & 6 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,15 +45,17 @@ def create_and_update_index(index_name, documents, fields_to_not_index):
"stemmer",
],
},
"arabic": {
"tokenizer": "standard",
"rebuilt_arabic": {
"tokenizer": "standard",
"char_filter": ["html_strip"],
"filter": [
"lowercase",
"stop",
"stemmer",
],
},
"decimal_digit",
"arabic_stop",
"arabic_normalization",
"arabic_stemmer"
]
}
},
"filter": {
# 2-3 word shingles for better suggestions
Expand All @@ -67,6 +69,14 @@ def create_and_update_index(index_name, documents, fields_to_not_index):
"lenient": True,
"synonyms_path": "synonyms.txt",
},
"arabic_stemmer": {
"type": "stemmer",
"language": "arabic"
},
"arabic_stop": {
"type": "stop",
"stopwords": "_arabic_"
},
},
},
}
Expand Down

0 comments on commit 378a833

Please sign in to comment.