Skip to content

Commit

Permalink
Add solr.ASCIIFoldingFilterFactory
Browse files Browse the repository at this point in the history
Add <filter class="solr.ASCIIFoldingFilterFactory"/> in conjunction with <filter class="solr.LowerCaseFilterFactory"/>
Now all words with accents (i.e. é,ë,û are replaced by normal characters).
We have found cases where people were searching for words containing these accents, without providing them in the query.
  • Loading branch information
RVXD authored May 6, 2020
1 parent 21b77d9 commit 77d2721
Showing 1 changed file with 12 additions and 0 deletions.
12 changes: 12 additions & 0 deletions Solr/7/templates/types.ss
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@
<analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.ASCIIFoldingFilterFactory"/>
<filter class="solr.KeywordRepeatFilterFactory"/>
<filter class="solr.StopFilterFactory"
ignoreCase="true"
Expand All @@ -168,13 +169,15 @@
<analyzer type="index">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.ASCIIFoldingFilterFactory"/>
<filter class="solr.SnowballPorterFilterFactory"/>
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.FlattenGraphFilterFactory"/> <!-- required on index analyzers after graph filters -->
</analyzer>
<analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.ASCIIFoldingFilterFactory"/>
<filter class="solr.SnowballPorterFilterFactory"/>
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
</analyzer>
Expand All @@ -185,6 +188,7 @@
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<charFilter class="solr.HTMLStripCharFilterFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.ASCIIFoldingFilterFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="1"
catenateWords="1"
Expand All @@ -197,6 +201,7 @@
<analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.ASCIIFoldingFilterFactory"/>
<filter class="solr.KeywordRepeatFilterFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1"
Expand All @@ -215,6 +220,7 @@
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.ASCIIFoldingFilterFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0"
catenateWords="1"
Expand All @@ -235,6 +241,7 @@
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.ASCIIFoldingFilterFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
<filter class="solr.LengthFilterFactory" min="4" max="20"/>
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
Expand All @@ -248,6 +255,7 @@
<charFilter class="solr.HTMLStripCharFilterFactory"/>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.ASCIIFoldingFilterFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
<filter class="solr.LengthFilterFactory" min="4" max="20"/>
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
Expand All @@ -260,6 +268,7 @@
<analyzer type="index">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.ASCIIFoldingFilterFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1"
catenateWords="1"
Expand All @@ -271,6 +280,7 @@
<analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.ASCIIFoldingFilterFactory"/>
<filter class="solr.StopFilterFactory"
ignoreCase="true"
words="stopwords.txt"
Expand All @@ -292,6 +302,7 @@
<analyzer type="index">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.ASCIIFoldingFilterFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1"
catenateWords="1"
Expand Down Expand Up @@ -392,6 +403,7 @@
<analyzer>
<tokenizer class="solr.KeywordTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.ASCIIFoldingFilterFactory"/>
<filter class="solr.SnowballPorterFilterFactory"/>
</analyzer>
</fieldType>
Expand Down

0 comments on commit 77d2721

Please sign in to comment.