Stemmer override token filter
Stemmer override token filter
Overrides stemming algorithms by applying a custom mapping, then protecting these terms from being modified by stemmers. Must be placed before any stemming filters.
Rules are mappings in the form of `token1[, ..., tokenN] => override`.
| Setting | Description |
|---|---|
| `rules` | A list of mapping rules to use. |
| `rules_path` | A path (either relative to `config` location, or absolute) to a list of mappings. |
Here is an example:
resp = client.indices.create( index="my-index-000001", settings={ "analysis": { "analyzer": { "my_analyzer": { "tokenizer": "standard", "filter": [ "lowercase", "custom_stems", "porter_stem" ] } }, "filter": { "custom_stems": { "type": "stemmer_override", "rules_path": "analysis/stemmer_override.txt" } } } }, ) print(resp)
response = client.indices.create( index: 'my-index-000001', body: { settings: { analysis: { analyzer: { my_analyzer: { tokenizer: 'standard', filter: [ 'lowercase', 'custom_stems', 'porter_stem' ] } }, filter: { custom_stems: { type: 'stemmer_override', rules_path: 'analysis/stemmer_override.txt' } } } } } ) puts response
const response = await client.indices.create({ index: "my-index-000001", settings: { analysis: { analyzer: { my_analyzer: { tokenizer: "standard", filter: ["lowercase", "custom_stems", "porter_stem"], }, }, filter: { custom_stems: { type: "stemmer_override", rules_path: "analysis/stemmer_override.txt", }, }, }, }, }); console.log(response);
PUT /my-index-000001 { "settings": { "analysis": { "analyzer": { "my_analyzer": { "tokenizer": "standard", "filter": [ "lowercase", "custom_stems", "porter_stem" ] } }, "filter": { "custom_stems": { "type": "stemmer_override", "rules_path": "analysis/stemmer_override.txt" } } } } }
Where the file looks like:
running, runs => run
stemmer => stemmer
You can also define the override rules inline:
resp = client.indices.create( index="my-index-000001", settings={ "analysis": { "analyzer": { "my_analyzer": { "tokenizer": "standard", "filter": [ "lowercase", "custom_stems", "porter_stem" ] } }, "filter": { "custom_stems": { "type": "stemmer_override", "rules": [ "running, runs => run", "stemmer => stemmer" ] } } } }, ) print(resp)
response = client.indices.create( index: 'my-index-000001', body: { settings: { analysis: { analyzer: { my_analyzer: { tokenizer: 'standard', filter: [ 'lowercase', 'custom_stems', 'porter_stem' ] } }, filter: { custom_stems: { type: 'stemmer_override', rules: [ 'running, runs => run', 'stemmer => stemmer' ] } } } } } ) puts response
const response = await client.indices.create({ index: "my-index-000001", settings: { analysis: { analyzer: { my_analyzer: { tokenizer: "standard", filter: ["lowercase", "custom_stems", "porter_stem"], }, }, filter: { custom_stems: { type: "stemmer_override", rules: ["running, runs => run", "stemmer => stemmer"], }, }, }, }, }); console.log(response);
PUT /my-index-000001 { "settings": { "analysis": { "analyzer": { "my_analyzer": { "tokenizer": "standard", "filter": [ "lowercase", "custom_stems", "porter_stem" ] } }, "filter": { "custom_stems": { "type": "stemmer_override", "rules": [ "running, runs => run", "stemmer => stemmer" ] } } } } }