Mapping character filter
Mapping character filter
The mapping
character filter accepts a map of keys and values. Whenever it
encounters a string of characters that is the same as a key, it replaces them
with the value associated with that key.
Matching is greedy; the longest pattern matching at a given point wins. Replacements are allowed to be the empty string.
The mapping
filter uses Lucene’s
MappingCharFilter.
Example
The following analyze API request uses the mapping
filter
to convert Eastern Arabic numerals (٠١٢٣٤٥٦٧٨٩) into their Western Arabic (Latin)
equivalents (0123456789), changing the text My license plate is ٢٥٠١٥
to
My license plate is 25015
.
resp = client.indices.analyze( tokenizer="keyword", char_filter=[ { "type": "mapping", "mappings": [ "٠ => 0", "١ => 1", "٢ => 2", "٣ => 3", "٤ => 4", "٥ => 5", "٦ => 6", "٧ => 7", "٨ => 8", "٩ => 9" ] } ], text="My license plate is ٢٥٠١٥", ) print(resp)
response = client.indices.analyze( body: { tokenizer: 'keyword', char_filter: [ { type: 'mapping', mappings: [ '٠ => 0', '١ => 1', '٢ => 2', '٣ => 3', '٤ => 4', '٥ => 5', '٦ => 6', '٧ => 7', '٨ => 8', '٩ => 9' ] } ], text: 'My license plate is ٢٥٠١٥' } ) puts response
const response = await client.indices.analyze({ tokenizer: "keyword", char_filter: [ { type: "mapping", mappings: [ "٠ => 0", "١ => 1", "٢ => 2", "٣ => 3", "٤ => 4", "٥ => 5", "٦ => 6", "٧ => 7", "٨ => 8", "٩ => 9", ], }, ], text: "My license plate is ٢٥٠١٥", }); console.log(response);
GET /_analyze { "tokenizer": "keyword", "char_filter": [ { "type": "mapping", "mappings": [ "٠ => 0", "١ => 1", "٢ => 2", "٣ => 3", "٤ => 4", "٥ => 5", "٦ => 6", "٧ => 7", "٨ => 8", "٩ => 9" ] } ], "text": "My license plate is ٢٥٠١٥" }
The filter produces the following text:
[ My license plate is 25015 ]
Configurable parameters
-
mappings
-
(Required*, array of strings) Array of mappings, with each element having the form
key => value
. Either this or the
mappings_path
parameter must be specified. -
mappings_path
-
(Required*, string) Path to a file containing
key => value
mappings. This path must be absolute or relative to the
config
location, and the file must be UTF-8 encoded. Each mapping in the file must be separated by a line break. Either this or the
mappings
parameter must be specified.
Customize and add to an analyzer
To customize the mapping
filter, duplicate it to create the basis for a new
custom character filter. You can modify the filter using its configurable
parameters.
The following create index API request
configures a new custom analyzer using a custom
mapping
filter, my_mappings_char_filter
.
The my_mappings_char_filter
filter replaces the :)
and :(
emoticons
with a text equivalent.
resp = client.indices.create( index="my-index-000001", settings={ "analysis": { "analyzer": { "my_analyzer": { "tokenizer": "standard", "char_filter": [ "my_mappings_char_filter" ] } }, "char_filter": { "my_mappings_char_filter": { "type": "mapping", "mappings": [ ":) => _happy_", ":( => _sad_" ] } } } }, ) print(resp)
response = client.indices.create( index: 'my-index-000001', body: { settings: { analysis: { analyzer: { my_analyzer: { tokenizer: 'standard', char_filter: [ 'my_mappings_char_filter' ] } }, char_filter: { my_mappings_char_filter: { type: 'mapping', mappings: [ ':) => _happy_', ':( => _sad_' ] } } } } } ) puts response
const response = await client.indices.create({ index: "my-index-000001", settings: { analysis: { analyzer: { my_analyzer: { tokenizer: "standard", char_filter: ["my_mappings_char_filter"], }, }, char_filter: { my_mappings_char_filter: { type: "mapping", mappings: [":) => _happy_", ":( => _sad_"], }, }, }, }, }); console.log(response);
PUT /my-index-000001 { "settings": { "analysis": { "analyzer": { "my_analyzer": { "tokenizer": "standard", "char_filter": [ "my_mappings_char_filter" ] } }, "char_filter": { "my_mappings_char_filter": { "type": "mapping", "mappings": [ ":) => _happy_", ":( => _sad_" ] } } } } }
The following analyze API request uses the custom
my_mappings_char_filter
to replace :(
with _sad_
in
the text I'm delighted about it :(
.
resp = client.indices.analyze( index="my-index-000001", tokenizer="keyword", char_filter=[ "my_mappings_char_filter" ], text="I'm delighted about it :(", ) print(resp)
const response = await client.indices.analyze({ index: "my-index-000001", tokenizer: "keyword", char_filter: ["my_mappings_char_filter"], text: "I'm delighted about it :(", }); console.log(response);
GET /my-index-000001/_analyze { "tokenizer": "keyword", "char_filter": [ "my_mappings_char_filter" ], "text": "I'm delighted about it :(" }
The filter produces the following text:
[ I'm delighted about it _sad_ ]