Decimal digit token filter
Converts all digits in the Unicode Decimal_Number General Category to 0-9.
For example, the filter changes the Bengali numeral ৩ to 3.
This filter uses Lucene’s DecimalDigitFilter.
Example
The following analyze API request uses the decimal_digit filter to convert Devanagari numerals to 0-9:
resp = client.indices.analyze( tokenizer="whitespace", filter=[ "decimal_digit" ], text="१-one two-२ ३", ) print(resp)
response = client.indices.analyze( body: { tokenizer: 'whitespace', filter: [ 'decimal_digit' ], text: '१-one two-२ ३' } ) puts response
const response = await client.indices.analyze({ tokenizer: "whitespace", filter: ["decimal_digit"], text: "१-one two-२ ३", }); console.log(response);
GET /_analyze { "tokenizer" : "whitespace", "filter" : ["decimal_digit"], "text" : "१-one two-२ ३" }
The filter produces the following tokens:
[ 1-one, two-2, 3]
Add to an analyzer
The following create index API request uses the decimal_digit filter to configure a new custom analyzer.
resp = client.indices.create( index="decimal_digit_example", settings={ "analysis": { "analyzer": { "whitespace_decimal_digit": { "tokenizer": "whitespace", "filter": [ "decimal_digit" ] } } } }, ) print(resp)
response = client.indices.create( index: 'decimal_digit_example', body: { settings: { analysis: { analyzer: { whitespace_decimal_digit: { tokenizer: 'whitespace', filter: [ 'decimal_digit' ] } } } } } ) puts response
const response = await client.indices.create({ index: "decimal_digit_example", settings: { analysis: { analyzer: { whitespace_decimal_digit: { tokenizer: "whitespace", filter: ["decimal_digit"], }, }, }, }, }); console.log(response);
PUT /decimal_digit_example { "settings": { "analysis": { "analyzer": { "whitespace_decimal_digit": { "tokenizer": "whitespace", "filter": [ "decimal_digit" ] } } } } }