ignore_above
ignore_above
Strings longer than the ignore_above
setting will not be indexed or stored.
For arrays of strings, ignore_above
will be applied for each array element separately and string elements longer than ignore_above
will not be indexed or stored.
All strings/array elements will still be present in the _source
field, if the latter is enabled, which is the default in Elasticsearch.
resp = client.indices.create( index="my-index-000001", mappings={ "properties": { "message": { "type": "keyword", "ignore_above": 20 } } }, ) print(resp) resp1 = client.index( index="my-index-000001", id="1", document={ "message": "Syntax error" }, ) print(resp1) resp2 = client.index( index="my-index-000001", id="2", document={ "message": "Syntax error with some long stacktrace" }, ) print(resp2) resp3 = client.search( index="my-index-000001", aggs={ "messages": { "terms": { "field": "message" } } }, ) print(resp3)
response = client.indices.create( index: 'my-index-000001', body: { mappings: { properties: { message: { type: 'keyword', ignore_above: 20 } } } } ) puts response response = client.index( index: 'my-index-000001', id: 1, body: { message: 'Syntax error' } ) puts response response = client.index( index: 'my-index-000001', id: 2, body: { message: 'Syntax error with some long stacktrace' } ) puts response response = client.search( index: 'my-index-000001', body: { aggregations: { messages: { terms: { field: 'message' } } } } ) puts response
const response = await client.indices.create({ index: "my-index-000001", mappings: { properties: { message: { type: "keyword", ignore_above: 20, }, }, }, }); console.log(response); const response1 = await client.index({ index: "my-index-000001", id: 1, document: { message: "Syntax error", }, }); console.log(response1); const response2 = await client.index({ index: "my-index-000001", id: 2, document: { message: "Syntax error with some long stacktrace", }, }); console.log(response2); const response3 = await client.search({ index: "my-index-000001", aggs: { messages: { terms: { field: "message", }, }, }, }); console.log(response3);
PUT my-index-000001 { "mappings": { "properties": { "message": { "type": "keyword", "ignore_above": 20 } } } } PUT my-index-000001/_doc/1 { "message": "Syntax error" } PUT my-index-000001/_doc/2 { "message": "Syntax error with some long stacktrace" } GET my-index-000001/_search { "aggs": { "messages": { "terms": { "field": "message" } } } }
1. This field will ignore any string longer than 20 characters.
2. This document is indexed successfully.
3. This document will be indexed, but without indexing the message field.
4. Search returns both documents, but only the first is present in the terms aggregation.
The ignore_above
setting can be updated on
existing fields using the update mapping API.
This option is also useful for protecting against Lucene’s term byte-length
limit of 32766.
The value for ignore_above
is the character count, but Lucene counts
bytes. If you use UTF-8 text with many non-ASCII characters, you may want to
set the limit to 32766 / 4 = 8191 (rounded down),
since a single UTF-8 character may occupy at most
4 bytes.