This documentation contains work-in-progress information for future Elastic Stack and Cloud releases. Use the version selector to view supported release docs. It also contains some Elastic Cloud serverless information. Check out our serverless docs for more details.
Test an analyzer
Test an analyzer
The `analyze` API is an invaluable tool for viewing the terms produced by an analyzer. A built-in analyzer can be specified inline in the request:
resp = client.indices.analyze( analyzer="whitespace", text="The quick brown fox.", ) print(resp)
response = client.indices.analyze( body: { analyzer: 'whitespace', text: 'The quick brown fox.' } ) puts response
const response = await client.indices.analyze({ analyzer: "whitespace", text: "The quick brown fox.", }); console.log(response);
POST _analyze { "analyzer": "whitespace", "text": "The quick brown fox." }
The API returns the following response:
{ "tokens": [ { "token": "The", "start_offset": 0, "end_offset": 3, "type": "word", "position": 0 }, { "token": "quick", "start_offset": 4, "end_offset": 9, "type": "word", "position": 1 }, { "token": "brown", "start_offset": 10, "end_offset": 15, "type": "word", "position": 2 }, { "token": "fox.", "start_offset": 16, "end_offset": 20, "type": "word", "position": 3 } ] }
You can also test combinations of:
- A tokenizer
- Zero or more token filters
- Zero or more character filters
resp = client.indices.analyze( tokenizer="standard", filter=[ "lowercase", "asciifolding" ], text="Is this déja vu?", ) print(resp)
response = client.indices.analyze( body: { tokenizer: 'standard', filter: [ 'lowercase', 'asciifolding' ], text: 'Is this déja vu?' } ) puts response
const response = await client.indices.analyze({ tokenizer: "standard", filter: ["lowercase", "asciifolding"], text: "Is this déja vu?", }); console.log(response);
POST _analyze { "tokenizer": "standard", "filter": [ "lowercase", "asciifolding" ], "text": "Is this déja vu?" }
The API returns the following response:
{ "tokens": [ { "token": "is", "start_offset": 0, "end_offset": 2, "type": "<ALPHANUM>", "position": 0 }, { "token": "this", "start_offset": 3, "end_offset": 7, "type": "<ALPHANUM>", "position": 1 }, { "token": "deja", "start_offset": 8, "end_offset": 12, "type": "<ALPHANUM>", "position": 2 }, { "token": "vu", "start_offset": 13, "end_offset": 15, "type": "<ALPHANUM>", "position": 3 } ] }
Alternatively, a `custom` analyzer can be referred to when running the `analyze` API on a specific index:
resp = client.indices.create( index="my-index-000001", settings={ "analysis": { "analyzer": { "std_folded": { "type": "custom", "tokenizer": "standard", "filter": [ "lowercase", "asciifolding" ] } } } }, mappings={ "properties": { "my_text": { "type": "text", "analyzer": "std_folded" } } }, ) print(resp) resp1 = client.indices.analyze( index="my-index-000001", analyzer="std_folded", text="Is this déjà vu?", ) print(resp1) resp2 = client.indices.analyze( index="my-index-000001", field="my_text", text="Is this déjà vu?", ) print(resp2)
response = client.indices.create( index: 'my-index-000001', body: { settings: { analysis: { analyzer: { std_folded: { type: 'custom', tokenizer: 'standard', filter: [ 'lowercase', 'asciifolding' ] } } } }, mappings: { properties: { my_text: { type: 'text', analyzer: 'std_folded' } } } } ) puts response response = client.indices.analyze( index: 'my-index-000001', body: { analyzer: 'std_folded', text: 'Is this déjà vu?' } ) puts response response = client.indices.analyze( index: 'my-index-000001', body: { field: 'my_text', text: 'Is this déjà vu?' } ) puts response
const response = await client.indices.create({ index: "my-index-000001", settings: { analysis: { analyzer: { std_folded: { type: "custom", tokenizer: "standard", filter: ["lowercase", "asciifolding"], }, }, }, }, mappings: { properties: { my_text: { type: "text", analyzer: "std_folded", }, }, }, }); console.log(response); const response1 = await client.indices.analyze({ index: "my-index-000001", analyzer: "std_folded", text: "Is this déjà vu?", }); console.log(response1); const response2 = await client.indices.analyze({ index: "my-index-000001", field: "my_text", text: "Is this déjà vu?", }); console.log(response2);
PUT my-index-000001 { "settings": { "analysis": { "analyzer": { "std_folded": { "type": "custom", "tokenizer": "standard", "filter": [ "lowercase", "asciifolding" ] } } } }, "mappings": { "properties": { "my_text": { "type": "text", "analyzer": "std_folded" } } } } GET my-index-000001/_analyze { "analyzer": "std_folded", "text": "Is this déjà vu?" } GET my-index-000001/_analyze { "field": "my_text", "text": "Is this déjà vu?" }
Both `_analyze` requests return the same response, because the `my_text` field is mapped to the `std_folded` analyzer:
{ "tokens": [ { "token": "is", "start_offset": 0, "end_offset": 2, "type": "<ALPHANUM>", "position": 0 }, { "token": "this", "start_offset": 3, "end_offset": 7, "type": "<ALPHANUM>", "position": 1 }, { "token": "deja", "start_offset": 8, "end_offset": 12, "type": "<ALPHANUM>", "position": 2 }, { "token": "vu", "start_offset": 13, "end_offset": 15, "type": "<ALPHANUM>", "position": 3 } ] }