Test an analyzer
Test an analyzer
The analyze API is an invaluable tool for viewing the terms produced by an analyzer. A built-in analyzer can be specified inline in the request:
resp = client.indices.analyze( analyzer="whitespace", text="The quick brown fox.", ) print(resp)
response = client.indices.analyze( body: { analyzer: 'whitespace', text: 'The quick brown fox.' } ) puts response
const response = await client.indices.analyze({ analyzer: "whitespace", text: "The quick brown fox.", }); console.log(response);
POST _analyze { "analyzer": "whitespace", "text": "The quick brown fox." }
The API returns the following response:
{ "tokens": [ { "token": "The", "start_offset": 0, "end_offset": 3, "type": "word", "position": 0 }, { "token": "quick", "start_offset": 4, "end_offset": 9, "type": "word", "position": 1 }, { "token": "brown", "start_offset": 10, "end_offset": 15, "type": "word", "position": 2 }, { "token": "fox.", "start_offset": 16, "end_offset": 20, "type": "word", "position": 3 } ] }
You can also test combinations of:
- A tokenizer
- Zero or more token filters
- Zero or more character filters
resp = client.indices.analyze( tokenizer="standard", filter=[ "lowercase", "asciifolding" ], text="Is this déja vu?", ) print(resp)
response = client.indices.analyze( body: { tokenizer: 'standard', filter: [ 'lowercase', 'asciifolding' ], text: 'Is this déja vu?' } ) puts response
const response = await client.indices.analyze({ tokenizer: "standard", filter: ["lowercase", "asciifolding"], text: "Is this déja vu?", }); console.log(response);
POST _analyze { "tokenizer": "standard", "filter": [ "lowercase", "asciifolding" ], "text": "Is this déja vu?" }
The API returns the following response:
{ "tokens": [ { "token": "is", "start_offset": 0, "end_offset": 2, "type": "<ALPHANUM>", "position": 0 }, { "token": "this", "start_offset": 3, "end_offset": 7, "type": "<ALPHANUM>", "position": 1 }, { "token": "deja", "start_offset": 8, "end_offset": 12, "type": "<ALPHANUM>", "position": 2 }, { "token": "vu", "start_offset": 13, "end_offset": 15, "type": "<ALPHANUM>", "position": 3 } ] }
Alternatively, a custom analyzer can be referred to when running the analyze API on a specific index:
resp = client.indices.create( index="my-index-000001", settings={ "analysis": { "analyzer": { "std_folded": { "type": "custom", "tokenizer": "standard", "filter": [ "lowercase", "asciifolding" ] } } } }, mappings={ "properties": { "my_text": { "type": "text", "analyzer": "std_folded" } } }, ) print(resp) resp1 = client.indices.analyze( index="my-index-000001", analyzer="std_folded", text="Is this déjà vu?", ) print(resp1) resp2 = client.indices.analyze( index="my-index-000001", field="my_text", text="Is this déjà vu?", ) print(resp2)
response = client.indices.create( index: 'my-index-000001', body: { settings: { analysis: { analyzer: { std_folded: { type: 'custom', tokenizer: 'standard', filter: [ 'lowercase', 'asciifolding' ] } } } }, mappings: { properties: { my_text: { type: 'text', analyzer: 'std_folded' } } } } ) puts response response = client.indices.analyze( index: 'my-index-000001', body: { analyzer: 'std_folded', text: 'Is this déjà vu?' } ) puts response response = client.indices.analyze( index: 'my-index-000001', body: { field: 'my_text', text: 'Is this déjà vu?' } ) puts response
const response = await client.indices.create({ index: "my-index-000001", settings: { analysis: { analyzer: { std_folded: { type: "custom", tokenizer: "standard", filter: ["lowercase", "asciifolding"], }, }, }, }, mappings: { properties: { my_text: { type: "text", analyzer: "std_folded", }, }, }, }); console.log(response); const response1 = await client.indices.analyze({ index: "my-index-000001", analyzer: "std_folded", text: "Is this déjà vu?", }); console.log(response1); const response2 = await client.indices.analyze({ index: "my-index-000001", field: "my_text", text: "Is this déjà vu?", }); console.log(response2);
PUT my-index-000001 { "settings": { "analysis": { "analyzer": { "std_folded": { "type": "custom", "tokenizer": "standard", "filter": [ "lowercase", "asciifolding" ] } } } }, "mappings": { "properties": { "my_text": { "type": "text", "analyzer": "std_folded" } } } } GET my-index-000001/_analyze { "analyzer": "std_folded", "text": "Is this déjà vu?" } GET my-index-000001/_analyze { "field": "my_text", "text": "Is this déjà vu?" }
Both analyze requests return the following response, because the my_text field is mapped to the std_folded analyzer:
{ "tokens": [ { "token": "is", "start_offset": 0, "end_offset": 2, "type": "<ALPHANUM>", "position": 0 }, { "token": "this", "start_offset": 3, "end_offset": 7, "type": "<ALPHANUM>", "position": 1 }, { "token": "deja", "start_offset": 8, "end_offset": 12, "type": "<ALPHANUM>", "position": 2 }, { "token": "vu", "start_offset": 13, "end_offset": 15, "type": "<ALPHANUM>", "position": 3 } ] }