New

The executive guide to generative AI

Read more

search_analyzer

edit

Usually, the same analyzer should be applied at index time and at search time, to ensure that the terms in the query are in the same format as the terms in the inverted index.

Sometimes, though, it can make sense to use a different analyzer at search time, such as when using the edge_ngram tokenizer for autocomplete or when using search-time synonyms.

By default, queries will use the analyzer defined in the field mapping, but this can be overridden with the search_analyzer setting:

# Name of the example index shared by all three requests.
index_name = "my-index-000001"

# Analysis settings to define the custom "autocomplete" analyzer: the standard
# tokenizer followed by the lowercase filter and an edge_ngram filter.
edge_ngram_filter = {"type": "edge_ngram", "min_gram": 1, "max_gram": 20}
autocomplete_analyzer = {
    "type": "custom",
    "tokenizer": "standard",
    "filter": ["lowercase", "autocomplete_filter"],
}
index_settings = {
    "analysis": {
        "filter": {"autocomplete_filter": edge_ngram_filter},
        "analyzer": {"autocomplete": autocomplete_analyzer},
    }
}

# The "text" field uses the autocomplete analyzer at index time, but the
# standard analyzer at search time.
text_field = {
    "type": "text",
    "analyzer": "autocomplete",
    "search_analyzer": "standard",
}
index_mappings = {"properties": {"text": text_field}}

# 1) Create the index with the custom analysis chain and the field mapping.
resp = client.indices.create(
    index=index_name,
    settings=index_settings,
    mappings=index_mappings,
)
print(resp)

# 2) Index one document. The field is indexed as the terms:
#    [ q, qu, qui, quic, quick, b, br, bro, brow, brown, f, fo, fox ]
sample_document = {"text": "Quick Brown Fox"}
resp1 = client.index(index=index_name, id="1", document=sample_document)
print(resp1)

# 3) Search. The query searches for both of these terms: [ quick, br ]
match_query = {
    "match": {
        "text": {"query": "Quick Br", "operator": "and"},
    }
}
resp2 = client.search(index=index_name, query=match_query)
print(resp2)
# Name of the example index shared by all three requests.
index_name = 'my-index-000001'

# Analysis settings to define the custom autocomplete analyzer: the standard
# tokenizer followed by the lowercase filter and an edge_ngram filter.
edge_ngram_filter = { type: 'edge_ngram', min_gram: 1, max_gram: 20 }
autocomplete_analyzer = {
  type: 'custom',
  tokenizer: 'standard',
  filter: ['lowercase', 'autocomplete_filter']
}
index_settings = {
  analysis: {
    filter: { autocomplete_filter: edge_ngram_filter },
    analyzer: { autocomplete: autocomplete_analyzer }
  }
}

# The text field uses the autocomplete analyzer at index time, but the
# standard analyzer at search time.
text_field = {
  type: 'text',
  analyzer: 'autocomplete',
  search_analyzer: 'standard'
}
index_mappings = { properties: { text: text_field } }

# 1) Create the index with the custom analysis chain and the field mapping.
response = client.indices.create(
  index: index_name,
  body: { settings: index_settings, mappings: index_mappings }
)
puts response

# 2) Index one document. The field is indexed as the terms:
#    [ q, qu, qui, quic, quick, b, br, bro, brow, brown, f, fo, fox ]
sample_document = { text: 'Quick Brown Fox' }
response = client.index(index: index_name, id: 1, body: sample_document)
puts response

# 3) Search. The query searches for both of these terms: [ quick, br ]
match_query = {
  match: {
    text: { query: 'Quick Br', operator: 'and' }
  }
}
response = client.search(index: index_name, body: { query: match_query })
puts response
// Name of the example index shared by all three requests.
const index = "my-index-000001";

// Analysis settings to define the custom autocomplete analyzer: the standard
// tokenizer followed by the lowercase filter and an edge_ngram filter.
const edgeNgramFilter = { type: "edge_ngram", min_gram: 1, max_gram: 20 };
const autocompleteAnalyzer = {
  type: "custom",
  tokenizer: "standard",
  filter: ["lowercase", "autocomplete_filter"],
};
const settings = {
  analysis: {
    filter: { autocomplete_filter: edgeNgramFilter },
    analyzer: { autocomplete: autocompleteAnalyzer },
  },
};

// The text field uses the autocomplete analyzer at index time, but the
// standard analyzer at search time.
const textField = {
  type: "text",
  analyzer: "autocomplete",
  search_analyzer: "standard",
};
const mappings = { properties: { text: textField } };

// 1) Create the index with the custom analysis chain and the field mapping.
const response = await client.indices.create({ index, settings, mappings });
console.log(response);

// 2) Index one document. The field is indexed as the terms:
//    [ q, qu, qui, quic, quick, b, br, bro, brow, brown, f, fo, fox ]
const document = { text: "Quick Brown Fox" };
const response1 = await client.index({ index, id: 1, document });
console.log(response1);

// 3) Search. The query searches for both of these terms: [ quick, br ]
const query = {
  match: {
    text: { query: "Quick Br", operator: "and" },
  },
};
const response2 = await client.search({ index, query });
console.log(response2);
# Create the index. The "analysis" settings define the custom autocomplete
# analyzer; the "text" field uses the autocomplete analyzer at index time,
# but the standard analyzer at search time.
PUT my-index-000001
{
  "settings": {
    "analysis": {
      "filter": {
        "autocomplete_filter": {
          "type": "edge_ngram",
          "min_gram": 1,
          "max_gram": 20
        }
      },
      "analyzer": {
        "autocomplete": { 
          "type": "custom",
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "autocomplete_filter"
          ]
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "text": {
        "type": "text",
        "analyzer": "autocomplete", 
        "search_analyzer": "standard" 
      }
    }
  }
}

# Index one document. The field is indexed as the terms:
# [ q, qu, qui, quic, quick, b, br, bro, brow, brown, f, fo, fox ]
PUT my-index-000001/_doc/1
{
  "text": "Quick Brown Fox" 
}

# Search. The query searches for both of these terms: [ quick, br ]
GET my-index-000001/_search
{
  "query": {
    "match": {
      "text": {
        "query": "Quick Br", 
        "operator": "and"
      }
    }
  }
}

Analysis settings to define the custom autocomplete analyzer.

The text field uses the autocomplete analyzer at index time, but the standard analyzer at search time.

This field is indexed as the terms: [ q, qu, qui, quic, quick, b, br, bro, brow, brown, f, fo, fox ]

The query searches for both of these terms: [ quick, br ]

See Index time search-as-you-type for a full explanation of this example.

The search_analyzer setting can be updated on existing fields using the update mapping API. Note that in order to do so, the existing "analyzer" setting and "type" must both be repeated in the updated field definition.

Was this helpful?
Feedback