normalizer
The normalizer property of keyword fields is similar to analyzer except that it guarantees that the analysis chain produces a single token.
The normalizer is applied prior to indexing the keyword, as well as at search-time when the keyword field is searched via a query parser such as the match query or via a term-level query such as the term query.
A simple normalizer called lowercase ships with Elasticsearch and can be used directly. Custom normalizers can be defined as part of the analysis settings, as follows.
PUT index
{
  "settings": {
    "analysis": {
      "normalizer": {
        "my_normalizer": {
          "type": "custom",
          "char_filter": [],
          "filter": ["lowercase", "asciifolding"]
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "foo": {
        "type": "keyword",
        "normalizer": "my_normalizer"
      }
    }
  }
}
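Since the built-in lowercase normalizer requires no configuration, it can also be referenced by name directly in a mapping, with no analysis settings at all. A minimal sketch (lowercase_index is a placeholder index name):

PUT lowercase_index
{
  "mappings": {
    "properties": {
      "foo": {
        "type": "keyword",
        "normalizer": "lowercase"
      }
    }
  }
}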
PUT index/_doc/1
{
  "foo": "BÀR"
}

PUT index/_doc/2
{
  "foo": "bar"
}

PUT index/_doc/3
{
  "foo": "baz"
}

POST index/_refresh
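To inspect what the normalizer produces before running any searches, the _analyze API accepts a normalizer parameter:

GET index/_analyze
{
  "normalizer": "my_normalizer",
  "text": "BÀR"
}

This should return a single token, bar, showing the lowercase and asciifolding filters applied in sequence.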
GET index/_search
{
  "query": {
    "term": {
      "foo": "BAR"
    }
  }
}
GET index/_search
{
  "query": {
    "match": {
      "foo": "BAR"
    }
  }
}
The above queries match documents 1 and 2 since BÀR is converted to bar at both index and query time.
{
  "took": $body.took,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 2,
      "relation": "eq"
    },
    "max_score": 0.4700036,
    "hits": [
      {
        "_index": "index",
        "_id": "1",
        "_score": 0.4700036,
        "_source": {
          "foo": "BÀR"
        }
      },
      {
        "_index": "index",
        "_id": "2",
        "_score": 0.4700036,
        "_source": {
          "foo": "bar"
        }
      }
    ]
  }
}
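Because the normalizer is also applied at search time, querying with the original accented form matches the same documents:

GET index/_search
{
  "query": {
    "term": {
      "foo": "BÀR"
    }
  }
}

The query term BÀR is normalized to bar before the lookup, so documents 1 and 2 match here as well.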
The fact that keywords are converted prior to indexing also means that aggregations return normalized values:
GET index/_search
{
  "size": 0,
  "aggs": {
    "foo_terms": {
      "terms": {
        "field": "foo"
      }
    }
  }
}
returns
{
  "took": 43,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 3,
      "relation": "eq"
    },
    "max_score": null,
    "hits": []
  },
  "aggregations": {
    "foo_terms": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {
          "key": "bar",
          "doc_count": 2
        },
        {
          "key": "baz",
          "doc_count": 1
        }
      ]
    }
  }
}
When synthetic source is enabled on an index, keyword fields with a normalizer store the original pre-normalized value separately so that it can be included in the synthetic source. This can cause elevated disk usage since values are stored twice: once pre-normalized, and once normalized.
To reduce disk usage, the normalizer_skip_store_original_value mapping parameter can be set to true. When enabled, the original value is not stored, and the normalized value is used in the reconstructed source instead.
For custom normalizers, this parameter defaults to false. However, for the built-in lowercase normalizer, it defaults to true.
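A minimal sketch of where the parameter sits in a mapping, assuming the custom normalizer defined earlier and an index on which synthetic source is already enabled (synthetic_index is a placeholder name):

PUT synthetic_index
{
  "settings": {
    "analysis": {
      "normalizer": {
        "my_normalizer": {
          "type": "custom",
          "filter": ["lowercase", "asciifolding"]
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "foo": {
        "type": "keyword",
        "normalizer": "my_normalizer",
        "normalizer_skip_store_original_value": true
      }
    }
  }
}

With this setting, the reconstructed source returns the normalized bar rather than the original BÀR, in exchange for the reduced disk usage.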