ignore_above

edit

Strings longer than the ignore_above setting will not be indexed or stored. For arrays of strings, ignore_above will be applied for each array element separately and string elements longer than ignore_above will not be indexed or stored.

All strings and array elements will still be present in the _source field if it is enabled, which is the default in Elasticsearch.

# Example: a keyword field with ignore_above, exercised with one short and
# one long string, then inspected through a terms aggregation.
index_name = "my-index-000001"

# The message field will ignore any string longer than 20 characters.
message_mapping = {"type": "keyword", "ignore_above": 20}

resp = client.indices.create(
    index=index_name,
    mappings={"properties": {"message": message_mapping}},
)
print(resp)

# This document is indexed successfully.
short_doc = {"message": "Syntax error"}
resp1 = client.index(index=index_name, id="1", document=short_doc)
print(resp1)

# This document will be indexed, but without indexing the message field.
long_doc = {"message": "Syntax error with some long stacktrace"}
resp2 = client.index(index=index_name, id="2", document=long_doc)
print(resp2)

# Search returns both documents, but only the first is present in the
# terms aggregation.
terms_on_message = {"messages": {"terms": {"field": "message"}}}
resp3 = client.search(index=index_name, aggs=terms_on_message)
print(resp3)
# Example: a keyword field with ignore_above, exercised with one short and
# one long string, then inspected through a terms aggregation.
index_name = 'my-index-000001'

# The message field will ignore any string longer than 20 characters.
message_mapping = { type: 'keyword', ignore_above: 20 }

response = client.indices.create(
  index: index_name,
  body: { mappings: { properties: { message: message_mapping } } }
)
puts response

# This document is indexed successfully.
short_doc = { message: 'Syntax error' }
response = client.index(index: index_name, id: 1, body: short_doc)
puts response

# This document will be indexed, but without indexing the message field.
long_doc = { message: 'Syntax error with some long stacktrace' }
response = client.index(index: index_name, id: 2, body: long_doc)
puts response

# Search returns both documents, but only the first is present in the
# terms aggregation.
terms_on_message = { messages: { terms: { field: 'message' } } }
response = client.search(
  index: index_name,
  body: { aggregations: terms_on_message }
)
puts response
// Example: a keyword field with ignore_above, exercised with one short and
// one long string, then inspected through a terms aggregation.
const indexName = "my-index-000001";

// The message field will ignore any string longer than 20 characters.
const messageMapping = { type: "keyword", ignore_above: 20 };

const response = await client.indices.create({
  index: indexName,
  mappings: { properties: { message: messageMapping } },
});
console.log(response);

// This document is indexed successfully.
const shortDoc = { message: "Syntax error" };
const response1 = await client.index({
  index: indexName,
  id: 1,
  document: shortDoc,
});
console.log(response1);

// This document will be indexed, but without indexing the message field.
const longDoc = { message: "Syntax error with some long stacktrace" };
const response2 = await client.index({
  index: indexName,
  id: 2,
  document: longDoc,
});
console.log(response2);

// Search returns both documents, but only the first is present in the
// terms aggregation.
const termsOnMessage = { messages: { terms: { field: "message" } } };
const response3 = await client.search({
  index: indexName,
  aggs: termsOnMessage,
});
console.log(response3);
# Create the index; the message field will ignore any string longer than 20 characters.
PUT my-index-000001
{
  "mappings": {
    "properties": {
      "message": {
        "type": "keyword",
        "ignore_above": 20 
      }
    }
  }
}

# This document is indexed successfully.
PUT my-index-000001/_doc/1 
{
  "message": "Syntax error"
}

# This document will be indexed, but without indexing the message field.
PUT my-index-000001/_doc/2 
{
  "message": "Syntax error with some long stacktrace"
}

# Search returns both documents, but only the first is present in the terms aggregation.
GET my-index-000001/_search 
{
  "aggs": {
    "messages": {
      "terms": {
        "field": "message"
      }
    }
  }
}

In the example above, the message field will ignore any string longer than 20 characters.

Document 1 ("Syntax error", 12 characters) is indexed successfully.

Document 2 will be indexed, but without indexing the message field, because its value (38 characters) exceeds the limit.

The search returns both documents, but only the first is present in the terms aggregation.

The ignore_above setting can be updated on existing fields using the update mapping API.

This option is also useful for protecting against Lucene’s term byte-length limit of 32766.

The value for ignore_above is the character count, but Lucene counts bytes. If you use UTF-8 text with many non-ASCII characters, you may want to set the limit to 32766 / 4 = 8191 since UTF-8 characters may occupy at most 4 bytes.