Configure dynamic field mappings and analyzers in an Elasticsearch index engine

To ensure that your Elasticsearch index engine supports all search types, configure dynamic field mappings and analyzers before any data is indexed. The snippets below set up the index with the mappings and analyzers that engines have by default.

First, close the index (new analyzers can only be defined on a closed index):

POST my-index/_close

Then create the analyzers:

PUT my-index/_settings
{
  "analysis": {
    "filter": {
      "front_ngram": {
        "type": "edge_ngram",
        "min_gram": "1",
        "max_gram": "12"
      },
      "bigram_joiner": {
        "max_shingle_size": "2",
        "token_separator": "",
        "output_unigrams": "false",
        "type": "shingle"
      },
      "bigram_max_size": {
        "type": "length",
        "max": "16",
        "min": "0"
      },
      "en-stem-filter": {
        "name": "light_english",
        "type": "stemmer"
      },
      "bigram_joiner_unigrams": {
        "max_shingle_size": "2",
        "token_separator": "",
        "output_unigrams": "true",
        "type": "shingle"
      },
      "delimiter": {
        "split_on_numerics": "true",
        "generate_word_parts": "true",
        "preserve_original": "false",
        "catenate_words": "true",
        "generate_number_parts": "true",
        "catenate_all": "true",
        "split_on_case_change": "true",
        "type": "word_delimiter_graph",
        "catenate_numbers": "true",
        "stem_english_possessive": "true"
      },
      "en-stop-words-filter": {
        "type": "stop",
        "stopwords": "_english_"
      }
    },
    "analyzer": {
      "i_prefix": {
        "filter": [
          "cjk_width",
          "lowercase",
          "asciifolding",
          "front_ngram"
        ],
        "tokenizer": "standard"
      },
      "iq_text_delimiter": {
        "filter": [
          "delimiter",
          "cjk_width",
          "lowercase",
          "asciifolding",
          "en-stop-words-filter",
          "en-stem-filter"
        ],
        "tokenizer": "whitespace"
      },
      "q_prefix": {
        "filter": [
          "cjk_width",
          "lowercase",
          "asciifolding"
        ],
        "tokenizer": "standard"
      },
      "iq_text_base": {
        "filter": [
          "cjk_width",
          "lowercase",
          "asciifolding",
          "en-stop-words-filter"
        ],
        "tokenizer": "standard"
      },
      "iq_text_stem": {
        "filter": [
          "cjk_width",
          "lowercase",
          "asciifolding",
          "en-stop-words-filter",
          "en-stem-filter"
        ],
        "tokenizer": "standard"
      },
      "i_text_bigram": {
        "filter": [
          "cjk_width",
          "lowercase",
          "asciifolding",
          "en-stem-filter",
          "bigram_joiner",
          "bigram_max_size"
        ],
        "tokenizer": "standard"
      },
      "q_text_bigram": {
        "filter": [
          "cjk_width",
          "lowercase",
          "asciifolding",
          "en-stem-filter",
          "bigram_joiner_unigrams",
          "bigram_max_size"
        ],
        "tokenizer": "standard"
      }
    }
  }
}

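This sample configuration uses an English stemmer filter and an English stop words filter. To support text analysis in a different language, replace en-stem-filter and en-stop-words-filter with the stemmer and stop token filters appropriate for that language (that is, change the stemmer "name" value light_english and the "stopwords" value _english_), and update the analyzers that reference them if you rename the filters.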

Create dynamic mappings:

PUT my-index/_mappings
{
  "dynamic_templates": [
    {
      "permissions": {
        "match": "_*_permissions",
        "mapping": {
          "type": "keyword"
        }
      }
    },
    {
      "thumbnails": {
        "match": "_thumbnail_*",
        "mapping": {
          "type": "binary"
        }
      }
    },
    {
      "data": {
        "match_mapping_type": "*",
        "mapping": {
          "analyzer": "iq_text_base",
          "fields": {
            "date": {
              "format": "strict_date_time||strict_date",
              "ignore_malformed": true,
              "type": "date"
            },
            "prefix": {
              "search_analyzer": "q_prefix",
              "analyzer": "i_prefix",
              "type": "text",
              "index_options": "docs"
            },
            "delimiter": {
              "analyzer": "iq_text_delimiter",
              "type": "text",
              "index_options": "freqs"
            },
            "joined": {
              "search_analyzer": "q_text_bigram",
              "analyzer": "i_text_bigram",
              "type": "text",
              "index_options": "freqs"
            },
            "location": {
              "ignore_malformed": true,
              "type": "geo_point",
              "ignore_z_value": false
            },
            "float": {
              "ignore_malformed": true,
              "type": "double"
            },
            "enum": {
              "ignore_above": 2048,
              "type": "keyword"
            },
            "stem": {
              "analyzer": "iq_text_stem",
              "type": "text"
            }
          },
          "index_options": "freqs",
          "type": "text"
        }
      }
    }
  ]
}

Finally reopen the index:

POST my-index/_open

As an alternative to the above steps, you can configure an index template that automatically sets the required analyzers and mappings for searchable fields. The template must be created before the index is created, because index templates are only applied when an index is created.

Create the index template:

PUT _index_template/my-index-template
{
  "index_patterns": ["my-index*"],
  "template": {
    "settings": {
      "analysis": {
        "filter": {
          "front_ngram": {
            "type": "edge_ngram",
            "min_gram": "1",
            "max_gram": "12"
          },
          "bigram_joiner": {
            "max_shingle_size": "2",
            "token_separator": "",
            "output_unigrams": "false",
            "type": "shingle"
          },
          "bigram_max_size": {
            "type": "length",
            "max": "16",
            "min": "0"
          },
          "en-stem-filter": {
            "name": "light_english",
            "type": "stemmer"
          },
          "bigram_joiner_unigrams": {
            "max_shingle_size": "2",
            "token_separator": "",
            "output_unigrams": "true",
            "type": "shingle"
          },
          "delimiter": {
            "split_on_numerics": "true",
            "generate_word_parts": "true",
            "preserve_original": "false",
            "catenate_words": "true",
            "generate_number_parts": "true",
            "catenate_all": "true",
            "split_on_case_change": "true",
            "type": "word_delimiter_graph",
            "catenate_numbers": "true",
            "stem_english_possessive": "true"
          },
          "en-stop-words-filter": {
            "type": "stop",
            "stopwords": "_english_"
          }
        },
        "analyzer": {
          "i_prefix": {
            "filter": [
              "cjk_width",
              "lowercase",
              "asciifolding",
              "front_ngram"
            ],
            "tokenizer": "standard"
          },
          "iq_text_delimiter": {
            "filter": [
              "delimiter",
              "cjk_width",
              "lowercase",
              "asciifolding",
              "en-stop-words-filter",
              "en-stem-filter"
            ],
            "tokenizer": "whitespace"
          },
          "q_prefix": {
            "filter": [
              "cjk_width",
              "lowercase",
              "asciifolding"
            ],
            "tokenizer": "standard"
          },
          "iq_text_base": {
            "filter": [
              "cjk_width",
              "lowercase",
              "asciifolding",
              "en-stop-words-filter"
            ],
            "tokenizer": "standard"
          },
          "iq_text_stem": {
            "filter": [
              "cjk_width",
              "lowercase",
              "asciifolding",
              "en-stop-words-filter",
              "en-stem-filter"
            ],
            "tokenizer": "standard"
          },
          "i_text_bigram": {
            "filter": [
              "cjk_width",
              "lowercase",
              "asciifolding",
              "en-stem-filter",
              "bigram_joiner",
              "bigram_max_size"
            ],
            "tokenizer": "standard"
          },
          "q_text_bigram": {
            "filter": [
              "cjk_width",
              "lowercase",
              "asciifolding",
              "en-stem-filter",
              "bigram_joiner_unigrams",
              "bigram_max_size"
            ],
            "tokenizer": "standard"
          }
        }
      }
    },
    "mappings": {
      "dynamic_templates": [
        {
          "permissions": {
            "match": "_*_permissions",
            "mapping": {
              "type": "keyword"
            }
          }
        },
        {
          "thumbnails": {
            "match": "_thumbnail_*",
            "mapping": {
              "type": "binary"
            }
          }
        },
        {
          "data": {
            "match_mapping_type": "*",
            "mapping": {
              "analyzer": "iq_text_base",
              "fields": {
                "date": {
                  "format": "strict_date_time||strict_date",
                  "ignore_malformed": true,
                  "type": "date"
                },
                "prefix": {
                  "search_analyzer": "q_prefix",
                  "analyzer": "i_prefix",
                  "type": "text",
                  "index_options": "docs"
                },
                "delimiter": {
                  "analyzer": "iq_text_delimiter",
                  "type": "text",
                  "index_options": "freqs"
                },
                "joined": {
                  "search_analyzer": "q_text_bigram",
                  "analyzer": "i_text_bigram",
                  "type": "text",
                  "index_options": "freqs"
                },
                "location": {
                  "ignore_malformed": true,
                  "type": "geo_point",
                  "ignore_z_value": false
                },
                "float": {
                  "ignore_malformed": true,
                  "type": "double"
                },
                "enum": {
                  "ignore_above": 2048,
                  "type": "keyword"
                },
                "stem": {
                  "analyzer": "iq_text_stem",
                  "type": "text"
                }
              },
              "index_options": "freqs",
              "type": "text"
            }
          }
        }
      ]
    }
  },
  "_meta": {
    "description": "Template for Elasticsearch index engine"
  }
}

Then create your index with a name that matches the template’s pattern.

For more information about index templates, refer to the Elasticsearch index templates documentation.