Configuring built-in analyzers

edit

The built-in analyzers can be used directly without any configuration. Some of them, however, support configuration options to alter their behaviour. For instance, the standard analyzer can be configured to support a list of stop words:

# Python client example: create the index, then analyze the same text through
# both the parent field and its "english" sub-field.
index_name = "my-index-000001"
sample_text = "The old brown cow"

# std_english is the standard analyzer configured with the pre-defined list of
# English stop words.
analysis_settings = {
    "analysis": {
        "analyzer": {
            "std_english": {"type": "standard", "stopwords": "_english_"},
        },
    },
}

# my_text uses the unconfigured standard analyzer; my_text.english uses the
# std_english analyzer defined above.
field_mappings = {
    "properties": {
        "my_text": {
            "type": "text",
            "analyzer": "standard",
            "fields": {
                "english": {"type": "text", "analyzer": "std_english"},
            },
        },
    },
}

resp = client.indices.create(
    index=index_name,
    settings=analysis_settings,
    mappings=field_mappings,
)
print(resp)

# my_text: no stop words are removed.
resp1 = client.indices.analyze(
    index=index_name, field="my_text", text=sample_text
)
print(resp1)

# my_text.english: English stop words are removed.
resp2 = client.indices.analyze(
    index=index_name, field="my_text.english", text=sample_text
)
print(resp2)
# Ruby client example: create the index, then analyze the same text through
# both the parent field and its "english" sub-field.
index_name = 'my-index-000001'
sample_text = 'The old brown cow'

# std_english is the standard analyzer configured with the pre-defined list of
# English stop words.
analysis_settings = {
  analysis: {
    analyzer: {
      std_english: { type: 'standard', stopwords: '_english_' }
    }
  }
}

# my_text uses the unconfigured standard analyzer; my_text.english uses the
# std_english analyzer defined above.
field_mappings = {
  properties: {
    my_text: {
      type: 'text',
      analyzer: 'standard',
      fields: {
        english: { type: 'text', analyzer: 'std_english' }
      }
    }
  }
}

response = client.indices.create(
  index: index_name,
  body: { settings: analysis_settings, mappings: field_mappings }
)
puts response

# my_text: no stop words are removed.
response = client.indices.analyze(
  index: index_name,
  body: { field: 'my_text', text: sample_text }
)
puts response

# my_text.english: English stop words are removed.
response = client.indices.analyze(
  index: index_name,
  body: { field: 'my_text.english', text: sample_text }
)
puts response
// JavaScript client example: create the index, then analyze the same text
// through both the parent field and its "english" sub-field.
const indexName = "my-index-000001";
const sampleText = "The old brown cow";

// std_english is the standard analyzer configured with the pre-defined list of
// English stop words.
const analysisSettings = {
  analysis: {
    analyzer: {
      std_english: { type: "standard", stopwords: "_english_" },
    },
  },
};

// my_text uses the unconfigured standard analyzer; my_text.english uses the
// std_english analyzer defined above.
const fieldMappings = {
  properties: {
    my_text: {
      type: "text",
      analyzer: "standard",
      fields: {
        english: { type: "text", analyzer: "std_english" },
      },
    },
  },
};

const response = await client.indices.create({
  index: indexName,
  settings: analysisSettings,
  mappings: fieldMappings,
});
console.log(response);

// my_text: no stop words are removed.
const response1 = await client.indices.analyze({
  index: indexName,
  field: "my_text",
  text: sampleText,
});
console.log(response1);

// my_text.english: English stop words are removed.
const response2 = await client.indices.analyze({
  index: indexName,
  field: "my_text.english",
  text: sampleText,
});
console.log(response2);
# Console example. Create the index: std_english is the standard analyzer
# configured with the pre-defined list of English stop words; my_text uses the
# plain standard analyzer and my_text.english uses std_english.
PUT my-index-000001
{
  "settings": {
    "analysis": {
      "analyzer": {
        "std_english": { 
          "type":      "standard",
          "stopwords": "_english_"
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "my_text": {
        "type":     "text",
        "analyzer": "standard", 
        "fields": {
          "english": {
            "type":     "text",
            "analyzer": "std_english" 
          }
        }
      }
    }
  }
}

# Analyze with my_text (standard analyzer): no stop words are removed.
# Resulting terms: [ the, old, brown, cow ]
POST my-index-000001/_analyze
{
  "field": "my_text", 
  "text": "The old brown cow"
}

# Analyze with my_text.english (std_english analyzer): English stop words are removed.
# Resulting terms: [ old, brown, cow ]
POST my-index-000001/_analyze
{
  "field": "my_text.english", 
  "text": "The old brown cow"
}

We define the std_english analyzer to be based on the standard analyzer, but configured to remove the pre-defined list of English stop words.

The my_text field uses the standard analyzer directly, without any configuration. No stop words will be removed from this field. The resulting terms are: [ the, old, brown, cow ]

The my_text.english field uses the std_english analyzer, so English stop words will be removed. The resulting terms are: [ old, brown, cow ]