Terms set query

edit

Returns documents that contain a minimum number of exact terms in a provided field.

The terms_set query is the same as the terms query, except you can define the number of matching terms required to return a document. For example:

  • A field, programming_languages, contains a list of programming languages known by a job candidate, such as c++, java, or php. You can use the terms_set query to return documents that match at least two of these languages.
  • A field, permissions, contains a list of possible user permissions for an application. You can use the terms_set query to return documents that match a subset of these permissions.

Example request

edit

Index setup

edit

In most cases, you’ll need to include a numeric field mapping in your index to use the terms_set query. This numeric field contains the number of matching terms required to return a document.

To see how you can set up an index for the terms_set query, try the following example.

  1. Create an index, job-candidates, with the following field mappings:

    • name, a keyword field. This field contains the name of the job candidate.
    • programming_languages, a keyword field. This field contains programming languages known by the job candidate.
    • required_matches, a numeric long field. This field contains the number of matching terms required to return a document.
    resp = client.indices.create(
        index="job-candidates",
        mappings={
            "properties": {
                "name": {
                    "type": "keyword"
                },
                "programming_languages": {
                    "type": "keyword"
                },
                "required_matches": {
                    "type": "long"
                }
            }
        },
    )
    print(resp)
    response = client.indices.create(
      index: 'job-candidates',
      body: {
        mappings: {
          properties: {
            name: {
              type: 'keyword'
            },
            programming_languages: {
              type: 'keyword'
            },
            required_matches: {
              type: 'long'
            }
          }
        }
      }
    )
    puts response
    const response = await client.indices.create({
      index: "job-candidates",
      mappings: {
        properties: {
          name: {
            type: "keyword",
          },
          programming_languages: {
            type: "keyword",
          },
          required_matches: {
            type: "long",
          },
        },
      },
    });
    console.log(response);
    PUT /job-candidates
    {
      "mappings": {
        "properties": {
          "name": {
            "type": "keyword"
          },
          "programming_languages": {
            "type": "keyword"
          },
          "required_matches": {
            "type": "long"
          }
        }
      }
    }
  2. Index a document with an ID of 1 and the following values:

    • Jane Smith in the name field.
    • ["c++", "java"] in the programming_languages field.
    • 2 in the required_matches field.

    Include the ?refresh parameter so the document is immediately available for search.

    resp = client.index(
        index="job-candidates",
        id="1",
        refresh=True,
        document={
            "name": "Jane Smith",
            "programming_languages": [
                "c++",
                "java"
            ],
            "required_matches": 2
        },
    )
    print(resp)
    response = client.index(
      index: 'job-candidates',
      id: 1,
      refresh: true,
      body: {
        name: 'Jane Smith',
        programming_languages: [
          'c++',
          'java'
        ],
        required_matches: 2
      }
    )
    puts response
    const response = await client.index({
      index: "job-candidates",
      id: 1,
      refresh: "true",
      document: {
        name: "Jane Smith",
        programming_languages: ["c++", "java"],
        required_matches: 2,
      },
    });
    console.log(response);
    PUT /job-candidates/_doc/1?refresh
    {
      "name": "Jane Smith",
      "programming_languages": [ "c++", "java" ],
      "required_matches": 2
    }
  3. Index another document with an ID of 2 and the following values:

    • Jason Response in the name field.
    • ["java", "php"] in the programming_languages field.
    • 2 in the required_matches field.
    resp = client.index(
        index="job-candidates",
        id="2",
        refresh=True,
        document={
            "name": "Jason Response",
            "programming_languages": [
                "java",
                "php"
            ],
            "required_matches": 2
        },
    )
    print(resp)
    response = client.index(
      index: 'job-candidates',
      id: 2,
      refresh: true,
      body: {
        name: 'Jason Response',
        programming_languages: [
          'java',
          'php'
        ],
        required_matches: 2
      }
    )
    puts response
    const response = await client.index({
      index: "job-candidates",
      id: 2,
      refresh: "true",
      document: {
        name: "Jason Response",
        programming_languages: ["java", "php"],
        required_matches: 2,
      },
    });
    console.log(response);
    PUT /job-candidates/_doc/2?refresh
    {
      "name": "Jason Response",
      "programming_languages": [ "java", "php" ],
      "required_matches": 2
    }

You can now use the required_matches field value as the number of matching terms required to return a document in the terms_set query.

Example query

edit

The following search returns documents where the programming_languages field contains at least two of the following terms:

  • c++
  • java
  • php

The minimum_should_match_field is required_matches. This means the number of matching terms required for each document is read from that document's required_matches field, which is 2 for both documents indexed above.

resp = client.search(
    index="job-candidates",
    query={
        "terms_set": {
            "programming_languages": {
                "terms": [
                    "c++",
                    "java",
                    "php"
                ],
                "minimum_should_match_field": "required_matches"
            }
        }
    },
)
print(resp)
response = client.search(
  index: 'job-candidates',
  body: {
    query: {
      terms_set: {
        programming_languages: {
          terms: [
            'c++',
            'java',
            'php'
          ],
          minimum_should_match_field: 'required_matches'
        }
      }
    }
  }
)
puts response
const response = await client.search({
  index: "job-candidates",
  query: {
    terms_set: {
      programming_languages: {
        terms: ["c++", "java", "php"],
        minimum_should_match_field: "required_matches",
      },
    },
  },
});
console.log(response);
GET /job-candidates/_search
{
  "query": {
    "terms_set": {
      "programming_languages": {
        "terms": [ "c++", "java", "php" ],
        "minimum_should_match_field": "required_matches"
      }
    }
  }
}

Top-level parameters for terms_set

edit
<field>
(Required, object) Field you wish to search.

Parameters for <field>

edit
terms

(Required, array of strings) Array of terms you wish to find in the provided <field>. To return a document, a required number of terms must exactly match the field values, including whitespace and capitalization.

The required number of matching terms is defined in the minimum_should_match_field or minimum_should_match_script parameter.

minimum_should_match_field
(Optional, string) Name of the numeric field containing the number of matching terms required to return a document.
minimum_should_match_script

(Optional, string) Custom script containing the number of matching terms required to return a document.

For parameters and valid values, see Scripting.

For an example query using the minimum_should_match_script parameter, see How to use the minimum_should_match_script parameter.

Notes

edit

How to use the minimum_should_match_script parameter

edit

You can use minimum_should_match_script to define the required number of matching terms using a script. This is useful if you need to set the number of required terms dynamically.

Example query using minimum_should_match_script
edit

The following search returns documents where the programming_languages field contains at least two of the following terms:

  • c++
  • java
  • php

The script in the source parameter of this query, Math.min(params.num_terms, doc['required_matches'].value), indicates:

  • The required number of terms to match cannot exceed params.num_terms, the number of terms provided in the terms field.
  • The required number of terms to match is 2, the value of the required_matches field.
resp = client.search(
    index="job-candidates",
    query={
        "terms_set": {
            "programming_languages": {
                "terms": [
                    "c++",
                    "java",
                    "php"
                ],
                "minimum_should_match_script": {
                    "source": "Math.min(params.num_terms, doc['required_matches'].value)"
                },
                "boost": 1
            }
        }
    },
)
print(resp)
response = client.search(
  index: 'job-candidates',
  body: {
    query: {
      terms_set: {
        programming_languages: {
          terms: [
            'c++',
            'java',
            'php'
          ],
          minimum_should_match_script: {
            source: "Math.min(params.num_terms, doc['required_matches'].value)"
          },
          boost: 1
        }
      }
    }
  }
)
puts response
const response = await client.search({
  index: "job-candidates",
  query: {
    terms_set: {
      programming_languages: {
        terms: ["c++", "java", "php"],
        minimum_should_match_script: {
          source: "Math.min(params.num_terms, doc['required_matches'].value)",
        },
        boost: 1,
      },
    },
  },
});
console.log(response);
GET /job-candidates/_search
{
  "query": {
    "terms_set": {
      "programming_languages": {
        "terms": [ "c++", "java", "php" ],
        "minimum_should_match_script": {
          "source": "Math.min(params.num_terms, doc['required_matches'].value)"
        },
        "boost": 1.0
      }
    }
  }
}