Significant Terms Aggregation Usage

edit

An aggregation that returns interesting or unusual occurrences of terms in a set.

The significant_terms aggregation can be very heavy when run on large indices. Work is in progress to provide more lightweight sampling techniques. As a result, the API for this feature may change in non-backwards-compatible ways.

See the Elasticsearch documentation on significant terms aggregation for more detail.

Fluent DSL example

edit
s => s
.Aggregations(a => a
    .SignificantTerms("significant_names", st => st
        .Field(p => p.Name)
        .MinimumDocumentCountAsLong(10)
        .MutualInformation(mi => mi
            .BackgroundIsSuperSet()
            .IncludeNegatives()
        )
    )
)

Object Initializer syntax example

edit
new SearchRequest<Project>
{
    Aggregations = new SignificantTermsAggregation("significant_names")
    {
        Field = Field<Project>(p => p.Name),
        MinimumDocumentCountAsLong = 10,
        MutualInformation = new MutualInformationHeuristic
        {
            BackgroundIsSuperSet = true,
            IncludeNegatives = true
        }
    }
}

Example JSON output.

{
  "aggs": {
    "significant_names": {
      "significant_terms": {
        "field": "name",
        "min_doc_count": 10,
        "mutual_information": {
          "background_is_superset": true,
          "include_negatives": true
        }
      }
    }
  }
}

Handling Responses

edit
response.ShouldBeValid();
var sigNames = response.Aggs.SignificantTerms("significant_names");
sigNames.Should().NotBeNull();
sigNames.DocCount.Should().BeGreaterThan(0);

Filtering with a regular expression pattern

edit

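Use the significant terms aggregation with an include filter so that only terms matching a regular expression pattern are returned. Note that the pattern is an anchored regular expression rather than a wildcard: "pi*" matches "p" followed by zero or more "i" characters, whereas "pi.*" would match every term that starts with "pi".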

Fluent DSL example

edit
s => s
.Aggregations(a => a
    .SignificantTerms("significant_names", st => st
        .Field(p => p.Name)
        .MinimumDocumentCountAsLong(10)
        .MutualInformation(mi => mi
            .BackgroundIsSuperSet()
            .IncludeNegatives()
        )
        .Include("pi*")
    )
)

Object Initializer syntax example

edit
new SearchRequest<Project>
{
    Aggregations = new SignificantTermsAggregation("significant_names")
    {
        Field = Field<Project>(p => p.Name),
        MinimumDocumentCountAsLong = 10,
        MutualInformation = new MutualInformationHeuristic
        {
            BackgroundIsSuperSet = true,
            IncludeNegatives = true
        },
        IncludeTerms = new SignificantTermsIncludeExclude("pi*")
    }
}

Example JSON output.

{
  "aggs": {
    "significant_names": {
      "significant_terms": {
        "field": "name",
        "min_doc_count": 10,
        "mutual_information": {
          "background_is_superset": true,
          "include_negatives": true
        },
        "include": "pi*"
      }
    }
  }
}

Handling Responses

edit
response.ShouldBeValid();
var sigNames = response.Aggs.SignificantTerms("significant_names");
sigNames.Should().NotBeNull();
sigNames.DocCount.Should().BeGreaterThan(0);

Filtering with exact values

edit

Use the significant terms aggregation with an exclude filter to omit specific terms, given as a list of exact values, from the results.

Fluent DSL example

edit
s => s
.Aggregations(a => a
    .SignificantTerms("significant_names", st => st
        .Field(p => p.Name)
        .MinimumDocumentCountAsLong(10)
        .MutualInformation(mi => mi
            .BackgroundIsSuperSet()
            .IncludeNegatives()
        )
        .Exclude(new [] { "pierce" })
    )
)

Object Initializer syntax example

edit
new SearchRequest<Project>
{
    Aggregations = new SignificantTermsAggregation("significant_names")
    {
        Field = Field<Project>(p => p.Name),
        MinimumDocumentCountAsLong = 10,
        MutualInformation = new MutualInformationHeuristic
        {
            BackgroundIsSuperSet = true,
            IncludeNegatives = true
        },
        ExcludeTerms = new SignificantTermsIncludeExclude(new[] { "pierce" })
    }
}

Example JSON output.

{
  "aggs": {
    "significant_names": {
      "significant_terms": {
        "field": "name",
        "min_doc_count": 10,
        "mutual_information": {
          "background_is_superset": true,
          "include_negatives": true
        },
        "exclude": [
          "pierce"
        ]
      }
    }
  }
}

Handling Responses

edit
response.ShouldBeValid();
var sigNames = response.Aggs.SignificantTerms("significant_names");
sigNames.Should().NotBeNull();
sigNames.DocCount.Should().BeGreaterThan(0);