Significant Terms Aggregation Usage

edit

An aggregation that returns interesting or unusual occurrences of terms in a set.

The significant_terms aggregation can be very heavy when run on large indices. Work is in progress to provide more lightweight sampling techniques. As a result, the API for this feature may change in non-backwards-compatible ways.

See the Elasticsearch documentation on significant terms aggregation for more detail.

Fluent DSL example

edit
a => a
.SignificantTerms("significant_names", st => st
    .Field(p => p.Name)
    .MinimumDocumentCount(10)
    .MutualInformation(mi => mi
        .BackgroundIsSuperSet()
        .IncludeNegatives()
    )
)

Object Initializer syntax example

edit
new SignificantTermsAggregation("significant_names")
{
    Field = Field<Project>(p => p.Name),
    MinimumDocumentCount = 10,
    MutualInformation = new MutualInformationHeuristic
    {
        BackgroundIsSuperSet = true,
        IncludeNegatives = true
    }
}

Example JSON output.

{
  "significant_names": {
    "significant_terms": {
      "field": "name",
      "min_doc_count": 10,
      "mutual_information": {
        "background_is_superset": true,
        "include_negatives": true
      }
    }
  }
}

Handling Responses

edit
response.ShouldBeValid();
var sigNames = response.Aggregations.SignificantTerms("significant_names");
sigNames.Should().NotBeNull();
sigNames.DocCount.Should().BeGreaterThan(0);

Filtering with a regular expression pattern

edit

The significant terms aggregation can be filtered so that only values matching a regular expression pattern are included.

Fluent DSL example

edit
a => a
.SignificantTerms("significant_names", st => st
    .Field(p => p.Name)
    .MinimumDocumentCount(10)
    .MutualInformation(mi => mi
        .BackgroundIsSuperSet()
        .IncludeNegatives()
    )
    .Include("pi*")
)

Object Initializer syntax example

edit
new SignificantTermsAggregation("significant_names")
{
    Field = Field<Project>(p => p.Name),
    MinimumDocumentCount = 10,
    MutualInformation = new MutualInformationHeuristic
    {
        BackgroundIsSuperSet = true,
        IncludeNegatives = true
    },
    Include = new SignificantTermsIncludeExclude("pi*")
}

Example JSON output.

{
  "significant_names": {
    "significant_terms": {
      "field": "name",
      "min_doc_count": 10,
      "mutual_information": {
        "background_is_superset": true,
        "include_negatives": true
      },
      "include": "pi*"
    }
  }
}

Handling Responses

edit
response.ShouldBeValid();
var sigNames = response.Aggregations.SignificantTerms("significant_names");
sigNames.Should().NotBeNull();
sigNames.DocCount.Should().BeGreaterThan(0);

Filtering with exact values

edit

The significant terms aggregation can be filtered so that specific exact values are excluded.

Fluent DSL example

edit
a => a
.SignificantTerms("significant_names", st => st
    .Field(p => p.Name)
    .MinimumDocumentCount(10)
    .MutualInformation(mi => mi
        .BackgroundIsSuperSet()
        .IncludeNegatives()
    )
    .Exclude(new[] { "pierce" })
)

Object Initializer syntax example

edit
new SignificantTermsAggregation("significant_names")
{
    Field = Field<Project>(p => p.Name),
    MinimumDocumentCount = 10,
    MutualInformation = new MutualInformationHeuristic
    {
        BackgroundIsSuperSet = true,
        IncludeNegatives = true
    },
    Exclude = new SignificantTermsIncludeExclude(new[] { "pierce" })
}

Example JSON output.

{
  "significant_names": {
    "significant_terms": {
      "field": "name",
      "min_doc_count": 10,
      "mutual_information": {
        "background_is_superset": true,
        "include_negatives": true
      },
      "exclude": [
        "pierce"
      ]
    }
  }
}

Handling Responses

edit
response.ShouldBeValid();
var sigNames = response.Aggregations.SignificantTerms("significant_names");
sigNames.Should().NotBeNull();
sigNames.DocCount.Should().BeGreaterThan(0);