Bucket sort aggregation

edit

A parent pipeline aggregation which sorts the buckets of its parent multi-bucket aggregation. Zero or more sort fields may be specified together with the corresponding sort order. Each bucket may be sorted based on its _key, _count or its sub-aggregations. In addition, parameters from and size may be set in order to truncate the result buckets.

The bucket_sort aggregation, like all pipeline aggregations, is executed after all other non-pipeline aggregations. This means the sorting only applies to whatever buckets are already returned from the parent aggregation. For example, if the parent aggregation is terms and its size is set to 10, the bucket_sort will only sort over those 10 returned term buckets.

Syntax

edit

A bucket_sort aggregation looks like this in isolation:

{
  "bucket_sort": {
    "sort": [
      { "sort_field_1": { "order": "asc" } },   
      { "sort_field_2": { "order": "desc" } },
      "sort_field_3"
    ],
    "from": 1,
    "size": 3
  }
}

Here, sort_field_1 is the bucket path to the variable to be used as the primary sort and its order is ascending.

Table 58. bucket_sort Parameters

Parameter Name Description Required Default Value

sort

The list of fields to sort on. See sort for more details.

Optional

from

Buckets in positions prior to the set value will be truncated.

Optional

0

size

The number of buckets to return. Defaults to all buckets of the parent aggregation.

Optional

gap_policy

The policy to apply when gaps are found in the data (see Dealing with gaps in the data for more details)

Optional

skip

The following snippet returns the buckets corresponding to the 3 months with the highest total sales in descending order:

resp = client.search(
    index="sales",
    size=0,
    aggs={
        "sales_per_month": {
            "date_histogram": {
                "field": "date",
                "calendar_interval": "month"
            },
            "aggs": {
                "total_sales": {
                    "sum": {
                        "field": "price"
                    }
                },
                "sales_bucket_sort": {
                    "bucket_sort": {
                        "sort": [
                            {
                                "total_sales": {
                                    "order": "desc"
                                }
                            }
                        ],
                        "size": 3
                    }
                }
            }
        }
    },
)
print(resp)
response = client.search(
  index: 'sales',
  body: {
    size: 0,
    aggregations: {
      sales_per_month: {
        date_histogram: {
          field: 'date',
          calendar_interval: 'month'
        },
        aggregations: {
          total_sales: {
            sum: {
              field: 'price'
            }
          },
          sales_bucket_sort: {
            bucket_sort: {
              sort: [
                {
                  total_sales: {
                    order: 'desc'
                  }
                }
              ],
              size: 3
            }
          }
        }
      }
    }
  }
)
puts response
const response = await client.search({
  index: "sales",
  size: 0,
  aggs: {
    sales_per_month: {
      date_histogram: {
        field: "date",
        calendar_interval: "month",
      },
      aggs: {
        total_sales: {
          sum: {
            field: "price",
          },
        },
        sales_bucket_sort: {
          bucket_sort: {
            sort: [
              {
                total_sales: {
                  order: "desc",
                },
              },
            ],
            size: 3,
          },
        },
      },
    },
  },
});
console.log(response);
POST /sales/_search
{
  "size": 0,
  "aggs": {
    "sales_per_month": {
      "date_histogram": {
        "field": "date",
        "calendar_interval": "month"
      },
      "aggs": {
        "total_sales": {
          "sum": {
            "field": "price"
          }
        },
        "sales_bucket_sort": {
          "bucket_sort": {
            "sort": [
              { "total_sales": { "order": "desc" } } 
            ],
            "size": 3                                
          }
        }
      }
    }
  }
}

sort is set to use the values of total_sales in descending order

size is set to 3 meaning only the top 3 months in total_sales will be returned

And the following may be the response:

{
   "took": 82,
   "timed_out": false,
   "_shards": ...,
   "hits": ...,
   "aggregations": {
      "sales_per_month": {
         "buckets": [
            {
               "key_as_string": "2015/01/01 00:00:00",
               "key": 1420070400000,
               "doc_count": 3,
               "total_sales": {
                   "value": 550.0
               }
            },
            {
               "key_as_string": "2015/03/01 00:00:00",
               "key": 1425168000000,
               "doc_count": 2,
               "total_sales": {
                   "value": 375.0
               }
            },
            {
               "key_as_string": "2015/02/01 00:00:00",
               "key": 1422748800000,
               "doc_count": 2,
               "total_sales": {
                   "value": 60.0
               }
            }
         ]
      }
   }
}

Truncating without sorting

edit

It is also possible to use this aggregation in order to truncate the result buckets without doing any sorting. To do so, just use the from and/or size parameters without specifying sort.

The following example simply truncates the result so that only the second bucket is returned:

resp = client.search(
    index="sales",
    size=0,
    aggs={
        "sales_per_month": {
            "date_histogram": {
                "field": "date",
                "calendar_interval": "month"
            },
            "aggs": {
                "bucket_truncate": {
                    "bucket_sort": {
                        "from": 1,
                        "size": 1
                    }
                }
            }
        }
    },
)
print(resp)
response = client.search(
  index: 'sales',
  body: {
    size: 0,
    aggregations: {
      sales_per_month: {
        date_histogram: {
          field: 'date',
          calendar_interval: 'month'
        },
        aggregations: {
          bucket_truncate: {
            bucket_sort: {
              from: 1,
              size: 1
            }
          }
        }
      }
    }
  }
)
puts response
const response = await client.search({
  index: "sales",
  size: 0,
  aggs: {
    sales_per_month: {
      date_histogram: {
        field: "date",
        calendar_interval: "month",
      },
      aggs: {
        bucket_truncate: {
          bucket_sort: {
            from: 1,
            size: 1,
          },
        },
      },
    },
  },
});
console.log(response);
POST /sales/_search
{
  "size": 0,
  "aggs": {
    "sales_per_month": {
      "date_histogram": {
        "field": "date",
        "calendar_interval": "month"
      },
      "aggs": {
        "bucket_truncate": {
          "bucket_sort": {
            "from": 1,
            "size": 1
          }
        }
      }
    }
  }
}

Response:

{
   "took": 11,
   "timed_out": false,
   "_shards": ...,
   "hits": ...,
   "aggregations": {
      "sales_per_month": {
         "buckets": [
            {
               "key_as_string": "2015/02/01 00:00:00",
               "key": 1422748800000,
               "doc_count": 2
            }
         ]
      }
   }
}