Geo-distance aggregation
A multi-bucket aggregation that works on geo_point fields and conceptually works very similarly to the range aggregation. The user can define a point of origin and a set of distance range buckets. The aggregation evaluates the distance of each document value from the origin point and determines the buckets it belongs to based on the ranges (a document belongs to a bucket if the distance between the document and the origin falls within the distance range of the bucket).
response = client.indices.create( index: 'museums', body: { mappings: { properties: { location: { type: 'geo_point' } } } } ) puts response response = client.bulk( index: 'museums', refresh: true, body: [ { index: { _id: 1 } }, { location: 'POINT (4.912350 52.374081)', name: 'NEMO Science Museum' }, { index: { _id: 2 } }, { location: 'POINT (4.901618 52.369219)', name: 'Museum Het Rembrandthuis' }, { index: { _id: 3 } }, { location: 'POINT (4.914722 52.371667)', name: 'Nederlands Scheepvaartmuseum' }, { index: { _id: 4 } }, { location: 'POINT (4.405200 51.222900)', name: 'Letterenhuis' }, { index: { _id: 5 } }, { location: 'POINT (2.336389 48.861111)', name: 'Musée du Louvre' }, { index: { _id: 6 } }, { location: 'POINT (2.327000 48.860000)', name: "Musée d'Orsay" } ] ) puts response response = client.search( index: 'museums', size: 0, body: { aggregations: { rings_around_amsterdam: { geo_distance: { field: 'location', origin: 'POINT (4.894 52.3760)', ranges: [ { to: 100_000 }, { from: 100_000, to: 300_000 }, { from: 300_000 } ] } } } } ) puts response
PUT /museums { "mappings": { "properties": { "location": { "type": "geo_point" } } } } POST /museums/_bulk?refresh {"index":{"_id":1}} {"location": "POINT (4.912350 52.374081)", "name": "NEMO Science Museum"} {"index":{"_id":2}} {"location": "POINT (4.901618 52.369219)", "name": "Museum Het Rembrandthuis"} {"index":{"_id":3}} {"location": "POINT (4.914722 52.371667)", "name": "Nederlands Scheepvaartmuseum"} {"index":{"_id":4}} {"location": "POINT (4.405200 51.222900)", "name": "Letterenhuis"} {"index":{"_id":5}} {"location": "POINT (2.336389 48.861111)", "name": "Musée du Louvre"} {"index":{"_id":6}} {"location": "POINT (2.327000 48.860000)", "name": "Musée d'Orsay"} POST /museums/_search?size=0 { "aggs": { "rings_around_amsterdam": { "geo_distance": { "field": "location", "origin": "POINT (4.894 52.3760)", "ranges": [ { "to": 100000 }, { "from": 100000, "to": 300000 }, { "from": 300000 } ] } } } }
Response:
{ ... "aggregations": { "rings_around_amsterdam": { "buckets": [ { "key": "*-100000.0", "from": 0.0, "to": 100000.0, "doc_count": 3 }, { "key": "100000.0-300000.0", "from": 100000.0, "to": 300000.0, "doc_count": 1 }, { "key": "300000.0-*", "from": 300000.0, "doc_count": 2 } ] } } }
The specified field must be of type geo_point (which can only be set explicitly in the mappings). The field can also hold an array of geo_point values, in which case all of them will be taken into account during aggregation. The origin point can accept all formats supported by the geo_point type:
- Object format: { "lat" : 52.3760, "lon" : 4.894 } - this is the safest format as it is the most explicit about the lat & lon values
- String format: "52.3760, 4.894" - where the first number is the lat and the second is the lon
- Array format: [4.894, 52.3760] - which is based on the GeoJSON standard, where the first number is the lon and the second one is the lat
By default, the distance unit is m
(meters) but it can also accept: mi
(miles), in
(inches), yd
(yards), km
(kilometers), cm
(centimeters), mm
(millimeters).
response = client.search( index: 'museums', size: 0, body: { aggregations: { rings: { geo_distance: { field: 'location', origin: 'POINT (4.894 52.3760)', unit: 'km', ranges: [ { to: 100 }, { from: 100, to: 300 }, { from: 300 } ] } } } } ) puts response
POST /museums/_search?size=0 { "aggs": { "rings": { "geo_distance": { "field": "location", "origin": "POINT (4.894 52.3760)", "unit": "km", "ranges": [ { "to": 100 }, { "from": 100, "to": 300 }, { "from": 300 } ] } } } }
There are two distance calculation modes: arc (the default) and plane. The arc calculation is the most accurate. The plane calculation is the fastest but least accurate. Consider using plane when your search context is "narrow" and spans smaller geographical areas (~5km). plane will return higher error margins for searches across very large areas (e.g. cross-continent search). The distance calculation type can be set using the distance_type parameter:
response = client.search( index: 'museums', size: 0, body: { aggregations: { rings: { geo_distance: { field: 'location', origin: 'POINT (4.894 52.3760)', unit: 'km', distance_type: 'plane', ranges: [ { to: 100 }, { from: 100, to: 300 }, { from: 300 } ] } } } } ) puts response
POST /museums/_search?size=0 { "aggs": { "rings": { "geo_distance": { "field": "location", "origin": "POINT (4.894 52.3760)", "unit": "km", "distance_type": "plane", "ranges": [ { "to": 100 }, { "from": 100, "to": 300 }, { "from": 300 } ] } } } }
Keyed Response
Setting the keyed flag to true will associate a unique string key with each bucket and return the ranges as a hash rather than an array:
response = client.search( index: 'museums', size: 0, body: { aggregations: { rings_around_amsterdam: { geo_distance: { field: 'location', origin: 'POINT (4.894 52.3760)', ranges: [ { to: 100_000 }, { from: 100_000, to: 300_000 }, { from: 300_000 } ], keyed: true } } } } ) puts response
POST /museums/_search?size=0 { "aggs": { "rings_around_amsterdam": { "geo_distance": { "field": "location", "origin": "POINT (4.894 52.3760)", "ranges": [ { "to": 100000 }, { "from": 100000, "to": 300000 }, { "from": 300000 } ], "keyed": true } } } }
Response:
{ ... "aggregations": { "rings_around_amsterdam": { "buckets": { "*-100000.0": { "from": 0.0, "to": 100000.0, "doc_count": 3 }, "100000.0-300000.0": { "from": 100000.0, "to": 300000.0, "doc_count": 1 }, "300000.0-*": { "from": 300000.0, "doc_count": 2 } } } } }
It is also possible to customize the key for each range:
response = client.search( index: 'museums', size: 0, body: { aggregations: { rings_around_amsterdam: { geo_distance: { field: 'location', origin: 'POINT (4.894 52.3760)', ranges: [ { to: 100_000, key: 'first_ring' }, { from: 100_000, to: 300_000, key: 'second_ring' }, { from: 300_000, key: 'third_ring' } ], keyed: true } } } } ) puts response
POST /museums/_search?size=0 { "aggs": { "rings_around_amsterdam": { "geo_distance": { "field": "location", "origin": "POINT (4.894 52.3760)", "ranges": [ { "to": 100000, "key": "first_ring" }, { "from": 100000, "to": 300000, "key": "second_ring" }, { "from": 300000, "key": "third_ring" } ], "keyed": true } } } }
Response:
{ ... "aggregations": { "rings_around_amsterdam": { "buckets": { "first_ring": { "from": 0.0, "to": 100000.0, "doc_count": 3 }, "second_ring": { "from": 100000.0, "to": 300000.0, "doc_count": 1 }, "third_ring": { "from": 300000.0, "doc_count": 2 } } } } }