IMPORTANT: No additional bug fixes or documentation updates
will be released for this version. For the latest information, see the
current release documentation.
ES|QL multivalued fields
editES|QL multivalued fields
editES|QL is fine reading from multivalued fields:
response = client.bulk( index: 'mv', refresh: true, body: [ { index: {} }, { a: 1, b: [ 2, 1 ] }, { index: {} }, { a: 2, b: 3 } ] ) puts response response = client.esql.query( body: { query: 'FROM mv | LIMIT 2' } ) puts response
POST /mv/_bulk?refresh { "index" : {} } { "a": 1, "b": [2, 1] } { "index" : {} } { "a": 2, "b": 3 } POST /_query { "query": "FROM mv | LIMIT 2" }
Multivalued fields come back as a JSON array:
{ "columns": [ { "name": "a", "type": "long"}, { "name": "b", "type": "long"} ], "values": [ [1, [1, 2]], [2, 3] ] }
The relative order of values in a multivalued field is undefined. They’ll frequently be in ascending order but don’t rely on that.
Duplicate values
editSome field types, like keyword
remove duplicate values on write:
response = client.indices.create( index: 'mv', body: { mappings: { properties: { b: { type: 'keyword' } } } } ) puts response response = client.bulk( index: 'mv', refresh: true, body: [ { index: {} }, { a: 1, b: [ 'foo', 'foo', 'bar' ] }, { index: {} }, { a: 2, b: [ 'bar', 'bar' ] } ] ) puts response response = client.esql.query( body: { query: 'FROM mv | LIMIT 2' } ) puts response
PUT /mv { "mappings": { "properties": { "b": {"type": "keyword"} } } } POST /mv/_bulk?refresh { "index" : {} } { "a": 1, "b": ["foo", "foo", "bar"] } { "index" : {} } { "a": 2, "b": ["bar", "bar"] } POST /_query { "query": "FROM mv | LIMIT 2" }
And ES|QL sees that removal:
{ "columns": [ { "name": "a", "type": "long"}, { "name": "b", "type": "keyword"} ], "values": [ [1, ["bar", "foo"]], [2, "bar"] ] }
But other types, like long
don’t remove duplicates.
response = client.indices.create( index: 'mv', body: { mappings: { properties: { b: { type: 'long' } } } } ) puts response response = client.bulk( index: 'mv', refresh: true, body: [ { index: {} }, { a: 1, b: [ 2, 2, 1 ] }, { index: {} }, { a: 2, b: [ 1, 1 ] } ] ) puts response response = client.esql.query( body: { query: 'FROM mv | LIMIT 2' } ) puts response
PUT /mv { "mappings": { "properties": { "b": {"type": "long"} } } } POST /mv/_bulk?refresh { "index" : {} } { "a": 1, "b": [2, 2, 1] } { "index" : {} } { "a": 2, "b": [1, 1] } POST /_query { "query": "FROM mv | LIMIT 2" }
And ES|QL also sees that:
{ "columns": [ { "name": "a", "type": "long"}, { "name": "b", "type": "long"} ], "values": [ [1, [1, 2, 2]], [2, [1, 1]] ] }
This is all at the storage layer. If you store duplicate `long`s and then convert them to strings the duplicates will stay:
response = client.indices.create( index: 'mv', body: { mappings: { properties: { b: { type: 'long' } } } } ) puts response response = client.bulk( index: 'mv', refresh: true, body: [ { index: {} }, { a: 1, b: [ 2, 2, 1 ] }, { index: {} }, { a: 2, b: [ 1, 1 ] } ] ) puts response response = client.esql.query( body: { query: 'FROM mv | EVAL b=TO_STRING(b) | LIMIT 2' } ) puts response
PUT /mv { "mappings": { "properties": { "b": {"type": "long"} } } } POST /mv/_bulk?refresh { "index" : {} } { "a": 1, "b": [2, 2, 1] } { "index" : {} } { "a": 2, "b": [1, 1] } POST /_query { "query": "FROM mv | EVAL b=TO_STRING(b) | LIMIT 2" }
{ "columns": [ { "name": "a", "type": "long"}, { "name": "b", "type": "keyword"} ], "values": [ [1, ["1", "2", "2"]], [2, ["1", "1"]] ] }
Functions
editUnless otherwise documented functions will return null
when applied to a multivalued
field. This behavior may change in a later version.
response = client.bulk( index: 'mv', refresh: true, body: [ { index: {} }, { a: 1, b: [ 2, 1 ] }, { index: {} }, { a: 2, b: 3 } ] ) puts response
POST /mv/_bulk?refresh { "index" : {} } { "a": 1, "b": [2, 1] } { "index" : {} } { "a": 2, "b": 3 }
response = client.esql.query( body: { query: 'FROM mv | EVAL b + 2, a + b | LIMIT 4' } ) puts response
POST /_query { "query": "FROM mv | EVAL b + 2, a + b | LIMIT 4" }
{ "columns": [ { "name": "a", "type": "long"}, { "name": "b", "type": "long"}, { "name": "b + 2", "type": "long"}, { "name": "a + b", "type": "long"} ], "values": [ [1, [1, 2], null, null], [2, 3, 5, 5] ] }
Work around this limitation by converting the field to single value with one of:
response = client.esql.query( body: { query: 'FROM mv | EVAL b=MV_MIN(b) | EVAL b + 2, a + b | LIMIT 4' } ) puts response
POST /_query { "query": "FROM mv | EVAL b=MV_MIN(b) | EVAL b + 2, a + b | LIMIT 4" }
{ "columns": [ { "name": "a", "type": "long"}, { "name": "b", "type": "long"}, { "name": "b + 2", "type": "long"}, { "name": "a + b", "type": "long"} ], "values": [ [1, 1, 3, 2], [2, 3, 5, 5] ] }