Skip to content

Commit 3e88ded

Browse files
not-napoleon and elasticsearchmachine authored
Histogram field block loader (#139308)
Adds a block loader for the histogram field type. This is the first step towards getting ESQL support for the type, although this PR doesn't add any user-facing functionality. As is tradition, most of the work here is writing the data generation for the randomized field testing framework. --------- Co-authored-by: elasticsearchmachine <infra-root+elasticsearchmachine@elastic.co>
1 parent ad85787 commit 3e88ded

File tree

6 files changed

+148
-12
lines changed

6 files changed

+148
-12
lines changed

server/src/main/java/org/elasticsearch/index/mapper/blockloader/docvalues/BytesRefsFromBinaryBlockLoader.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,11 @@
1717

1818
import java.io.IOException;
1919

20+
/**
21+
* This block loader should be used for fields that are directly encoded as binary values but are always single valued, such as the
22+
* histogram fields. See also {@link BytesRefsFromCustomBinaryBlockLoader} for multivalued binary fields, and
23+
* {@link BytesRefsFromOrdsBlockLoader} for ordinals-based binary values.
24+
*/
2025
public class BytesRefsFromBinaryBlockLoader extends BlockDocValuesReader.DocValuesBlockLoader {
2126

2227
private final String fieldName;

server/src/main/java/org/elasticsearch/index/mapper/blockloader/docvalues/BytesRefsFromCustomBinaryBlockLoader.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,11 @@
1818

1919
import java.io.IOException;
2020

21+
/**
22+
* This block loader should be used for "wildcard-style" binary values, which is to say fields we have encoded into a binary
23+
* format that supports multiple values via an encoding on our side. See also {@link BytesRefsFromOrdsBlockLoader} for ordinals
24+
* based multivalue aware binary fields, and {@link BytesRefsFromBinaryBlockLoader} for single-valued binary fields.
25+
*/
2126
public class BytesRefsFromCustomBinaryBlockLoader extends BlockDocValuesReader.DocValuesBlockLoader {
2227
private final String fieldName;
2328

server/src/main/java/org/elasticsearch/index/mapper/blockloader/docvalues/BytesRefsFromOrdsBlockLoader.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,9 @@
1717
import java.io.IOException;
1818

1919
/**
20-
* Loads {@code keyword} style fields that are stored as a lookup table and ordinals.
20+
* Loads {@code keyword} style fields that are stored as a lookup table and ordinals. See also {@link BytesRefsFromCustomBinaryBlockLoader}
21+
* for {@code wildcard} style (i.e. non-ordinal encoded multivalued) and {@link BytesRefsFromBinaryBlockLoader} for {@code histogram}
22+
* style (i.e. non-ordinal single valued).
2123
*/
2224
public class BytesRefsFromOrdsBlockLoader extends AbstractBytesRefsFromOrdsBlockLoader {
2325
public BytesRefsFromOrdsBlockLoader(String fieldName) {

test/framework/src/main/java/org/elasticsearch/index/mapper/BlockLoaderTestCase.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,15 @@ public DataSourceResponse.ObjectMappingParametersGenerator handle(
229229
.build();
230230
}
231231

232+
/**
233+
* For a given mapping and input value, compute the value that will be in the block. Values are generated from the
234+
* {@link DocumentGenerator}, and the behavior can be controlled by writing a custom {@link DataSourceHandler}.
235+
*
236+
* @param fieldMapping Generated parameters for this field mapping
237+
* @param value Generated input value to convert
238+
* @param testContext Context information for the current test run
239+
* @return The value that will be added to the block
240+
*/
232241
protected abstract Object expected(Map<String, Object> fieldMapping, Object value, TestContext testContext);
233242

234243
protected static Object maybeFoldList(List<?> list) {

x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapper.java

Lines changed: 29 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
import org.elasticsearch.index.fielddata.IndexHistogramFieldData;
3030
import org.elasticsearch.index.fielddata.LeafHistogramFieldData;
3131
import org.elasticsearch.index.fielddata.SortedBinaryDocValues;
32+
import org.elasticsearch.index.mapper.BlockLoader;
3233
import org.elasticsearch.index.mapper.CompositeSyntheticFieldLoader;
3334
import org.elasticsearch.index.mapper.DocumentParserContext;
3435
import org.elasticsearch.index.mapper.DocumentParsingException;
@@ -42,6 +43,7 @@
4243
import org.elasticsearch.index.mapper.SourceValueFetcher;
4344
import org.elasticsearch.index.mapper.TimeSeriesParams;
4445
import org.elasticsearch.index.mapper.ValueFetcher;
46+
import org.elasticsearch.index.mapper.blockloader.docvalues.BytesRefsFromBinaryBlockLoader;
4547
import org.elasticsearch.index.query.SearchExecutionContext;
4648
import org.elasticsearch.script.field.DocValuesScriptFieldFactory;
4749
import org.elasticsearch.search.DocValueFormat;
@@ -56,6 +58,7 @@
5658

5759
import java.io.IOException;
5860
import java.io.UncheckedIOException;
61+
import java.util.List;
5962
import java.util.Map;
6063

6164
import static org.elasticsearch.common.xcontent.XContentParserUtils.ensureExpectedToken;
@@ -305,6 +308,11 @@ public Query termQuery(Object value, SearchExecutionContext context) {
305308
"[" + CONTENT_TYPE + "] field do not support searching, " + "use dedicated aggregations instead: [" + name() + "]"
306309
);
307310
}
311+
312+
@Override
313+
public BlockLoader blockLoader(BlockLoaderContext blContext) {
314+
return new BytesRefsFromBinaryBlockLoader(name());
315+
}
308316
}
309317

310318
@Override
@@ -351,17 +359,11 @@ public void parse(DocumentParserContext context) throws IOException {
351359
parsedHistogram = HistogramParser.parse(fullPath(), subParser);
352360
}
353361

354-
BytesStreamOutput streamOutput = new BytesStreamOutput();
355-
for (int i = 0; i < parsedHistogram.values().size(); i++) {
356-
long count = parsedHistogram.counts().get(i);
357-
assert count >= 0;
358-
// we do not add elements with count == 0
359-
if (count > 0) {
360-
streamOutput.writeVLong(count);
361-
streamOutput.writeLong(Double.doubleToRawLongBits(parsedHistogram.values().get(i)));
362-
}
363-
}
364-
BytesRef docValue = streamOutput.bytes().toBytesRef();
362+
List<Double> values = parsedHistogram.values();
363+
List<Long> counts = parsedHistogram.counts();
364+
365+
BytesRef docValue = encodeBytesRef(values, counts);
366+
365367
Field field = new BinaryDocValuesField(fullPath(), docValue);
366368
if (context.doc().getByKey(fieldType().name()) != null) {
367369
throw new IllegalArgumentException(
@@ -402,6 +404,22 @@ public void parse(DocumentParserContext context) throws IOException {
402404
context.path().remove();
403405
}
404406

407+
static BytesRef encodeBytesRef(List<Double> values, List<Long> counts) throws IOException {
408+
BytesStreamOutput streamOutput = new BytesStreamOutput();
409+
assert counts.size() == values.size();
410+
for (int i = 0; i < values.size(); i++) {
411+
long count = counts.get(i);
412+
assert count >= 0;
413+
// we do not add elements with count == 0
414+
if (count > 0) {
415+
streamOutput.writeVLong(count);
416+
streamOutput.writeLong(Double.doubleToRawLongBits(values.get(i)));
417+
}
418+
}
419+
BytesRef docValue = streamOutput.bytes().toBytesRef();
420+
return docValue;
421+
}
422+
405423
/** re-usable {@link HistogramValue} implementation */
406424
static class InternalHistogramValue extends HistogramValue {
407425
double value;
Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0; you may not use this file except in compliance with the Elastic License
5+
* 2.0.
6+
*/
7+
8+
package org.elasticsearch.xpack.analytics.mapper;
9+
10+
import org.elasticsearch.core.Types;
11+
import org.elasticsearch.datageneration.datasource.DataSourceHandler;
12+
import org.elasticsearch.datageneration.datasource.DataSourceRequest;
13+
import org.elasticsearch.datageneration.datasource.DataSourceResponse;
14+
import org.elasticsearch.index.mapper.BlockLoaderTestCase;
15+
import org.elasticsearch.plugins.Plugin;
16+
import org.elasticsearch.test.ESTestCase;
17+
import org.elasticsearch.xpack.analytics.AnalyticsPlugin;
18+
19+
import java.io.IOException;
20+
import java.util.Collection;
21+
import java.util.HashMap;
22+
import java.util.List;
23+
import java.util.Map;
24+
import java.util.Optional;
25+
26+
public class HistogramFieldBlockLoaderTests extends BlockLoaderTestCase {
27+
28+
public HistogramFieldBlockLoaderTests(Params params) {
29+
super(HistogramFieldMapper.CONTENT_TYPE, List.of(DATA_SOURCE_HANDLER), params);
30+
}
31+
32+
@Override
33+
protected Collection<? extends Plugin> getPlugins() {
34+
return List.of(new AnalyticsPlugin());
35+
}
36+
37+
private static DataSourceHandler DATA_SOURCE_HANDLER = new DataSourceHandler() {
38+
39+
@Override
40+
public DataSourceResponse.ObjectArrayGenerator handle(DataSourceRequest.ObjectArrayGenerator request) {
41+
// histogram does not support multiple values in a document so we can't have object arrays
42+
return new DataSourceResponse.ObjectArrayGenerator(Optional::empty);
43+
}
44+
45+
@Override
46+
public DataSourceResponse.LeafMappingParametersGenerator handle(DataSourceRequest.LeafMappingParametersGenerator request) {
47+
if (request.fieldType().equals(HistogramFieldMapper.CONTENT_TYPE) == false) {
48+
return null;
49+
}
50+
51+
return new DataSourceResponse.LeafMappingParametersGenerator(() -> {
52+
var map = new HashMap<String, Object>();
53+
if (ESTestCase.randomBoolean()) {
54+
map.put("ignore_malformed", ESTestCase.randomBoolean());
55+
}
56+
return map;
57+
});
58+
}
59+
60+
@Override
61+
public DataSourceResponse.FieldDataGenerator handle(DataSourceRequest.FieldDataGenerator request) {
62+
if (request.fieldType().equals(HistogramFieldMapper.CONTENT_TYPE) == false) {
63+
return null;
64+
}
65+
return new DataSourceResponse.FieldDataGenerator(mapping -> {
66+
List<Double> values = randomList(randomIntBetween(1, 1000), ESTestCase::randomDouble);
67+
values.sort(Double::compareTo);
68+
return Map.of(
69+
"values",
70+
values,
71+
"counts",
72+
// Note - we need the three parameter version of random list here to ensure it's always the same length as values
73+
randomList(values.size(), values.size(), ESTestCase::randomNonNegativeLong)
74+
);
75+
});
76+
}
77+
};
78+
79+
@Override
80+
public void testBlockLoaderOfMultiField() throws IOException {
81+
// Multi fields are not supported
82+
}
83+
84+
@Override
85+
protected Object expected(Map<String, Object> fieldMapping, Object value, TestContext testContext) {
86+
Map<String, Object> valueAsMap = Types.forciblyCast(value);
87+
List<Double> bucketValues = Types.forciblyCast(valueAsMap.get("values"));
88+
List<Long> counts = Types.forciblyCast(valueAsMap.get("counts"));
89+
90+
try {
91+
return HistogramFieldMapper.encodeBytesRef(bucketValues, counts);
92+
} catch (IOException e) {
93+
fail("failed to encode histogram field values");
94+
}
95+
throw new IllegalStateException("Unreachable");
96+
}
97+
}

0 commit comments

Comments
 (0)