Skip to content

Commit

Permalink
Fix PR 1896 with the fuzzy filter list so that it will honor the rege…
Browse files Browse the repository at this point in the history
…x filter

and properly ignore rows that don't match the explicit filter. Also sort the
fuzzy filter list in ascending order and implement a static comparator instead
of instantiating one on each call.
  • Loading branch information
manolama committed May 12, 2020
1 parent 81d2112 commit e8f4900
Show file tree
Hide file tree
Showing 3 changed files with 134 additions and 91 deletions.
204 changes: 117 additions & 87 deletions src/query/QueryUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,14 @@
import org.hbase.async.FuzzyRowFilter.FuzzyFilterPair;
import org.hbase.async.KeyRegexpFilter;
import org.hbase.async.Bytes.ByteMap;
import org.hbase.async.FilterList.Operator;
import org.hbase.async.FilterList;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.base.Strings;
import com.google.common.collect.Lists;

import org.hbase.async.Scanner;

/**
Expand Down Expand Up @@ -268,10 +274,12 @@ private static String getRowKeyUIDRegex(
* Note: The caller has to restrict the scan to proper start and stop
* for the filter to work correctly.
* @param row_key_literals A list of key value pairs to filter on.
* @param fuzzy_key The starting row key we'll adjust for proper filtering.
* @return A sorted, non-empty list of FuzzyFilterPair
*/
private static List<FuzzyFilterPair> buildFuzzyFilters(
final ByteMap<byte[][]> row_key_literals) {
final ByteMap<byte[][]> row_key_literals,
final byte[] fuzzy_key) {
final int prefix_width = Const.SALT_WIDTH() + TSDB.metrics_width() +
Const.TIMESTAMP_BYTES;
final short name_width = TSDB.tagk_width();
Expand All @@ -287,102 +295,105 @@ private static List<FuzzyFilterPair> buildFuzzyFilters(
}
}
final List<FuzzyFilterPair> fuzzy_filter_pairs =
new ArrayList<FuzzyFilterPair>();
new ArrayList<FuzzyFilterPair>(row_key_literals.size());

// Initialize first_fuzzy_key and first_fuzzy_mask
// these will serve as model for the fuzzy filter list
// generated for tags with multiple values (|)
byte[] first_fuzzy_key = new byte[row_key_size];
byte[] first_fuzzy_mask = new byte[row_key_size];
byte[] first_fuzzy_key = Arrays.copyOf(fuzzy_key, fuzzy_key.length);
byte[] first_fuzzy_mask = new byte[fuzzy_key.length];
int fuzzy_offset = 0;

// TODO - see if it's less expensive to skip the salt, timestamp and metric.
// skip salt & timestamp (filtering should be done by start/stop
// of the scanner)
while(fuzzy_offset < prefix_width) {
first_fuzzy_key[fuzzy_offset] = 0;
first_fuzzy_mask[fuzzy_offset++] =
(row_key_literals != null) ? (byte)1 : (byte)0;
}
if (row_key_literals != null) {
final Iterator<Entry<byte[], byte[][]>> it = row_key_literals.iterator();
while(it.hasNext()) {
Entry<byte[], byte[][]> entry = it.next();
final boolean not_key =
entry.getValue() != null && entry.getValue().length == 0;

// first pass to build the key and mask
Iterator<Entry<byte[], byte[][]>> it = row_key_literals.iterator();
while(it.hasNext()) {
Entry<byte[], byte[][]> entry = it.next();
final boolean not_key =
entry.getValue() != null && entry.getValue().length == 0;

if (!not_key) {
final byte[] tag_key = entry.getKey();
System.arraycopy(tag_key, 0,
first_fuzzy_key, fuzzy_offset, name_width);
for (int i=0; i<name_width; i++) {
first_fuzzy_mask[fuzzy_offset++] = 0;
}
if (!not_key) {
final byte[] tag_key = entry.getKey();
System.arraycopy(tag_key, 0,
first_fuzzy_key, fuzzy_offset, name_width);
for (int i=0; i<name_width; i++) {
first_fuzzy_mask[fuzzy_offset++] = 0;
}

final byte[] tag_value;
if (entry.getValue()!=null && entry.getValue().length > 0) {
tag_value = entry.getValue()[0];
} else {
tag_value = null;
final byte[] tag_value;
if (entry.getValue()!=null && entry.getValue().length > 0) {
tag_value = entry.getValue()[0];
} else {
tag_value = null;
}

if (tag_value!=null) {
System.arraycopy(tag_value, 0,
first_fuzzy_key, fuzzy_offset, value_width);
for (int i=0; i<value_width; i++) {
first_fuzzy_mask[fuzzy_offset++] = 0;
}
if (tag_value!=null) {
System.arraycopy(tag_value, 0,
first_fuzzy_key, fuzzy_offset, value_width);
for (int i=0; i<value_width; i++) {
first_fuzzy_mask[fuzzy_offset++] = 0;
}
} else {
// not filtered with fuzzy filter -> skip
for (int i=0; i<value_width; i++) {
first_fuzzy_key[fuzzy_offset] = 0;
first_fuzzy_mask[fuzzy_offset++] = 1;
}
} else {
// not filtered with fuzzy filter -> skip
for (int i=0; i<value_width; i++) {
first_fuzzy_key[fuzzy_offset] = 0;
first_fuzzy_mask[fuzzy_offset++] = 1;
}
}
}
}
fuzzy_filter_pairs.add(new FuzzyFilterPair(first_fuzzy_key, first_fuzzy_mask));

if (row_key_literals != null) {
// generate filters for all combinations of tag values
fuzzy_offset = prefix_width;
final Iterator<Entry<byte[], byte[][]>> it = row_key_literals.iterator();
while(it.hasNext()) {
final Entry<byte[], byte[][]> entry = it.next();
fuzzy_offset += name_width;
// generate filters for all combinations of tag values using the first key
// as the template.
fuzzy_offset = prefix_width;
it = row_key_literals.iterator();
while (it.hasNext()) {
final Entry<byte[], byte[][]> entry = it.next();
fuzzy_offset += name_width;

// if multiple values value, generate a new combination of filters
// for each value
if (entry.getValue()!=null && entry.getValue().length > 1) {
final List<FuzzyFilterPair> duplicate_fuzzy_filters =
new ArrayList<FuzzyFilterPair>(fuzzy_filter_pairs);
for (int i=1; i<entry.getValue().length; i++) {
final byte[] tag_value = entry.getValue()[i];
// if multiple values value, generate a new combination of filters
// for each value
if (entry.getValue()!=null && entry.getValue().length > 1) {
for (int i=1; i<entry.getValue().length; i++) {
final byte[] tag_value = entry.getValue()[i];
byte[] local_fuzzy_key =
Arrays.copyOf(first_fuzzy_key, row_key_size);
System.arraycopy(tag_value, 0,
local_fuzzy_key, fuzzy_offset, value_width);

for (FuzzyFilterPair pair: duplicate_fuzzy_filters) {
byte[] fuzzy_key =
Arrays.copyOf(pair.getRowKey(), row_key_size);
System.arraycopy(tag_value, 0,
fuzzy_key, fuzzy_offset, value_width);

fuzzy_filter_pairs.add(
new FuzzyFilterPair(fuzzy_key, first_fuzzy_mask));
}
}
fuzzy_filter_pairs.add(
new FuzzyFilterPair(local_fuzzy_key, first_fuzzy_mask));
}
fuzzy_offset += value_width;
}
fuzzy_offset += value_width;
}

// Sort filters list over rowkey
Collections.sort(fuzzy_filter_pairs, new Comparator<FuzzyFilterPair>() {
@Override
public int compare(FuzzyFilterPair pair1, FuzzyFilterPair pair2) {
return Bytes.memcmp(pair1.getRowKey(), pair2.getRowKey());
}
});

Collections.sort(fuzzy_filter_pairs, FUZZY_FILTER_CMP);
return fuzzy_filter_pairs;
}

/**
* Comparator that sorts the fuzzy filter list ascending based on the row
* key.
*/
private static class FuzzyFilterComparator implements Comparator<FuzzyFilterPair> {
@Override
public int compare(FuzzyFilterPair pair1, FuzzyFilterPair pair2) {
return Bytes.memcmp(pair2.getRowKey(), pair1.getRowKey());
}
}
private static FuzzyFilterComparator FUZZY_FILTER_CMP = new FuzzyFilterComparator();

/**
* Sets a filter or filter list on the scanner based on whether or not the
* query had tags it needed to match.
Expand Down Expand Up @@ -421,22 +432,26 @@ public static void setDataTableScanFilter(
final int prefix_width = Const.SALT_WIDTH() + TSDB.metrics_width() +
Const.TIMESTAMP_BYTES;

if (explicit_tags && enable_fuzzy_filter) {
final FuzzyRowFilter fuzzy_filter;
if (explicit_tags &&
enable_fuzzy_filter &&
row_key_literals != null &&
!row_key_literals.isEmpty()) {

final byte[] fuzzy_key = new byte[prefix_width + (row_key_literals.size() *
(TSDB.tagk_width() + TSDB.tagv_width()))];
System.arraycopy(scanner.getCurrentKey(), 0, fuzzy_key, 0,
scanner.getCurrentKey().length);

final List<FuzzyFilterPair> fuzzy_filter_pairs =
buildFuzzyFilters(row_key_literals);

buildFuzzyFilters(row_key_literals, fuzzy_key);
// The Fuzzy Filter list is sorted: the first and last filters row key
// can be used to build a start and stop keys for the scanner
final byte[] start_key = Arrays.copyOf(
fuzzy_filter_pairs.get(0).getRowKey(),
fuzzy_filter_pairs.get(0).getRowKey().length);
System.arraycopy(scanner.getCurrentKey(), 0, start_key, 0, prefix_width);

// can be used to build the stop key for the scanner
final byte[] stop_key = Arrays.copyOf(
fuzzy_filter_pairs.get(fuzzy_filter_pairs.size()-1).getRowKey(),
start_key.length);
System.arraycopy(scanner.getCurrentKey(), 0,
stop_key, 0, prefix_width);
fuzzy_filter_pairs.get(fuzzy_filter_pairs.size() - 1).getRowKey(),
fuzzy_key.length);
System.arraycopy(scanner.getCurrentKey(), 0, stop_key, 0, prefix_width);
Internal.setBaseTime(stop_key, end_time);
int idx = prefix_width + TSDB.tagk_width();
// max out the tag values
Expand All @@ -447,18 +462,33 @@ public static void setDataTableScanFilter(
idx += TSDB.tagk_width();
}

scanner.setStartKey(start_key);
scanner.setStartKey(fuzzy_key);
scanner.setStopKey(stop_key);
scanner.setFilter(new FuzzyRowFilter(fuzzy_filter_pairs));
} else {
final String regex = getRowKeyUIDRegex(row_key_literals, explicit_tags);
final KeyRegexpFilter regex_filter = new KeyRegexpFilter(
regex.toString(), Const.ASCII_CHARSET);
fuzzy_filter = new FuzzyRowFilter(fuzzy_filter_pairs);
} else {
fuzzy_filter = null;
}

final String regex = getRowKeyUIDRegex(row_key_literals, explicit_tags);
final KeyRegexpFilter regex_filter;
if (!Strings.isNullOrEmpty(regex)) {
if (LOG.isDebugEnabled()) {
LOG.debug("Regex for scanner: " + scanner + ": " +
byteRegexToString(regex));
}

regex_filter = new KeyRegexpFilter(regex.toString(),
Const.ASCII_CHARSET);
} else {
regex_filter = null;
}

if (fuzzy_filter != null && !Strings.isNullOrEmpty(regex)) {
final FilterList filter = new FilterList(Lists.newArrayList(fuzzy_filter,
regex_filter),Operator.MUST_PASS_ALL);
scanner.setFilter(filter);
} else if (fuzzy_filter != null) {
scanner.setFilter(fuzzy_filter);
} else if (!Strings.isNullOrEmpty(regex)) {
scanner.setFilter(regex_filter);
}
}
Expand Down
19 changes: 16 additions & 3 deletions test/core/TestTsdbQueryQueries.java
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
import org.hbase.async.Bytes;
import org.hbase.async.FilterList;
import org.hbase.async.FuzzyRowFilter;
import org.hbase.async.KeyRegexpFilter;
import org.hbase.async.Scanner;
import org.junit.Before;
import org.junit.Test;
Expand Down Expand Up @@ -1633,7 +1634,11 @@ public void filterExplicitTagsOK() throws Exception {
assertEquals(300, dps[0].aggregatedSize());
// assert fuzzy
for (final MockScanner scanner : storage.getScanners()) {
assertTrue(scanner.getFilter() instanceof FuzzyRowFilter);
assertTrue(scanner.getFilter() instanceof FilterList);
FilterList filter_list = (FilterList) scanner.getFilter();
assertEquals(2, filter_list.size());
assertTrue(filter_list.filters().get(0) instanceof FuzzyRowFilter);
assertTrue(filter_list.filters().get(1) instanceof KeyRegexpFilter);
}
}

Expand Down Expand Up @@ -1664,7 +1669,11 @@ public void filterExplicitTagsGroupByOK() throws Exception {
assertEquals(300, dps[0].aggregatedSize());
// assert fuzzy
for (final MockScanner scanner : storage.getScanners()) {
assertTrue(scanner.getFilter() instanceof FuzzyRowFilter);
assertTrue(scanner.getFilter() instanceof FilterList);
FilterList filter_list = (FilterList) scanner.getFilter();
assertEquals(2, filter_list.size());
assertTrue(filter_list.filters().get(0) instanceof FuzzyRowFilter);
assertTrue(filter_list.filters().get(1) instanceof KeyRegexpFilter);
}
}

Expand All @@ -1690,7 +1699,11 @@ public void filterExplicitTagsMissing() throws Exception {
assertEquals(0, dps.length);
// assert fuzzy
for (final MockScanner scanner : storage.getScanners()) {
assertTrue(scanner.getFilter() instanceof FuzzyRowFilter);
assertTrue(scanner.getFilter() instanceof FilterList);
FilterList filter_list = (FilterList) scanner.getFilter();
assertEquals(2, filter_list.size());
assertTrue(filter_list.filters().get(0) instanceof FuzzyRowFilter);
assertTrue(filter_list.filters().get(1) instanceof KeyRegexpFilter);
}
}

Expand Down
2 changes: 1 addition & 1 deletion test/query/TestQueryUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ public void setDataTableScanFilterEnableBoth() throws Exception {
true,
true,
0);
verify(scanner, times(2)).getCurrentKey();
verify(scanner, times(3)).getCurrentKey();
// TODO - validate the regex and fuzzy filter
verify(scanner, times(1)).setFilter(any(FilterList.class));
verify(scanner, times(1)).setStartKey(any(byte[].class));
Expand Down

0 comments on commit e8f4900

Please sign in to comment.