diff --git a/src/query/QueryUtil.java b/src/query/QueryUtil.java index 4361b3a4d7..5324e0e822 100644 --- a/src/query/QueryUtil.java +++ b/src/query/QueryUtil.java @@ -31,8 +31,14 @@ import org.hbase.async.FuzzyRowFilter.FuzzyFilterPair; import org.hbase.async.KeyRegexpFilter; import org.hbase.async.Bytes.ByteMap; +import org.hbase.async.FilterList.Operator; +import org.hbase.async.FilterList; import org.slf4j.Logger; import org.slf4j.LoggerFactory; + +import com.google.common.base.Strings; +import com.google.common.collect.Lists; + import org.hbase.async.Scanner; /** @@ -268,10 +274,12 @@ private static String getRowKeyUIDRegex( * Note: The caller has to restrict the scan to proper start and stop * for the filter to work correctly. * @param row_key_literals A list of key value pairs to filter on. + * @param fuzzy_key The starting row key we'll adjust for proper filtering. * @return A sorted, non-empty list of FuzzyFilterPair */ private static List buildFuzzyFilters( - final ByteMap row_key_literals) { + final ByteMap row_key_literals, + final byte[] fuzzy_key) { final int prefix_width = Const.SALT_WIDTH() + TSDB.metrics_width() + Const.TIMESTAMP_BYTES; final short name_width = TSDB.tagk_width(); @@ -287,14 +295,16 @@ private static List buildFuzzyFilters( } } final List fuzzy_filter_pairs = - new ArrayList(); + new ArrayList(row_key_literals.size()); // Initialize first_fuzzy_key and first_fuzzy_mask // these will serve as model for the fuzzy filter list // generated for tags with multiple values (|) - byte[] first_fuzzy_key = new byte[row_key_size]; - byte[] first_fuzzy_mask = new byte[row_key_size]; + byte[] first_fuzzy_key = Arrays.copyOf(fuzzy_key, fuzzy_key.length); + byte[] first_fuzzy_mask = new byte[fuzzy_key.length]; int fuzzy_offset = 0; + + // TODO - see if it's less expensive to skip the salt, timestamp and metric. 
// skip salt & timestamp (filtering should be done by start/stop // of the scanner) while(fuzzy_offset < prefix_width) { @@ -302,87 +312,88 @@ private static List buildFuzzyFilters( first_fuzzy_mask[fuzzy_offset++] = (row_key_literals != null) ? (byte)1 : (byte)0; } - if (row_key_literals != null) { - final Iterator> it = row_key_literals.iterator(); - while(it.hasNext()) { - Entry entry = it.next(); - final boolean not_key = - entry.getValue() != null && entry.getValue().length == 0; + + // first pass to build the key and mask + Iterator> it = row_key_literals.iterator(); + while(it.hasNext()) { + Entry entry = it.next(); + final boolean not_key = + entry.getValue() != null && entry.getValue().length == 0; - if (!not_key) { - final byte[] tag_key = entry.getKey(); - System.arraycopy(tag_key, 0, - first_fuzzy_key, fuzzy_offset, name_width); - for (int i=0; i 0) { - tag_value = entry.getValue()[0]; - } else { - tag_value = null; + final byte[] tag_value; + if (entry.getValue()!=null && entry.getValue().length > 0) { + tag_value = entry.getValue()[0]; + } else { + tag_value = null; + } + + if (tag_value!=null) { + System.arraycopy(tag_value, 0, + first_fuzzy_key, fuzzy_offset, value_width); + for (int i=0; i skip - for (int i=0; i skip + for (int i=0; i> it = row_key_literals.iterator(); - while(it.hasNext()) { - final Entry entry = it.next(); - fuzzy_offset += name_width; + // generate filters for all combinations of tag values using the first key + // as the template. 
+ fuzzy_offset = prefix_width; + it = row_key_literals.iterator(); + while (it.hasNext()) { + final Entry entry = it.next(); + fuzzy_offset += name_width; - // if multiple values value, generate a new combination of filters - // for each value - if (entry.getValue()!=null && entry.getValue().length > 1) { - final List duplicate_fuzzy_filters = - new ArrayList(fuzzy_filter_pairs); - for (int i=1; i 1) { + for (int i=1; i() { - @Override - public int compare(FuzzyFilterPair pair1, FuzzyFilterPair pair2) { - return Bytes.memcmp(pair1.getRowKey(), pair2.getRowKey()); - } - }); - + Collections.sort(fuzzy_filter_pairs, FUZZY_FILTER_CMP); return fuzzy_filter_pairs; } + /** + * Comparator that sorts the fuzzy filter list ascending based on the row + * key. The ascending order matters: the scan filter setup reads the last + * element of the sorted list to derive the scanner stop key, so the last + * element must carry the largest row key. + */ + private static class FuzzyFilterComparator implements Comparator { + @Override + public int compare(FuzzyFilterPair pair1, FuzzyFilterPair pair2) { + return Bytes.memcmp(pair1.getRowKey(), pair2.getRowKey()); + } + } + private static final FuzzyFilterComparator FUZZY_FILTER_CMP = new FuzzyFilterComparator(); + /** * Sets a filter or filter list on the scanner based on whether or not the * query had tags it needed to match. 
@@ -421,22 +432,26 @@ public static void setDataTableScanFilter( final int prefix_width = Const.SALT_WIDTH() + TSDB.metrics_width() + Const.TIMESTAMP_BYTES; - if (explicit_tags && enable_fuzzy_filter) { + final FuzzyRowFilter fuzzy_filter; + if (explicit_tags && + enable_fuzzy_filter && + row_key_literals != null && + !row_key_literals.isEmpty()) { + + final byte[] fuzzy_key = new byte[prefix_width + (row_key_literals.size() * + (TSDB.tagk_width() + TSDB.tagv_width()))]; + System.arraycopy(scanner.getCurrentKey(), 0, fuzzy_key, 0, + scanner.getCurrentKey().length); + final List fuzzy_filter_pairs = - buildFuzzyFilters(row_key_literals); - + buildFuzzyFilters(row_key_literals, fuzzy_key); + // The Fuzzy Filter list is sorted: the first and last filters row key - // can be used to build a start and stop keys for the scanner - final byte[] start_key = Arrays.copyOf( - fuzzy_filter_pairs.get(0).getRowKey(), - fuzzy_filter_pairs.get(0).getRowKey().length); - System.arraycopy(scanner.getCurrentKey(), 0, start_key, 0, prefix_width); - + // can be used to build the stop key for the scanner final byte[] stop_key = Arrays.copyOf( - fuzzy_filter_pairs.get(fuzzy_filter_pairs.size()-1).getRowKey(), - start_key.length); - System.arraycopy(scanner.getCurrentKey(), 0, - stop_key, 0, prefix_width); + fuzzy_filter_pairs.get(fuzzy_filter_pairs.size() - 1).getRowKey(), + fuzzy_key.length); + System.arraycopy(scanner.getCurrentKey(), 0, stop_key, 0, prefix_width); Internal.setBaseTime(stop_key, end_time); int idx = prefix_width + TSDB.tagk_width(); // max out the tag values @@ -447,18 +462,33 @@ public static void setDataTableScanFilter( idx += TSDB.tagk_width(); } - scanner.setStartKey(start_key); + scanner.setStartKey(fuzzy_key); scanner.setStopKey(stop_key); - scanner.setFilter(new FuzzyRowFilter(fuzzy_filter_pairs)); - } else { - final String regex = getRowKeyUIDRegex(row_key_literals, explicit_tags); - final KeyRegexpFilter regex_filter = new KeyRegexpFilter( - regex.toString(), 
Const.ASCII_CHARSET); + fuzzy_filter = new FuzzyRowFilter(fuzzy_filter_pairs); + } else { + fuzzy_filter = null; + } + + final String regex = getRowKeyUIDRegex(row_key_literals, explicit_tags); + final KeyRegexpFilter regex_filter; + if (!Strings.isNullOrEmpty(regex)) { if (LOG.isDebugEnabled()) { LOG.debug("Regex for scanner: " + scanner + ": " + byteRegexToString(regex)); } - + regex_filter = new KeyRegexpFilter(regex.toString(), + Const.ASCII_CHARSET); + } else { + regex_filter = null; + } + + if (fuzzy_filter != null && !Strings.isNullOrEmpty(regex)) { + final FilterList filter = new FilterList(Lists.newArrayList(fuzzy_filter, + regex_filter),Operator.MUST_PASS_ALL); + scanner.setFilter(filter); + } else if (fuzzy_filter != null) { + scanner.setFilter(fuzzy_filter); + } else if (!Strings.isNullOrEmpty(regex)) { scanner.setFilter(regex_filter); } } diff --git a/test/core/TestTsdbQueryQueries.java b/test/core/TestTsdbQueryQueries.java index 06e503a821..e07be2e852 100644 --- a/test/core/TestTsdbQueryQueries.java +++ b/test/core/TestTsdbQueryQueries.java @@ -34,6 +34,7 @@ import org.hbase.async.Bytes; import org.hbase.async.FilterList; import org.hbase.async.FuzzyRowFilter; +import org.hbase.async.KeyRegexpFilter; import org.hbase.async.Scanner; import org.junit.Before; import org.junit.Test; @@ -1633,7 +1634,11 @@ public void filterExplicitTagsOK() throws Exception { assertEquals(300, dps[0].aggregatedSize()); // assert fuzzy for (final MockScanner scanner : storage.getScanners()) { - assertTrue(scanner.getFilter() instanceof FuzzyRowFilter); + assertTrue(scanner.getFilter() instanceof FilterList); + FilterList filter_list = (FilterList) scanner.getFilter(); + assertEquals(2, filter_list.size()); + assertTrue(filter_list.filters().get(0) instanceof FuzzyRowFilter); + assertTrue(filter_list.filters().get(1) instanceof KeyRegexpFilter); } } @@ -1664,7 +1669,11 @@ public void filterExplicitTagsGroupByOK() throws Exception { assertEquals(300, dps[0].aggregatedSize()); 
// assert fuzzy for (final MockScanner scanner : storage.getScanners()) { - assertTrue(scanner.getFilter() instanceof FuzzyRowFilter); + assertTrue(scanner.getFilter() instanceof FilterList); + FilterList filter_list = (FilterList) scanner.getFilter(); + assertEquals(2, filter_list.size()); + assertTrue(filter_list.filters().get(0) instanceof FuzzyRowFilter); + assertTrue(filter_list.filters().get(1) instanceof KeyRegexpFilter); } } @@ -1690,7 +1699,11 @@ public void filterExplicitTagsMissing() throws Exception { assertEquals(0, dps.length); // assert fuzzy for (final MockScanner scanner : storage.getScanners()) { - assertTrue(scanner.getFilter() instanceof FuzzyRowFilter); + assertTrue(scanner.getFilter() instanceof FilterList); + FilterList filter_list = (FilterList) scanner.getFilter(); + assertEquals(2, filter_list.size()); + assertTrue(filter_list.filters().get(0) instanceof FuzzyRowFilter); + assertTrue(filter_list.filters().get(1) instanceof KeyRegexpFilter); } } diff --git a/test/query/TestQueryUtil.java b/test/query/TestQueryUtil.java index 31e938b55b..ab04c38568 100644 --- a/test/query/TestQueryUtil.java +++ b/test/query/TestQueryUtil.java @@ -139,7 +139,7 @@ public void setDataTableScanFilterEnableBoth() throws Exception { true, true, 0); - verify(scanner, times(2)).getCurrentKey(); + verify(scanner, times(3)).getCurrentKey(); // TODO - validate the regex and fuzzy filter verify(scanner, times(1)).setFilter(any(FilterList.class)); verify(scanner, times(1)).setStartKey(any(byte[].class));