feature.h

// Copyright 2022 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// This library defines the concepts "fuzzing feature" and "feature domain".
// It is used by Centipede, and it can be used by fuzz runners to
// define their features in a way most friendly to Centipede.
// Fuzz runners do not have to use this file nor to obey the rules defined here.
// But using this file and following its rules is the simplest way if you want
// Centipede to understand the details about the features generated by the
// runner.
//
// This library must not depend on anything other than libc so that fuzz targets
// using it doesn't gain redundant coverage. For the same reason this library
// uses raw __builtin_trap instead of CHECKs.
// We make an exception for <algorithm> for std::sort/std::unique,
// since <algorithm> is very lightweight.
// This library is also header-only, with all functions defined as inline.

#ifndef THIRD_PARTY_CENTIPEDE_FEATURE_H_
#define THIRD_PARTY_CENTIPEDE_FEATURE_H_

#include <stddef.h>
#include <string.h>

// WARNING!!!: Be very careful with what STL headers or other dependencies you
// add here. This header needs to remain mostly bare-bones so that we can
// include it into runner.
// <vector> is an exception, because it's too clumsy w/o it, and it introduces
// minimal code footprint.
#include <cstdint>
#include <limits>
#include <memory>
#include <vector>

#include "./concurrent_bitset.h"
#include "./foreach_nonzero.h"

namespace centipede {

// Feature is an integer that identifies some unique behaviour
// of the fuzz target exercised by a given input.
// We say, this input has this feature with regard to this fuzz target.
// One example of a feature: a certain control flow edge being executed.
using feature_t = uint64_t;

// A vector of features. It is not expected to be ordered.
// It typically does not contain repetitions, but it's ok to have them.
using FeatureVec = std::vector<feature_t>;

// Computes a hash of `bits`. The purpose is to use the result for XOR-ing with
// some other values, such that all resulting bits look random.
inline uint64_t Hash64Bits(uint64_t bits) {
  // This particular prime number seems to mix bits well.
  // TODO(kcc): find a more scientific way to mix bits, e.g. switch to Murmur.
  constexpr uint64_t kPrime = 13441014529ULL;
  return bits * kPrime;
}

// Returns `bits` rotated left by `n`.
inline uint64_t RotateLeft(uint64_t bits, uint64_t n) {
  return (bits << n) | (bits >> (64 - n));
}

namespace feature_domains {

// Feature domain is a subset of 64-bit integers dedicated to a certain
// kind of fuzzing features.
// All domains are of the same size (kDomainSize), This way, we can compute
// a domain for a given feature by dividing by kDomainSize.
class Domain {
 public:
  // kDomainSize is the largest power of two such that all domains fit.
  static constexpr size_t kMaxNumDomainsLog = 6;
  static constexpr size_t kMaxNumDomains = 1 << kMaxNumDomainsLog;
  static constexpr size_t kDomainSize = 1ULL << (64 - kMaxNumDomainsLog);
  static constexpr size_t kLastDomainId = kMaxNumDomains - 1;

  constexpr Domain(size_t domain_id) : domain_id_(domain_id) {}

  constexpr feature_t begin() const { return kDomainSize * domain_id_; }
  constexpr feature_t end() const { return begin() + kDomainSize; }
  bool Contains(feature_t feature) const {
    return feature >= begin() && feature < end();
  }
  constexpr size_t domain_id() const { return domain_id_; }

  // Converts any `number` into a feature in this domain.
  feature_t ConvertToMe(size_t number) const {
    return begin() + number % kDomainSize;
  }

  // Returns the DomainId of the domain that the feature belongs to.
  static size_t FeatureToDomainId(feature_t feature) {
    return feature / kDomainSize;
  }

 private:
  const size_t domain_id_;
};

// NEXT_DOMAIN_ID() returns a constexpr value for the unique domain ID.
// TODO(kcc): Ideally, it would return consecutive values, but it's unclear how
// to do this with the constexpr magic. Suggestions are welcome, but not
// critical. For now we just use __LINE__ - kFirstDomainLine as the unique ID.
constexpr size_t kFirstDomainLine = __LINE__;
#define NEXT_DOMAIN_ID() (__LINE__ - kFirstDomainLine)
// Catch-all domain for unknown features.
constexpr Domain kUnknown = {NEXT_DOMAIN_ID()};
// Represents PCs, i.e. control flow edges.
// Use ConvertPCFeatureToPcIndex() to convert back to a PC index.
constexpr Domain kPCs = {NEXT_DOMAIN_ID()};
// Features derived from edge counters. See Convert8bitCounterToNumber().
constexpr Domain k8bitCounters = {NEXT_DOMAIN_ID()};
// Features derived from data flow edges.
// A typical data flow edge is a pair of PCs: {store-PC, load-PC}.
// Another variant of a data flow edge is a pair of {global-address, load-PC}.
constexpr Domain kDataFlow = {NEXT_DOMAIN_ID()};
// Features derived from instrumenting CMP instructions.
constexpr Domain kCMP = {NEXT_DOMAIN_ID()};
// Features derived from computing (bounded) control flow paths.
constexpr Domain kBoundedPath = {NEXT_DOMAIN_ID()};
// Features derived from (unordered) pairs of PCs.
constexpr Domain kPCPair = {NEXT_DOMAIN_ID()};
static_assert(NEXT_DOMAIN_ID() < Domain::kLastDomainId);
constexpr Domain kLastDomainId = {Domain::kLastDomainId};  // must be last.

// Special feature used to indicate an absence of features. Typically used where
// a feature array must not be empty, but doesn't have any other features.
constexpr feature_t kNoFeature = kUnknown.begin();

}  // namespace feature_domains

// Converts an 8-bit coverage counter, i.e. a pair of {`pc_index`,
// `counter_value` must not be zero.
//
// We convert the 8-bit counter value to a number from 0 to 7
// by computing its binary log, i.e. 1=>0, 2=>1, 4=>2, 8=>3, ..., 128=>7.
// This is a heuristic, similar to that of AFL or libFuzzer
// that tries to encourage inputs with different number of repetitions
// of the same PC.
inline size_t Convert8bitCounterToNumber(size_t pc_index,
                                         uint8_t counter_value) {
  if (counter_value == 0) __builtin_trap();  // Wrong input.
  // Compute a log2 of counter_value, i.e. a value between 0 and 7.
  // __builtin_clz consumes a 32-bit integer.
  uint32_t counter_log2 =
      sizeof(uint32_t) * 8 - 1 - __builtin_clz(counter_value);
  return pc_index * 8 + counter_log2;
}

// Given the `feature` from the PC domain, returns the feature's
// pc_index. I.e. reverse of kPC.ConvertToMe(), assuming all PCs originally
// converted to features were less than Domain::kDomainSize.
inline size_t ConvertPCFeatureToPcIndex(feature_t feature) {
  auto domain = feature_domains::kPCs;
  if (!domain.Contains(feature)) __builtin_trap();
  return feature - domain.begin();
}

// Encodes {`pc1`, `pc2`} into a number.
// `pc1` and `pc2` are in range [0, `max_pc`)
inline size_t ConvertPcPairToNumber(uintptr_t pc1, uintptr_t pc2,
                                    uintptr_t max_pc) {
  return pc1 * max_pc + pc2;
}

// Encodes {context, a, b} into a number.
// `a` and `b` are arguments of an instruction "a CMP b".
// `context` identifies the CMP call site.
// Usually, `context` is a random-looking number derived from a hash function.
//
// This function has several mutually conflicting requirements:
//  * it must be very fast, as it is executed on every CMP instruction.
//  * it must allow to distinguish {a,b} pairs in some non-trivial way.
//  * it must not produce too many different values
//    (where "too many" is hard to define)
inline size_t ConvertContextAndArgPairToNumber(uintptr_t a, uintptr_t b,
                                               uintptr_t context) {
  // Below is a giant unscientific heuristic.
  // Expect quite a bit of tuning effort here.
  //
  // The idea is to treat different {context,a,b} tuples as different features,
  // so that a sufficiently new argument pair for a given context is recognized
  // as interesting.
  // Obviously, we can't generate 2^128 different features per context
  // and so we need to bucketize them.
  //
  // The following relationships between a and b seem worthy of
  // differentiation via different feature values:
  //  * a==b
  //  * [diff]      a==b+K or a==b-K (for some small K, e.g. 1 to 31).
  //  * [hamming]   Different values of hamming distance.
  //  * [msb_eq]    The number of most significant bits being equal.
  //  * [diff_log2] Different values of Log2(a-b)
  // We compute a superposition of these properties.
  //
  // Similar ideas:
  // * https://lafintel.wordpress.com/
  // * https://llvm.org/docs/LibFuzzer.html#value-profile

  // ab is the component of the feature based on {a,b}.
  uintptr_t ab = 0;  // Value for the case of a == b;
  if (a != b) {
    uintptr_t diff = a - b;
    // diff_component is in [0,64)
    uintptr_t diff_component = diff < 32 ? diff : -diff < 32 ? 32 + -diff : 0;
    // hamming_component is in [0, 64)
    uintptr_t hamming_component = __builtin_popcountll(a ^ b) - 1;
    // diff_log2_component is in [0, 64)
    uintptr_t diff_log2_component = __builtin_clzll(diff);
    // msb_eq_component is in [0, 64)
    uintptr_t msb_eq_component = __builtin_clzll(a ^ b);
    ab = (diff_component << 0) | (hamming_component << 6) |
         (diff_log2_component << 12) | (msb_eq_component << 18);
    // This gives us whooping 2^24 different features for just one context.
    // In theory this is pretty bad: it will bloat the corpus beyond reasonable.
    // Whether it's bad in practice remains to be seen.
    // We may want to reduce the number of different features with e.g. this:
    // ab %= 7919;  // mod large prime
  }
  // Combine {a,b} and context.
  return ab ^ context;
}

// Fixed-size ring buffer that maintains a hash of its elements.
// Create objects of this type as zero-initialized globals or thread-locals.
// In a zero-initialized object all values and the hash are zero.
// `kSize` indicates the maximum possible size for the ring-buffer.
// The actual size is controlled by the `ring_buffer_size` argument of push().
template <size_t kSize>
class HashedRingBuffer {
 public:
  // Adds `new_item` and returns the new hash of the entire collection.
  // Evicts an old item.
  // `ring_buffer_size` must be <= kSize and must be the same for all push()
  // calls for a given object.
  // We don't enforce these constraints here to avoid overhead.
  // The hash function used:
  // https://en.wikipedia.org/wiki/Rolling_hash#Cyclic_polynomial
  size_t push(size_t new_item, size_t ring_buffer_size) {
    size_t new_pos = last_added_pos_ + 1;
    if (new_pos >= ring_buffer_size) new_pos = 0;
    size_t evicted_item = buffer_[new_pos];
    new_item = Hash64Bits(new_item);
    buffer_[new_pos] = new_item;
    hash_ = RotateLeft(hash_, 1) ^ RotateLeft(evicted_item, ring_buffer_size) ^
            new_item;
    last_added_pos_ = new_pos;
    return hash_;
  }

  // returns the current hash.
  size_t hash() const { return hash_; }

  // Zero-initialize the object.
  void clear() { memset(this, 0, sizeof(*this)); }

 private:
  size_t buffer_[kSize];   // All elements.
  size_t last_added_pos_;  // Position of the last added element.
  size_t hash_;            // XOR of all elements in buffer_.
};

// A simple fixed-size byte array.
// Each element is an 8-bit counter that can be incremented concurrently.
// The counters are allowed to overflow (i.e. are not saturating).
// Thread-compatible.
// The main use case is to increment an execution counter of a given PC.
template <size_t kSize>
class CounterArray {
 public:
  // Constructs an empty counter array.
  CounterArray() = default;

  // Clears all counters.
  void Clear() { memset(data_, 0, sizeof(data_)); }

  // Increments the counter that corresponds to idx.
  // Idx is taken modulo kSize.
  void Increment(size_t idx) {
    // An atomic increment is quite expensive, even if relaxed.
    // We do a non-atomic increment instead, with the full understanding
    // that if the increment of the same `idx` happens concurrently the result
    // will be unreliable. This is a trade-off between correctness and speed.
    // This behavior is similar to that of Clang Coverage and inline counters:
    // https://clang.llvm.org/docs/SourceBasedCodeCoverage.html
    // https://clang.llvm.org/docs/SanitizerCoverage.html#inline-8bit-counters
    // There is also a risk of contention if the same hot code is executed
    // concurrently. Both atomic and non-atomic increment will be bad.
    uint8_t *ptr = &data_[idx % kSize];
    uint8_t value{};
    __atomic_load(ptr, &value, __ATOMIC_RELAXED);
    ++value;
    __atomic_store(ptr, &value, __ATOMIC_RELAXED);
  }

  // Accessors.
  const uint8_t *data() const { return &data_[0]; }
  size_t size() const { return kSize; }

 private:
  uint8_t data_[kSize] = {};
};

// A simple fixed-capacity array with push_back.
// Thread-compatible.
template <size_t kSize>
class FeatureArray {
 public:
  // Constructs an empty feature array.
  FeatureArray() = default;

  // pushes `feature` back if there is enough space.
  void push_back(feature_t feature) {
    if (num_features_ < kSize) {
      features_[num_features_++] = feature;
    }
  }

  // Makes the array empty.
  void clear() { num_features_ = 0; }

  // Returns the array's raw data.
  feature_t *data() { return &features_[0]; }

  // Returns the number of elements in the array.
  size_t size() const { return num_features_; }

 private:
  // NOTE: No initializer needed: object state is captured by `num_features_`.
  feature_t features_[kSize];
  size_t num_features_ = 0;
};

}  // namespace centipede

#endif  // THIRD_PARTY_CENTIPEDE_FEATURE_H_