Skip to content

Commit

Permalink
Faster dedup, not using zoneRepresentation but wire format, which allows for an unordered_set as well.
Browse files Browse the repository at this point in the history
  • Loading branch information
omoerbeek committed Oct 25, 2024
1 parent 99e30dd commit 8089aeb
Show file tree
Hide file tree
Showing 5 changed files with 33 additions and 4 deletions.
20 changes: 20 additions & 0 deletions pdns/dnsparser.hh
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,26 @@ public:
return record;
}

// Serializes this record content through a scratch DNSPacketWriter and returns
// the bytes that the writer hands back from getContentWireFormat().
//
// qname     - owner name passed to startRecord() together with getType().
// canonic   - when true, setCanonic(true) is applied to the writer first.
// lowerCase - when true, setLowercase(true) is applied to the writer first.
//
// The scratch packet is created with g_rootdnsname / QType::A as its question;
// it is only a local buffer and is discarded when this function returns.
[[nodiscard]] string wireFormatContent(const DNSName& qname, bool canonic = false, bool lowerCase = false) const
{
  string wire;
  vector<uint8_t> scratch;
  DNSPacketWriter writer(scratch, g_rootdnsname, QType::A);

  // Only touch the writer's flags when the caller asked for them.
  if (canonic) {
    writer.setCanonic(true);
  }
  if (lowerCase) {
    writer.setLowercase(true);
  }

  // Open a record for qname and let this content write itself into it.
  writer.startRecord(qname, getType());
  toPacket(writer);

  // Deliberately no commit(): getContentWireFormat() needs to be called before commit().
  writer.getContentWireFormat(wire);
  return wire;
}

virtual bool operator==(const DNSRecordContent& rhs) const
{
return typeid(*this)==typeid(rhs) && this->getZoneRepresentation() == rhs.getZoneRepresentation();
Expand Down
6 changes: 6 additions & 0 deletions pdns/dnswriter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -457,6 +457,12 @@ template <typename Container> void GenericDNSPacketWriter<Container>::getRecordP
records.assign(d_content.begin() + d_sor, d_content.end());
}

// call __before commit__
// Replaces the contents of `records` with the bytes of d_content that lie
// between offset d_rollbackmarker and the end of the buffer.
// NOTE(review): d_rollbackmarker presumably marks where the record currently
// being written begins - confirm against startRecord().
template <typename Container>
void GenericDNSPacketWriter<Container>::getContentWireFormat(string& records)
{
  const auto first = d_content.begin() + d_rollbackmarker;
  const auto last = d_content.end();
  records.assign(first, last);
}

template <typename Container> uint32_t GenericDNSPacketWriter<Container>::size() const
{
return d_content.size();
Expand Down
1 change: 1 addition & 0 deletions pdns/dnswriter.hh
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,7 @@ public:

dnsheader* getHeader();
void getRecordPayload(string& records); // call __before commit__
// Overwrites `records` with the bytes of d_content from offset d_rollbackmarker to the end.
void getContentWireFormat(string& records); // call __before commit__

void setCanonic(bool val)
{
Expand Down
3 changes: 2 additions & 1 deletion pdns/recursordist/test-shuffle_cc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,9 @@ BOOST_AUTO_TEST_CASE(test_simple)
BOOST_CHECK_EQUAL(dups, 1U);
BOOST_CHECK_EQUAL(list.size(), 2U);
addRecordToList(list, DNSName("Foo"), QType::A, "1.2.3.4");
addRecordToList(list, DNSName("FoO"), QType::A, "1.2.3.4", DNSResourceRecord::ADDITIONAL, 999);
dups = pdns::dedup(list);
BOOST_CHECK_EQUAL(dups, 1U);
BOOST_CHECK_EQUAL(dups, 2U);
BOOST_CHECK_EQUAL(list.size(), 2U);
BOOST_CHECK_EQUAL(address, &list);
}
Expand Down
7 changes: 4 additions & 3 deletions pdns/shuffle.cc
Original file line number Diff line number Diff line change
Expand Up @@ -143,23 +143,24 @@ void pdns::orderAndShuffle(vector<DNSRecord>& rrs, bool includingAdditionals)

unsigned int pdns::dedup(vector<DNSRecord>& rrs)
{
// This functino tries to avoid unneccesary work
// This function tries to avoid unnecessary work
// First a vector with zero or one element does not need dedupping
if (rrs.size() <= 1) {
return 0;
}

// If we have a larger vector, first check if we actually have duplicates.
// We assume the most common case is: no
std::set<std::tuple<DNSName, QType, std::string>> seen;
std::unordered_set<std::string> seen;
std::vector<bool> dups(rrs.size(), false);

unsigned int counter = 0;
unsigned int numDups = 0;

for (const auto& rec : rrs) {
const auto key = rec.getContent()->wireFormatContent(rec.d_name, true, true);
// This ignores class, ttl and place by using constants for those
if (!seen.emplace(rec.d_name.makeLowerCase(), rec.d_type, rec.getContent()->serialize(rec.d_name, true, false)).second) {
if (!seen.emplace(key).second) {
dups[counter] = true;
numDups++;
}
Expand Down

0 comments on commit 8089aeb

Please sign in to comment.