-
Notifications
You must be signed in to change notification settings - Fork 812
Description
Description
As defined per the C++11 standard, as referenced here for std::multimap and std::multiset,
the order of the elements/keys-value pairs that compare equivalent is the order of insertion and does not change. There is a bug in how the deserialization of these standard containers is implemented, which results in order of equivalent keys being flipped after each subsequent deserialization.
How to reproduce
Here is a minimal code to reproduce the bug:
#include <iostream>
#include <string>
#include <iterator>
#include <sstream>
#include "cereal/types/map.hpp"
#include "cereal/types/set.hpp"
#include "cereal/archives/binary.hpp"
struct MockValue
{
char key, value;
MockValue() = default;
explicit MockValue(char key, char value)
: key(key), value(value) {}
bool operator<(const MockValue &other) const
{
return this->key < other.key;
}
bool operator==(const MockValue &other) const
{
return this->key == other.key && this->value == other.value;
}
friend std::ostream &operator<<(std::ostream &os, const MockValue &val)
{
return os << "[Key=" << val.key << ", Value=" << val.value << "]";
}
template <class Archive>
void serialize(Archive &archive)
{
archive(key, value);
}
};
// Print generic std::pair
template <template <typename...> class Pair, typename... Args,
typename = std::pair<typename Pair<Args...>::first_type,
typename Pair<Args...>::second_type>>
std::ostream &operator<<(std::ostream &os, const Pair<Args...> &pair)
{
return os << "Pair{" << pair.first << ", " << pair.second << "}";
}
// Print standard containers
template <
template <typename...> class Container,
typename... Args>
void printContainer(Container<Args...> &container)
{
using VType = typename Container<Args...>::value_type;
auto begin = container.begin();
auto end = container.end();
std::copy(begin, end, std::ostream_iterator<VType>(std::cout, "\n"));
}
void testMultimap() {
std::cout << std::string(40, '=') << "std::multimap" << std::string(40, '=') << '\n';
std::stringstream buffer;
{
std::multimap<char, MockValue> mmap;
mmap.emplace('a', MockValue('a', 'a'));
mmap.emplace('a', MockValue('a', 'b'));
mmap.emplace('a', MockValue('a', 'c'));
mmap.emplace('a', MockValue('a', 'd'));
mmap.emplace('b', MockValue('b', 'a'));
mmap.emplace('b', MockValue('b', 'b'));
mmap.emplace('b', MockValue('b', 'c'));
mmap.emplace('b', MockValue('b', 'd'));
std::cout << "Before:\n";
printContainer(mmap);
std::cout << std::string(50,'-') << '\n';
cereal::BinaryOutputArchive ar(buffer);
ar(mmap);
}
{
std::multimap<char, MockValue> mmap;
cereal::BinaryInputArchive ar(buffer);
ar(mmap);
std::cout << "After: \n";
printContainer(mmap);
}
}
void testMultiset() {
std::cout << std::string(40, '=') << "std::multiset" << std::string(40, '=') << '\n';
std::stringstream buffer;
{
std::multiset<MockValue> mset;
mset.emplace(MockValue('a', 'a'));
mset.emplace(MockValue('a', 'b'));
mset.emplace(MockValue('a', 'c'));
mset.emplace(MockValue('a', 'd'));
mset.emplace(MockValue('b', 'a'));
mset.emplace(MockValue('b', 'b'));
mset.emplace(MockValue('b', 'c'));
mset.emplace(MockValue('b', 'd'));
std::cout << "Before:\n";
printContainer(mset);
std::cout << std::string(50,'-') << '\n';
cereal::BinaryOutputArchive ar(buffer);
ar(mset);
}
{
std::multiset<MockValue> mset;
cereal::BinaryInputArchive ar(buffer);
ar(mset);
std::cout << "After: \n";
printContainer(mset);
}
}
int main()
{
testMultimap();
testMultiset();
}
And this is the output with the current release:
Pair{a, [Key=a, Value=b]}
Pair{a, [Key=a, Value=c]}
Pair{a, [Key=a, Value=d]}
Pair{b, [Key=b, Value=a]}
Pair{b, [Key=b, Value=b]}
Pair{b, [Key=b, Value=c]}
Pair{b, [Key=b, Value=d]}
--------------------------------------------------
After:
Pair{a, [Key=a, Value=d]}
Pair{a, [Key=a, Value=c]}
Pair{a, [Key=a, Value=b]}
Pair{a, [Key=a, Value=a]}
Pair{b, [Key=b, Value=d]}
Pair{b, [Key=b, Value=c]}
Pair{b, [Key=b, Value=b]}
Pair{b, [Key=b, Value=a]}
========================================std::multiset========================================
Before:
[Key=a, Value=a]
[Key=a, Value=b]
[Key=a, Value=c]
[Key=a, Value=d]
[Key=b, Value=a]
[Key=b, Value=b]
[Key=b, Value=c]
[Key=b, Value=d]
--------------------------------------------------
After:
[Key=a, Value=d]
[Key=a, Value=c]
[Key=a, Value=b]
[Key=a, Value=a]
[Key=b, Value=d]
[Key=b, Value=c]
[Key=b, Value=b]
[Key=b, Value=a]
This is the expected output ( note identical before and after outputs):
========================================std::multimap========================================
Before:
Pair{a, [Key=a, Value=a]}
Pair{a, [Key=a, Value=b]}
Pair{a, [Key=a, Value=c]}
Pair{a, [Key=a, Value=d]}
Pair{b, [Key=b, Value=a]}
Pair{b, [Key=b, Value=b]}
Pair{b, [Key=b, Value=c]}
Pair{b, [Key=b, Value=d]}
--------------------------------------------------
After:
Pair{a, [Key=a, Value=a]}
Pair{a, [Key=a, Value=b]}
Pair{a, [Key=a, Value=c]}
Pair{a, [Key=a, Value=d]}
Pair{b, [Key=b, Value=a]}
Pair{b, [Key=b, Value=b]}
Pair{b, [Key=b, Value=c]}
Pair{b, [Key=b, Value=d]}
========================================std::multiset========================================
Before:
[Key=a, Value=a]
[Key=a, Value=b]
[Key=a, Value=c]
[Key=a, Value=d]
[Key=b, Value=a]
[Key=b, Value=b]
[Key=b, Value=c]
[Key=b, Value=d]
--------------------------------------------------
After:
[Key=a, Value=a]
[Key=a, Value=b]
[Key=a, Value=c]
[Key=a, Value=d]
[Key=b, Value=a]
[Key=b, Value=b]
[Key=b, Value=c]
[Key=b, Value=d]
Diagnosis
I've been able to diagnose the issue, namely here for std::multimap, and here for std::multiset. In both cases, emplace_hint()
is used. This function inserts the new value before the value pointed to by the hint. This means that the order of inserted keys is reversed after the container is deserialized. I presume this function was used because of performance considerations, but it does not result in the expected behavior.
Note that this must be considered a bug because it violates the expected type invariants of std::multimap and std::multiset.