Skip to content

Original order of keys lost after deserialization for std::multimap and std::multiset #865

@lordofdestiny

Description

@lordofdestiny

Description

As defined per the C++11 standard, as referenced here for std::multimap and std::multiset,
the order of the elements/keys-value pairs that compare equivalent is the order of insertion and does not change. There is a bug in how the deserialization of these standard containers is implemented, which results in order of equivalent keys being flipped after each subsequent deserialization.

How to reproduce

Here is a minimal code to reproduce the bug:

#include <iostream>
#include <string>
#include <iterator>
#include <sstream>

#include "cereal/types/map.hpp"
#include "cereal/types/set.hpp"
#include "cereal/archives/binary.hpp"

struct MockValue
{
    char key, value;

    MockValue() = default;

    explicit MockValue(char key, char value)
        : key(key), value(value) {}

    bool operator<(const MockValue &other) const
    {
        return this->key < other.key;
    }
    bool operator==(const MockValue &other) const
    {
        return this->key == other.key && this->value == other.value;
    }

    friend std::ostream &operator<<(std::ostream &os, const MockValue &val)
    {
        return os << "[Key=" << val.key << ", Value=" << val.value << "]";
    }

    template <class Archive>
    void serialize(Archive &archive)
    {
        archive(key, value);
    }
};

// Print generic std::pair
template <template <typename...> class Pair, typename... Args,
          typename = std::pair<typename Pair<Args...>::first_type,
                               typename Pair<Args...>::second_type>>
std::ostream &operator<<(std::ostream &os, const Pair<Args...> &pair)
{
    return os << "Pair{" << pair.first << ", " << pair.second << "}";
}

// Print standard containers
template <
    template <typename...> class Container,
    typename... Args>
void printContainer(Container<Args...> &container)
{
    using VType = typename Container<Args...>::value_type;
    auto begin = container.begin();
    auto end = container.end();
    std::copy(begin, end, std::ostream_iterator<VType>(std::cout, "\n"));
}

void testMultimap() {
    std::cout << std::string(40, '=') << "std::multimap" << std::string(40, '=') << '\n';
    std::stringstream buffer;

    {
        std::multimap<char, MockValue> mmap;
        mmap.emplace('a', MockValue('a', 'a'));
        mmap.emplace('a', MockValue('a', 'b'));
        mmap.emplace('a', MockValue('a', 'c'));
        mmap.emplace('a', MockValue('a', 'd'));
        mmap.emplace('b', MockValue('b', 'a'));
        mmap.emplace('b', MockValue('b', 'b'));
        mmap.emplace('b', MockValue('b', 'c'));
        mmap.emplace('b', MockValue('b', 'd'));
    
        std::cout << "Before:\n";
        printContainer(mmap);
        std::cout << std::string(50,'-') << '\n';
        
        cereal::BinaryOutputArchive ar(buffer);
        ar(mmap);
    }
    
    {
        std::multimap<char, MockValue> mmap;
        cereal::BinaryInputArchive ar(buffer);
        ar(mmap);
        
        std::cout << "After: \n";
        printContainer(mmap);
    }

}

void testMultiset() {
    std::cout << std::string(40, '=') << "std::multiset" << std::string(40, '=') << '\n';
    std::stringstream buffer;

    {
        std::multiset<MockValue> mset;
        mset.emplace(MockValue('a', 'a'));
        mset.emplace(MockValue('a', 'b'));
        mset.emplace(MockValue('a', 'c'));
        mset.emplace(MockValue('a', 'd'));
        mset.emplace(MockValue('b', 'a'));
        mset.emplace(MockValue('b', 'b'));
        mset.emplace(MockValue('b', 'c'));
        mset.emplace(MockValue('b', 'd'));
    
        std::cout << "Before:\n";
        printContainer(mset);
        std::cout << std::string(50,'-') << '\n';
        
        cereal::BinaryOutputArchive ar(buffer);
        ar(mset);
    }
    
    {
        std::multiset<MockValue> mset;
        cereal::BinaryInputArchive ar(buffer);
        ar(mset);
        
        std::cout << "After: \n";
        printContainer(mset);
    }

}

int main()
{
    testMultimap();
    testMultiset();
}

And this is the output with the current release:

Pair{a, [Key=a, Value=b]}
Pair{a, [Key=a, Value=c]}
Pair{a, [Key=a, Value=d]}
Pair{b, [Key=b, Value=a]}
Pair{b, [Key=b, Value=b]}
Pair{b, [Key=b, Value=c]}
Pair{b, [Key=b, Value=d]}
--------------------------------------------------
After: 
Pair{a, [Key=a, Value=d]}
Pair{a, [Key=a, Value=c]}
Pair{a, [Key=a, Value=b]}
Pair{a, [Key=a, Value=a]}
Pair{b, [Key=b, Value=d]}
Pair{b, [Key=b, Value=c]}
Pair{b, [Key=b, Value=b]}
Pair{b, [Key=b, Value=a]}
========================================std::multiset========================================
Before:
[Key=a, Value=a]
[Key=a, Value=b]
[Key=a, Value=c]
[Key=a, Value=d]
[Key=b, Value=a]
[Key=b, Value=b]
[Key=b, Value=c]
[Key=b, Value=d]
--------------------------------------------------
After: 
[Key=a, Value=d]
[Key=a, Value=c]
[Key=a, Value=b]
[Key=a, Value=a]
[Key=b, Value=d]
[Key=b, Value=c]
[Key=b, Value=b]
[Key=b, Value=a]

This is the expected output ( note identical before and after outputs):

========================================std::multimap========================================
Before:
Pair{a, [Key=a, Value=a]}
Pair{a, [Key=a, Value=b]}
Pair{a, [Key=a, Value=c]}
Pair{a, [Key=a, Value=d]}
Pair{b, [Key=b, Value=a]}
Pair{b, [Key=b, Value=b]}
Pair{b, [Key=b, Value=c]}
Pair{b, [Key=b, Value=d]}
--------------------------------------------------
After: 
Pair{a, [Key=a, Value=a]}
Pair{a, [Key=a, Value=b]}
Pair{a, [Key=a, Value=c]}
Pair{a, [Key=a, Value=d]}
Pair{b, [Key=b, Value=a]}
Pair{b, [Key=b, Value=b]}
Pair{b, [Key=b, Value=c]}
Pair{b, [Key=b, Value=d]}
========================================std::multiset========================================
Before:
[Key=a, Value=a]
[Key=a, Value=b]
[Key=a, Value=c]
[Key=a, Value=d]
[Key=b, Value=a]
[Key=b, Value=b]
[Key=b, Value=c]
[Key=b, Value=d]
--------------------------------------------------
After: 
[Key=a, Value=a]
[Key=a, Value=b]
[Key=a, Value=c]
[Key=a, Value=d]
[Key=b, Value=a]
[Key=b, Value=b]
[Key=b, Value=c]
[Key=b, Value=d]

Diagnosis

I've been able to diagnose the issue, namely here for std::multimap, and here for std::multiset. In both cases, emplace_hint() is used. This function inserts the new value before the value pointed to by the hint. This means that the order of inserted keys is reversed after the container is deserialized. I presume this function was used because of performance considerations, but it does not result in the expected behavior.

Note that this must be considered a bug because it violates the expected type invariants of std::multimap and std::multiset.

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions