-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathword_count.cpp
59 lines (52 loc) · 1.88 KB
/
word_count.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
/*******************************************************************************
* word_count.cpp
*
* Example for Project Thrill - http://project-thrill.org
*
* Copyright (C) 2016 Timo Bingmann <tb@panthema.net>
*
* All rights reserved. Published under the BSD-2 license in the LICENSE file.
******************************************************************************/
#include <thrill/api/read_lines.hpp>
#include <thrill/api/reduce_by_key.hpp>
#include <thrill/api/write_lines.hpp>
#include <thrill/common/string_view.hpp>
#include <iostream>
#include <string>
#include <utility>
using thrill::common::StringView;
using thrill::common::SplitView;
// Counts how often each word occurs in the lines read from `input` and writes
// one "<word>: <count>" line per distinct word to `output`.
//
// ctx    - Thrill context the dataflow is built on.
// input  - handed to ReadLines() as the source of the text lines.
// output - handed to WriteLines() as the destination of the result lines.
//
// Words are the non-empty pieces obtained by splitting each line at ' '.
void WordCount(thrill::Context& ctx,
               std::string input, std::string output) {
    using Pair = std::pair<std::string, size_t>;

    auto word_pairs =
        ReadLines(ctx, input)
        .template FlatMap<Pair>(
            // flatmap lambda: split the line and emit (word, 1) per word
            [](const std::string& line, auto emit) {
                SplitView(line, ' ', [&](StringView sv) {
                    std::string word = sv.ToString();
                    // Splitting at every ' ' may yield zero-length tokens
                    // (blank lines, leading/trailing or repeated spaces).
                    // Those are not words: counting them would produce a
                    // bogus ": N" entry in the output.
                    if (word.empty()) return;
                    emit(Pair(std::move(word), 1));
                });
            });

    word_pairs.ReduceByKey(
        // key extractor: the word string
        [](const Pair& p) { return p.first; },
        // commutative reduction: add counters of two pairs with equal key
        [](const Pair& a, const Pair& b) {
            return Pair(a.first, a.second + b.second);
        })
    // format each (word, count) pair as a "<word>: <count>" text line
    .Map([](const Pair& p) {
        return p.first + ": "
               + std::to_string(p.second);
    })
    .WriteLines(output);
}
// Program entry point: expects exactly two command-line arguments, the input
// and the output, and forwards them to WordCount() from within thrill::Run().
// Returns -1 after printing a usage line when the argument count is wrong;
// otherwise returns whatever thrill::Run() returns.
int main(int argc, char* argv[]) {
    // argv[0] is the program name, so two user arguments mean argc == 3.
    const bool has_both_paths = (argc == 3);
    if (!has_both_paths) {
        std::cout << "Usage: " << argv[0] << " <input> <output>" << std::endl;
        return -1;
    }

    // Job handed to Thrill; it receives the Context that WordCount() needs.
    // Capturing argv by reference is fine: it outlives the call below.
    const auto job = [&](thrill::Context& ctx) {
        WordCount(ctx, argv[1], argv[2]);
    };
    return thrill::Run(job);
}
/******************************************************************************/