Skip to content

Commit ef6b5b2

Browse files
committed
more work on variadic 'simdFor'
1 parent 1970aad commit ef6b5b2

File tree

9 files changed

+176
-49
lines changed

9 files changed

+176
-49
lines changed

gen/nt2-functors.R

+4-4
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
path <- "inst/include/RcppNT2/functor.h"
1+
path <- "inst/include/RcppNT2/functor/functor.h"
22
if (!dir.exists(dirname(path)))
33
dir.create(dirname(path), recursive = TRUE, showWarnings = FALSE)
44

@@ -15,8 +15,8 @@ indent <- function(code, indent = " ") {
1515
}
1616

1717
template <- paste(c(
18-
"#ifndef RCPP_NT2_FUNCTORS_H",
19-
"#define RCPP_NT2_FUNCTORS_H",
18+
"#ifndef RCPP_NT2_FUNCTOR_FUNCTOR_H",
19+
"#define RCPP_NT2_FUNCTOR_FUNCTOR_H",
2020
"",
2121
"// Auto-generated functors for functions provided by NT2.",
2222
"// See 'gen/nt2-functors.R' for more details.",
@@ -29,7 +29,7 @@ template <- paste(c(
2929
"} // namespace functor",
3030
"} // namespace RcppNT2",
3131
"",
32-
"#endif /* RCPP_NT2_FUNCTORS_H */"
32+
"#endif /* RCPP_NT2_FUNCTOR_FUNCTOR_H */"
3333
), collapse = "\n")
3434

3535
code <- NULL
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
// Shows how 'simdFor()' can be used with an arbitrary
2+
// number of input, output iterators.
3+
4+
// [[Rcpp::depends(RcppNT2)]]
5+
#include <RcppNT2.h>
6+
using namespace RcppNT2;
7+
8+
#include <Rcpp.h>
9+
using namespace Rcpp;
10+
11+
struct F
12+
{
13+
template <typename T>
14+
void operator()(const T& a, const T& b, const T& c)
15+
{
16+
result_ += nt2::sum(a * b * c);
17+
}
18+
19+
operator double() const {
20+
return result_;
21+
}
22+
23+
double result_ = 0.0;
24+
};
25+
26+
// [[Rcpp::export]]
27+
double simdForTest(NumericVector a, NumericVector b, NumericVector c)
28+
{
29+
return variadic::simdFor(F(), a, b, c);
30+
}
31+
32+
/*** R
33+
n <- 1024 * 1000
34+
x <- rnorm(n)
35+
y <- rnorm(n)
36+
z <- rnorm(n)
37+
38+
stopifnot(all.equal(
39+
sum(x * y * z),
40+
simdForTest(x, y, z)
41+
))
42+
43+
library(microbenchmark)
44+
microbenchmark(
45+
R = sum(x * y * z),
46+
simd = simdForTest(x, y, z)
47+
)
48+
*/

inst/include/RcppNT2.h

+4-3
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,9 @@
3939
#include <nt2/swar/swar.hpp>
4040
#include <nt2/trigonometric/trigonometric.hpp>
4141

42-
#include <RcppNT2/algorithm.h>
43-
#include <RcppNT2/functor.h>
44-
#include <RcppNT2/variadic.h>
42+
#include <RcppNT2/core/core.h>
43+
#include <RcppNT2/algorithm/algorithm.h>
44+
#include <RcppNT2/functor/functor.h>
45+
#include <RcppNT2/variadic/variadic.h>
4546

4647
#endif /* RCPP_NT2_H */

inst/include/RcppNT2/algorithm.h renamed to inst/include/RcppNT2/algorithm/algorithm.h

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
#ifndef RCPP_NT2_ALGORITHM_H
2-
#define RCPP_NT2_ALGORITHM_H
1+
#ifndef RCPP_NT2_ALGORITHM_ALGORITHM_H
2+
#define RCPP_NT2_ALGORITHM_ALGORITHM_H
33

44
#include <boost/simd/sdk/simd/algorithm.hpp>
55

@@ -125,4 +125,4 @@ U simdMapReduce(const T* it, const T* end, U init, MapReducer&& mapper)
125125

126126
} // namespace RcppParallel
127127

128-
#endif /* RCPP_NT2_ALGORITHM_H */
128+
#endif /* RCPP_NT2_ALGORITHM_ALGORITHM_H */

inst/include/RcppNT2/core/core.h

+20
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
#ifndef RCPP_NT2_CORE_CORE_H
2+
#define RCPP_NT2_CORE_CORE_H
3+
4+
namespace RcppNT2 {
5+
6+
// Returns a raw pointer to the first element of 't', obtained by taking the
// address of 't[0]'. Works for any type whose 'operator[]' yields a reference;
// constness of the container carries through to the pointer type.
template <typename T>
auto begin(T&& t) -> decltype(&std::forward<T>(t)[0])
{
  auto&& front = std::forward<T>(t)[0];
  return &front;
}
11+
12+
// Returns a raw pointer one past the last element of 't', computed as the
// address of 't[0]' plus 't.size()'.
//
// NOTE: element 0 is indexed even when 't' is empty, so this relies on
// 't[0]' being addressable for an empty container -- confirm for the
// container types this is used with.
template <typename T>
auto end(T&& t) -> decltype(&std::forward<T>(t)[0])
{
  // Read the size through the (lvalue) parameter first so that 't' is
  // forwarded exactly once, never used again after being forwarded.
  const auto count = t.size();
  return &std::forward<T>(t)[0] + count;
}
17+
18+
} // namespace RcppNT2
19+
20+
#endif /* RCPP_NT2_CORE_CORE_H */

inst/include/RcppNT2/functor.h renamed to inst/include/RcppNT2/functor/functor.h

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
#ifndef RCPP_NT2_FUNCTORS_H
2-
#define RCPP_NT2_FUNCTORS_H
1+
#ifndef RCPP_NT2_FUNCTOR_FUNCTOR_H
2+
#define RCPP_NT2_FUNCTOR_FUNCTOR_H
33

44
// Auto-generated functors for functions provided by NT2.
55
// See 'gen/nt2-functors.R' for more details.
@@ -59,4 +59,4 @@ inline auto operator()(T&& t, U&& u) -> decltype(std::forward<T>(t) / std::forwa
5959
} // namespace functor
6060
} // namespace RcppNT2
6161

62-
#endif /* RCPP_NT2_FUNCTORS_H */
62+
#endif /* RCPP_NT2_FUNCTOR_FUNCTOR_H */

inst/include/RcppNT2/variadic.h

-36
This file was deleted.

inst/include/RcppNT2/variadic/tools.h

+54
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
#ifndef RCPP_NT2_VARIADIC_TOOLS_H
2+
#define RCPP_NT2_VARIADIC_TOOLS_H
3+
4+
namespace RcppNT2 {
5+
namespace variadic {
6+
7+
// Invokes 'callable' with 'params', perfectly forwarding both the callable
// and every argument, and returns exactly what the call expression yields
// (value category included).
template <typename Callable, typename... Params>
inline auto apply(Callable&& callable, Params&&... params)
  -> decltype(std::forward<Callable>(callable)(std::forward<Params>(params)...))
{
  return std::forward<Callable>(callable)(std::forward<Params>(params)...);
}
12+
13+
// Base case: the minimum of a single value is that value, forwarded back
// with its original value category. 'f' is accepted for signature parity
// with the variadic overload but is not used by this implementation.
template <typename F, typename T>
inline auto min(F&& f, T&& t) -> decltype(std::forward<T>(t))
{
  (void) f;
  return std::forward<T>(t);
}

// Returns a reference to the smallest of 't, ts...' as ordered by
// 'operator<'. When several arguments compare equal, the later one wins.
//
// The result has the reference type of the first argument, so all arguments
// are expected to share one type. Rvalue arguments yield an rvalue reference
// into the caller's temporaries, which is only valid until the end of the
// calling full-expression.
template <typename F, typename T, typename... Ts>
inline auto min(F&& f, T&& t, Ts&&... ts) -> decltype(std::forward<T>(t))
{
  typedef decltype(std::forward<T>(t)) result_type;

  // 'f' is handed on as an lvalue here and forwarded only in its last use.
  auto&& lhs = min(f, std::forward<T>(t));
  auto&& rhs = min(std::forward<F>(f), std::forward<Ts>(ts)...);

  // 'lhs' and 'rhs' are named, hence lvalues; the cast restores the declared
  // value category so that calls with rvalue arguments compile as well.
  return static_cast<result_type>(lhs < rhs ? lhs : rhs);
}
26+
27+
// Base case: the maximum of a single value is that value, forwarded back
// with its original value category. 'f' is accepted for signature parity
// with the variadic overload but is not used by this implementation.
template <typename F, typename T>
inline auto max(F&& f, T&& t) -> decltype(std::forward<T>(t))
{
  (void) f;
  return std::forward<T>(t);
}

// Returns a reference to the largest of 't, ts...' as ordered by
// 'operator<'. When several arguments compare equal, the earlier one wins.
//
// The result has the reference type of the first argument, so all arguments
// are expected to share one type. Rvalue arguments yield an rvalue reference
// into the caller's temporaries, which is only valid until the end of the
// calling full-expression.
template <typename F, typename T, typename... Ts>
inline auto max(F&& f, T&& t, Ts&&... ts) -> decltype(std::forward<T>(t))
{
  typedef decltype(std::forward<T>(t)) result_type;

  // 'f' is handed on as an lvalue here and forwarded only in its last use.
  auto&& lhs = max(f, std::forward<T>(t));
  auto&& rhs = max(std::forward<F>(f), std::forward<Ts>(ts)...);

  // 'lhs' and 'rhs' are named, hence lvalues; the cast restores the declared
  // value category so that calls with rvalue arguments compile as well.
  return static_cast<result_type>(lhs < rhs ? rhs : lhs);
}
40+
41+
// Advances every argument by 'Size' using 'operator+=', left to right.
// Called with no arguments it does nothing.
template <int Size, typename... Ts>
void increment(Ts&&... ts)
{
  // Elements of a braced initializer are evaluated strictly in order, so the
  // arguments are bumped in the same sequence as peeling them off one by one.
  // The leading 0 keeps the array non-empty when the pack is empty.
  const int expand[] = { 0, ((void) (std::forward<Ts>(ts) += Size), 0)... };
  (void) expand;
}
50+
51+
} // namespace variadic
52+
} // namespace RcppNT2
53+
54+
#endif /* RCPP_NT2_VARIADIC_TOOLS_H */
+40
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
#ifndef RCPP_NT2_VARIADIC_VARIADIC_H
2+
#define RCPP_NT2_VARIADIC_VARIADIC_H
3+
4+
#include <RcppNT2/variadic/tools.h>
5+
6+
namespace RcppNT2 {
7+
namespace variadic {
8+
9+
template <typename F, typename T, typename... Ts>
10+
F simdFor(F&& f, const T* it, const T* end, const Ts*... ts)
11+
{
12+
typedef boost::simd::pack<T> vT;
13+
static const std::size_t N = vT::static_size;
14+
15+
const T* aligned_begin = std::min(boost::simd::align_on(it, N * sizeof(T)), end);
16+
const T* aligned_end = aligned_begin + (end - aligned_begin) / N * N;
17+
18+
for (; it != aligned_begin; increment<1>(it, ts...))
19+
std::forward<F>(f)(*it, *ts...);
20+
21+
for (; it != aligned_end; increment<N>(it, ts...))
22+
std::forward<F>(f)(boost::simd::load<vT>(it), boost::simd::load<vT>(ts)...);
23+
24+
for (; it != end; increment<1>(it, ts...))
25+
std::forward<F>(f)(*it, *ts...);
26+
27+
return f;
28+
29+
}
30+
31+
// Container convenience overload: takes the iteration bounds from
// 't.begin()' / 't.end()' and the start of every further container from
// 'begin()', then defers to the pointer-based 'simdFor()'.
//
// Only 't' determines how many elements are visited, so each container in
// 'ts' must hold at least as many elements as 't'.
template <typename F, typename T, typename... Ts>
F simdFor(F&& f, const T& t, const Ts&... ts)
{
  const auto first = t.begin();
  const auto last  = t.end();
  return simdFor(std::forward<F>(f), first, last, begin(ts)...);
}
36+
37+
} // namespace variadic
38+
} // namespace RcppNT2
39+
40+
#endif /* RCPP_NT2_VARIADIC_VARIADIC_H */

0 commit comments

Comments
 (0)