Skip to content

Commit 6bd6b96

Browse files
committed
add manual broadcast loops, remove tabs, fix benchmarks
1 parent 5cc89c1 commit 6bd6b96

File tree

3 files changed

+225
-155
lines changed

3 files changed

+225
-155
lines changed

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,7 @@ string(TOUPPER "${CMAKE_BUILD_TYPE}" U_CMAKE_BUILD_TYPE)
2222
if (CMAKE_CXX_COMPILER_ID MATCHES "Clang" OR CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Intel")
2323
CHECK_CXX_COMPILER_FLAG("-std=c++14" HAS_CPP14_FLAG)
2424
if (HAS_CPP14_FLAG)
25-
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Ofast -ffast-math -march=native -std=c++14 -pthread")
25+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Ofast -ffast-math -march=native -std=c++14 -pthread -Wno-narrowing")
2626
else()
2727
message(FATAL_ERROR "Unsupported compiler -- xtensor requires C++14 support!")
2828
endif()

src/benchmark_broadcasting.hpp

Lines changed: 94 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@
1313
#include "xtensor/xrandom.hpp"
1414
#include "xtensor/xtensor.hpp"
1515
#include "xtensor/xarray.hpp"
16+
#include "xtensor/xfixed.hpp"
1617

1718
#ifdef HAS_PYTHONIC
1819
#include <pythonic/core.hpp>
@@ -27,37 +28,106 @@
2728

2829
namespace xt
2930
{
30-
void xtensor_broadcasting(benchmark::State& state)
31-
{
32-
using namespace xt;
33-
using allocator = xsimd::aligned_allocator<double, 32>;
34-
using tensor3 = xtensor_container<xt::uvector<double, allocator>, 3, layout_type::row_major>;
35-
using tensor2 = xtensor_container<xt::uvector<double, allocator>, 2, layout_type::row_major>;
// Benchmark: evaluates the expression `a + b` into a freshly constructed
// tensor3 on every iteration. `a` is built from a three-extent shape and `b`
// from a two-extent shape; every extent equals state.range(0).
// The removed ("-") and added ("+") lines in this hunk carry the same tokens;
// per the commit message the change here is tab removal only.
31+
void xtensor_broadcasting(benchmark::State& state)
32+
{
// NOTE(review): this function is already declared inside `namespace xt`, so
// the using-directive below looks redundant -- confirm before removing.
33+
using namespace xt;
34+
using allocator = xsimd::aligned_allocator<double, 32>;
// Fixed-rank containers (rank 3 and rank 2), row-major, stored in a uvector
// that uses the allocator alias declared above.
35+
using tensor3 = xtensor_container<xt::uvector<double, allocator>, 3, layout_type::row_major>;
36+
using tensor2 = xtensor_container<xt::uvector<double, allocator>, 2, layout_type::row_major>;
3637

37-
tensor3 a = random::rand<double>({state.range(0), state.range(0), state.range(0)});
38-
tensor2 b = random::rand<double>({state.range(0), state.range(0)});
// Operands are created once, outside the timed loop.
38+
tensor3 a = random::rand<double>({state.range(0), state.range(0), state.range(0)});
39+
tensor2 b = random::rand<double>({state.range(0), state.range(0)});
3940

4041
for (auto _ : state)
41-
{
42-
tensor3 res(a + b);
43-
benchmark::DoNotOptimize(res.raw_data());
44-
}
45-
}
46-
BENCHMARK(xtensor_broadcasting)->RangeMultiplier(MULTIPLIER)->Range(RANGE);
42+
{
// Construction of `res` (and therefore evaluation of a + b) is the timed work.
43+
tensor3 res(a + b);
// Keeps the result observable so the computation is not eliminated as dead code.
44+
benchmark::DoNotOptimize(res.raw_data());
45+
}
46+
}
// MULTIPLIER and RANGE are defined outside this view.
47+
BENCHMARK(xtensor_broadcasting)->RangeMultiplier(MULTIPLIER)->Range(RANGE);
48+
// Benchmark: same shape of work as xtensor_broadcasting (evaluate `a + b` into
// a new container each iteration), but using xarray_container, whose alias
// below takes no rank template argument.
49+
void xarray_broadcasting(benchmark::State& state)
50+
{
51+
using namespace xt;
52+
using allocator = xsimd::aligned_allocator<double, 32>;
// Both aliases name the same type here; the "3"/"2" in the names only reflect
// the number of extents passed when `a` and `b` are created below.
53+
using tensor3 = xarray_container<xt::uvector<double, allocator>, layout_type::row_major>;
54+
using tensor2 = xarray_container<xt::uvector<double, allocator>, layout_type::row_major>;
55+
// Operands are created once, outside the timed loop; every extent is state.range(0).
56+
tensor3 a = random::rand<double>({state.range(0), state.range(0), state.range(0)});
57+
tensor2 b = random::rand<double>({state.range(0), state.range(0)});
58+
59+
for (auto _ : state)
60+
{
// Construction of `res` from the expression a + b is the timed work.
61+
tensor3 res(a + b);
// Keeps the result observable so the computation is not eliminated as dead code.
62+
benchmark::DoNotOptimize(res.raw_data());
63+
}
64+
}
// MULTIPLIER and RANGE are defined outside this view.
65+
BENCHMARK(xarray_broadcasting)->RangeMultiplier(MULTIPLIER)->Range(RANGE);
66+
// Benchmark: hand-written triple loop computing c(i,j,k) = a(i,j,k) + b(i,j,k)
// on compile-time-shaped xtensorf containers.
// Template parameter N: the extent used for every dimension (a, c: N x N x N;
// b: N x N).
67+
template <std::size_t N>
68+
void manual_broadcast_xtensorf(benchmark::State& state)
69+
{
// `a` and `b` are constructed without explicit element values and are never
// written in this function.
// NOTE(review): their contents are presumably unspecified -- confirm this is
// acceptable for the measurement.
70+
auto a = xt::xtensorf<double, xt::xshape<N, N, N>>();
71+
auto b = xt::xtensorf<double, xt::xshape<N, N>>();
72+
for (auto _ : state)
73+
{
// The output container is constructed inside the timed loop.
74+
auto c = xt::xtensorf<double, xt::xshape<N, N, N>>();
75+
for (std::size_t i = 0; i < a.shape()[0]; ++i)
76+
for (std::size_t j = 0; j < a.shape()[1]; ++j)
77+
for (std::size_t k = 0; k < a.shape()[2]; ++k)
// NOTE(review): `b` has two extents but is indexed with three indices;
// presumably this relies on xtensor's operator() handling of surplus
// leading indices to emulate broadcasting -- confirm against xtensor docs.
78+
c(i, j, k) = a(i, j, k) + b(i, j, k);
// Keeps the result observable so the loop nest is not eliminated as dead code.
79+
benchmark::DoNotOptimize(c.raw_data());
80+
}
81+
}
// Registered for three fixed sizes: N = 3, 8 and 64.
82+
BENCHMARK_TEMPLATE(manual_broadcast_xtensorf, 3);
83+
BENCHMARK_TEMPLATE(manual_broadcast_xtensorf, 8);
84+
BENCHMARK_TEMPLATE(manual_broadcast_xtensorf, 64);
85+
// Benchmark: hand-written triple loop computing c(i,j,k) = a(i,j,k) + b(i,j,k)
// on xt::xtensor containers whose extents all equal state.range(0).
86+
void manual_broadcast_xtensor(benchmark::State& state)
87+
{
// `a` and `b` are created with from_shape and never written in this function.
// NOTE(review): from_shape presumably leaves elements uninitialized -- confirm.
88+
auto a = xt::xtensor<double, 3>::from_shape({state.range(0), state.range(0), state.range(0)});
89+
auto b = xt::xtensor<double, 2>::from_shape({state.range(0), state.range(0)});
90+
for (auto _ : state)
91+
{
// The output container is created inside the timed loop.
92+
xt::xtensor<double, 3> c = xt::xtensor<double, 3>::from_shape({state.range(0), state.range(0), state.range(0)});
93+
for (std::size_t i = 0; i < a.shape()[0]; ++i)
94+
for (std::size_t j = 0; j < a.shape()[1]; ++j)
95+
for (std::size_t k = 0; k < a.shape()[2]; ++k)
// NOTE(review): rank-2 `b` is indexed with three indices; presumably relies
// on xtensor's operator() handling of surplus leading indices -- confirm.
96+
c(i, j, k) = a(i, j, k) + b(i, j, k);
// Keeps the result observable so the loop nest is not eliminated as dead code.
97+
benchmark::DoNotOptimize(c.raw_data());
98+
}
99+
}
// MULTIPLIER and RANGE are defined outside this view.
100+
BENCHMARK(manual_broadcast_xtensor)->RangeMultiplier(MULTIPLIER)->Range(RANGE);
101+
// Benchmark: hand-written triple loop computing c(i,j,k) = a(i,j,k) + b(i,j,k)
// on xt::xarray containers whose extents all equal state.range(0).
102+
void manual_broadcast_xarray(benchmark::State& state)
103+
{
// `a` (three extents) and `b` (two extents) are created with from_shape and
// never written in this function.
// NOTE(review): from_shape presumably leaves elements uninitialized -- confirm.
104+
auto a = xt::xarray<double>::from_shape({state.range(0), state.range(0), state.range(0)});
105+
auto b = xt::xarray<double>::from_shape({state.range(0), state.range(0)});
106+
for (auto _ : state)
107+
{
// The output container is created inside the timed loop.
108+
xt::xarray<double> c = xt::xarray<double>::from_shape({state.range(0), state.range(0), state.range(0)});
109+
for (std::size_t i = 0; i < a.shape()[0]; ++i)
110+
for (std::size_t j = 0; j < a.shape()[1]; ++j)
111+
for (std::size_t k = 0; k < a.shape()[2]; ++k)
// NOTE(review): `b` was created with two extents but is indexed with three
// indices; presumably relies on xtensor's operator() handling of surplus
// leading indices -- confirm.
112+
c(i, j, k) = a(i, j, k) + b(i, j, k);
// Keeps the result observable so the loop nest is not eliminated as dead code.
113+
benchmark::DoNotOptimize(c.raw_data());
114+
}
115+
}
// MULTIPLIER and RANGE are defined outside this view.
116+
BENCHMARK(manual_broadcast_xarray)->RangeMultiplier(MULTIPLIER)->Range(RANGE);
47117

48118
#ifdef HAS_PYTHONIC
49-
void pythonic_broadcasting(benchmark::State& state)
50-
{
51-
auto x = pythonic::numpy::random::rand(state.range(0), state.range(0), state.range(0));
52-
auto y = pythonic::numpy::random::rand(state.range(0), state.range(0));
// Benchmark (compiled only when HAS_PYTHONIC is defined): evaluates `x + y`
// into a pythonic ndarray<double, 3> on every iteration. `x` is created with
// three extents and `y` with two, all equal to state.range(0).
// The removed ("-") and added ("+") lines in this hunk carry the same tokens;
// per the commit message the change here is tab removal only.
119+
void pythonic_broadcasting(benchmark::State& state)
120+
{
// Operands are created once, outside the timed loop.
121+
auto x = pythonic::numpy::random::rand(state.range(0), state.range(0), state.range(0));
122+
auto y = pythonic::numpy::random::rand(state.range(0), state.range(0));
53123

54124
for (auto _ : state)
55-
{
56-
pythonic::types::ndarray<double, 3> z = x + y;
57-
benchmark::DoNotOptimize(z.fbegin());
58-
}
59-
}
60-
BENCHMARK(pythonic_broadcasting)->RangeMultiplier(MULTIPLIER)->Range(RANGE);
125+
{
// Materializing the sum into `z` is the timed work.
126+
pythonic::types::ndarray<double, 3> z = x + y;
// Keeps the result observable so the computation is not eliminated as dead code.
127+
benchmark::DoNotOptimize(z.fbegin());
128+
}
129+
}
// MULTIPLIER and RANGE are defined outside this view.
130+
BENCHMARK(pythonic_broadcasting)->RangeMultiplier(MULTIPLIER)->Range(RANGE);
61131
#endif
62132

63133
}

0 commit comments

Comments
 (0)