Skip to content

Commit 9057609

Browse files
committed
Added variant of combine_factors that reports unused combinations.
1 parent 59d4aa9 commit 9057609

File tree

2 files changed

+177
-0
lines changed

2 files changed

+177
-0
lines changed

include/scran_aggregate/combine_factors.hpp

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,86 @@ std::vector<std::vector<Factor_> > combine_factors(size_t n, const std::vector<c
9393
return output;
9494
}
9595

96+
/**
97+
* This function is a variation of `combine_factors()` that considers unobserved combinations of factor levels.
98+
*
99+
* @tparam Factor_ Factor type.
100+
* Any type may be used here as long as it is comparable.
101+
* @tparam Number_ Integer type for the number of levels in each factor.
102+
* @tparam Combined_ Integer type for the combined factor.
103+
* This should be large enough to hold the number of unique (possibly unused) combinations.
104+
*
105+
* @param n Number of observations (i.e., cells).
106+
* @param[in] factors Vector of pairs, each of which corresponds to a factor.
107+
* The first element of the pair is a pointer to an array of length `n`, containing the factor level for each observation.
108+
* The second element is the total number of levels for this factor, which may be greater than the largest observed level.
109+
* @param[out] combined Pointer to an array of length `n` in which the combined factor is to be stored.
110+
* On output, each entry determines the corresponding observation's combination of levels by indexing into the inner vectors of the returned object;
111+
* see the argument of the same name in `combine_factors()` for more details.
112+
*
113+
* @return
114+
* Vector of vectors containing each unique combination of factor levels.
115+
* This has the same structure as the output of `combine_factors()`,
116+
* with the only difference being that unobserved combinations are also reported.
117+
*/
118+
template<typename Factor_, typename Number_, typename Combined_>
std::vector<std::vector<Factor_> > combine_factors_unused(size_t n, const std::vector<std::pair<const Factor_*, Number_> >& factors, Combined_* combined) {
    // Implementation notes:
    // - The combined index of an observation is the mixed-radix number formed by its
    //   factor levels, where the first factor is the slowest-changing digit and the
    //   last factor is the fastest-changing one.
    // - The returned vectors enumerate every combination in that same order, so
    //   `output[f][combined[i]]` is the level of factor `f` for observation `i`.
    // - If any factor declares zero levels, there are no combinations at all and
    //   every returned inner vector is empty.
    const size_t nfac = factors.size();
    std::vector<std::vector<Factor_> > output(nfac);

    if (nfac == 0) {
        // No factors: every observation belongs to the same (trivial) combination.
        std::fill_n(combined, n, 0);
        return output;
    }

    // Build the combined index, iterating from the last (fastest) factor to the first.
    std::copy_n(factors[nfac - 1].first, n, combined);
    Combined_ mult = factors[nfac - 1].second;
    for (size_t f = nfac - 1; f > 0; --f) {
        const auto& finfo = factors[f - 1];
        auto ff = finfo.first;
        for (size_t i = 0; i < n; ++i) {
            combined[i] += mult * ff[i];
        }
        mult *= finfo.second;
    }

    // 'mult' is now the product of all level counts, i.e., the number of combinations.
    const size_t ncombos = static_cast<size_t>(mult);
    if (ncombos == 0) {
        // At least one factor has no levels; bail out before dividing by its level count.
        return output;
    }

    // For each factor, every level is repeated 'inner_repeats' times in a row (once for
    // each combination of the faster-changing factors), and that whole pattern is
    // repeated 'outer_repeats' times (once for each combination of the slower ones).
    size_t inner_repeats = 1;
    size_t outer_repeats = ncombos;
    for (size_t f = nfac; f > 0; --f) {
        const Number_ nlevels = factors[f - 1].second;
        outer_repeats /= static_cast<size_t>(nlevels);

        auto& out = output[f - 1];
        out.resize(ncombos);

        // Write each run directly into place rather than inserting a range of the
        // vector into itself, which is not permitted for sequence containers.
        auto oIt = out.begin();
        for (size_t r = 0; r < outer_repeats; ++r) {
            for (Number_ l = 0; l < nlevels; ++l) {
                std::fill_n(oIt, inner_repeats, static_cast<Factor_>(l));
                oIt += inner_repeats;
            }
        }

        inner_repeats *= static_cast<size_t>(nlevels);
    }

    return output;
}
175+
96176
}
97177

98178
#endif

tests/src/combine_factors.cpp

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,13 @@ TEST(CombineFactors, Simple) {
4545
std::vector<int> levels { 1, 3, 5, 7, 9 };
4646
EXPECT_EQ(combined.first[0], levels);
4747
}
48+
49+
// Nothing at all.
50+
{
51+
auto combined = test_combine_factors(10, std::vector<const int*>{});
52+
EXPECT_EQ(combined.second, std::vector<int>(10));
53+
EXPECT_TRUE(combined.first.empty());
54+
}
4855
}
4956

5057
TEST(CombineFactors, Multiple) {
@@ -101,3 +108,93 @@ TEST(CombineFactors, Multiple) {
101108
EXPECT_EQ(factor2, combined.first[1]);
102109
}
103110
}
111+
112+
template<typename Factor_, typename Number_>
113+
std::pair<std::vector<std::vector<Factor_> >, std::vector<int> > test_combine_factors_unused(size_t n, const std::vector<std::pair<const Factor_*, Number_> >& factors) {
114+
std::vector<int> combined(n);
115+
auto levels = scran_aggregate::combine_factors_unused(n, factors, combined.data());
116+
return std::make_pair(std::move(levels), std::move(combined));
117+
}
118+
119+
TEST(CombineFactorsUnused, Basic) {
    // A single factor: the combined index is just the factor itself, and all
    // declared levels are reported regardless of whether they were observed.
    {
        const std::vector<int> observed{ 1, 3, 5, 3, 1 };
        std::vector<std::pair<const int*, int> > factors;
        factors.emplace_back(observed.data(), 7);

        const auto res = test_combine_factors_unused(observed.size(), factors);
        EXPECT_EQ(res.second, observed);

        const std::vector<int> all_levels{ 0, 1, 2, 3, 4, 5, 6 };
        EXPECT_EQ(res.first[0], all_levels);
    }

    // No factors at all: everything collapses to combination zero.
    {
        const std::vector<std::pair<const int*, int> > none;
        const auto res = test_combine_factors_unused(10, none);
        EXPECT_EQ(res.second, std::vector<int>(10));
        EXPECT_TRUE(res.first.empty());
    }
}
130+
131+
TEST(CombineFactorsUnused, Multiple) {
    const std::vector<int> first_factor{ 0, 0, 1, 1, 1, 2, 2, 2, 2 };
    const std::vector<int> second_factor{ 0, 1, 2, 0, 1, 2, 0, 1, 2 };
    const std::vector<int> expected_index{ 0, 1, 5, 3, 4, 8, 6, 7, 8 };

    // Both factors declare exactly the levels that appear in the data.
    {
        std::vector<std::pair<const int*, int> > factors;
        factors.emplace_back(first_factor.data(), 3);
        factors.emplace_back(second_factor.data(), 3);
        const auto res = test_combine_factors_unused(first_factor.size(), factors);

        EXPECT_EQ(res.second, expected_index);

        EXPECT_EQ(res.first.size(), 2);
        // The (0, 2) combination is reported even though no observation has it.
        const std::vector<int> want_first{ 0, 0, 0, 1, 1, 1, 2, 2, 2 };
        const std::vector<int> want_second{ 0, 1, 2, 0, 1, 2, 0, 1, 2 };
        EXPECT_EQ(res.first[0], want_first);
        EXPECT_EQ(res.first[1], want_second);
    }

    // The first factor declares an extra level that never appears in the data.
    {
        std::vector<std::pair<const int*, int> > factors;
        factors.emplace_back(first_factor.data(), 4);
        factors.emplace_back(second_factor.data(), 3);
        const auto res = test_combine_factors_unused(first_factor.size(), factors);

        EXPECT_EQ(res.second, expected_index);

        EXPECT_EQ(res.first.size(), 2);
        // Both (0, 2) and every (3, *) combination are reported despite being unobserved.
        const std::vector<int> want_first{ 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3 };
        const std::vector<int> want_second{ 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2 };
        EXPECT_EQ(res.first[0], want_first);
        EXPECT_EQ(res.first[1], want_second);
    }

    // Three factors with different numbers of levels and a single observation.
    {
        const std::vector<int> single{ 1 };
        std::vector<std::pair<const int*, int> > factors;
        factors.emplace_back(single.data(), 2);
        factors.emplace_back(single.data(), 3);
        factors.emplace_back(single.data(), 4);
        const auto res = test_combine_factors_unused(1, factors);

        EXPECT_EQ(res.second[0], 17); // i.e., 1*3*4 + 1*4 + 1.

        // Reference pattern: each level repeated 'inner' times consecutively,
        // with the whole run of levels repeated 'outer' times.
        auto reference = [](size_t nlevels, size_t inner, size_t outer) -> std::vector<int> {
            std::vector<int> seq;
            seq.reserve(nlevels * inner * outer);
            for (size_t rep = 0; rep < outer; ++rep) {
                for (size_t lev = 0; lev < nlevels; ++lev) {
                    seq.insert(seq.end(), inner, static_cast<int>(lev));
                }
            }
            return seq;
        };

        EXPECT_EQ(res.first[0], reference(2, 12, 1));
        EXPECT_EQ(res.first[1], reference(3, 4, 2));
        EXPECT_EQ(res.first[2], reference(4, 1, 6));
    }
}

0 commit comments

Comments
 (0)