forked from EPFL-LAP/dynamatic
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathHandshakeMaterialize.cpp
370 lines (327 loc) · 15.1 KB
/
HandshakeMaterialize.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
//===- HandshakeMaterialize.cpp - Materialize Handshake IR ------*- C++ -*-===//
//
// Dynamatic is under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Implements the Handshake materialization pass using a mix of simple rewrite
// steps and a couple rewrite patterns applied greedily on the IR.
//
//===----------------------------------------------------------------------===//
#include "dynamatic/Transforms/HandshakeMaterialize.h"
#include "dynamatic/Dialect/Handshake/HandshakeCanonicalize.h"
#include "dynamatic/Dialect/Handshake/HandshakeOps.h"
#include "dynamatic/Dialect/Handshake/MemoryInterfaces.h"
#include "dynamatic/Support/CFG.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/Support/ErrorHandling.h"
#include <iterator>
using namespace mlir;
using namespace dynamatic;
/// Determines whether the value should be concerned by materialization rules;
/// only SSA values with dataflow semantics must have a single use.
static inline bool eligibleForMaterialization(Value val) {
  Type valType = val.getType();
  return isa<handshake::ControlType>(valType) ||
         isa<handshake::ChannelType>(valType);
}
LogicalResult dynamatic::verifyIRMaterialized(handshake::FuncOp funcOp) {
  // Verifies that `val` (the `idx`-th "desc" of `op`) is used exactly once,
  // emitting an error on `op` otherwise
  auto checkUses = [](Operation *op, Value val, StringRef desc,
                      unsigned idx) -> LogicalResult {
    if (!eligibleForMaterialization(val))
      return success();
    // `use_empty`/`hasOneUse` are constant-time; no need to walk the entire
    // use list with `std::distance` just to compare the count against 0/1
    if (val.use_empty())
      return op->emitError() << desc << " " << idx << " has no uses.";
    if (!val.hasOneUse())
      return op->emitError() << desc << " " << idx << " has multiple uses.";
    return success();
  };

  // Check function arguments
  for (BlockArgument funcArg : funcOp.getBody().getArguments()) {
    if (failed(checkUses(funcOp, funcArg, "function argument",
                         funcArg.getArgNumber())))
      return failure();
  }

  // Check results of operations; nothing is mutated during the walk so a
  // plain range (rather than `make_early_inc_range`) is sufficient
  for (Operation &op : funcOp.getOps()) {
    for (OpResult res : op.getResults()) {
      if (failed(checkUses(&op, res, "result", res.getResultNumber())))
        return failure();
    }
  }
  return success();
}
LogicalResult dynamatic::verifyIRMaterialized(mlir::ModuleOp modOp) {
  // The module is materialized iff every Handshake function inside it is;
  // `all_of` short-circuits on the first failing function, like the original
  // early return did
  bool allMaterialized = llvm::all_of(
      modOp.getOps<handshake::FuncOp>(), [](handshake::FuncOp funcOp) {
        return succeeded(verifyIRMaterialized(funcOp));
      });
  return success(allMaterialized);
}
/// Replaces the first use of `oldVal` by `newVal` in the operation's operands.
/// Asserts if the operation's operands do not contain the old value.
static void replaceFirstUse(Operation *op, Value oldVal, Value newVal) {
  // Walk operands as `OpOperand`s so the match can be rewritten in place
  for (OpOperand &oprd : op->getOpOperands()) {
    if (oprd.get() == oldVal) {
      oprd.set(newVal);
      return;
    }
  }
  llvm_unreachable("failed to find operation operand");
}
/// Materializes a value, potentially placing an eager fork (if it has more than
/// one uses) or a sink (if it has no uses) to ensure that it is used exactly
/// once.
static void materializeValue(Value val, OpBuilder &builder) {
  if (!eligibleForMaterialization(val))
    return;

  if (val.use_empty()) {
    // Unused values must be consumed by a sink
    builder.setInsertionPointAfterValue(val);
    builder.create<handshake::SinkOp>(val.getLoc(), val);
    return;
  }
  if (val.hasOneUse())
    return;

  // The value has multiple uses, collect its owners; the vector's size also
  // yields the use count, so no separate `std::distance` pass over the use
  // list is needed
  SmallVector<Operation *> valUsers;
  for (OpOperand &oprd : val.getUses())
    valUsers.push_back(oprd.getOwner());

  // Insert a fork with as many results as the value has uses
  builder.setInsertionPointAfterValue(val);
  auto forkOp = builder.create<handshake::ForkOp>(val.getLoc(), val,
                                                  valUsers.size());
  if (Operation *defOp = val.getDefiningOp())
    inheritBB(defOp, forkOp);

  // Replace original uses of the value with the fork's results
  for (auto [user, forkRes] : llvm::zip_equal(valUsers, forkOp->getResults()))
    replaceFirstUse(user, val, forkRes);
}
/// Promotes some eager forks to lazy forks to ensure that different group
/// allocations to the same LSQ do not arrive to the LSQ on the same cycle. This
/// assumes that the IR is materialized.
static void promoteEagerToLazyForks(handshake::FuncOp funcOp) {
  // Associate all eager forks feeding group allocation signals to any LSQ to
  // the set of their results that must become lazy
  DenseMap<handshake::ForkOp, SetVector<Value>> lazyChannels;
  for (auto lsqOp : funcOp.getOps<handshake::LSQOp>()) {
    LSQPorts lsqPorts = lsqOp.getPorts();
    ValueRange lsqInputs = lsqOp.getOperands();
    for (LSQGroup &group : lsqPorts.getGroups()) {
      // The control signal that performs this group's allocation in the LSQ
      Value groupCtrl = lsqInputs[group->ctrlPort->getCtrlInputIndex()];
      Operation *ctrlDefOp = groupCtrl.getDefiningOp();
      // There can only be other control paths to the same LSQ if the defining
      // operation is a fork (eager or lazy). The defining operation is allowed
      // to be `nullptr` if the control value is a function argument
      if (isa_and_present<handshake::LazyForkOp>(ctrlDefOp)) {
        // If there is already a lazy fork in the IR for some reason, assume
        // someone knows what they are doing and do not demote any result
        continue;
      }
      auto forkOp = dyn_cast_if_present<handshake::ForkOp>(ctrlDefOp);
      if (!forkOp)
        continue;
      // Find outputs of the control fork which are part of the memory network
      // and add them to the set of fork results that must be lazy. A single
      // control path means that no other group allocation is reachable from the
      // fork, so the result can be eager
      SmallVector<Value> ctrlResults = lsqOp.getControlPaths(forkOp);
      assert(!ctrlResults.empty() && "at least one control path must exist");
      if (ctrlResults.size() > 1)
        lazyChannels[forkOp].insert(ctrlResults.begin(), ctrlResults.end());
    }
  }
  // Promote eager fork results to lazy where necessary, replacing each
  // affected eager fork with a lazy fork (and, when some outputs may stay
  // eager, a trailing eager fork fed by the lazy fork)
  OpBuilder builder(funcOp->getContext());
  for (auto &[forkOp, lazyResults] : lazyChannels) {
    unsigned numLazyResults = lazyResults.size();
    bool createEagerFork = numLazyResults < forkOp->getNumResults();
    if (createEagerFork) {
      // To minimize damage to performance, as many outputs of the control fork
      // as possible should remain "eager". We achieve this by creating an eager
      // fork after the lazy fork that handles token duplication outside the
      // memory control network. The lazy fork needs an extra output to feed the
      // eager fork
      ++numLazyResults;
    }
    builder.setInsertionPoint(forkOp);
    handshake::LazyForkOp lazyForkOp = builder.create<handshake::LazyForkOp>(
        forkOp->getLoc(), forkOp.getOperand(), numLazyResults);
    inheritBB(forkOp, lazyForkOp);
    // Replace the original fork's outputs that are part of the memory control
    // network with the first lazy fork's outputs
    for (auto [from, to] : llvm::zip(lazyResults, lazyForkOp->getResults()))
      from.replaceAllUsesWith(to);
    if (createEagerFork) {
      // If some of the control fork's result go outside the memory control
      // network, create an eager fork fed by the lazy fork's last result
      unsigned numEagerResults = forkOp->getNumResults() - lazyResults.size();
      handshake::ForkOp eagerForkOp = builder.create<handshake::ForkOp>(
          forkOp->getLoc(), lazyForkOp->getResults().back(), numEagerResults);
      inheritBB(forkOp, eagerForkOp);
      // Replace the control fork's outputs that do not belong to the memory
      // control network with the eager fork's results
      ValueRange eagerResults = eagerForkOp.getResult();
      auto eagerForkResIt = eagerResults.begin();
      for (OpResult res : forkOp.getResults()) {
        // Results in `lazyResults` were already rewired to the lazy fork above
        if (!lazyResults.contains(res))
          res.replaceAllUsesWith(*(eagerForkResIt++));
      }
      // Every eager fork result must have been consumed by the loop above
      assert(eagerForkResIt == eagerResults.end() &&
             "did not exhaust iterator");
    }
    // Erase the original fork whose results are now unused
    forkOp->erase();
  }
}
namespace {
/// Removes outputs of forks that do not have real uses. This can result in the
/// size reduction or deletion of fork operations (the latter if none of the
/// fork results have real users) as well as sink users of fork results.
struct MinimizeForkSizes : OpRewritePattern<handshake::ForkOp> {
  using OpRewritePattern<handshake::ForkOp>::OpRewritePattern;
  LogicalResult matchAndRewrite(handshake::ForkOp forkOp,
                                PatternRewriter &rewriter) const override {
    // Compute the list of fork results that are actually used (erase any sink
    // user along the way)
    SmallVector<Value> usedForkResults;
    for (OpResult res : forkOp.getResults()) {
      if (hasRealUses(res)) {
        usedForkResults.push_back(res);
      } else if (!res.use_empty()) {
        // The value has sink users, delete them as the fork producing their
        // operand will be removed
        for (Operation *sinkUser : llvm::make_early_inc_range(res.getUsers()))
          rewriter.eraseOp(sinkUser);
      }
    }
    // Fail if all fork results are used, since it means that no transformation
    // is required
    if (usedForkResults.size() == forkOp->getNumResults())
      return failure();
    if (!usedForkResults.empty()) {
      // Create a new fork operation with one result per real use of the
      // original fork
      rewriter.setInsertionPoint(forkOp);
      handshake::ForkOp newForkOp = rewriter.create<handshake::ForkOp>(
          forkOp.getLoc(), forkOp.getOperand(), usedForkResults.size());
      inheritBB(forkOp, newForkOp);
      // Replace results with actual uses of the original fork with results from
      // the new fork
      ValueRange newResults = newForkOp.getResult();
      for (auto [oldRes, newRes] : llvm::zip(usedForkResults, newResults))
        rewriter.replaceAllUsesWith(oldRes, newRes);
    }
    // The original fork's results are now all unused, so it can be erased
    rewriter.eraseOp(forkOp);
    return success();
  }
};
/// Eliminates forks feeding into other forks by replacing both with a single
/// fork operation.
struct EliminateForksToForks : OpRewritePattern<handshake::ForkOp> {
  using mlir::OpRewritePattern<handshake::ForkOp>::OpRewritePattern;
  LogicalResult matchAndRewrite(handshake::ForkOp forkOp,
                                PatternRewriter &rewriter) const override {
    // The defining operation must be also be a fork for the pattern to apply
    Value forkOprd = forkOp.getOperand();
    auto defForkOp = forkOprd.getDefiningOp<handshake::ForkOp>();
    if (!defForkOp)
      return failure();
    // It is important to take into account whether the matched fork's operand
    // is the single use of the defining fork's corresponding result. If it is
    // not, the new combined fork needs an extra result to replace the defining
    // fork's result with in the other uses
    bool isForkOprdSingleUse = forkOprd.hasOneUse();
    // Create a new combined fork to replace the two others
    unsigned totalNumResults =
        forkOp->getNumResults() + defForkOp.getNumResults();
    if (isForkOprdSingleUse)
      --totalNumResults;
    rewriter.setInsertionPoint(defForkOp);
    handshake::ForkOp newForkOp = rewriter.create<handshake::ForkOp>(
        defForkOp.getLoc(), defForkOp.getOperand(), totalNumResults);
    inheritBB(defForkOp, newForkOp);
    // Replace the defining fork's results with the first results of the new
    // fork (skipping the result feeding the matched fork if it has a single
    // use)
    ValueRange newResults = newForkOp->getResults();
    auto newResIt = newResults.begin();
    for (OpResult defForkRes : defForkOp->getResults()) {
      if (!isForkOprdSingleUse || defForkRes != forkOprd)
        rewriter.replaceAllUsesWith(defForkRes, *(newResIt++));
    }
    // Replace the results of the matched fork with the corresponding results of
    // the new defining fork. Exactly `forkOp.getNumResults()` new results were
    // left unconsumed by the loop above, so `take_back` hands out precisely
    // those
    rewriter.replaceOp(forkOp, newResults.take_back(forkOp.getNumResults()));
    return success();
  }
};
/// Erases forks with a single result unless their operand originates from a
/// lazy fork, in which case they may exist to prevent a combinational cycle.
struct EraseSingleOutputForks : OpRewritePattern<handshake::ForkOp> {
  using mlir::OpRewritePattern<handshake::ForkOp>::OpRewritePattern;
  LogicalResult matchAndRewrite(handshake::ForkOp forkOp,
                                PatternRewriter &rewriter) const override {
    Value forkInput = forkOp.getOperand();
    // Only single-result forks qualify; in addition, a fork fed by a lazy fork
    // may purposely break a combinational cycle between the valid and ready
    // wires, so it must be preserved
    bool isSingleOutput = forkOp.getNumResults() == 1;
    bool fedByLazyFork =
        forkInput.getDefiningOp<handshake::LazyForkOp>() != nullptr;
    if (!isSingleOutput || fedByLazyFork)
      return failure();
    // Forward the operand directly to the fork's single user
    rewriter.replaceOp(forkOp, forkInput);
    return success();
  }
};
/// Driver for the materialization pass, materializing the IR in three
/// sequential steps.
/// 1. First, forks and sinks are inserted within Handshake functions to ensure
/// that every value is used exactly once.
/// 2. Then, unnecessary forks and sinks are erased from the IR in a greedy
/// fashion. If the input IR contained no forks or sinks, this step should do
/// nothing. If the input IR was partially materialized, this may optimize away
/// some of the forks and sinks.
/// 3. Finally, eager forks feeding group allocation signals to LSQs are turned
/// into lazy forks to ensure (together with a correct buffer placement) that
/// multiple group allocations never happen during the same cycle.
struct HandshakeMaterializePass
    : public dynamatic::impl::HandshakeMaterializeBase<
          HandshakeMaterializePass> {
  void runDynamaticPass() override {
    mlir::ModuleOp modOp = getOperation();
    MLIRContext *ctx = &getContext();
    // First make sure that every value within Handshake functions is used
    // exactly once
    OpBuilder builder(ctx);
    for (handshake::FuncOp funcOp : modOp.getOps<handshake::FuncOp>()) {
      for (BlockArgument funcArg : funcOp.getBody().getArguments())
        materializeValue(funcArg, builder);
      for (Operation &op : llvm::make_early_inc_range(funcOp.getOps())) {
        for (OpResult res : op.getResults())
          materializeValue(res, builder);
      }
    }
    // Then, greedily optimize forks
    mlir::GreedyRewriteConfig config;
    config.useTopDownTraversal = true;
    config.enableRegionSimplification = false;
    RewritePatternSet patterns{ctx};
    patterns
        .add<MinimizeForkSizes, EliminateForksToForks, EraseSingleOutputForks>(
            ctx);
    if (failed(
            applyPatternsAndFoldGreedily(modOp, std::move(patterns), config)))
      return signalPassFailure();
    // Finally, promote forks to lazy wherever necessary
    for (handshake::FuncOp funcOp : modOp.getOps<handshake::FuncOp>())
      promoteEagerToLazyForks(funcOp);
    // Verify the materialization invariant explicitly rather than inside an
    // `assert`: an assert is compiled out under NDEBUG, which would silently
    // skip this verification (and elide the call entirely) in release builds
    if (failed(verifyIRMaterialized(modOp)))
      return signalPassFailure();
  }
};
} // namespace
std::unique_ptr<dynamatic::DynamaticPass>
dynamatic::createHandshakeMaterialize() {
  // Build the concrete pass; ownership transfers to the caller, and the
  // derived-to-base pointer conversion happens on return
  auto pass = std::make_unique<HandshakeMaterializePass>();
  return pass;
}