Skip to content
This repository was archived by the owner on Mar 30, 2022. It is now read-only.

Commit 225ed73

Browse files
author
Eugene Burmako
authored
Data model and parser for SIL (#227)
* Data model and parser for SIL This is something that @apaszke-g and I have hacked up last week. This is an alternative and complementary approach to metaprogramming Swift with quasiquotes (#225). It will be interesting to explore both. * Update .swift-format to match tensorflow/swift-apis * Reformat code * Rename ALL_CAPS constants to camelCase
1 parent 42591cd commit 225ed73

23 files changed

+2915
-1
lines changed

.gitignore

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
### SwiftPM ###
2+
Packages
3+
.build/
4+
xcuserdata
5+
DerivedData/
6+
*.xcodeproj
7+
*~
8+
*.vscode
9+
*.idea
10+
11+
### MacOS ###
12+
.DS_Store

.swift-format

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
{
2+
"version": 1,
3+
"lineLength": 100,
4+
"indentation": {
5+
"spaces": 4
6+
},
7+
"maximumBlankLines": 1,
8+
"respectsExistingLineBreaks": true,
9+
"blankLineBetweenMembers": {
10+
"ignoreSingleLineProperties": true
11+
},
12+
"lineBreakBeforeControlFlowKeywords": false,
13+
"lineBreakBeforeEachArgument": false
14+
}

Package.swift

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
// swift-tools-version:4.2
2+
// The swift-tools-version declares the minimum version of Swift required to build this package.
3+
import PackageDescription
4+
5+
// Manifest for the SIL package: one dynamic library product backed by the `SIL` target,
// plus a test target that depends on it.
let package = Package(
    name: "SIL",
    products: [
        .library(name: "SIL", type: .dynamic, targets: ["SIL"])
    ],
    dependencies: [],
    targets: [
        .target(name: "SIL", dependencies: []),
        .testTarget(name: "SILTests", dependencies: ["SIL"], path: "Tests/SILTests"),
    ]
)

Sources/SIL/Bitcode.swift

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
/// A single record read from a bitcode block: a record code followed by its operands.
struct BitcodeRecord {
    /// Code identifying the record.
    var code: Bits

    /// Operands of the record, in the order they were read.
    var ops: [BitcodeOperand]
}
5+
6+
/// Describes how one operand of an abbreviated record is encoded in the stream.
enum OperandKind {
    /// A constant stored in the abbreviation itself.
    case literal(_ value: Bits)

    /// A field that is `width` bits wide.
    case fixed(_ width: Int)

    /// A variable-bit-rate field read in chunks of `width` bits.
    case vbr(_ width: Int)

    /// A sequence of operands that all share the `element` encoding.
    /// This is the only recursive case, so it alone needs `indirect`.
    indirect case array(_ element: OperandKind)

    case char6
    case blob
}
14+
15+
/// A decoded record operand: a plain bit value, a blob, or an array of operands.
indirect enum BitcodeOperand {
    case bits(_ value: Bits)
    case blob(_ value: Bits)
    case array(_ values: [BitcodeOperand])

    /// The payload if this operand is `.bits`, otherwise `nil`.
    var bits: Bits? {
        if case .bits(let payload) = self {
            return payload
        }
        return nil
    }
}
25+
26+
/// The operand layout of an abbreviation: the `OperandKind` of each operand, in order.
typealias Structure = [OperandKind]
27+
28+
/// Metadata attached to a block ID: an optional name, record names and abbreviations.
class BitcodeBlockInfo {
    var id: Bits
    var name: String?
    var recordNames: [Bits: String] = [:]
    var abbreviations: [Bits: Structure] = [:]

    /// Creates empty info for the block with the given ID.
    init(id: Bits) {
        self.id = id
    }

    /// Creates an independent copy of `other`.
    // NB: This copies the structure, because all members are value types
    init(from other: BitcodeBlockInfo) {
        self.id = other.id
        self.name = other.name
        self.recordNames = other.recordNames
        self.abbreviations = other.abbreviations
    }
}
45+
46+
/// A parsed block: its info, the records it contains and its nested subblocks.
class BitcodeBlock {
    var info: BitcodeBlockInfo
    var records: [BitcodeRecord] = []
    var subblocks: [BitcodeBlock] = []

    /// Width in bits of the abbreviation IDs used inside this block.
    let abbrLen: Int

    /// The block length value read from the block header.
    let blockLen32: Int

    /// Creates a block that uses an existing info object.
    init(info: BitcodeBlockInfo, abbrLen: Int, blockLen32: Int) {
        self.info = info
        self.abbrLen = abbrLen
        self.blockLen32 = blockLen32
    }

    /// Creates a block with fresh, empty info for the given block ID.
    convenience init(id: Bits, abbrLen: Int, blockLen32: Int) {
        let freshInfo = BitcodeBlockInfo(id: id)
        self.init(info: freshInfo, abbrLen: abbrLen, blockLen32: blockLen32)
    }
}

Sources/SIL/BitcodeParser.swift

Lines changed: 264 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,264 @@
1+
import Foundation
2+
3+
/// Parses a bitstream into a tree of `BitcodeBlock`s.
///
/// The parser keeps a stack of the blocks that are currently open; records and subblocks are
/// appended to the block on top of the stack. The info block is not stored as a subblock:
/// its contents go into `blockInfoTemplates` and are copied into every later block with a
/// matching ID.
class BitcodeParser {
    // Block ID: Default block info
    var blockInfoTemplates: [Bits: BitcodeBlockInfo] = [:]
    // NB: The choice of id is a bit questionable in here (0 is reserved for
    //     the info block), but I didn't feel like making the field optional.
    var blockStack: [BitcodeBlock] = [BitcodeBlock(id: 0, abbrLen: 2, blockLen32: 0)]
    var currentBlock: BitcodeBlock { blockStack.last! }
    var stream: Bitstream

    // Builtin abbreviation IDs
    let endBlock: Bits = 0
    let enterSubBlock: Bits = 1
    let defineAbbrev: Bits = 2
    let unabbrevRecord: Bits = 3

    // Builtin abbreviations within the info block
    let setbid: Bits = 1
    let blockName: Bits = 2
    let setRecordName: Bits = 3

    // Block ID of the info block
    let blockInfoId: Bits = 0

    enum Error: Swift.Error {
        case unsupportedBlockInfoAbbrev(_ code: Bits)
        case unsupportedBlockInfoRecord(_ code: Bits)
        case unsupportedRecordId(_ operand: BitcodeOperand)
        case parseError(_ reason: String?)
    }

    init(_ stream: Bitstream) {
        self.stream = stream
    }

    /// Reads a fixed-width field of `width` bits.
    func read(fixed width: Int) throws -> Bits {
        return try stream.next(bits: width)
    }

    /// Reads a variable-bit-rate field: chunks of `width - 1` payload bits, each followed by
    /// one continuation bit, until a continuation bit of 0 is seen.
    func read(vbr width: Int) throws -> Bits {
        assert(width >= 1, "VBR fields cannot have a width smaller than 1")
        var chunks: [Bits] = []
        repeat {
            chunks.append(try stream.next(bits: width - 1))
        } while (try stream.next(bits: 1)) == 1
        return Bits.join(chunks)
    }

    /// Reads one operand encoded as described by `desc`.
    ///
    /// - Throws: `Error.parseError` for char6 operands, which are not supported, plus any
    ///   error raised by the underlying stream.
    func read(desc: OperandKind) throws -> BitcodeOperand {
        switch desc {
        case let .literal(value):
            // Literals live in the abbreviation; nothing is consumed from the stream.
            return .bits(value)
        case let .fixed(width):
            return try .bits(read(fixed: width))
        case let .vbr(width):
            return try .bits(read(vbr: width))
        case let .array(elementDesc):
            let length = try read(vbr: 6).asInt()
            return try .array((0..<length).map { _ in try read(desc: elementDesc) })
        case .char6:
            // Report the unsupported encoding as an error instead of aborting the process.
            throw Error.parseError("Char6 not supported")
        case .blob:
            // The blob payload is 32-bit aligned on both sides.
            let length = try read(vbr: 6).asInt()
            stream.align(toMultipleOf: 32)
            let result = try stream.next(bits: length * 8)
            stream.align(toMultipleOf: 32)
            return .blob(result)
        }
    }

    /// Parses the body of an unabbreviated record.
    func parseUnabbrevRecord() throws -> BitcodeRecord {
        // [unabbrevRecord, code(vbr6), numops(vbr6), op0(vbr6), op1(vbr6), ...]
        let code = try read(vbr: 6)
        let numOps = try read(vbr: 6).asUInt32()
        var ops: [BitcodeOperand] = []
        // Half-open range: a record with zero operands is legal, and `1...0` would trap.
        for _ in 0..<numOps {
            ops.append(try .bits(read(vbr: 6)))
        }
        return BitcodeRecord(code: code, ops: ops)
    }

    /// Parses one operand description of an abbreviation definition.
    ///
    /// - Returns: The operand kind, and `complexity`: how many of the abbreviation's declared
    ///   operands were consumed (an array consumes its own slot plus its element type's).
    func parseFieldType() throws -> (result: OperandKind, complexity: Int) {
        let isLiteral = try read(fixed: 1)
        if isLiteral == 1 {
            return (result: .literal(try read(vbr: 8)), complexity: 1)
        }
        let encoding = try read(fixed: 3)
        switch encoding.asUInt8() {
        case 1:
            return (result: .fixed(try read(vbr: 5).asInt()), complexity: 1)
        case 2:
            return (result: .vbr(try read(vbr: 5).asInt()), complexity: 1)
        case 3:
            let element = try parseFieldType()
            return (result: .array(element.result), complexity: element.complexity + 1)
        case 4:
            return (result: .char6, complexity: 1)
        case 5:
            return (result: .blob, complexity: 1)
        default:
            throw Error.parseError("Unknown record field encoding: " + String(encoding.asUInt8()))
        }
    }

    /// Parses the body of an abbreviation definition into its operand layout.
    func parseAbbrevStructure() throws -> Structure {
        let numOps = try read(vbr: 5).asUInt32()
        var structure: Structure = []
        var consumed = 0
        while consumed < numOps {
            let field = try parseFieldType()
            structure.append(field.result)
            consumed += field.complexity
        }
        return structure
    }

    /// Parses the contents of the info block into `blockInfoTemplates`.
    ///
    /// - Parameter abbrLen: Width in bits of abbreviation IDs inside the info block.
    /// - Throws: `Error.unsupportedBlockInfoAbbrev` / `Error.unsupportedBlockInfoRecord` for
    ///   anything other than the supported records, and `Error.parseError` for records with
    ///   the wrong number of operands.
    func parseInfoBlock(abbrLen: Int) throws {
        var currentInfo: BitcodeBlockInfo?
        while true {
            let abbrev = try stream.next(bits: abbrLen)
            switch abbrev {
            case endBlock:
                // [endBlock, <align32bits>] -- same trailer as for regular blocks, so the
                // parent block resumes reading at the right position.
                stream.align(toMultipleOf: 32)
                return
            case unabbrevRecord:
                let record = try parseUnabbrevRecord()
                // Unabbreviated records have no structure, so the cast to bits is safe
                let ops = record.ops.map { $0.bits! }
                switch record.code {
                case setbid:
                    guard ops.count == 1 else {
                        throw Error.parseError("setbid record must have exactly one operand")
                    }
                    let blockId = ops[0]
                    if blockInfoTemplates[blockId] == nil {
                        blockInfoTemplates[blockId] = BitcodeBlockInfo(id: blockId)
                    }
                    currentInfo = blockInfoTemplates[blockId]
                case blockName:
                    let nameBytes = ops.map { $0.asUInt8() }
                    // A name that is not valid UTF-8 is skipped.
                    if let name = String(bytes: nameBytes, encoding: .utf8) {
                        currentInfo?.name = name
                    }
                case setRecordName:
                    guard let recordId = ops.first else {
                        throw Error.parseError("setRecordName record is missing the record id")
                    }
                    let nameBytes = ops.dropFirst().map { $0.asUInt8() }
                    // A name that is not valid UTF-8 is skipped.
                    if let name = String(bytes: nameBytes, encoding: .utf8) {
                        currentInfo?.recordNames[recordId] = name
                    }
                default:
                    throw Error.unsupportedBlockInfoRecord(record.code)
                }
            default:
                throw Error.unsupportedBlockInfoAbbrev(abbrev)
            }
        }
    }

    /// Parses a record whose layout is given by a previously defined abbreviation.
    ///
    /// - Throws: `Error.parseError` if the abbreviation has no operands, and
    ///   `Error.unsupportedRecordId` if the record code is not a plain bit value.
    func parseAbbrevRecord(_ structure: Structure) throws -> BitcodeRecord {
        guard let codeDesc = structure.first else {
            throw Error.parseError("Abbreviation has no operands")
        }
        let codeOperand = try read(desc: codeDesc)
        let ops = try structure.dropFirst().map { try read(desc: $0) }
        // XXX: Ok, so here we make an assumption that the record code is not encoded
        //      using a blob or an array which I guess should be reasonable?
        guard let code = codeOperand.bits else {
            throw Error.unsupportedRecordId(codeOperand)
        }
        return BitcodeRecord(code: code, ops: ops)
    }

    /// Parses the remainder of the stream and returns the root block.
    ///
    /// Implemented as a loop rather than one recursive call per item, so stack usage does
    /// not grow with the number of records in the file.
    ///
    /// - Throws: `Error.parseError` on an unbalanced `endBlock`, an undeclared abbreviation,
    ///   or when the stream ends while blocks are still open.
    func parse() throws -> BitcodeBlock {
        while !stream.isEmpty {
            let abbrev = try stream.next(bits: currentBlock.abbrLen)
            switch abbrev {
            case endBlock:
                // [endBlock, <align32bits>]
                stream.align(toMultipleOf: 32)

                _ = blockStack.popLast()
                guard !blockStack.isEmpty else {
                    throw Error.parseError("Unexpected endBlock")
                }
            case enterSubBlock:
                // [enterSubBlock, blockid(vbr8), newabbrevlen(vbr4), <align32bits>, blocklen_32]
                let blockId = try read(vbr: 8)
                let newAbbrevLenBits = try read(vbr: 4)
                stream.align(toMultipleOf: 32)
                let blockLen32 = try Int(stream.next(bits: 32).asUInt32())

                let newAbbrevLen = newAbbrevLenBits.asInt()
                // BLOCKINFO block is a bit special and we'll reparse it
                // into blockInfoTemplates instead of having it as a subblock
                if blockId == blockInfoId {
                    try parseInfoBlock(abbrLen: newAbbrevLen)
                } else {
                    let subblockInfo: BitcodeBlockInfo
                    if let template = blockInfoTemplates[blockId] {
                        // Copy, so abbreviations defined inside the block do not leak
                        // back into the template.
                        subblockInfo = BitcodeBlockInfo(from: template)
                    } else {
                        subblockInfo = BitcodeBlockInfo(id: blockId)
                    }
                    let subblock = BitcodeBlock(
                        info: subblockInfo, abbrLen: newAbbrevLen, blockLen32: blockLen32)
                    currentBlock.subblocks.append(subblock)
                    blockStack.append(subblock)
                    // XXX: At this point subblock is the currentBlock
                }
            case defineAbbrev:
                // NB: Abbreviation IDs are assigned in order of their declaration,
                //     but starting from 4 (because there are 4 builtin abbrevs).
                let abbrevId = Bits(currentBlock.info.abbreviations.count + 4)
                currentBlock.info.abbreviations[abbrevId] = try parseAbbrevStructure()
            case unabbrevRecord:
                currentBlock.records.append(try parseUnabbrevRecord())
            default:  // Abbreviated record
                guard let structure = currentBlock.info.abbreviations[abbrev] else {
                    throw Error.parseError("Undeclared abbreviation: " + abbrev.description)
                }
                currentBlock.records.append(try parseAbbrevRecord(structure))
            }
        }

        guard blockStack.count == 1 else {
            throw Error.parseError(
                "End of stream encountered with some blocks still open")
        }
        return blockStack[0]
    }
}
244+
245+
// Four-byte magic number that `loadSIBBitcode(fromPath:)` expects at the start of a file.
let SIB_MAGIC: [UInt8] = [0xE2, 0x9C, 0xA8, 0x0E]
246+
247+
/// Failures that can occur while opening a SIB file, before bitcode parsing starts.
enum SIBFileError: Error {
    /// The file at the given path could not be opened for reading.
    case cannotOpenFile

    /// The first four bytes of the file are not the SIB magic number.
    case incorrectMagic
}
251+
252+
/// Reads the file at `path`, checks its magic number and parses the rest as bitcode.
///
/// - Parameter path: Path of the SIB file to load.
/// - Returns: The root block produced by `BitcodeParser.parse()`.
/// - Throws: `SIBFileError.cannotOpenFile` if the file cannot be opened,
///   `SIBFileError.incorrectMagic` if the first four bytes differ from `SIB_MAGIC`,
///   and any error raised while reading or parsing the stream.
func loadSIBBitcode(fromPath path: String) throws -> BitcodeBlock {
    guard let handle = FileHandle(forReadingAtPath: path) else {
        throw SIBFileError.cannotOpenFile
    }

    var stream = Bitstream(handle.readDataToEndOfFile())
    guard try stream.next(bytes: 4) == SIB_MAGIC else {
        throw SIBFileError.incorrectMagic
    }

    // The parser receives the stream after the magic bytes have been consumed.
    return try BitcodeParser(stream).parse()
}

0 commit comments

Comments
 (0)