Skip to content

Commit a28aec8

Browse files
authored
On-the-fly local file headers parsing (#12)
1 parent ac14d33 commit a28aec8

15 files changed

+196
-162
lines changed

Sources/ZIPFoundation/Archive+Progress.swift

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -16,12 +16,12 @@ extension Archive {
1616
///
1717
/// - Parameter entry: The entry that will be removed.
1818
/// - Returns: The number of the work units.
/// - Parameter localFileHeader: The local file header of `entry`, needed to compute its local size.
/// - Throws: Rethrows any error raised by `entry.localSize(with:)`.
func totalUnitCountForRemoving(_ entry: Entry, localFileHeader: LocalFileHeader) throws -> Int64 {
    // Work units = offset of the central directory minus the local size of the removed entry.
    let removedSize = try entry.localSize(with: localFileHeader)
    let remaining = offsetToStartOfCentralDirectory - removedSize
    return Int64(remaining)
}
2222

/// Creates a `Progress` sized for the removal of `entry`.
///
/// - Parameters:
///   - entry: The entry that will be removed.
///   - localFileHeader: The local file header of `entry`.
/// - Returns: A `Progress` whose total unit count is `totalUnitCountForRemoving(_:localFileHeader:)`.
/// - Throws: Rethrows any error raised while computing the total unit count.
func makeProgressForRemoving(_ entry: Entry, localFileHeader: LocalFileHeader) throws -> Progress {
    let unitCount = try totalUnitCountForRemoving(entry, localFileHeader: localFileHeader)
    return Progress(totalUnitCount: unitCount)
}
2626

2727
/// The number of the work units that have to be performed when

Sources/ZIPFoundation/Archive+Reading.swift

Lines changed: 12 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -91,12 +91,12 @@ extension Archive {
9191
guard bufferSize > 0 else {
9292
throw ArchiveError.invalidBufferSize
9393
}
94-
let localFileHeader = entry.localFileHeader
95-
guard entry.dataOffset <= .max else { throw ArchiveError.invalidLocalHeaderDataOffset }
94+
let localFileHeader = try await localFileHeader(for: entry)
95+
let dataOffset = try entry.dataOffset(with: localFileHeader)
9696

9797
let transaction = try await dataSource.openRead()
9898
var checksum = CRC32(0)
99-
try await transaction.seek(to: entry.dataOffset)
99+
try await transaction.seek(to: dataOffset)
100100
progress?.totalUnitCount = self.totalUnitCountForReading(entry)
101101
switch entry.type {
102102
case .file:
@@ -128,7 +128,6 @@ extension Archive {
128128
try await consumer(Data())
129129
progress?.completedUnitCount = self.totalUnitCountForReading(entry)
130130
case .symlink:
131-
let localFileHeader = entry.localFileHeader
132131
let size = Int(localFileHeader.compressedSize)
133132
let data = try await transaction.read(length: size)
134133
checksum = data.crc32(checksum: 0)
@@ -162,21 +161,18 @@ extension Archive {
162161
guard range.lowerBound >= 0, range.upperBound <= entry.uncompressedSize else {
163162
throw ArchiveError.rangeOutOfBounds
164163
}
165-
let localFileHeader = entry.localFileHeader
166-
guard entry.dataOffset <= .max else {
167-
throw ArchiveError.invalidLocalHeaderDataOffset
168-
}
164+
let localFileHeader = try await localFileHeader(for: entry)
169165

170166
guard let compressionMethod = CompressionMethod(rawValue: localFileHeader.compressionMethod) else {
171167
throw ArchiveError.invalidCompressionMethod
172168
}
173169

174170
switch compressionMethod {
175171
case .none:
176-
try await extractStoredRange(range, of: entry, bufferSize: bufferSize, consumer: consumer)
172+
try await extractStoredRange(range, of: entry, localFileHeader: localFileHeader, bufferSize: bufferSize, consumer: consumer)
177173

178174
case .deflate:
179-
try await extractCompressedRange(range, of: entry, bufferSize: bufferSize, consumer: consumer)
175+
try await extractCompressedRange(range, of: entry, localFileHeader: localFileHeader, bufferSize: bufferSize, consumer: consumer)
180176
}
181177
}
182178

@@ -185,11 +181,13 @@ extension Archive {
185181
private func extractStoredRange(
186182
_ range: Range<UInt64>,
187183
of entry: Entry,
184+
localFileHeader: LocalFileHeader,
188185
bufferSize: Int,
189186
consumer: Consumer
190187
) async throws {
188+
let offset = try entry.dataOffset(with: localFileHeader)
191189
let transaction = try await dataSource.openRead()
192-
try await transaction.seek(to: entry.dataOffset + range.lowerBound)
190+
try await transaction.seek(to: offset + range.lowerBound)
193191

194192
_ = try await Data.consumePart(
195193
of: Int64(range.count),
@@ -207,14 +205,16 @@ extension Archive {
207205
private func extractCompressedRange(
208206
_ range: Range<UInt64>,
209207
of entry: Entry,
208+
localFileHeader: LocalFileHeader,
210209
bufferSize: Int,
211210
consumer: Consumer
212211
) async throws {
212+
let offset = try entry.dataOffset(with: localFileHeader)
213213
let transaction = try await dataSource.openRead()
214214
let bytesReadCounter = SharedMutableValue<UInt64>()
215215

216216
do {
217-
try await transaction.seek(to: entry.dataOffset)
217+
try await transaction.seek(to: offset)
218218

219219
_ = try await readCompressed(
220220
transaction: transaction,

Sources/ZIPFoundation/Archive+Writing.swift

Lines changed: 17 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -97,6 +97,8 @@ extension Archive {
9797
compressionMethod: compressionMethod, bufferSize: bufferSize,
9898
progress: progress, provider: provider)
9999
}
100+
101+
didWrite()
100102
}
101103

102104
/// Write files, directories or symlinks to the receiver.
@@ -191,6 +193,8 @@ extension Archive {
191193
try await rollback(transaction, UInt64(fileHeaderStart), (existingData, existingSize), bufferSize, eocdRecord, zip64EOCD)
192194
throw ArchiveError.cancelledOperation
193195
}
196+
197+
didWrite()
194198
}
195199

196200
/// Remove a ZIP `Entry` from the receiver.
@@ -208,14 +212,16 @@ extension Archive {
208212
throw ArchiveError.unwritableArchive
209213
}
210214

215+
let lfh = try await localFileHeader(for: entry)
211216
let transaction = try await dataSource.openWrite()
212217
let (tempArchive, tempDir) = try await self.makeTempArchive()
213218
let tempTransaction = try await tempArchive.dataSource.openWrite()
214219
defer { tempDir.map { try? FileManager().removeItem(at: $0) } }
215-
progress?.totalUnitCount = self.totalUnitCountForRemoving(entry)
220+
progress?.totalUnitCount = try self.totalUnitCountForRemoving(entry, localFileHeader: lfh)
216221
var centralDirectoryData = Data()
217222
var offset: UInt64 = 0
218-
for try await currentEntry in self {
223+
for currentEntry in try await entries() {
224+
let currentEntryLFH = try await localFileHeader(for: currentEntry)
219225
let cds = currentEntry.centralDirectoryStructure
220226
if currentEntry != entry {
221227
let entryStart = cds.effectiveRelativeOffsetOfLocalHeader
@@ -228,13 +234,15 @@ extension Archive {
228234
try await tempTransaction.write(data)
229235
progress?.completedUnitCount += Int64(data.count)
230236
}
231-
guard currentEntry.localSize <= .max else { throw ArchiveError.invalidLocalHeaderSize }
232-
_ = try await Data.consumePart(of: Int64(currentEntry.localSize), chunkSize: bufferSize,
237+
let localSize = try currentEntry.localSize(with: currentEntryLFH)
238+
_ = try await Data.consumePart(of: Int64(localSize), chunkSize: bufferSize,
233239
provider: provider, consumer: consumer)
234240
let updatedCentralDirectory = updateOffsetInCentralDirectory(centralDirectoryStructure: cds,
235241
updatedOffset: entryStart - offset)
236242
centralDirectoryData.append(updatedCentralDirectory.data)
237-
} else { offset = currentEntry.localSize }
243+
} else {
244+
offset = try currentEntry.localSize(with: currentEntryLFH)
245+
}
238246
}
239247

240248
let startOfCentralDirectory = try await tempTransaction.position()
@@ -252,6 +260,8 @@ extension Archive {
252260
self.setEndOfCentralDirectory(ecodStructure)
253261
try await tempTransaction.flush()
254262
try await self.replaceCurrentArchive(with: tempArchive)
263+
264+
didWrite()
255265
}
256266

257267
func replaceCurrentArchive(with archive: Archive) async throws {
@@ -270,6 +280,8 @@ extension Archive {
270280
_ = try fileManager.moveItem(at: archiveURL, to: url)
271281
#endif
272282
self.dataSource = try await FileDataSource(url: url, isWritable: true)
283+
284+
didWrite()
273285
}
274286
}
275287

Sources/ZIPFoundation/Archive.swift

Lines changed: 86 additions & 70 deletions
Original file line number | Diff line number | Diff line change
@@ -52,9 +52,7 @@ let centralDirectoryStructSignature = 0x02014b50
5252
/// var archiveURL = URL(fileURLWithPath: "/path/file.zip")
5353
/// var archive = Archive(url: archiveURL, accessMode: .update)
5454
/// try archive?.addEntry("test.txt", relativeTo: baseURL, compressionMethod: .deflate)
55-
public actor Archive: AsyncSequence {
56-
public typealias Element = Entry
57-
55+
public actor Archive {
5856

5957
typealias LocalFileHeader = Entry.LocalFileHeader
6058
typealias DataDescriptor = Entry.DefaultDataDescriptor
@@ -79,6 +77,8 @@ public actor Archive: AsyncSequence {
7977
case invalidBufferSize
8078
/// Thrown when uncompressedSize/compressedSize exceeds `Int64.max` (Imposed by file API).
8179
case invalidEntrySize
80+
/// Thrown when the local header cannot be found.
81+
case localHeaderNotFound
8282
/// Thrown when the offset of local header data exceeds `Int64.max` (Imposed by file API).
8383
case invalidLocalHeaderDataOffset
8484
/// Thrown when the size of local header exceeds `Int64.max` (Imposed by file API).
@@ -206,80 +206,42 @@ public actor Archive: AsyncSequence {
206206
self.zip64EndOfCentralDirectory = config.zip64EndOfCentralDirectory
207207
}
208208

209-
public nonisolated func makeAsyncIterator() -> Iterator {
210-
Iterator(archive: self)
211-
}
/// Memoized task that reads the central directory.
///
/// Stored as a `Task` so that concurrent callers of `entries()` share a single read.
private var entriesTask: Task<[Entry], Error>?

/// Returns the list of entries in the archive.
///
/// The central directory is read on first use and the resulting task is cached;
/// later calls await the same task. A failed read is not kept in the cache, so a
/// subsequent call starts a fresh read instead of rethrowing the stale error.
///
/// - Returns: The entries built from the central directory, in directory order.
/// - Throws: Any error raised while reading the central directory.
public func entries() async throws -> [Entry] {
    let task: Task<[Entry], Error>
    if let cached = entriesTask {
        task = cached
    } else {
        task = Task { try await readEntries() }
        entriesTask = task
    }

    do {
        return try await task.value
    } catch {
        // Evict the failed task, but only if the cache still holds this very task:
        // the cache may have been reset and repopulated while we were suspended.
        if entriesTask == task {
            entriesTask = nil
        }
        throw error
    }
}
219+
/// Reads the central directory and builds one `Entry` per record.
///
/// Starts at `offsetToStartOfCentralDirectory` and walks at most
/// `totalNumberOfEntriesInCentralDirectory` records. Records for which
/// `Entry(centralDirectoryStructure:)` returns `nil` are skipped.
///
/// - Returns: The entries found, in central directory order. Empty when the
///   archive declares no entries.
/// - Throws: Any error raised while opening the data source or reading a record.
private func readEntries() async throws -> [Entry] {
    // Avoid opening a read transaction for an empty archive.
    guard totalNumberOfEntriesInCentralDirectory > 0 else {
        return []
    }

    var entries: [Entry] = []
    var directoryIndex = offsetToStartOfCentralDirectory
    let transaction = try await dataSource.openRead()

    for _ in 0..<totalNumberOfEntriesInCentralDirectory {
        guard let centralDirStruct: CentralDirectoryStructure = try await transaction.readStruct(at: directoryIndex) else {
            // The length of an unreadable record is unknown, so the cursor cannot be
            // advanced past it. Continuing would re-read this same offset for every
            // remaining iteration; stop and return what was read so far.
            break
        }

        if let entry = Entry(centralDirectoryStructure: centralDirStruct) {
            entries.append(entry)
        }

        // Advance past the fixed-size record and its three variable-length fields.
        directoryIndex += UInt64(CentralDirectoryStructure.size)
        directoryIndex += UInt64(centralDirStruct.fileNameLength)
        directoryIndex += UInt64(centralDirStruct.extraFieldLength)
        directoryIndex += UInt64(centralDirStruct.fileCommentLength)
    }

    return entries
}
284246

285247
/// Retrieve the ZIP `Entry` with the given `path` from the receiver.
@@ -291,10 +253,64 @@ public actor Archive: AsyncSequence {
291253
/// - Parameter path: A relative file path identifying the corresponding `Entry`.
292254
/// - Returns: An `Entry` with the given `path`. Otherwise, `nil`.
293255
public func get(_ path: String) async throws -> Entry? {
    let allEntries = try await entries()

    // Without an explicit path encoding, compare against the entry's default path.
    guard let encoding = pathEncoding else {
        return allEntries.first { $0.path == path }
    }

    // Otherwise decode each entry's path with the configured encoding before comparing.
    return allEntries.first { $0.path(using: encoding) == path }
}
265+
/// Cache for local file headers, keyed by the offset of the local header in the archive.
///
/// The offset is used instead of the entry path because the ZIP format does not
/// forbid several entries sharing one path; keyed by path, such entries would
/// be handed each other's header.
private var localFileHeaders: [UInt64: Task<LocalFileHeader, Error>] = [:]

/// Retrieves the local file header for the given `entry`.
///
/// The header is read from the data source on first request and the reading
/// task is cached, so concurrent and later requests for the same entry share
/// one read. A failed read is evicted from the cache so it can be retried.
///
/// - Parameter entry: The entry whose local file header is needed.
/// - Returns: The local file header, with its data descriptor attached when the
///   entry uses one and the archive is not opened in read mode.
/// - Throws: `ArchiveError.localHeaderNotFound` when no header can be read at the
///   entry's local header offset, or any error raised by the data source.
func localFileHeader(for entry: Entry) async throws -> LocalFileHeader {
    let offset = UInt64(entry.centralDirectoryStructure.effectiveRelativeOffsetOfLocalHeader)

    let task: Task<LocalFileHeader, Error>
    if let cached = localFileHeaders[offset] {
        task = cached
    } else {
        task = Task {
            let transaction = try await dataSource.openRead()
            let centralDirStruct = entry.centralDirectoryStructure
            guard
                var header: LocalFileHeader = try await transaction.readStruct(at: offset)
            else {
                throw Archive.ArchiveError.localHeaderNotFound
            }

            // We only load the data descriptors if we are in writing mode, because
            // they might need to be written over. In read mode it is
            // superfluous as the same infos are in the central directory structure.
            if centralDirStruct.usesDataDescriptor && accessMode != .read {
                let additionalSize = UInt64(header.fileNameLength) + UInt64(header.extraFieldLength)
                let isCompressed = centralDirStruct.compressionMethod != CompressionMethod.none.rawValue
                let dataSize = isCompressed
                    ? centralDirStruct.effectiveCompressedSize
                    : centralDirStruct.effectiveUncompressedSize
                // The descriptor sits right after the header, its variable fields and the entry data.
                let descriptorPosition = offset + UInt64(LocalFileHeader.size) + additionalSize + dataSize
                if centralDirStruct.isZIP64 {
                    header.zip64DataDescriptor = try await transaction.readStruct(at: descriptorPosition)
                } else {
                    header.dataDescriptor = try await transaction.readStruct(at: descriptorPosition)
                }
            }

            return header
        }
        localFileHeaders[offset] = task
    }

    do {
        return try await task.value
    } catch {
        // Do not keep a failed read in the cache. Only evict if the slot still holds
        // this task: the cache may have been cleared and refilled while suspended.
        if localFileHeaders[offset] == task {
            localFileHeaders[offset] = nil
        }
        throw error
    }
}
308+
/// Called when the archive was modified.
///
/// Drops the cached entry list and the cached local file headers so that the
/// next access reads them again from the data source.
func didWrite() {
    // Clears the caches.
    localFileHeaders.removeAll()
    entriesTask = nil
}
299315

300316
// MARK: - Helpers

0 commit comments

Comments
 (0)