Skip to content

Commit cd708ac

Browse files
committed
fuse: partial file reads
1 parent 48b6cb6 commit cd708ac

File tree

3 files changed

+128
-75
lines changed

3 files changed

+128
-75
lines changed

recuperabit/fs/core_types.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ def __init__(self, index, name, size, is_directory=False,
5050
self.children = set()
5151
self.children_names = set() # Avoid name clashes breaking restore
5252
self.offset = None # Offset from beginning of disk
53+
self.isopen = False # if the file is currently open
5354

5455
def set_parent(self, parent):
5556
"""Set a pointer to the parent directory."""
@@ -107,6 +108,13 @@ def get_content(self, partition):
107108
if self.is_directory or self.is_ghost:
108109
return None
109110
raise NotImplementedError
111+
112+
def open(self, partition):
    """Prepare the file for partial reads.

    The base class cannot locate file data, so filesystem-specific
    subclasses are expected to override this method.

    Args:
        partition: partition the file belongs to.

    Raises:
        NotImplementedError: always, in the base class.
    """
    raise NotImplementedError

def close(self, partition):
    """Mark the file as no longer open.

    Clears the ``isopen`` flag set by ``open()`` implementations, so a
    later ``open()`` that checks the flag does not treat the file as
    still being open.

    Args:
        partition: partition the file belongs to (unused here).
    """
    self.isopen = False

def read(self, partition, offset, size):
    """Return part of the file content.

    Args:
        partition: partition the file belongs to.
        offset: position of the first byte to read.
        size: number of bytes requested.

    Raises:
        NotImplementedError: always, in the base class.
    """
    raise NotImplementedError
110118

111119
# pylint: disable=R0201
112120
def ignore(self):

recuperabit/fs/ntfs.py

Lines changed: 105 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -320,72 +320,63 @@ def _padded_bytes(image, offset, size):
320320
'{}'.format(offset, size))
321321
dump += bytearray('\x00' * (size - len(dump)))
322322
return dump
323-
324-
def content_iterator(self, partition, image, datas):
325-
"""Return an iterator for the contents of this file."""
323+
324+
def parse_data(self, partition, datas):
    """Translate non-resident $DATA attributes into a list of disk extents.

    Args:
        partition: partition the file belongs to; ``sec_per_clus`` and
            ``offset`` are read from it.
        datas: $DATA attribute dicts in ascending ``start_VCN`` order. Each
            one must provide ``start_VCN``, ``end_VCN`` and ``runlist``.

    Returns:
        A list of ``(first_vcn, end_vcn, sector)`` tuples, where ``end_vcn``
        is exclusive. ``sector`` is ``cluster * sec_per_clus +
        partition.offset`` for the first cluster of the run, or ``-1`` when
        the extent must be filled with zeros (sparse run or missing part).

    Raises:
        ValueError: if an attribute has no runlist.
    """
    spc = partition.sec_per_clus
    vcn = 0

    output = []  # (vcn start, vcn end, sector offset (-1 if zeros))
    for attr in datas:
        first_vcn = attr['start_VCN']
        diff = first_vcn - vcn
        if diff > 0:
            logging.warning(
                u'Missing part for {}, filling {} clusters '
                'with zeros'.format(self, diff)
            )
            output.append((vcn, first_vcn, -1))
        vcn = first_vcn

        if 'runlist' not in attr:
            raise ValueError(
                u'Cannot restore {}, missing runlist'.format(self)
            )

        # Run offsets are relative to the previous non-sparse run, so the
        # absolute cluster position is accumulated across entries.
        clusters_pos = 0
        for entry in attr['runlist']:
            next_vcn = vcn + entry['length']
            if entry['offset'] is None:
                # Sparse run: nothing stored on disk
                output.append((vcn, next_vcn, -1))
            else:
                # Normal run
                clusters_pos += entry['offset']
                real_pos = clusters_pos * spc + partition.offset
                output.append((vcn, next_vcn, real_pos))
            vcn = next_vcn

        expected_vcn = attr['end_VCN'] + 1
        if vcn != expected_vcn:
            logging.error(
                "VCN miscalculation! {} {}".format(vcn, expected_vcn)
            )
        # Trust the attribute header over the runlist for the next attribute
        vcn = expected_vcn
    return output
362+
363+
def open(self, partition):
364+
"""Opens the file and get the sector locations of the file.
376365
377366
This method works by extracting the $DATA attribute."""
367+
if self.isopen:
368+
logging.warning(u'Tried to open already open file {}!'.format(self))
369+
return # already open!
370+
378371
if self.is_ghost:
379-
logging.error(u'Cannot restore ghost file {}'.format(self))
380-
return None
372+
raise ValueError(u'Cannot open ghost file {}'.format(self))
381373

382374
image = DiskScanner.get_image(partition.scanner)
383375
dump = sectors(image, File.get_offset(self), FILE_size)
384376
parsed = parse_file_record(dump)
385377

386378
if not parsed['valid'] or 'attributes' not in parsed:
387-
logging.error(u'Invalid MFT entry for {}'.format(self))
388-
return None
379+
raise ValueError(u'Invalid MFT entry for {}'.format(self))
389380
attrs = parsed['attributes']
390381
if ('$ATTRIBUTE_LIST' in attrs and
391382
partition.sec_per_clus is not None):
@@ -395,32 +386,30 @@ def get_content(self, partition):
395386
datas = [d for d in attrs['$DATA'] if d['name'] == self.ads]
396387
if not len(datas):
397388
if not self.is_directory:
398-
logging.error(u'Cannot restore $DATA attribute(s) '
389+
raise ValueError(u'Cannot restore $DATA attribute(s) '
399390
'for {}'.format(self))
400-
return None
401391

402392
# TODO implemented compressed attributes
403393
for d in datas:
404394
if d['flags'] & 0x01:
405-
logging.error(u'Cannot restore compressed $DATA attribute(s) '
395+
raise ValueError(u'Cannot restore compressed $DATA attribute(s) '
406396
'for {}'.format(self))
407-
return None
408397
elif d['flags'] & 0x4000:
409398
logging.warning(u'Found encrypted $DATA attribute(s) '
410399
'for {}'.format(self))
411-
400+
self.isopen = True
412401
# Handle resident file content
413402
if len(datas) == 1 and not datas[0]['non_resident']:
414403
single = datas[0]
415404
start = single['dump_offset'] + single['content_off']
416405
end = start + single['content_size']
417-
content = dump[start:end]
418-
return str(content)
406+
self.resident = True
407+
self.content = dump[start:end]
408+
return
419409
else:
420410
if partition.sec_per_clus is None:
421-
logging.error(u'Cannot restore non-resident $DATA '
411+
raise ValueError(u'Cannot restore non-resident $DATA '
422412
'attribute(s) for {}'.format(self))
423-
return None
424413
non_resident = sorted(
425414
(d for d in attrs['$DATA'] if d['non_resident']),
426415
key=lambda x: x['start_VCN']
@@ -430,7 +419,77 @@ def get_content(self, partition):
430419
u'Found leftover resident $DATA attributes for '
431420
'{}'.format(self)
432421
)
433-
return self.content_iterator(partition, image, non_resident)
422+
self.resident = False
423+
self.content = self.parse_data(partition, non_resident)
424+
return
425+
426+
def content_iterator(self, partition, image, datas):
    """Return an iterator for the contents of this file.

    The extents stored in ``self.content`` by ``open()`` are walked in
    order and the total amount of data produced is limited to
    ``self.size``.

    Args:
        partition: partition the file belongs to; ``sec_per_clus`` is read.
        image: disk image handle, passed through to ``_padded_bytes``.
        datas: kept for signature compatibility; the extents are always
            taken from ``self.content``.

    Yields:
        Byte strings of at most ``max_sectors * sector_size`` bytes each.
    """
    bpc = sector_size * partition.sec_per_clus  # bytes per cluster
    # Avoid to fill memory with huge blocks: a single run can span most of
    # the file, so it is read in bounded pieces.
    max_piece = max_sectors * sector_size

    remaining = self.size
    for first_vcn, end_vcn, sectoroff in self.content:
        if remaining <= 0:
            break
        length = min(remaining, (end_vcn - first_vcn) * bpc)
        if length <= 0:
            continue
        # Track what has been produced so the last run is cut at self.size
        remaining -= length

        done = 0
        while done < length:
            amount = min(max_piece, length - done)
            if sectoroff == -1:
                yield b'\x00' * amount
            else:
                position = sectoroff * sector_size + done
                yield bytes(self._padded_bytes(image, position, amount))
            done += amount
444+
445+
def get_content(self, partition):
    """Extract the entire content of the file.

    The file is opened first unless it is already open.

    Args:
        partition: partition the file belongs to.

    Returns:
        The content as a byte string for resident files, an iterator over
        pieces of content for non-resident files, or None when ``open()``
        rejects the file with a ValueError (ghost file, invalid MFT entry,
        unsupported $DATA attributes).

    Raises:
        RuntimeError: if ``open()`` returned without marking the file open.
    """
    if not self.isopen:
        try:
            self.open(partition)
        except ValueError as error:
            # Report the failure and hand back None, as this method did
            # before open() started raising.
            logging.error(u'%s', error)
            return None
    if not self.isopen:
        raise RuntimeError("tried to read file that wasn't open!")

    if self.resident:
        return bytes(self.content)  # typecast from bytearray -> bytes
    image = DiskScanner.get_image(partition.scanner)
    return self.content_iterator(partition, image, self.content)
454+
455+
def read(self, partition, roffset, rsize):
    """Read part of the content of an open file.

    Only the bytes that fall inside the requested range are fetched from
    the image, and the range is cut at ``self.size`` when the size is
    known.

    Args:
        partition: partition the file belongs to; ``scanner`` and
            ``sec_per_clus`` are read from it.
        roffset: offset of the first byte to read, from the file start.
        rsize: maximum number of bytes to return.

    Returns:
        A byte string, shorter than ``rsize`` when the range extends past
        the end of the file or of the known extents.

    Raises:
        RuntimeError: if the file has not been opened.
    """
    if not self.isopen:
        raise RuntimeError("tried to read file that wasn't open!")
    if self.resident:
        # Slicing already limits the range to the stored content
        return bytes(self.content[roffset:roffset + rsize])

    end = roffset + rsize
    if self.size is not None:
        # The last cluster usually holds slack space past the end of file
        end = min(end, self.size)
    if roffset >= end:
        return bytes()

    image = DiskScanner.get_image(partition.scanner)
    bpc = sector_size * partition.sec_per_clus  # bytes per cluster

    value = bytearray()
    for first_vcn, end_vcn, sectoroff in self.content:
        run_start = first_vcn * bpc  # file offset of the run's first byte
        low = max(roffset, run_start)
        high = min(end, end_vcn * bpc)
        if low >= high:
            # The run does not overlap the requested range
            continue
        if sectoroff == -1:
            value.extend(b'\x00' * (high - low))
        else:
            position = sectoroff * sector_size + (low - run_start)
            value.extend(self._padded_bytes(image, position, high - low))
    return bytes(value)  # typecast from bytearray -> bytes
434493

435494
def ignore(self):
436495
"""Determine which files should be ignored."""

recuperabit/ifuse.py

Lines changed: 15 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import time
99
from datetime import datetime
1010
from fs.core_types import File
11+
import traceback
1112

1213
# Was originally named fuse.py until I realized it conflicted with fusepy
1314

@@ -137,46 +138,31 @@ def open(self, path, flags):
137138
part = self.get_part_from_path(path)
138139

139140
try:
140-
content = file.get_content(part)
141-
except NotImplementedError:
142-
logging.error(u'Restore of #%s is not supported', file.index)
141+
file.open(part)
142+
except Exception, e:
143+
track = traceback.format_exc()
144+
logging.error(e)
145+
logging.error(track)
143146
raise FuseOSError(EIO)
144147

145-
146-
if file.is_directory and content is not None:
147-
logging.warning(u'Directory %s has data content!', file.file_path)
148-
149-
binarray = bytearray()
150-
if content is not None:
151-
logging.info(u'Restoring #%s %s', file.index, path)
152-
if hasattr(content, '__iter__'):
153-
for piece in content:
154-
binarray.extend(piece)
155-
else:
156-
binarray.extend(content)
157-
"""else:
158-
if not is_directory:
159-
# Empty file
160-
pass
161-
else:
162-
raise FuseOSError(EIO)"""
163-
164-
binout = bytes(binarray)
165-
#print(type(binout))
166-
167148
self.fd += 1
168-
self.files[self.fd] = (file, binout)
149+
self.files[self.fd] = file
169150
return self.fd
170151

171152
def release(self, path, fh):
172153
self.files[fh] = None
173154
return 0
174155

175156
def read(self, path, size, offset, fh):
    """Read ``size`` bytes at ``offset`` from the file found at ``path``.

    The file and its partition are looked up from the path; ``fh`` is not
    consulted.

    Args:
        path: path of the file inside the mounted view.
        size: number of bytes requested.
        offset: position of the first byte to read.
        fh: file handle returned by ``open``.

    Returns:
        Whatever the file object's ``read(part, offset, size)`` returns.

    Raises:
        FuseOSError: with EIO when reading the file fails for any reason.
    """
    node = self.get_file_from_path(path)
    part = self.get_part_from_path(path)
    try:
        return node.read(part, offset, size)
    except Exception:
        # A single record carrying both the error and its traceback
        logging.exception(u'Cannot read %s', path)
        raise FuseOSError(EIO)
180166

181167
class PartView(AbstractView):
182168
def __init__(self, part, root):

0 commit comments

Comments
 (0)