Skip to content

Commit

Permalink
chore: Merge pull request #41 from abelcheung:feat/json
Browse files Browse the repository at this point in the history
JSON output format, closes #39
  • Loading branch information
abelcheung authored Dec 16, 2023
2 parents d7d68c6 + 8059c70 commit 12a8106
Show file tree
Hide file tree
Showing 22 changed files with 1,156 additions and 621 deletions.
8 changes: 1 addition & 7 deletions .editorconfig
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,5 @@ indent_style = space
[*.{c,h,txt,cmake}]
indent_size = 4

[configure.ac]
indent_size = 4

[*.{md,yml}]
[*.{md,yml,json}]
indent_size = 2

[{Makefile.am,*.mk}]
indent_style = tab
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,8 @@ foreach(bin rifiuti rifiuti-vista)
PRIVATE
src/utils.c
src/utils.h
src/utils-conv.c
src/utils-conv.h
)
if(WIN32)
target_sources(${bin}
Expand Down
99 changes: 52 additions & 47 deletions src/rifiuti-vista.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,12 @@
* Please see LICENSE file for more info.
*/

#include <stdbool.h>
#include <glib/gi18n.h>
#include <glib/gstdio.h>

#include "rifiuti-vista.h"
#include "utils-conv.h"
#include "utils.h"
#ifdef G_OS_WIN32
# include "utils-win.h"
Expand Down Expand Up @@ -92,7 +94,7 @@ _validate_index_file (const char *filename,
pathlen = GUINT32_FROM_LE (pathlen);

/* Header length + strlen in UTF-16 encoding */
expect_sz = VERSION2_FILENAME_OFFSET + pathlen * 2;
expect_sz = VERSION2_FILENAME_OFFSET + pathlen * sizeof(gunichar2);
if (*bufsize != expect_sz)
{
g_debug ("File size = %" G_GSIZE_FORMAT
Expand Down Expand Up @@ -124,22 +126,43 @@ _validate_index_file (const char *filename,

static rbin_struct *
_populate_record_data (void *buf,
uint64_t version,
gboolean erraneous)
gsize bufsize,
uint64_t version)
{
rbin_struct *record;
size_t read;
size_t pathbuf_sz = 0;
void *pathbuf_start = NULL;
bool erraneous = false;

switch (version)
{
case VERSION_VISTA:
// In rare cases, the index file is one byte short of the (fixed)
// 544 bytes used in Vista. When that happens, the file size field
// occupies only 56 bits, not 64 bits as it ought to.
// This 56-bit file size is very likely wrong anyway.
// This was observed when deleting dd.exe from Forensic
// Acquisition Utilities (by George M. Garner Jr)
// on certain localized versions of Vista.
if (bufsize == VERSION1_FILE_SIZE - 1)
erraneous = true;

pathbuf_sz = WIN_PATH_MAX * sizeof(gunichar2);
pathbuf_start = buf - (int)erraneous + VERSION1_FILENAME_OFFSET;
break;

case VERSION_WIN10:
pathbuf_sz = bufsize - VERSION2_FILENAME_OFFSET;
pathbuf_start = buf + VERSION2_FILENAME_OFFSET;
break;

default:
g_assert_not_reached ();
}

record = g_malloc0 (sizeof (rbin_struct));
record->version = version;

/*
* In rare cases, the size of index file is 543 bytes versus (normal) 544 bytes.
* In such occasion file size only occupies 56 bit, not 64 bit as it ought to be.
* Actually this 56-bit file size is very likely wrong after all. Probably some
* bug inside Windows. This is observed during deletion of dd.exe from Forensic
* Acquisition Utilities (by George M. Garner Jr) in certain localized Vista.
*/
memcpy (&record->filesize, buf + FILESIZE_OFFSET,
FILETIME_OFFSET - FILESIZE_OFFSET - (int) erraneous);
if (erraneous)
Expand All @@ -156,34 +179,30 @@ _populate_record_data (void *buf,
}

/* File deletion time */
memcpy (&record->winfiletime, buf + FILETIME_OFFSET - (int) erraneous,
memcpy (&record->winfiletime, buf - (int) erraneous + FILETIME_OFFSET,
VERSION1_FILENAME_OFFSET - FILETIME_OFFSET);
record->winfiletime = GINT64_FROM_LE (record->winfiletime);
record->deltime = win_filetime_to_gdatetime (record->winfiletime);

switch (version)
{
case VERSION_VISTA:
record->uni_path = conv_path_to_utf8_with_tmpl (
(const char *) (buf - erraneous + VERSION1_FILENAME_OFFSET),
NULL, "<\\u%04X>", &read, &record->error);
break;

case VERSION_WIN10:
record->uni_path = conv_path_to_utf8_with_tmpl (
(const char *) (buf + VERSION2_FILENAME_OFFSET),
NULL, "<\\u%04X>", &read, &record->error);
break;
record->raw_uni_path = g_malloc0 (pathbuf_sz + sizeof(gunichar2));
memcpy (record->raw_uni_path, pathbuf_start, pathbuf_sz);

default:
g_assert_not_reached ();
{
// Never set len = -1 for UCS2 source string
char *s = g_convert (record->raw_uni_path,
ucs2_strnlen (record->raw_uni_path, pathbuf_sz) * sizeof (gunichar2),
"UTF-8", "UTF-16LE", NULL, NULL, NULL);
if (s)
{
g_free (s);
}
else
{
g_set_error_literal (&record->error, R2_REC_ERROR, R2_REC_ERROR_CONV_PATH,
_("Path contains broken unicode character(s)"));
}
}

if (! record->uni_path)
g_set_error_literal (&record->error, R2_REC_ERROR,
R2_REC_ERROR_CONV_PATH,
_("Trash file path conversion failed completely"));

return record;
}

Expand Down Expand Up @@ -212,21 +231,7 @@ _parse_record_cb (char *index_file,

g_debug ("Start populating record for '%s'...", basename);

switch (version)
{
case VERSION_VISTA:
record = _populate_record_data (buf, version,
(bufsize == VERSION1_FILE_SIZE - 1));
break;

case VERSION_WIN10:
record = _populate_record_data (buf, version, FALSE);
break;

default:
g_assert_not_reached();
}

record = _populate_record_data (buf, bufsize, version);
g_free (buf);

/* Check corresponding $R.... file existence and set record->gone */
Expand Down
80 changes: 53 additions & 27 deletions src/rifiuti.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include <glib/gstdio.h>

#include "rifiuti.h"
#include "utils-conv.h"
#include "utils.h"


Expand Down Expand Up @@ -125,7 +126,7 @@ _validate_index_file (const char *filename,

rewind (fp);
*infile = fp;
meta->version = (uint64_t) ver;
meta->version = ver;
return TRUE;

validation_broken:
Expand All @@ -142,13 +143,13 @@ _populate_record_data (void *buf,
{
rbin_struct *record;
uint32_t drivenum;
size_t read;
char *legacy_fname;
size_t uni_buf_sz, null_terminator_offset;

record = g_malloc0 (sizeof (rbin_struct));

legacy_fname = g_malloc0 (RECORD_INDEX_OFFSET - LEGACY_FILENAME_OFFSET);
copy_field (legacy_fname, LEGACY_FILENAME_OFFSET, RECORD_INDEX_OFFSET);
// Verbatim path in ANSI code page
record->raw_legacy_path = g_malloc0 (RECORD_INDEX_OFFSET - LEGACY_FILENAME_OFFSET);
copy_field (record->raw_legacy_path, LEGACY_FILENAME_OFFSET, RECORD_INDEX_OFFSET);

/* Index number associated with the record */
copy_field (&record->index_n, RECORD_INDEX_OFFSET, DRIVE_LETTER_OFFSET);
Expand All @@ -170,10 +171,10 @@ _populate_record_data (void *buf,
record->gone = FILESTATUS_EXISTS;
// If file is not in recycle bin (restored or permanently deleted),
// first byte will be removed from filename
if (!*legacy_fname)
if (! *record->raw_legacy_path)
{
record->gone = FILESTATUS_GONE;
*legacy_fname = record->drive;
*record->raw_legacy_path = record->drive;
}

/* File deletion time */
Expand All @@ -187,30 +188,45 @@ _populate_record_data (void *buf,
record->filesize = GUINT64_FROM_LE (record->filesize);
g_debug ("filesize=%" PRIu64, record->filesize);

/*
* 1. Only bother populating legacy path if users need it,
* because otherwise we don't know which encoding to use
* 2. Enclose with angle brackets because they are not allowed
* in Windows file name, therefore stands out better that
* the escaped hex sequences are not part of real file name
*/
// Only bother checking legacy path when requested,
// because otherwise we don't know which encoding to use
if (legacy_encoding)
{
record->legacy_path = conv_path_to_utf8_with_tmpl (
legacy_fname, legacy_encoding,
"<\\%02X>", &read, &record->error);
char *s = g_convert (record->raw_legacy_path, -1,
"UTF-8", legacy_encoding, NULL, NULL, NULL);
if (s)
g_free (s);
else
g_set_error (&record->error, R2_REC_ERROR, R2_REC_ERROR_CONV_PATH,
_("Path contains character(s) that could not be "
"interpreted in %s encoding"), legacy_encoding);
}

g_free (legacy_fname);

if (bufsize == LEGACY_RECORD_SIZE)
return record;

/* Part below deals with unicode path only */

record->uni_path = conv_path_to_utf8_with_tmpl (
(char *) (buf + UNICODE_FILENAME_OFFSET), NULL,
"<\\u%04X>", &read, &record->error);
uni_buf_sz = UNICODE_RECORD_SIZE - UNICODE_FILENAME_OFFSET;
record->raw_uni_path = g_malloc (uni_buf_sz);
copy_field (record->raw_uni_path, UNICODE_FILENAME_OFFSET, UNICODE_RECORD_SIZE);
null_terminator_offset = ucs2_strnlen (
record->raw_uni_path, WIN_PATH_MAX) * sizeof (gunichar2);

{
// Never set len = -1 for wchar source string
char *s = g_convert (record->raw_uni_path, null_terminator_offset,
"UTF-8", "UTF-16LE", NULL, NULL, NULL);
if (s)
{
g_free (s);
}
else
{
g_set_error_literal (&record->error, R2_REC_ERROR, R2_REC_ERROR_CONV_PATH,
_("Path contains broken unicode character(s)"));
}
}

/*
* We check for junk memory filling the padding area after
Expand All @@ -226,22 +242,32 @@ _populate_record_data (void *buf,
* Looks like an ANSI codepage full path is filled in
* legacy path field, then overwritten in place by a 8.3
* version of path whenever applicable (which was always shorter).
*
* The 8.3 path generated from non-ASCII names seems to follow a certain
* ruleset, but the exact details are unknown:
* - accented latin chars transliterated to pure ASCII
* - first DBCS char converted to UCS2 codepoint
*/
if (junk_detected && ! *junk_detected)
{
void *ptr;
// Beware: start pos shouldn't be previously read bytes,
// as it may contain invalid seq and quit prematurely.
char *p = record->raw_uni_path + null_terminator_offset;

for (ptr = buf + UNICODE_FILENAME_OFFSET + read;
ptr < buf + UNICODE_RECORD_SIZE; ptr++)
while (p < record->raw_uni_path + uni_buf_sz)
{
if ( *(char *) ptr != '\0' )
if (*p != '\0')
{
g_debug ("Junk detected at offset 0x%tx of unicode path",
ptr - buf - UNICODE_FILENAME_OFFSET);
p - record->raw_uni_path);
*junk_detected = TRUE;
break;
}
p++;
}

if (*junk_detected)
hexdump (record->raw_uni_path, uni_buf_sz);
}

return record;
Expand Down
Loading

0 comments on commit 12a8106

Please sign in to comment.