diff --git a/.editorconfig b/.editorconfig index 7cf2d65..74b98f9 100644 --- a/.editorconfig +++ b/.editorconfig @@ -9,11 +9,5 @@ indent_style = space [*.{c,h,txt,cmake}] indent_size = 4 -[configure.ac] -indent_size = 4 - -[*.{md,yml}] +[*.{md,yml,json}] indent_size = 2 - -[{Makefile.am,*.mk}] -indent_style = tab diff --git a/CMakeLists.txt b/CMakeLists.txt index 260132a..1798e51 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -62,6 +62,8 @@ foreach(bin rifiuti rifiuti-vista) PRIVATE src/utils.c src/utils.h + src/utils-conv.c + src/utils-conv.h ) if(WIN32) target_sources(${bin} diff --git a/src/rifiuti-vista.c b/src/rifiuti-vista.c index c1eaaa0..7d5cd19 100644 --- a/src/rifiuti-vista.c +++ b/src/rifiuti-vista.c @@ -4,10 +4,12 @@ * Please see LICENSE file for more info. */ +#include #include #include #include "rifiuti-vista.h" +#include "utils-conv.h" #include "utils.h" #ifdef G_OS_WIN32 # include "utils-win.h" @@ -92,7 +94,7 @@ _validate_index_file (const char *filename, pathlen = GUINT32_FROM_LE (pathlen); /* Header length + strlen in UTF-16 encoding */ - expect_sz = VERSION2_FILENAME_OFFSET + pathlen * 2; + expect_sz = VERSION2_FILENAME_OFFSET + pathlen * sizeof(gunichar2); if (*bufsize != expect_sz) { g_debug ("File size = %" G_GSIZE_FORMAT @@ -124,22 +126,43 @@ _validate_index_file (const char *filename, static rbin_struct * _populate_record_data (void *buf, - uint64_t version, - gboolean erraneous) + gsize bufsize, + uint64_t version) { rbin_struct *record; - size_t read; + size_t pathbuf_sz = 0; + void *pathbuf_start = NULL; + bool erraneous = false; + + switch (version) + { + case VERSION_VISTA: + // In rare cases, the size of index file is one byte short of + // (fixed) 544 bytes in Vista. Under such occasion, file size + // only occupies 56 bit, not 64 bit as it ought to be. + // Actually this 56-bit file size is very likely wrong after all. + // This is observed during deletion of dd.exe from Forensic + // Acquisition Utilities (by George M. 
Garner Jr) + // in certain localized Vista. + if (bufsize == VERSION1_FILE_SIZE - 1) + erraneous = true; + + pathbuf_sz = WIN_PATH_MAX * sizeof(gunichar2); + pathbuf_start = buf - (int)erraneous + VERSION1_FILENAME_OFFSET; + break; + + case VERSION_WIN10: + pathbuf_sz = bufsize - VERSION2_FILENAME_OFFSET; + pathbuf_start = buf + VERSION2_FILENAME_OFFSET; + break; + + default: + g_assert_not_reached (); + } record = g_malloc0 (sizeof (rbin_struct)); record->version = version; - /* - * In rare cases, the size of index file is 543 bytes versus (normal) 544 bytes. - * In such occasion file size only occupies 56 bit, not 64 bit as it ought to be. - * Actually this 56-bit file size is very likely wrong after all. Probably some - * bug inside Windows. This is observed during deletion of dd.exe from Forensic - * Acquisition Utilities (by George M. Garner Jr) in certain localized Vista. - */ memcpy (&record->filesize, buf + FILESIZE_OFFSET, FILETIME_OFFSET - FILESIZE_OFFSET - (int) erraneous); if (erraneous) @@ -156,34 +179,30 @@ _populate_record_data (void *buf, } /* File deletion time */ - memcpy (&record->winfiletime, buf + FILETIME_OFFSET - (int) erraneous, + memcpy (&record->winfiletime, buf - (int) erraneous + FILETIME_OFFSET, VERSION1_FILENAME_OFFSET - FILETIME_OFFSET); record->winfiletime = GINT64_FROM_LE (record->winfiletime); record->deltime = win_filetime_to_gdatetime (record->winfiletime); - switch (version) - { - case VERSION_VISTA: - record->uni_path = conv_path_to_utf8_with_tmpl ( - (const char *) (buf - erraneous + VERSION1_FILENAME_OFFSET), - NULL, "<\\u%04X>", &read, &record->error); - break; - - case VERSION_WIN10: - record->uni_path = conv_path_to_utf8_with_tmpl ( - (const char *) (buf + VERSION2_FILENAME_OFFSET), - NULL, "<\\u%04X>", &read, &record->error); - break; + record->raw_uni_path = g_malloc0 (pathbuf_sz + sizeof(gunichar2)); + memcpy (record->raw_uni_path, pathbuf_start, pathbuf_sz); - default: - g_assert_not_reached (); + { + // Never set len 
= -1 for UCS2 source string + char *s = g_convert (record->raw_uni_path, + ucs2_strnlen (record->raw_uni_path, pathbuf_sz) * sizeof (gunichar2), + "UTF-8", "UTF-16LE", NULL, NULL, NULL); + if (s) + { + g_free (s); + } + else + { + g_set_error_literal (&record->error, R2_REC_ERROR, R2_REC_ERROR_CONV_PATH, + _("Path contains broken unicode character(s)")); + } } - if (! record->uni_path) - g_set_error_literal (&record->error, R2_REC_ERROR, - R2_REC_ERROR_CONV_PATH, - _("Trash file path conversion failed completely")); - return record; } @@ -212,21 +231,7 @@ _parse_record_cb (char *index_file, g_debug ("Start populating record for '%s'...", basename); - switch (version) - { - case VERSION_VISTA: - record = _populate_record_data (buf, version, - (bufsize == VERSION1_FILE_SIZE - 1)); - break; - - case VERSION_WIN10: - record = _populate_record_data (buf, version, FALSE); - break; - - default: - g_assert_not_reached(); - } - + record = _populate_record_data (buf, bufsize, version); g_free (buf); /* Check corresponding $R.... 
file existance and set record->gone */ diff --git a/src/rifiuti.c b/src/rifiuti.c index 4244e1e..530c091 100644 --- a/src/rifiuti.c +++ b/src/rifiuti.c @@ -9,6 +9,7 @@ #include #include "rifiuti.h" +#include "utils-conv.h" #include "utils.h" @@ -125,7 +126,7 @@ _validate_index_file (const char *filename, rewind (fp); *infile = fp; - meta->version = (uint64_t) ver; + meta->version = ver; return TRUE; validation_broken: @@ -142,13 +143,13 @@ _populate_record_data (void *buf, { rbin_struct *record; uint32_t drivenum; - size_t read; - char *legacy_fname; + size_t uni_buf_sz, null_terminator_offset; record = g_malloc0 (sizeof (rbin_struct)); - legacy_fname = g_malloc0 (RECORD_INDEX_OFFSET - LEGACY_FILENAME_OFFSET); - copy_field (legacy_fname, LEGACY_FILENAME_OFFSET, RECORD_INDEX_OFFSET); + // Verbatim path in ANSI code page + record->raw_legacy_path = g_malloc0 (RECORD_INDEX_OFFSET - LEGACY_FILENAME_OFFSET); + copy_field (record->raw_legacy_path, LEGACY_FILENAME_OFFSET, RECORD_INDEX_OFFSET); /* Index number associated with the record */ copy_field (&record->index_n, RECORD_INDEX_OFFSET, DRIVE_LETTER_OFFSET); @@ -170,10 +171,10 @@ _populate_record_data (void *buf, record->gone = FILESTATUS_EXISTS; // If file is not in recycle bin (restored or permanently deleted), // first byte will be removed from filename - if (!*legacy_fname) + if (! *record->raw_legacy_path) { record->gone = FILESTATUS_GONE; - *legacy_fname = record->drive; + *record->raw_legacy_path = record->drive; } /* File deletion time */ @@ -187,30 +188,45 @@ _populate_record_data (void *buf, record->filesize = GUINT64_FROM_LE (record->filesize); g_debug ("filesize=%" PRIu64, record->filesize); - /* - * 1. Only bother populating legacy path if users need it, - * because otherwise we don't know which encoding to use - * 2. 
Enclose with angle brackets because they are not allowed - * in Windows file name, therefore stands out better that - * the escaped hex sequences are not part of real file name - */ + // Only bother checking legacy path when requested, + // because otherwise we don't know which encoding to use if (legacy_encoding) { - record->legacy_path = conv_path_to_utf8_with_tmpl ( - legacy_fname, legacy_encoding, - "<\\%02X>", &read, &record->error); + char *s = g_convert (record->raw_legacy_path, -1, + "UTF-8", legacy_encoding, NULL, NULL, NULL); + if (s) + g_free (s); + else + g_set_error (&record->error, R2_REC_ERROR, R2_REC_ERROR_CONV_PATH, + _("Path contains character(s) that could not be " + "interpreted in %s encoding"), legacy_encoding); } - g_free (legacy_fname); - if (bufsize == LEGACY_RECORD_SIZE) return record; /* Part below deals with unicode path only */ - record->uni_path = conv_path_to_utf8_with_tmpl ( - (char *) (buf + UNICODE_FILENAME_OFFSET), NULL, - "<\\u%04X>", &read, &record->error); + uni_buf_sz = UNICODE_RECORD_SIZE - UNICODE_FILENAME_OFFSET; + record->raw_uni_path = g_malloc (uni_buf_sz); + copy_field (record->raw_uni_path, UNICODE_FILENAME_OFFSET, UNICODE_RECORD_SIZE); + null_terminator_offset = ucs2_strnlen ( + record->raw_uni_path, WIN_PATH_MAX) * sizeof (gunichar2); + + { + // Never set len = -1 for wchar source string + char *s = g_convert (record->raw_uni_path, null_terminator_offset, + "UTF-8", "UTF-16LE", NULL, NULL, NULL); + if (s) + { + g_free (s); + } + else + { + g_set_error_literal (&record->error, R2_REC_ERROR, R2_REC_ERROR_CONV_PATH, + _("Path contains broken unicode character(s)")); + } + } /* * We check for junk memory filling the padding area after @@ -226,22 +242,32 @@ _populate_record_data (void *buf, * Looks like an ANSI codepage full path is filled in * legacy path field, then overwritten in place by a 8.3 * version of path whenever applicable (which was always shorter). 
+ * + * The 8.3 path generated from non-ascii seems to follow certain + * ruleset, but the exact detail is unknown: + * - accented latin chars transliterated to pure ASCII + * - first DBCS char converted to UCS2 codepoint */ if (junk_detected && ! *junk_detected) { - void *ptr; + // Beware: start pos shouldn't be previously read bytes, + // as it may contain invalid seq and quit prematurely. + char *p = record->raw_uni_path + null_terminator_offset; - for (ptr = buf + UNICODE_FILENAME_OFFSET + read; - ptr < buf + UNICODE_RECORD_SIZE; ptr++) + while (p < record->raw_uni_path + uni_buf_sz) { - if ( *(char *) ptr != '\0' ) + if (*p != '\0') { g_debug ("Junk detected at offset 0x%tx of unicode path", - ptr - buf - UNICODE_FILENAME_OFFSET); + p - record->raw_uni_path); *junk_detected = TRUE; break; } + p++; } + + if (*junk_detected) + hexdump (record->raw_uni_path, uni_buf_sz); } return record; diff --git a/src/utils-conv.c b/src/utils-conv.c new file mode 100644 index 0000000..7c9159f --- /dev/null +++ b/src/utils-conv.c @@ -0,0 +1,376 @@ +/* + * Copyright (C) 2023, Abel Cheung. + * rifiuti2 is released under Revised BSD License. + * Please see LICENSE file for more info. + */ + +#include +#include +#include +#include +#include + +#include "utils-conv.h" + + +/** + * @brief Try out if encoding is compatible to ASCII + * @param enc The encoding to test + * @param error Location to store error during trial + * @return `true` if compatible, `false` otherwise + * (including the case where encoding doesn't exist) + */ +bool +enc_is_ascii_compatible (const char *enc, + GError **error) +{ + bool equal; + char *s; + + g_return_val_if_fail (enc && *enc, false); + + s = g_convert ("C:\\", -1, "UTF-8", enc, NULL, NULL, error); + equal = (0 == g_strcmp0 ("C:\\", (const char *)s)); + g_free (s); + + if (equal) + return true; + + if (*error == NULL) + // Encoding is ASCII incompatible (e.g. EBCDIC). 
/**
 * @brief Compute UCS2 string length like `wcslen()`
 * @param str The string to check. It is passed as `char *` but is
 *            examined two octets at a time; `NULL` yields 0
 * @param max_sz Maximum number of UCS2 characters to examine, or a
 *               negative value (conventionally -1) to denote the
 *               string is terminated by a 2-octet nul
 * @return Number of UCS2 characters preceding the first 2-octet nul,
 *         or `max_sz` if no terminator is found within that limit
 * @note The limit is tested *before* each character is read, so no
 *       octet beyond the first `max_sz` characters is ever accessed.
 *       This matters for fixed-size record fields which may be
 *       completely filled and carry no terminator at all.
 */
size_t
ucs2_strnlen (const char *str,
              ssize_t     max_sz)
{
    const char *p = str;
    size_t      n = 0;

    if (str == NULL)
        return 0;

    while (max_sz < 0 || n < (size_t) max_sz)
    {
        // Terminator is a character with both octets zero; a single
        // zero octet is a normal part of most UCS2 characters
        if (p[0] == '\0' && p[1] == '\0')
            break;

        n++;
        p += 2;
    }

    return n;
}
+ */ +static void +_advance_octet (size_t sz, + char **in_str, + gsize *read_bytes, + char **out_str, + gsize *write_bytes, + const char *tmpl) +{ + char *repl; + + switch (sz) { + case 1: + { + unsigned char c = *(unsigned char *) (*in_str); + repl = g_strdup_printf (tmpl, c); + } + break; + + case 2: + { + uint16_t c = GUINT16_FROM_LE (*(uint16_t *) (*in_str)); + repl = g_strdup_printf (tmpl, c); + } + break; + + default: + g_assert_not_reached(); + } + + (*in_str) += sz; + if (read_bytes != NULL) + (*read_bytes) -= sz; + + *out_str = g_stpcpy (*out_str, (const char *) repl); + if (write_bytes != NULL) + *write_bytes -= strlen (repl); + + g_free (repl); + return; +} + + +/** + * @brief Convert non-printable characters to escape sequences + * @param str The original string to be converted + * @param tmpl `printf` template to represent non-printable chars + * @return Converted string, maybe containing escape sequences + * @attention Caller is responsible for using correct template, no + * error checking is performed. This template should handle a single + * Windows unicode path character, which is in UTF-16LE encoding. + */ +static char * +_filter_printable_char (const char *str, + const char *tmpl) +{ + char *p, *np; + gunichar c; + GString *s; + + s = g_string_sized_new (strlen (str) * 2); + p = (char *) str; + while (*p) + { + c = g_utf8_get_char (p); + np = g_utf8_next_char (p); + + /* + * ASCII space is the norm (e.g. 
Program Files), but + * all other kinds of spaces are rare, so escape them too + */ + if (g_unichar_isgraph (c) || (c == 0x20)) + s = g_string_append_len (s, p, (gssize) (np - p)); + else + g_string_append_printf (s, tmpl, c); + + p = np; + } + + return g_string_free (s, FALSE); +} + + +/** + * @brief Convert path to UTF-8 encoding with customizable fallback + * @param path The path string to be converted + * @param from_enc Either a legacy Windows ANSI encoding, or use + * `NULL` to represent Windows wide char encoding (UTF-16LE) + * @param tmpl `printf`-style string template to represent broken + * character. This template should handle either single- or + * double-octet, namely `%u`, `%o`, `%d`, `%i`, `%x` and `%X`. + * @param read Reference to number of successfully read bytes + * @param error Location to store error upon problem + * @return UTF-8 encoded path, or `NULL` if conversion error happens + * @note This is very similar to `g_convert_with_fallback()`, but the + * fallback is a `printf`-style string instead of a fixed string, + * so that different fallback sequence can be used with various output + * format. + * @attention 1. This routine is not for generic charset conversion. + * Extra transformation is intended for path display only. + * @attention 1. Caller is responsible for using correct template, + * almost no error checking is performed. + */ +char * +conv_path_to_utf8_with_tmpl (const char *path, + ssize_t pathlen, + const char *from_enc, + const char *tmpl, + size_t *read, + GError **error) +{ + char *u8_path, *i_ptr, *o_ptr, *result = NULL; + gsize len, r_total, rbyte, wbyte, status, in_ch_width, out_ch_width; + GIConv conv; + + g_return_val_if_fail (path && *path, NULL); + g_return_val_if_fail (tmpl && *tmpl, NULL); + g_return_val_if_fail (! from_enc || *from_enc, NULL); + g_return_val_if_fail (! error || ! *error , NULL); + + /* try the template */ + { + char *s = g_strdup_printf (tmpl, from_enc ? 
0xFF : 0xFFFF); + /* UTF-8 character occupies at most 6 bytes */ + out_ch_width = MAX (strlen(s), 6); + g_free (s); + } + + if (from_enc != NULL) { + in_ch_width = sizeof (char); + len = strnlen (path, (size_t) pathlen); + } else { + in_ch_width = sizeof (gunichar2); + len = ucs2_strnlen (path, (size_t) pathlen); + } + + rbyte = len * in_ch_width; + wbyte = len * out_ch_width; + u8_path = g_malloc0 (wbyte); + + r_total = rbyte; + i_ptr = (char *) path; + o_ptr = u8_path; + + /* Shouldn't fail, from_enc already tested upon start of prog */ + conv = g_iconv_open ("UTF-8", from_enc ? from_enc : "UTF-16LE"); + + g_debug ("Initial: read=%" G_GSIZE_FORMAT ", write=%" G_GSIZE_FORMAT, + rbyte, wbyte); + + /* Pass 1: Convert to UTF-8, all illegal seq become escaped hex */ + while (TRUE) + { + int e; + + if (*i_ptr == '\0') { + if (from_enc != NULL) break; + if (*(i_ptr+1) == '\0') break; /* utf-16: check "\0\0" */ + } + + // GNU iconv may return number of nonreversible conversions + // upon success, but we don't need to worry about it, as + // conversion from code page to UTF-8 would not be nonreversible + if ((gsize) -1 != (status = g_iconv ( + conv, &i_ptr, &rbyte, &o_ptr, &wbyte))) + break; + + e = errno; + + g_debug ("r=%02" G_GSIZE_FORMAT ", w=%02" G_GSIZE_FORMAT + ", stt=%" G_GSIZE_FORMAT " (%s) str=%s", + rbyte, wbyte, status, g_strerror(e), u8_path); + + switch (e) { + case EILSEQ: + case EINVAL: // TODO Handle partial input for EINVAL + if (error && ! *error) { + g_set_error (error, G_CONVERT_ERROR, + G_CONVERT_ERROR_ILLEGAL_SEQUENCE, + _("Illegal sequence or partial input at offset %" G_GSIZE_FORMAT), rbyte); + } + _advance_octet (in_ch_width, &i_ptr, &rbyte, &o_ptr, &wbyte, tmpl); + g_iconv (conv, NULL, NULL, &o_ptr, &wbyte); // reset state + break; + case E2BIG: // TODO realloc instead of Kaboom! 
+ g_assert_not_reached(); + } + } + + g_debug ("r=%02" G_GSIZE_FORMAT ", w=%02" G_GSIZE_FORMAT + ", stt=%" G_GSIZE_FORMAT ", str=%s", rbyte, wbyte, status, u8_path); + + g_iconv_close (conv); + + if (read != NULL) + *read = r_total - rbyte; + + /* Pass 2: Convert all non-printable chars to hex */ + g_return_val_if_fail (g_utf8_validate (u8_path, -1, NULL), NULL); + + result = _filter_printable_char (u8_path, tmpl); + g_free (u8_path); + + return result; +} + + +/** + * @brief Convert escape sequences in delimiters + * @param str The original delimiter string + * @return Escaped delimiter string + * @note Similar to `g_strcompress()`, but only process a few + * characters, unlike glib routine which converts all 8bit chars. + * Currently handles `\\r`, `\\n`, `\\t` and `\\e`. + */ +char * +filter_escapes (const char *str) +{ + GString *result, *debug_str; + char *i = (char *) str; + + g_return_val_if_fail ( (str != NULL) && (*str != '\0'), NULL); + + result = g_string_new (NULL); + do + { + if ( *i != '\\' ) + { + result = g_string_append_c (result, *i); + continue; + } + + switch ( *(++i) ) + { + case 'r': + result = g_string_append_c (result, '\r'); break; + case 'n': + result = g_string_append_c (result, '\n'); break; + case 't': + result = g_string_append_c (result, '\t'); break; + case 'e': + result = g_string_append_c (result, '\x1B'); break; + default: + result = g_string_append_c (result, '\\'); i--; + } + } + while ( *(++i) ); + + debug_str = g_string_new ("filtered delimiter = "); + i = result->str; + do + { + if ( *i >= 0x20 && *i <= 0x7E ) /* problem during linking with g_ascii_isprint */ + debug_str = g_string_append_c (debug_str, *i); + else + g_string_append_printf (debug_str, "\\x%02X", *(unsigned char *) i); + } + while ( *(++i) ); + g_debug ("%s", debug_str->str); + g_string_free (debug_str, TRUE); + return g_string_free (result, FALSE); +} + + +char * +json_escape_path (const char *path) +{ + // TODO g_string_replace from glib 2.68 does it all + + 
char *p = (char *) path; + gunichar c = 0; + GString *s = g_string_new (""); + + while (*p) { + c = g_utf8_get_char (p); + if (c == '\\') + s = g_string_append (s, "\\\\"); + else if (c == '*') + s = g_string_append_c (s, '\\'); + else + s = g_string_append_unichar (s, c); + p = g_utf8_next_char (p); + } + return g_string_free (s, FALSE); +} + diff --git a/src/utils-conv.h b/src/utils-conv.h new file mode 100644 index 0000000..1bb1be8 --- /dev/null +++ b/src/utils-conv.h @@ -0,0 +1,30 @@ +/* + * Copyright (C) 2023, Abel Cheung. + * rifiuti2 is released under Revised BSD License. + * Please see LICENSE file for more info. + */ + +#ifndef _RIFIUTI_UTILS_CONV_H +#define _RIFIUTI_UTILS_CONV_H + +#include +#include + +bool enc_is_ascii_compatible (const char *enc, + GError **error); + +size_t ucs2_strnlen (const char *str, + ssize_t max_sz); + +char * conv_path_to_utf8_with_tmpl (const char *path, + ssize_t pathlen, + const char *from_enc, + const char *tmpl, + size_t *read, + GError **error); + +char * filter_escapes (const char *str); + +char * json_escape_path (const char *path); + +#endif diff --git a/src/utils.c b/src/utils.c index 6f45d58..d7d4bc3 100644 --- a/src/utils.c +++ b/src/utils.c @@ -10,6 +10,7 @@ #include #include +#include "utils-conv.h" #include "utils.h" #ifdef G_OS_WIN32 # include "utils-win.h" @@ -20,7 +21,7 @@ /* Our own error domain */ -G_DEFINE_QUARK (rifiuti-misc-error-quark, rifiuti_fatal_error) +G_DEFINE_QUARK (rifiuti-fatal-error-quark, rifiuti_fatal_error) G_DEFINE_QUARK (rifiuti-record-error-quark, rifiuti_record_error) /* Common function signature for option callbacks */ @@ -36,7 +37,7 @@ DECL_OPT_CALLBACK(_set_output_path); DECL_OPT_CALLBACK(_option_deprecated); DECL_OPT_CALLBACK(_set_opt_delim); DECL_OPT_CALLBACK(_set_opt_noheading); -DECL_OPT_CALLBACK(_set_output_xml); +DECL_OPT_CALLBACK(_set_opt_format); DECL_OPT_CALLBACK(_show_ver_and_exit); /* pre-declared out of laziness */ @@ -87,7 +88,14 @@ static char *os_strings[] = { 
N_("Windows 10 or above") }; -static int output_mode = OUTPUT_NONE; +static char *out_format_name[] = { + "unknown format", + "TSV format", + "XML format", + "JSON format", +}; + +static out_fmt output_format = FORMAT_UNKNOWN; static gboolean no_heading = FALSE; static gboolean use_localtime = FALSE; static gboolean live_mode = FALSE; @@ -103,22 +111,27 @@ static FILE *err_fh = NULL; /*!< unused for Windows console * metarecord *meta = NULL; -/* Options intended for tab delimited mode output only */ -static const GOptionEntry text_options[] = { +/* Options controlling output format */ +static const GOptionEntry out_options[] = { { "delimiter", 't', 0, G_OPTION_ARG_CALLBACK, _set_opt_delim, - N_("String to use as delimiter (TAB by default)"), N_("STRING") + N_("Field delimiter for TSV ['\\t' (TAB) if not given]"), N_("STRING") }, { "no-heading", 'n', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, _set_opt_noheading, - N_("Don't show column header and metadata"), NULL + N_("Don't show TSV column header and metadata"), NULL }, { - "always-utf8", '8', G_OPTION_FLAG_HIDDEN | G_OPTION_FLAG_NO_ARG, + "xml", 'x', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, _option_deprecated, - N_("(This option is deprecated)"), NULL + N_("Deprecated, use '-f xml' in future"), NULL + }, + { + "format", 'f', 0, + G_OPTION_ARG_CALLBACK, _set_opt_format, + N_("'text' (default), 'xml' or 'json'"), N_("FORMAT") }, { 0 } }; @@ -130,11 +143,6 @@ static const GOptionEntry main_options[] = { G_OPTION_ARG_CALLBACK, _set_output_path, N_("Write output to FILE"), N_("FILE") }, - { - "xml", 'x', G_OPTION_FLAG_NO_ARG, - G_OPTION_ARG_CALLBACK, _set_output_xml, - N_("Output in XML format instead of tab-delimited values"), NULL - }, { "localtime", 'z', 0, G_OPTION_ARG_NONE, &use_localtime, @@ -178,38 +186,50 @@ static const GOptionEntry live_options[] = { /* Following routines are command argument handling related */ static gboolean -_set_output_mode (int mode, - GError **error) +_set_out_format (out_fmt 
desired_format, + GError **error) { - if (output_mode == mode) + if (output_format == desired_format) return TRUE; - if (output_mode == OUTPUT_NONE) { - output_mode = mode; + if (output_format == FORMAT_UNKNOWN) { + output_format = desired_format; return TRUE; } - g_set_error_literal (error, G_OPTION_ERROR, G_OPTION_ERROR_FAILED, - _("Plain text format options can not be used in XML mode.")); + g_set_error (error, G_OPTION_ERROR, G_OPTION_ERROR_FAILED, + "Output was already set in %s, but later argument " + "attempts to change to %s", + out_format_name[output_format], + out_format_name[desired_format]); return FALSE; } -/** - * @brief Option callback for setting output mode to XML - * @return `FALSE` if option conflict exists, `TRUE` otherwise - */ static gboolean -_set_output_xml (const gchar *opt_name, - const gchar *value, - gpointer data, - GError **error) +_set_opt_format (const gchar *opt_name, + const gchar *format, + gpointer data, + GError **error) { UNUSED(opt_name); - UNUSED(value); UNUSED(data); - return _set_output_mode (OUTPUT_XML, error); + if (g_strcmp0 (format, "text") == 0) + return _set_out_format (FORMAT_TEXT, error); + else if (g_strcmp0 (format, "tsv") == 0) // aliases + return _set_out_format (FORMAT_TEXT, error); + else if (g_strcmp0 (format, "csv") == 0) + return _set_out_format (FORMAT_TEXT, error); + else if (g_strcmp0 (format, "xml") == 0) + return _set_out_format (FORMAT_XML, error); + else if (g_strcmp0 (format, "json") == 0) + return _set_out_format (FORMAT_JSON, error); + else { + g_set_error (error, G_OPTION_ERROR, G_OPTION_ERROR_BAD_VALUE, + "Illegal output format '%s'", format); + return FALSE; + } } @@ -229,63 +249,7 @@ _set_opt_noheading (const gchar *opt_name, no_heading = TRUE; - return _set_output_mode (OUTPUT_CSV, error); -} - - -/** - * @brief Extra level of escape for escape sequences in delimiters - * @param str The original delimiter string - * @return Escaped delimiter string - * @note Delimiter needs another escape 
because it is later used - * in `printf` routines. It handles `\\r`, `\\n`, `\\t` and `\\e`. - */ -static char * -_filter_escapes (const char *str) -{ - GString *result, *debug_str; - char *i = (char *) str; - - g_return_val_if_fail ( (str != NULL) && (*str != '\0'), NULL); - - result = g_string_new (NULL); - do - { - if ( *i != '\\' ) - { - result = g_string_append_c (result, *i); - continue; - } - - switch ( *(++i) ) - { - case 'r': - result = g_string_append_c (result, '\r'); break; - case 'n': - result = g_string_append_c (result, '\n'); break; - case 't': - result = g_string_append_c (result, '\t'); break; - case 'e': - result = g_string_append_c (result, '\x1B'); break; - default: - result = g_string_append_c (result, '\\'); i--; - } - } - while ( *(++i) ); - - debug_str = g_string_new ("filtered delimiter = "); - i = result->str; - do - { - if ( *i >= 0x20 && *i <= 0x7E ) /* problem during linking with g_ascii_isprint */ - debug_str = g_string_append_c (debug_str, *i); - else - g_string_append_printf (debug_str, "\\x%02X", *(unsigned char *) i); - } - while ( *(++i) ); - g_debug ("%s", debug_str->str); - g_string_free (debug_str, TRUE); - return g_string_free (result, FALSE); + return _set_out_format (FORMAT_TEXT, error); } @@ -312,9 +276,9 @@ _set_opt_delim (const gchar *opt_name, } seen = TRUE; - delim = (*value) ? _filter_escapes (value) : g_strdup (""); + delim = (*value) ? filter_escapes (value) : g_strdup (""); - return _set_output_mode (OUTPUT_CSV, error); + return _set_out_format (FORMAT_TEXT, error); } @@ -366,14 +330,17 @@ _set_output_path (const gchar *opt_name, */ static gboolean _option_deprecated (const gchar *opt_name, - const gchar *unused, + const gchar *value, gpointer data, GError **error) { - UNUSED(unused); + UNUSED(value); UNUSED(data); - UNUSED(error); - g_warning(_("Option '%s' is deprecated and ignored."), opt_name); + if (strcmp (opt_name, "-x") == 0 || strcmp (opt_name, "--xml") == 0) + { + g_warning(_("Option '%s' is deprecated. 
Use '-f xml' in future."), opt_name); + return _set_out_format (FORMAT_XML, error); + } return TRUE; } @@ -410,27 +377,14 @@ _check_legacy_encoding (const gchar *opt_name, return FALSE; } + if (enc_is_ascii_compatible (enc, &conv_err)) { - char *s = g_convert ("C:\\", -1, "UTF-8", enc, NULL, NULL, &conv_err); - gboolean equal = ! g_strcmp0 ("C:\\", s); - g_free (s); - - if (equal) { - legacy_encoding = g_strdup (enc); - return TRUE; - } + legacy_encoding = g_strdup (enc); + return TRUE; } /* everything below is error handling */ - if (conv_err == NULL) { - // Encoding is ASCII incompatible (e.g. EBCDIC). Even if trial - // convert doesn't fail, it would cause application error - // later on. Treat that as conversion error for convenience. - g_set_error_literal (&conv_err, G_CONVERT_ERROR, - G_CONVERT_ERROR_ILLEGAL_SEQUENCE, ""); - } - if (g_error_matches (conv_err, G_CONVERT_ERROR, G_CONVERT_ERROR_NO_CONVERSION)) { g_set_error (error, G_OPTION_ERROR, G_OPTION_ERROR_BAD_VALUE, _("'%s' encoding is not supported by glib library " @@ -552,14 +506,14 @@ _fileargs_handler (GOptionContext *context, /** - * @brief post-callback after handling all text related arguments + * @brief post-callback after handling all output related args * @return Always `TRUE`, denoting success. It never fails. 
*/ static gboolean -_text_default_options (GOptionContext *context, - GOptionGroup *group, - gpointer data, - GError **error) +_set_def_output_opts (GOptionContext *context, + GOptionGroup *group, + gpointer data, + GError **error) { UNUSED (context); UNUSED (group); @@ -570,262 +524,12 @@ _text_default_options (GOptionContext *context, if (delim == NULL) delim = g_strdup ("\t"); - if (output_mode == OUTPUT_NONE) - output_mode = OUTPUT_CSV; + if (output_format == FORMAT_UNKNOWN) + output_format = FORMAT_TEXT; return TRUE; } -/* - * Charset conversion routines - */ - -size_t -ucs2_strnlen (const char *str, size_t max_sz) -{ -#ifdef G_OS_WIN32 - - return wcsnlen_s ((const wchar_t *) str, max_sz); - -#else - - if (str == NULL) - return 0; - - for (size_t i = 0; i < max_sz; i++) { - if (*(str + i*2) == '\0' && *(str + i*2 + 1) == '\0') - return i; - } - return max_sz; - -#endif -} - - -/** - * @brief Move character pointer for specified bytes - * @param sz Must be either 1 or 2, denoting broken byte or broken UCS2 character - * @param in_str Reference to input string to be converted - * @param read_bytes Reference to already read bytes count to keep track of - * @param out_str Reference to output string to be appended - * @param write_bytes Reference to writable bytes count to decrement - * @param tmpl `printf` template to represent the broken character - * @note This is the core of `conv_path_to_utf8_with_tmpl()` doing - * error fallback, converting a single broken char to `printf` output. 
- */ -static void -_advance_octet (size_t sz, - gchar **in_str, - gsize *read_bytes, - gchar **out_str, - gsize *write_bytes, - const char *tmpl) -{ - gchar *repl; - - switch (sz) { - case 1: - { - unsigned char c = *(unsigned char *) (*in_str); - repl = g_strdup_printf (tmpl, c); - } - break; - - case 2: - { - uint16_t c = GUINT16_FROM_LE (*(uint16_t *) (*in_str)); - repl = g_strdup_printf (tmpl, c); - } - break; - - default: - g_assert_not_reached(); - } - - (*in_str) += sz; - if (read_bytes != NULL) - (*read_bytes) -= sz; - - *out_str = g_stpcpy (*out_str, (const char *) repl); - if (write_bytes != NULL) - *write_bytes -= strlen (repl); - - g_free (repl); - return; -} - - -/** - * @brief Convert non-printable characters to escape sequences - * @param str The original string to be converted - * @param tmpl `printf` template to represent non-printable chars - * @return Converted string, maybe containing escape sequences - * @attention Caller is responsible for using correct template, no - * error checking is performed. This template should handle a single - * Windows unicode path character, which is in UTF-16LE encoding. - */ -static char * -_filter_printable_char (const char *str, - const char *tmpl) -{ - char *p, *np; - gunichar c; - GString *s; - - s = g_string_sized_new (strlen (str) * 2); - p = (char *) str; - while (*p) - { - c = g_utf8_get_char (p); - np = g_utf8_next_char (p); - - /* - * ASCII space is the norm (e.g. 
Program Files), but - * all other kinds of spaces are rare, so escape them too - */ - if (g_unichar_isgraph (c) || (c == 0x20)) - s = g_string_append_len (s, p, (gssize) (np - p)); - else - g_string_append_printf (s, tmpl, c); - - p = np; - } - - return g_string_free (s, FALSE); -} - - -/** - * @brief Convert path to UTF-8 encoding with customizable fallback - * @param path The path string to be converted - * @param from_enc Either a legacy Windows ANSI encoding, or use - * `NULL` to represent Windows wide char encoding (UTF-16LE) - * @param tmpl `printf`-style string template to represent broken character - * @param read Reference to number of successfully read bytes - * @param error Location to store error upon problem - * @return UTF-8 encoded path, or `NULL` if conversion error happens - * @note This is very similar to `g_convert_with_fallback()`, but the - * fallback is a `printf`-style string instead of a fixed string. - * @attention 1. This routine is not for generic charset conversion. - * Only supply encoding used in Windows ANSI code page, or use `NULL` - * for unicode path. - * @attention 1. Caller is responsible for using correct template, no - * error checking is performed. - * This template should handle either single- or double-octet, namely - * `%u`, `%o`, `%d`, `%i`, `%x` and `%X`. `%c` is no good since byte - * sequence concerned can't be converted to proper UTF-8 character. - */ -char * -conv_path_to_utf8_with_tmpl (const char *path, - const char *from_enc, - const char *tmpl, - size_t *read, - GError **error) -{ - char *u8_path, *i_ptr, *o_ptr, *result = NULL; - gsize len, r_total, rbyte, wbyte, status, in_ch_width, out_ch_width; - GIConv conv; - gboolean will_set_error = FALSE; // avoid overwriting error - - g_return_val_if_fail (! from_enc || *from_enc, NULL); - g_return_val_if_fail (tmpl && *tmpl, NULL); - g_return_val_if_fail (! error || ! *error, NULL); - - /* try the template */ - { - char *s = g_strdup_printf (tmpl, from_enc ? 
0xFF : 0xFFFF); - /* UTF-8 character occupies at most 6 bytes */ - out_ch_width = MAX (strlen(s), 6); - g_free (s); - } - - if (from_enc != NULL) { - in_ch_width = sizeof (char); - len = strnlen (path, WIN_PATH_MAX); - } else { - in_ch_width = sizeof (gunichar2); - len = ucs2_strnlen (path, WIN_PATH_MAX); - } - - if (! len) - return NULL; - - rbyte = len * in_ch_width; - wbyte = len * out_ch_width; - u8_path = g_malloc0 (wbyte); - - r_total = rbyte; - i_ptr = (char *) path; - o_ptr = u8_path; - - /* Shouldn't fail, from_enc already tested upon start of prog */ - conv = g_iconv_open ("UTF-8", from_enc ? from_enc : "UTF-16LE"); - - g_debug ("Initial: read=%" G_GSIZE_FORMAT ", write=%" G_GSIZE_FORMAT, - rbyte, wbyte); - - /* Pass 1: Convert to UTF-8, all illegal seq become escaped hex */ - while (TRUE) - { - int e; - - if (*i_ptr == '\0') { - if (from_enc != NULL) break; - if (*(i_ptr+1) == '\0') break; /* utf-16: check "\0\0" */ - } - - status = g_iconv (conv, &i_ptr, &rbyte, &o_ptr, &wbyte); - e = errno; - - if ( status != (gsize) -1 ) break; - - g_debug ("r=%02" G_GSIZE_FORMAT ", w=%02" G_GSIZE_FORMAT - ", stt=%" G_GSIZE_FORMAT " (%s) str=%s", - rbyte, wbyte, status, g_strerror(e), u8_path); - - /* XXX Should I consider the possibility of odd bytes for EINVAL? */ - switch (e) { - case EILSEQ: - case EINVAL: - _advance_octet (in_ch_width, &i_ptr, &rbyte, &o_ptr, &wbyte, tmpl); - /* reset state, hopefully Windows don't use stateful encoding at all */ - g_iconv (conv, NULL, NULL, &o_ptr, &wbyte); - will_set_error = TRUE; - break; - case E2BIG: - /* Should have already allocated enough buffer. Let it KABOOM! otherwise. 
*/ - g_assert_not_reached(); - } - } - - if (will_set_error) - { - if (from_enc) - g_set_error (error, R2_REC_ERROR, R2_REC_ERROR_CONV_PATH, - _("Path contains character(s) that could not be " - "interpreted in %s encoding"), from_enc); - else - g_set_error_literal (error, R2_REC_ERROR, R2_REC_ERROR_CONV_PATH, - _("Path contains broken unicode character(s)")); - } - - g_debug ("r=%02" G_GSIZE_FORMAT ", w=%02" G_GSIZE_FORMAT - ", stt=%" G_GSIZE_FORMAT ", str=%s", rbyte, wbyte, status, u8_path); - - g_iconv_close (conv); - - if (read != NULL) - *read = r_total - rbyte; - - /* Pass 2: Convert all non-printable chars to hex */ - g_return_val_if_fail (g_utf8_validate (u8_path, -1, NULL), NULL); - - result = _filter_printable_char (u8_path, tmpl); - g_free (u8_path); - - return result; -} - /** * @brief Converts Windows FILETIME number to glib counterpart @@ -898,7 +602,7 @@ _opt_ctxt_setup (GOptionContext **context, rbin_type type) { char *desc_str; - GOptionGroup *group, *txt_group; + GOptionGroup *main_group, *output_group; /* FIXME Sneaky metadata modification! 
Think about cleaner way */ meta->type = type; @@ -912,17 +616,17 @@ _opt_ctxt_setup (GOptionContext **context, g_free (desc_str); /* main group */ - group = g_option_group_new (NULL, NULL, NULL, meta, NULL); + main_group = g_option_group_new (NULL, NULL, NULL, meta, NULL); - g_option_group_add_entries (group, main_options); + g_option_group_add_entries (main_group, main_options); switch (type) { case RECYCLE_BIN_TYPE_FILE: - g_option_group_add_entries (group, rbinfile_options); + g_option_group_add_entries (main_group, rbinfile_options); break; case RECYCLE_BIN_TYPE_DIR: #if (defined G_OS_WIN32 || defined __GLIBC__) - g_option_group_add_entries (group, live_options); + g_option_group_add_entries (main_group, live_options); #else UNUSED (live_options); #endif @@ -930,19 +634,19 @@ _opt_ctxt_setup (GOptionContext **context, default: break; } - g_option_group_set_parse_hooks (group, NULL, + g_option_group_set_parse_hooks (main_group, NULL, (GOptionParseFunc) _fileargs_handler); - g_option_context_set_main_group (*context, group); + g_option_context_set_main_group (*context, main_group); - /* text group */ - txt_group = g_option_group_new ("text", - _("Plain text output options:"), - N_("Show plain text output options"), NULL, NULL); + /* output format arg group */ + output_group = g_option_group_new ("format", + _("Output format options:"), + N_("Show output formatting options"), NULL, NULL); - g_option_group_add_entries (txt_group, text_options); + g_option_group_add_entries (output_group, out_options); g_option_group_set_parse_hooks ( - txt_group, NULL, _text_default_options); - g_option_context_add_group (*context, txt_group); + output_group, NULL, _set_def_output_opts); + g_option_context_add_group (*context, output_group); g_option_context_set_help_enabled (*context, TRUE); } @@ -1003,8 +707,8 @@ _free_record_cb (rbin_struct *record) { g_free (record->index_s); g_date_time_unref (record->deltime); - g_free (record->uni_path); - g_free (record->legacy_path); + 
g_free (record->raw_uni_path); + g_free (record->raw_legacy_path); g_clear_error (&record->error); g_free (record); } @@ -1056,33 +760,6 @@ rifiuti_init (rbin_type type, } -/*! - * Wrapper of g_utf16_to_utf8 for big endian system. - * Always assume string is nul-terminated. (Unused now?) - */ -char * -utf16le_to_utf8 (const gunichar2 *str, - glong *items_read, - glong *items_written, - GError **error) -{ -#if ((G_BYTE_ORDER) == (G_LITTLE_ENDIAN)) - return g_utf16_to_utf8 (str, -1, items_read, items_written, error); -#else - - gunichar2 *buf; - char *ret; - - /* should be guaranteed to succeed */ - buf = (gunichar2 *) g_convert ((const char *) str, -1, "UTF-16BE", - "UTF-16LE", NULL, NULL, NULL); - ret = g_utf16_to_utf8 (buf, -1, items_read, items_written, error); - g_free (buf); - return ret; -#endif -} - - /** * @brief Wrapper of `g_mkstemp()` that returns file handle * @param fh Reference to `FILE` pointer to store file handle @@ -1346,7 +1023,7 @@ _close_handles (void) * @param meta Pointer to metadata structure */ static void -_print_csv_header (metarecord *meta) +_print_text_header (const metarecord *meta) { { char *rbin_path = g_filename_display_name (meta->filename); @@ -1436,7 +1113,7 @@ _print_csv_header (metarecord *meta) * @param meta Pointer to metadata structure */ static void -_print_xml_header (metarecord *meta) +_print_xml_header (const metarecord *meta) { GString *result; @@ -1472,138 +1149,229 @@ _print_xml_header (metarecord *meta) /** - * @brief Stub routine for printing header - * @note Calls other printing routine depending on output mode + * @brief Print preamble for JSON output + * @param meta Pointer to metadata structure */ static void -_print_header (void) +_print_json_header (const metarecord *meta) { - if (no_heading) return; + g_print ("{\n \"format\": \"%s\",\n", + (meta->type == RECYCLE_BIN_TYPE_FILE) ? 
"file" : "dir"); - switch (output_mode) - { - case OUTPUT_CSV: - _print_csv_header (meta); - break; + if (meta->version >= 0) /* can be found and not error */ + g_print (" \"version\": %" PRId64 ",\n", meta->version); + else + g_print (" \"version\": null,\n"); - case OUTPUT_XML: - _print_xml_header (meta); - break; + if (meta->type == RECYCLE_BIN_TYPE_FILE && meta->total_entry > 0) + g_print (" \"ever_existed\": %" PRIu32 ",\n", meta->total_entry); - default: - g_assert_not_reached(); + { + char *s = g_filename_display_name (meta->filename); + char *rbin_path = json_escape_path (s); + g_print (" \"path\": \"%s\",\n", rbin_path); + g_free (s); + g_free (rbin_path); } + + g_print (" \"records\": [\n"); } -/** - * @brief Print content of each recycle bin record - * @param record Pointer to each recycle bin record - * @param meta Pointer to metadata structure - */ static void -_print_record_cb (rbin_struct *record, - const metarecord *meta) +_print_text_record (rbin_struct *record, + const metarecord *meta) { - char *out_fname, *index, *size = NULL; - char *outstr = NULL, *deltime = NULL; - GDateTime *dt; + char *outstr; + char **header; + GDateTime *dt; g_return_if_fail (record != NULL); - index = (meta->type == RECYCLE_BIN_TYPE_FILE) ? - g_strdup_printf ("%u", record->index_n) : + header = (char **) g_malloc0_n (6, sizeof(gpointer)); + + header[0] = (meta->type == RECYCLE_BIN_TYPE_FILE) ? + g_strdup_printf ("%" PRIu32, record->index_n) : g_strdup (record->index_s); dt = use_localtime ? g_date_time_to_local (record->deltime): g_date_time_ref (record->deltime); + header[1] = g_date_time_format (dt, "%F %T"); - out_fname = legacy_encoding ? - record->legacy_path : record->uni_path; - out_fname = out_fname ? - g_strdup (out_fname) : g_strdup ("???"); + header[2] = + (record->gone == FILESTATUS_EXISTS) ? g_strdup("FALSE") : + (record->gone == FILESTATUS_GONE ) ? 
g_strdup("TRUE") : + g_strdup("???") ; - switch (output_mode) - { - case OUTPUT_CSV: + header[3] = (record->filesize == G_MAXUINT64) ? // faulty + g_strdup ("???") : + g_strdup_printf ("%" PRIu64, record->filesize); - deltime = g_date_time_format (dt, "%F %T"); + if (legacy_encoding) + header[4] = conv_path_to_utf8_with_tmpl (record->raw_legacy_path, + -1, legacy_encoding, "<\\%02X>", NULL, NULL); + else + header[4] = conv_path_to_utf8_with_tmpl (record->raw_uni_path, + -1, NULL, "<\\u%04X>", NULL, NULL); + if (! header[4]) + header[4] = g_strdup ("???"); - if ( record->filesize == G_MAXUINT64 ) /* faulty */ - size = g_strdup ("???"); - else - size = g_strdup_printf ("%" PRIu64, record->filesize); + outstr = g_strjoinv (delim, header); + g_print ("%s\n", outstr); - const char *gone = - record->gone == FILESTATUS_EXISTS ? "FALSE" : - record->gone == FILESTATUS_GONE ? "TRUE" : - "???" ; - outstr = g_strjoin (delim, index, deltime, gone, size, out_fname, NULL); + g_free (outstr); + g_date_time_unref (dt); + g_strfreev (header); +} - g_print ("%s\n", outstr); - break; +static void +_print_xml_record (rbin_struct *record, + const metarecord *meta) +{ + char *path, *dt_str; + GDateTime *dt; + GString *s; - case OUTPUT_XML: - { - GString *s = g_string_new (NULL); - - deltime = use_localtime ? g_date_time_format (dt, "%FT%T%z" ): - g_date_time_format (dt, "%FT%TZ"); - - g_string_printf (s, - " gone == FILESTATUS_GONE ) ? "true" : - (record->gone == FILESTATUS_EXISTS) ? 
"false": - "unknown"); - - if ( record->filesize == G_MAXUINT64 ) /* faulty */ - g_string_append_printf (s, " size=\"-1\""); - else - g_string_append_printf (s, - " size=\"%" PRIu64 "\"", record->filesize); - - g_string_append_printf (s, ">\n" - " \n" - " \n", out_fname); - - outstr = g_string_free (s, FALSE); - g_print ("%s", outstr); - } - break; + g_return_if_fail (record != NULL); - default: - g_assert_not_reached(); + s = g_string_new (" type == RECYCLE_BIN_TYPE_FILE) + g_string_append_printf (s, " index=\"%" PRIu32 "\"", record->index_n); + else + g_string_append_printf (s, " index=\"%s\"", record->index_s); + + if (use_localtime) + { + dt = g_date_time_to_local (record->deltime); + dt_str = g_date_time_format (dt, "%FT%T%z"); } + else + { + dt = g_date_time_ref (record->deltime); + dt_str = g_date_time_format (dt, "%FT%TZ"); + } + g_string_append_printf (s, " time=\"%s\"", dt_str); + + g_string_append_printf (s, " gone=\"%s\"", + (record->gone == FILESTATUS_GONE ) ? "true" : + (record->gone == FILESTATUS_EXISTS) ? 
"false" : + "unknown"); + + if (record->filesize == G_MAXUINT64) // faulty + g_string_append_printf (s, " size=\"-1\""); + else + g_string_append_printf (s, + " size=\"%" PRIu64 "\"", record->filesize); + + // Still need to be converted despite using CDATA, otherwise + // could be writing garbage on screen or into file + if (legacy_encoding) + path = conv_path_to_utf8_with_tmpl (record->raw_legacy_path, + -1, legacy_encoding, "&#x%02X;", NULL, NULL); + else + path = conv_path_to_utf8_with_tmpl (record->raw_uni_path, + -1, NULL, "&#x%04X;", NULL, NULL); + + if (path) + g_string_append_printf (s, ">\n" + " \n" + " \n", path); + else + s = g_string_append (s, ">\n \n \n"); + + g_print ("%s", s->str); + g_string_free (s, TRUE); + g_date_time_unref (dt); - g_free (outstr); - g_free (out_fname); - g_free (deltime); - g_free (size); - g_free (index); + g_free (path); + g_free (dt_str); } -/** - * @brief Print footer of recycle bin data - */ static void -_print_footer (void) +_print_json_record (rbin_struct *record, + const metarecord *meta) { - switch (output_mode) + char *tmp, *path, *dt_str; + GDateTime *dt; + GString *s; + + g_return_if_fail (record != NULL); + + s = g_string_new (" {"); + + if (meta->type == RECYCLE_BIN_TYPE_FILE) + g_string_append_printf (s, "\"index\": %" PRIu32, record->index_n); + else + g_string_append_printf (s, "\"index\": \"%s\"", record->index_s); + + if (use_localtime) { - case OUTPUT_CSV: - /* do nothing */ - break; + dt = g_date_time_to_local (record->deltime); + dt_str = g_date_time_format (dt, "%FT%T%z"); + } + else + { + dt = g_date_time_ref (record->deltime); + dt_str = g_date_time_format (dt, "%FT%TZ"); + } + g_string_append_printf (s, ", \"time\": \"%s\"", dt_str); - case OUTPUT_XML: - g_print ("%s", "\n"); - break; + g_string_append_printf (s, ", \"gone\": %s", + (record->gone == FILESTATUS_GONE ) ? "true" : + (record->gone == FILESTATUS_EXISTS) ? 
"false": + "null"); + + if (record->filesize == G_MAXUINT64) // faulty + g_string_append_printf (s, ", \"size\": null"); + else + g_string_append_printf (s, + ", \"size\": %" PRIu64, record->filesize); - default: - g_assert_not_reached(); + if (legacy_encoding) + { + // JSON spec doesn't even allow encoding raw byte data, + // so transform it like text output format + tmp = conv_path_to_utf8_with_tmpl (record->raw_legacy_path, + -1, legacy_encoding, "<\\%02X>", NULL, NULL); + } + else + { + // HACK \u sequence collides with path separator, which + // will be processed in json escaping routine. Use a temp + // char to avoid collision and convert it back later + tmp = conv_path_to_utf8_with_tmpl (record->raw_uni_path, + -1, NULL, "*u%04X", NULL, NULL); } + path = json_escape_path (tmp); + + if (path) + g_string_append_printf (s, ", \"path\": \"%s\"},\n", path); + else + s = g_string_append (s, ", \"path\": null},\n"); + + g_print ("%s", s->str); + + g_date_time_unref (dt); + g_free (tmp); + g_free (path); + g_free (dt_str); +} + + +static void +_print_xml_footer (void) +{ + g_print ("%s", "\n"); +} + + +static void +_print_json_footer (void) +{ + g_print (" ]\n}\n"); } @@ -1617,6 +1385,9 @@ dump_content (GError **error) { FILE *tmp_fh = NULL, *prev_fh = NULL; char *tmp_path = NULL; + void (*print_header_func)(const metarecord *); + void (*print_record_func)(rbin_struct *, const metarecord *); + void (*print_footer_func)(); if (output_loc) { @@ -1630,9 +1401,33 @@ dump_content (GError **error) return FALSE; } - _print_header (); - g_ptr_array_foreach (meta->records, (GFunc) _print_record_cb, meta); - _print_footer (); + switch (output_format) + { + case FORMAT_TEXT: + print_header_func = no_heading ? 
+ NULL : &_print_text_header; + print_record_func = &_print_text_record; + print_footer_func = NULL; + break; + case FORMAT_XML: + print_header_func = &_print_xml_header; + print_record_func = &_print_xml_record; + print_footer_func = &_print_xml_footer; + break; + case FORMAT_JSON: + print_header_func = &_print_json_header; + print_record_func = &_print_json_record; + print_footer_func = &_print_json_footer; + break; + + default: g_assert_not_reached(); + } + + if (print_header_func != NULL) + (*print_header_func) (meta); + g_ptr_array_foreach (meta->records, (GFunc) print_record_func, meta); + if (print_footer_func != NULL) + (*print_footer_func) (); if (!tmp_path) return TRUE; @@ -1780,3 +1575,26 @@ rifiuti_cleanup (void) cleanup_windows_res (); #endif } + + +void +hexdump (void *start, + size_t size) +{ + GString *s = g_string_new (""); + size_t i = 0; + do + { + if (i % 16 == 0) + { + if (s->len > 0) + { + g_debug ("%s", s->str); + s = g_string_assign (s, ""); + } + g_string_append_printf (s, "%04zX ", i); + } + g_string_append_printf (s, "%02" PRIX8 " ", *(uint8_t *) (start+i)); + } + while (i++ < size); +} diff --git a/src/utils.h b/src/utils.h index e34cf96..4d9d8dd 100644 --- a/src/utils.h +++ b/src/utils.h @@ -63,12 +63,13 @@ typedef enum VERSION_ME_03, } detected_os_ver; -enum +typedef enum _out_fmt { - OUTPUT_NONE = 0, - OUTPUT_CSV, - OUTPUT_XML -}; + FORMAT_UNKNOWN, + FORMAT_TEXT, + FORMAT_XML, + FORMAT_JSON, +} out_fmt; /** * @brief Whether original trashed file still exists @@ -165,7 +166,6 @@ typedef struct _rbin_meta */ typedef struct _rbin_struct { - /** * @brief version of each index file * @note `meta.version` keeps the global status of whole dir, @@ -173,7 +173,6 @@ typedef struct _rbin_struct * @attention For `$Recycle.bin` only */ uint64_t version; - /** * @brief Chronological index number for INFO2 * @attention For `INFO2` only @@ -209,7 +208,8 @@ typedef struct _rbin_struct * @note Original path was stored in index file in UTF-16 encoding * 
since Windows 2000. The path is converted to UTF-8 encoding and stored here . */ - char *uni_path; + char *raw_uni_path; + /** * @brief ANSI encoded trash file original path * @note Until Windows 2003, index file preserves trashed file path in @@ -217,7 +217,8 @@ typedef struct _rbin_struct * @attention For `INFO2` only. Can be either full path or using 8.3 format, * depending on Windows version and code page used. */ - char *legacy_path; + char *raw_legacy_path; + /** * @brief Whether original trashed file is gone * @note Trash file can be detected if it still exists, but via very @@ -265,23 +266,14 @@ gboolean rifiuti_init (rbin_type type, GDateTime * win_filetime_to_gdatetime (int64_t win_filetime); -char * utf16le_to_utf8 (const gunichar2 *str, - glong *items_read, - glong *items_written, - GError **error) - G_GNUC_UNUSED; - gboolean dump_content (GError **error); -char * conv_path_to_utf8_with_tmpl (const char *str, - const char *from_enc, - const char *tmpl, - size_t *read, - GError **error); - exitcode rifiuti_handle_global_error (GError *error); gboolean rifiuti_handle_record_error (void); void rifiuti_cleanup (void); +void hexdump (void *start, + size_t size); + #endif diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index bb718a1..989df1e 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -221,6 +221,7 @@ target_link_directories (test_glib_iconv PRIVATE ${GLIB_LIBRARY_DIRS}) include(cli-option) include(crafted) include(encoding) +include(json) include(parse-info2) include(parse-rdir) include(read-write) diff --git a/test/cmake/_try_encoding.cmake b/test/cmake/_try_encoding.cmake index f4604d9..9c569d8 100644 --- a/test/cmake/_try_encoding.cmake +++ b/test/cmake/_try_encoding.cmake @@ -34,6 +34,10 @@ set(args -l ${encoding} ${INFO2}) if(DEFINED OUTFILE) list(APPEND args -o ${OUTFILE}) endif() +if(DEFINED EXTRA_ARGS) + string(REPLACE "|" ";" EXTRA_ARGS "${EXTRA_ARGS}") + list(APPEND args ${EXTRA_ARGS}) +endif() execute_process( COMMAND 
${RIFIUTI} ${args} # COMMAND_ECHO STDOUT diff --git a/test/cmake/cli-option.cmake b/test/cmake/cli-option.cmake index 1428706..f317acf 100644 --- a/test/cmake/cli-option.cmake +++ b/test/cmake/cli-option.cmake @@ -35,11 +35,11 @@ endfunction() addWithFileOptTest(LongHead --no-heading ) addWithFileOptTest(LongSep --delimiter=: ) addWithFileOptTest(LongTime --localtime ) -addWithFileOptTest(LongXml --xml ) +addWithFileOptTest(LongXml --format xml ) addWithFileOptTest(ShortHead -n ) addWithFileOptTest(ShortSep -t : ) addWithFileOptTest(ShortTime -z ) -addWithFileOptTest(ShortXml -x ) +addWithFileOptTest(ShortXml -f xml ) function(addBadBareOptTest name) @@ -104,20 +104,24 @@ set_tests_properties(d_NullArgOptTestOut f_NullArgOptTestOut f_NullArgOptTestEnc add_bintype_label(d_NullArgOptTestOut f_NullArgOptTestOut f_NullArgOptTestEnc) -function(addBadComboOptTest name) - add_test(NAME d_BadComboOptTest${name} COMMAND +function(addBadComboOptTest id) + add_test(NAME d_BadComboOptTest${id} COMMAND rifiuti-vista ${ARGN} ${sample_dir}/dir-sample1) - add_test(NAME f_BadComboOptTest${name} COMMAND + add_test(NAME f_BadComboOptTest${id} COMMAND rifiuti ${ARGN} ${sample_dir}/INFO2-sample1) - set_tests_properties(d_BadComboOptTest${name} f_BadComboOptTest${name} + set_tests_properties(d_BadComboOptTest${id} f_BadComboOptTest${id} PROPERTIES LABELS "arg;xfail" - PASS_REGULAR_EXPRESSION "can not be used in XML mode") - add_bintype_label(d_BadComboOptTest${name} f_BadComboOptTest${name}) + PASS_REGULAR_EXPRESSION "Output was already set in .+ format, but later argument attempts to change to .+ format") + add_bintype_label(d_BadComboOptTest${id} f_BadComboOptTest${id}) endfunction() -addBadComboOptTest(1 -x -t:) -addBadComboOptTest(2 -n -x) +# implicit text options +addBadComboOptTest(1 -f xml -t:) +addBadComboOptTest(2 -n -f xml) +# explicit option conflict +addBadComboOptTest(3 -f tsv -f json) +addBadComboOptTest(4 -f xml -f text) function(addMultiInputTest name) @@ -144,7 +148,7 
@@ function(addMissingInputTest name) add_bintype_label(d_MissingInputTest${name} f_MissingInputTest${name}) endfunction() -addMissingInputTest(1 -x) +addMissingInputTest(1 -f xml) addMissingInputTest(2 -t :) addMissingInputTest(3 -z -o file1 -n) diff --git a/test/cmake/encoding.cmake b/test/cmake/encoding.cmake index 2f2a2fb..317816b 100644 --- a/test/cmake/encoding.cmake +++ b/test/cmake/encoding.cmake @@ -132,3 +132,34 @@ add_encoding_test_with_cwd(f_LegacyUNC_Prep generate_simple_comparison_test("LegacyUNC" 1 "" "INFO2-2k-tw-uncpath.txt" "encoding") + +# +# JSON output +# + +add_encoding_test_with_cwd(f_JsonInfo2Win95_Prep + ${sample_dir} + -DINFO2=INFO-95-ja-1 + -DCHOICES=CP932|Windows-932|IBM-943|SJIS|JIS_X0208|SHIFT_JIS|SHIFT-JIS + -DOUTFILE=${bindir}/f_JsonInfo2Win95.output + -DEXTRA_ARGS=-f|json +) + +generate_simple_comparison_test("JsonInfo2Win95" 1 + "" "INFO-95-ja-1.json" "encoding|json") + + +add_encoding_test_with_cwd(f_JsonWin95WrongEnc_Prep + ${sample_dir} + -DINFO2=INFO-95-ja-1 + -DCHOICES=CP1255|MS-HEBR|WINDOWS-1255|HEBREW|ISO-8859-8|ISO-IR-138|ISO8859-8|ISO_8859-8|ISO_8859-8:1988|CSISOLATINHEBREW + -DOUTFILE=${bindir}/f_JsonWin95WrongEnc.output + -DEXTRA_ARGS=-f|json +) + +set_tests_properties(f_JsonWin95WrongEnc_Prep + PROPERTIES + PASS_REGULAR_EXPRESSION "could not be interpreted in .+ encoding") + +generate_simple_comparison_test("JsonWin95WrongEnc" 1 + "" "INFO-95-ja-1-in-cp1255.json" "encoding|xfail|json") diff --git a/test/cmake/json.cmake b/test/cmake/json.cmake new file mode 100644 index 0000000..1183a3f --- /dev/null +++ b/test/cmake/json.cmake @@ -0,0 +1,41 @@ +# Copyright (C) 2023, Abel Cheung +# rifiuti2 is released under Revised BSD License. +# Please see LICENSE file for more info. 
+ +# +# Verify JSON output works as intended +# + +function(createJsonOutputTests) + +set(ids + "JsonInfo2Empty" "JsonInfo2WinXP" "JsonInfo2Win98" + "JsonRdirVista" "JsonRdirWin10" "JsonRdirUNC19" +) + +set(files + "INFO2-empty" "INFO2-sample1" "INFO2-sample2" + "dir-sample1" "dir-win10-01" "dir-2019-uncpath" +) + +set(encs + "" "" "CP1252" "" "" +) + +foreach(id file enc IN ZIP_LISTS ids files encs) + if (IS_DIRECTORY ${sample_dir}/${file}) + set(is_info2 0) + else() + set(is_info2 1) + endif() + set(args -f json) + if(enc) + list(APPEND args -l ${enc}) + endif() + generate_simple_comparison_test(${id} ${is_info2} + ${file} ${file}.json "parse|json" ${args}) +endforeach() + +endfunction() + +createJsonOutputTests() diff --git a/test/rifiuti-schema.json b/test/rifiuti-schema.json new file mode 100644 index 0000000..b309517 --- /dev/null +++ b/test/rifiuti-schema.json @@ -0,0 +1,83 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "https://raw.githubusercontent.com/abelcheung/rifiuti2/0.8.1/test/rifiuti-schema.json", + "title": "rifiuti", + "description": "JSON schema for rifiuti json formatted output", + "type": "object", + "definitions": { + "nonNegativeInteger": { + "type": "integer", + "minimum": 0 + } + }, + "properties": { + "format": { + "description": "Recycle bin format", + "type": "string" + }, + "version": { + "allOf": [ + { "$ref": "#/definitions/nonNegativeInteger" }, + { "description": "Version embedded in index file header" } + ] + }, + "ever_existed": { + "allOf": [ + { "$ref": "#/definitions/nonNegativeInteger" }, + { "description": "Total items ever existed in recycle bin" } + ] + }, + "path": { + "description": "Location of recycle bin", + "type": "string" + }, + "records": { + "description": "All recycle bin records", + "type": "array", + "uniqueItems": true, + "minItems": 0, + "items": { + "type": "object", + "properties": { + "index": { + "anyOf": [ + { "$ref": "#/definitions/nonNegativeInteger" }, + { "type": "string" } + 
] + }, + "time": { + "type": "string" + }, + "gone": { + "anyOf": [ + { "type": "boolean" }, + { "type": "null" } + ] + }, + "size": { + "anyOf": [ + { "$ref": "#/definitions/nonNegativeInteger" }, + { "type": "null" } + ] + }, + "path": { + "type": "string" + } + }, + "required": [ + "index", + "time", + "gone", + "size", + "path" + ] + } + } + }, + "required": [ + "format", + "version", + "path", + "records" + ] +} diff --git a/test/samples/INFO-95-ja-1-in-cp1255.json b/test/samples/INFO-95-ja-1-in-cp1255.json new file mode 100644 index 0000000..ea842d3 --- /dev/null +++ b/test/samples/INFO-95-ja-1-in-cp1255.json @@ -0,0 +1,19 @@ +{ + "format": "file", + "version": 0, + "ever_existed": 16, + "path": "INFO-95-ja-1", + "records": [ + {"index": 1, "time": "2015-05-11T05:59:49Z", "gone": false, "size": 32768, "path": "D:\\WINDOWS\\ֳ<\\DE>½¸ִ¯ּ<\\DF>\\The Microsoft Network ‚ּ¾¯ִ±¯ּ<\\DF>.lnk"}, + {"index": 2, "time": "2015-05-11T06:00:25Z", "gone": false, "size": 950272, "path": "D:\\WINDOWS\\ֳ<\\DE>½¸ִ¯ּ<\\DF>\\<\\90>V‹Kֻ<\\DE>¯ִֿ¯ּ<\\DF> ²ׂ°¼<\\DE>.bmp"}, + {"index": 3, "time": "2015-05-11T07:19:25Z", "gone": false, "size": 32768, "path": "D:\\WINDOWS\\ֳ<\\DE>½¸ִ¯ּ<\\DF>\\<\\90>V‹Kֳ·½ִ•¶<\\8F>‘.txt"}, + {"index": 4, "time": "2015-05-11T09:48:21Z", "gone": false, "size": 589824, "path": "D:\\My Documents\\DirectX-V8.0a\\bda.cab"}, + {"index": 5, "time": "2015-05-11T09:48:21Z", "gone": false, "size": 589824, "path": "D:\\My Documents\\DirectX-V8.0a\\bdant.cab"}, + {"index": 6, "time": "2015-05-11T09:48:21Z", "gone": false, "size": 65536, "path": "D:\\My Documents\\DirectX-V8.0a\\cfgmgr32.dll"}, + {"index": 11, "time": "2015-05-11T09:48:23Z", "gone": false, "size": 163840, "path": "D:\\My Documents\\DirectX-V8.0a\\dxsetup.exe"}, + {"index": 12, "time": "2015-05-11T09:48:23Z", "gone": false, "size": 360448, "path": "D:\\My Documents\\DirectX-V8.0a\\setupapi.dll"}, + {"index": 13, "time": "2015-05-11T09:59:19Z", "gone": false, "size": 32768, "path": 
"D:\\WINDOWS\\ֳ<\\DE>½¸ִ¯ּ<\\DF>\\Connect to the Internet.LNK"}, + {"index": 14, "time": "2015-05-11T09:59:22Z", "gone": false, "size": 32768, "path": "D:\\WINDOWS\\ֳ<\\DE>½¸ִ¯ּ<\\DF>\\Outlook Express.lnk"}, + {"index": 15, "time": "2015-05-18T00:45:09Z", "gone": false, "size": 32768, "path": "D:\\WINDOWS\\ֳ<\\DE>½¸ִ¯ּ<\\DF>\\<\\90>V‹Kֳ·½ִ•¶<\\8F>‘.txt"}, + ] +} diff --git a/test/samples/INFO-95-ja-1.json b/test/samples/INFO-95-ja-1.json new file mode 100644 index 0000000..95066d4 --- /dev/null +++ b/test/samples/INFO-95-ja-1.json @@ -0,0 +1,19 @@ +{ + "format": "file", + "version": 0, + "ever_existed": 16, + "path": "INFO-95-ja-1", + "records": [ + {"index": 1, "time": "2015-05-11T05:59:49Z", "gone": false, "size": 32768, "path": "D:\\WINDOWS\\デスクトップ\\The Microsoft Network のセットアップ.lnk"}, + {"index": 2, "time": "2015-05-11T06:00:25Z", "gone": false, "size": 950272, "path": "D:\\WINDOWS\\デスクトップ\\新規ビットマップ イメージ.bmp"}, + {"index": 3, "time": "2015-05-11T07:19:25Z", "gone": false, "size": 32768, "path": "D:\\WINDOWS\\デスクトップ\\新規テキスト文書.txt"}, + {"index": 4, "time": "2015-05-11T09:48:21Z", "gone": false, "size": 589824, "path": "D:\\My Documents\\DirectX-V8.0a\\bda.cab"}, + {"index": 5, "time": "2015-05-11T09:48:21Z", "gone": false, "size": 589824, "path": "D:\\My Documents\\DirectX-V8.0a\\bdant.cab"}, + {"index": 6, "time": "2015-05-11T09:48:21Z", "gone": false, "size": 65536, "path": "D:\\My Documents\\DirectX-V8.0a\\cfgmgr32.dll"}, + {"index": 11, "time": "2015-05-11T09:48:23Z", "gone": false, "size": 163840, "path": "D:\\My Documents\\DirectX-V8.0a\\dxsetup.exe"}, + {"index": 12, "time": "2015-05-11T09:48:23Z", "gone": false, "size": 360448, "path": "D:\\My Documents\\DirectX-V8.0a\\setupapi.dll"}, + {"index": 13, "time": "2015-05-11T09:59:19Z", "gone": false, "size": 32768, "path": "D:\\WINDOWS\\デスクトップ\\Connect to the Internet.LNK"}, + {"index": 14, "time": "2015-05-11T09:59:22Z", "gone": false, "size": 32768, "path": "D:\\WINDOWS\\デスクトップ\\Outlook Express.lnk"}, + 
{"index": 15, "time": "2015-05-18T00:45:09Z", "gone": false, "size": 32768, "path": "D:\\WINDOWS\\デスクトップ\\新規テキスト文書.txt"}, + ] +} diff --git a/test/samples/INFO2-empty.json b/test/samples/INFO2-empty.json new file mode 100644 index 0000000..a3b201a --- /dev/null +++ b/test/samples/INFO2-empty.json @@ -0,0 +1,7 @@ +{ + "format": "file", + "version": 5, + "path": "INFO2-empty", + "records": [ + ] +} diff --git a/test/samples/INFO2-sample1.json b/test/samples/INFO2-sample1.json new file mode 100644 index 0000000..0e13815 --- /dev/null +++ b/test/samples/INFO2-sample1.json @@ -0,0 +1,23 @@ +{ + "format": "file", + "version": 5, + "path": "INFO2-sample1", + "records": [ + {"index": 44, "time": "2008-10-28T15:53:42Z", "gone": false, "size": 4096, "path": "C:\\Documents and Settings\\All Users\\Desktop\\有道桌面词典.lnk"}, + {"index": 45, "time": "2008-11-03T15:01:59Z", "gone": false, "size": 4096, "path": "C:\\Documents and Settings\\Administrator\\Desktop\\wongsir_url.txt"}, + {"index": 46, "time": "2008-11-06T09:20:58Z", "gone": false, "size": 2912256, "path": "C:\\Documents and Settings\\Administrator\\Desktop\\dd-wrt.v24_mini_wrt54g.bin"}, + {"index": 47, "time": "2008-11-13T12:08:39Z", "gone": false, "size": 765952, "path": "C:\\Documents and Settings\\Administrator\\Desktop\\theme\\.svn"}, + {"index": 48, "time": "2008-11-13T12:11:33Z", "gone": false, "size": 5812224, "path": "C:\\Documents and Settings\\Administrator\\Desktop\\Config Client"}, + {"index": 49, "time": "2008-11-13T12:11:36Z", "gone": false, "size": 1847296, "path": "C:\\Documents and Settings\\Administrator\\Desktop\\Config Client.7z"}, + {"index": 50, "time": "2008-11-19T04:42:04Z", "gone": false, "size": 4096, "path": "C:\\Documents and Settings\\All Users\\Desktop\\Wireshark.lnk"}, + {"index": 57, "time": "2008-11-19T05:07:15Z", "gone": false, "size": 2727936, "path": "C:\\Documents and Settings\\Administrator\\Desktop\\GetDataBackforFAT-v3.63_PConline.rar"}, + {"index": 64, "time": 
"2008-11-19T05:07:35Z", "gone": true, "size": 2727936, "path": "C:\\Documents and Settings\\Administrator\\Desktop\\GetDataBackforFAT-v3.63_PConline"}, + {"index": 65, "time": "2008-11-19T05:17:12Z", "gone": false, "size": 4096, "path": "C:\\Documents and Settings\\Administrator\\Desktop\\360保险箱.lnk"}, + {"index": 66, "time": "2008-11-19T05:21:37Z", "gone": false, "size": 2732032, "path": "C:\\Documents and Settings\\Administrator\\Desktop\\gdb"}, + {"index": 67, "time": "2008-11-19T05:21:37Z", "gone": false, "size": 2723840, "path": "C:\\Documents and Settings\\Administrator\\Desktop\\gdb.zip"}, + {"index": 68, "time": "2008-11-19T11:34:23Z", "gone": false, "size": 0, "path": "C:\\Documents and Settings\\Administrator\\Desktop\\recovered files"}, + {"index": 69, "time": "2008-11-19T18:51:45Z", "gone": false, "size": 2727936, "path": "C:\\Documents and Settings\\Administrator\\Desktop\\GetDataBackforFAT-v3.63_PConline"}, + {"index": 70, "time": "2008-11-19T18:51:45Z", "gone": false, "size": 5169152, "path": "C:\\Documents and Settings\\Administrator\\Desktop\\Uneraser_Setup(2).exe"}, + {"index": 71, "time": "2008-11-19T18:51:45Z", "gone": false, "size": 5169152, "path": "C:\\Documents and Settings\\Administrator\\Desktop\\Uneraser_Setup.exe"}, + ] +} diff --git a/test/samples/INFO2-sample2.json b/test/samples/INFO2-sample2.json new file mode 100644 index 0000000..44619e7 --- /dev/null +++ b/test/samples/INFO2-sample2.json @@ -0,0 +1,14 @@ +{ + "format": "file", + "version": 4, + "path": "INFO2-sample2", + "records": [ + {"index": 0, "time": "2015-04-20T00:07:36Z", "gone": false, "size": 32768, "path": "C:\\WINDOWS\\All Users\\Desktop\\Connect to the Internet.LNK"}, + {"index": 1, "time": "2015-04-20T00:07:42Z", "gone": false, "size": 32768, "path": "C:\\WINDOWS\\Desktop\\Online Services"}, + {"index": 2, "time": "2015-04-20T00:09:43Z", "gone": true, "size": 524288, "path": "C:\\WINDOWS\\Desktop\\IE9-WindowsVista-x64-enu.exe"}, + {"index": 3, "time": 
"2015-04-20T01:04:33Z", "gone": false, "size": 32768, "path": "C:\\My Documents\\Résumé.txt.txt"}, + {"index": 4, "time": "2015-04-20T01:05:01Z", "gone": false, "size": 6258688, "path": "C:\\WINDOWS\\Desktop\\winzip100.exe"}, + {"index": 5, "time": "2015-04-20T01:05:41Z", "gone": true, "size": 32768, "path": "C:\\WINDOWS\\Desktop\\111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111"}, + {"index": 6, "time": "2015-04-20T01:06:12Z", "gone": false, "size": 32768, "path": "C:\\WINDOWS\\Desktop\\1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345"}, + ] +} diff --git a/test/samples/dir-2019-uncpath.json b/test/samples/dir-2019-uncpath.json new file mode 100644 index 0000000..a4268b0 --- /dev/null +++ b/test/samples/dir-2019-uncpath.json @@ -0,0 +1,10 @@ +{ + "format": "dir", + "version": 2, + "path": "dir-2019-uncpath", + "records": [ + {"index": "$IW0RYW0.rtf", "time": "2019-05-07T20:56:04Z", "gone": true, "size": 7, "path": "\\\\WIN-163RLA0PH3N\\somewhere\\hahaha.rtf"}, + {"index": "$I4OZLXW.bmp", "time": "2019-05-07T21:01:01Z", "gone": true, "size": 1714662, "path": "\\\\WIN-163RLA0PH3N\\somewhere\\পরীক্ষা.bmp"}, + {"index": "$IYDW1CC.rtf", "time": "2019-05-07T21:08:55Z", "gone": true, "size": 7, "path": "\\\\WIN-163RLA0PH3N\\somewhere\\hahaha.rtf"}, + ] +} diff --git a/test/samples/dir-sample1.json b/test/samples/dir-sample1.json new file mode 100644 index 0000000..61c7e6a --- /dev/null +++ b/test/samples/dir-sample1.json @@ -0,0 +1,22 @@ +{ + "format": "dir", + "version": 1, + "path": "dir-sample1", + "records": [ + {"index": "$IUVFB0M.rtf", "time": "2007-09-21T06:32:46Z", "gone": false, "size": 155, "path": 
"C:\\Users\\student\\Desktop\\New Rich Text Document.rtf"}, + {"index": "$I0JGHX7", "time": "2007-09-21T06:47:49Z", "gone": true, "size": 0, "path": "C:\\Users\\student\\Desktop\\New Folder 1"}, + {"index": "$I1IS2OK.txt", "time": "2007-09-21T06:48:13Z", "gone": false, "size": 0, "path": "C:\\Users\\student\\Desktop\\New Text Document blah.txt"}, + {"index": "$IYAR1YY.exe", "time": "2007-09-21T07:54:23Z", "gone": true, "size": null, "path": "C:\\dd.exe"}, + {"index": "$I95CUKU", "time": "2007-09-21T08:02:59Z", "gone": true, "size": 4096, "path": "C:\\Users\\student\\Downloads\\fau-1.3.0.2355(rc3)\\fau\\FAU.x86\\sparsefile"}, + {"index": "$IHMU3NR.zip", "time": "2007-09-21T08:17:19Z", "gone": true, "size": 5025829, "path": "C:\\Users\\student\\Downloads\\fau-1.3.0.2355(rc3).zip"}, + {"index": "$I7FV8IY.exe", "time": "2007-09-21T08:23:18Z", "gone": true, "size": 153478296, "path": "C:\\Users\\student\\Downloads\\VMware-server-installer-1.0.4-56528.exe"}, + {"index": "$IMG2SSB", "time": "2007-09-21T08:28:57Z", "gone": true, "size": 0, "path": "C:\\Users\\student\\Desktop\\123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012"}, + {"index": "$IZK01YL.txt", "time": "2007-09-21T08:31:35Z", "gone": true, "size": 11, "path": "C:\\Users\\student\\Desktop\\123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012\\1234567.txt"}, + {"index": "$I1TDH1G.exe", "time": "2007-09-21T08:38:30Z", "gone": true, "size": 704512, "path": "C:\\Users\\student\\Downloads\\fau-1.3.0.2355(rc3)\\fau\\FAU.x86\\nc.exe"}, + {"index": "$IEQWWMF.exe", "time": "2007-09-21T08:38:30Z", "gone": true, "size": 679936, "path": 
"C:\\Users\\student\\Downloads\\fau-1.3.0.2355(rc3)\\fau\\FAU.x86\\fmdata.exe"}, + {"index": "$IFRN1CZ.exe", "time": "2007-09-21T08:38:30Z", "gone": true, "size": 110592, "path": "C:\\Users\\student\\Downloads\\fau-1.3.0.2355(rc3)\\fau\\FAU.x86\\wipe.exe"}, + {"index": "$IW527XU.exe", "time": "2007-09-21T08:38:30Z", "gone": true, "size": 331776, "path": "C:\\Users\\student\\Downloads\\fau-1.3.0.2355(rc3)\\fau\\FAU.x86\\volume_dump.exe"}, + {"index": "$IC6GEAW.exe", "time": "2007-09-21T08:50:16Z", "gone": true, "size": null, "path": "C:\\Users\\student\\Downloads\\fau-1.3.0.2355(rc3)\\fau\\FAU.x86\\dd.exe"}, + {"index": "$IZUFRX4.vmdk", "time": "2007-09-21T09:22:25Z", "gone": true, "size": 10737418240, "path": "C:\\Virtual Machines\\Windows XP Professional\\Windows XP Professional-flat.vmdk"}, + ] +} diff --git a/test/samples/dir-win10-01.json b/test/samples/dir-win10-01.json new file mode 100644 index 0000000..6549def --- /dev/null +++ b/test/samples/dir-win10-01.json @@ -0,0 +1,14 @@ +{ + "format": "dir", + "version": 2, + "path": "dir-win10-01", + "records": [ + {"index": "$IKEGS1G", "time": "2015-04-04T17:19:52Z", "gone": false, "size": 0, "path": "C:\\Users\\tester\\12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890"}, + {"index": "$IQ7LAXT.png", "time": "2015-04-04T17:20:01Z", "gone": false, "size": 6455, "path": "C:\\Users\\tester\\Pictures\\web-canvas.png"}, + {"index": "$I7R52EG.txt", "time": "2015-04-04T17:24:09Z", "gone": false, "size": 14, "path": "C:\\Temp\\foobat.txt.txt"}, + {"index": "$IBBFODN", "time": "2015-04-07T23:19:35Z", "gone": true, "size": 7, "path": "C:\\Temp\\𨳊𨶙閪邨鰂"}, + {"index": "$IHO61YT", "time": "2015-04-07T23:32:07Z", "gone": true, "size": 12884901888, "path": "C:\\Temp\\largesparsefile"}, + {"index": "$IROMPZ0.exe", "time": "2015-04-19T10:49:59Z", "gone": 
true, "size": 1761792, "path": "C:\\Temp\\FAU\\FAU.x64\\dd.exe"}, + {"index": "$IDNLPD4.exe", "time": "2015-04-19T10:50:51Z", "gone": true, "size": 872448, "path": "C:\\Temp\\FAU\\FAU.x86\\dd.exe"}, + ] +}