diff --git a/src/rifiuti-vista.c b/src/rifiuti-vista.c index 03fe858..1e6086c 100644 --- a/src/rifiuti-vista.c +++ b/src/rifiuti-vista.c @@ -27,38 +27,31 @@ extern metarecord *meta; * @note This only checks if index file has sufficient amount * of data for sensible reading */ -static gboolean +static bool _validate_index_file (const char *filename, - void **filebuf, + void **outbuf, gsize *bufsize, uint64_t *ver, GError **error) { - gsize expect_sz; char *buf = NULL; - uint32_t pathlen; - g_return_val_if_fail (filename && *filename, FALSE); - g_return_val_if_fail (filebuf && ! *filebuf , FALSE); - g_return_val_if_fail (! error || ! *error , FALSE); - g_return_val_if_fail (bufsize , FALSE); - g_return_val_if_fail (ver , FALSE); + g_return_val_if_fail (filename && *filename, false); + g_return_val_if_fail (outbuf && ! *outbuf, false); + g_return_val_if_fail (! error || ! *error , false); + g_return_val_if_fail (bufsize , false); + g_return_val_if_fail (ver , false); g_debug ("Start file validation for '%s'...", filename); if (! 
g_file_get_contents (filename, &buf, bufsize, error)) goto validate_fail; - g_debug ("Read '%s' successfully, size = %" G_GSIZE_FORMAT, - filename, *bufsize); - if (*bufsize <= VERSION1_FILENAME_OFFSET) { - g_debug ("File size = %" G_GSIZE_FORMAT - ", expected > %" G_GSIZE_FORMAT, - *bufsize, (gsize) VERSION1_FILENAME_OFFSET); - g_set_error_literal (error, R2_REC_ERROR, R2_REC_ERROR_IDX_SIZE_INVALID, - _("File is prematurely truncated, or not a $Recycle.bin index.")); + g_set_error_literal (error, R2_REC_ERROR, + R2_REC_ERROR_IDX_SIZE_INVALID, + _("File is not a $Recycle.bin index")); goto validate_fail; } @@ -68,59 +61,39 @@ _validate_index_file (const char *filename, switch (*ver) { - case VERSION_VISTA: - - expect_sz = VERSION1_FILE_SIZE; - /* see _populate_record_data() for reason */ - if ((*bufsize != expect_sz) && (*bufsize != expect_sz - 1)) - { - g_debug ("File size = %" G_GSIZE_FORMAT - ", expected = %" G_GSIZE_FORMAT " or %" G_GSIZE_FORMAT, *bufsize, expect_sz, expect_sz - 1); - g_set_error (error, R2_REC_ERROR, R2_REC_ERROR_IDX_SIZE_INVALID, - "%s", _("Might be an index file, but file size is unexpected.")); - goto validate_fail; - } - break; - - case VERSION_WIN10: - - // Version 2 adds a uint32 file name strlen before file name. - // This presumably breaks the 260 char barrier in version 1. 
- copy_field (pathlen, buf, VERSION1_FILENAME_OFFSET, VERSION2_FILENAME_OFFSET); - pathlen = GUINT32_FROM_LE (pathlen); - - /* Header length + strlen in UTF-16 encoding */ - expect_sz = VERSION2_FILENAME_OFFSET + pathlen * sizeof(gunichar2); - if (*bufsize != expect_sz) - { - g_debug ("File size = %" G_GSIZE_FORMAT - ", expected = %" G_GSIZE_FORMAT, - *bufsize, expect_sz); - g_set_error (error, R2_REC_ERROR, R2_REC_ERROR_IDX_SIZE_INVALID, - "%s", _("Might be an index file, but file size is unexpected.")); - goto validate_fail; - } - break; - - default: - if (*ver < 10) - g_set_error (error, R2_REC_ERROR, - R2_REC_ERROR_VER_UNSUPPORTED, - _("Index file version %" PRIu64 " is unsupported"), *ver); - else - g_set_error (error, R2_REC_ERROR, - R2_REC_ERROR_VER_UNSUPPORTED, - "%s", _("File is not a $Recycle.bin index")); + case VERSION_VISTA: break; // already handled above + + case VERSION_WIN10: + // Version 2 adds a uint32 file name strlen before file name. + // This presumably breaks the 260 char barrier in version 1. 
+ if (*bufsize <= VERSION2_FILENAME_OFFSET) + { + g_set_error_literal (error, R2_REC_ERROR, + R2_REC_ERROR_IDX_SIZE_INVALID, + _("File is not a $Recycle.bin index")); goto validate_fail; + } + break; + + default: + if (*ver < 10) + g_set_error (error, R2_REC_ERROR, + R2_REC_ERROR_VER_UNSUPPORTED, + _("Index file version %" PRIu64 " is unsupported"), *ver); + else + g_set_error (error, R2_REC_ERROR, + R2_REC_ERROR_VER_UNSUPPORTED, + "%s", _("File is not a $Recycle.bin index")); + goto validate_fail; } - *filebuf = buf; + *outbuf = buf; g_debug ("Finished file validation for '%s'", filename); - return TRUE; + return true; validate_fail: g_free (buf); - return FALSE; + return false; } @@ -130,34 +103,41 @@ _populate_record_data (void *buf, uint64_t version) { rbin_struct *record; - size_t pathbuf_sz = 0; + uint32_t path_sz_expected, path_sz_actual; + size_t null_terminator_offset; void *pathbuf_start = NULL; bool erraneous = false; + GString *u; // shorthand switch (version) { - case VERSION_VISTA: - // In rare cases, the size of index file is one byte short of - // (fixed) 544 bytes in Vista. Under such occasion, file size - // only occupies 56 bit, not 64 bit as it ought to be. - // Actually this 56-bit file size is very likely wrong after all. - // This is observed during deletion of dd.exe from Forensic - // Acquisition Utilities (by George M. Garner Jr) - // in certain localized Vista. - if (bufsize == VERSION1_FILE_SIZE - 1) - erraneous = true; - - pathbuf_sz = WIN_PATH_MAX * sizeof(gunichar2); - pathbuf_start = buf - (int)erraneous + VERSION1_FILENAME_OFFSET; - break; - - case VERSION_WIN10: - pathbuf_sz = bufsize - VERSION2_FILENAME_OFFSET; - pathbuf_start = buf + VERSION2_FILENAME_OFFSET; - break; - - default: - g_assert_not_reached (); + case VERSION_VISTA: + // In rare cases, the size of index file is one byte short of + // (fixed) 544 bytes in Vista. Under such occasion, file size + // only occupies 56 bit, not 64 bit as it ought to be. 
+ // Actually this 56-bit file size is very likely wrong after all. + // This is observed during deletion of dd.exe from Forensic + // Acquisition Utilities (by George M. Garner Jr) + // in certain localized Vista. + if (bufsize == VERSION1_FILE_SIZE - 1) + erraneous = true; + + path_sz_expected = WIN_PATH_MAX * sizeof(gunichar2); + path_sz_actual = bufsize + (int)erraneous - VERSION1_FILENAME_OFFSET; + pathbuf_start = buf - (int)erraneous + VERSION1_FILENAME_OFFSET; + break; + + case VERSION_WIN10: + copy_field (path_sz_expected, buf, VERSION1_FILENAME_OFFSET, + VERSION2_FILENAME_OFFSET); + path_sz_expected = GUINT32_FROM_LE (path_sz_expected) * + sizeof(gunichar2); + path_sz_actual = bufsize - VERSION2_FILENAME_OFFSET; + pathbuf_start = buf + VERSION2_FILENAME_OFFSET; + break; + + default: + g_assert_not_reached (); } record = g_malloc0 (sizeof (rbin_struct)); @@ -184,23 +164,36 @@ _populate_record_data (void *buf, record->winfiletime = GINT64_FROM_LE (record->winfiletime); record->deltime = win_filetime_to_gdatetime (record->winfiletime); - record->raw_uni_path = g_malloc0 (pathbuf_sz + sizeof(gunichar2)); - memcpy (record->raw_uni_path, pathbuf_start, pathbuf_sz); + // Unicode path + if (path_sz_actual > path_sz_expected) + { + g_set_error_literal (&record->error, R2_REC_ERROR, + R2_REC_ERROR_DUBIOUS_PATH, + _("Ignored dangling extraneous data after record")); + } + else if (path_sz_actual < path_sz_expected && ! 
erraneous) { - // Never set len = -1 for UCS2 source string - char *s = g_convert (record->raw_uni_path, - ucs2_strnlen (record->raw_uni_path, pathbuf_sz) * sizeof (gunichar2), + g_set_error_literal (&record->error, R2_REC_ERROR, + R2_REC_ERROR_DUBIOUS_PATH, + _("Record is truncated, thus unicode path might be incomplete")); + } + + u = g_string_new_len ((const char *) pathbuf_start, + MIN(path_sz_actual, path_sz_expected)); + record->raw_uni_path = u; + + null_terminator_offset = ucs2_bytelen (u->str, u->len); + + if (record->error == NULL) + { + char *s = g_convert (u->str, null_terminator_offset, "UTF-8", "UTF-16LE", NULL, NULL, NULL); if (s) - { g_free (s); - } else - { g_set_error_literal (&record->error, R2_REC_ERROR, R2_REC_ERROR_CONV_PATH, _("Path contains broken unicode character(s)")); - } } return record; diff --git a/src/rifiuti-vista.h b/src/rifiuti-vista.h index c5665f8..2bf7274 100644 --- a/src/rifiuti-vista.h +++ b/src/rifiuti-vista.h @@ -7,7 +7,7 @@ #ifndef _RIFIUTI_VISTA_H #define _RIFIUTI_VISTA_H -#include "utils.h" +#include "utils-conv.h" #define VERSION_OFFSET 0x0 #define FILESIZE_OFFSET 0x8 diff --git a/src/rifiuti.c b/src/rifiuti.c index 68627a0..4379c4d 100644 --- a/src/rifiuti.c +++ b/src/rifiuti.c @@ -62,7 +62,7 @@ _validate_index_file (const char *filename, { g_set_error_literal (error, R2_FATAL_ERROR, R2_FATAL_ERROR_ILLEGAL_DATA, - _("File is prematurely truncated, or not an INFO2 index.")); + _("File is not an INFO2 index.")); goto validation_fail; } @@ -139,19 +139,29 @@ _validate_index_file (const char *filename, static rbin_struct * _populate_record_data (void *buf, - gsize bufsize, - gboolean *junk_detected) + size_t bufsize) { rbin_struct *record; uint32_t drivenum; - size_t uni_buf_sz, null_terminator_offset; + size_t null_terminator_offset; + GString *l, *u; // shorthand for paths + + // Unicode records accept partial path truncation, + // but no fault tolerance for Legacy records + + if (meta->recordsize == LEGACY_RECORD_SIZE 
&& + bufsize < LEGACY_RECORD_SIZE) + return NULL; + + if (meta->recordsize == UNICODE_RECORD_SIZE && + bufsize <= LEGACY_RECORD_SIZE) + return NULL; record = g_malloc0 (sizeof (rbin_struct)); // Verbatim path in ANSI code page - record->raw_legacy_path = g_malloc0 (RECORD_INDEX_OFFSET - LEGACY_FILENAME_OFFSET); - copy_field (*(record->raw_legacy_path), buf, - LEGACY_FILENAME_OFFSET, RECORD_INDEX_OFFSET); + l = g_string_new_len (buf, WIN_PATH_MAX); + record->raw_legacy_path = l; /* Index number associated with the record */ copy_field (record->index_n, buf, RECORD_INDEX_OFFSET, DRIVE_LETTER_OFFSET); @@ -173,10 +183,10 @@ _populate_record_data (void *buf, record->gone = FILESTATUS_EXISTS; // If file is not in recycle bin (restored or permanently deleted), // first byte will be removed from filename - if (! *record->raw_legacy_path) + if (l->str[0] == '\0') { record->gone = FILESTATUS_GONE; - *record->raw_legacy_path = record->drive; + l->str[0] = record->drive; } /* File deletion time */ @@ -195,7 +205,7 @@ _populate_record_data (void *buf, // because otherwise we don't know which encoding to use if (legacy_encoding) { - char *s = g_convert (record->raw_legacy_path, -1, + char *s = g_convert (l->str, -1, "UTF-8", legacy_encoding, NULL, NULL, NULL); if (s) g_free (s); @@ -208,28 +218,30 @@ _populate_record_data (void *buf, if (bufsize == LEGACY_RECORD_SIZE) return record; - /* Part below deals with unicode path only */ + // Part below deals with unicode path only + + if (bufsize < UNICODE_RECORD_SIZE && record->error == NULL) + { + g_set_error_literal (&record->error, R2_REC_ERROR, + R2_REC_ERROR_DUBIOUS_PATH, + _("Record is truncated, thus unicode path might be incomplete")); + } - uni_buf_sz = UNICODE_RECORD_SIZE - UNICODE_FILENAME_OFFSET; - record->raw_uni_path = g_malloc (uni_buf_sz); - copy_field (*(record->raw_uni_path), buf, - UNICODE_FILENAME_OFFSET, UNICODE_RECORD_SIZE); - null_terminator_offset = ucs2_strnlen ( - record->raw_uni_path, WIN_PATH_MAX) * sizeof 
(gunichar2); + u = g_string_new_len ((const char *) (buf + UNICODE_FILENAME_OFFSET), + bufsize - UNICODE_FILENAME_OFFSET); + record->raw_uni_path = u; + null_terminator_offset = ucs2_bytelen (u->str, u->len); + + if (record->error == NULL) { - // Never set len = -1 for wchar source string - char *s = g_convert (record->raw_uni_path, null_terminator_offset, + char *s = g_convert (u->str, null_terminator_offset, "UTF-8", "UTF-16LE", NULL, NULL, NULL); if (s) - { g_free (s); - } else - { g_set_error_literal (&record->error, R2_REC_ERROR, R2_REC_ERROR_CONV_PATH, _("Path contains broken unicode character(s)")); - } } /* @@ -252,26 +264,23 @@ _populate_record_data (void *buf, * - accented latin chars transliterated to pure ASCII * - first DBCS char converted to UCS2 codepoint */ - if (junk_detected && ! *junk_detected) + if (! meta->fill_junk && u->len > null_terminator_offset) { - // Beware: start pos shouldn't be previously read bytes, - // as it may contain invalid seq and quit prematurely. - char *p = record->raw_uni_path + null_terminator_offset; - - while (p < record->raw_uni_path + uni_buf_sz) + char *p = u->str + null_terminator_offset; + while (p < u->str + u->len) { if (*p != '\0') { g_debug ("Junk detected at offset 0x%tx of unicode path", - p - record->raw_uni_path); - *junk_detected = TRUE; + p - u->str); + meta->fill_junk = true; break; } p++; } - if (*junk_detected) - hexdump (record->raw_uni_path, uni_buf_sz); + if (meta->fill_junk) + hexdump (u->str, u->len); } return record; @@ -282,12 +291,14 @@ static void _parse_record_cb (const char *index_file, metarecord *meta) { - rbin_struct *record; + rbin_struct *record = NULL; FILE *infile = NULL; - gsize read_sz, record_sz; + size_t read_sz, + prev_pos, + curr_pos; void *buf = NULL; GError *error = NULL; - int64_t prev_pos, curr_pos; + char *segment_id; if (! 
_validate_index_file (index_file, &infile, &error)) { @@ -295,44 +306,37 @@ _parse_record_cb (const char *index_file, g_strdup (index_file), error); return; } - g_debug ("Start populating record for '%s'...", index_file); - record_sz = meta->recordsize; - buf = g_malloc0 (record_sz); - fseek (infile, RECORD_START_OFFSET, SEEK_SET); - curr_pos = (int64_t) ftell (infile); - prev_pos = curr_pos; + prev_pos = curr_pos = ftell (infile); - while ((read_sz = fread (buf, 1, record_sz, infile)) > 0) + buf = g_malloc0 (meta->recordsize); + while ((read_sz = fread (buf, 1, meta->recordsize, infile)) > 0) { prev_pos = curr_pos; - curr_pos = (int64_t) ftell (infile); - g_debug ("Read %s, byte range %" PRId64 " - %" PRId64, - index_file, prev_pos, curr_pos); - if (read_sz < record_sz) { - g_debug ("read size = %zu, less than needed %zu", read_sz, record_sz); - break; - } - record = _populate_record_data (buf, record_sz, &meta->fill_junk); - g_ptr_array_add (meta->records, record); + curr_pos = ftell (infile); + g_debug ("Read byte range %zu-%zu %s", prev_pos, curr_pos, + (read_sz < meta->recordsize ? "" : " (!!!)")); + if (NULL != (record = _populate_record_data (buf, read_sz))) + g_ptr_array_add (meta->records, record); } g_free (buf); - char *segment_id = g_strdup_printf ("|%" PRId64 "|%" PRId64, prev_pos, curr_pos); + segment_id = g_strdup_printf ("|%zu|%zu", prev_pos, curr_pos); - if (feof (infile) && read_sz && (read_sz < record_sz)) + if (feof (infile)) { - g_set_error_literal (&error, R2_REC_ERROR, - R2_REC_ERROR_IDX_SIZE_INVALID, - _("Last segment does not constitute a valid " - "record. 
Likely a premature end of file.")); + if (read_sz > 0 && record == NULL) + g_set_error_literal (&error, R2_REC_ERROR, + R2_REC_ERROR_IDX_SIZE_INVALID, + _("Premature end of file encountered, and " + "the last segment is not recoverable.")); } else if (ferror (infile)) // other generic error { - g_set_error (&error, G_FILE_ERROR, G_FILE_ERROR_FAILED, - _("Failed to read record at %s"), segment_id); + g_set_error_literal (&error, G_FILE_ERROR, G_FILE_ERROR_FAILED, + _("Failed to read record for unknown reason")); } if (error) { diff --git a/src/rifiuti.h b/src/rifiuti.h index fc8c7f0..ccd2374 100644 --- a/src/rifiuti.h +++ b/src/rifiuti.h @@ -8,7 +8,7 @@ #ifndef _RIFIUTI_H #define _RIFIUTI_H -#include "utils.h" +#include "utils-conv.h" /* These offsets are relative to file start */ #define VERSION_OFFSET 0 diff --git a/src/utils-conv.c b/src/utils-conv.c index 7c9159f..6e9ab5e 100644 --- a/src/utils-conv.c +++ b/src/utils-conv.c @@ -10,9 +10,42 @@ #include #include +#include "utils-error.h" #include "utils-conv.h" +struct _fmt_data fmt[] = { + // must match out_fmt enum order + { + .friendly_name = "unknown format", + .fallback_tmpl = {"", "", ""}, + }, + { + .friendly_name = "TSV format", + .fallback_tmpl = {"<\\u%04X>", "<\\%02X>", "<\\u%04X>"}, + }, + { + .friendly_name = "XML format", + // All paths are placed inside CDATA, using entities + // can be confusing + .fallback_tmpl = {"<\\u%04X>", "<\\%02X>", "<\\u%04X>"}, + }, + { + .friendly_name = "JSON format", + .fallback_tmpl = { + "", // Unused, see json_escape() + // JSON doesn't allow encoding raw byte data in strings + // (must be proper characters) + "<\\%02X>", + // HACK \u sequence collides with path separator, which + // will be processed in json escaping routine. 
Use a temp + // char to avoid collision and convert it back later + "*u%04X" + }, + }, +}; + + /** * @brief Try out if encoding is compatible to ASCII * @param enc The encoding to test @@ -47,86 +80,75 @@ enc_is_ascii_compatible (const char *enc, /** - * @brief Compute UCS2 string length like `wcslen()` - * @param str The string to check (in `char*` !) - * @param max_sz Maximum length to check, or use -1 to - * denote the string is nul-terminated - * @return Either number of UCS2 char for whole string, - * or return `max_sz` when `max_sz` param is exceeded + * @brief Find null terminator position in UCS2 string + * @param str The string to check (in `char *` !) + * @param max_sz Maximum byte length to check, or use -1 to + * denote the string should be nul-terminated + * @return Byte position where null terminator (double \\0) + * is found, or `max_sz` otherwise + * @note Being different from standard C funcs like `wcsnlen()` + * or `strnlen()`, it returns bytes, not chars. And it would + * take care of odd bytes when UCS2 strings are expecting + * even number of bytes. 
*/ size_t -ucs2_strnlen (const char *str, +ucs2_bytelen (const char *str, ssize_t max_sz) { - // wcsnlen_s should be equivalent except for boundary - // cases we don't care about - - size_t i = 0; char *p = (char *) str; - if (str == NULL) + if (str == NULL || max_sz == 0) return 0; + if (max_sz == 1) + return 1; + while (*p || *(p+1)) { - if (max_sz >= 0 && i >= (size_t) max_sz) - break; - i++; p += 2; + if (max_sz >= 0 && p - str + 1 >= max_sz) + return max_sz; } - return i; + return p - str; } /** * @brief Move character pointer for specified bytes * @param sz Must be either 1 or 2, denoting broken byte or broken UCS2 character - * @param in_str Reference to input string to be converted - * @param read_bytes Reference to already read bytes count to keep track of - * @param out_str Reference to output string to be appended - * @param write_bytes Reference to writable bytes count to decrement - * @param tmpl `printf` template to represent the broken character + * @param ptr Location of char pointer to string to be converted + * @param bytes_left Location to number of remaining bytes to read + * @param s Broken byte(s) will be formatted and appended to this `GString` + * @param fmt_type Type of output format; see `fmt[]` for detail * @note This is the core of `conv_path_to_utf8_with_tmpl()` doing * error fallback, converting a single broken char to `printf` output. 
*/ static void _advance_octet (size_t sz, - char **in_str, - gsize *read_bytes, - char **out_str, - gsize *write_bytes, - const char *tmpl) + char **ptr, + gsize *bytes_left, + GString *s, + out_fmt fmt_type) { - char *repl; - - switch (sz) { - case 1: - { - unsigned char c = *(unsigned char *) (*in_str); - repl = g_strdup_printf (tmpl, c); - } - break; + int c = 0; - case 2: - { - uint16_t c = GUINT16_FROM_LE (*(uint16_t *) (*in_str)); - repl = g_strdup_printf (tmpl, c); - } - break; + g_return_if_fail (*bytes_left > 0); + g_return_if_fail (sz == 1 || sz == 2); + g_return_if_fail (*ptr != NULL); - default: - g_assert_not_reached(); - } + if (*bytes_left == 1) + sz = 1; - (*in_str) += sz; - if (read_bytes != NULL) - (*read_bytes) -= sz; + if (sz == 1) + c = *(uint8_t *) (*ptr); + else + c = GUINT16_FROM_LE (*(uint16_t *) (*ptr)); - *out_str = g_stpcpy (*out_str, (const char *) repl); - if (write_bytes != NULL) - *write_bytes -= strlen (repl); + g_string_append_printf (s, + fmt[fmt_type].fallback_tmpl[sz], c); - g_free (repl); + *ptr += sz; + *bytes_left -= sz; return; } @@ -134,15 +156,15 @@ _advance_octet (size_t sz, /** * @brief Convert non-printable characters to escape sequences * @param str The original string to be converted - * @param tmpl `printf` template to represent non-printable chars + * @param fmt_type Type of output format; see `fmt[]` for detail * @return Converted string, maybe containing escape sequences * @attention Caller is responsible for using correct template, no * error checking is performed. This template should handle a single * Windows unicode path character, which is in UTF-16LE encoding. */ static char * -_filter_printable_char (const char *str, - const char *tmpl) +_filter_printable_char (const char *str, + out_fmt fmt_type) { char *p, *np; gunichar c; @@ -155,14 +177,12 @@ _filter_printable_char (const char *str, c = g_utf8_get_char (p); np = g_utf8_next_char (p); - /* - * ASCII space is the norm (e.g. 
Program Files), but - * all other kinds of spaces are rare, so escape them too - */ + // ASCII space is common (e.g. "Program Files"), but not + // for any other kinds of space or invisible char if (g_unichar_isgraph (c) || (c == 0x20)) - s = g_string_append_len (s, p, (gssize) (np - p)); + s = g_string_append_len (s, p, (size_t) (np - p)); else - g_string_append_printf (s, tmpl, c); + g_string_append_printf (s, fmt[fmt_type].fallback_tmpl[0], c); p = np; } @@ -171,15 +191,33 @@ _filter_printable_char (const char *str, } +static void +_sync_pos (GString *str, + gsize *bytes_left, + char **chr_ptr, + bool from_gstring) +{ + if (from_gstring) + { + *bytes_left = str->allocated_len - str->len - 1; + *chr_ptr = str->str + str->len; + } + else + { + str->len = str->allocated_len - *bytes_left - 1; + g_assert (*chr_ptr == str->str + str->len); + str->str[str->len] = '\0'; + } +} + /** * @brief Convert path to UTF-8 encoding with customizable fallback * @param path The path string to be converted * @param from_enc Either a legacy Windows ANSI encoding, or use * `NULL` to represent Windows wide char encoding (UTF-16LE) - * @param tmpl `printf`-style string template to represent broken - * character. This template should handle either single- or - * double-octet, namely `%u`, `%o`, `%d`, `%i`, `%x` and `%X`. - * @param read Reference to number of successfully read bytes + * @param fmt_type Type of output format; see `fmt[]` for detail + * @param func String transform func for post processing; can be + * `NULL`, which still does some internal filtering * @param error Location to store error upon problem * @return UTF-8 encoded path, or `NULL` if conversion error happens * @note This is very similar to `g_convert_with_fallback()`, but the @@ -189,107 +227,134 @@ _filter_printable_char (const char *str, * @attention 1. This routine is not for generic charset conversion. * Extra transformation is intended for path display only. * @attention 1. 
Caller is responsible for using correct template, - * almost no error checking is performed. + * no error checking is performed. */ char * -conv_path_to_utf8_with_tmpl (const char *path, - ssize_t pathlen, - const char *from_enc, - const char *tmpl, - size_t *read, - GError **error) +conv_path_to_utf8_with_tmpl (const GString *path, + const char *from_enc, + out_fmt fmt_type, + StrTransformFunc func, + GError **error) { - char *u8_path, *i_ptr, *o_ptr, *result = NULL; - gsize len, r_total, rbyte, wbyte, status, in_ch_width, out_ch_width; - GIConv conv; - - g_return_val_if_fail (path && *path, NULL); - g_return_val_if_fail (tmpl && *tmpl, NULL); + char *i_ptr, + *o_ptr, + *result; + gsize i_size, + i_left, + o_left, + char_sz, + status; + GIConv conv; + GPtrArray *err_offsets; + GString *s; + + // For unicode path, the first char must be ASCII drive letter + // or slash. And since it is in little endian, first byte is + // always non-null + g_return_val_if_fail (path != NULL, NULL); g_return_val_if_fail (! from_enc || *from_enc, NULL); - g_return_val_if_fail (! error || ! *error , NULL); - /* try the template */ + if (from_enc) { - char *s = g_strdup_printf (tmpl, from_enc ? 
0xFF : 0xFFFF); - /* UTF-8 character occupies at most 6 bytes */ - out_ch_width = MAX (strlen(s), 6); - g_free (s); + char_sz = sizeof (char); + i_left = i_size = strnlen (path->str, WIN_PATH_MAX); } - - if (from_enc != NULL) { - in_ch_width = sizeof (char); - len = strnlen (path, (size_t) pathlen); - } else { - in_ch_width = sizeof (gunichar2); - len = ucs2_strnlen (path, (size_t) pathlen); + else + { + char_sz = sizeof (gunichar2); + i_left = i_size = ucs2_bytelen (path->str, path->len); } + i_ptr = path->str; - rbyte = len * in_ch_width; - wbyte = len * out_ch_width; - u8_path = g_malloc0 (wbyte); - - r_total = rbyte; - i_ptr = (char *) path; - o_ptr = u8_path; + // Ballpark figure, GString decides alloc size on its own + s = g_string_sized_new (i_size + 1); + _sync_pos (s, &o_left, &o_ptr, true); - /* Shouldn't fail, from_enc already tested upon start of prog */ + // Shouldn't fail, encoding already tested upon start of prog conv = g_iconv_open ("UTF-8", from_enc ? from_enc : "UTF-16LE"); - g_debug ("Initial: read=%" G_GSIZE_FORMAT ", write=%" G_GSIZE_FORMAT, - rbyte, wbyte); + g_debug ("Initial : r=%02zu, w=%02zu/%02zu", + i_left, o_left, s->allocated_len - 1); + err_offsets = g_ptr_array_new_with_free_func ((GDestroyNotify) g_free); - /* Pass 1: Convert to UTF-8, all illegal seq become escaped hex */ - while (TRUE) - { - int e; + // Pass 1: Convert to UTF-8, all illegal seq become escaped hex + while (i_left > 0) + { if (*i_ptr == '\0') { if (from_enc != NULL) break; if (*(i_ptr+1) == '\0') break; /* utf-16: check "\0\0" */ } - // GNU iconv may return number of nonreversible conversions - // upon success, but we don't need to worry about it, as - // conversion from code page to UTF-8 would not be nonreversible - if ((gsize) -1 != (status = g_iconv ( - conv, &i_ptr, &rbyte, &o_ptr, &wbyte))) + // When non-reversible char are converted to \uFFFD, there + // is nothing we can do. Just accept the status quo. 
+ status = g_iconv (conv, &i_ptr, &i_left, &o_ptr, &o_left); + _sync_pos (s, &o_left, &o_ptr, false); + if (status != (gsize) -1) break; - e = errno; - - g_debug ("r=%02" G_GSIZE_FORMAT ", w=%02" G_GSIZE_FORMAT - ", stt=%" G_GSIZE_FORMAT " (%s) str=%s", - rbyte, wbyte, status, g_strerror(e), u8_path); - - switch (e) { - case EILSEQ: - case EINVAL: // TODO Handle partial input for EINVAL - if (error && ! *error) { - g_set_error (error, G_CONVERT_ERROR, - G_CONVERT_ERROR_ILLEGAL_SEQUENCE, - _("Illegal sequence or partial input at offset %" G_GSIZE_FORMAT), rbyte); - } - _advance_octet (in_ch_width, &i_ptr, &rbyte, &o_ptr, &wbyte, tmpl); - g_iconv (conv, NULL, NULL, &o_ptr, &wbyte); // reset state - break; - case E2BIG: // TODO realloc instead of Kaboom! - g_assert_not_reached(); + int e = errno; + g_debug ("Progress: r=%02zu, w=%02zu/%02zu, status=%zd (%s), str=%s", + i_left, o_left, s->allocated_len - 1, + status, g_strerror(e), s->str); + + switch (e) + { + case EINVAL: + case EILSEQ: + { + size_t *processed = g_malloc (sizeof (size_t)); + *processed = i_size - i_left; + g_ptr_array_add (err_offsets, processed); + } + _advance_octet (char_sz, &i_ptr, &i_left, s, fmt_type); + _sync_pos (s, &o_left, &o_ptr, true); + g_debug ("Progress: r=%02zu, w=%02zu/%02zu, str=%s", + i_left, o_left, s->allocated_len - 1, s->str); + g_iconv (conv, NULL, NULL, &o_ptr, &o_left); // reset state + _sync_pos (s, &o_left, &o_ptr, false); + break; + case E2BIG: + s = g_string_set_size (s, s->allocated_len * 2); + _sync_pos (s, &o_left, &o_ptr, true); + break; } } - g_debug ("r=%02" G_GSIZE_FORMAT ", w=%02" G_GSIZE_FORMAT - ", stt=%" G_GSIZE_FORMAT ", str=%s", rbyte, wbyte, status, u8_path); + g_debug ("Finally : r=%02zu, w=%02zu/%02zu, status=%zd, str=%s", + i_left, o_left, s->allocated_len - 1, status, s->str); g_iconv_close (conv); - if (read != NULL) - *read = r_total - rbyte; + if (error && + g_error_matches ((const GError *) (*error), + R2_REC_ERROR, R2_REC_ERROR_CONV_PATH) && + 
err_offsets->len > 0) + { + // More detailed error message showing offsets + char *old = (*error)->message; + GString *dbg_str = g_string_new ((const char *) old); + dbg_str = g_string_append (dbg_str, ", at offset:"); + for (size_t i = 0; i < err_offsets->len; i++) + { + g_string_append_printf (dbg_str, " %zu", + *((size_t *) (err_offsets->pdata[i]))); + } + (*error)->message = g_string_free (dbg_str, FALSE); + g_free (old); + } + + g_ptr_array_free (err_offsets, TRUE); - /* Pass 2: Convert all non-printable chars to hex */ - g_return_val_if_fail (g_utf8_validate (u8_path, -1, NULL), NULL); + // Pass 2: Post processing, e.g. convert non-printable chars to hex - result = _filter_printable_char (u8_path, tmpl); - g_free (u8_path); + g_return_val_if_fail (g_utf8_validate (s->str, -1, NULL), NULL); + + if (func == NULL) + result = _filter_printable_char (s->str, fmt_type); + else + result = func (s->str); + g_string_free (s, TRUE); return result; } @@ -353,22 +418,49 @@ filter_escapes (const char *str) char * -json_escape_path (const char *path) +json_escape (const char *src) { // TODO g_string_replace from glib 2.68 does it all - char *p = (char *) path; - gunichar c = 0; - GString *s = g_string_new (""); + char *p = (char *) src; + GString *s = g_string_sized_new (strlen (src)); while (*p) { - c = g_utf8_get_char (p); - if (c == '\\') - s = g_string_append (s, "\\\\"); - else if (c == '*') + gunichar c = g_utf8_get_char (p); + switch (c) + { + // JSON does not need to escape asterisk. This is for + // workaround in format template + case '*' : s = g_string_append_c (s, '\\'); break; + case '\\': + // For all other chars below, they are actually disallowed + // in Windows path. 
This is for the mischievous who + // move data to other OS and rename + case 0x22: + case 0x27: s = g_string_append_c (s, '\\'); - else - s = g_string_append_unichar (s, c); + s = g_string_append_c (s, c); + break; + case 0x08: s = g_string_append (s, "\\b"); break; + case 0x09: s = g_string_append (s, "\\t"); break; + case 0x0A: s = g_string_append (s, "\\n"); break; + case 0x0B: s = g_string_append (s, "\\v"); break; + case 0x0C: s = g_string_append (s, "\\f"); break; + case 0x0D: s = g_string_append (s, "\\r"); break; + default : + if (g_unichar_isgraph (c) || c == 0x20) + s = g_string_append_unichar (s, c); + else if (c < 0x10000) + g_string_append_printf (s, "\\u%04X", c); + else // calculate surrogate + { + uint16_t high, low; + high = 0xD800 + ((c - 0x10000) >> 10 ); + low = 0xDC00 + ((c - 0x10000) & 0x3FF); + g_string_append_printf (s, "\\u%04X\\u%04X", high, low); + } + break; + } p = g_utf8_next_char (p); } return g_string_free (s, FALSE); diff --git a/src/utils-conv.h b/src/utils-conv.h index 1bb1be8..4a3368e 100644 --- a/src/utils-conv.h +++ b/src/utils-conv.h @@ -10,21 +10,55 @@ #include #include +// All versions of recycle bin prior to Windows 10 use full PATH_MAX +// or FILENAME_MAX (260 char) to store file paths in either ANSI or +// Unicode variations. However it is impossible to reuse any similar +// constant as it is totally platform dependent. 
+#define WIN_PATH_MAX 260 + + +// Minimum bytes needed to guarantee writing a utf8 character +#define MIN_WRITEBUF_SPACE 4 + + +typedef enum +{ + FORMAT_UNKNOWN, + FORMAT_TEXT, + FORMAT_XML, + FORMAT_JSON, +} out_fmt; + + +typedef struct _fmt_data { + const char *friendly_name; + // tmpl[0]=utf8 (max 32bit), 1=char (8bit), 2=ucs2 (16bit) + // templates should use numeric printf format since + // they are not proper characters, or non-printable + // chars in case of UTF-8 + // namely `%u`, `%o`, `%d`, `%i`, `%x` and `%X` + const char *fallback_tmpl[3]; +} _fmt_data; + + +typedef +char * (*StrTransformFunc) (const char *src); + + bool enc_is_ascii_compatible (const char *enc, GError **error); -size_t ucs2_strnlen (const char *str, +size_t ucs2_bytelen (const char *str, ssize_t max_sz); -char * conv_path_to_utf8_with_tmpl (const char *path, - ssize_t pathlen, +char * conv_path_to_utf8_with_tmpl (const GString *path, const char *from_enc, - const char *tmpl, - size_t *read, + out_fmt fmt_type, + StrTransformFunc func, GError **error); char * filter_escapes (const char *str); -char * json_escape_path (const char *path); +char * json_escape (const char *src); #endif diff --git a/src/utils.c b/src/utils.c index c3ba0ab..6101bee 100644 --- a/src/utils.c +++ b/src/utils.c @@ -85,12 +85,6 @@ static char *os_strings[] = { N_("Windows 10 or above") }; -static char *out_format_name[] = { - "unknown format", - "TSV format", - "XML format", - "JSON format", -}; static out_fmt output_format = FORMAT_UNKNOWN; static gboolean no_heading = FALSE; @@ -186,6 +180,8 @@ static gboolean _set_out_format (out_fmt desired_format, GError **error) { + extern struct _fmt_data fmt[]; + if (output_format == desired_format) return TRUE; @@ -197,8 +193,8 @@ _set_out_format (out_fmt desired_format, g_set_error (error, G_OPTION_ERROR, G_OPTION_ERROR_FAILED, "Output was already set in %s, but later argument " "attempts to change to %s", - out_format_name[output_format], - 
out_format_name[desired_format]); + fmt[output_format].friendly_name, + fmt[desired_format].friendly_name); return FALSE; } @@ -709,8 +705,10 @@ _free_record_cb (rbin_struct *record) { g_free (record->index_s); g_date_time_unref (record->deltime); - g_free (record->raw_uni_path); - g_free (record->raw_legacy_path); + if (record->raw_uni_path) + g_string_free (record->raw_uni_path, TRUE); + if (record->raw_legacy_path) + g_string_free (record->raw_legacy_path, TRUE); g_clear_error (&record->error); g_free (record); } @@ -1176,7 +1174,7 @@ _print_json_header (const metarecord *meta) { char *s = g_filename_display_name (meta->filename); - char *rbin_path = json_escape_path (s); + char *rbin_path = json_escape (s); g_print (" \"path\": \"%s\",\n", rbin_path); g_free (s); g_free (rbin_path); @@ -1190,8 +1188,8 @@ static void _print_text_record (rbin_struct *record, const metarecord *meta) { - char *outstr; - char **header; + char *output, **header; + GString *src; GDateTime *dt; g_return_if_fail (record != NULL); @@ -1215,19 +1213,17 @@ _print_text_record (rbin_struct *record, g_strdup ("???") : g_strdup_printf ("%" PRIu64, record->filesize); - if (legacy_encoding) - header[4] = conv_path_to_utf8_with_tmpl (record->raw_legacy_path, - -1, legacy_encoding, "<\\%02X>", NULL, NULL); - else - header[4] = conv_path_to_utf8_with_tmpl (record->raw_uni_path, - -1, NULL, "<\\u%04X>", NULL, NULL); + src = legacy_encoding ? record->raw_legacy_path : + record->raw_uni_path ; + header[4] = conv_path_to_utf8_with_tmpl (src, + legacy_encoding, FORMAT_TEXT, NULL, &record->error); if (! 
header[4]) header[4] = g_strdup ("???"); - outstr = g_strjoinv (delim, header); - g_print ("%s\n", outstr); + output = g_strjoinv (delim, header); + g_print ("%s\n", output); - g_free (outstr); + g_free (output); g_date_time_unref (dt); g_strfreev (header); } @@ -1239,7 +1235,7 @@ _print_xml_record (rbin_struct *record, { char *path, *dt_str; GDateTime *dt; - GString *s; + GString *s, *src; g_return_if_fail (record != NULL); @@ -1273,14 +1269,12 @@ _print_xml_record (rbin_struct *record, g_string_append_printf (s, " size=\"%" PRIu64 "\"", record->filesize); - // Still need to be converted despite using CDATA, otherwise - // could be writing garbage on screen or into file - if (legacy_encoding) - path = conv_path_to_utf8_with_tmpl (record->raw_legacy_path, - -1, legacy_encoding, "&#x%02X;", NULL, NULL); - else - path = conv_path_to_utf8_with_tmpl (record->raw_uni_path, - -1, NULL, "&#x%04X;", NULL, NULL); + // Still need to be converted despite using CDATA, + // otherwise could be writing garbage output + src = legacy_encoding ? record->raw_legacy_path : + record->raw_uni_path ; + path = conv_path_to_utf8_with_tmpl (src, + legacy_encoding, FORMAT_XML, NULL, &record->error); if (path) g_string_append_printf (s, ">\n" @@ -1302,9 +1296,9 @@ static void _print_json_record (rbin_struct *record, const metarecord *meta) { - char *tmp, *path, *dt_str; + char *path, *dt_str; GDateTime *dt; - GString *s; + GString *src, *s; g_return_if_fail (record != NULL); @@ -1338,22 +1332,10 @@ _print_json_record (rbin_struct *record, g_string_append_printf (s, ", \"size\": %" PRIu64, record->filesize); - if (legacy_encoding) - { - // JSON spec doesn't even allow encoding raw byte data, - // so transform it like text output format - tmp = conv_path_to_utf8_with_tmpl (record->raw_legacy_path, - -1, legacy_encoding, "<\\%02X>", NULL, NULL); - } - else - { - // HACK \u sequence collides with path separator, which - // will be processed in json escaping routine. 
Use a temp - // char to avoid collision and convert it back later - tmp = conv_path_to_utf8_with_tmpl (record->raw_uni_path, - -1, NULL, "*u%04X", NULL, NULL); - } - path = json_escape_path (tmp); + src = legacy_encoding ? record->raw_legacy_path : + record->raw_uni_path ; + path = conv_path_to_utf8_with_tmpl (src, legacy_encoding, + FORMAT_JSON, &json_escape, &record->error); if (path) g_string_append_printf (s, ", \"path\": \"%s\"},\n", path); @@ -1363,7 +1345,6 @@ _print_json_record (rbin_struct *record, g_print ("%s", s->str); g_date_time_unref (dt); - g_free (tmp); g_free (path); g_free (dt_str); g_string_free (s, TRUE); diff --git a/src/utils.h b/src/utils.h index 75be0ba..f147d27 100644 --- a/src/utils.h +++ b/src/utils.h @@ -16,6 +16,7 @@ #define _POSIX_C_SOURCE 199309L #endif +#include #include #include #include @@ -64,13 +65,6 @@ typedef enum VERSION_ME_03, } detected_os_ver; -typedef enum _out_fmt -{ - FORMAT_UNKNOWN, - FORMAT_TEXT, - FORMAT_XML, - FORMAT_JSON, -} out_fmt; /** * @brief Whether original trashed file still exists @@ -117,7 +111,7 @@ typedef struct _rbin_meta * segments due to sloppy programming practice. * @attention For `INFO2` only */ - gboolean fill_junk; + bool fill_junk; /** * @brief List of trash file records pointer */ @@ -170,23 +164,25 @@ typedef struct _rbin_struct */ uint64_t filesize; - /* despite var names, all filenames are converted to UTF-8 upon parsing */ - /** - * @brief Unicode trashed file original path - * @note Original path was stored in index file in UTF-16 encoding - * since Windows 2000. The path is converted to UTF-8 encoding and stored here . + * @brief Original path of trashed file, in unicode + * @note Original path was stored in index file in UTF-16 + * encoding since Windows 2000. The raw UTF-16 data is + * stored here. 
`GString` structure is chosen for + * convenience in storing buffer length, which can't be + * easily determined from null termination when path data + * is truncated (due to broken file) */ - char *raw_uni_path; + GString *raw_uni_path; /** - * @brief ANSI encoded trash file original path - * @note Until Windows 2003, index file preserves trashed file path in - * ANSI code page. The path is converted to UTF-8 encoding and stored here. - * @attention For `INFO2` only. Can be either full path or using 8.3 format, - * depending on Windows version and code page used. + * @brief Original path of trashed file, in ANSI code page + * @note Until Windows 2003, index file preserves trashed file + * path in ANSI code page. The raw path is stored here. + * @attention For `INFO2` only. Can be either full path or + * 8.3 format, depending on Windows version and code page used. */ - char *raw_legacy_path; + GString *raw_legacy_path; /** * @brief Whether original trashed file is gone @@ -218,15 +214,6 @@ typedef struct _rbin_struct /*! Every Windows use this GUID in recycle bin desktop.ini */ #define RECYCLE_BIN_CLSID "645FF040-5081-101B-9F08-00AA002F954E" -/* - * Most versions of recycle bin use full PATH_MAX (260 char) to store file paths, - * in either ANSI or Unicode variations, except Windows 10 which uses variable size. - * However we don't want to use PATH_MAX directly since on Linux/Unix it's - * another thing. 
- */ -#define WIN_PATH_MAX 260 - - typedef void (*ParseIdxFunc) (const char *path, metarecord *meta); diff --git a/test/cmake/crafted.cmake b/test/cmake/crafted.cmake index 39e3917..ef3bd36 100644 --- a/test/cmake/crafted.cmake +++ b/test/cmake/crafted.cmake @@ -75,18 +75,20 @@ add_test(NAME f_TruncatedFile set_tests_properties(f_TruncatedFile PROPERTIES LABELS "info2;crafted" - PASS_REGULAR_EXPRESSION "byte range 3220 - 4015: Last segment does not constitute a valid record\.") + PASS_REGULAR_EXPRESSION "5: Record is truncated") # -# Partial bad $Recycle.bin +# Ditto for $Recycle.bin # # TODO different tests for stdout and stderr -add_test(NAME d_SomeBadIdx +add_test(NAME d_TruncatedFile COMMAND rifiuti-vista dir-badfiles WORKING_DIRECTORY ${sample_dir}) -set_tests_properties(d_SomeBadIdx +set_tests_properties(d_TruncatedFile PROPERTIES LABELS "recycledir;crafted" - PASS_REGULAR_EXPRESSION "\\$IX1JBL3\\.djvu: Might be an index file, but file size is unexpected\.") + PASS_REGULAR_EXPRESSION [=[ +\$IF47Q09: File is not a \$Recycle\.bin index +\$IX1JBL3\.djvu: Record is truncated]=]) diff --git a/test/cmake/encoding.cmake b/test/cmake/encoding.cmake index 317816b..0bb9c0d 100644 --- a/test/cmake/encoding.cmake +++ b/test/cmake/encoding.cmake @@ -149,17 +149,42 @@ generate_simple_comparison_test("JsonInfo2Win95" 1 "" "INFO-95-ja-1.json" "encoding|json") -add_encoding_test_with_cwd(f_JsonWin95WrongEnc_Prep +add_encoding_test_with_cwd(f_JsonWrongEnc_Prep ${sample_dir} -DINFO2=INFO-95-ja-1 -DCHOICES=CP1255|MS-HEBR|WINDOWS-1255|HEBREW|ISO-8859-8|ISO-IR-138|ISO8859-8|ISO_8859-8|ISO_8859-8:1988|CSISOLATINHEBREW - -DOUTFILE=${bindir}/f_JsonWin95WrongEnc.output + -DOUTFILE=${bindir}/f_JsonWrongEnc.output -DEXTRA_ARGS=-f|json ) -set_tests_properties(f_JsonWin95WrongEnc_Prep +set_tests_properties(f_JsonWrongEnc_Prep PROPERTIES PASS_REGULAR_EXPRESSION "could not be interpreted in .+ encoding") -generate_simple_comparison_test("JsonWin95WrongEnc" 1 
+generate_simple_comparison_test("JsonWrongEnc" 1 "" "INFO-95-ja-1-in-cp1255.json" "encoding|xfail|json") + +# It turns out different iconv implementations may have +# different behavior even for the same code page. Take this +# for example, GNU iconv marks 0x90 illegal for CP1255, but +# winiconv converts that to U+0090. +if(WIN32) + set_tests_properties(f_JsonWrongEnc + PROPERTIES WILL_FAIL true) +endif() + + +add_encoding_test_with_cwd(f_XmlWrongEnc_Prep + ${sample_dir} + -DINFO2=INFO-95-ja-1 + -DCHOICES=CP949|UHC|ISO-IR-149|KOREAN|KSC_5601|KS_C_5601-1987|KS_C_5601-1989|CSKSC56011987 + -DOUTFILE=${bindir}/f_XmlWrongEnc.output + -DEXTRA_ARGS=-f|xml +) + +set_tests_properties(f_XmlWrongEnc_Prep + PROPERTIES + PASS_REGULAR_EXPRESSION "could not be interpreted in .+ encoding") + +generate_simple_comparison_test("XmlWrongEnc" 1 + "" "INFO-95-ja-1-in-cp949.xml" "encoding|xfail|xml") diff --git a/test/cmake/read-write.cmake b/test/cmake/read-write.cmake index 7927b41..e20ded0 100644 --- a/test/cmake/read-write.cmake +++ b/test/cmake/read-write.cmake @@ -29,7 +29,7 @@ add_test(NAME f_InputSpecialFile COMMAND rifiuti nul) set_tests_properties(d_InputSpecialFile f_InputSpecialFile PROPERTIES LABELS "xfail" - PASS_REGULAR_EXPRESSION "File is prematurely truncated, or not .+ index") + PASS_REGULAR_EXPRESSION "File is not .+ index") else() add_test(NAME d_InputSpecialFile COMMAND rifiuti-vista /dev/null) add_test(NAME f_InputSpecialFile COMMAND rifiuti /dev/null) diff --git a/test/samples/INFO-95-ja-1-in-cp949.xml b/test/samples/INFO-95-ja-1-in-cp949.xml new file mode 100644 index 0000000..5bc9728 --- /dev/null +++ b/test/samples/INFO-95-ja-1-in-cp949.xml @@ -0,0 +1,37 @@ + + + + + 璟.lnk]]> + + + 켓<\AF>璟 꾀갸<\DE>.bmp]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/test/samples/dir-badfiles/$IF47Q09 b/test/samples/dir-badfiles/$IF47Q09 index e69de29..33afd63 100644 Binary files a/test/samples/dir-badfiles/$IF47Q09 and b/test/samples/dir-badfiles/$IF47Q09
differ