diff --git a/src/rifiuti-vista.c b/src/rifiuti-vista.c
index 03fe858..1e6086c 100644
--- a/src/rifiuti-vista.c
+++ b/src/rifiuti-vista.c
@@ -27,38 +27,31 @@ extern metarecord  *meta;
  * @note This only checks if index file has sufficient amount
  * of data for sensible reading
  */
-static gboolean
+static bool
 _validate_index_file   (const char   *filename,
-                        void        **filebuf,
+                        void        **outbuf,
                         gsize        *bufsize,
                         uint64_t     *ver,
                         GError      **error)
 {
-    gsize           expect_sz;
     char           *buf = NULL;
-    uint32_t        pathlen;
 
-    g_return_val_if_fail (filename &&   *filename, FALSE);
-    g_return_val_if_fail (filebuf  && ! *filebuf , FALSE);
-    g_return_val_if_fail (! error  || ! *error   , FALSE);
-    g_return_val_if_fail (bufsize  , FALSE);
-    g_return_val_if_fail (ver      , FALSE);
+    g_return_val_if_fail (filename && *filename, false);
+    g_return_val_if_fail (outbuf   && ! *outbuf, false);
+    g_return_val_if_fail (! error  || ! *error , false);
+    g_return_val_if_fail (bufsize  , false);
+    g_return_val_if_fail (ver      , false);
 
     g_debug ("Start file validation for '%s'...", filename);
 
     if (! g_file_get_contents (filename, &buf, bufsize, error))
         goto validate_fail;
 
-    g_debug ("Read '%s' successfully, size = %" G_GSIZE_FORMAT,
-        filename, *bufsize);
-
     if (*bufsize <= VERSION1_FILENAME_OFFSET)
     {
-        g_debug ("File size = %" G_GSIZE_FORMAT
-            ", expected > %" G_GSIZE_FORMAT,
-            *bufsize, (gsize) VERSION1_FILENAME_OFFSET);
-        g_set_error_literal (error, R2_REC_ERROR, R2_REC_ERROR_IDX_SIZE_INVALID,
-            _("File is prematurely truncated, or not a $Recycle.bin index."));
+        g_set_error_literal (error, R2_REC_ERROR,
+        R2_REC_ERROR_IDX_SIZE_INVALID,
+            _("File is not a $Recycle.bin index"));
         goto validate_fail;
     }
 
@@ -68,59 +61,39 @@ _validate_index_file   (const char   *filename,
 
     switch (*ver)
     {
-        case VERSION_VISTA:
-
-            expect_sz = VERSION1_FILE_SIZE;
-            /* see _populate_record_data() for reason */
-            if ((*bufsize != expect_sz) && (*bufsize != expect_sz - 1))
-            {
-                g_debug ("File size = %" G_GSIZE_FORMAT
-                    ", expected = %" G_GSIZE_FORMAT " or %" G_GSIZE_FORMAT, *bufsize, expect_sz, expect_sz - 1);
-                g_set_error (error, R2_REC_ERROR, R2_REC_ERROR_IDX_SIZE_INVALID,
-                    "%s", _("Might be an index file, but file size is unexpected."));
-                goto validate_fail;
-            }
-            break;
-
-        case VERSION_WIN10:
-
-            // Version 2 adds a uint32 file name strlen before file name.
-            // This presumably breaks the 260 char barrier in version 1.
-            copy_field (pathlen, buf, VERSION1_FILENAME_OFFSET, VERSION2_FILENAME_OFFSET);
-            pathlen = GUINT32_FROM_LE (pathlen);
-
-            /* Header length + strlen in UTF-16 encoding */
-            expect_sz = VERSION2_FILENAME_OFFSET + pathlen * sizeof(gunichar2);
-            if (*bufsize != expect_sz)
-            {
-                g_debug ("File size = %" G_GSIZE_FORMAT
-                    ", expected = %" G_GSIZE_FORMAT,
-                    *bufsize, expect_sz);
-                g_set_error (error, R2_REC_ERROR, R2_REC_ERROR_IDX_SIZE_INVALID,
-                    "%s", _("Might be an index file, but file size is unexpected."));
-                goto validate_fail;
-            }
-            break;
-
-        default:
-            if (*ver < 10)
-                g_set_error (error, R2_REC_ERROR,
-                    R2_REC_ERROR_VER_UNSUPPORTED,
-                    _("Index file version %" PRIu64 " is unsupported"), *ver);
-            else
-                g_set_error (error, R2_REC_ERROR,
-                    R2_REC_ERROR_VER_UNSUPPORTED,
-                    "%s", _("File is not a $Recycle.bin index"));
+    case VERSION_VISTA: break;  // already handled above
+
+    case VERSION_WIN10:
+        // Version 2 adds a uint32 file name strlen before file name.
+        // This presumably breaks the 260 char barrier in version 1.
+        if (*bufsize <= VERSION2_FILENAME_OFFSET)
+        {
+            g_set_error_literal (error, R2_REC_ERROR,
+            R2_REC_ERROR_IDX_SIZE_INVALID,
+                _("File is not a $Recycle.bin index"));
             goto validate_fail;
+        }
+        break;
+
+    default:
+        if (*ver < 10)
+            g_set_error (error, R2_REC_ERROR,
+                R2_REC_ERROR_VER_UNSUPPORTED,
+                _("Index file version %" PRIu64 " is unsupported"), *ver);
+        else
+            g_set_error (error, R2_REC_ERROR,
+                R2_REC_ERROR_VER_UNSUPPORTED,
+                "%s", _("File is not a $Recycle.bin index"));
+        goto validate_fail;
     }
 
-    *filebuf = buf;
+    *outbuf = buf;
     g_debug ("Finished file validation for '%s'", filename);
-    return TRUE;
+    return true;
 
     validate_fail:
     g_free (buf);
-    return FALSE;
+    return false;
 }
 
 
@@ -130,34 +103,41 @@ _populate_record_data  (void      *buf,
                         uint64_t   version)
 {
     rbin_struct  *record;
-    size_t        pathbuf_sz = 0;
+    uint32_t      path_sz_expected, path_sz_actual;
+    size_t        null_terminator_offset;
     void         *pathbuf_start = NULL;
     bool          erraneous = false;
+    GString      *u;  // shorthand
 
     switch (version)
     {
-        case VERSION_VISTA:
-            // In rare cases, the size of index file is one byte short of
-            // (fixed) 544 bytes in Vista. Under such occasion, file size
-            // only occupies 56 bit, not 64 bit as it ought to be.
-            // Actually this 56-bit file size is very likely wrong after all.
-            // This is observed during deletion of dd.exe from Forensic
-            // Acquisition Utilities (by George M. Garner Jr)
-            // in certain localized Vista.
-            if (bufsize == VERSION1_FILE_SIZE - 1)
-                erraneous = true;
-
-            pathbuf_sz = WIN_PATH_MAX * sizeof(gunichar2);
-            pathbuf_start = buf - (int)erraneous + VERSION1_FILENAME_OFFSET;
-            break;
-
-        case VERSION_WIN10:
-            pathbuf_sz = bufsize - VERSION2_FILENAME_OFFSET;
-            pathbuf_start = buf + VERSION2_FILENAME_OFFSET;
-            break;
-
-        default:
-            g_assert_not_reached ();
+    case VERSION_VISTA:
+        // In rare cases, the size of index file is one byte short of
+        // (fixed) 544 bytes in Vista. Under such occasion, file size
+        // only occupies 56 bit, not 64 bit as it ought to be.
+        // Actually this 56-bit file size is very likely wrong after all.
+        // This is observed during deletion of dd.exe from Forensic
+        // Acquisition Utilities (by George M. Garner Jr)
+        // in certain localized Vista.
+        if (bufsize == VERSION1_FILE_SIZE - 1)
+            erraneous = true;
+
+        path_sz_expected = WIN_PATH_MAX * sizeof(gunichar2);
+        path_sz_actual = bufsize + (int)erraneous - VERSION1_FILENAME_OFFSET;
+        pathbuf_start = buf - (int)erraneous + VERSION1_FILENAME_OFFSET;
+        break;
+
+    case VERSION_WIN10:
+        copy_field (path_sz_expected, buf, VERSION1_FILENAME_OFFSET,
+            VERSION2_FILENAME_OFFSET);
+        path_sz_expected = GUINT32_FROM_LE (path_sz_expected) *
+            sizeof(gunichar2);
+        path_sz_actual = bufsize - VERSION2_FILENAME_OFFSET;
+        pathbuf_start = buf + VERSION2_FILENAME_OFFSET;
+        break;
+
+    default:
+        g_assert_not_reached ();
     }
 
     record = g_malloc0 (sizeof (rbin_struct));
@@ -184,23 +164,36 @@ _populate_record_data  (void      *buf,
     record->winfiletime = GINT64_FROM_LE (record->winfiletime);
     record->deltime = win_filetime_to_gdatetime (record->winfiletime);
 
-    record->raw_uni_path = g_malloc0 (pathbuf_sz + sizeof(gunichar2));
-    memcpy (record->raw_uni_path, pathbuf_start, pathbuf_sz);
+    // Unicode path
 
+    if (path_sz_actual > path_sz_expected)
+    {
+        g_set_error_literal (&record->error, R2_REC_ERROR,
+            R2_REC_ERROR_DUBIOUS_PATH,
+            _("Ignored dangling extraneous data after record"));
+    }
+    else if (path_sz_actual < path_sz_expected && ! erraneous)
     {
-        // Never set len = -1 for UCS2 source string
-        char *s = g_convert (record->raw_uni_path,
-            ucs2_strnlen (record->raw_uni_path, pathbuf_sz) * sizeof (gunichar2),
+        g_set_error_literal (&record->error, R2_REC_ERROR,
+            R2_REC_ERROR_DUBIOUS_PATH,
+            _("Record is truncated, thus unicode path might be incomplete"));
+    }
+
+    u = g_string_new_len ((const char *) pathbuf_start,
+        MIN(path_sz_actual, path_sz_expected));
+    record->raw_uni_path = u;
+
+    null_terminator_offset = ucs2_bytelen (u->str, u->len);
+
+    if (record->error == NULL)
+    {
+        char *s = g_convert (u->str, null_terminator_offset,
             "UTF-8", "UTF-16LE", NULL, NULL, NULL);
         if (s)
-        {
             g_free (s);
-        }
         else
-        {
             g_set_error_literal (&record->error, R2_REC_ERROR, R2_REC_ERROR_CONV_PATH,
                 _("Path contains broken unicode character(s)"));
-        }
     }
 
     return record;
diff --git a/src/rifiuti-vista.h b/src/rifiuti-vista.h
index c5665f8..2bf7274 100644
--- a/src/rifiuti-vista.h
+++ b/src/rifiuti-vista.h
@@ -7,7 +7,7 @@
 #ifndef _RIFIUTI_VISTA_H
 #define _RIFIUTI_VISTA_H
 
-#include "utils.h"
+#include "utils-conv.h"
 
 #define VERSION_OFFSET               0x0
 #define FILESIZE_OFFSET              0x8
diff --git a/src/rifiuti.c b/src/rifiuti.c
index 68627a0..4379c4d 100644
--- a/src/rifiuti.c
+++ b/src/rifiuti.c
@@ -62,7 +62,7 @@ _validate_index_file   (const char   *filename,
     {
         g_set_error_literal (error, R2_FATAL_ERROR,
             R2_FATAL_ERROR_ILLEGAL_DATA,
-            _("File is prematurely truncated, or not an INFO2 index."));
+            _("File is not an INFO2 index."));
         goto validation_fail;
     }
 
@@ -139,19 +139,29 @@ _validate_index_file   (const char   *filename,
 
 static rbin_struct *
 _populate_record_data   (void     *buf,
-                         gsize     bufsize,
-                         gboolean *junk_detected)
+                         size_t    bufsize)
 {
     rbin_struct    *record;
     uint32_t        drivenum;
-    size_t          uni_buf_sz, null_terminator_offset;
+    size_t          null_terminator_offset;
+    GString        *l, *u;  // shorthand for paths
+
+    // Unicode records tolerate a partially truncated path,
+    // but Legacy records must be complete
+
+    if (meta->recordsize == LEGACY_RECORD_SIZE &&
+        bufsize < LEGACY_RECORD_SIZE)
+        return NULL;
+
+    if (meta->recordsize == UNICODE_RECORD_SIZE &&
+        bufsize <= LEGACY_RECORD_SIZE)
+        return NULL;
 
     record = g_malloc0 (sizeof (rbin_struct));
 
     // Verbatim path in ANSI code page
-    record->raw_legacy_path = g_malloc0 (RECORD_INDEX_OFFSET - LEGACY_FILENAME_OFFSET);
-    copy_field (*(record->raw_legacy_path), buf,
-        LEGACY_FILENAME_OFFSET, RECORD_INDEX_OFFSET);
+    l = g_string_new_len (buf, WIN_PATH_MAX);
+    record->raw_legacy_path = l;
 
     /* Index number associated with the record */
     copy_field (record->index_n, buf, RECORD_INDEX_OFFSET, DRIVE_LETTER_OFFSET);
@@ -173,10 +183,10 @@ _populate_record_data   (void     *buf,
     record->gone = FILESTATUS_EXISTS;
     // If file is not in recycle bin (restored or permanently deleted),
     // first byte will be removed from filename
-    if (! *record->raw_legacy_path)
+    if (l->str[0] == '\0')
     {
         record->gone = FILESTATUS_GONE;
-        *record->raw_legacy_path = record->drive;
+        l->str[0] = record->drive;
     }
 
     /* File deletion time */
@@ -195,7 +205,7 @@ _populate_record_data   (void     *buf,
     // because otherwise we don't know which encoding to use
     if (legacy_encoding)
     {
-        char *s = g_convert (record->raw_legacy_path, -1,
+        char *s = g_convert (l->str, -1,
             "UTF-8", legacy_encoding, NULL, NULL, NULL);
         if (s)
             g_free (s);
@@ -208,28 +218,30 @@ _populate_record_data   (void     *buf,
     if (bufsize == LEGACY_RECORD_SIZE)
         return record;
 
-    /* Part below deals with unicode path only */
+    // Part below deals with unicode path only
+
+    if (bufsize < UNICODE_RECORD_SIZE && record->error == NULL)
+    {
+        g_set_error_literal (&record->error, R2_REC_ERROR,
+            R2_REC_ERROR_DUBIOUS_PATH,
+            _("Record is truncated, thus unicode path might be incomplete"));
+    }
 
-    uni_buf_sz = UNICODE_RECORD_SIZE - UNICODE_FILENAME_OFFSET;
-    record->raw_uni_path = g_malloc (uni_buf_sz);
-    copy_field (*(record->raw_uni_path), buf,
-        UNICODE_FILENAME_OFFSET, UNICODE_RECORD_SIZE);
-    null_terminator_offset = ucs2_strnlen (
-        record->raw_uni_path, WIN_PATH_MAX) * sizeof (gunichar2);
+    u = g_string_new_len ((const char *) (buf + UNICODE_FILENAME_OFFSET),
+        bufsize - UNICODE_FILENAME_OFFSET);
+    record->raw_uni_path = u;
 
+    null_terminator_offset = ucs2_bytelen (u->str, u->len);
+
+    if (record->error == NULL)
     {
-        // Never set len = -1 for wchar source string
-        char *s = g_convert (record->raw_uni_path, null_terminator_offset,
+        char *s = g_convert (u->str, null_terminator_offset,
             "UTF-8", "UTF-16LE", NULL, NULL, NULL);
         if (s)
-        {
             g_free (s);
-        }
         else
-        {
             g_set_error_literal (&record->error, R2_REC_ERROR, R2_REC_ERROR_CONV_PATH,
                 _("Path contains broken unicode character(s)"));
-        }
     }
 
     /*
@@ -252,26 +264,23 @@ _populate_record_data   (void     *buf,
      * - accented latin chars transliterated to pure ASCII
      * - first DBCS char converted to UCS2 codepoint
      */
-    if (junk_detected && ! *junk_detected)
+    if (! meta->fill_junk && u->len > null_terminator_offset)
     {
-        // Beware: start pos shouldn't be previously read bytes,
-        // as it may contain invalid seq and quit prematurely.
-        char *p = record->raw_uni_path + null_terminator_offset;
-
-        while (p < record->raw_uni_path + uni_buf_sz)
+        char *p = u->str + null_terminator_offset;
+        while (p < u->str + u->len)
         {
             if (*p != '\0')
             {
                 g_debug ("Junk detected at offset 0x%tx of unicode path",
-                    p - record->raw_uni_path);
-                *junk_detected = TRUE;
+                    p - u->str);
+                meta->fill_junk = true;
                 break;
             }
             p++;
         }
 
-        if (*junk_detected)
-            hexdump (record->raw_uni_path, uni_buf_sz);
+        if (meta->fill_junk)
+            hexdump (u->str, u->len);
     }
 
     return record;
@@ -282,12 +291,14 @@ static void
 _parse_record_cb   (const char *index_file,
                     metarecord *meta)
 {
-    rbin_struct   *record;
+    rbin_struct   *record = NULL;
     FILE          *infile = NULL;
-    gsize          read_sz, record_sz;
+    size_t         read_sz,
+                   prev_pos,
+                   curr_pos;
     void          *buf = NULL;
     GError        *error = NULL;
-    int64_t        prev_pos, curr_pos;
+    char          *segment_id;
 
     if (! _validate_index_file (index_file, &infile, &error))
     {
@@ -295,44 +306,37 @@ _parse_record_cb   (const char *index_file,
             g_strdup (index_file), error);
         return;
     }
-
     g_debug ("Start populating record for '%s'...", index_file);
 
-    record_sz = meta->recordsize;
-    buf = g_malloc0 (record_sz);
-
     fseek (infile, RECORD_START_OFFSET, SEEK_SET);
-    curr_pos = (int64_t) ftell (infile);
-    prev_pos = curr_pos;
+    prev_pos = curr_pos = ftell (infile);
 
-    while ((read_sz = fread (buf, 1, record_sz, infile)) > 0)
+    buf = g_malloc0 (meta->recordsize);
+    while ((read_sz = fread (buf, 1, meta->recordsize, infile)) > 0)
     {
         prev_pos = curr_pos;
-        curr_pos = (int64_t) ftell (infile);
-        g_debug ("Read %s, byte range %" PRId64 " - %" PRId64,
-            index_file, prev_pos, curr_pos);
-        if (read_sz < record_sz) {
-            g_debug ("read size = %zu, less than needed %zu", read_sz, record_sz);
-            break;
-        }
-        record = _populate_record_data (buf, record_sz, &meta->fill_junk);
-        g_ptr_array_add (meta->records, record);
+        curr_pos = ftell (infile);
+        g_debug ("Read byte range %zu-%zu %s", prev_pos, curr_pos,
+            (read_sz < meta->recordsize ? " (!!!)" : ""));  // flag short reads
+        if (NULL != (record = _populate_record_data (buf, read_sz)))
+            g_ptr_array_add (meta->records, record);
     }
     g_free (buf);
 
-    char *segment_id = g_strdup_printf ("|%" PRId64 "|%" PRId64, prev_pos, curr_pos);
+    segment_id = g_strdup_printf ("|%zu|%zu", prev_pos, curr_pos);
 
-    if (feof (infile) && read_sz && (read_sz < record_sz))
+    if (feof (infile))
     {
-        g_set_error_literal (&error, R2_REC_ERROR,
-            R2_REC_ERROR_IDX_SIZE_INVALID,
-            _("Last segment does not constitute a valid "
-            "record. Likely a premature end of file."));
+        // read_sz is always 0 once the loop ends; test the last byte range instead
+        if (curr_pos > prev_pos && record == NULL)
+            g_set_error_literal (&error, R2_REC_ERROR, R2_REC_ERROR_IDX_SIZE_INVALID,
+                _("Premature end of file encountered, and "
+                "the last segment is not recoverable."));
     }
     else if (ferror (infile))  // other generic error
     {
-        g_set_error (&error, G_FILE_ERROR, G_FILE_ERROR_FAILED,
-            _("Failed to read record at %s"), segment_id);
+        g_set_error_literal (&error, G_FILE_ERROR, G_FILE_ERROR_FAILED,
+            _("Failed to read record for unknown reason"));
     }
 
     if (error) {
diff --git a/src/rifiuti.h b/src/rifiuti.h
index fc8c7f0..ccd2374 100644
--- a/src/rifiuti.h
+++ b/src/rifiuti.h
@@ -8,7 +8,7 @@
 #ifndef _RIFIUTI_H
 #define _RIFIUTI_H
 
-#include "utils.h"
+#include "utils-conv.h"
 
 /* These offsets are relative to file start */
 #define VERSION_OFFSET           0
diff --git a/src/utils-conv.c b/src/utils-conv.c
index 7c9159f..6e9ab5e 100644
--- a/src/utils-conv.c
+++ b/src/utils-conv.c
@@ -10,9 +10,42 @@
 #include <glib.h>
 #include <glib/gi18n.h>
 
+#include "utils-error.h"
 #include "utils-conv.h"
 
 
+struct _fmt_data fmt[] = {
+    // must match out_fmt enum order
+    {
+        .friendly_name = "unknown format",
+        .fallback_tmpl = {"", "", ""},
+    },
+    {
+        .friendly_name = "TSV format",
+        .fallback_tmpl = {"<\\u%04X>", "<\\%02X>", "<\\u%04X>"},
+    },
+    {
+        .friendly_name = "XML format",
+        // All paths are placed inside CDATA, using entities
+        // can be confusing
+        .fallback_tmpl = {"<\\u%04X>", "<\\%02X>", "<\\u%04X>"},
+    },
+    {
+        .friendly_name = "JSON format",
+        .fallback_tmpl = {
+            "",  // Unused, see json_escape()
+            // JSON doesn't allow encoding raw byte data in strings
+            // (must be proper characters)
+            "<\\%02X>",
+            // HACK \u sequence collides with path separator, which
+            // will be processed in json escaping routine. Use a temp
+            // char to avoid collision and convert it back later
+            "*u%04X"
+        },
+    },
+};
+
+
 /**
  * @brief Try out if encoding is compatible to ASCII
  * @param enc The encoding to test
@@ -47,86 +80,75 @@ enc_is_ascii_compatible    (const char   *enc,
 
 
 /**
- * @brief Compute UCS2 string length like `wcslen()`
- * @param str The string to check (in `char*` !)
- * @param max_sz Maximum length to check, or use -1 to
- * denote the string is nul-terminated
- * @return Either number of UCS2 char for whole string,
- * or return `max_sz` when `max_sz` param is exceeded
+ * @brief Find null terminator position in UCS2 string
+ * @param str The string to check (in `char *` !)
+ * @param max_sz Maximum byte length to check, or use -1 to
+ * denote the string should be nul-terminated
+ * @return Byte position where null terminator (double \\0)
+ * is found, or `max_sz` otherwise
+ * @note Unlike standard C functions such as `wcsnlen()` or
+ * `strnlen()`, this returns a byte count, not a character count.
+ * When no terminator is found and `max_sz` is odd, the dangling
+ * last byte is still included in the returned length.
  */
 size_t
-ucs2_strnlen   (const char   *str,
+ucs2_bytelen   (const char   *str,
                 ssize_t       max_sz)
 {
-    // wcsnlen_s should be equivalent except for boundary
-    // cases we don't care about
-
-    size_t i = 0;
     char *p = (char *) str;
 
-    if (str == NULL)
+    if (str == NULL || max_sz == 0)
         return 0;
 
+    if (max_sz == 1)
+        return 1;
+
     while (*p || *(p+1))
     {
-        if (max_sz >= 0 && i >= (size_t) max_sz)
-            break;
-        i++;
         p += 2;
+        if (max_sz >= 0 && p - str + 1 >= max_sz)
+            return max_sz;
     }
-    return i;
+    return p - str;
 }
 
 
 /**
  * @brief Move character pointer for specified bytes
  * @param sz Must be either 1 or 2, denoting broken byte or broken UCS2 character
- * @param in_str Reference to input string to be converted
- * @param read_bytes Reference to already read bytes count to keep track of
- * @param out_str Reference to output string to be appended
- * @param write_bytes Reference to writable bytes count to decrement
- * @param tmpl `printf` template to represent the broken character
+ * @param ptr Location of char pointer to string to be converted
+ * @param bytes_left Location to number of remaining bytes to read
+ * @param s Broken byte(s) will be formatted and appended to this `GString`
+ * @param fmt_type Type of output format; see `fmt[]` for detail
  * @note This is the core of `conv_path_to_utf8_with_tmpl()` doing
  * error fallback, converting a single broken char to `printf` output.
  */
 static void
 _advance_octet    (size_t       sz,
-                   char       **in_str,
-                   gsize       *read_bytes,
-                   char       **out_str,
-                   gsize       *write_bytes,
-                   const char  *tmpl)
+                   char       **ptr,
+                   gsize       *bytes_left,
+                   GString     *s,
+                   out_fmt      fmt_type)
 {
-    char *repl;
-
-    switch (sz) {
-        case 1:
-        {
-            unsigned char c = *(unsigned char *) (*in_str);
-            repl = g_strdup_printf (tmpl, c);
-        }
-            break;
+    int c = 0;
 
-        case 2:
-        {
-            uint16_t c = GUINT16_FROM_LE (*(uint16_t *) (*in_str));
-            repl = g_strdup_printf (tmpl, c);
-        }
-            break;
+    g_return_if_fail (*bytes_left > 0);
+    g_return_if_fail (sz == 1 || sz == 2);
+    g_return_if_fail (*ptr != NULL);
 
-        default:
-            g_assert_not_reached();
-    }
+    if (*bytes_left == 1)
+        sz = 1;
 
-    (*in_str) += sz;
-    if (read_bytes != NULL)
-        (*read_bytes) -= sz;
+    if (sz == 1)
+        c = *(uint8_t *) (*ptr);
+    else
+        c = GUINT16_FROM_LE (*(uint16_t *) (*ptr));
 
-    *out_str = g_stpcpy (*out_str, (const char *) repl);
-    if (write_bytes != NULL)
-        *write_bytes -= strlen (repl);
+    g_string_append_printf (s,
+        fmt[fmt_type].fallback_tmpl[sz], c);
 
-    g_free (repl);
+    *ptr += sz;
+    *bytes_left -= sz;
     return;
 }
 
@@ -134,15 +156,15 @@ _advance_octet    (size_t       sz,
 /**
  * @brief Convert non-printable characters to escape sequences
  * @param str The original string to be converted
- * @param tmpl `printf` template to represent non-printable chars
+ * @param fmt_type Type of output format; see `fmt[]` for detail
  * @return Converted string, maybe containing escape sequences
  * @attention Caller is responsible for using correct template, no
  * error checking is performed. This template should handle a single
  * Windows unicode path character, which is in UTF-16LE encoding.
  */
 static char *
-_filter_printable_char (const char *str,
-                        const char *tmpl)
+_filter_printable_char   (const char   *str,
+                          out_fmt       fmt_type)
 {
     char     *p, *np;
     gunichar  c;
@@ -155,14 +177,12 @@ _filter_printable_char (const char *str,
         c  = g_utf8_get_char  (p);
         np = g_utf8_next_char (p);
 
-        /*
-         * ASCII space is the norm (e.g. Program Files), but
-         * all other kinds of spaces are rare, so escape them too
-         */
+        // ASCII space is common in paths (e.g. "Program Files"); any
+        // other kind of space or invisible char is escaped instead
         if (g_unichar_isgraph (c) || (c == 0x20))
-            s = g_string_append_len (s, p, (gssize) (np - p));
+            s = g_string_append_len (s, p, (size_t) (np - p));
         else
-            g_string_append_printf (s, tmpl, c);
+            g_string_append_printf (s, fmt[fmt_type].fallback_tmpl[0], c);
 
         p = np;
     }
@@ -171,15 +191,33 @@ _filter_printable_char (const char *str,
 }
 
 
+static void
+_sync_pos   (GString   *str,
+             gsize     *bytes_left,
+             char     **chr_ptr,
+             bool       from_gstring)
+{
+    if (from_gstring)
+    {
+        *bytes_left = str->allocated_len - str->len - 1;
+        *chr_ptr = str->str + str->len;
+    }
+    else
+    {
+        str->len = str->allocated_len - *bytes_left - 1;
+        g_assert (*chr_ptr == str->str + str->len);
+        str->str[str->len] = '\0';
+    }
+}
+
 /**
  * @brief Convert path to UTF-8 encoding with customizable fallback
  * @param path The path string to be converted
  * @param from_enc Either a legacy Windows ANSI encoding, or use
  * `NULL` to represent Windows wide char encoding (UTF-16LE)
- * @param tmpl `printf`-style string template to represent broken
- * character. This template should handle either single- or
- * double-octet, namely `%u`, `%o`, `%d`, `%i`, `%x` and `%X`.
- * @param read Reference to number of successfully read bytes
+ * @param fmt_type Type of output format; see `fmt[]` for detail
+ * @param func String transform func for post processing; can be
+ * `NULL`, which still does some internal filtering
  * @param error Location to store error upon problem
  * @return UTF-8 encoded path, or `NULL` if conversion error happens
  * @note This is very similar to `g_convert_with_fallback()`, but the
@@ -189,107 +227,134 @@ _filter_printable_char (const char *str,
  * @attention 1. This routine is not for generic charset conversion.
  * Extra transformation is intended for path display only.
  * @attention 1. Caller is responsible for using correct template,
- * almost no error checking is performed.
+ * no error checking is performed.
  */
 char *
-conv_path_to_utf8_with_tmpl (const char *path,
-                             ssize_t     pathlen,
-                             const char *from_enc,
-                             const char *tmpl,
-                             size_t     *read,
-                             GError    **error)
+conv_path_to_utf8_with_tmpl (const GString   *path,
+                             const char      *from_enc,
+                             out_fmt          fmt_type,
+                             StrTransformFunc func,
+                             GError         **error)
 {
-    char *u8_path, *i_ptr, *o_ptr, *result = NULL;
-    gsize len, r_total, rbyte, wbyte, status, in_ch_width, out_ch_width;
-    GIConv conv;
-
-    g_return_val_if_fail (path && *path, NULL);
-    g_return_val_if_fail (tmpl && *tmpl, NULL);
+    char            *i_ptr,
+                    *o_ptr,
+                    *result;
+    gsize            i_size,
+                     i_left,
+                     o_left,
+                     char_sz,
+                     status;
+    GIConv           conv;
+    GPtrArray       *err_offsets;
+    GString         *s;
+
+    // For unicode path, the first char must be ASCII drive letter
+    // or slash. And since it is in little endian, first byte is
+    // always non-null
+    g_return_val_if_fail (path != NULL, NULL);
     g_return_val_if_fail (! from_enc || *from_enc, NULL);
-    g_return_val_if_fail (! error    || ! *error , NULL);
 
-    /* try the template */
+    if (from_enc)
     {
-        char *s = g_strdup_printf (tmpl, from_enc ? 0xFF : 0xFFFF);
-        /* UTF-8 character occupies at most 6 bytes */
-        out_ch_width = MAX (strlen(s), 6);
-        g_free (s);
+        char_sz = sizeof (char);
+        i_left = i_size = strnlen (path->str, WIN_PATH_MAX);
     }
-
-    if (from_enc != NULL) {
-        in_ch_width = sizeof (char);
-        len = strnlen (path, (size_t) pathlen);
-    } else {
-        in_ch_width = sizeof (gunichar2);
-        len = ucs2_strnlen (path, (size_t) pathlen);
+    else
+    {
+        char_sz = sizeof (gunichar2);
+        i_left = i_size = ucs2_bytelen (path->str, path->len);
     }
+    i_ptr = path->str;
 
-    rbyte   = len *  in_ch_width;
-    wbyte   = len * out_ch_width;
-    u8_path = g_malloc0 (wbyte);
-
-    r_total = rbyte;
-    i_ptr   = (char *) path;
-    o_ptr   = u8_path;
+    // Ballpark figure, GString decides alloc size on its own
+    s = g_string_sized_new (i_size + 1);
+    _sync_pos (s, &o_left, &o_ptr, true);
 
-    /* Shouldn't fail, from_enc already tested upon start of prog */
+    // Shouldn't fail, encoding already tested upon start of prog
     conv = g_iconv_open ("UTF-8", from_enc ? from_enc : "UTF-16LE");
 
-    g_debug ("Initial: read=%" G_GSIZE_FORMAT ", write=%" G_GSIZE_FORMAT,
-            rbyte, wbyte);
+    g_debug ("Initial : r=%02zu, w=%02zu/%02zu",
+        i_left, o_left, s->allocated_len - 1);
+    err_offsets = g_ptr_array_new_with_free_func ((GDestroyNotify) g_free);
 
-    /* Pass 1: Convert to UTF-8, all illegal seq become escaped hex */
-    while (TRUE)
-    {
-        int e;
+    // Pass 1: Convert to UTF-8, all illegal seq become escaped hex
 
+    while (i_left > 0)
+    {
         if (*i_ptr == '\0') {
             if (from_enc   != NULL) break;
             if (*(i_ptr+1) == '\0') break; /* utf-16: check "\0\0" */
         }
 
-        // GNU iconv may return number of nonreversible conversions
-        // upon success, but we don't need to worry about it, as
-        // conversion from code page to UTF-8 would not be nonreversible
-        if ((gsize) -1 != (status = g_iconv (
-            conv, &i_ptr, &rbyte, &o_ptr, &wbyte)))
+        // When non-reversible chars are converted to \uFFFD, there
+        // is nothing we can do. Just accept the status quo.
+        status = g_iconv (conv, &i_ptr, &i_left, &o_ptr, &o_left);
+        _sync_pos (s, &o_left, &o_ptr, false);
+        if (status != (gsize) -1)
             break;
 
-        e = errno;
-
-        g_debug ("r=%02" G_GSIZE_FORMAT ", w=%02" G_GSIZE_FORMAT
-            ", stt=%" G_GSIZE_FORMAT " (%s) str=%s",
-            rbyte, wbyte, status, g_strerror(e), u8_path);
-
-        switch (e) {
-            case EILSEQ:
-            case EINVAL:  // TODO Handle partial input for EINVAL
-                if (error && ! *error) {
-                    g_set_error (error, G_CONVERT_ERROR,
-                        G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
-                        _("Illegal sequence or partial input at offset %" G_GSIZE_FORMAT), rbyte);
-                }
-                _advance_octet (in_ch_width, &i_ptr, &rbyte, &o_ptr, &wbyte, tmpl);
-                g_iconv (conv, NULL, NULL, &o_ptr, &wbyte);  // reset state
-                break;
-            case E2BIG:  // TODO realloc instead of Kaboom!
-                g_assert_not_reached();
+        int e = errno;
+        g_debug ("Progress: r=%02zu, w=%02zu/%02zu, status=%zd (%s), str=%s",
+            i_left, o_left, s->allocated_len - 1,
+            status, g_strerror(e), s->str);
+
+        switch (e)
+        {
+        case EINVAL:
+        case EILSEQ:
+        {
+            size_t *processed = g_malloc (sizeof (size_t));
+            *processed = i_size - i_left;
+            g_ptr_array_add (err_offsets, processed);
+        }
+            _advance_octet (char_sz, &i_ptr, &i_left, s, fmt_type);
+            _sync_pos (s, &o_left, &o_ptr, true);
+            g_debug ("Progress: r=%02zu, w=%02zu/%02zu, str=%s", i_left, o_left, s->allocated_len - 1, s->str);
+            g_iconv (conv, NULL, NULL, &o_ptr, &o_left);  // reset state
+            _sync_pos (s, &o_left, &o_ptr, false);
+            break;
+        case E2BIG:
+            o_left = s->len;  // g_string_set_size() overwrites len; keep the real one
+            g_string_truncate (g_string_set_size (s, s->allocated_len * 2), o_left);
+            _sync_pos (s, &o_left, &o_ptr, true);
+            break;
         }
     }
 
-    g_debug ("r=%02" G_GSIZE_FORMAT ", w=%02" G_GSIZE_FORMAT
-        ", stt=%" G_GSIZE_FORMAT ", str=%s", rbyte, wbyte, status, u8_path);
+    g_debug ("Finally : r=%02zu, w=%02zu/%02zu, status=%zd, str=%s",
+        i_left, o_left, s->allocated_len - 1, status, s->str);
 
     g_iconv_close (conv);
 
-    if (read != NULL)
-        *read = r_total - rbyte;
+    if (error &&
+        g_error_matches ((const GError *) (*error),
+            R2_REC_ERROR, R2_REC_ERROR_CONV_PATH) &&
+        err_offsets->len > 0)
+    {
+        // More detailed error message showing offsets
+        char *old = (*error)->message;
+        GString *dbg_str = g_string_new ((const char *) old);
+        dbg_str = g_string_append (dbg_str, ", at offset:");
+        for (size_t i = 0; i < err_offsets->len; i++)
+        {
+            g_string_append_printf (dbg_str, " %zu",
+                *((size_t *) (err_offsets->pdata[i])));
+        }
+        (*error)->message = g_string_free (dbg_str, FALSE);
+        g_free (old);
+    }
+
+    g_ptr_array_free (err_offsets, TRUE);
 
-    /* Pass 2: Convert all non-printable chars to hex */
-    g_return_val_if_fail (g_utf8_validate (u8_path, -1, NULL), NULL);
+    // Pass 2: Post processing, e.g. convert non-printable chars to hex
 
-    result = _filter_printable_char (u8_path, tmpl);
-    g_free (u8_path);
+    g_return_val_if_fail (g_utf8_validate (s->str, -1, NULL), NULL);
+
+    if (func == NULL)
+        result = _filter_printable_char (s->str, fmt_type);
+    else
+        result = func (s->str);
+    g_string_free (s, TRUE);
 
     return result;
 }
@@ -353,22 +418,49 @@ filter_escapes (const char *str)
 
 
 char *
-json_escape_path (const char *path)
+json_escape (const char *src)
 {
     // TODO g_string_replace from glib 2.68 does it all
 
-    char *p = (char *) path;
-    gunichar c = 0;
-    GString *s = g_string_new ("");
+    char *p = (char *) src;
+    GString *s = g_string_sized_new (strlen (src));
 
     while (*p) {
-        c = g_utf8_get_char (p);
-        if (c == '\\')
-            s = g_string_append (s, "\\\\");
-        else if (c == '*')
+        gunichar c = g_utf8_get_char (p);
+        switch (c)
+        {
+        // JSON does not need to escape asterisk. This is for
+        // workaround in format template
+        case '*' : s = g_string_append_c (s, '\\'); break;
+        case '\\':
+        // Most chars below (double quote, control chars) are
+        // disallowed in Windows paths. They are handled for the
+        // mischievous who move data to another OS and rename it
+        case 0x22:
+        case 0x27:
             s = g_string_append_c (s, '\\');
-        else
-            s = g_string_append_unichar (s, c);
+            s = g_string_append_c (s, c);
+            break;
+        case 0x08: s = g_string_append (s, "\\b"); break;
+        case 0x09: s = g_string_append (s, "\\t"); break;
+        case 0x0A: s = g_string_append (s, "\\n"); break;
+        case 0x0B: s = g_string_append (s, "\\v"); break;
+        case 0x0C: s = g_string_append (s, "\\f"); break;
+        case 0x0D: s = g_string_append (s, "\\r"); break;
+        default  :
+            if (g_unichar_isgraph (c) || c == 0x20)
+                s = g_string_append_unichar (s, c);
+            else if (c < 0x10000)
+                g_string_append_printf (s, "\\u%04X", c);
+            else  // calculate surrogate
+            {
+                uint16_t high, low;
+                high = 0xD800 + ((c - 0x10000) >> 10  );
+                low  = 0xDC00 + ((c - 0x10000) & 0x3FF);
+                g_string_append_printf (s, "\\u%04X\\u%04X", high, low);
+            }
+            break;
+        }
         p = g_utf8_next_char (p);
     }
     return g_string_free (s, FALSE);
diff --git a/src/utils-conv.h b/src/utils-conv.h
index 1bb1be8..4a3368e 100644
--- a/src/utils-conv.h
+++ b/src/utils-conv.h
@@ -10,21 +10,55 @@
 #include <stdbool.h>
 #include <glib.h>
 
+// All versions of recycle bin prior to Windows 10 store file paths
+// in a fixed 260-character field (Windows PATH_MAX / FILENAME_MAX),
+// in either ANSI or Unicode variations. The system constants can't
+// be reused here, because their values are platform dependent.
+#define WIN_PATH_MAX 260
+
+
+// Minimum bytes needed to guarantee writing a utf8 character
+#define MIN_WRITEBUF_SPACE 4
+
+
+typedef enum
+{
+    FORMAT_UNKNOWN,
+    FORMAT_TEXT,
+    FORMAT_XML,
+    FORMAT_JSON,
+} out_fmt;
+
+
+typedef struct _fmt_data {
+    const char *friendly_name;
+    // fallback_tmpl[0]=utf8 (max 32bit), [1]=char (8bit), [2]=ucs2 (16bit)
+    // Templates must use a numeric printf conversion, because the
+    // values formatted are either not proper characters, or are
+    // non-printable characters in the case of UTF-8. Usable
+    // conversions are `%u`, `%o`, `%d`, `%i`, `%x` and `%X`.
+    const char *fallback_tmpl[3];
+} _fmt_data;
+
+
+typedef
+char *      (*StrTransformFunc)           (const char       *src);
+
+
 bool          enc_is_ascii_compatible     (const char       *enc,
                                            GError          **error);
 
-size_t        ucs2_strnlen                (const char       *str,
+size_t        ucs2_bytelen                (const char       *str,
                                            ssize_t           max_sz);
 
-char *        conv_path_to_utf8_with_tmpl (const char       *path,
-                                           ssize_t           pathlen,
+char *        conv_path_to_utf8_with_tmpl (const GString    *path,
                                            const char       *from_enc,
-                                           const char       *tmpl,
-                                           size_t           *read,
+                                           out_fmt           fmt_type,
+                                           StrTransformFunc  func,
                                            GError          **error);
 
 char *        filter_escapes              (const char       *str);
 
-char *        json_escape_path            (const char       *path);
+char *        json_escape                 (const char       *src);
 
 #endif
diff --git a/src/utils.c b/src/utils.c
index c3ba0ab..6101bee 100644
--- a/src/utils.c
+++ b/src/utils.c
@@ -85,12 +85,6 @@ static char *os_strings[] = {
     N_("Windows 10 or above")
 };
 
-static char *out_format_name[] = {
-    "unknown format",
-    "TSV format",
-    "XML format",
-    "JSON format",
-};
 
 static out_fmt      output_format      = FORMAT_UNKNOWN;
 static gboolean     no_heading         = FALSE;
@@ -186,6 +180,8 @@ static gboolean
 _set_out_format    (out_fmt     desired_format,
                     GError    **error)
 {
+    extern struct _fmt_data fmt[];
+
     if (output_format == desired_format)
         return TRUE;
 
@@ -197,8 +193,8 @@ _set_out_format    (out_fmt     desired_format,
     g_set_error (error, G_OPTION_ERROR, G_OPTION_ERROR_FAILED,
         "Output was already set in %s, but later argument "
         "attempts to change to %s",
-        out_format_name[output_format],
-        out_format_name[desired_format]);
+        fmt[output_format].friendly_name,
+        fmt[desired_format].friendly_name);
     return FALSE;
 }
 
@@ -709,8 +705,10 @@ _free_record_cb (rbin_struct *record)
 {
     g_free (record->index_s);
     g_date_time_unref (record->deltime);
-    g_free (record->raw_uni_path);
-    g_free (record->raw_legacy_path);
+    if (record->raw_uni_path)
+        g_string_free (record->raw_uni_path, TRUE);
+    if (record->raw_legacy_path)
+        g_string_free (record->raw_legacy_path, TRUE);
     g_clear_error (&record->error);
     g_free (record);
 }
@@ -1176,7 +1174,7 @@ _print_json_header (const metarecord *meta)
 
     {
         char *s = g_filename_display_name (meta->filename);
-        char *rbin_path = json_escape_path (s);
+        char *rbin_path = json_escape (s);
         g_print ("  \"path\": \"%s\",\n", rbin_path);
         g_free (s);
         g_free (rbin_path);
@@ -1190,8 +1188,8 @@ static void
 _print_text_record   (rbin_struct        *record,
                       const metarecord   *meta)
 {
-    char         *outstr;
-    char         **header;
+    char         *output, **header;
+    GString      *src;
     GDateTime    *dt;
 
     g_return_if_fail (record != NULL);
@@ -1215,19 +1213,17 @@ _print_text_record   (rbin_struct        *record,
         g_strdup ("???") :
         g_strdup_printf ("%" PRIu64, record->filesize);
 
-    if (legacy_encoding)
-        header[4] = conv_path_to_utf8_with_tmpl (record->raw_legacy_path,
-            -1, legacy_encoding, "<\\%02X>", NULL, NULL);
-    else
-        header[4] = conv_path_to_utf8_with_tmpl (record->raw_uni_path,
-            -1, NULL, "<\\u%04X>", NULL, NULL);
+    src = legacy_encoding ? record->raw_legacy_path :
+                            record->raw_uni_path    ;
+    header[4] = conv_path_to_utf8_with_tmpl (src,
+        legacy_encoding, FORMAT_TEXT, NULL, &record->error);
     if (! header[4])
         header[4] = g_strdup ("???");
 
-    outstr = g_strjoinv (delim, header);
-    g_print ("%s\n", outstr);
+    output = g_strjoinv (delim, header);
+    g_print ("%s\n", output);
 
-    g_free (outstr);
+    g_free (output);
     g_date_time_unref (dt);
     g_strfreev (header);
 }
@@ -1239,7 +1235,7 @@ _print_xml_record   (rbin_struct        *record,
 {
     char         *path, *dt_str;
     GDateTime    *dt;
-    GString      *s;
+    GString      *s, *src;
 
     g_return_if_fail (record != NULL);
 
@@ -1273,14 +1269,12 @@ _print_xml_record   (rbin_struct        *record,
         g_string_append_printf (s,
             " size=\"%" PRIu64 "\"", record->filesize);
 
-    // Still need to be converted despite using CDATA, otherwise
-    // could be writing garbage on screen or into file
-    if (legacy_encoding)
-        path = conv_path_to_utf8_with_tmpl (record->raw_legacy_path,
-            -1, legacy_encoding, "&#x%02X;", NULL, NULL);
-    else
-        path = conv_path_to_utf8_with_tmpl (record->raw_uni_path,
-            -1, NULL, "&#x%04X;", NULL, NULL);
+    // Still need to be converted despite using CDATA,
+    // otherwise could be writing garbage output
+    src = legacy_encoding ? record->raw_legacy_path :
+                            record->raw_uni_path    ;
+    path = conv_path_to_utf8_with_tmpl (src,
+        legacy_encoding, FORMAT_XML, NULL, &record->error);
 
     if (path)
         g_string_append_printf (s, ">\n"
@@ -1302,9 +1296,9 @@ static void
 _print_json_record   (rbin_struct        *record,
                       const metarecord   *meta)
 {
-    char         *tmp, *path, *dt_str;
+    char         *path, *dt_str;
     GDateTime    *dt;
-    GString      *s;
+    GString      *src, *s;
 
     g_return_if_fail (record != NULL);
 
@@ -1338,22 +1332,10 @@ _print_json_record   (rbin_struct        *record,
         g_string_append_printf (s,
             ", \"size\": %" PRIu64, record->filesize);
 
-    if (legacy_encoding)
-    {
-        // JSON spec doesn't even allow encoding raw byte data,
-        // so transform it like text output format
-        tmp = conv_path_to_utf8_with_tmpl (record->raw_legacy_path,
-            -1, legacy_encoding, "<\\%02X>", NULL, NULL);
-    }
-    else
-    {
-        // HACK \u sequence collides with path separator, which
-        // will be processed in json escaping routine. Use a temp
-        // char to avoid collision and convert it back later
-        tmp = conv_path_to_utf8_with_tmpl (record->raw_uni_path,
-            -1, NULL, "*u%04X", NULL, NULL);
-    }
-    path = json_escape_path (tmp);
+    src = legacy_encoding ? record->raw_legacy_path :
+                            record->raw_uni_path    ;
+    path = conv_path_to_utf8_with_tmpl (src, legacy_encoding,
+        FORMAT_JSON, &json_escape, &record->error);
 
     if (path)
         g_string_append_printf (s, ", \"path\": \"%s\"},\n", path);
@@ -1363,7 +1345,6 @@ _print_json_record   (rbin_struct        *record,
     g_print ("%s", s->str);
 
     g_date_time_unref (dt);
-    g_free (tmp);
     g_free (path);
     g_free (dt_str);
     g_string_free (s, TRUE);
diff --git a/src/utils.h b/src/utils.h
index 75be0ba..f147d27 100644
--- a/src/utils.h
+++ b/src/utils.h
@@ -16,6 +16,7 @@
 #define _POSIX_C_SOURCE 199309L
 #endif
 
+#include <stdbool.h>
 #include <inttypes.h>
 #include <stdio.h>
 #include <glib.h>
@@ -64,13 +65,6 @@ typedef enum
     VERSION_ME_03,
 } detected_os_ver;
 
-typedef enum _out_fmt
-{
-    FORMAT_UNKNOWN,
-    FORMAT_TEXT,
-    FORMAT_XML,
-    FORMAT_JSON,
-} out_fmt;
 
 /**
  * @brief Whether original trashed file still exists
@@ -117,7 +111,7 @@ typedef struct _rbin_meta
      * segments due to sloppy programming practice.
      * @attention For `INFO2` only
      */
-    gboolean fill_junk;
+    bool fill_junk;
     /**
      * @brief List of trash file records pointer
      */
@@ -170,23 +164,25 @@ typedef struct _rbin_struct
      */
     uint64_t filesize;
 
-    /* despite var names, all filenames are converted to UTF-8 upon parsing */
-
     /**
-     * @brief Unicode trashed file original path
-     * @note Original path was stored in index file in UTF-16 encoding
-     * since Windows 2000. The path is converted to UTF-8 encoding and stored here .
+     * @brief Original path of trashed file, in unicode
+     * @note Original path was stored in index file in UTF-16
+     * encoding since Windows 2000. The raw UTF-16 data is
+     * stored here. `GString` structure is chosen for
+     * convenience in storing buffer length, which can't be
+     * easily determined from null termination when path data
+     * is truncated (due to broken file)
      */
-    char *raw_uni_path;
+    GString *raw_uni_path;
 
     /**
-     * @brief ANSI encoded trash file original path
-     * @note Until Windows 2003, index file preserves trashed file path in
-     * ANSI code page. The path is converted to UTF-8 encoding and stored here.
-     * @attention For `INFO2` only. Can be either full path or using 8.3 format,
-     * depending on Windows version and code page used.
+     * @brief Original path of trashed file, in ANSI code page
+     * @note Until Windows 2003, index file preserves trashed file
+     * path in ANSI code page. The raw path is stored here.
+     * @attention For `INFO2` only. Can be either full path or
+     * 8.3 format, depending on Windows version and code page used.
      */
-    char *raw_legacy_path;
+    GString *raw_legacy_path;
 
     /**
      * @brief Whether original trashed file is gone
@@ -218,15 +214,6 @@ typedef struct _rbin_struct
 /*! Every Windows use this GUID in recycle bin desktop.ini */
 #define RECYCLE_BIN_CLSID "645FF040-5081-101B-9F08-00AA002F954E"
 
-/*
- * Most versions of recycle bin use full PATH_MAX (260 char) to store file paths,
- * in either ANSI or Unicode variations, except Windows 10 which uses variable size.
- * However we don't want to use PATH_MAX directly since on Linux/Unix it's
- * another thing.
- */
-#define WIN_PATH_MAX 260
-
-
 typedef void (*ParseIdxFunc)              (const char       *path,
                                            metarecord       *meta);
 
diff --git a/test/cmake/crafted.cmake b/test/cmake/crafted.cmake
index 39e3917..ef3bd36 100644
--- a/test/cmake/crafted.cmake
+++ b/test/cmake/crafted.cmake
@@ -75,18 +75,20 @@ add_test(NAME f_TruncatedFile
 set_tests_properties(f_TruncatedFile
     PROPERTIES
         LABELS "info2;crafted"
-        PASS_REGULAR_EXPRESSION "byte range 3220 - 4015: Last segment does not constitute a valid record\.")
+        PASS_REGULAR_EXPRESSION "5: Record is truncated")
 
 
 #
-# Partial bad $Recycle.bin
+# Ditto for $Recycle.bin
 #
 
 # TODO different tests for stdout and stderr
-add_test(NAME d_SomeBadIdx
+add_test(NAME d_TruncatedFile
     COMMAND rifiuti-vista dir-badfiles
     WORKING_DIRECTORY ${sample_dir})
-set_tests_properties(d_SomeBadIdx
+set_tests_properties(d_TruncatedFile
     PROPERTIES
         LABELS "recycledir;crafted"
-        PASS_REGULAR_EXPRESSION "\\$IX1JBL3\\.djvu: Might be an index file, but file size is unexpected\.")
+        PASS_REGULAR_EXPRESSION [=[
+\$IF47Q09: File is not a \$Recycle\.bin index
+\$IX1JBL3\.djvu: Record is truncated]=])
diff --git a/test/cmake/encoding.cmake b/test/cmake/encoding.cmake
index 317816b..0bb9c0d 100644
--- a/test/cmake/encoding.cmake
+++ b/test/cmake/encoding.cmake
@@ -149,17 +149,42 @@ generate_simple_comparison_test("JsonInfo2Win95" 1
     "" "INFO-95-ja-1.json" "encoding|json")
 
 
-add_encoding_test_with_cwd(f_JsonWin95WrongEnc_Prep
+add_encoding_test_with_cwd(f_JsonWrongEnc_Prep
     ${sample_dir}
     -DINFO2=INFO-95-ja-1
     -DCHOICES=CP1255|MS-HEBR|WINDOWS-1255|HEBREW|ISO-8859-8|ISO-IR-138|ISO8859-8|ISO_8859-8|ISO_8859-8:1988|CSISOLATINHEBREW
-    -DOUTFILE=${bindir}/f_JsonWin95WrongEnc.output
+    -DOUTFILE=${bindir}/f_JsonWrongEnc.output
     -DEXTRA_ARGS=-f|json
 )
 
-set_tests_properties(f_JsonWin95WrongEnc_Prep
+set_tests_properties(f_JsonWrongEnc_Prep
     PROPERTIES
     PASS_REGULAR_EXPRESSION "could not be interpreted in .+ encoding")
 
-generate_simple_comparison_test("JsonWin95WrongEnc" 1
+generate_simple_comparison_test("JsonWrongEnc" 1
     "" "INFO-95-ja-1-in-cp1255.json" "encoding|xfail|json")
+
+# It turns out different iconv implementations may behave
+# differently even for the same code page. For example,
+# GNU iconv marks 0x90 as illegal for CP1255, but
+# winiconv converts it to U+0090.
+if(WIN32)
+    set_tests_properties(f_JsonWrongEnc
+    PROPERTIES WILL_FAIL true)
+endif()
+
+
+add_encoding_test_with_cwd(f_XmlWrongEnc_Prep
+    ${sample_dir}
+    -DINFO2=INFO-95-ja-1
+    -DCHOICES=CP949|UHC|ISO-IR-149|KOREAN|KSC_5601|KS_C_5601-1987|KS_C_5601-1989|CSKSC56011987
+    -DOUTFILE=${bindir}/f_XmlWrongEnc.output
+    -DEXTRA_ARGS=-f|xml
+)
+
+set_tests_properties(f_XmlWrongEnc_Prep
+    PROPERTIES
+    PASS_REGULAR_EXPRESSION "could not be interpreted in .+ encoding")
+
+generate_simple_comparison_test("XmlWrongEnc" 1
+    "" "INFO-95-ja-1-in-cp949.xml" "encoding|xfail|xml")
diff --git a/test/cmake/read-write.cmake b/test/cmake/read-write.cmake
index 7927b41..e20ded0 100644
--- a/test/cmake/read-write.cmake
+++ b/test/cmake/read-write.cmake
@@ -29,7 +29,7 @@ add_test(NAME f_InputSpecialFile COMMAND rifiuti       nul)
 set_tests_properties(d_InputSpecialFile f_InputSpecialFile
     PROPERTIES
         LABELS "xfail"
-        PASS_REGULAR_EXPRESSION "File is prematurely truncated, or not .+ index")
+        PASS_REGULAR_EXPRESSION "File is not .+ index")
 else()
 add_test(NAME d_InputSpecialFile COMMAND rifiuti-vista /dev/null)
 add_test(NAME f_InputSpecialFile COMMAND rifiuti       /dev/null)
diff --git a/test/samples/INFO-95-ja-1-in-cp949.xml b/test/samples/INFO-95-ja-1-in-cp949.xml
new file mode 100644
index 0000000..5bc9728
--- /dev/null
+++ b/test/samples/INFO-95-ja-1-in-cp949.xml
@@ -0,0 +1,37 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<recyclebin format="file" version="0" ever_existed="16">
+  <filename><![CDATA[INFO-95-ja-1]]></filename>
+  <record index="1" time="2015-05-11T05:59:49Z" gone="false" size="32768">
+    <path><![CDATA[D:\WINDOWS\춈슛캄璟\The Microsoft Network 궻쒸캇<\AF>璟.lnk]]></path>
+  </record>
+  <record index="2" time="2015-05-11T06:00:25Z" gone="false" size="950272">
+    <path><![CDATA[D:\WINDOWS\춈슛캄璟\륷딮拒<\AF>켓<\AF>璟 꾀갸<\DE>.bmp]]></path>
+  </record>
+  <record index="3" time="2015-05-11T07:19:25Z" gone="false" size="32768">
+    <path><![CDATA[D:\WINDOWS\춈슛캄璟\륷딮첨식빒룕.txt]]></path>
+  </record>
+  <record index="4" time="2015-05-11T09:48:21Z" gone="false" size="589824">
+    <path><![CDATA[D:\My Documents\DirectX-V8.0a\bda.cab]]></path>
+  </record>
+  <record index="5" time="2015-05-11T09:48:21Z" gone="false" size="589824">
+    <path><![CDATA[D:\My Documents\DirectX-V8.0a\bdant.cab]]></path>
+  </record>
+  <record index="6" time="2015-05-11T09:48:21Z" gone="false" size="65536">
+    <path><![CDATA[D:\My Documents\DirectX-V8.0a\cfgmgr32.dll]]></path>
+  </record>
+  <record index="11" time="2015-05-11T09:48:23Z" gone="false" size="163840">
+    <path><![CDATA[D:\My Documents\DirectX-V8.0a\dxsetup.exe]]></path>
+  </record>
+  <record index="12" time="2015-05-11T09:48:23Z" gone="false" size="360448">
+    <path><![CDATA[D:\My Documents\DirectX-V8.0a\setupapi.dll]]></path>
+  </record>
+  <record index="13" time="2015-05-11T09:59:19Z" gone="false" size="32768">
+    <path><![CDATA[D:\WINDOWS\춈슛캄璟\Connect to the Internet.LNK]]></path>
+  </record>
+  <record index="14" time="2015-05-11T09:59:22Z" gone="false" size="32768">
+    <path><![CDATA[D:\WINDOWS\춈슛캄璟\Outlook Express.lnk]]></path>
+  </record>
+  <record index="15" time="2015-05-18T00:45:09Z" gone="false" size="32768">
+    <path><![CDATA[D:\WINDOWS\춈슛캄璟\륷딮첨식빒룕.txt]]></path>
+  </record>
+</recyclebin>
diff --git a/test/samples/dir-badfiles/$IF47Q09 b/test/samples/dir-badfiles/$IF47Q09
index e69de29..33afd63 100644
Binary files a/test/samples/dir-badfiles/$IF47Q09 and b/test/samples/dir-badfiles/$IF47Q09 differ