Skip to content

Commit

Permalink
feat!: throw an error on UTF-16 or UTF-32 BOM
Browse files Browse the repository at this point in the history
BREAKING CHANGE: UTF-16 and UTF-32 files that have the byte-order mark
are no longer passed through with `-p`/`--pass-through`.

v0.16.0
  • Loading branch information
dbohdan committed May 20, 2024
1 parent c75ea8e commit 00d0f31
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 9 deletions.
7 changes: 5 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -220,8 +220,11 @@ Commands act on all of them at the same time.
### Text encodings

Initool is encoding-naive and assumes one character is one byte.
It correctly processes UTF-8-encoded files when given UTF-8 command-line arguments but can't open files in UTF-16 or UTF-32.
On Windows, it will receive the command-line arguments in the encoding for your system's language for non-Unicode programs (e.g., [Windows-1252](https://en.wikipedia.org/wiki/Windows-1252)),
It correctly processes UTF-8-encoded files when given UTF-8 command-line arguments.
It exits with an encoding error if it detects the UTF-16 or UTF-32 [BOM](https://en.wikipedia.org/wiki/Byte_order_mark).
Trying to open a UTF-16 or UTF-32 file without the BOM results in an "invalid line" error because initool is unable to parse it.

On Windows, initool will receive the command-line arguments in the encoding for your system's language for non-Unicode programs (e.g., [Windows-1252](https://en.wikipedia.org/wiki/Windows-1252)),
which limits what you can do with UTF-8-encoded files.


Expand Down
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.15.0
0.16.0
42 changes: 36 additions & 6 deletions initool.sml
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,29 @@
* License: MIT
*)

type options = {ignoreCase: bool, passThrough: bool}
exception Encoding of string

fun idOptions (opts: options) : Id.options = {ignoreCase = #ignoreCase opts}
val unsupportedEncoding = "unsupported encoding: "

(* Reject input whose first line begins with a UTF-16 or UTF-32 byte-order
 * mark (BOM).
 *
 * lines: the input split into lines.
 * Returns `lines` unchanged when no such BOM is found, so the function can be
 * composed into a pipeline between reading and parsing.
 * Raises `Encoding` with a message naming the detected encoding otherwise.
 *
 * The check applies to the first line of any non-empty input. Matching only
 * a one-element list would let every multi-line file slip through unchecked.
 *)
fun checkWrongEncoding (lines: string list) =
  let
    (* Order matters: the UTF-32 LE BOM (FF FE 00 00) starts with the
     * UTF-16 LE BOM (FF FE), so the four-byte marks are tried first. *)
    val boms =
      [ ("\000\000\254\255", "UTF-32 BE")
      , ("\255\254\000\000", "UTF-32 LE")
      , ("\254\255", "UTF-16 BE")
      , ("\255\254", "UTF-16 LE")
      ]
    val _ =
      case lines of
        first :: _ =>
          (case List.find (fn (bom, _) => String.isPrefix bom first) boms of
             SOME (_, name) => raise Encoding (unsupportedEncoding ^ name)
           | NONE => ())
      | [] => ()
  in
    lines
  end

fun readLines (filename: string) : string list =
let
Expand Down Expand Up @@ -44,7 +64,8 @@ datatype result = Output of string | FailureOutput of string | Error of string

fun processFileCustom quiet passThrough successFn filterFn filename =
let
val parsed = Ini.parse passThrough (readLines filename)
val parsed =
((Ini.parse passThrough) o checkWrongEncoding o readLines) filename
val filtered = filterFn parsed
val success = successFn (parsed, filtered)
val output = if quiet then "" else Ini.stringify filtered
Expand Down Expand Up @@ -100,13 +121,17 @@ fun helpCommand [] = Output allUsage
Error (invalidUsage ^ (formatArgs (cmd :: rest)) ^ "\n" ^ usage ^ cmd)

fun versionCommand [] =
let val version = "0.15.0"
let val version = "0.16.0"
in Output (version ^ "\n")
end
| versionCommand [_] = versionCommand []
| versionCommand (cmd :: rest) =
Error (invalidUsage ^ (formatArgs (cmd :: rest)) ^ "\n" ^ usage ^ cmd)

type options = {ignoreCase: bool, passThrough: bool}

(* Project the program-wide options onto the subset that `Id` takes:
 * only the `ignoreCase` flag is carried over. *)
fun idOptions ({ignoreCase, ...}: options) : Id.options =
  {ignoreCase = ignoreCase}

fun getCommand (opts: options) [_, filename] =
processFile (#passThrough opts) (fn _ => true) (fn x => x) filename
| getCommand opts [_, filename, section] =
Expand Down Expand Up @@ -139,7 +164,7 @@ fun getCommand (opts: options) [_, filename] =
val q = Ini.SelectProperty {section = section, key = key}
val parsed =
((Ini.select (idOptions opts) q) o (Ini.parse (#passThrough opts))
o readLines) filename
o checkWrongEncoding o readLines) filename
val allItems = List.concat
(List.map (fn {name = _, contents = xs} => xs) parsed)
val values =
Expand Down Expand Up @@ -275,11 +300,16 @@ fun processArgs (opts: options) [] = helpCommand []
| processArgs opts (cmd :: _) =
Error (unknownCommand ^ (formatArgs [cmd]) ^ "\n" ^ availableCommands)

(* Shared handler for exceptions that carry a user-facing message.
 * Prefixes the message with "Error: " and hands it to `exitWithError`
 * (defined elsewhere in this file; presumably it prints the text and
 * terminates the program -- confirm there). *)
fun handleException (message: string) =
  let
    val text = "Error: " ^ message
  in
    exitWithError "" text
  end

val args = CommandLine.arguments ()

val result =
processArgs {ignoreCase = false, passThrough = false} args
handle Ini.Tokenization (message) => exitWithError "" ("Error: " ^ message)
handle
Encoding message => handleException message
| Ini.Tokenization message => handleException message
val _ =
case result of
Output s => printFlush TextIO.stdOut s
Expand Down

0 comments on commit 00d0f31

Please sign in to comment.