Skip to content

Commit

Permalink
fix: correct check of BOM using open(2)
Browse files Browse the repository at this point in the history
This emulates the implementation of _tcc_open, which also uses open(2) with the
O_BINARY flag (defined as zero on POSIX). Another approach would be to change
the tcc code, but I prefer not to touch it when possible.
  • Loading branch information
jaromil committed Dec 8, 2024
1 parent 9ca9848 commit 94f3669
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 15 deletions.
39 changes: 25 additions & 14 deletions src/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@
#include <string.h>
#include <errno.h>
#include <time.h>
#include <fcntl.h>
#include <unistd.h>
#include <inttypes.h>

#include <ftw.h> // _GNU_SOURCE

Expand All @@ -36,27 +39,35 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <dirent.h>
#ifndef O_BINARY
# define O_BINARY 0
#endif
#endif
extern void _err(const char *fmt, ...);

// from exec-headers.c
extern bool gen_exec_headers(char *tmpdir);

int detect_bom(const char *filename) {
FILE *file = fopen(filename, "rb");
if (!file) return -1;
char bom[3];
fread(bom, 1, 3, file);
fclose(file);
if (bom[0] == 0xFF && bom[1] == 0xFE) {
return 1; // UTF-16 LE
} else if (bom[0] == 0xFE && bom[1] == 0xFF) {
return 2; // UTF-16 BE
} else if (bom[0] == 0xEF && bom[1] == 0xBB && bom[2] == 0xBF) {
return 3; // UTF-8
} else {
return 0; // No BOM
}
uint8_t bom[3];
int res;
int fd = open(filename, O_RDONLY | O_BINARY);
res = read(fd,bom,3);
if (res!=3) {
_err("read error on %s: %s",filename, strerror(errno));
return -1;
}
close(fd);
// _err("%s bom: %x %x %x",filename,bom[0],bom[1],bom[2]);
if (bom[0] == 0xFF && bom[1] == 0xFE) {
return 1; // UTF-16 LE
} else if (bom[0] == 0xFE && bom[1] == 0xFF) {
return 2; // UTF-16 BE
} else if (bom[0] == 0xEF && bom[1] == 0xBB && bom[2] == 0xBF) {
return 3; // UTF-8
} else {
return 0; // No BOM
}
}

bool append_path(char **stored_path, const char *new_path) {
Expand Down
4 changes: 3 additions & 1 deletion test/windows.bats
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ load bats_setup
}

# A source file starting with a UTF-8 BOM must be rejected with a clear message.
@test "BOM source file UTF8" {
    # Single quiet (-q) invocation: a second `run` would overwrite
    # $status and $output, so only one run is asserted on.
    run ${CJIT} -q test/hello-bom-utf8.c
    assert_failure
    assert_line 'UTF BOM detected in file: test/hello-bom-utf8.c'
    assert_line 'Encoding is not yet supported, execution aborted.'
}

0 comments on commit 94f3669

Please sign in to comment.