Skip to content

Commit

Permalink
Refactor tokens representation
Browse files Browse the repository at this point in the history
Use a more data-oriented SOA layout for tokens and calculate line and column information on the fly
  • Loading branch information
LesleyLai committed Jan 15, 2025
1 parent 5dc2cb1 commit 5150d69
Show file tree
Hide file tree
Showing 21 changed files with 523 additions and 221 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ cmake-build-*
*.obj
*.o
.venv
*.i

test/test_driver/target

Expand Down
9 changes: 5 additions & 4 deletions include/mcc/arena.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,16 +24,17 @@ __attribute((malloc))
#endif
void* arena_aligned_alloc(Arena* arena, size_t alignment, size_t size);

void* arena_aligned_grow(Arena* arena, void* p, size_t new_alignment,
size_t new_size);
void* arena_aligned_realloc(Arena* arena, void* old_p, size_t alignment,
size_t old_size, size_t new_size);

#define ARENA_ALLOC_OBJECT(arena, Type) \
(Type*)arena_aligned_alloc((arena), alignof(Type), sizeof(Type))

#define ARENA_ALLOC_ARRAY(arena, Type, n) \
(Type*)arena_aligned_alloc((arena), alignof(Type), sizeof(Type) * (n))

#define ARENA_GROW_ARRAY(arena, Type, p, n) \
(Type*)arena_aligned_grow((arena), (p), alignof(Type), sizeof(Type) * (n))
#define ARENA_REALLOC_ARRAY(arena, Type, old_p, old_n, new_n) \
(Type*)arena_aligned_realloc((arena), (old_p), alignof(Type), \
sizeof(Type) * (old_n), sizeof(Type) * (new_n))

#endif // MCC_ARENA_H
6 changes: 4 additions & 2 deletions include/mcc/dynarray.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,11 @@
#define DYNARRAY_PUSH_BACK(arr, T, arena, elem) \
do { \
if ((arr)->length == (arr)->capacity) { \
const size_t old_capacity = (arr)->capacity; \
(arr)->capacity = (arr)->capacity ? (arr)->capacity * 2 : 16; \
(arr)->data = (T*)arena_aligned_grow((arena), (arr)->data, alignof(T), \
(arr)->capacity * sizeof(T)); \
(arr)->data = (T*)arena_aligned_realloc( \
(arena), (arr)->data, alignof(T), old_capacity * sizeof(T), \
(arr)->capacity * sizeof(T)); \
} \
(arr)->data[(arr)->length++] = elem; \
} while (0)
Expand Down
55 changes: 55 additions & 0 deletions include/mcc/frontend.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
#ifndef MCC_PARSER_H
#define MCC_PARSER_H

#include "diagnostic.h"
#include "token.h"

#include <mcc/arena.h>
#include <mcc/str.h>

typedef struct TranslationUnit TranslationUnit;

/// @brief A pointer-plus-length view of ParseError values
typedef struct ParseErrorsView {
// Presumably the number of ParseError elements reachable through `data`
// -- TODO confirm against the parser implementation
size_t length;
// NOTE(review): ownership/lifetime of this storage is not visible in this
// header; confirm which arena it is allocated from
ParseError* data;
} ParseErrorsView;

/// @brief The value returned by parse(): an AST together with parse errors
typedef struct ParseResult {
// NOTE(review): whether `ast` may be null when errors are reported is not
// visible from this header -- confirm against parse()
TranslationUnit* ast;
ParseErrorsView errors;
} ParseResult;

/// @brief Scan the source file and generate a list of tokens
Tokens lex(const char* source, Arena* permanent_arena, Arena scratch_arena);

/// @brief Parse tokens into AST
ParseResult parse(const char* src_filename, const char* src, Tokens tokens,
Arena* permanent_arena, Arena scratch_arena);

/// @brief A table used to compute line/column numbers
///
/// Obtained from get_line_num_table() and consumed by
/// calculate_line_and_column().
typedef struct LineNumTable {
// Presumably the number of entries in `line_starts` -- TODO confirm
uint32_t line_count;
// Presumably the source offset at which each line begins -- TODO confirm
const uint32_t* line_starts;
} LineNumTable;

/// @brief A line/column pair, as returned by calculate_line_and_column()
///
/// NOTE(review): whether `line` and `column` are 0-based or 1-based is not
/// visible in this header -- confirm against the implementation.
typedef struct LineColumn {
uint32_t line;
uint32_t column;
} LineColumn;

/**
* Get the table used to calculate line numbers for a file. If the table is
* already initialized, return it; otherwise, create it.
*/
const LineNumTable* get_line_num_table(const char* file_name, StringView src,
Arena* permanent_arena,
Arena scratch_arena);

LineColumn calculate_line_and_column(const LineNumTable* table,
uint32_t offset);

/// @brief Print Tokens
void print_tokens(const char* src, const Tokens* tokens,
const LineNumTable* line_num_table);

#endif // MCC_PARSER_H
28 changes: 0 additions & 28 deletions include/mcc/parser.h

This file was deleted.

33 changes: 33 additions & 0 deletions include/mcc/prelude.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#ifndef MCC_PRELUDE_H
#define MCC_PRELUDE_H

#include <limits.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
Expand All @@ -22,6 +23,23 @@

#define MCC_ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0]))

/// @brief Report an unrecoverable error and terminate the process
///
/// Writes a "mcc fatal error" report to stderr containing the current file,
/// line, and function name followed by `message`, then calls abort().
/// `message` is passed to a "%s" conversion, so it must be a C string.
#define MCC_PANIC(message) \
do { \
(void)fprintf(stderr, "mcc fatal error:\n[%s:%i] in %s: %s\n\n", __FILE__, \
__LINE__, __func__, message); \
abort(); \
} while (0)

/// @brief Terminate the process if `condition` evaluates to false
///
/// On failure, writes a "mcc fatal error" report to stderr containing the
/// current file, line, and function name, then calls abort(). When
/// `condition` is true nothing is printed and execution continues.
#define MCC_ASSERT(condition) \
do { \
if (!(condition)) { \
(void)fprintf(stderr, \
"mcc fatal error:\n[%s:%i] Assert failed in %s\n\n", \
__FILE__, __LINE__, __func__); \
abort(); \
} \
} while (0)

#define MCC_ASSERT_MSG(condition, message) \
do { \
if (!(condition)) { \
Expand Down Expand Up @@ -64,4 +82,19 @@
for (int MCC_MACRO_VAR(_i_) = 0; !MCC_MACRO_VAR(_i_); \
((MCC_MACRO_VAR(_i_) += 1), end))

/// @brief Checked narrowing conversion from size_t to uint32_t
/// @param v The value to convert
/// @return `v` converted to uint32_t
/// @note Terminates via MCC_PANIC("out of range") if `v` exceeds UINT32_MAX
inline static uint32_t u32_from_usize(size_t v)
{
  // The destination type is uint32_t, so the bound is UINT32_MAX. UINT_MAX is
  // the limit of `unsigned int`, which is not guaranteed to be 32 bits wide.
  if (v > UINT32_MAX) { MCC_PANIC("out of range"); }
  return (uint32_t)(v);
}

/// @brief Checked narrowing conversion from intptr_t to uint32_t
/// @param v The value to convert
/// @return `v` converted to uint32_t
/// @note Terminates via MCC_PANIC("out of range") if `v` is negative or
/// exceeds UINT32_MAX
inline static uint32_t u32_from_isize(intptr_t v)
{
  if (v < 0) { MCC_PANIC("out of range"); }
  // `v` is known to be non-negative here, so converting it to uintpt_t keeps
  // its value and avoids a mixed signed/unsigned comparison. The bound is
  // UINT32_MAX (the destination type's limit), not UINT_MAX, because
  // `unsigned int` is not guaranteed to be 32 bits wide.
  if ((uintptr_t)v > UINT32_MAX) { MCC_PANIC("out of range"); }
  return (uint32_t)(v);
}

#endif // MCC_PRELUDE_H
23 changes: 15 additions & 8 deletions include/mcc/token.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#include "source_location.h"
#include "str.h"

typedef enum TokenType {
typedef enum TokenType : char {
TOKEN_INVALID = 0,

TOKEN_LEFT_PAREN, // (
Expand Down Expand Up @@ -69,18 +69,25 @@ typedef enum TokenType {
} TokenType;

typedef struct Token {
StringView src;
TokenType type;
SourceLocation location;
uint32_t start; // The offset of the starting character in a token
uint32_t size;
} Token;

/// @brief A view of tokens
/// @brief An SOA view of tokens
typedef struct Tokens {
Token* begin; // Points to the first element of the token array
Token* end; // Points to one plus the last element of the token array
TokenType* token_types;
uint32_t* token_starts;
uint32_t* token_sizes;
uint32_t token_count;
} Tokens;

/// @brief Print Tokens
void print_tokens(Tokens* tokens);
inline static Token get_token(const Tokens* tokens, uint32_t i)
{
MCC_ASSERT(i < tokens->token_count);
return MCC_COMPOUND_LITERAL(Token){.type = tokens->token_types[i],
.start = tokens->token_starts[i],
.size = tokens->token_sizes[i]};
}

#endif // MCC_TOKEN_H
3 changes: 2 additions & 1 deletion src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ add_library(mcc_lib
${include_dir}/prelude.h
${include_dir}/format.h
${include_dir}/ast.h
${include_dir}/parser.h
${include_dir}/frontend.h
${include_dir}/source_location.h
${include_dir}/diagnostic.h
${include_dir}/cli_args.h
Expand All @@ -22,6 +22,7 @@ add_library(mcc_lib
utils/diagnostic.c
utils/cli_args.c

frontend/line_numbers.c
frontend/lexer.c
frontend/ast_printer.c
frontend/parser.c
Expand Down
Loading

0 comments on commit 5150d69

Please sign in to comment.