From 9c755abe6448cd26dfdfb67de0246709ccee6d22 Mon Sep 17 00:00:00 2001 From: Alex Williams Date: Wed, 18 Jan 2023 08:18:14 +0900 Subject: [PATCH] Better error handling, bounds check, bug fixes. Fixes #7, #8, #13, #14 --- README.md | 17 +++++++--- docs/HOWTO.md | 2 +- docs/REFERENCE.md | 2 +- docs/TUTORIALS.md | 6 ++-- src/01-variables-constants.s | 2 +- src/02-macros.s | 44 +++++++++++++++++++++----- src/05-internal-functions.s | 28 ++++++----------- src/06-initialization.s | 20 ++++++++++-- src/07-error-handling.s | 38 +++++++++-------------- src/08-forth-primitives.s | 60 ++++++++++++++++++++++++------------ src/09-interpreter.s | 16 ++++++---- 11 files changed, 149 insertions(+), 86 deletions(-) diff --git a/README.md b/README.md index a727eb2..fbd7927 100644 --- a/README.md +++ b/README.md @@ -21,8 +21,8 @@ Development progress has been logged regularly in the [devlogs](https://aw.githu The quickest way to get started is to download and flash one of the firmware binaries listed below:. -* [fiveforths-longan-nano-lite.bin](https://github.com/aw/fiveforths/releases/download/v0.2/fiveforths-longan-nano-lite.bin) (64K Flash, 20K RAM) -* [fiveforths-longan-nano.bin](https://github.com/aw/fiveforths/releases/download/v0.2/fiveforths-longan-nano.bin) (128K Flash, 32K RAM) +* [fiveforths-longan-nano-lite.bin](https://github.com/aw/fiveforths/releases/download/v0.3/fiveforths-longan-nano-lite.bin) (64K Flash, 20K RAM) +* [fiveforths-longan-nano.bin](https://github.com/aw/fiveforths/releases/download/v0.3/fiveforths-longan-nano.bin) (128K Flash, 32K RAM) See the [TUTORIALS](docs/TUTORIALS.md) for detailed download and flashing information. 
@@ -35,8 +35,6 @@ See the [TUTORIALS](docs/TUTORIALS.md) for detailed download and flashing inform # TODO -- [ ] Implement bounds checks for stacks and dictionary -- [ ] Add example Forth code to turn it into a "real" Forth (ex: `[`, `]`, `branch`, etc) - [ ] Code cleanup and optimizations # Contributing @@ -45,6 +43,17 @@ Please create a pull-request or [open an issue](https://github.com/aw/picolisp-k # Changelog +## 0.3 (2023-01-19) + + * Fix issue #7 - Implement bounds checks for stacks + * Fix issue #8 - Implement bounds checks for user dictionary + * Fix issue #13 - `TOIN` should not be an address + * Fix issue #14 - `STORE` primitive is incorrect + * Add better error messages + * Add detailed documentation in [docs](docs/) + * Add `djb2.c` to generate a word's hash locally + * Add RAM zerofill of unused dictionary space on reset + ## 0.2 (2023-01-10) * Fix issue #9 - Handling of carriage return diff --git a/docs/HOWTO.md b/docs/HOWTO.md index 7677e5b..059453b 100644 --- a/docs/HOWTO.md +++ b/docs/HOWTO.md @@ -95,7 +95,7 @@ Accessing _FiveForths_ through the terminal should look similar to this: ``` --- Miniterm on /dev/ttyUSB0 115200,8,N,1 --- --- Quit: Ctrl+] | Menu: Ctrl+T | Help: Ctrl+T followed by Ctrl+H --- -FiveForths v0.2, Copyright (c) 2021~ Alexander Williams, https://a1w.ca +FiveForths v0.3, Copyright (c) 2021~ Alexander Williams, https://a1w.ca ``` diff --git a/docs/REFERENCE.md b/docs/REFERENCE.md index cd342c8..451e43e 100644 --- a/docs/REFERENCE.md +++ b/docs/REFERENCE.md @@ -34,6 +34,7 @@ Below is a list of specifications for _FiveForths_, most can be changed in the s * Return character newline: `\n` * Maximum word length: `32 characters` * Stack effects comments support `( x -- x )`: **yes** +* Stack and memory overflow/underflow protection: **yes** * Backslash comments support `\ comment`: **yes** * Multiline code definitions support: **no** * OK message: `" ok\n"` @@ -167,7 +168,6 @@ The hash is a 32-bit hash with the last 8 bits (from the 
LSB) used for the Flags | FLAGS | LENGTH | HASH | +-------+--------+------------------+ 3-bits 5-bits 24-bits - ``` ### Other Forths diff --git a/docs/TUTORIALS.md b/docs/TUTORIALS.md index d325197..d42b9d8 100644 --- a/docs/TUTORIALS.md +++ b/docs/TUTORIALS.md @@ -54,8 +54,8 @@ It is possible to download a pre-built firmware binary, or build the firmware ma Download one of the firmware binaries from the [releases page](https://github.com/aw/fiveforths/releases). -* [fiveforths-longan-nano-lite.bin](https://github.com/aw/fiveforths/releases/download/v0.2/fiveforths-longan-nano-lite.bin) (64K Flash, 20K RAM) -* [fiveforths-longan-nano.bin](https://github.com/aw/fiveforths/releases/download/v0.2/fiveforths-longan-nano.bin) (128K Flash, 32K RAM) +* [fiveforths-longan-nano-lite.bin](https://github.com/aw/fiveforths/releases/download/v0.3/fiveforths-longan-nano-lite.bin) (64K Flash, 20K RAM) +* [fiveforths-longan-nano.bin](https://github.com/aw/fiveforths/releases/download/v0.3/fiveforths-longan-nano.bin) (128K Flash, 32K RAM) ### Build it @@ -87,7 +87,7 @@ $ make Additional build options are explained in the [HOWTO](HOWTO.md) section. -The firmware file is called `fiveforths.bin` and is **under 2 KBytes** as of _release v0.1_ since _January 08, 2023_. +The firmware file is called `fiveforths.bin` and is **nearly 2.5 KBytes** as of _release v0.3_ since _January 19, 2023_. 
### Flash it diff --git a/src/01-variables-constants.s b/src/01-variables-constants.s index 84b128b..03d39f5 100644 --- a/src/01-variables-constants.s +++ b/src/01-variables-constants.s @@ -2,7 +2,7 @@ # Variables and constants ## -.equ FORTH_VERSION, 2 +.equ FORTH_VERSION, 3 ## # Memory map diff --git a/src/02-macros.s b/src/02-macros.s index 90d2da1..3145680 100644 --- a/src/02-macros.s +++ b/src/02-macros.s @@ -18,25 +18,27 @@ # push register to top of stack and move DSP .macro PUSH reg - sw \reg, -CELL(sp) # store the value in the register to the top of the DSP - addi sp, sp, -CELL # move the DSP down by 1 cell -.endm + li t0, RSP_TOP+CELL # load address of bottom of stack + 1 CELL (clobbers t0, so \reg must not be t0) + blt sp, t0, err_overflow # jump to error handler if stack overflow -# push variable to top of stack -.macro PUSHVAR var - li t0, \var # load variable into temporary - sw t0, -CELL(sp) # store the variable value to the top of the DSP + sw \reg, -CELL(sp) # store the value in the register to the top of the DSP addi sp, sp, -CELL # move the DSP down by 1 cell .endm # push register to return stack .macro PUSHRSP reg + li t0, TIB_TOP+CELL # load address of bottom of stack + 1 CELL (clobbers t0, so \reg must not be t0) + blt s2, t0, err_overflow # jump to error handler if stack overflow + sw \reg, -CELL(s2) # store value from register into RSP addi s2, s2, -CELL # decrement RSP by 1 cell .endm # pop top of return stack to register .macro POPRSP reg + li t0, RSP_TOP # load address of top of RSP (clobbers t0) + bge s2, t0, err_underflow # jump to error handler if stack underflow + lw \reg, 0(s2) # load value from RSP into register addi s2, s2, CELL # increment RSP by 1 cell .endm @@ -67,3 +69,31 @@ li t0, \char # load character into temporary beq a0, t0, \dest # jump to the destination if the char matches .endm + +# define handler err_\name: print msg_\name, then jump to \jump +.macro print_error name, size, jump + .balign CELL + err_\name : + la a1, msg_\name # load start address of the message + addi a2, a1, \size # compute message start address plus \size bytes + call uart_print # call uart print function + j \jump # jump 
when print returns +.endm + +# restore HERE and LATEST variables to drop the word located at \reg (clobbers t0 and t1) +.macro restorevars reg + # update HERE + li t0, HERE # load HERE variable into temporary + sw \reg, 0(t0) # rewind HERE to the address held in \reg + + # update LATEST + li t0, LATEST # load LATEST variable into temporary + lw t1, 0(\reg) # load the first cell stored at \reg (the link to the previous word) + sw t1, 0(t0) # store that link into the LATEST variable +.endm + +# check for stack underflow (pass 0 to require 1 cell on the data stack, CELL to require 2) +.macro checkunderflow stacktop + li t0, DSP_TOP-\stacktop # load DSP_TOP minus \stacktop bytes (clobbers t0) + bge sp, t0, err_underflow # jump to error handler if stack underflow +.endm diff --git a/src/05-internal-functions.s b/src/05-internal-functions.s index e9ccae2..eb4cd48 100644 --- a/src/05-internal-functions.s +++ b/src/05-internal-functions.s @@ -97,32 +97,24 @@ lookup_loop: lw t0, 4(a1) # load the hash of the word from the X working register # check if the word is hidden - li t1, F_HIDDEN # load the HIDDEN flag into temporary - and t1, t0, t1 # read the hidden flag bit - bnez t1, lookup_next # skip the word if it's hidden + li t1, F_HIDDEN # load the HIDDEN flag into temporary + and t1, t0, t1 # read the hidden flag bit + bnez t1, lookup_next # skip the word if it's hidden # remove the 3-bit flags using a mask - li t1, ~FLAGS_MASK # load the inverted 3-bit flags mask into temporary - and t0, t0, t1 # ignore flags when comparing the hashes - beq t0, a0, lookup_done # done if the hashes match + li t1, ~FLAGS_MASK # load the inverted 3-bit flags mask into temporary + and t0, t0, t1 # ignore flags when comparing the hashes + beq t0, a0, lookup_done # done if the hashes match lookup_next: - lw a1, 0(a1) # follow link to next word in dict + lw a1, 0(a1) # follow link to next word in dict j lookup_loop lookup_error: # check the STATE li t0, STATE # load the address of the STATE variable into temporary lw t0, 0(t0) # load the current state into a temporary - beqz t0, error # if in execute mode (STATE = 0), jump to error handler to reset + 
beqz t0, err_error # if in execute mode (STATE = 0), jump to error handler to reset - # update HERE since we're in compile mode - li t0, HERE # load HERE variable into temporary - sw t2, 0(t0) # store the address of LATEST back into HERE - - # update LATEST since we're in compile mode - li t0, LATEST # load LATEST variable into temporary - lw t1, 0(t2) # load LATEST variable value into temporary - sw t1, 0(t0) # store LATEST word into LATEST variable - - j error # jump to error handler + restorevars t2 # restore HERE and LATEST (t2) + j err_error # jump to error handler lookup_done: ret diff --git a/src/06-initialization.s b/src/06-initialization.s index 53fd70a..7b1245f 100644 --- a/src/06-initialization.s +++ b/src/06-initialization.s @@ -41,13 +41,29 @@ reset: li t0, STATE # load STATE variable sw zero, 0(t0) # initialize STATE variable (0 = execute) +.balign CELL +# reset the RAM from the last defined word +ram_init: + li t0, HERE # load HERE memory address + lw t0, 0(t0) # load HERE value (start of the zero-fill) + li t1, PAD # load PAD address (upper bound of the zero-fill) +ram_zerofill: + # initialize the memory cells + bgeu t0, t1, ram_done # stop once the counter (HERE) reaches or passes PAD; unsigned compare so a stray or unaligned HERE cannot run past PAD + sw zero, 0(t0) # zero-fill the memory address + addi t0, t0, CELL # increment counter by 1 CELL + j ram_zerofill # repeat +ram_done: + # continue to tib_init + +.balign CELL # reset the terminal input buffer tib_init: # initialize TOIN variable li t0, TIB # load TIB memory address li t1, TOIN # load TOIN variable li t2, TIB_TOP # load TIB_TOP variable - sw t0, 0(t1) # initialize TOIN variable to contain TIB start address + sw zero, 0(t1) # initialize TOIN variable to contain zero tib_zerofill: # initialize the TIB beq t2, t0,tib_done # loop until TIB_TOP == TIB @@ -57,4 +73,4 @@ tib_zerofill: tib_done: j interpreter_start # jump to the main interpreter REPL -msg_boot: .ascii "FiveForths v0.2, Copyright (c) 2021~ Alexander Williams, https://a1w.ca \n\n" +msg_boot: .ascii "FiveForths v0.3, Copyright (c) 2021~ Alexander Williams, 
https://a1w.ca \n\n" diff --git a/src/07-error-handling.s b/src/07-error-handling.s index 259ac05..02ca7a5 100644 --- a/src/07-error-handling.s +++ b/src/07-error-handling.s @@ -2,30 +2,20 @@ # Error handling ## -.balign CELL -# print an error message to the UART -error: - la a1, msg_error # load string message - addi a2, a1, 4 # load string length - call uart_print # call uart print function - j reset # jump to reset the stack pointers, variables, etc before jumping to the interpreter - -.balign CELL -# print an OK message to the UART -ok: - la a1, msg_ok # load string message - addi a2, a1, 6 # load string length - call uart_print # call uart print function - j tib_init # jump to reset the terminal input buffer before jumping to the interpreter - -.balign CELL -# print a REBOOTING message to the UART -reboot: - la a1, msg_reboot # load string message - addi a2, a1, 12 # load string length - call uart_print # call uart print function - j _start # reboot when print returns +print_error error, 4, reset +print_error ok, 6, tib_init +print_error reboot, 16, _start +print_error tib, 14, reset +print_error mem, 16, reset +print_error token, 14, reset +print_error underflow, 20, reset +print_error overflow, 20, reset msg_error: .ascii " ?\n" msg_ok: .ascii " ok\n" -msg_reboot: .ascii " rebooting\n" +msg_reboot: .ascii " ok rebooting\n" +msg_tib: .ascii " ? tib full\n" +msg_mem: .ascii " ? memory full\n" +msg_token: .ascii " ? big token\n" +msg_underflow: .ascii " ? stack underflow\n" +msg_overflow: .ascii " ? 
stack overflow\n" diff --git a/src/08-forth-primitives.s b/src/08-forth-primitives.s index f5d56e9..17437b2 100644 --- a/src/08-forth-primitives.s +++ b/src/08-forth-primitives.s @@ -6,10 +6,11 @@ # reboot ( -- ) # Reboot the entire system and initialize memory defcode "reboot", 0x06266b70, REBOOT, NULL - j reboot # jump to reboot + j err_reboot # jump to reboot # @ ( addr -- x ) Fetch memory at addr defcode "@", 0x0102b5e5, FETCH, REBOOT + checkunderflow 0 # check for stack underflow of data stack (1 CELL) lw t0, 0(sp) # load the top of stack into temporary lw t0, 0(t0) # load the value from the temporary (addr) sw t0, 0(sp) # store the value back the top of stack (x) @@ -17,8 +18,9 @@ defcode "@", 0x0102b5e5, FETCH, REBOOT # ! ( x addr -- ) Store x at addr defcode "!", 0x0102b5c6, STORE, FETCH - lw t0, 0(sp) # load the DSP value (x) into temporary - lw t1, CELL(sp) # load the DSP value (addr) into temporary + checkunderflow CELL # check for stack underflow of data stack (2 CELLs) + lw t1, 0(sp) # load the DSP value (addr) into temporary + lw t0, CELL(sp) # load the DSP value (x) into temporary sw t0, 0(t1) # store x into addr addi sp, sp, 2*CELL # move DSP up by 2 cells NEXT @@ -35,6 +37,7 @@ defcode "rp@", 0x0388a687, RSPFETCH, DSPFETCH # 0= ( x -- f ) -1 if top of stack is 0, 0 otherwise defcode "0=", 0x025970b2, ZEQU, RSPFETCH + checkunderflow 0 # check for stack underflow of data stack (1 CELL) lw t0, 0(sp) # load the DSP value (x) into temporary snez t0, t0 # store 0 in temporary if it's equal to 0, otherwise store 1 addi t0, t0, -1 # store -1 in temporary if it's 0, otherwise store 0 @@ -43,6 +46,7 @@ defcode "0=", 0x025970b2, ZEQU, RSPFETCH # + ( x1 x2 -- n ) Add the two values at the top of the stack defcode "+", 0x0102b5d0, ADD, ZEQU + checkunderflow CELL # check for stack underflow of data stack (2 CELLs) POP t0 # pop DSP value (x1) into temporary lw t1, 0(sp) # load DSP value (x2) into temporary add t0, t0, t1 # add the two values @@ -51,6 +55,7 @@ 
defcode "+", 0x0102b5d0, ADD, ZEQU # nand ( x1 x2 -- n ) Bitwise NAND the two values at the top of the stack defcode "nand", 0x049b0c66, NAND, ADD + checkunderflow CELL # check for stack underflow of data stack (2 CELLs) POP t0 # pop DSP value (x1) into temporary lw t1, 0(sp) # load DSP value (x2) into temporary and t0, t0, t1 # perform bitwise AND of the two values @@ -60,8 +65,8 @@ defcode "nand", 0x049b0c66, NAND, ADD # lit ( -- n ) Get the next word from IP and push it to the stack, increment IP defcode "lit", 0x03888c4e, LIT, NAND - lw t0, 0(s1) # load the memory address from IP into temporary - PUSH t0 # push the literal to the top of the stack + lw t1, 0(s1) # load the literal stored at IP into t1 (the PUSH macro uses t0 as scratch) + PUSH t1 # push the literal to the top of the stack addi s1, s1, CELL # increment IP by 1 CELL NEXT @@ -82,6 +87,7 @@ defcode "key", 0x0388878e, KEY, EXIT # emit ( x -- ) Write 8-bit character to uart output defcode "emit", 0x04964f74, EMIT, KEY + checkunderflow 0 # check for stack underflow of data stack (1 CELL) POP a0 # copy top of data stack into W call uart_put # send character from W to uart NEXT @@ -92,27 +98,32 @@ defcode "emit", 0x04964f74, EMIT, KEY # tib ( -- addr ) Store TIB variable address in top of data stack defcode "tib", 0x0388ae44, TIB, EMIT - PUSHVAR TIB + li t1, TIB # load variable address into t1 (the PUSH macro uses t0 as scratch) + PUSH t1 NEXT # state ( -- addr ) Store STATE variable address in top of data stack defcode "state", 0x05614a06, STATE, TIB - PUSHVAR STATE + li t1, STATE # load variable address into t1 (the PUSH macro uses t0 as scratch) + PUSH t1 NEXT # >in ( -- addr ) Store TOIN variable address in top of data stack defcode ">in", 0x0387c89a, TOIN, STATE - PUSHVAR TOIN + li t1, TOIN # load variable address into t1 (the PUSH macro uses t0 as scratch) + PUSH t1 NEXT # here ( -- addr ) Store HERE variable address in top of data stack defcode "here", 0x0497d3a9, HERE, TOIN - PUSHVAR HERE + li t1, HERE # load variable address into t1 (the PUSH macro uses t0 as scratch) + PUSH t1 NEXT # latest ( -- addr ) Store LATEST variable address in top of data stack defcode 
"latest", 0x06e8ca72, LATEST, HERE - PUSHVAR LATEST + li t1, LATEST # load variable address into t1 (the PUSH macro uses t0 as scratch) + PUSH t1 NEXT ## @@ -121,18 +132,22 @@ defcode "latest", 0x06e8ca72, LATEST, HERE # : ( -- ) # Start the definition of a new word defcode ":", 0x0102b5df, COLON, LATEST + li t2, TIB # load TIB memory address li t3, TOIN # load TOIN variable into unused temporary register - lw a0, 0(t3) # load TOIN address value into temporary + lw a0, 0(t3) # load TOIN value (an offset into the TIB) into temporary + add a0, a0, t2 # add the TIB base address to the TOIN offset to get an absolute address in the TIB call token # read the token # move TOIN + li t2, TIB # load TIB memory address add t0, a0, a1 # add the size of the token to TOIN + sub t0, t0, t2 # subtract the TIB base address so TOIN is stored as an offset, not an address sw t0, 0(t3) # move TOIN to process the next word in the TIB # bounds checks on token size - beqz a1, ok # ok if token size is 0 + beqz a1, err_ok # ok if token size is 0 li t0, 32 # load max token size (2^5 = 32) in temporary - bgtu a1, t0, error # error if token size is greater than 32 + bgtu a1, t0, err_token # error if token size is greater than 32 call djb2_hash # hash the token @@ -152,7 +167,7 @@ defcode ":", 0x0102b5df, COLON, LATEST # bounds check on new word memory location addi t4, t2, 3*CELL # prepare to move the HERE pointer to the end of the word li t5, PAD # load out of bounds memory address (PAD) - bgt t4, t5, error # error if the memory address is out of bounds + bge t4, t5, err_mem # error if the memory address is out of bounds # update LATEST variable sw t2, 0(t1) # store the current value of HERE into the LATEST variable @@ -189,21 +204,28 @@ defcode ";", 0x8102b5e0, SEMI, COLON and t1, t1, t2 # unhide the word sw t1, CELL(t0) # write the hash back to memory - # update HERE variable + # store codeword into memory li t0, HERE # copy the memory address of HERE into temporary lw t2, 0(t0) # load the HERE value into temporary - la t1, code_EXIT # load the codeword address into temporary - sw t1, 0(t2) # 
store the codeword address into HERE # bounds check on the exit memory location - addi t2, t2, CELL # prepare to move the HERE pointer by 1 CELL li t3, PAD # load out of bounds memory address (PAD) - bgt t2, t3, error # error if the memory address is out of bounds + bge t2, t3, memory_error # error if the memory address is out of bounds + + la t1, code_EXIT # load the codeword address into temporary + sw t1, 0(t2) # store the codeword address at the memory location HERE points to # move HERE pointer + addi t2, t2, CELL # prepare to move the HERE pointer by 1 CELL sw t2, 0(t0) # store the new address of HERE into the HERE variable # update the STATE variable li t0, STATE # load the address of the STATE variable into temporary sw zero, 0(t0) # store the current state back into the STATE variable NEXT + +memory_error: + li t2, LATEST # copy the memory address of LATEST into temporary + lw t2, 0(t2) # load the address value into temporary + restorevars t2 # restore HERE and LATEST (t2) + j err_mem # jump to the err_mem handler diff --git a/src/09-interpreter.s b/src/09-interpreter.s index 3b3d940..bf511a9 100644 --- a/src/09-interpreter.s +++ b/src/09-interpreter.s @@ -8,7 +8,8 @@ interpreter_start: li t2, TIB # load TIB memory address li t3, TOIN # load the TOIN variable into unused temporary register - lw a1, 0(t3) # load TOIN address value into X working register + lw a1, 0(t3) # load TOIN value (an offset into the TIB) into X working register + add a1, a1, t2 # add the TIB base address to the TOIN offset to get an absolute address in the TIB interpreter: call uart_get # read a character from UART @@ -16,8 +17,7 @@ interpreter: beq a0, t4, skip_send # don't send the character if it's a newline # ignore specific characters - mv t4, zero # load 0x00 zero into temporary - beq a0, t4, interpreter # ignore the character if it matches + beqz a0, interpreter # ignore the character if it is 0x00 li t4, CHAR_CARRIAGE # load 0x0D carriage return into temporary beq a0, t4, interpreter # ignore the character if it matches @@ -37,7 +37,7 @@ skip_send: interpreter_tib: # add the character to 
the TIB li t4, TIB_TOP # load TIB_TOP memory address - bge a1, t4, error # error if the terminal input buffer is full # FIXME: handle this better + bge a1, t4, err_tib # error if the terminal input buffer is full sb a0, 0(a1) # store the character from W register in the TIB addi a1, a1, 1 # increment TOIN value by 1 li t0, CHAR_NEWLINE # load newline into temporary @@ -73,18 +73,22 @@ replace_newline: process_token: # process the token + li t2, TIB # load TIB memory address li t3, TOIN # load TOIN variable into unused temporary register lw a0, 0(t3) # load TOIN address value into temporary + add a0, a0, t2 # add the TIB base address to the TOIN offset to get an absolute address in the TIB call token # read the token # move TOIN + li t2, TIB # load TIB memory address add t0, a0, a1 # add the size of the token to TOIN + sub t0, t0, t2 # subtract the TIB base address so TOIN is stored as an offset, not an address sw t0, 0(t3) # move TOIN to process the next word in the TIB # bounds checks on token size - beqz a1, ok # ok if token size is 0 + beqz a1, err_ok # ok if token size is 0 li t0, 32 # load max token size (2^5 = 32) in temporary - bgtu a1, t0, error # error if token size is greater than 32 + bgtu a1, t0, err_token # error if token size is greater than 32 # check if the token is a number mv t5, a0 # save a0 temporarily