Skip to content

Commit

Permalink
fixed embedded strings bugs (#32)
Browse files Browse the repository at this point in the history
  • Loading branch information
michaelkamprath authored May 11, 2024
1 parent 7b8d794 commit 95102cf
Show file tree
Hide file tree
Showing 8 changed files with 87 additions and 11 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ Changes that are planned but not implemented yet:
* Create an "align if needed" preprocessor directive pair that generates an `.align` directive if the bytecode in between the pair isn't naturally on the same page and can fit on the same page if aligned. An error would be generated if the block of code can't fit on the same page regardless of alignment.

## [Unreleased]
* Upgrade python version requirements to 3.11
* Fixed a bug where embedded strings weren't properly parsed if they contained a newline character or there were multiple embedded strings per line

## [0.4.2]
* Added support for The Minimal 64x4 Home Computer with an example and updated assembler functionality to support it.
Expand Down
6 changes: 3 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,18 @@ build-backend = "setuptools.build_meta"

[project]
name = "bespokeasm"
version = "0.4.2"
version = "0.4.3"
authors = [
{ name="Michael Kamprath", email="[email protected]" },
]
description = "A customizable byte code assembler that allows for the definition of custom instruction set architecture"
readme = "README.md"
license = {file = "LICENSE"}
requires-python = ">=3.9"
requires-python = ">=3.11"
classifiers = [
"Development Status :: 4 - Beta",
"License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.11",
"Topic :: Software Development :: Assemblers",
]
dependencies = [
Expand Down
2 changes: 1 addition & 1 deletion src/bespokeasm/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
BESPOKEASM_VERSION_STR = '0.4.2'
BESPOKEASM_VERSION_STR = '0.4.3b1'

# if a config file requires a certain BespokeASM version, it should be at least this version.
BESPOKEASM_MIN_REQUIRED_STR = '0.3.0'
2 changes: 1 addition & 1 deletion src/bespokeasm/assembler/line_object/emdedded_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from bespokeasm.assembler.line_identifier import LineIdentifier


EMBEDDED_STRING_PATTERN = r'(?P<quote>[\"])((?:\\(?P=quote)|.)*)(?P=quote)'
EMBEDDED_STRING_PATTERN = r'(?P<quote>[\"])((?:\\(?P=quote)|.|\n)*?)(?P=quote)'


class EmbeddedString(LineWithBytes):
Expand Down
4 changes: 2 additions & 2 deletions src/bespokeasm/assembler/line_object/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ class LineOjectFactory:
flags=re.IGNORECASE | re.MULTILINE
)
PATTERN_INSTRUCTION_CONTENT = re.compile(
r'^([^\;\n]*)',
r'^([^;\v]*)(?:;.*)?$',
flags=re.IGNORECASE | re.MULTILINE
)

Expand Down Expand Up @@ -68,7 +68,7 @@ def parse_line(
log_verbosity,
))
else:
# resolve proprocessor symbols
# resolve preprocessor symbols
instruction_str = preprocessor.resolve_symbols(line_id, instruction_str)
# parse instruction
while len(instruction_str) > 0:
Expand Down
1 change: 1 addition & 0 deletions src/bespokeasm/assembler/preprocessor/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ def resolve_symbols(
# Errors if there are recursion loops caused by symbols that indirectly refer to themselves.

# to make this fast, all symbol candidates should be identified first, then the symbols should be resolved
# TODO: ignore tokens that are in quoted strings
found_symbols: list[str] = re.findall(f'\\b({SYMBOL_PATTERN})\\b', line_str)
symbols_replaced: set[str] = set()

Expand Down
15 changes: 15 additions & 0 deletions test/config_files/test_operand_features.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -240,3 +240,18 @@ instructions:
argument:
size: 8
byte_align: true
macros:
add_twice:
- operands:
count: 1
specific_operands:
numeric_expression:
list:
numeric_expression:
type: numeric
argument:
size: 8
byte_align: true
instructions:
- "add @ARG(0)"
- "add @ARG(0)"
66 changes: 62 additions & 4 deletions test/test_line_objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,15 @@ class TestLineObject(unittest.TestCase):

@classmethod
def setUpClass(cls):
lineid = LineIdentifier(1, 'setUpClass')
global_scope = GlobalLabelScope(set())
global_scope.set_label_value('var1', 12, 1)
global_scope.set_label_value('my_val', 8, 2)
global_scope.set_label_value('the_two', 2, 3)
global_scope.set_label_value('var1', 12, lineid)
global_scope.set_label_value('my_val', 8, lineid)
global_scope.set_label_value('the_two', 2, lineid)
global_scope.set_label_value('VALUE1', 8777773, lineid)
global_scope.set_label_value('VALUE2', 139, lineid)
local_scope = LabelScope(LabelScopeType.LOCAL, global_scope, 'TestInstructionParsing')
local_scope.set_label_value('.local_var', 10, 3)
local_scope.set_label_value('.local_var', 10, lineid)
cls.label_values = local_scope

def setUp(self):
Expand Down Expand Up @@ -872,6 +875,61 @@ def test_embedded_string_bugs(self):
self.assertIsInstance(t1, EmbeddedString)
self.assertEqual(t1.byte_size, 2, 'string has 2 bytes')

# test single lines of code where the embedded string is in between two statements and
# contains a newline character.
# for example:
# add 5 "this is a test\n" nop
# the embedded string should be parsed as a separate line object
lo1: list[LineObject] = LineOjectFactory.parse_line(
lineid,
'add 5 "this is a test\nof new lines" nop ; comments',
isa_model,
TestLineObject.label_values,
memzone_mngr.global_zone,
memzone_mngr,
Preprocessor(),
ConditionStack(),
0,
)
self.assertEqual(len(lo1), 3, 'There should be 3 parsed instructions')
self.assertIsInstance(lo1[0], InstructionLine)
self.assertIsInstance(lo1[1], EmbeddedString)
self.assertIsInstance(lo1[2], InstructionLine)
self.assertEqual(lo1[1].byte_size, 28, 'string has 28 bytes (27 characters + 1 null terminator)')

def test_multiple_embedded_stringa_bug(self):
# Regression test: a single line of code holding more than one embedded string must be
# split into separate line objects for each instruction and each string.
fp = pkg_resources.files(config_files).joinpath('test_operand_features.yaml')
isa_model = AssemblerModel(str(fp), 0)
# explicitly switch on embedded-string support in the loaded configuration for this test
isa_model._config['general']['allow_embedded_strings'] = True
memzone_mngr = MemoryZoneManager(
isa_model.address_size,
isa_model.default_origin,
isa_model.predefined_memory_zones,
)
lineid = LineIdentifier(88, 'test_multiple_embedded_stringa_bug')
# Two embedded strings are interleaved with three instructions on one line:
#     add VALUE2 "string 1" nop "string 2" nop
# Each embedded string should be parsed as its own line object.
# VALUE2 is one of the labels registered in setUpClass.
lo2: list[LineObject] = LineOjectFactory.parse_line(
lineid,
'add VALUE2 "string 1" nop "string 2" nop',
isa_model,
TestLineObject.label_values,
memzone_mngr.global_zone,
memzone_mngr,
Preprocessor(),
ConditionStack(),
0,
)
# expected order: instruction, string, instruction, string, instruction
self.assertEqual(len(lo2), 5, 'There should be 5 parsed instructions')
self.assertIsInstance(lo2[0], InstructionLine)
self.assertIsInstance(lo2[1], EmbeddedString)
self.assertIsInstance(lo2[2], InstructionLine)
self.assertIsInstance(lo2[3], EmbeddedString)
self.assertIsInstance(lo2[4], InstructionLine)


if __name__ == '__main__':
unittest.main()

0 comments on commit 95102cf

Please sign in to comment.