Skip to content

Commit

Permalink
Merge pull request #33 from edanalytics/bugfix/state_file_ignored_macros_issues_charset_issues
Browse files Browse the repository at this point in the history

several bugfixes
  • Loading branch information
tomreitz authored May 12, 2023
2 parents 2119356 + 4e9fcf7 commit 5009464
Show file tree
Hide file tree
Showing 7 changed files with 26 additions and 26 deletions.
4 changes: 2 additions & 2 deletions earthmover/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ def main(argv=None):
if args.version:
em_dir = os.path.dirname(os.path.abspath(__file__))
version_file = os.path.join(em_dir, 'VERSION.txt')
with open(version_file, 'r') as f:
with open(version_file, 'r', encoding='utf-8') as f:
VERSION = f.read().strip()
print(f"earthmover, version {VERSION}")
exit(0)
Expand Down Expand Up @@ -154,7 +154,7 @@ def main(argv=None):
cli_state_configs=cli_state_configs
)
except Exception as err:
logger.exception(err, exc_info=False)
logger.exception(err, exc_info=True)
raise # Avoids linting error

if args.command == 'compile':
Expand Down
25 changes: 12 additions & 13 deletions earthmover/earthmover.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ def __init__(self,
'log_level': _state_configs['log_level'].upper(),
'show_stacktrace': _state_configs['show_stacktrace'],
}
if 'state_file' in _state_configs.keys():
self.state_configs.update({'state_file': _state_configs['state_file']})

# Set up the logger
self.logger = logger
Expand Down Expand Up @@ -98,7 +100,7 @@ def load_config_file(self) -> dict:
"""

# pass 1: grab config.macros (if any) so Jinja in the YAML can be rendered with macros
with open(self.config_file, "r") as stream:
with open(self.config_file, "r", encoding='utf-8') as stream:
# cannot just yaml.load() here, since Jinja in the YAML may make it invalid...
# instead, pull out just the `config` section, which must not contain Jinja (except for `macros`)
# then we yaml.load() just the config section to grab any `macros`
Expand All @@ -121,14 +123,14 @@ def load_config_file(self) -> dict:
# Read the configs block and extract the (optional) macros field.
if start is not None and end is not None:
configs_pass1 = yaml.safe_load("".join(lines[start:end]))
self.macros = configs_pass1.get("config", {}).get("macros", "").strip()
self.macros = configs_pass1.get("config", {}).get("macros", "")
else:
configs_pass1 = {}

# Figure out lines range of macro definitions, to skip (re)reading/parsing them later
self.macros_lines = self.macros.count("\n")
macros_definitions = [i for i, x in enumerate(lines) if x.strip().startswith('macros:')]

self.macros = self.macros.strip()

# pass 2:
# (a) load template YAML minus macros (which were already loaded in pass 1)
Expand All @@ -137,10 +139,7 @@ def load_config_file(self) -> dict:
# (d) load YAML to config Dict

# (a)
if len(macros_definitions)>0:
self.config_template_string = "".join(lines[:macros_definitions[0]] + lines[macros_definitions[0] + self.macros_lines + 2:])
else:
self.config_template_string = "".join(lines)
self.config_template_string = "".join(lines)

# (b)
_env_backup = os.environ.copy() # backup envvars
Expand All @@ -166,7 +165,7 @@ def load_config_file(self) -> dict:
try:
self.config_template = jinja2.Environment(
loader=jinja2.FileSystemLoader(os.path.dirname('./'))
).from_string(self.macros + self.config_template_string)
).from_string(self.macros + "\n\n" + self.config_template_string)
self.config_template.globals['md5'] = util.jinja_md5

self.config_yaml = self.config_template.render()
Expand Down Expand Up @@ -340,7 +339,7 @@ def generate(self, selector):


### Hashing requires an entire class mixin and multiple additional steps.
if not self.skip_hashing and 'state_file' in self.state_configs:
if not self.skip_hashing and self.state_configs.get('state_file', False):
_runs_path = os.path.expanduser(self.state_configs['state_file'])

self.logger.info(f"computing input hashes for run log at {_runs_path}")
Expand Down Expand Up @@ -379,7 +378,7 @@ def generate(self, selector):
)
self.do_generate = False

elif 'state_file' not in self.state_configs:
elif not self.state_configs.get('state_file', False):
self.logger.info("skipping hashing and run-logging (no `state_file` defined in config)")
runs_file = None # This instantiation will never be used, but this avoids linter alerts.

Expand Down Expand Up @@ -452,12 +451,12 @@ def test(self, tests_dir):
# load expected and outputted content as dataframes, and sort them
# because dask may shuffle output order
_expected_file = os.path.join(tests_dir, 'expected', filename)
with open(_expected_file, "r") as f:
with open(_expected_file, "r", encoding='utf-8') as f:
_expected_df = pd.DataFrame([l.strip() for l in f.readlines()])
_expected_df = _expected_df.sort_values(by=_expected_df.columns.tolist()).reset_index(drop=True)

_outputted_file = os.path.join(tests_dir, 'outputs', filename)
with open(_outputted_file, "r") as f:
with open(_outputted_file, "r", encoding='utf-8') as f:
_outputted_df = pd.DataFrame([l.strip() for l in f.readlines()])
_outputted_df = _outputted_df.sort_values(by=_outputted_df.columns.tolist()).reset_index(drop=True)

Expand Down
4 changes: 2 additions & 2 deletions earthmover/nodes/destination.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ def compile(self):

#
try:
with open(self.template, 'r') as fp:
with open(self.template, 'r', encoding='utf-8') as fp:
template_string = fp.read()

except Exception as err:
Expand Down Expand Up @@ -134,7 +134,7 @@ def execute(self):
self.data = self.data.fillna('')

os.makedirs(os.path.dirname(self.file), exist_ok=True)
with open(self.file, 'w') as fp:
with open(self.file, 'w', encoding='utf-8') as fp:

if self.header:
fp.write(self.header + "\n")
Expand Down
2 changes: 1 addition & 1 deletion earthmover/operations/column.py
Original file line number Diff line number Diff line change
Expand Up @@ -492,7 +492,7 @@ def _read_map_file(self, file) -> dict:


try:
with open(file, 'r') as fp:
with open(file, 'r', encoding='utf-8') as fp:
_translations_list = list(csv.reader(fp, delimiter=sep))
return dict(_translations_list[1:])

Expand Down
6 changes: 3 additions & 3 deletions earthmover/runs_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def write_row(self, selector: Optional[str] = None):
if selector:
row_dict['selector'] = selector

with open(self.file, 'a') as fp:
with open(self.file, 'a', encoding='utf-8') as fp:
writer = csv.DictWriter(fp, fieldnames=self.HEADER)
writer.writerow(row_dict)

Expand Down Expand Up @@ -205,7 +205,7 @@ def _write_header(self):
:return:
"""
with open(self.file, 'x') as fp:
with open(self.file, 'x', encoding='utf-8') as fp:
writer = csv.writer(fp)
writer.writerow(self.HEADER)

Expand All @@ -215,7 +215,7 @@ def _read_runs(self):
:return:
"""
with open(self.file, 'r') as fp:
with open(self.file, 'r', encoding='utf-8') as fp:
runs = list(csv.DictReader(fp, delimiter=','))

# Raise a warning for the user to manually reset or select a new log-runs file.
Expand Down
7 changes: 4 additions & 3 deletions example_projects/10_jinja/earthmover.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@ config:
# show_graph: True
# show_stacktrace: True
macros: >
{% macro test() %}
{% macro test() -%}
testing!
{% endmacro %}
{%- endmacro %}
parameter_defaults:
DO_LINEARIZE: "True"

Expand All @@ -20,6 +20,7 @@ sources:

transformations:
{% for i in range(0,5) %}
# {{ test() }}
actions{{i}}:
operations:
- operation: map_values
Expand Down Expand Up @@ -63,4 +64,4 @@ destinations:
template: ./templates/disciplineAction.jsont
extension: jsonl
linearize: True
{% endfor%}
{% endfor%}
4 changes: 2 additions & 2 deletions example_projects/run_all.sh
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,8 @@ earthmover
rm -f output/*
echo " ... done!"

echo " running 10_simple..."
cd ../10_simple/
echo " running 10_jinja..."
cd ../10_jinja/
earthmover
rm -rf outputs/*
echo " ... done!"
Expand Down

0 comments on commit 5009464

Please sign in to comment.