Skip to content

Commit

Permalink
Properly handle deleted files (#574)
Browse the repository at this point in the history
* Properly handle deleted files

* pylint
  • Loading branch information
skearnes authored Mar 9, 2021
1 parent cad41f7 commit d4c0f48
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 9 deletions.
17 changes: 9 additions & 8 deletions ord_schema/scripts/process_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@
'GitHub pull request number. If provided, a comment will be added.')
flags.DEFINE_string('token', None, 'GitHub authentication token.')

# pylint: disable=too-many-locals
# pylint: disable=too-many-branches,too-many-locals


@dataclasses.dataclass(eq=True, frozen=True, order=True)
Expand Down Expand Up @@ -252,13 +252,14 @@ def run() -> Tuple[Set[str], Set[str], Set[str]]:
change_stats = {}
for file_status in inputs:
if file_status.status == 'D':
continue # Nothing to do for deleted files.
dataset = message_helpers.load_message(file_status.filename,
dataset_pb2.Dataset)
logging.info('%s: %d reactions', file_status.filename,
len(dataset.reactions))
dataset = None
else:
dataset = message_helpers.load_message(file_status.filename,
dataset_pb2.Dataset)
logging.info('%s: %d reactions', file_status.filename,
len(dataset.reactions))
datasets = {file_status.filename: dataset}
if FLAGS.validate:
if FLAGS.validate and dataset is not None:
# Note: this does not check if IDs are malformed.
validations.validate_datasets(datasets, FLAGS.write_errors)
# Check reaction sizes.
Expand All @@ -274,7 +275,7 @@ def run() -> Tuple[Set[str], Set[str], Set[str]]:
change_stats[file_status.filename] = (added, removed, changed)
logging.info('Summary: +%d -%d Δ%d reaction IDs', len(added),
len(removed), len(changed))
if FLAGS.update:
if FLAGS.update and dataset is not None:
_run_updates(datasets)
if change_stats:
total_added, total_removed, total_changed = set(), set(), set()
Expand Down
31 changes: 30 additions & 1 deletion ord_schema/scripts/process_dataset_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,16 @@ def _run(self, **kwargs):
changed: Set of changed reaction IDs.
filenames: List of .pb filenames in the updated database.
"""
subprocess.run(['git', 'add', '*.pb*', 'data/*/*.pb*'], check=True)
# These commands will fail if there are no files to match for a given
# pattern, so run them separately to make sure we pick up changes.
try:
subprocess.run(['git', 'add', '*.pb*'], check=True)
except subprocess.CalledProcessError as error:
logging.info(error)
try:
subprocess.run(['git', 'add', 'data/*/*.pb*'], check=True)
except subprocess.CalledProcessError as error:
logging.info(error)
changed = subprocess.run(
['git', 'diff', '--name-status', self._DEFAULT_BRANCH],
check=True,
Expand Down Expand Up @@ -408,6 +417,26 @@ def test_add_dataset_with_too_large_reaction(self):
with self.assertRaisesRegex(ValueError, 'larger than --max_size'):
self._run(max_size=0.0)

def test_delete_dataset(self):
subprocess.run(['git', 'rm', self.dataset_filename], check=True)
added, removed, changed, filenames = self._run()
self.assertEmpty(added)
self.assertLen(removed, 1)
self.assertEmpty(changed)
self.assertEmpty(filenames)

def test_replace_dataset(self):
dataset = message_helpers.load_message(self.dataset_filename,
dataset_pb2.Dataset)
dataset_filename = os.path.join(self.test_subdirectory, 'test.pbtxt')
message_helpers.write_message(dataset, dataset_filename)
subprocess.run(['git', 'rm', self.dataset_filename], check=True)
added, removed, changed, filenames = self._run()
self.assertLen(added, 1)
self.assertLen(removed, 1)
self.assertEmpty(changed)
self.assertLen(filenames, 1)


# Script entry point: run this module's tests via the absltest runner.
if __name__ == '__main__':
    absltest.main()

0 comments on commit d4c0f48

Please sign in to comment.