Skip to content

Commit

Permalink
Merge pull request #134 from openstates/bugfix-scrape-archive-mode
Browse files Browse the repository at this point in the history
Bugfix: Google Cloud Storage permissions issue with archiving
  • Loading branch information
jessemortenson authored Aug 2, 2024
2 parents cd78548 + 6e50990 commit 7400639
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 3 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# Changelog

## 6.20.1 - Aug 2, 2024

* Fix permissions issue caused by slightly wrong usage of GCP storage client code

## 6.20.0 - Aug 1, 2024

* Adds support for the --archive flag on os-update to archive a full scrape to a Google Cloud Storage bucket
Expand Down
10 changes: 8 additions & 2 deletions openstates/cli/update.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,19 +215,25 @@ def archive_to_cloud_storage(
"Scrape archiving is turned on, but necessary settings are missing. No archive was done."
)
return
cloud_storage_client = storage.Client()
bucket = cloud_storage_client.bucket(BUCKET_NAME, GCP_PROJECT)
logger.info("Beginning archive of scraped files to google cloud storage.")
logger.info(f"GCP Project is {GCP_PROJECT} and bucket is {BUCKET_NAME}")
cloud_storage_client = storage.Client(project=GCP_PROJECT)
bucket = cloud_storage_client.bucket(BUCKET_NAME)
jurisdiction_id = juris.jurisdiction_id.replace("ocd-jurisdiction/", "")
destination_prefx = (
f"{SCRAPE_LAKE_PREFIX}/{jurisdiction_id}/{last_scrape_end_datetime.isoformat()}"
)

# read files in directory and upload
files_count = 0
for file_path in glob.glob(datadir + "/*.json"):
files_count += 1
blob_name = os.path.join(destination_prefx, os.path.basename(file_path))
blob = bucket.blob(blob_name)
blob.upload_from_filename(file_path)

logger.info(f"Completed archive to Google Cloud Storage, {files_count} files were uploaded.")


def do_import(juris: State, args: argparse.Namespace) -> dict[str, typing.Any]:
# import inside here to avoid loading Django code unnecessarily
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "openstates"
version = "6.20.0"
version = "6.20.1"
description = "core infrastructure for the openstates project"
authors = ["James Turk <[email protected]>"]
license = "MIT"
Expand Down

0 comments on commit 7400639

Please sign in to comment.