# Workflow "Scrape latest data" (copied from the GitHub Actions web view, run #42251)

# Workflow name as shown in the Actions tab, followed by its triggers.
name: Scrape latest data
on:
  # Run on every push to the repository.
  push:
  # Allow manual runs from the Actions tab or the API.
  workflow_dispatch:
  # Run at minutes 01, 16, 31 and 46 of every hour (GitHub evaluates cron in UTC).
  schedule:
    - cron: '01,16,31,46 * * * *'
jobs:
  # Downloads the per-state outage map/list JSON files, merges each pair into
  # maplist<STATE>.json, and commits the result when anything changed.
  fetch-external-data:
    runs-on: ubuntu-latest
    # The final step pushes a commit, so the job token needs write access to
    # the repository contents.
    permissions:
      contents: write
    steps:
      - name: Checkout latest commit
        uses: actions/checkout@v4
        with:
          fetch-depth: 1
      - name: Fetch and merge JSON data
        run: |
          # Fail on any command error, unset variable, or failure inside a pipeline.
          set -euo pipefail
          STATES=('UT' 'WY' 'ID')
          URL_BASE="https://www.rockymountainpower.net/etc/pcorp/datafiles/outagemap"
          # Keep downloads outside the checkout so `git add -A` never stages them.
          workdir=$(mktemp -d)
          trap 'rm -rf "$workdir"' EXIT
          for state in "${STATES[@]}"; do
            # Fetch the JSON data
            echo "Fetching data for ${state}"
            map_file="${workdir}/map${state}.json"
            list_file="${workdir}/list${state}.json"
            # -f makes an HTTP error a non-zero exit instead of saving the
            # error page as if it were data; -S still prints the reason.
            curl -fsS -o "$map_file" "${URL_BASE}/map${state}.json"
            curl -fsS -o "$list_file" "${URL_BASE}/list${state}.json"
            # Validate that keys present in both documents carry equal values
            # (for data integrity). Done inside jq so that the payload is never
            # word-split by the shell, keys with special characters work, and
            # having no shared keys is not treated as an error.
            mismatches=$(jq -rs '
              .[0] as $map | .[1] as $list
              | $map | keys[]
              | select(. as $k | ($list | has($k)) and ($map[$k] != $list[$k]))
              | "Mismatch in \(.). Value in mapJson: \($map[.] | tojson), Value in listJson: \($list[.] | tojson)"
            ' "$map_file" "$list_file")
            if [[ -n "$mismatches" ]]; then
              echo "$mismatches"
              exit 2
            fi
            # Merge the two documents (recursive object merge, list values win),
            # then remove `last_upd` since it would cause a change on every
            # fetch even when no data has changed.
            merged_file="${workdir}/maplist${state}.json"
            jq -s '.[0] * .[1] | del(.last_upd)' "$map_file" "$list_file" > "$merged_file"
            # Only replace the tracked file once the merge has fully succeeded.
            mv "$merged_file" "maplist${state}.json"
          done
      - name: Commit and push if it changed
        run: |-
          set -euo pipefail
          git config user.name "outage-detector"
          # NOTE(review): the address in the captured source was a redaction
          # placeholder; a GitHub no-reply address is used here. TODO confirm
          # the intended committer email.
          git config user.email "actions@users.noreply.github.com"
          git add -A
          # Nothing staged means the data did not change: stop successfully.
          # Checking explicitly (rather than ignoring a failed commit) lets
          # genuine commit errors fail the step.
          if git diff --cached --quiet; then
            echo "No changes to commit"
            exit 0
          fi
          timestamp=$(date -u)
          git commit -m "Latest data as of ${timestamp}"
          git push