Skip to content

Commit cb8d2a7

Browse files
committed
Add tally scripts and clean up
1 parent da1761c commit cb8d2a7

File tree

14 files changed

+123
-764042
lines changed

14 files changed

+123
-764042
lines changed

JSON_dualgraphs/4x4.json

-409
This file was deleted.

JSON_dualgraphs/50x50.json

-61,909
This file was deleted.

JSON_dualgraphs/PA_VTDs.json

-701,718
This file was deleted.
+2-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
# Ignore everything in this folder, but keep the folder anyway so that replication is
22
# easier to do from a straight clone of the project
33
*
4-
!.gitignore
4+
!.gitignore
5+
!count_cuts_50x50.sh
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
#!/bin/bash

# Submit one Slurm job per *.ben file found under this script's directory.
# Each job runs ben-tally against the 50x50 dual graph and then moves the
# resulting parquet file into ../../hpc_processed_data/50x50/ (relative to the
# directory that holds the input file).
#
# All paths are resolved here, at submit time, and embedded shell-quoted in the
# job command: shell variables of this script (file, output_file) are not
# defined inside the job, and the --wrap wrapper is a plain "sh" script, so
# bash-only expansions such as ${var/pat/repl} must not be deferred to the job.
#
# NOTE(review): assumes ben-tally writes its result next to the input with
# ".jsonl.ben" replaced by "_cut_edges.parquet" -- confirm against ben-tally.

# Get the directory of the current script
SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
TOP_DIR=$(realpath "$SCRIPT_DIR/../../..")
GRAPH_FILE="$TOP_DIR/JSON_dualgraphs/50x50_with_10_25_50.json"

# find output is NUL-delimited and read on fd 3 so that paths containing
# whitespace survive and sbatch keeps the caller's stdin.
while IFS= read -r -d '' file <&3; do
    output_file="${file/.jsonl.ben/_cut_edges.parquet}"
    target_file="$(dirname "$output_file")/../../hpc_processed_data/50x50/$(basename "$output_file")"

    # %q quotes each path so the job's shell sees it as a single word.
    printf -v job_cmd 'ben-tally -g %q -b %q && mv %q %q' \
        "$GRAPH_FILE" "$file" "$output_file" "$target_file"

    sbatch --time=1-00:00:00 --mem=8G --wrap="$job_cmd"
done 3< <(find "$SCRIPT_DIR" -type f -name "*.ben" -print0)

hpc_files/hpc_raw_data/7x7/.gitignore

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
# Ignore everything in this folder, but keep the folder anyway so that replication is
22
# easier to do from a straight clone of the project
33
*
4-
!.gitignore
4+
!.gitignore
5+
!count_cuts_7x7.sh
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
#!/bin/bash

# Submit one Slurm job per *.ben file found under this script's directory.
# Each job runs ben-tally against the 7x7 dual graph and then moves the
# resulting parquet file into ../../hpc_processed_data/7x7/ (relative to the
# directory that holds the input file).
#
# All paths are resolved here, at submit time, and embedded shell-quoted in the
# job command: shell variables of this script (file, output_file) are not
# defined inside the job, and the --wrap wrapper is a plain "sh" script, so
# bash-only expansions such as ${var/pat/repl} must not be deferred to the job.
#
# NOTE(review): assumes ben-tally writes its result next to the input with
# ".jsonl.ben" replaced by "_cut_edges.parquet" -- confirm against ben-tally.

# Get the directory of the current script
SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
TOP_DIR=$(realpath "$SCRIPT_DIR/../../..")
GRAPH_FILE="$TOP_DIR/JSON_dualgraphs/7x7.json"

# find output is NUL-delimited and read on fd 3 so that paths containing
# whitespace survive and sbatch keeps the caller's stdin.
while IFS= read -r -d '' file <&3; do
    output_file="${file/.jsonl.ben/_cut_edges.parquet}"
    target_file="$(dirname "$output_file")/../../hpc_processed_data/7x7/$(basename "$output_file")"

    # %q quotes each path so the job's shell sees it as a single word.
    printf -v job_cmd 'ben-tally -g %q -b %q && mv %q %q' \
        "$GRAPH_FILE" "$file" "$output_file" "$target_file"

    sbatch --time=1-00:00:00 --mem=8G --wrap="$job_cmd"
done 3< <(find "$SCRIPT_DIR" -type f -name "*.ben" -print0)

hpc_files/hpc_raw_data/PA/.gitignore

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
# Ignore everything in this folder, but keep the folder anyway so that replication is
22
# easier to do from a straight clone of the project
33
*
4-
!.gitignore
4+
!.gitignore
5+
!tally_PA.sh

hpc_files/hpc_raw_data/PA/tally_PA.sh

+23
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
#!/bin/bash

# Submit one Slurm job per *.ben file found under this script's directory.
# Each job runs ben-tally in tally-keys mode against the PA_VTD_20 dual graph
# for the keys TOTPOP PRES16D PRES16R SEND16D SEND16R, then moves the resulting
# parquet file into ../../hpc_processed_data/PA/ (relative to the directory
# that holds the input file).
#
# All paths are resolved here, at submit time, and embedded shell-quoted in the
# job command: shell variables of this script (file, output_file) are not
# defined inside the job, and the --wrap wrapper is a plain "sh" script, so
# bash-only expansions such as ${var/pat/repl} must not be deferred to the job.
#
# NOTE(review): assumes ben-tally writes its result next to the input with
# ".jsonl.ben" replaced by "_tallies.parquet" -- confirm against ben-tally.

# Get the directory of the current script
SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
TOP_DIR=$(realpath "$SCRIPT_DIR/../../..")
GRAPH_FILE="$TOP_DIR/JSON_dualgraphs/PA_VTD_20.json"

# Resource request shared by every submitted job.
sbatch_opts=(
    --time 2-00:00:00
    --mem=160G
    --output=PA_tally_new_%x_%j.out
    --error=PA_tally_new_%x_%j.log
    --nodes=1
    --cpus-per-task=12
    --ntasks-per-node=1
    --job-name="PA_tally"
)

# find output is NUL-delimited and read on fd 3 so that paths containing
# whitespace survive and sbatch keeps the caller's stdin.
while IFS= read -r -d '' file <&3; do
    output_file="${file/.jsonl.ben/_tallies.parquet}"
    target_file="$(dirname "$output_file")/../../hpc_processed_data/PA/$(basename "$output_file")"

    # %q quotes each path so the job's shell sees it as a single word.
    printf -v job_cmd \
        'ben-tally -b %q -g %q -m tally-keys -k TOTPOP PRES16D PRES16R SEND16D SEND16R && mv %q %q' \
        "$file" "$GRAPH_FILE" "$output_file" "$target_file"

    sbatch "${sbatch_opts[@]}" --wrap="$job_cmd"
done 3< <(find "$SCRIPT_DIR" -type f -name "*.ben" -print0)

hpc_files/hpc_raw_data/VA/.gitignore

+3-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
# Ignore everything in this folder, but keep the folder anyway so that replication is
22
# easier to do from a straight clone of the project
33
*
4-
!.gitignore
4+
!.gitignore
5+
!tally_VA.sh
6+
!count_cuts_VA.sh
+13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
#!/bin/bash

# Submit one Slurm job per *.ben file found under this script's directory.
# Each job runs ben-tally against the VA_precincts dual graph and then moves
# the resulting parquet file into ../../hpc_processed_data/VA/ (relative to the
# directory that holds the input file).
#
# All paths are resolved here, at submit time, and embedded shell-quoted in the
# job command: shell variables of this script (file, output_file) are not
# defined inside the job, and the --wrap wrapper is a plain "sh" script, so
# bash-only expansions such as ${var/pat/repl} must not be deferred to the job.
#
# NOTE(review): assumes ben-tally writes its result next to the input with
# ".jsonl.ben" replaced by "_cut_edges.parquet" -- confirm against ben-tally.

# Get the directory of the current script
SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
TOP_DIR=$(realpath "$SCRIPT_DIR/../../..")
GRAPH_FILE="$TOP_DIR/JSON_dualgraphs/VA_precincts.json"

# find output is NUL-delimited and read on fd 3 so that paths containing
# whitespace survive and sbatch keeps the caller's stdin.
while IFS= read -r -d '' file <&3; do
    output_file="${file/.jsonl.ben/_cut_edges.parquet}"
    target_file="$(dirname "$output_file")/../../hpc_processed_data/VA/$(basename "$output_file")"

    # %q quotes each path so the job's shell sees it as a single word.
    printf -v job_cmd 'ben-tally -g %q -b %q && mv %q %q' \
        "$GRAPH_FILE" "$file" "$output_file" "$target_file"

    sbatch --time=1-00:00:00 --mem=8G --wrap="$job_cmd"
done 3< <(find "$SCRIPT_DIR" -type f -name "*.ben" -print0)

hpc_files/hpc_raw_data/VA/tally_VA.sh

+23
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
#!/bin/bash

# Submit one Slurm job per *.ben file found under this script's directory.
# Each job runs ben-tally in tally-keys mode against the VA_precincts dual
# graph for the keys G16DPRS G16RPRS, then moves the resulting parquet file
# into ../../hpc_processed_data/VA/ (relative to the directory that holds the
# input file).
#
# All paths are resolved here, at submit time, and embedded shell-quoted in the
# job command: shell variables of this script (file, output_file) are not
# defined inside the job, and the --wrap wrapper is a plain "sh" script, so
# bash-only expansions such as ${var/pat/repl} must not be deferred to the job.
#
# NOTE(review): assumes ben-tally writes its result next to the input with
# ".jsonl.ben" replaced by "_tallies.parquet" -- confirm against ben-tally.

# Get the directory of the current script
SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
TOP_DIR=$(realpath "$SCRIPT_DIR/../../..")
GRAPH_FILE="$TOP_DIR/JSON_dualgraphs/VA_precincts.json"

# Resource request shared by every submitted job.
sbatch_opts=(
    --time 10:00:00
    --mem=64G
    --output=VA_tally_new_%x_%j.out
    --error=VA_tally_new_%x_%j.log
    --nodes=1
    --cpus-per-task=12
    --ntasks-per-node=1
    --job-name="VA_tally"
)

# find output is NUL-delimited and read on fd 3 so that paths containing
# whitespace survive and sbatch keeps the caller's stdin.
while IFS= read -r -d '' file <&3; do
    output_file="${file/.jsonl.ben/_tallies.parquet}"
    target_file="$(dirname "$output_file")/../../hpc_processed_data/VA/$(basename "$output_file")"

    # %q quotes each path so the job's shell sees it as a single word.
    printf -v job_cmd \
        'ben-tally -b %q -g %q -m tally-keys -k G16DPRS G16RPRS && mv %q %q' \
        "$file" "$GRAPH_FILE" "$output_file" "$target_file"

    sbatch "${sbatch_opts[@]}" --wrap="$job_cmd"
done 3< <(find "$SCRIPT_DIR" -type f -name "*.ben" -print0)

other_data_files/script_files/run_frcw_multigrid.sh

+14-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
#!/bin/bash
22

3+
# Trap Ctrl-C and kill all background jobs
4+
trap 'echo .; echo "Keyboard interrupt detected. Exiting..."; kill 0; exit 1;' SIGINT
5+
36
# Get the directory of the current script
47
SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
58
TOP_DIR=$(realpath "$SCRIPT_DIR/../..")
@@ -62,5 +65,14 @@ do
6265
--writer ben -o "${TOP_DIR}/other_data_files/raw_data_files/linear_multigrid/linear_${var_names[$i]}_1M.jsonl.ben" &
6366
done
6467

65-
wait
66-
echo "Done!"
68+
spinner_chars='-\|/'

# Print how many entries of the shell's job list mention "frcw --graph" and
# are not marked "Done".
count_unfinished_frcw() {
    jobs | grep -v "Done" | grep -c "[f]rcw --graph"
}

# Redraw a one-character spinner every 0.1 s for as long as such jobs remain;
# the count is re-checked before each frame so the loop stops promptly.
while [ "$(count_unfinished_frcw)" -gt 0 ]; do
    for ((tick = 0; tick < ${#spinner_chars}; tick++)); do
        if [ "$(count_unfinished_frcw)" -eq 0 ]; then
            break
        fi
        printf "\rRunning FRCW... %s" "${spinner_chars:$tick:1}"
        sleep 0.1
    done
done
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
#!/bin/bash

# Run ben-tally in changed-assignments mode (first 50000 accepted steps,
# normalized) on every *.ben file under other_data_files/raw_data_files, and
# move each result into the matching sub-directory of
# other_data_files/processed_data_files.
#
# NOTE(review): assumes ben-tally writes its result next to the input with
# ".jsonl.ben" replaced by "_accept_50000_changed_assignments.txt" -- confirm
# against ben-tally.

# Get the directory of the current script
SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
TOP_DIR=$(realpath "$SCRIPT_DIR/../..")
RAW_DIR="$TOP_DIR/other_data_files/raw_data_files"
PROCESSED_DIR="$TOP_DIR/other_data_files/processed_data_files"

# find output is NUL-delimited and read on fd 3 so that paths containing
# whitespace survive and ben-tally keeps the caller's stdin.
while IFS= read -r -d '' f <&3; do
    output_file="${f/.jsonl.ben/_accept_50000_changed_assignments.txt}"
    relative_path="${f#"$RAW_DIR"/}" # Path of the input relative to RAW_DIR
    target_dir="$PROCESSED_DIR/$(dirname "$relative_path")"

    # Create the destination first: mv into a missing directory would fail
    # and leave the result stranded beside the raw data.
    mkdir -p "$target_dir" &&
        ben-tally -m changed-assignments -b "$f" --max-accepted 50000 --normalize &&
        mv "$output_file" "$target_dir/"
done 3< <(find "$RAW_DIR" -type f -name "*.ben" -print0)

0 commit comments

Comments
 (0)