forked from princeton-nlp/WebShop
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsetup.sh
executable file
·78 lines (72 loc) · 2.3 KB
/
setup.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#!/bin/bash
# Setup script: parses a -d flag (small|all) and builds the search-engine
# index. Dependency-install and dataset-download steps are present below
# but currently commented out.
# TODO: Change to full in script
# Force a UTF-8 character type so downstream text tools handle the
# product data consistently regardless of the host locale.
export LC_CTYPE="en_US.UTF-8"
# Displays information on how to use script.
# Prints usage to stderr (this runs on the error path, so it must not
# pollute stdout) and exits the script with status 1.
helpFunction()
{
  printf 'Usage: %s [-d small|all]\n' "$0" >&2
  # printf is used instead of non-portable `echo -e` for the \t escape.
  printf '\t-d small|all - Specify whether to download entire dataset (all) or just 1000 (small)\n' >&2
  exit 1 # Exit script after printing help
}
# Get values of command line flags.
# -d small|all  selects dataset size; any other option, a missing option
# argument, or an absent -d flag prints an error (to stderr) and exits
# via helpFunction. Leading ':' in the optstring enables silent error
# mode so we control the diagnostics ourselves.
while getopts ':d:' flag
do
    case "${flag}" in
        d) data=${OPTARG};;
        :) echo "[ERROR]: option -${OPTARG} requires an argument" >&2
           helpFunction;;
        *) echo "[ERROR]: unknown option -${OPTARG}" >&2
           helpFunction;;
    esac
done
if [ -z "$data" ]; then
    echo "[ERROR]: Missing -d flag" >&2
    helpFunction
fi
# # Install Environment Dependencies via `conda`
# conda install -c pytorch faiss-cpu=1.7.4 mkl=2021 blas=1.0=mkl;
# conda install -c conda-forge openjdk=11;
#
# # Install Python Dependencies
# pip install -r requirements.txt;
#
# # Download dataset into `data` folder via `gdown` command
# mkdir -p data;
# cd data;
# if [ "$data" == "small" ]; then
# gdown https://drive.google.com/uc?id=1EgHdxQ_YxqIQlvvq5iKlCrkEKR6-j0Ib; # items_shuffle_1000 - product scraped info
# gdown https://drive.google.com/uc?id=1IduG0xl544V_A_jv3tHXC0kyFi7PnyBu; # items_ins_v2_1000 - product attributes
# elif [ "$data" == "all" ]; then
# gdown https://drive.google.com/uc?id=1A2whVgOO0euk5O13n2iYDM0bQRkkRduB; # items_shuffle
# gdown https://drive.google.com/uc?id=1s2j6NgHljiZzQNL3veZaAiyW_qDEgBNi; # items_ins_v2
# else
# echo "[ERROR]: argument for `-d` flag not recognized"
# helpFunction
# fi
# gdown https://drive.google.com/uc?id=14Kb5SPBk_jfdLZ_CDBNitW98QLDlKR5O # items_human_ins
# cd ..
#
# # Download spaCy large NLP model
# pip install --force-reinstall typing-extensions==4.5.0
# python -m spacy download en_core_web_lg
#
# Build search engine index.
# Guard the cd: without it, a missing search_engine/ directory would let
# the mkdir/convert/indexing steps run silently in the wrong directory.
cd search_engine || { echo "[ERROR]: cannot enter search_engine directory" >&2; exit 1; }
mkdir -p resources resources_100 resources_1k resources_100k
# convert items.json => required doc format; abort on failure so we do
# not index stale or missing documents.
python convert_product_file_format.py || exit 1
mkdir -p indexes
./run_indexing.sh || exit 1
cd ..
#
# # Create logging folder + samples of log data
# get_human_trajs () {
# PYCMD=$(cat <<EOF
# import gdown
# url="https://drive.google.com/drive/u/1/folders/16H7LZe2otq4qGnKw_Ic1dkt-o3U9Zsto"
# gdown.download_folder(url, quiet=True, remaining_ok=True)
# EOF
# )
# python -c "$PYCMD"
# }
# mkdir -p user_session_logs/
# cd user_session_logs/
# echo "Downloading 50 example human trajectories..."
# get_human_trajs
# echo "Downloading example trajectories complete"
# cd ..