Source: dvc.org
# Create the Git repository first, then set up DVC inside it.
git init
dvc init
# Inspect the working tree after `dvc init`.
git status
# NOTE(review): there is no `git add` before this commit, so it relies on
# `dvc init` having already staged its files — confirm.
git commit -m "Initialize DVC"
# NOTE(review): assumes a Git remote and upstream branch are already configured — confirm.
git push
# Create the directory that will hold the dataset.
mkdir data
# Add a .csv file to data/
# Copy the ID of your Google Drive folder: drive.google.com/drive/folders/[ID]
# You will need to authenticate and allow DVC to access that folder.
# Register that folder as a DVC remote named "storage"; -d makes it the default remote.
dvc remote add -d storage gdrive://[ID]
# Commit only the DVC config file, which is where the remote is recorded.
git commit .dvc/config -m "Configure remote storage"
git push
# Start tracking the dataset with DVC.
dvc add data/data.csv
# This .csv file will be automatically added to .gitignore
# Upload the tracked data to the DVC remote.
dvc push
# Git tracks only the small .dvc file and the .gitignore, not the data itself.
git add data/data.csv.dvc
git add data/.gitignore
git commit -m "Add raw data"
git push
For example, remove the local file and the local DVC cache:
# Delete the working copy of the dataset.
rm -f data/data.csv
# Delete the local DVC cache as well.
# NOTE(review): presumably so the data can only come back from the remote — confirm.
rm -rf .dvc/cache
Now pull the data back from the DVC remote:
# Download the tracked data from the DVC remote.
dvc pull
# Simulate a dataset update: copy the file to tmp/ and append that copy to the original.
mkdir tmp
cp data/data.csv tmp/data.csv
cat tmp/data.csv >> data/data.csv
# List data/ with human-readable sizes to see the effect of the append.
ls -lh data
# Re-add the modified dataset with DVC, then commit the updated .dvc file to Git.
dvc add data/data.csv
git add data/data.csv.dvc
git commit -m "Dataset updates"
# Upload the new version of the data to the DVC remote.
dvc push
Check changes:
# Show the commit history, one commit per line.
git log --oneline
Restore changes:
# Make sure to check out data/data.csv.dvc and not data/data.csv!
# Take the .dvc file as it was one commit before HEAD.
git checkout HEAD^1 data/data.csv.dvc
# Have DVC update the workspace data to match the restored .dvc file.
dvc checkout
Verify:
# List data/ with human-readable sizes.
# NOTE(review): presumably the size should match the listing taken before the update — confirm.
ls -lh data