|
48 | 48 | done
|
49 | 49 | cd ../..
|
50 | 50 |
|
# initialize VCF config file
# VCF1000G is set earlier in the script (outside this section); its value is
# written verbatim as the single entry of the config file.
VCFCONFIG="vcf_config.1000G.txt"
# '%s\n' keeps the value from being interpreted as a printf format string
printf '%s\n' "${VCF1000G}" > "$VCFCONFIG"

# download 1000G samples IDs
SAMPLESIDS="samplesIDs"
SAMPLES1000G="hg38_1000G.samplesID.txt"
mkdir -p "$SAMPLESIDS" # create sample ids dir
echo "Downloading samples ids for 1000G dataset"
# -O writes to a fixed path inside the samples dir: no cd needed, and reruns
# overwrite the file instead of leaving "<name>.1" copies behind
if ! wget -O "${SAMPLESIDS}/${SAMPLES1000G}" "https://raw.githubusercontent.com/pinellolab/CRISPRme/refs/heads/gnomad-4.1-converter/download_data/${SAMPLES1000G}"; then
    echo "ERROR: unexpected failure while downloading ${SAMPLES1000G}" >&2
    exit 1
fi

# initialize samples config file (lists the samples IDs file name)
SAMPLESCONFIG="samplesIDs.1000G.txt"
printf '%s\n' "${SAMPLES1000G}" > "$SAMPLESCONFIG"
| 68 | + |
# download annotation data
ANNOTATIONDIR="Annotations"
DOWNLOAD_DATA_URL="https://raw.githubusercontent.com/pinellolab/CRISPRme/gnomad-4.1-converter/download_data"

# fetch_and_extract URL ARCHIVE
#   Download URL to ARCHIVE in the current directory, compare its md5 with the
#   md5 of the same URL streamed through curl, then unpack it with tar.
#   The checksum therefore catches truncated or corrupted transfers only.
#   The body is a subshell, so its variables stay private and `exit` only
#   ends the function. Returns non-zero if any step fails.
fetch_and_extract() (
    url=$1
    archive=$2
    max_attempts=5
    expected_md5="$(curl -sL "$url" | md5sum | cut -d ' ' -f 1)"
    attempt=1
    # retry download if caught timeout, but give up after max_attempts so a
    # permanent failure (e.g. a missing file) cannot loop forever
    until wget -T 15 -c -O "$archive" "$url"; do
        if [ "$attempt" -ge "$max_attempts" ]; then
            echo "ERROR: unable to download ${archive} after ${max_attempts} attempts" >&2
            exit 1
        fi
        attempt=$((attempt + 1))
    done
    actual_md5="$(md5sum "$archive" | cut -d ' ' -f 1)"
    if [ "$expected_md5" != "$actual_md5" ]; then
        echo "ERROR: unexpected failure while downloading ${archive}" >&2
        exit 1
    fi
    echo "Extracting ${archive}..."
    tar -xvf "$archive"
)

mkdir -p "$ANNOTATIONDIR" # create annotation folder
cd "$ANNOTATIONDIR" || exit 1

echo "Downloading ENCODE+GENCODE annotation data..."
# the remote files are gzipped tarballs, so keep that extension locally
encodegencode="dhs+encode+gencode.hg38.bed.tar.gz"
fetch_and_extract "${DOWNLOAD_DATA_URL}/${encodegencode}" "$encodegencode" || exit 1

echo "Downloading GENCODE encoding sequences..."
gencode="gencode.protein_coding.bed.tar.gz"
# checksum and download both use the gencode.protein_coding URL
fetch_and_extract "${DOWNLOAD_DATA_URL}/${gencode}" "$gencode" || exit 1
cd ..
| 101 | + |
# create Dictionaries folder
mkdir -p "Dictionaries"

# create sg1617 guide file
GUIDEFILE="sg1617.txt"
printf '%s\n' "CTAACAGTTGCTTTTATCACNNN" > "$GUIDEFILE"

# create NGG PAM file
PAM="PAMs"
NGGPAM="20bp-NGG-spCas9.txt"
mkdir -p "$PAM"
# write through the relative path instead of cd-ing in and out: a failed cd
# can no longer misplace the file or shift the working directory
printf '%s\n' "NNNNNNNNNNNNNNNNNNNNNGG 3" > "${PAM}/${NGGPAM}"
|
82 | 116 |
|
echo "Start CRISPRme test..."
# The search is given the extracted BED files, not the downloaded archives.
# NOTE(review): these names assume each archive unpacks to a BED file named
# after it -- confirm against the archive contents.
ANNOTATIONBED="Annotations/dhs+encode+gencode.hg38.bed"
GENEANNOTATIONBED="Annotations/gencode.protein_coding.bed"
for bed in "$ANNOTATIONBED" "$GENEANNOTATIONBED"; do
    # fail early with a clear message rather than deep inside the search
    if [ ! -f "$bed" ]; then
        echo "ERROR: annotation file ${bed} not found" >&2
        exit 1
    fi
done
crisprme.py complete-search \
    --genome Genomes/hg38/ \
    --vcf "$VCFCONFIG" \
    --guide "$GUIDEFILE" \
    --pam "PAMs/${NGGPAM}" \
    --annotation "$ANNOTATIONBED" \
    --samplesID "$SAMPLESCONFIG" \
    --gene_annotation "$GENEANNOTATIONBED" \
    --mm 6 --bDNA 2 --bRNA 2 --merge 3 \
    --output sg1617.6.2.2 \
    --thread 4
0 commit comments