Set environment
Code
### Load the project configuration; the path is relative, so this cell
### depends on the current working directory of the kernel.
source ../run_config_project.sh
### Print the server/project directory variables (FD_BASE, FD_PRJ, FD_DAT, ...).
### NOTE(review): show_env is presumably defined by the sourced config — confirm.
show_env
You are working on Duke Server: RCC
BASE DIRECTORY (FD_BASE): /data/reddylab/Kuei
REPO DIRECTORY (FD_REPO): /data/reddylab/Kuei/repo
WORK DIRECTORY (FD_WORK): /data/reddylab/Kuei/work
DATA DIRECTORY (FD_DATA): /data/reddylab/Kuei/data
CONTAINER DIR. (FD_SING): /data/reddylab/Kuei/container
You are working with ENCODE FCC
PATH OF PROJECT (FD_PRJ): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC
PROJECT RESULTS (FD_RES): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results
PROJECT SCRIPTS (FD_EXE): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/scripts
PROJECT DATA (FD_DAT): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/data
PROJECT NOTE (FD_NBK): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/notebooks
PROJECT DOCS (FD_DOC): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/docs
PROJECT LOG (FD_LOG): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/log
PROJECT REF (FD_REF): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/references
PROJECT IMAGE (FP_PRJ_SIF): /data/reddylab/Kuei/container/project/singularity_proj_encode_fcc.sif
PROJECT CONF. (FP_CNF): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/scripts/config_project.sh
Set global variables
Code
### Name of the data folder under ${FD_DAT}/external used by every cell below.
### (No spaces around '=': with spaces the shell runs a command named TXT_FOLDER.)
TXT_FOLDER="encode_chipseq_histone_250120"
Execute
Preview scripts
Code
### Locate the download script inside the project script directory (FD_EXE),
### make sure it is executable, and print it for review.
FN_EXE="run_download_files.sh"
FP_EXE="${FD_EXE}/${FN_EXE}"
chmod +x "${FP_EXE}"
cat "${FP_EXE}"
#!/bin/bash
### Download every entry of a file list into a target directory.
###
### Usage: run_download_files.sh FP_CNF FP_DOWNLOAD_FILE_LIST FD_DOWNLOAD_DIRECTORY
###   FP_CNF                 config script that is sourced before downloading
###   FP_DOWNLOAD_FILE_LIST  list file; each line is passed to one curl call
###   FD_DOWNLOAD_DIRECTORY  directory the files are downloaded into

### print start message
timer_start=$(date +%s)
echo "Hostname: " "$(hostname)"
echo "Slurm Array Index: " ${SLURM_ARRAY_TASK_ID}
echo "Time Stamp: " "$(date +"%m-%d-%y+%T")"
echo

### get arguments
if [[ $# -lt 3 ]]; then
    echo "Usage: $0 FP_CNF FP_DOWNLOAD_FILE_LIST FD_DOWNLOAD_DIRECTORY" >&2
    exit 1
fi
FP_CNF=${1}
FP_DOWNLOAD_FILE_LIST=${2}
FD_DOWNLOAD_DIRECTORY=${3}

### set environment
source "${FP_CNF}"

### show I/O
### stop if the directory is not reachable; otherwise curl would write the
### downloads into whatever directory the job happened to start in
cd "${FD_DOWNLOAD_DIRECTORY}" || exit 1
echo "Change directory:"
pwd
echo

### execute: download file
### one curl call per line of the list; with -J curl refuses to overwrite a
### file that already exists, so a re-run reports "File exists" for it
echo "Download files..."
xargs -L 1 curl -O -J -L < "${FP_DOWNLOAD_FILE_LIST}"
echo

### print end message
timer=$(date +%s)
runtime=$(( timer - timer_start ))
echo
echo 'Done!'
### NOTE(review): displaytime is presumably provided by the sourced config — confirm
echo "Run Time: $(displaytime ${runtime})"
echo
Preview folders and files
Code
### List the sub-folders of the external data folder.
FD_OUT="${FD_DAT}/external/${TXT_FOLDER}"
ls "${FD_OUT}"
region_narrowPeak signal_fold_change signal_pvalue
Code
### Print the full path and the base name of every entry in the data folder.
### The glob is expanded directly into the array instead of parsing `ls`.
FD_OUTS=("${FD_DAT}/external/${TXT_FOLDER}"/*)
for FD_OUT in "${FD_OUTS[@]}"; do
    ### an unmatched glob stays as a literal pattern; skip it
    [[ -e "${FD_OUT}" ]] || continue
    FOLDER_SUB=$(basename "${FD_OUT}")
    echo "${FD_OUT}"
    echo "Folder:" "${FOLDER_SUB}"
    echo
done
/data/reddylab/Kuei/repo/Proj_ENCODE_FCC/data/external/encode_chipseq_histone_250120/region_narrowPeak
Folder: region_narrowPeak
/data/reddylab/Kuei/repo/Proj_ENCODE_FCC/data/external/encode_chipseq_histone_250120/signal_fold_change
Folder: signal_fold_change
/data/reddylab/Kuei/repo/Proj_ENCODE_FCC/data/external/encode_chipseq_histone_250120/signal_pvalue
Folder: signal_pvalue
Code
### For every sub-folder, report how many lines its files.txt contains.
FP_INPS=("${FD_DAT}/external/${TXT_FOLDER}"/*/files.txt)
for FP_INP in "${FP_INPS[@]}"; do
    ### an unmatched glob stays as a literal pattern; skip it
    [[ -f "${FP_INP}" ]] || continue
    FD_OUT=$(dirname "${FP_INP}")
    FOLDER_SUB=$(basename "${FD_OUT}")
    NUM=$(wc -l < "${FP_INP}")
    echo "Folder:" "${FOLDER_SUB}"
    echo "Count: " "${NUM}"
    echo
done
Folder: region_narrowPeak
Count: 19
Folder: signal_fold_change
Count: 19
Folder: signal_pvalue
Count: 19
Code
### For every sub-folder, report how many lines its checksum_md5sum.txt contains.
FP_INPS=("${FD_DAT}/external/${TXT_FOLDER}"/*/checksum_md5sum.txt)
for FP_INP in "${FP_INPS[@]}"; do
    ### an unmatched glob stays as a literal pattern; skip it
    [[ -f "${FP_INP}" ]] || continue
    FD_OUT=$(dirname "${FP_INP}")
    FOLDER_SUB=$(basename "${FD_OUT}")
    NUM=$(wc -l < "${FP_INP}")
    echo "Folder:" "${FOLDER_SUB}"
    echo "Count: " "${NUM}"
    echo
done
Folder: region_narrowPeak
Count: 19
Folder: signal_fold_change
Count: 19
Folder: signal_pvalue
Count: 19
Run download script
Code
### Submit one Slurm download job per sub-folder of the external data folder.
### Each job runs run_download_files.sh with (config, files.txt, sub-folder)
### and writes its log to ${FD_LOG}/download.<folder>.<sub-folder>.txt.

### init: exe
FN_EXE="run_download_files.sh"
FP_EXE="${FD_EXE}/${FN_EXE}"

### init: directory (expand the glob directly instead of parsing `ls`)
FD_OUTS=("${FD_DAT}/external/${TXT_FOLDER}"/*)

### loop and execute
for FD_OUT in "${FD_OUTS[@]}"; do
    ### skip plain files and an unmatched (literal) glob pattern
    [[ -d "${FD_OUT}" ]] || continue

    ### init: file list
    FP_INP="${FD_OUT}/files.txt"
    if [[ ! -f "${FP_INP}" ]]; then
        echo "Skip (no files.txt): ${FD_OUT}" >&2
        continue
    fi
    NUM=$(wc -l < "${FP_INP}")

    ### init: log file (folder name taken from TXT_FOLDER, not hard-coded)
    TXT_FOLDER_SUB=$(basename "${FD_OUT}")
    FN_LOG="download.${TXT_FOLDER}.${TXT_FOLDER_SUB}.txt"
    FP_LOG="${FD_LOG}/${FN_LOG}"

    ### show progress
    ### report the sub-folder of THIS iteration; FOLDER_SUB is a leftover
    ### from an earlier cell and always held the last folder name
    echo "Folder:" "${TXT_FOLDER_SUB}"
    echo "Count: " "${NUM}"
    echo

    ### execute
    ### `continue` rather than `exit`: this cell runs in the notebook kernel
    cd "${FD_OUT}" || continue
    sbatch \
        --cpus-per-task 4 \
        --mem 4G \
        --output "${FP_LOG}" \
        "${FP_EXE}" "${FP_CNF}" "${FP_INP}" "${FD_OUT}"
    echo
done
Folder: signal_pvalue
Count: 19
Submitted batch job 275736
Folder: signal_pvalue
Count: 19
Submitted batch job 275737
Folder: signal_pvalue
Count: 19
Submitted batch job 275738
Run checksum
Code
### Submit one Slurm checksum job per sub-folder of the external data folder.
### Each job runs run_checksum_files.sh with (config, sub-folder,
### checksum_md5sum.txt, checksum_results.txt) and writes its log to
### ${FD_LOG}/checksum.<folder>.<sub-folder>.txt.

### init: exe
FN_EXE="run_checksum_files.sh"
FP_EXE="${FD_EXE}/${FN_EXE}"

### init: directory (expand the glob directly instead of parsing `ls`)
FD_OUTS=("${FD_DAT}/external/${TXT_FOLDER}"/*)

### loop and execute
for FD_OUT in "${FD_OUTS[@]}"; do
    ### skip plain files and an unmatched (literal) glob pattern
    [[ -d "${FD_OUT}" ]] || continue

    ### init: I/O file
    FP_INP="${FD_OUT}/checksum_md5sum.txt"
    FP_OUT="${FD_OUT}/checksum_results.txt"
    if [[ ! -f "${FP_INP}" ]]; then
        echo "Skip (no checksum_md5sum.txt): ${FD_OUT}" >&2
        continue
    fi
    NUM=$(wc -l < "${FP_INP}")

    ### init: log file (folder name taken from TXT_FOLDER, not hard-coded)
    TXT_FOLDER_SUB=$(basename "${FD_OUT}")
    FN_LOG="checksum.${TXT_FOLDER}.${TXT_FOLDER_SUB}.txt"
    FP_LOG="${FD_LOG}/${FN_LOG}"

    ### show progress
    ### report the sub-folder of THIS iteration; FOLDER_SUB is a leftover
    ### from an earlier cell and always held the last folder name
    echo "Folder:" "${TXT_FOLDER_SUB}"
    echo "Count: " "${NUM}"
    echo

    ### execute
    ### `continue` rather than `exit`: this cell runs in the notebook kernel
    cd "${FD_OUT}" || continue
    sbatch \
        --cpus-per-task 4 \
        --mem 4G \
        --output "${FP_LOG}" \
        "${FP_EXE}" "${FP_CNF}" "${FD_OUT}" "${FP_INP}" "${FP_OUT}"
    echo
done
Folder: signal_pvalue
Count: 19
Submitted batch job 275742
Folder: signal_pvalue
Count: 19
Submitted batch job 275743
Folder: signal_pvalue
Count: 19
Submitted batch job 275744
Review
Check output files
Code
### List the sub-folders of the external data folder (one quoted path;
### a space inside the path would make ls treat it as two arguments).
ls "${FD_DAT}/external/${TXT_FOLDER}"
region_narrowPeak signal_fold_change signal_pvalue
Code
### Count and list the downloaded peak files and metadata of region_narrowPeak.
FD_OUT="${FD_DAT}/external/${TXT_FOLDER}/region_narrowPeak"
### only list when the directory change worked, so a wrong path is not
### silently replaced by a listing of the previous working directory
if cd "${FD_OUT}"; then
    ls -sh *bed.gz *.tsv | wc -l
    ls -sh *bed.gz *.tsv
fi
20
648K ENCFF122CSI.bed.gz 204K ENCFF462AVD.bed.gz 3.1M ENCFF801AHF.bed.gz
2.3M ENCFF135ZLM.bed.gz 2.7M ENCFF540NGG.bed.gz 596K ENCFF885FQN.bed.gz
2.7M ENCFF148UQI.bed.gz 1.4M ENCFF544LXB.bed.gz 1004K ENCFF891CHI.bed.gz
3.6M ENCFF193ERO.bed.gz 1.4M ENCFF561OUZ.bed.gz 804K ENCFF909RKY.bed.gz
2.0M ENCFF209OQD.bed.gz 1.1M ENCFF689QIJ.bed.gz 504K ENCFF963GZJ.bed.gz
2.8M ENCFF213OTI.bed.gz 752K ENCFF706WUF.bed.gz 8.0K metadata.tsv
2.3M ENCFF323WOT.bed.gz 1.6M ENCFF749KLQ.bed.gz
Code
### Count and list the downloaded bigWig files and metadata of signal_fold_change.
FD_OUT="${FD_DAT}/external/${TXT_FOLDER}/signal_fold_change"
### only list when the directory change worked, so a wrong path is not
### silently replaced by a listing of the previous working directory
if cd "${FD_OUT}"; then
    ls -sh *bigWig *.tsv | wc -l
    ls -sh *bigWig *.tsv
fi
20
610M ENCFF139KZL.bigWig 434M ENCFF399SGM.bigWig 652M ENCFF654SLZ.bigWig
1.4G ENCFF242ENK.bigWig 885M ENCFF544AVW.bigWig 510M ENCFF660WUG.bigWig
1.1G ENCFF253TOF.bigWig 558M ENCFF583BKU.bigWig 345M ENCFF806YEZ.bigWig
593M ENCFF286WRJ.bigWig 970M ENCFF601JGK.bigWig 302M ENCFF911JVK.bigWig
1.2G ENCFF317VHO.bigWig 658M ENCFF605FAF.bigWig 380M ENCFF959YJV.bigWig
399M ENCFF347YYH.bigWig 1.4G ENCFF607SUJ.bigWig 8.0K metadata.tsv
1.5G ENCFF381NDD.bigWig 770M ENCFF621DJP.bigWig
Code
### Count and list the downloaded bigWig files and metadata of signal_pvalue.
FD_OUT="${FD_DAT}/external/${TXT_FOLDER}/signal_pvalue"
### only list when the directory change worked, so a wrong path is not
### silently replaced by a listing of the previous working directory
if cd "${FD_OUT}"; then
    ls -sh *bigWig *.tsv | wc -l
    ls -sh *bigWig *.tsv
fi
20
685M ENCFF054RSU.bigWig 857M ENCFF334HSS.bigWig 899M ENCFF632NQA.bigWig
337M ENCFF071GML.bigWig 1.1G ENCFF405ZDL.bigWig 369M ENCFF633OZC.bigWig
568M ENCFF178QDA.bigWig 965M ENCFF457URZ.bigWig 675M ENCFF694ODT.bigWig
837M ENCFF202EVH.bigWig 487M ENCFF461RKK.bigWig 761M ENCFF767UON.bigWig
732M ENCFF220RGS.bigWig 1.3G ENCFF465GBD.bigWig 613M ENCFF847BFA.bigWig
642M ENCFF239EBH.bigWig 1.4G ENCFF582IMB.bigWig 8.0K metadata.tsv
496M ENCFF287LBI.bigWig 1.3G ENCFF605EVL.bigWig
Check checksum results
Code
### List the sub-folders of the external data folder (one quoted path;
### a space inside the path would make ls treat it as two arguments).
ls "${FD_DAT}/external/${TXT_FOLDER}"
region_narrowPeak signal_fold_change signal_pvalue
Code
### Show the checksum results of region_narrowPeak and summarize them.
FD_OUT="${FD_DAT}/external/${TXT_FOLDER}/region_narrowPeak"
FP_OUT="${FD_OUT}/checksum_results.txt"
cat "${FP_OUT}"
### explicit if/else instead of `A && B || C`; a missing or empty results
### file is reported instead of being counted as "All PASSED"
if [[ ! -s "${FP_OUT}" ]]; then
    echo "No checksum results: ${FP_OUT}" >&2
elif grep "FAILED" "${FP_OUT}"; then
    echo "FAILED"
else
    echo "All PASSED"
fi
ENCFF689QIJ.bed.gz: OK
ENCFF323WOT.bed.gz: OK
ENCFF540NGG.bed.gz: OK
ENCFF462AVD.bed.gz: OK
ENCFF749KLQ.bed.gz: OK
ENCFF909RKY.bed.gz: OK
ENCFF209OQD.bed.gz: OK
ENCFF891CHI.bed.gz: OK
ENCFF148UQI.bed.gz: OK
ENCFF706WUF.bed.gz: OK
ENCFF122CSI.bed.gz: OK
ENCFF213OTI.bed.gz: OK
ENCFF193ERO.bed.gz: OK
ENCFF801AHF.bed.gz: OK
ENCFF544LXB.bed.gz: OK
ENCFF561OUZ.bed.gz: OK
ENCFF885FQN.bed.gz: OK
ENCFF963GZJ.bed.gz: OK
ENCFF135ZLM.bed.gz: OK
All PASSED
Code
### Show the checksum results of signal_fold_change and summarize them.
FD_OUT="${FD_DAT}/external/${TXT_FOLDER}/signal_fold_change"
FP_OUT="${FD_OUT}/checksum_results.txt"
cat "${FP_OUT}"
### explicit if/else instead of `A && B || C`; a missing or empty results
### file is reported instead of being counted as "All PASSED"
if [[ ! -s "${FP_OUT}" ]]; then
    echo "No checksum results: ${FP_OUT}" >&2
elif grep "FAILED" "${FP_OUT}"; then
    echo "FAILED"
else
    echo "All PASSED"
fi
ENCFF544AVW.bigWig: OK
ENCFF286WRJ.bigWig: OK
ENCFF621DJP.bigWig: OK
ENCFF959YJV.bigWig: OK
ENCFF911JVK.bigWig: OK
ENCFF242ENK.bigWig: OK
ENCFF806YEZ.bigWig: OK
ENCFF583BKU.bigWig: OK
ENCFF660WUG.bigWig: OK
ENCFF605FAF.bigWig: OK
ENCFF654SLZ.bigWig: OK
ENCFF317VHO.bigWig: OK
ENCFF139KZL.bigWig: OK
ENCFF399SGM.bigWig: OK
ENCFF347YYH.bigWig: OK
ENCFF381NDD.bigWig: OK
ENCFF607SUJ.bigWig: OK
ENCFF601JGK.bigWig: OK
ENCFF253TOF.bigWig: OK
All PASSED
Code
### Show the checksum results of signal_pvalue and summarize them.
FD_OUT="${FD_DAT}/external/${TXT_FOLDER}/signal_pvalue"
FP_OUT="${FD_OUT}/checksum_results.txt"
cat "${FP_OUT}"
### explicit if/else instead of `A && B || C`; a missing or empty results
### file is reported instead of being counted as "All PASSED"
if [[ ! -s "${FP_OUT}" ]]; then
    echo "No checksum results: ${FP_OUT}" >&2
elif grep "FAILED" "${FP_OUT}"; then
    echo "FAILED"
else
    echo "All PASSED"
fi
ENCFF767UON.bigWig: OK
ENCFF582IMB.bigWig: OK
ENCFF287LBI.bigWig: OK
ENCFF220RGS.bigWig: OK
ENCFF054RSU.bigWig: OK
ENCFF694ODT.bigWig: OK
ENCFF334HSS.bigWig: OK
ENCFF239EBH.bigWig: OK
ENCFF178QDA.bigWig: OK
ENCFF071GML.bigWig: OK
ENCFF461RKK.bigWig: OK
ENCFF202EVH.bigWig: OK
ENCFF605EVL.bigWig: OK
ENCFF847BFA.bigWig: OK
ENCFF465GBD.bigWig: OK
ENCFF633OZC.bigWig: OK
ENCFF405ZDL.bigWig: OK
ENCFF632NQA.bigWig: OK
ENCFF457URZ.bigWig: OK
All PASSED
Check execution log
Code
### List the sub-folders of the external data folder (one quoted path;
### a space inside the path would make ls treat it as two arguments).
ls "${FD_DAT}/external/${TXT_FOLDER}"
region_narrowPeak signal_fold_change signal_pvalue
Code
### Show the first 20 lines of the download log of region_narrowPeak.
FN_LOG="download.encode_chipseq_histone_250120.region_narrowPeak.txt"
FP_LOG="${FD_LOG}/${FN_LOG}"
head -n 20 "${FP_LOG}"
Hostname: plp-rcc-node-03
Slurm Array Index:
Time Stamp: 05-22-25+16:25:55
Change directory:
/data/reddylab/Kuei/repo/Proj_ENCODE_FCC/data/external/encode_chipseq_histone_250120/region_narrowPeak
Download files...
% Total % Received % Xferd Average Speed Time Time Time Current
Dload Upload Total Spent Left Speed
100 1935 100 1935 0 0 497 0 0:00:03 0:00:03 --:--:-- 497
Warning: Failed to create the file ENCFF689QIJ.bed.gz: File exists
0 0 0 0 0 0 0 0 --:--:-- 0:00:04 --:--:-- 0
curl: (23) Failed writing header
% Total % Received % Xferd Average Speed Time Time Time Current
Dload Upload Total Spent Left Speed
100 1933 100 1933 0 0 5154 0 --:--:-- --:--:-- --:--:-- 5140
Warning: Failed to create the file ENCFF323WOT.bed.gz: File exists
0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0
curl: (23) Failed writing header
Code
### Show the first 20 lines of the checksum log of region_narrowPeak.
FN_LOG="checksum.encode_chipseq_histone_250120.region_narrowPeak.txt"
FP_LOG="${FD_LOG}/${FN_LOG}"
head -n 20 "${FP_LOG}"
Hostname: plp-rcc-node-03
Slurm Array Index:
Time Stamp: 05-22-25+16:37:50
Change directory:
/data/reddylab/Kuei/repo/Proj_ENCODE_FCC/data/external/encode_chipseq_histone_250120/region_narrowPeak
Checksum files...
Done!
Run Time: 1 seconds