Set environment
Code
### load the project configuration script from the parent directory;
### messages and warnings raised while sourcing it are silenced
suppressMessages (suppressWarnings (source ("../run_config_project_sing.R" )))
### print the environment summary (container name and FD_* directories)
### NOTE(review): show_env is not defined in this notebook -- presumably it
### comes from the sourced configuration script; confirm
show_env ()
You are working on Singularity: singularity_proj_encode_fcc
BASE DIRECTORY (FD_BASE): /data/reddylab/Kuei
REPO DIRECTORY (FD_REPO): /data/reddylab/Kuei/repo
WORK DIRECTORY (FD_WORK): /data/reddylab/Kuei/work
DATA DIRECTORY (FD_DATA): /data/reddylab/Kuei/data
You are working with ENCODE FCC
PATH OF PROJECT (FD_PRJ): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC
PROJECT RESULTS (FD_RES): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results
PROJECT SCRIPTS (FD_EXE): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/scripts
PROJECT DATA (FD_DAT): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/data
PROJECT NOTE (FD_NBK): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/notebooks
PROJECT DOCS (FD_DOC): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/docs
PROJECT LOG (FD_LOG): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/log
PROJECT REF (FD_REF): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/references
Set global variables
Code
### folder name of the input data set
### NOTE(review): not referenced in the visible part of this notebook
TXT_FOLDER_INP = "encode_chipseq_histone"
### folder name under <FD_DAT>/external that is scanned for downloaded files
TXT_FOLDER_OUT = "encode_chipseq_histone_250120"
Import data
Code
### folder holding the downloaded files: <FD_DAT>/external/<TXT_FOLDER_OUT>
txt_foldr = TXT_FOLDER_OUT
txt_fdiry = file.path(FD_DAT, "external", txt_foldr)
### every entry found in that folder, as bare names and as full paths
vec_txt_foldr = list.files(txt_fdiry)
vec_txt_fdiry = file.path(txt_fdiry, vec_txt_foldr)
Get all downloaded files
Code
### get file names
### For each sub-directory, collect the names (not the full paths) of the
### bigWig / bed.gz files located directly inside it (no recursion).
### The regular expression must not contain any whitespace: "\\." is a
### literal dot and "$" anchors the match at the end of the file name, so
### only names ending in ".bigWig" or ".bed.gz" are kept and other files
### in the same folder (e.g. metadata.tsv) are skipped.
lst = lapply(vec_txt_fdiry, function(txt_path){
    list.files(
        path       = txt_path,
        pattern    = "\\.(bigWig|bed\\.gz)$",
        full.names = FALSE,
        recursive  = FALSE
    )
})
### label each result with the name of the sub-directory it came from
names(lst) = vec_txt_foldr
### assign
lst_vec_txt_files = lst
lst
$region_narrowPeak
'ENCFF122CSI.bed.gz' 'ENCFF135ZLM.bed.gz' 'ENCFF148UQI.bed.gz' 'ENCFF193ERO.bed.gz' 'ENCFF209OQD.bed.gz' 'ENCFF213OTI.bed.gz' 'ENCFF323WOT.bed.gz' 'ENCFF462AVD.bed.gz' 'ENCFF540NGG.bed.gz' 'ENCFF544LXB.bed.gz' 'ENCFF561OUZ.bed.gz' 'ENCFF689QIJ.bed.gz' 'ENCFF706WUF.bed.gz' 'ENCFF749KLQ.bed.gz' 'ENCFF801AHF.bed.gz' 'ENCFF885FQN.bed.gz' 'ENCFF891CHI.bed.gz' 'ENCFF909RKY.bed.gz' 'ENCFF963GZJ.bed.gz'
$signal_fold_change
'ENCFF139KZL.bigWig' 'ENCFF242ENK.bigWig' 'ENCFF253TOF.bigWig' 'ENCFF286WRJ.bigWig' 'ENCFF317VHO.bigWig' 'ENCFF347YYH.bigWig' 'ENCFF381NDD.bigWig' 'ENCFF399SGM.bigWig' 'ENCFF544AVW.bigWig' 'ENCFF583BKU.bigWig' 'ENCFF601JGK.bigWig' 'ENCFF605FAF.bigWig' 'ENCFF607SUJ.bigWig' 'ENCFF621DJP.bigWig' 'ENCFF654SLZ.bigWig' 'ENCFF660WUG.bigWig' 'ENCFF806YEZ.bigWig' 'ENCFF911JVK.bigWig' 'ENCFF959YJV.bigWig'
$signal_pvalue
'ENCFF054RSU.bigWig' 'ENCFF071GML.bigWig' 'ENCFF178QDA.bigWig' 'ENCFF202EVH.bigWig' 'ENCFF220RGS.bigWig' 'ENCFF239EBH.bigWig' 'ENCFF287LBI.bigWig' 'ENCFF334HSS.bigWig' 'ENCFF405ZDL.bigWig' 'ENCFF457URZ.bigWig' 'ENCFF461RKK.bigWig' 'ENCFF465GBD.bigWig' 'ENCFF582IMB.bigWig' 'ENCFF605EVL.bigWig' 'ENCFF632NQA.bigWig' 'ENCFF633OZC.bigWig' 'ENCFF694ODT.bigWig' 'ENCFF767UON.bigWig' 'ENCFF847BFA.bigWig'
Import metatable
Code
### one metadata table is read from each sub-directory
txt_fname = "metadata.tsv"
vec_txt_fpath = file.path(vec_txt_fdiry, txt_fname)
### read every table; the column-type message of read_tsv is switched off
lst = lapply(vec_txt_fpath, read_tsv, show_col_types = FALSE)
lst = setNames(lst, vec_txt_foldr)
### assign and show: sub-directory name followed by the first three rows
lst_dat_metadata = lst
for (idx in names(lst)){
    dat = lst[[idx]]
    cat(idx, " \n ")
    flush.console()
    fun_display_table(head(dat, 3))
    cat(" \n ")
}
Histone ChIP-seq
ENCSR000AKU
ENCFF689QIJ
bed narrowPeak
bed
pseudoreplicated peaks
GRCh38
H3K4me3
1, 2
ENCODE4 v1.5.1 GRCh38
5dea2993c0831ae344a989d601c09178
ENCFF689QIJ.bed.gz
https://www.encodeproject.org/files/ENCFF689QIJ/@@download/ENCFF689QIJ.bed.gz
Histone ChIP-seq
ENCSR000AKQ
ENCFF323WOT
bed narrowPeak
bed
pseudoreplicated peaks
GRCh38
H3K27me3
1, 2, 3
ENCODE4 v1.8.0 GRCh38
4422969d0b63260e2fcb83e10fdcc02f
ENCFF323WOT.bed.gz
https://www.encodeproject.org/files/ENCFF323WOT/@@download/ENCFF323WOT.bed.gz
Histone ChIP-seq
ENCSR000EWC
ENCFF540NGG
bed narrowPeak
bed
pseudoreplicated peaks
GRCh38
H3K4me1
1, 2
ENCODE4 v1.5.1 GRCh38
63db47e5b9b98dbebff2ce20df066106
ENCFF540NGG.bed.gz
https://www.encodeproject.org/files/ENCFF540NGG/@@download/ENCFF540NGG.bed.gz
Histone ChIP-seq
ENCSR000APD
ENCFF544AVW
bigWig
bigWig
fold change over control
GRCh38
H3K79me2
1, 2, 3
ENCODE4 v1.8.0 GRCh38
61dc50179ae8d880b972c3697a6a2fc2
ENCFF544AVW.bigWig
https://www.encodeproject.org/files/ENCFF544AVW/@@download/ENCFF544AVW.bigWig
Histone ChIP-seq
ENCSR000AKV
ENCFF286WRJ
bigWig
bigWig
fold change over control
GRCh38
H3K9ac
1, 2
ENCODE4 v1.6.1 GRCh38
ccd7b8c413fdb998ffd799ec52dd5098
ENCFF286WRJ.bigWig
https://www.encodeproject.org/files/ENCFF286WRJ/@@download/ENCFF286WRJ.bigWig
Histone ChIP-seq
ENCSR000APC
ENCFF621DJP
bigWig
bigWig
fold change over control
GRCh38
H2AFZ
1, 2
ENCODE4 v1.6.0 GRCh38
3492c0e4a64e29231558f9e1e2fe520e
ENCFF621DJP.bigWig
https://www.encodeproject.org/files/ENCFF621DJP/@@download/ENCFF621DJP.bigWig
Histone ChIP-seq
ENCSR000AKU
ENCFF767UON
bigWig
bigWig
signal p-value
GRCh38
H3K4me3
1, 2
ENCODE4 v1.5.1 GRCh38
4c102d45be8326062895ed0a03d4ded7
ENCFF767UON.bigWig
https://www.encodeproject.org/files/ENCFF767UON/@@download/ENCFF767UON.bigWig
Histone ChIP-seq
ENCSR000AKQ
ENCFF582IMB
bigWig
bigWig
signal p-value
GRCh38
H3K27me3
1, 2, 3
ENCODE4 v1.8.0 GRCh38
2ca48f44075eef7118a387260f2f95b9
ENCFF582IMB.bigWig
https://www.encodeproject.org/files/ENCFF582IMB/@@download/ENCFF582IMB.bigWig
Histone ChIP-seq
ENCSR000EWC
ENCFF287LBI
bigWig
bigWig
signal p-value
GRCh38
H3K4me1
1, 2
ENCODE4 v1.5.1 GRCh38
28df1a757a2e5517209c10d57f0ce03e
ENCFF287LBI.bigWig
https://www.encodeproject.org/files/ENCFF287LBI/@@download/ENCFF287LBI.bigWig
Check data
Check whether the downloaded files match the files listed in the metadata table
Code
### For every sub-directory, compare the file names listed in its metadata
### table (column File_Name) with the file names found on disk, and print
### both counts together with the outcome of the comparison.
for (txt_foldr in vec_txt_foldr){
    ### file names recorded in the metadata table
    lst = lst_dat_metadata
    dat = lst[[txt_foldr]]
    vec1 = sort(dat$File_Name)
    ### file names actually present in the sub-directory
    lst = lst_vec_txt_files
    vec2 = sort(lst[[txt_foldr]])
    ### identical() rather than all(vec1 == vec2): `==` recycles the shorter
    ### vector when the lengths differ, which raises a warning or can even
    ### report a false match; identical() simply yields FALSE in that case.
    ### as.character() drops any attributes so only the names are compared.
    is_matched = identical(as.character(vec1), as.character(vec2))
    cat(txt_foldr, " \n ")
    cat("#Files (Info): ", length(vec1), " \n ")
    cat("#Files (Download):", length(vec2), " \n ")
    cat("Matched?", is_matched, " \n ")
    cat(" \n ")
}
region_narrowPeak
#Files (Info): 19
#Files (Download): 19
Matched? TRUE
signal_fold_change
#Files (Info): 19
#Files (Download): 19
Matched? TRUE
signal_pvalue
#Files (Info): 19
#Files (Download): 19
Matched? TRUE