Set environment
Code
### load the project configuration script; messages and warnings raised
### while sourcing it are silenced
suppressMessages(
  suppressWarnings(
    source("../run_config_project_sing.R")
  )
)

### print the environment summary (helper not defined in this notebook)
show_env()
You are working on Singularity: singularity_proj_encode_fcc
BASE DIRECTORY (FD_BASE): /data/reddylab/Kuei
REPO DIRECTORY (FD_REPO): /data/reddylab/Kuei/repo
WORK DIRECTORY (FD_WORK): /data/reddylab/Kuei/work
DATA DIRECTORY (FD_DATA): /data/reddylab/Kuei/data
You are working with ENCODE FCC
PATH OF PROJECT (FD_PRJ): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC
PROJECT RESULTS (FD_RES): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results
PROJECT SCRIPTS (FD_EXE): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/scripts
PROJECT DATA (FD_DAT): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/data
PROJECT NOTE (FD_NBK): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/notebooks
PROJECT DOCS (FD_DOC): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/docs
PROJECT LOG (FD_LOG): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/log
PROJECT REF (FD_REF): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/references
Set global variables
Code
### folder names used below:
###   TXT_FOLDER_INP -- sub-folder of file.path(FD_DAT, "processed") holding the input tables
###   TXT_FOLDER_OUT -- sub-folder of file.path(FD_RES, "region") receiving the outputs
TXT_FOLDER_INP <- "TF_modules_Shannon"
TXT_FOLDER_OUT <- "module_tf_shannon"

### `TXT_FODLER_INP` is a misspelling of TXT_FOLDER_INP; it is kept as an alias
### because other cells in this notebook still refer to the misspelled name
TXT_FODLER_INP <- TXT_FOLDER_INP
Code
### show the files available in the input folder, one name per line
txt_folder <- TXT_FODLER_INP
txt_fdiry  <- file.path(FD_DAT, "processed", txt_folder)
vec        <- list.files(txt_fdiry)
for (txt in vec) {
  cat(txt, " \n ")
}
K562.full.region.assignments.txt
K562.TFzscore.txt
Import data
Code
### path of the region-to-module assignment table
txt_folder <- TXT_FODLER_INP
txt_fdiry  <- file.path(FD_DAT, "processed", txt_folder)
txt_fname  <- "K562.full.region.assignments.txt"
txt_fpath  <- file.path(txt_fdiry, txt_fname)

### read the tab-separated table (column specification message suppressed)
### and keep an untouched copy of the import
dat <- read_tsv(txt_fpath, show_col_types = FALSE)
dat_module_full_import <- dat

### report the dimensions and preview the first rows
print(dim(dat))
fun_display_table(head(dat))
0
1
0
0
1
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
chr1
115702
115751
NONO,RNF2,FOXK2,NR2F1,SOX6,ESRRA,ATF4,GMEB1,NFIC,TRIM24,HDAC1,TCF12,NCOR1,NFE2,ATF7,ZNF24,GABPB1,ZBTB2,MTA3,FOXM1,ZBTB40,DPF2,NFRKB,HDAC2,GATAD2A,IKZF1,ARID3A,NBN,EP300,SMARCE1,ZNF281,KDM1A,NCOA1,MAFG,POLR2A,EGR1,REST,TCF3,SMARCC2,MTA1,MEIS2
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
chr1
118585
118665
RNF2,MAFG,NFE2L1,NFE2
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
chr1
136446
136510
CEBPB,ESRRA,NR2F1,EGR1
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
chr1
139031
139110
ZFX,CTCF,RFX1
0
1
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
chr1
268005
268051
SMC3,CTCF,MAZ,ATF7,ARID2,ZNF281,REST,RAD21
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
chr1
586187
586203
CTCF,MAZ,SMC3
Code
### path of the TF table (two columns; renamed to Module / TF further below)
txt_folder <- TXT_FODLER_INP
txt_fdiry  <- file.path(FD_DAT, "processed", txt_folder)
txt_fname  <- "K562.TFzscore.txt"
txt_fpath  <- file.path(txt_fdiry, txt_fname)

### read the tab-separated table (column specification message suppressed)
### and keep an untouched copy of the import
dat <- read_tsv(txt_fpath, show_col_types = FALSE)
dat_module_tfs_import <- dat

### report the dimensions and preview the first rows
print(dim(dat))
fun_display_table(head(dat))
1
ZNF589
1
JUNB
1
FOSL1
1
MEIS2
1
EGR1
1
MEF2D
Rename columns
Code
### give the coordinate columns the project's BED-style names
dat <- dplyr::rename(
  dat_module_full_import,
  "Chrom"      = "chr",
  "ChromStart" = "start",
  "ChromEnd"   = "end"
)

### add a region identifier built from the three coordinate columns
### (fun_gen_region is a project helper; the preview shows values such as
### "chr1:115702-115751")
dat <- dplyr::mutate(
  dat,
  Region = fun_gen_region(Chrom, ChromStart, ChromEnd)
)

### store the result, then report the dimensions and preview the first rows
dat_module_full <- dat
print(dim(dat))
fun_display_table(head(dat))
0
1
0
0
1
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
chr1
115702
115751
NONO,RNF2,FOXK2,NR2F1,SOX6,ESRRA,ATF4,GMEB1,NFIC,TRIM24,HDAC1,TCF12,NCOR1,NFE2,ATF7,ZNF24,GABPB1,ZBTB2,MTA3,FOXM1,ZBTB40,DPF2,NFRKB,HDAC2,GATAD2A,IKZF1,ARID3A,NBN,EP300,SMARCE1,ZNF281,KDM1A,NCOA1,MAFG,POLR2A,EGR1,REST,TCF3,SMARCC2,MTA1,MEIS2
chr1:115702-115751
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
chr1
118585
118665
RNF2,MAFG,NFE2L1,NFE2
chr1:118585-118665
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
chr1
136446
136510
CEBPB,ESRRA,NR2F1,EGR1
chr1:136446-136510
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
chr1
139031
139110
ZFX,CTCF,RFX1
chr1:139031-139110
0
1
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
chr1
268005
268051
SMC3,CTCF,MAZ,ATF7,ARID2,ZNF281,REST,RAD21
chr1:268005-268051
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
chr1
586187
586203
CTCF,MAZ,SMC3
chr1:586187-586203
Check missing values
Code
### count the missing values in each coordinate / annotation column
dat <- dplyr::select(dat_module_full, Chrom, ChromStart, ChromEnd, Region, TFs)
lst <- lapply(dat, function(vec) {
  sum(is.na(vec))
})
print(lst)
$Chrom
[1] 1
$ChromStart
[1] 1
$ChromEnd
[1] 1
$Region
[1] 0
$TFs
[1] 1
Code
### display the rows whose chromosome is missing
dat <- dplyr::select(dat_module_full, Chrom, ChromStart, ChromEnd, Region, TFs)
dat <- dplyr::filter(dat, is.na(Chrom))
dat
A tibble: 1 × 5
<chr>
<dbl>
<dbl>
<chr>
<chr>
NA
NA
NA
NA:NA-NA
NA
Remove NA
Code
### state before filtering: any missing value, and table dimensions
dat <- dat_module_full
print(any(is.na(dat)))
print(dim(dat))

### keep only the rows that have a chromosome
dat <- dplyr::filter(dat, !is.na(Chrom))
dat_module_full_rmna <- dat

### state after filtering
print(any(is.na(dat)))
print(dim(dat))
[1] TRUE
[1] 173997 75
[1] FALSE
[1] 173996 75
Arrange TF-Module mapping
Code
### name the two imported columns
dat <- dat_module_tfs_import
colnames(dat) <- c("Module", "TF")

### turn the module number into a label: left-pad with "0" to width 2 and
### prefix with "Module_" (e.g. 1 -> "Module_01")
vec <- paste(
  "Module",
  stringr::str_pad(dat$Module, 2, pad = "0"),
  sep = "_"
)
dat <- dplyr::mutate(dat, Module = vec)

### store the result, then report the dimensions and preview the first rows
dat_module2tf_list <- dat
print(dim(dat))
fun_display_table(head(dat))
Module_01
ZNF589
Module_01
JUNB
Module_01
FOSL1
Module_01
MEIS2
Module_01
EGR1
Module_01
MEF2D
Code
### reshape the (Module, TF) pairs into a TF x Module indicator matrix:
### present pairs get 1, absent pairs are filled with 0
dat <- dplyr::mutate(dat_module2tf_list, Value = 1)
dat <- tidyr::spread(dat, Module, Value)
dat <- replace(dat, is.na(dat), 0)

### store the result, then report the dimensions and preview the first rows
dat_module2tf_matrix <- dat
print(dim(dat))
fun_display_table(head(dat))
ADNP
0
0
1
0
0
0
0
1
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
AGO1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
1
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
1
0
1
0
0
0
0
1
0
1
0
0
0
0
0
0
0
0
0
1
0
0
0
0
ARHGAP35
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
ARID2
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
1
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
1
ARID3A
1
1
0
0
0
0
0
1
0
1
0
1
0
0
0
0
1
0
0
0
0
0
0
0
1
0
0
1
0
0
0
0
0
0
1
0
1
0
0
0
1
0
0
0
1
0
0
0
0
0
0
1
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
1
0
0
ARID4B
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
Arrange TF regions
Region-TFs
Code
### keep the coordinates, the region id and the comma-separated TF list
### of every row that survived the NA filter
dat <- dplyr::select(
  dat_module_full_rmna,
  Chrom, ChromStart, ChromEnd, Region, TFs
)

### store the result, then report the dimensions and preview the first rows
dat_region2tf_list <- dat
print(dim(dat))
fun_display_table(head(dat))
chr1
115702
115751
chr1:115702-115751
NONO,RNF2,FOXK2,NR2F1,SOX6,ESRRA,ATF4,GMEB1,NFIC,TRIM24,HDAC1,TCF12,NCOR1,NFE2,ATF7,ZNF24,GABPB1,ZBTB2,MTA3,FOXM1,ZBTB40,DPF2,NFRKB,HDAC2,GATAD2A,IKZF1,ARID3A,NBN,EP300,SMARCE1,ZNF281,KDM1A,NCOA1,MAFG,POLR2A,EGR1,REST,TCF3,SMARCC2,MTA1,MEIS2
chr1
118585
118665
chr1:118585-118665
RNF2,MAFG,NFE2L1,NFE2
chr1
136446
136510
chr1:136446-136510
CEBPB,ESRRA,NR2F1,EGR1
chr1
139031
139110
chr1:139031-139110
ZFX,CTCF,RFX1
chr1
268005
268051
chr1:268005-268051
SMC3,CTCF,MAZ,ATF7,ARID2,ZNF281,REST,RAD21
chr1
586187
586203
chr1:586187-586203
CTCF,MAZ,SMC3
Region-TFs matrix
Code
### one row per (region, TF): split the comma-separated TF list, then drop
### duplicated rows and rows containing a missing value
dat <- tidyr::separate_rows(dat_region2tf_list, TFs, sep = ",")
dat <- dplyr::distinct(dat)
dat <- na.omit(dat)

### convert into a Region x TF indicator matrix:
### present pairs get 1, absent pairs are filled with 0
dat <- dplyr::mutate(dat, Value = 1)
dat <- tidyr::spread(dat, TFs, Value)
dat <- replace(dat, is.na(dat), 0)

### store the result, then report the dimensions and preview the first rows
dat_region2tf_matrix <- dat
print(dim(dat))
fun_display_table(head(dat))
chr1
115702
115751
chr1:115702-115751
0
0
0
0
1
0
0
0
0
0
1
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
1
0
0
0
0
1
0
1
0
0
0
0
0
0
0
0
0
0
1
1
0
0
0
0
1
0
1
1
0
0
0
1
1
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
1
0
0
0
0
0
0
1
0
0
0
1
1
0
0
0
0
0
0
1
1
0
1
0
1
0
1
1
0
1
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
1
0
0
0
0
0
0
0
0
0
0
0
1
1
0
0
1
0
0
0
0
0
0
0
0
0
0
0
1
1
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
1
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
chr1
118585
118665
chr1:118585-118665
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
chr1
136446
136510
chr1:136446-136510
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
chr1
139031
139110
chr1:139031-139110
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
chr1
268005
268051
chr1:268005-268051
0
0
0
1
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
chr1
586187
586203
chr1:586187-586203
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
Arrange module regions
Region-Module matrix
Code
### start from the NA-filtered table
dat <- dat_module_full_rmna

### split it into the region columns and the per-module indicator columns
### (everything except the region columns and the TF list)
tmp <- dplyr::select(dat, Chrom, ChromStart, ChromEnd, Region)
mat <- dplyr::select(dat, -Chrom, -ChromStart, -ChromEnd, -Region, -TFs)

### relabel the module columns: left-pad the existing name with "0" to
### width 2 and prefix with "Module_"
vec <- paste(
  "Module",
  stringr::str_pad(colnames(mat), 2, pad = "0"),
  sep = "_"
)
colnames(mat) <- vec

### put the region columns back in front of the module columns
dat <- cbind(tmp, mat)

### store the result, then report the dimensions and preview the first rows
dat_region2module_matrix <- dat
print(dim(dat))
fun_display_table(head(dat))
chr1
115702
115751
chr1:115702-115751
0
1
0
0
1
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
chr1
118585
118665
chr1:118585-118665
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
chr1
136446
136510
chr1:136446-136510
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
chr1
139031
139110
chr1:139031-139110
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
chr1
268005
268051
chr1:268005-268051
0
1
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
chr1
586187
586203
chr1:586187-586203
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
Region-Module
Code
### reshape the region-module
dat = dat_region2module_matrix
dat = dat %>%
dplyr:: mutate (Group = "TF_Module" ) %>%
tidyr:: gather (Label, Value, - Chrom, - ChromStart, - ChromEnd, - Region, - Group) %>%
dplyr:: filter (Value == 1 ) %>%
dplyr:: select (- Value)
### assign and show
dat_region2module_list = dat
print (dim (dat))
head (dat)
A data.frame: 6 × 6
<chr>
<dbl>
<dbl>
<chr>
<chr>
<chr>
1
chr1
1079499
1080590
chr1:1079499-1080590
TF_Module
Module_01
2
chr1
1282057
1282312
chr1:1282057-1282312
TF_Module
Module_01
3
chr1
1305319
1306287
chr1:1305319-1306287
TF_Module
Module_01
4
chr1
1307969
1309285
chr1:1307969-1309285
TF_Module
Module_01
5
chr1
1318296
1318659
chr1:1318296-1318659
TF_Module
Module_01
6
chr1
1344799
1345337
chr1:1344799-1345337
TF_Module
Module_01
Check missing values
Code
### flag, per column, whether the long region-module table has missing values
dat <- dat_region2module_list
lst <- lapply(dat, function(vec) {
  anyNA(vec)
})
print(lst)
$Chrom
[1] FALSE
$ChromStart
[1] FALSE
$ChromEnd
[1] FALSE
$Region
[1] FALSE
$Group
[1] FALSE
$Label
[1] FALSE
Define column description
The output BED file has six tab-separated columns: the three standard BED coordinate fields followed by the region name, the annotation group, and the TF module label.
Code
### column metadata: one row per output column, giving its name and meaning
### NOTE(review): the fourth column is listed as "Name" here, while the table
### being saved (dat_region2module_list) calls it "Region" -- confirm which
### name downstream readers expect before changing either.
dat <- tribble(
  ~Name,        ~Note,
  "Chrom",      "Name of the chromosome",
  "ChromStart", "The starting position of the feature in the chromosome",
  "ChromEnd",   "The ending position of the feature in the chromosome",
  "Name",       "Region of the row",
  "Group",      "Annotation name",
  "Label",      "TF Modules"
)

### store and display the description table
dat_cname <- dat
fun_display_table(dat)
Chrom
Name of the chromosome
ChromStart
The starting position of the feature in the chromosome
ChromEnd
The ending position of the feature in the chromosome
Name
Region of the row
Group
Annotation name
Label
TF Modules
Save results
Save column description
Code
### output location: <FD_RES>/region/<TXT_FOLDER_OUT>/summary/description.tsv
txt_folder <- TXT_FOLDER_OUT
txt_fdiry  <- file.path(FD_RES, "region", txt_folder, "summary")
txt_fname  <- "description.tsv"
txt_fpath  <- file.path(txt_fdiry, txt_fname)

### create the folder together with any missing parent folders; without
### `recursive = TRUE` the call fails (silently, because warnings are off)
### when the parent folder does not exist yet, and the write below then errors
dir.create(txt_fdiry, recursive = TRUE, showWarnings = FALSE)

### write the column description table
dat <- dat_cname
write_tsv(dat, txt_fpath)
Save bed file: region2module list
Code
### output location: <FD_RES>/region/<TXT_FOLDER_OUT>/K562.hg38.TF_Module.bed.gz
### (TXT_FOLDER_OUT replaces the previously hard-coded, identical folder name)
txt_fdiry <- file.path(FD_RES, "region", TXT_FOLDER_OUT)
txt_fname <- "K562.hg38.TF_Module.bed.gz"
txt_fpath <- file.path(txt_fdiry, txt_fname)

### create the folder and any missing parents from within R; unlike a pasted
### `mkdir -p` shell command this is safe for paths containing spaces or
### shell metacharacters and does not depend on a POSIX shell
dir.create(txt_fdiry, recursive = TRUE, showWarnings = FALSE)

### write the long region-module table sorted by coordinate, without a
### header line
dat <- dat_region2module_list
dat <- dat %>% dplyr::arrange(Chrom, ChromStart, ChromEnd)
write_tsv(dat, txt_fpath, col_names = FALSE)
Save matrix summary
Code
### output folder: <FD_RES>/region/<TXT_FOLDER_OUT>/summary
### (TXT_FOLDER_OUT replaces the previously hard-coded, identical folder name)
txt_fdiry <- file.path(FD_RES, "region", TXT_FOLDER_OUT, "summary")

### create the folder and any missing parents from within R; unlike a pasted
### `mkdir -p` shell command this is safe for paths containing spaces or
### shell metacharacters and does not depend on a POSIX shell
dir.create(txt_fdiry, recursive = TRUE, showWarnings = FALSE)

### write region-module (Matrix), sorted by coordinate
txt_fname <- "matrix.region2module.tsv"
txt_fpath <- file.path(txt_fdiry, txt_fname)
dat <- dat_region2module_matrix
dat <- dat %>% dplyr::arrange(Chrom, ChromStart, ChromEnd)
write_tsv(dat, txt_fpath)

### write region-module (List), sorted by coordinate
txt_fname <- "data.region2module.tsv"
txt_fpath <- file.path(txt_fdiry, txt_fname)
dat <- dat_region2module_list
dat <- dat %>% dplyr::arrange(Chrom, ChromStart, ChromEnd)
write_tsv(dat, txt_fpath)

### write region-TF (Matrix), sorted by coordinate
txt_fname <- "matrix.region2TF.tsv"
txt_fpath <- file.path(txt_fdiry, txt_fname)
dat <- dat_region2tf_matrix
dat <- dat %>% dplyr::arrange(Chrom, ChromStart, ChromEnd)
write_tsv(dat, txt_fpath)

### write region-TF (List), sorted by coordinate
txt_fname <- "data.region2TF.tsv"
txt_fpath <- file.path(txt_fdiry, txt_fname)
dat <- dat_region2tf_list
dat <- dat %>% dplyr::arrange(Chrom, ChromStart, ChromEnd)
write_tsv(dat, txt_fpath)

### write TF-Module mapping (Matrix); rows are written in their existing order
txt_fname <- "matrix.module2TF.tsv"
txt_fpath <- file.path(txt_fdiry, txt_fname)
dat <- dat_module2tf_matrix
write_tsv(dat, txt_fpath)

### write TF-Module mapping (List); rows are written in their existing order
txt_fname <- "data.module2TF.tsv"
txt_fpath <- file.path(txt_fdiry, txt_fname)
dat <- dat_module2tf_list
write_tsv(dat, txt_fpath)