Prepare TF Modules 03

Explore and visualize the data

Set environment

Code
### Quietly load the project run configuration and the heatmap package.
### NOTE(review): the sourced config presumably defines show_env(), the FD_*
### directory variables and attaches the tidyverse helpers used below -- confirm.
suppressWarnings(suppressMessages(
    source("../run_config_project_sing.R")
))
suppressWarnings(suppressMessages(
    library(pheatmap)
))

### print the environment / directory settings of this session
show_env()
You are working on        Singularity: singularity_proj_encode_fcc 
BASE DIRECTORY (FD_BASE): /data/reddylab/Kuei 
REPO DIRECTORY (FD_REPO): /data/reddylab/Kuei/repo 
WORK DIRECTORY (FD_WORK): /data/reddylab/Kuei/work 
DATA DIRECTORY (FD_DATA): /data/reddylab/Kuei/data 

You are working with      ENCODE FCC 
PATH OF PROJECT (FD_PRJ): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC 
PROJECT RESULTS (FD_RES): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results 
PROJECT SCRIPTS (FD_EXE): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/scripts 
PROJECT DATA    (FD_DAT): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/data 
PROJECT NOTE    (FD_NBK): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/notebooks 
PROJECT DOCS    (FD_DOC): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/docs 
PROJECT LOG     (FD_LOG): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/log 
PROJECT REF     (FD_REF): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/references 

Set global variables

Code
### name of the results sub-folder (below <FD_RES>/region) read by this notebook
TXT_FOLDER_REGION <- "module_tf_shannon"

Import data

Code
### locate the summary folder of the selected region set
txt_folder <- TXT_FOLDER_REGION
txt_fdiry  <- file.path(FD_RES, "region", txt_folder, "summary")

### list the folder content, one entry per line
vec <- list.files(txt_fdiry)
for (txt in vec) {
    cat(txt, "\n")
}
data.module2TF.tsv 
data.region2module.tsv 
data.region2TF.tsv 
description.tsv 
matrix.module2TF.tsv 
matrix.region2module.tsv 
matrix.region2TF.tsv 
metadata.label.tsv 
Code
### build the path of the region-by-module matrix file
txt_folder <- TXT_FOLDER_REGION
txt_fdiry  <- file.path(FD_RES, "region", txt_folder, "summary")
txt_fname  <- "matrix.region2module.tsv"
txt_fpath  <- file.path(txt_fdiry, txt_fname)

### import the tab-separated table (column-type message silenced)
dat <- read_tsv(file = txt_fpath, show_col_types = FALSE)

### keep a named copy, then report its dimensions and first three rows
dat_region2module_matrix <- dat
print(dim(dat_region2module_matrix))
fun_display_table(head(dat_region2module_matrix, n = 3))
[1] 173996     74
Chrom ChromStart ChromEnd Region Module_01 Module_02 Module_03 Module_04 Module_05 Module_06 Module_07 Module_08 Module_09 Module_10 Module_11 Module_12 Module_13 Module_14 Module_15 Module_16 Module_17 Module_18 Module_19 Module_20 Module_21 Module_22 Module_23 Module_24 Module_25 Module_26 Module_27 Module_28 Module_29 Module_30 Module_31 Module_33 Module_34 Module_35 Module_36 Module_37 Module_39 Module_40 Module_41 Module_42 Module_43 Module_44 Module_45 Module_46 Module_47 Module_48 Module_49 Module_50 Module_51 Module_52 Module_53 Module_54 Module_55 Module_56 Module_57 Module_58 Module_59 Module_61 Module_63 Module_64 Module_65 Module_66 Module_67 Module_68 Module_71 Module_72 Module_73 Module_74 Module_75 Module_77
chr1 115702 115751 chr1:115702-115751 0 1 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
chr1 118585 118665 chr1:118585-118665 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
chr1 136446 136510 chr1:136446-136510 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
Code
### build the path of the region-by-TF matrix file
txt_folder <- TXT_FOLDER_REGION
txt_fdiry  <- file.path(FD_RES, "region", txt_folder, "summary")
txt_fname  <- "matrix.region2TF.tsv"
txt_fpath  <- file.path(txt_fdiry, txt_fname)

### import the tab-separated table (column-type message silenced)
dat <- read_tsv(file = txt_fpath, show_col_types = FALSE)

### keep a named copy, then report its dimensions and first three rows
dat_region2TF_matrix <- dat
print(dim(dat_region2TF_matrix))
fun_display_table(head(dat_region2TF_matrix, n = 3))
[1] 173996    269
Chrom ChromStart ChromEnd Region ADNP AGO1 ARHGAP35 ARID2 ARID3A ARID4B ARNT ATF1 ATF2 ATF3 ATF4 ATF6 ATF7 BCL6 BHLHE40 BRD4 BRF2 CAMTA2 CBFB CBX1 CBX2 CBX5 CCAR2 CEBPB CEBPG CEBPZ CHCHD3 CHD2 CHD4 CREB1 CREB3 CREM CTCF DDIT3 DMTF1 DPF2 E2F1 E2F4 E2F5 E2F8 EGR1 EHMT2 ELF1 ELF4 ELK1 EP300 ERF ESRRA ETS1 ETV5 ETV6 EZH2 FIP1L1 FOSL1 FOXA1 FOXA3 FOXJ3 FOXK1 FOXK2 FOXM1 FOXP1 FOXP4 FUS GABPA GABPB1 GATA2 GATAD2A GMEB1 GTF2F1 HBP1 HCFC1 HDAC1 HDAC2 HDAC6 HINFP HIVEP1 HMG20A HMG20B HNRNPH1 HNRNPK HNRNPL HNRNPLL HNRNPUL1 HOMEZ IKZF1 IRF1 IRF2 IRF9 JUN JUNB JUND KAT2B KAT7 KAT8 KDM1A KDM5B KLF13 KLF16 KLF6 LCOR MAFF MAFG MAFK MAX MAZ MBD1 MEF2A MEF2D MEIS2 MGA MLX MNT MTA1 MTA3 MTF1 MXD1 MXI1 MYBL2 MYC MYNN NBN NCOA1 NCOA2 NCOR1 NFATC3 NFE2 NFE2L1 NFIC NFRKB NFYA NONO NR1H2 NR2C2 NR2F1 NR2F2 NR2F6 NR3C1 NRF1 PATZ1 PBX2 PCBP1 PCBP2 PHB2 PHF20 PHF21A PHF8 POLR2A POLR2AphosphoS2 POLR2AphosphoS5 POLR2G PRDM10 PRPF4 PTBP1 RAD21 RAD51 RBFOX2 RBM22 RBM39 RBPJ RCOR1 RELA REST RFX1 RFX5 RNF2 RNF219 RREB1 SAFB2 SETDB1 SFPQ SIN3A SIN3B SKIL SMAD1 SMAD3 SMAD4 SMARCC2 SMARCE1 SMC3 SNRNP70 SOX6 SP1 SRF SRSF1 SRSF9 STAG1 STAT5B SUZ12 TAF15 TARDBP TBL1XR1 TBP TCF12 TCF3 TCF7 TCF7L2 TEAD1 TEAD2 TEAD4 TFAP4 TFDP1 TFE3 TGIF2 THRA THRB TOE1 TRIM24 U2AF1 U2AF2 UBTF USF1 USF2 XRCC5 YY1 ZBTB2 ZBTB26 ZBTB33 ZBTB34 ZBTB40 ZBTB49 ZBTB7A ZBTB8A ZC3H4 ZC3H8 ZFP1 ZFP36 ZFP91 ZFX ZHX1 ZKSCAN1 ZKSCAN8 ZMYM3 ZNF12 ZNF121 ZNF124 ZNF143 ZNF146 ZNF217 ZNF224 ZNF232 ZNF24 ZNF253 ZNF263 ZNF274 ZNF280B ZNF281 ZNF282 ZNF3 ZNF318 ZNF354B ZNF384 ZNF431 ZNF511 ZNF512 ZNF589 ZNF639 ZNF644 ZNF7 ZNF707 ZNF740 ZNF766 ZNF780A ZNF83 ZSCAN29 ZZZ3
chr1 115702 115751 chr1:115702-115751 0 0 0 0 1 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 1 0 1 1 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 1 1 0 0 0 0 0 0 1 1 0 1 0 1 0 1 1 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
chr1 118585 118665 chr1:118585-118665 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
chr1 136446 136510 chr1:136446-136510 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
Code
### build the path of the TF-by-module matrix file
txt_folder <- TXT_FOLDER_REGION
txt_fdiry  <- file.path(FD_RES, "region", txt_folder, "summary")
txt_fname  <- "matrix.module2TF.tsv"
txt_fpath  <- file.path(txt_fdiry, txt_fname)

### import the tab-separated table (column-type message silenced)
dat <- read_tsv(file = txt_fpath, show_col_types = FALSE)

### keep a named copy, then report its dimensions and first three rows
dat_module2TF_matrix <- dat
print(dim(dat_module2TF_matrix))
fun_display_table(head(dat_module2TF_matrix, n = 3))
[1] 234  71
TF Module_01 Module_02 Module_03 Module_04 Module_05 Module_06 Module_07 Module_08 Module_09 Module_10 Module_11 Module_12 Module_13 Module_14 Module_15 Module_16 Module_17 Module_18 Module_19 Module_20 Module_21 Module_22 Module_23 Module_24 Module_25 Module_26 Module_27 Module_28 Module_29 Module_30 Module_31 Module_33 Module_34 Module_35 Module_36 Module_37 Module_39 Module_40 Module_41 Module_42 Module_43 Module_44 Module_45 Module_46 Module_47 Module_48 Module_49 Module_50 Module_51 Module_52 Module_53 Module_54 Module_55 Module_56 Module_57 Module_58 Module_59 Module_61 Module_63 Module_64 Module_65 Module_66 Module_67 Module_68 Module_71 Module_72 Module_73 Module_74 Module_75 Module_77
ADNP 0 0 1 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
AGO1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 1 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0
ARHGAP35 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0

Explore data

Region-Module: Module locations across the genome

Code
### drop the coordinate columns and move the region label into the row names,
### leaving only the Module_* indicator columns
dat <- dat_region2module_matrix %>%
    dplyr::select(-c(Chrom, ChromStart, ChromEnd)) %>%
    column_to_rownames(var = "Region")

### store the arranged matrix, then show its dimensions and first rows
dat_region2module_arrange <- dat
print(dim(dat))
head(dat)
[1] 173996     70
A data.frame: 6 × 70
Module_01 Module_02 Module_03 Module_04 Module_05 Module_06 Module_07 Module_08 Module_09 Module_10 Module_65 Module_66 Module_67 Module_68 Module_71 Module_72 Module_73 Module_74 Module_75 Module_77
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
chr1:115702-115751 0 1 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
chr1:118585-118665 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
chr1:136446-136510 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
chr1:139031-139110 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
chr1:268005-268051 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
chr1:586187-586203 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0

Summarize counts of the binary matrix

Code
### count by row: number of modules annotated to each region.
### rowSums() is the vectorised equivalent of apply(dat, 1, sum) and avoids
### looping over every row of the region-by-module table.
dat <- dat_region2module_arrange
vec <- rowSums(dat)
dat <- tibble(Region = names(vec), Count = vec)
dat <- dat %>% dplyr::arrange(Count)

### assign and summary
dat_region_stats <- dat
print(dim(dat))
print(summary(vec))

### show top/bottom of table (regions with the fewest / most modules)
fun_display_table(
    bind_rows(
        head(dat, 6),
        tail(dat, 6)
    )
)
[1] 173996      2
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  1.000   1.000   2.000   2.378   3.000  14.000 
Region Count
chr1:118585-118665 1
chr1:136446-136510 1
chr1:586187-586203 1
chr1:905285-905486 1
chr1:905662-905685 1
chr1:912602-912614 1
chr17:46795428-46795947 12
chr18:23528512-23530101 12
chr19:6603491-6604311 12
chr8:102809995-102811632 12
chr19:10835074-10837434 13
chr10:71860432-71861688 14
Code
### count by column: number of regions annotated with each module.
### colSums() is the vectorised equivalent of apply(dat, 2, sum).
dat <- dat_region2module_arrange
vec <- colSums(dat)
dat <- tibble(Module = names(vec), Count = vec)
dat <- dat %>% dplyr::arrange(Count)

### assign and summary
dat_module_stats <- dat
print(dim(dat))
print(summary(vec))

### show top/bottom of table (modules with the fewest / most regions)
fun_display_table(
    bind_rows(
        head(dat, 6),
        tail(dat, 6)
    )
)
[1] 70  2
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
    636    2052    4620    5910    8176   24267 
Module Count
Module_08 636
Module_65 636
Module_53 695
Module_26 802
Module_74 849
Module_09 916
Module_35 13060
Module_27 13750
Module_15 17534
Module_02 18655
Module_06 21510
Module_44 24267

Visualize the summary

Code
### histogram (100 bins) of the per-region counts, drawn on a 10 x 4 canvas
options(repr.plot.height = 4, repr.plot.width = 10)

dat <- dat_region_stats
gpt <- ggplot(data = dat, mapping = aes(x = Count)) +
    geom_histogram(bins = 100) +
    theme_cowplot() +
    background_grid()

print(gpt)

Code
### histogram (100 bins) of the number of regions annotated per module
dat <- dat_module_stats
gpt <- ggplot(dat, aes(x = Count)) +
    geom_histogram(bins = 100) +
    theme_cowplot() +
    background_grid() +
    ### x-axis label: the stray, unbalanced ")" at the end was removed
    labs(x = "#{Region} each module binds", y = "Count")

options(repr.plot.height = 4, repr.plot.width = 10)
print(gpt)

Subset and plot the matrix

Code
### regions whose count is above 5
dat <- dplyr::filter(dat_region_stats, Count > 5)
idx <- dat$Region
idx_region <- idx

### modules whose count is above 10
dat <- dplyr::filter(dat_module_stats, Count > 10)
idx <- dat$Module
idx_module <- idx
Code
### init
dat = dat_region2module_arrange
print(dim(dat))

### subset the matrix
dat = dat[idx_region, idx_module]
print(dim(dat))

### subset the matrix
set.seed(123)
idx = sample(1:nrow(dat), size=7000)
dat = dat[idx,]
print(dim(dat))

### assign
dat_region2module_subset = dat
[1] 173996     70
[1] 7574   70
[1] 7000   70
Code
### Binary heatmap of the sampled region-by-module matrix, transposed so that
### modules are rows and regions are columns (0 = white, 1 = black).
dat <- dat_region2module_subset

### Set the display size explicitly instead of inheriting whatever the
### previous plot cell left behind; 10 x 25 matches the size declared in the
### cell that saves this same figure and follows the other heatmap cells,
### which all set their size before drawing.
options(repr.plot.height = 10, repr.plot.width = 25)
pheatmap(
    t(dat),
    color = c("white", "black"),
    breaks = c(0, 0.5, 1),
    ### dendrograms are given zero height, so no trees are drawn
    treeheight_row = 0,
    treeheight_col = 0,
    legend = FALSE,
    ### region (column) labels are hidden
    show_colnames = FALSE,
    fontsize_row = 7,
    main = "Module annotation across genome"
)

Code
### output file for the region-by-module heatmap (current directory)
txt_fdiry <- "."
txt_fname <- "fig.matrix.region2module.png"
txt_fpath <- file.path(txt_fdiry, txt_fname)

dat <- dat_region2module_subset
options(repr.plot.height = 10, repr.plot.width = 25)

### Same heatmap as displayed in the notebook, written to `txt_fpath`.
### NOTE(review): the file is saved with width = 10 while the repr options
### above request width 25 -- confirm which size is intended.
pheatmap(
    mat            = t(dat),
    main           = "Module annotation across genome",
    breaks         = c(0, 0.5, 1),
    color          = c("white", "black"),
    legend         = FALSE,
    show_colnames  = FALSE,
    treeheight_row = 0,
    treeheight_col = 0,
    fontsize_row   = 7,
    filename       = txt_fpath,
    width          = 10,
    height         = 10
)

Region-TF: Summarized TF binding sites across the genome

Code
### arrange the matrix
dat = dat_region2TF_matrix
dat = dat %>% 
    dplyr::select(-Chrom, -ChromStart, -ChromEnd) %>% 
    column_to_rownames("Region")

### assign and show
dat = dat_region2TF_arrange = dat
print(dim(dat))
head(dat)
[1] 173996    265
A data.frame: 6 × 265
ADNP AGO1 ARHGAP35 ARID2 ARID3A ARID4B ARNT ATF1 ATF2 ATF3 ZNF639 ZNF644 ZNF7 ZNF707 ZNF740 ZNF766 ZNF780A ZNF83 ZSCAN29 ZZZ3
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
chr1:115702-115751 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
chr1:118585-118665 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
chr1:136446-136510 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
chr1:139031-139110 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
chr1:268005-268051 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
chr1:586187-586203 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0

Summarize counts of the binary matrix

Code
### count by row
dat = dat_region2TF_arrange
vec = apply(dat, 1, sum)
dat = tibble(Region = names(vec), Count = vec)
dat = dat %>% dplyr::arrange(Count)

### assign and summary
dat_region_stats = dat
print(dim(dat))
print(summary(vec))

### show top/bottom of table
fun_display_table(
    bind_rows(
        head(dat, 6), 
        tail(dat, 6)
    )
)
[1] 173996      2
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   2.00    3.00    6.00   15.74   15.00  214.00 
Region Count
chr1:1041445-1041646 2
chr1:1081931-1082133 2
chr1:1247497-1247699 2
chr1:1527412-1527616 2
chr1:1548746-1548953 2
chr1:1616720-1616967 2
chr7:139339776-139342125 188
chr6:27131655-27135406 195
chr11:62839262-62842225 205
chr1:44720944-44722487 212
chr1:11907607-11910232 214
chr1:28647570-28649255 214
Code
### count by column
dat = dat_region2TF_arrange
vec = apply(dat, 2, sum)
dat = tibble(TF = names(vec), Count = vec)
dat = dat %>% dplyr::arrange(Count)

### assign and summary
dat_tfs_stats = dat
print(dim(dat))
print(summary(vec))

### show top/bottom of table
fun_display_table(
    bind_rows(
        head(dat, 6), 
        tail(dat, 6)
    )
)
[1] 265   2
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
      3    1694    7159   10333   16695   42737 
TF Count
SRSF9 3
SAFB2 10
BRF2 14
ZC3H4 15
ZNF217 30
TGIF2 31
RNF2 33892
IKZF1 33910
MEIS2 34477
REST 38175
RCOR1 40914
JUND 42737

Visualize the summary

Code
### histogram (100 bins) of the per-region counts, drawn on a 10 x 4 canvas
options(repr.plot.height = 4, repr.plot.width = 10)

dat <- dat_region_stats
gpt <- ggplot(data = dat, mapping = aes(x = Count)) +
    geom_histogram(bins = 100) +
    theme_cowplot() +
    background_grid()

print(gpt)

Code
### histogram (100 bins) of the number of regions annotated per TF
dat <- dat_tfs_stats
gpt <- ggplot(dat, aes(x = Count)) +
    geom_histogram(bins = 100) +
    theme_cowplot() +
    background_grid() +
    ### x-axis label: the stray, unbalanced ")" at the end was removed
    labs(x = "#{Region} each TF binds", y = "Count")

options(repr.plot.height = 4, repr.plot.width = 10)
print(gpt)

Subset and plot the matrix

Code
### regions whose count is above 10
dat <- dplyr::filter(dat_region_stats, Count > 10)
idx <- dat$Region
idx_region <- idx

### TFs whose count is above 1000, excluding names that contain "phospho"
dat <- dplyr::filter(
    dat_tfs_stats,
    Count > 1000,
    !str_detect(TF, "phospho")
)
idx <- dat$TF
idx_tfs <- idx
Code
### init
dat = dat_region2TF_arrange
print(dim(dat))

### subset the matrix
dat = dat[idx_region, idx_tfs]
print(dim(dat))

### subset the matrix
set.seed(123)
idx = sample(1:nrow(dat), size=10000)
dat = dat[idx,]
print(dim(dat))

### assign
dat_region2TF_subset = dat
[1] 173996    265
[1] 56957   208
[1] 10000   208
Code
### Binary heatmap of the sampled region-by-TF matrix, transposed so that TFs
### are rows and regions are columns (0 = white, 1 = black), on a tall canvas.
options(repr.plot.width = 10, repr.plot.height = 20)
dat <- dat_region2TF_subset

pheatmap(
    mat            = t(dat),
    main           = "TF binding sites across genome",
    breaks         = c(0, 0.5, 1),
    color          = c("white", "black"),
    legend         = FALSE,
    show_colnames  = FALSE,
    treeheight_row = 0,
    treeheight_col = 0,
    fontsize_row   = 7
)

Code
### output file for the region-by-TF heatmap (current directory)
txt_fdiry <- "."
txt_fname <- "fig.matrix.region2TF.png"
txt_fpath <- file.path(txt_fdiry, txt_fname)

dat <- dat_region2TF_subset

### Same heatmap as displayed in the notebook, written to `txt_fpath`
### with width 10 and height 20.
pheatmap(
    mat            = t(dat),
    main           = "TF binding sites across genome",
    breaks         = c(0, 0.5, 1),
    color          = c("white", "black"),
    legend         = FALSE,
    show_colnames  = FALSE,
    treeheight_row = 0,
    treeheight_col = 0,
    fontsize_row   = 7,
    filename       = txt_fpath,
    width          = 10,
    height         = 20
)

Module-TF: composition of TFs for each module

Code
### arrange matrix
dat = dat_module2TF_matrix
dat = dat %>% column_to_rownames("TF")

### assign and show
dat_module2TF_arrange = dat
print(dim(dat))
fun_display_table(head(dat))
[1] 234  70
Module_01 Module_02 Module_03 Module_04 Module_05 Module_06 Module_07 Module_08 Module_09 Module_10 Module_11 Module_12 Module_13 Module_14 Module_15 Module_16 Module_17 Module_18 Module_19 Module_20 Module_21 Module_22 Module_23 Module_24 Module_25 Module_26 Module_27 Module_28 Module_29 Module_30 Module_31 Module_33 Module_34 Module_35 Module_36 Module_37 Module_39 Module_40 Module_41 Module_42 Module_43 Module_44 Module_45 Module_46 Module_47 Module_48 Module_49 Module_50 Module_51 Module_52 Module_53 Module_54 Module_55 Module_56 Module_57 Module_58 Module_59 Module_61 Module_63 Module_64 Module_65 Module_66 Module_67 Module_68 Module_71 Module_72 Module_73 Module_74 Module_75 Module_77
ADNP 0 0 1 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
AGO1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 1 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0
ARHGAP35 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
ARID2 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1
ARID3A 1 1 0 0 0 0 0 1 0 1 0 1 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 1 0 1 0 0 0 1 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0
ARID4B 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
Code
### Binary heatmap of the TF-by-module table, transposed so that modules are
### rows and TFs are columns (0 = white, 1 = black), on a wide canvas.
options(repr.plot.width = 25, repr.plot.height = 10)
dat <- dat_module2TF_arrange

pheatmap(
    mat          = t(dat),
    main         = "Module-TF mapping",
    breaks       = c(0, 0.5, 1),
    color        = c("white", "black"),
    legend       = FALSE,
    fontsize     = 20,
    fontsize_col = 5,
    fontsize_row = 7
)

Code
### output file for the module-TF mapping heatmap (current directory)
txt_fdiry <- "."
txt_fname <- "fig.matrix.module_tf_mapping.png"
txt_fpath <- file.path(txt_fdiry, txt_fname)

dat <- dat_module2TF_arrange
options(repr.plot.width = 25, repr.plot.height = 10)

### Same heatmap as displayed in the notebook, written to `txt_fpath`
### with width 25 and height 10.
pheatmap(
    mat          = t(dat),
    main         = "Module-TF mapping",
    breaks       = c(0, 0.5, 1),
    color        = c("white", "black"),
    legend       = FALSE,
    fontsize     = 20,
    fontsize_col = 5,
    fontsize_row = 7,
    filename     = txt_fpath,
    width        = 25,
    height       = 10
)