Annotation of ATAC (ASTARR Input) 11

Arrange region pairs (Main)

Set environment

Code
### load the project configuration script quietly (messages and warnings are suppressed);
### NOTE(review): this script is expected to define the FD_* path variables and show_env() used below - confirm
suppressMessages(suppressWarnings(source("../run_config_project_sing.R")))
### print the working environment and project directories
show_env()
You are working on        Singularity: singularity_proj_encode_fcc 
BASE DIRECTORY (FD_BASE): /data/reddylab/Kuei 
REPO DIRECTORY (FD_REPO): /data/reddylab/Kuei/repo 
WORK DIRECTORY (FD_WORK): /data/reddylab/Kuei/work 
DATA DIRECTORY (FD_DATA): /data/reddylab/Kuei/data 

You are working with      ENCODE FCC 
PATH OF PROJECT (FD_PRJ): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC 
PROJECT RESULTS (FD_RES): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results 
PROJECT SCRIPTS (FD_EXE): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/scripts 
PROJECT DATA    (FD_DAT): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/data 
PROJECT NOTE    (FD_NBK): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/notebooks 
PROJECT DOCS    (FD_DOC): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/docs 
PROJECT LOG     (FD_LOG): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/log 
PROJECT REF     (FD_REF): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/references 

Set global variables

Code
### list the files available in the region summary folder, one name per line
txt_fdiry = file.path(FD_RES, "region", "summary")
vec = list.files(txt_fdiry)
for (txt in vec){
    cat(txt, "\n")
}
metadata.label.astarr_macs_merge.tsv 
metadata.label.chipseq_histone.tsv 
metadata.label.chipseq_subset.tsv 
metadata.label.chipseq_tf_full.tsv 
metadata.label.hic.tsv 
metadata.label.main.tsv 
metadata.label.ocr.tsv 
metadata.label.region_for_analysis.tsv 
Code
### folder holding the region label metadata tables
TXT_FDIRY_REGION_LABEL   = file.path(FD_RES, "region", "summary")
### label table for region set A (merged ASTARR MACS input peaks)
TXT_FNAME_REGION_LABEL_A = "metadata.label.astarr_macs_merge.tsv"
### label table for region set B (main annotation region sets)
TXT_FNAME_REGION_LABEL_B = "metadata.label.main.tsv"

Import data

Code
### build the path of the region A label table
txt_fdiry = TXT_FDIRY_REGION_LABEL
txt_fname = TXT_FNAME_REGION_LABEL_A
txt_fpath = file.path(txt_fdiry, txt_fname)

### read the table (columns: Folder, FName, Label, FPath) and display it
dat_region_label_A = read_tsv(txt_fpath, show_col_types = FALSE)
dat_region_label_A
A spec_tbl_df: 2 × 4
Folder FName Label FPath
<chr> <chr> <chr> <chr>
fcc_astarr_macs_merge K562.hg38.ASTARR.macs.KS91.input.rep_all.max_overlaps.q5.bed.gz fcc_astarr_macs_input_overlap /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region/fcc_astarr_macs_merge/K562.hg38.ASTARR.macs.KS91.input.rep_all.max_overlaps.q5.bed.gz
fcc_astarr_macs_merge K562.hg38.ASTARR.macs.KS91.input.rep_all.union.q5.bed.gz fcc_astarr_macs_input_union /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region/fcc_astarr_macs_merge/K562.hg38.ASTARR.macs.KS91.input.rep_all.union.q5.bed.gz
Code
### build the path of the region B label table
txt_fdiry = TXT_FDIRY_REGION_LABEL
txt_fname = TXT_FNAME_REGION_LABEL_B
txt_fpath = file.path(txt_fdiry, txt_fname)

### read the table (columns: Folder, FName, Label, FPath) and display it
dat_region_label_B = read_tsv(txt_fpath, show_col_types = FALSE)
dat_region_label_B
A spec_tbl_df: 25 × 4
Folder FName Label FPath
<chr> <chr> <chr> <chr>
encode_chromatin_states K562.hg38.cCREs.silencer_rest.bed.gz encode_ccres_silencer_rest /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region/encode_chromatin_states/K562.hg38.cCREs.silencer_rest.bed.gz
encode_chromatin_states K562.hg38.cCREs.silencer_starr.bed.gz encode_ccres_silencer_starr /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region/encode_chromatin_states/K562.hg38.cCREs.silencer_starr.bed.gz
encode_chromatin_states K562.hg38.ENCSR365YNI.ENCFF106BGJ.ChromHMM.simplified.bed.gz encode_chromhmm_ENCFF106BGJ /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region/encode_chromatin_states/K562.hg38.ENCSR365YNI.ENCFF106BGJ.ChromHMM.simplified.bed.gz
encode_chromatin_states K562.hg38.ENCSR913HQX.ENCFF286VQG.cCREs.simplified.bed.gz encode_ccres_ENCFF286VQG /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region/encode_chromatin_states/K562.hg38.ENCSR913HQX.ENCFF286VQG.cCREs.simplified.bed.gz
encode_e2g_benchmark K562.hg38.ENCODE_E2G.benchmark.bed.gz encode_e2g_benchmark /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region/encode_e2g_benchmark/K562.hg38.ENCODE_E2G.benchmark.bed.gz
fcc_astarr_csaw K562.hg38.ASTARR.csaw.KS91.bed.gz fcc_astarr_csaw_KS91 /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region/fcc_astarr_csaw/K562.hg38.ASTARR.csaw.KS91.bed.gz
fcc_astarr_csaw K562.hg38.ASTARR.csaw.KSMerge.bed.gz fcc_astarr_csaw_KSMerge /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region/fcc_astarr_csaw/K562.hg38.ASTARR.csaw.KSMerge.bed.gz
fcc_astarr_macs_narrowpeak K562.hg38.ASTARR.macs.KS91.Input.rep1.narrowpeak.bed.gz fcc_astarr_macs_input_rep1 /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region/fcc_astarr_macs_narrowpeak/K562.hg38.ASTARR.macs.KS91.Input.rep1.narrowpeak.bed.gz
fcc_astarr_macs_narrowpeak K562.hg38.ASTARR.macs.KS91.Input.rep2.narrowpeak.bed.gz fcc_astarr_macs_input_rep2 /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region/fcc_astarr_macs_narrowpeak/K562.hg38.ASTARR.macs.KS91.Input.rep2.narrowpeak.bed.gz
fcc_astarr_macs_narrowpeak K562.hg38.ASTARR.macs.KS91.Input.rep3.narrowpeak.bed.gz fcc_astarr_macs_input_rep3 /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region/fcc_astarr_macs_narrowpeak/K562.hg38.ASTARR.macs.KS91.Input.rep3.narrowpeak.bed.gz
fcc_astarr_macs_narrowpeak K562.hg38.ASTARR.macs.KS91.Input.rep4.narrowpeak.bed.gz fcc_astarr_macs_input_rep4 /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region/fcc_astarr_macs_narrowpeak/K562.hg38.ASTARR.macs.KS91.Input.rep4.narrowpeak.bed.gz
fcc_astarr_macs_narrowpeak K562.hg38.ASTARR.macs.KS91.Input.rep5.narrowpeak.bed.gz fcc_astarr_macs_input_rep5 /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region/fcc_astarr_macs_narrowpeak/K562.hg38.ASTARR.macs.KS91.Input.rep5.narrowpeak.bed.gz
fcc_astarr_macs_narrowpeak K562.hg38.ASTARR.macs.KS91.Input.rep6.narrowpeak.bed.gz fcc_astarr_macs_input_rep6 /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region/fcc_astarr_macs_narrowpeak/K562.hg38.ASTARR.macs.KS91.Input.rep6.narrowpeak.bed.gz
fcc_crispri_growth K562.hg38.CRISPRi_Growth.signif.bed.gz fcc_crispri_growth_signif /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region/fcc_crispri_growth/K562.hg38.CRISPRi_Growth.signif.bed.gz
fcc_crispri_growth K562.hg38.CRISPRi_Growth.total.bed.gz fcc_crispri_growth_total /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region/fcc_crispri_growth/K562.hg38.CRISPRi_Growth.total.bed.gz
fcc_crispri_hcrff K562.hg38.CRISPRi_HCRFF.CASA.bed.gz fcc_crispri_hcrff_casa /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region/fcc_crispri_hcrff/K562.hg38.CRISPRi_HCRFF.CASA.bed.gz
fcc_starrmpra_junke K562.hg38.ASTARR.junke.bed.gz fcc_starrmpra_junke_astarr /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region/fcc_starrmpra_junke/K562.hg38.ASTARR.junke.bed.gz
fcc_starrmpra_junke K562.hg38.eSTARR.junke.bed.gz fcc_starrmpra_junke_estarr /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region/fcc_starrmpra_junke/K562.hg38.eSTARR.junke.bed.gz
fcc_starrmpra_junke K562.hg38.LMPRA.junke.bed.gz fcc_starrmpra_junke_lmpra /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region/fcc_starrmpra_junke/K562.hg38.LMPRA.junke.bed.gz
fcc_starrmpra_junke K562.hg38.TMPRA.junke.bed.gz fcc_starrmpra_junke_tmpra /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region/fcc_starrmpra_junke/K562.hg38.TMPRA.junke.bed.gz
fcc_starrmpra_junke K562.hg38.WSTARR.junke.bed.gz fcc_starrmpra_junke_wstarr /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region/fcc_starrmpra_junke/K562.hg38.WSTARR.junke.bed.gz
genome_cres K562.hg38.label_cres.bed.gz genome_cres /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region/genome_cres/K562.hg38.label_cres.bed.gz
genome_tss K562.hg38.TSS.selected_by_highest_Pol2_signal.bed.gz genome_tss_pol2 /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region/genome_tss/K562.hg38.TSS.selected_by_highest_Pol2_signal.bed.gz
genome_tss K562.hg38.TSS.selected_by_highest_Pol2_signal.filtered_by_RNAseq_TPM.bed.gz genome_tss_pol2_rnaseq /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region/genome_tss/K562.hg38.TSS.selected_by_highest_Pol2_signal.filtered_by_RNAseq_TPM.bed.gz
module_tf_shannon K562.hg38.TF_Module.bed.gz module_tf_shannon /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region/module_tf_shannon/K562.hg38.TF_Module.bed.gz

Check column names

Code
### collect the distinct region folders listed in the region A label table
dat = dat_region_label_A
vec = unique(dat$Folder)
vec_txt_region_folder_A = vec

### print the column names recorded for each region A folder
for (txt_region_folder_A in vec_txt_region_folder_A){
    ### get column names for region A
    ### (taken from the Name column of the folder's summary/description.tsv)
    txt_fdiry = file.path(FD_RES, "region", txt_region_folder_A, "summary")
    txt_fname = "description.tsv"
    txt_fpath = file.path(txt_fdiry, txt_fname)
    dat_cname = read_tsv(txt_fpath, show_col_types = FALSE)
    vec_txt_cname_A = dat_cname$Name

    ### show results
    cat(txt_region_folder_A, "\n")
    print(vec_txt_cname_A)
    cat("\n")
}
fcc_astarr_macs_merge 
[1] "Chrom"      "ChromStart" "ChromEnd"   "Region"    
Code
### collect the distinct region folders listed in the region B label table
dat = dat_region_label_B
vec = unique(dat$Folder)
vec_txt_region_folder_B = vec

### print the column names recorded for each region B folder
for (txt_region_folder_B in vec_txt_region_folder_B){
    ### get column names for region B
    ### (taken from the Name column of the folder's summary/description.tsv)
    txt_fdiry = file.path(FD_RES, "region", txt_region_folder_B, "summary")
    txt_fname = "description.tsv"
    txt_fpath = file.path(txt_fdiry, txt_fname)
    dat_cname = read_tsv(txt_fpath, show_col_types = FALSE)
    vec_txt_cname_B = dat_cname$Name

    ### show results
    cat(txt_region_folder_B, "\n")
    print(vec_txt_cname_B)
    cat("\n")
}
encode_chromatin_states 
[1] "Chrom"      "ChromStart" "ChromEnd"   "Name"       "Group"     
[6] "Label"     

encode_e2g_benchmark 
 [1] "Chrom"      "ChromStart" "ChromEnd"   "Name"       "Score"     
 [6] "Zcore"      "Region"     "Target"     "NLog10P"    "Regulated" 
[11] "Source"     "Group"      "Label"     

fcc_astarr_csaw 
 [1] "Chrom"            "ChromStart"       "ChromEnd"         "Name"            
 [5] "Score"            "Strand"           "Log2FC"           "Input_CPM"       
 [9] "Output_CPM"       "MinusLog10PValue" "MinusLog10QValue" "Dataset"         
[13] "Group"            "Label"           

fcc_astarr_macs_narrowpeak 
 [1] "Chrom"       "ChromStart"  "ChromEnd"    "Region"      "Score"      
 [6] "Strand"      "SignalValue" "PValue"      "QValue"      "Peak"       
[11] "Group"       "Label"       "Note"       

fcc_crispri_growth 
[1] "Chrom"      "ChromStart" "ChromEnd"   "Region"     "Guide_ID"  
[6] "Log2FC"     "Padj"       "Group"      "Label"     

fcc_crispri_hcrff 
 [1] "Chrom"        "ChromStart"   "ChromEnd"     "Name"         "Score"       
 [6] "Strand"       "Gene_Symbol"  "Gene_Ensembl" "Group"        "Label"       

fcc_starrmpra_junke 
[1] "Chrom"      "ChromStart" "ChromEnd"   "Name"       "Score"     
[6] "Strand"     "Group"      "Label"      "Dataset"   

genome_cres 
 [1] "Chrom"       "ChromStart"  "ChromEnd"    "Region"      "Name"       
 [6] "Type"        "Target"      "Target_TSS"  "Description" "Reference"  
[11] "Note"        "Group"       "Label"      

genome_tss 
[1] "Chrom"      "ChromStart" "ChromEnd"   "Region"     "Gene"      
[6] "Score"      "Group"      "Label"     

module_tf_shannon 
[1] "Chrom"      "ChromStart" "ChromEnd"   "Name"       "Group"     
[6] "Label"     

Read region intersections and arrange tables

Initialize loop inputs

Code
### convert the region A label table into a list with one entry per row;
### each entry is itself a list holding Folder, FName, Label and FPath
dat = dat_region_label_A

### seq_len() instead of 1:nrow(): for a zero-row table 1:nrow(dat) evaluates
### to c(1, 0), which would not produce an empty list
lst = split(dat, seq_len(nrow(dat)))
lst = lapply(lst, as.list)

### assign and show
lst_region_label_A = lst
lst
$`1`
$Folder
'fcc_astarr_macs_merge'
$FName
'K562.hg38.ASTARR.macs.KS91.input.rep_all.max_overlaps.q5.bed.gz'
$Label
'fcc_astarr_macs_input_overlap'
$FPath
'/data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region/fcc_astarr_macs_merge/K562.hg38.ASTARR.macs.KS91.input.rep_all.max_overlaps.q5.bed.gz'
$`2`
$Folder
'fcc_astarr_macs_merge'
$FName
'K562.hg38.ASTARR.macs.KS91.input.rep_all.union.q5.bed.gz'
$Label
'fcc_astarr_macs_input_union'
$FPath
'/data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region/fcc_astarr_macs_merge/K562.hg38.ASTARR.macs.KS91.input.rep_all.union.q5.bed.gz'
Code
### collect the distinct region folders of region set B;
### the pair loop below iterates over these folders
dat = dat_region_label_B
vec = unique(dat$Folder)

### assign and show one folder per line
vec_txt_region_folder_B = vec
for(txt in vec){cat(txt, "\n")}
encode_chromatin_states 
encode_e2g_benchmark 
fcc_astarr_csaw 
fcc_astarr_macs_narrowpeak 
fcc_crispri_growth 
fcc_crispri_hcrff 
fcc_starrmpra_junke 
genome_cres 
genome_tss 
module_tf_shannon 

Loop and arrange each region pair intersect table

Code
### Arrange the intersection tables for every (region A label, region B folder) pair.
### For each pair the loop
###   1. reads the column names of both region sets from their description.tsv,
###   2. imports every intersection file matching *bed.gz found for the pair,
###   3. writes the concatenated pair table and a per-region summary table
###      into region_annotation/<region A label>/summary.
### After the loop, dat_region_annot_pair and dat_region_annot_summary hold the
### tables of the last pair that was processed.

### loop through region A
for (idx in seq_along(lst_region_label_A)){

    ### get region A
    lst = lst_region_label_A[[idx]]
    txt_region_folder_A = lst$Folder
    txt_region_label_A  = lst$Label

    ### get column names for region A
    txt_fdiry = file.path(FD_RES, "region", txt_region_folder_A, "summary")
    txt_fname = "description.tsv"
    txt_fpath = file.path(txt_fdiry, txt_fname)
    dat_cname = read_tsv(txt_fpath, show_col_types = FALSE)
    vec_txt_cname_A = dat_cname$Name

    ### loop through region B
    for (txt_region_folder_B in vec_txt_region_folder_B){

        ### get column names for region B
        txt_fdiry = file.path(FD_RES, "region", txt_region_folder_B, "summary")
        txt_fname = "description.tsv"
        txt_fpath = file.path(txt_fdiry, txt_fname)
        dat_cname = read_tsv(txt_fpath, show_col_types = FALSE)
        vec_txt_cname_B = dat_cname$Name

        ### get total column names for annotation table:
        ### region A columns (suffix _A), region B columns (suffix _B), then Overlap
        vec_txt_cname = c(
            paste0(vec_txt_cname_A, "_A"),
            paste0(vec_txt_cname_B, "_B"),
            "Overlap"
        )

        ### get directory of annotation table
        txt_fdiry = file.path(
            FD_RES,
            "region_annotation",
            txt_region_label_A,
            txt_region_folder_B
        )
        txt_fname = "*bed.gz"
        txt_fglob = file.path(txt_fdiry, txt_fname)

        ### get annotation table files
        vec_txt_fpath = Sys.glob(txt_fglob)
        vec_txt_fname = basename(vec_txt_fpath)

        ### show progress
        cat("===========================================\n")
        cat("Read annotation...", "\n")
        cat("Region A:", txt_region_folder_A, "|", txt_region_label_A, "\n")
        cat("Region B:", txt_region_folder_B, "\n")
        cat("FDiry:   ", "\n")
        print(txt_fdiry)
        cat("FName:   ", "\n")
        print(vec_txt_fname)
        cat("\n")
        flush.console()

        ### skip pairs without any intersection file; an empty file list would
        ### give a column-less table and abort the whole run at group_by()
        if (length(vec_txt_fpath) == 0){
            cat("No annotation files found; skip this region pair.", "\n")
            cat("\n")
            flush.console()
            next
        }

        ### import annotation tables
        lst = lapply(vec_txt_fname, function(txt_fname){
            ### set directory
            txt_fpath = file.path(txt_fdiry, txt_fname)

            ### get annotation of region A and region B
            ### (file names follow <label A>.<label B>.bed.gz)
            vec = str_split(txt_fname, "\\.")[[1]]
            txt_annot_A = vec[1]
            txt_annot_B = vec[2]

            ### read table and add annotation labels;
            ### Region_A / Region_B are (re)computed from the coordinates
            ### NOTE(review): fun_gen_region is defined outside this notebook;
            ### the printed output shows strings of the form chrom:start-end
            dat = read_tsv(txt_fpath, col_names = vec_txt_cname, show_col_types = FALSE)
            dat = dat %>%
                dplyr::mutate(
                    Region_A = fun_gen_region(Chrom_A, ChromStart_A, ChromEnd_A),
                    Region_B = fun_gen_region(Chrom_B, ChromStart_B, ChromEnd_B),
                    Annotation_A = txt_annot_A,
                    Annotation_B = txt_annot_B
                )
            return(dat)
        })

        ### concatenate tables and assign
        dat = bind_rows(lst)
        dat_region_annot_pair = dat

        ### summarize annotation tables: one row per region A / annotation /
        ### group / label, listing the overlapping region B entries and their count
        dat = dat_region_annot_pair
        dat = dat %>%
            dplyr::group_by(
                Chrom_A, ChromStart_A, ChromEnd_A, Region_A,
                Annotation_A,
                Annotation_B,
                Group_B,
                Label_B
            ) %>%
            dplyr::summarise(
                Region_Annot = paste(Region_B, collapse=";"),
                Region_Count = n(),
                .groups = "drop") %>%
            ### rename by name (not by position) so a change in column order
            ### cannot silently mislabel the columns
            dplyr::rename(
                Chrom      = Chrom_A,
                ChromStart = ChromStart_A,
                ChromEnd   = ChromEnd_A,
                Region     = Region_A,
                Group      = Group_B,
                Label      = Label_B
            )
        dat_region_annot_summary = dat

        ### set directory
        txt_fdiry = file.path(
            FD_RES,
            "region_annotation",
            txt_region_label_A,
            "summary"
        )
        txt_fname1 = paste("region", "intersect", "pair",    txt_region_folder_B, "tsv", sep = ".")
        txt_fpath1 = file.path(txt_fdiry, txt_fname1)
        txt_fname2 = paste("region", "intersect", "summary", txt_region_folder_B, "tsv", sep = ".")
        txt_fpath2 = file.path(txt_fdiry, txt_fname2)

        ### show progress
        cat("Save annotation...",  "\n")
        cat("FDiry: ", txt_fdiry, "\n")
        cat("FName1:", txt_fname1, "\n")
        cat("FName2:", txt_fname2, "\n")
        cat("\n")
        flush.console()

        ### write tables (create the summary folder, including parents, if missing)
        dir.create(txt_fdiry, showWarnings = FALSE, recursive = TRUE)
        write_tsv(dat_region_annot_pair,    txt_fpath1)
        write_tsv(dat_region_annot_summary, txt_fpath2)
    } ### end loop region B
} ### end loop region A
===========================================
Read annotation... 
Region A: fcc_astarr_macs_merge | fcc_astarr_macs_input_overlap 
Region B: encode_chromatin_states 
FDiry:    
[1] "/data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region_annotation/fcc_astarr_macs_input_overlap/encode_chromatin_states"
FName:    
[1] "fcc_astarr_macs_input_overlap.encode_ccres_ENCFF286VQG.bed.gz"   
[2] "fcc_astarr_macs_input_overlap.encode_ccres_silencer_rest.bed.gz" 
[3] "fcc_astarr_macs_input_overlap.encode_ccres_silencer_starr.bed.gz"
[4] "fcc_astarr_macs_input_overlap.encode_chromhmm_ENCFF106BGJ.bed.gz"

Save annotation... 
FDiry:  /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region_annotation/fcc_astarr_macs_input_overlap/summary 
FName1: region.intersect.pair.encode_chromatin_states.tsv 
FName2: region.intersect.summary.encode_chromatin_states.tsv 

===========================================
Read annotation... 
Region A: fcc_astarr_macs_merge | fcc_astarr_macs_input_overlap 
Region B: encode_e2g_benchmark 
FDiry:    
[1] "/data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region_annotation/fcc_astarr_macs_input_overlap/encode_e2g_benchmark"
FName:    
[1] "fcc_astarr_macs_input_overlap.encode_e2g_benchmark.bed.gz"

Save annotation... 
FDiry:  /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region_annotation/fcc_astarr_macs_input_overlap/summary 
FName1: region.intersect.pair.encode_e2g_benchmark.tsv 
FName2: region.intersect.summary.encode_e2g_benchmark.tsv 

===========================================
Read annotation... 
Region A: fcc_astarr_macs_merge | fcc_astarr_macs_input_overlap 
Region B: fcc_astarr_csaw 
FDiry:    
[1] "/data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region_annotation/fcc_astarr_macs_input_overlap/fcc_astarr_csaw"
FName:    
[1] "fcc_astarr_macs_input_overlap.fcc_astarr_csaw_KS91.bed.gz"   
[2] "fcc_astarr_macs_input_overlap.fcc_astarr_csaw_KSMerge.bed.gz"

Save annotation... 
FDiry:  /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region_annotation/fcc_astarr_macs_input_overlap/summary 
FName1: region.intersect.pair.fcc_astarr_csaw.tsv 
FName2: region.intersect.summary.fcc_astarr_csaw.tsv 

===========================================
Read annotation... 
Region A: fcc_astarr_macs_merge | fcc_astarr_macs_input_overlap 
Region B: fcc_astarr_macs_narrowpeak 
FDiry:    
[1] "/data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region_annotation/fcc_astarr_macs_input_overlap/fcc_astarr_macs_narrowpeak"
FName:    
[1] "fcc_astarr_macs_input_overlap.fcc_astarr_macs_input_rep1.bed.gz"
[2] "fcc_astarr_macs_input_overlap.fcc_astarr_macs_input_rep2.bed.gz"
[3] "fcc_astarr_macs_input_overlap.fcc_astarr_macs_input_rep3.bed.gz"
[4] "fcc_astarr_macs_input_overlap.fcc_astarr_macs_input_rep4.bed.gz"
[5] "fcc_astarr_macs_input_overlap.fcc_astarr_macs_input_rep5.bed.gz"
[6] "fcc_astarr_macs_input_overlap.fcc_astarr_macs_input_rep6.bed.gz"

Save annotation... 
FDiry:  /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region_annotation/fcc_astarr_macs_input_overlap/summary 
FName1: region.intersect.pair.fcc_astarr_macs_narrowpeak.tsv 
FName2: region.intersect.summary.fcc_astarr_macs_narrowpeak.tsv 

===========================================
Read annotation... 
Region A: fcc_astarr_macs_merge | fcc_astarr_macs_input_overlap 
Region B: fcc_crispri_growth 
FDiry:    
[1] "/data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region_annotation/fcc_astarr_macs_input_overlap/fcc_crispri_growth"
FName:    
[1] "fcc_astarr_macs_input_overlap.fcc_crispri_growth_signif.bed.gz"
[2] "fcc_astarr_macs_input_overlap.fcc_crispri_growth_total.bed.gz" 

Save annotation... 
FDiry:  /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region_annotation/fcc_astarr_macs_input_overlap/summary 
FName1: region.intersect.pair.fcc_crispri_growth.tsv 
FName2: region.intersect.summary.fcc_crispri_growth.tsv 

===========================================
Read annotation... 
Region A: fcc_astarr_macs_merge | fcc_astarr_macs_input_overlap 
Region B: fcc_crispri_hcrff 
FDiry:    
[1] "/data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region_annotation/fcc_astarr_macs_input_overlap/fcc_crispri_hcrff"
FName:    
[1] "fcc_astarr_macs_input_overlap.fcc_crispri_hcrff_casa.bed.gz"

Save annotation... 
FDiry:  /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region_annotation/fcc_astarr_macs_input_overlap/summary 
FName1: region.intersect.pair.fcc_crispri_hcrff.tsv 
FName2: region.intersect.summary.fcc_crispri_hcrff.tsv 

===========================================
Read annotation... 
Region A: fcc_astarr_macs_merge | fcc_astarr_macs_input_overlap 
Region B: fcc_starrmpra_junke 
FDiry:    
[1] "/data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region_annotation/fcc_astarr_macs_input_overlap/fcc_starrmpra_junke"
FName:    
[1] "fcc_astarr_macs_input_overlap.fcc_starrmpra_junke_astarr.bed.gz"
[2] "fcc_astarr_macs_input_overlap.fcc_starrmpra_junke_estarr.bed.gz"
[3] "fcc_astarr_macs_input_overlap.fcc_starrmpra_junke_lmpra.bed.gz" 
[4] "fcc_astarr_macs_input_overlap.fcc_starrmpra_junke_tmpra.bed.gz" 
[5] "fcc_astarr_macs_input_overlap.fcc_starrmpra_junke_wstarr.bed.gz"

Save annotation... 
FDiry:  /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region_annotation/fcc_astarr_macs_input_overlap/summary 
FName1: region.intersect.pair.fcc_starrmpra_junke.tsv 
FName2: region.intersect.summary.fcc_starrmpra_junke.tsv 

===========================================
Read annotation... 
Region A: fcc_astarr_macs_merge | fcc_astarr_macs_input_overlap 
Region B: genome_cres 
FDiry:    
[1] "/data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region_annotation/fcc_astarr_macs_input_overlap/genome_cres"
FName:    
[1] "fcc_astarr_macs_input_overlap.genome_cres.bed.gz"

Save annotation... 
FDiry:  /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region_annotation/fcc_astarr_macs_input_overlap/summary 
FName1: region.intersect.pair.genome_cres.tsv 
FName2: region.intersect.summary.genome_cres.tsv 

===========================================
Read annotation... 
Region A: fcc_astarr_macs_merge | fcc_astarr_macs_input_overlap 
Region B: genome_tss 
FDiry:    
[1] "/data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region_annotation/fcc_astarr_macs_input_overlap/genome_tss"
FName:    
[1] "fcc_astarr_macs_input_overlap.genome_tss_pol2.bed.gz"       
[2] "fcc_astarr_macs_input_overlap.genome_tss_pol2_rnaseq.bed.gz"

Save annotation... 
FDiry:  /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region_annotation/fcc_astarr_macs_input_overlap/summary 
FName1: region.intersect.pair.genome_tss.tsv 
FName2: region.intersect.summary.genome_tss.tsv 

===========================================
Read annotation... 
Region A: fcc_astarr_macs_merge | fcc_astarr_macs_input_overlap 
Region B: module_tf_shannon 
FDiry:    
[1] "/data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region_annotation/fcc_astarr_macs_input_overlap/module_tf_shannon"
FName:    
[1] "fcc_astarr_macs_input_overlap.module_tf_shannon.bed.gz"

Save annotation... 
FDiry:  /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region_annotation/fcc_astarr_macs_input_overlap/summary 
FName1: region.intersect.pair.module_tf_shannon.tsv 
FName2: region.intersect.summary.module_tf_shannon.tsv 

===========================================
Read annotation... 
Region A: fcc_astarr_macs_merge | fcc_astarr_macs_input_union 
Region B: encode_chromatin_states 
FDiry:    
[1] "/data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region_annotation/fcc_astarr_macs_input_union/encode_chromatin_states"
FName:    
[1] "fcc_astarr_macs_input_union.encode_ccres_ENCFF286VQG.bed.gz"   
[2] "fcc_astarr_macs_input_union.encode_ccres_silencer_rest.bed.gz" 
[3] "fcc_astarr_macs_input_union.encode_ccres_silencer_starr.bed.gz"
[4] "fcc_astarr_macs_input_union.encode_chromhmm_ENCFF106BGJ.bed.gz"

Save annotation... 
FDiry:  /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region_annotation/fcc_astarr_macs_input_union/summary 
FName1: region.intersect.pair.encode_chromatin_states.tsv 
FName2: region.intersect.summary.encode_chromatin_states.tsv 

===========================================
Read annotation... 
Region A: fcc_astarr_macs_merge | fcc_astarr_macs_input_union 
Region B: encode_e2g_benchmark 
FDiry:    
[1] "/data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region_annotation/fcc_astarr_macs_input_union/encode_e2g_benchmark"
FName:    
[1] "fcc_astarr_macs_input_union.encode_e2g_benchmark.bed.gz"

Save annotation... 
FDiry:  /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region_annotation/fcc_astarr_macs_input_union/summary 
FName1: region.intersect.pair.encode_e2g_benchmark.tsv 
FName2: region.intersect.summary.encode_e2g_benchmark.tsv 

===========================================
Read annotation... 
Region A: fcc_astarr_macs_merge | fcc_astarr_macs_input_union 
Region B: fcc_astarr_csaw 
FDiry:    
[1] "/data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region_annotation/fcc_astarr_macs_input_union/fcc_astarr_csaw"
FName:    
[1] "fcc_astarr_macs_input_union.fcc_astarr_csaw_KS91.bed.gz"   
[2] "fcc_astarr_macs_input_union.fcc_astarr_csaw_KSMerge.bed.gz"

Save annotation... 
FDiry:  /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region_annotation/fcc_astarr_macs_input_union/summary 
FName1: region.intersect.pair.fcc_astarr_csaw.tsv 
FName2: region.intersect.summary.fcc_astarr_csaw.tsv 

===========================================
Read annotation... 
Region A: fcc_astarr_macs_merge | fcc_astarr_macs_input_union 
Region B: fcc_astarr_macs_narrowpeak 
FDiry:    
[1] "/data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region_annotation/fcc_astarr_macs_input_union/fcc_astarr_macs_narrowpeak"
FName:    
[1] "fcc_astarr_macs_input_union.fcc_astarr_macs_input_rep1.bed.gz"
[2] "fcc_astarr_macs_input_union.fcc_astarr_macs_input_rep2.bed.gz"
[3] "fcc_astarr_macs_input_union.fcc_astarr_macs_input_rep3.bed.gz"
[4] "fcc_astarr_macs_input_union.fcc_astarr_macs_input_rep4.bed.gz"
[5] "fcc_astarr_macs_input_union.fcc_astarr_macs_input_rep5.bed.gz"
[6] "fcc_astarr_macs_input_union.fcc_astarr_macs_input_rep6.bed.gz"

Save annotation... 
FDiry:  /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region_annotation/fcc_astarr_macs_input_union/summary 
FName1: region.intersect.pair.fcc_astarr_macs_narrowpeak.tsv 
FName2: region.intersect.summary.fcc_astarr_macs_narrowpeak.tsv 

===========================================
Read annotation... 
Region A: fcc_astarr_macs_merge | fcc_astarr_macs_input_union 
Region B: fcc_crispri_growth 
FDiry:    
[1] "/data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region_annotation/fcc_astarr_macs_input_union/fcc_crispri_growth"
FName:    
[1] "fcc_astarr_macs_input_union.fcc_crispri_growth_signif.bed.gz"
[2] "fcc_astarr_macs_input_union.fcc_crispri_growth_total.bed.gz" 

Save annotation... 
FDiry:  /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region_annotation/fcc_astarr_macs_input_union/summary 
FName1: region.intersect.pair.fcc_crispri_growth.tsv 
FName2: region.intersect.summary.fcc_crispri_growth.tsv 

===========================================
Read annotation... 
Region A: fcc_astarr_macs_merge | fcc_astarr_macs_input_union 
Region B: fcc_crispri_hcrff 
FDiry:    
[1] "/data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region_annotation/fcc_astarr_macs_input_union/fcc_crispri_hcrff"
FName:    
[1] "fcc_astarr_macs_input_union.fcc_crispri_hcrff_casa.bed.gz"

Save annotation... 
FDiry:  /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region_annotation/fcc_astarr_macs_input_union/summary 
FName1: region.intersect.pair.fcc_crispri_hcrff.tsv 
FName2: region.intersect.summary.fcc_crispri_hcrff.tsv 

===========================================
Read annotation... 
Region A: fcc_astarr_macs_merge | fcc_astarr_macs_input_union 
Region B: fcc_starrmpra_junke 
FDiry:    
[1] "/data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region_annotation/fcc_astarr_macs_input_union/fcc_starrmpra_junke"
FName:    
[1] "fcc_astarr_macs_input_union.fcc_starrmpra_junke_astarr.bed.gz"
[2] "fcc_astarr_macs_input_union.fcc_starrmpra_junke_estarr.bed.gz"
[3] "fcc_astarr_macs_input_union.fcc_starrmpra_junke_lmpra.bed.gz" 
[4] "fcc_astarr_macs_input_union.fcc_starrmpra_junke_tmpra.bed.gz" 
[5] "fcc_astarr_macs_input_union.fcc_starrmpra_junke_wstarr.bed.gz"

Save annotation... 
FDiry:  /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region_annotation/fcc_astarr_macs_input_union/summary 
FName1: region.intersect.pair.fcc_starrmpra_junke.tsv 
FName2: region.intersect.summary.fcc_starrmpra_junke.tsv 

===========================================
Read annotation... 
Region A: fcc_astarr_macs_merge | fcc_astarr_macs_input_union 
Region B: genome_cres 
FDiry:    
[1] "/data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region_annotation/fcc_astarr_macs_input_union/genome_cres"
FName:    
[1] "fcc_astarr_macs_input_union.genome_cres.bed.gz"

Save annotation... 
FDiry:  /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region_annotation/fcc_astarr_macs_input_union/summary 
FName1: region.intersect.pair.genome_cres.tsv 
FName2: region.intersect.summary.genome_cres.tsv 

===========================================
Read annotation... 
Region A: fcc_astarr_macs_merge | fcc_astarr_macs_input_union 
Region B: genome_tss 
FDiry:    
[1] "/data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region_annotation/fcc_astarr_macs_input_union/genome_tss"
FName:    
[1] "fcc_astarr_macs_input_union.genome_tss_pol2.bed.gz"       
[2] "fcc_astarr_macs_input_union.genome_tss_pol2_rnaseq.bed.gz"

Save annotation... 
FDiry:  /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region_annotation/fcc_astarr_macs_input_union/summary 
FName1: region.intersect.pair.genome_tss.tsv 
FName2: region.intersect.summary.genome_tss.tsv 

===========================================
Read annotation... 
Region A: fcc_astarr_macs_merge | fcc_astarr_macs_input_union 
Region B: module_tf_shannon 
FDiry:    
[1] "/data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region_annotation/fcc_astarr_macs_input_union/module_tf_shannon"
FName:    
[1] "fcc_astarr_macs_input_union.module_tf_shannon.bed.gz"

Save annotation... 
FDiry:  /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region_annotation/fcc_astarr_macs_input_union/summary 
FName1: region.intersect.pair.module_tf_shannon.tsv 
FName2: region.intersect.summary.module_tf_shannon.tsv 

Review

Code
### preview the pair table; the loop reassigns this variable for every pair,
### so only the last processed pair is shown
head(dat_region_annot_pair)
A tibble: 6 × 14
Chrom_A ChromStart_A ChromEnd_A Region_A Chrom_B ChromStart_B ChromEnd_B Name_B Group_B Label_B Overlap Region_B Annotation_A Annotation_B
<chr> <dbl> <dbl> <chr> <chr> <dbl> <dbl> <chr> <chr> <chr> <dbl> <chr> <chr> <chr>
chr1 115411 115986 chr1:115411-115986 chr1 115702 115751 chr1:115702-115751 TF_Module Module_02 49 chr1:115702-115751 fcc_astarr_macs_input_union module_tf_shannon
chr1 115411 115986 chr1:115411-115986 chr1 115702 115751 chr1:115702-115751 TF_Module Module_05 49 chr1:115702-115751 fcc_astarr_macs_input_union module_tf_shannon
chr1 115411 115986 chr1:115411-115986 chr1 115702 115751 chr1:115702-115751 TF_Module Module_10 49 chr1:115702-115751 fcc_astarr_macs_input_union module_tf_shannon
chr1 115411 115986 chr1:115411-115986 chr1 115702 115751 chr1:115702-115751 TF_Module Module_44 49 chr1:115702-115751 fcc_astarr_macs_input_union module_tf_shannon
chr1 118518 118743 chr1:118518-118743 chr1 118585 118665 chr1:118585-118665 TF_Module Module_47 80 chr1:118585-118665 fcc_astarr_macs_input_union module_tf_shannon
chr1 136071 137429 chr1:136071-137429 chr1 136446 136510 chr1:136446-136510 TF_Module Module_27 64 chr1:136446-136510 fcc_astarr_macs_input_union module_tf_shannon
Code
### preview the summary table; the loop reassigns this variable for every pair,
### so only the last processed pair is shown
head(dat_region_annot_summary)
A tibble: 6 × 10
Chrom ChromStart ChromEnd Region Annotation_A Annotation_B Group Label Region_Annot Region_Count
<chr> <dbl> <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <int>
chr1 115411 115986 chr1:115411-115986 fcc_astarr_macs_input_union module_tf_shannon TF_Module Module_02 chr1:115702-115751 1
chr1 115411 115986 chr1:115411-115986 fcc_astarr_macs_input_union module_tf_shannon TF_Module Module_05 chr1:115702-115751 1
chr1 115411 115986 chr1:115411-115986 fcc_astarr_macs_input_union module_tf_shannon TF_Module Module_10 chr1:115702-115751 1
chr1 115411 115986 chr1:115411-115986 fcc_astarr_macs_input_union module_tf_shannon TF_Module Module_44 chr1:115702-115751 1
chr1 118518 118743 chr1:118518-118743 fcc_astarr_macs_input_union module_tf_shannon TF_Module Module_47 chr1:118585-118665 1
chr1 136071 137429 chr1:136071-137429 fcc_astarr_macs_input_union module_tf_shannon TF_Module Module_27 chr1:136446-136510 1