Prepare Hi-C data 13 (intact Hi-C)

Create metadata

Set environment

Code
### load the project configuration; messages and warnings raised while
### sourcing are muted
suppressMessages(
    suppressWarnings(
        source("../run_config_project_sing.R")
    )
)

### print the environment summary (container name and directory paths);
### show_env() is not defined in this notebook -- presumably it comes from
### the sourced configuration script
show_env()
You are working on        Singularity: singularity_proj_encode_fcc 
BASE DIRECTORY (FD_BASE): /data/reddylab/Kuei 
REPO DIRECTORY (FD_REPO): /data/reddylab/Kuei/repo 
WORK DIRECTORY (FD_WORK): /data/reddylab/Kuei/work 
DATA DIRECTORY (FD_DATA): /data/reddylab/Kuei/data 

You are working with      ENCODE FCC 
PATH OF PROJECT (FD_PRJ): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC 
PROJECT RESULTS (FD_RES): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results 
PROJECT SCRIPTS (FD_EXE): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/scripts 
PROJECT DATA    (FD_DAT): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/data 
PROJECT NOTE    (FD_NBK): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/notebooks 
PROJECT DOCS    (FD_DOC): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/docs 
PROJECT LOG     (FD_LOG): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/log 
PROJECT REF     (FD_REF): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/references 

Set global variables

Code
### experiment-level constants shared by the cells below
TXT_INDEX         = "ENCSR479XDG"                  # suffix appended to every region label
TXT_ASSAY         = "hic_intact_ENCSR479XDG"       # assay identifier (not referenced in the cells shown here)
TXT_FOLDER_REGION = "hic_intact_K562_ENCSR479XDG"  # folder name under <FD_RES>/region

Define column description

Code
### column names and their descriptions, written row by row;
### this table is what later gets saved as the column description file
dat_cname = tribble(
    ~Name,        ~Note,
    "Chrom",      "Name of the chromosome",
    "ChromStart", "The starting position of the feature in the chromosome",
    "ChromEnd",   "The ending position of the feature in the chromosome",
    "Name",       "Name given to a region; Use '.' if no name is assigned.",
    "Group",      "Assay Name",
    "Label",      "Region Label"
)

### keep a working copy under `dat` and render the table
dat = dat_cname
fun_display_table(dat)
Name Note
Chrom Name of the chromosome
ChromStart The starting position of the feature in the chromosome
ChromEnd The ending position of the feature in the chromosome
Name Name given to a region; Use '.' if no name is assigned.
Group Assay Name
Label Region Label

Define file labeling

Code
### locate the region folder of this experiment under the results directory
txt_folder = TXT_FOLDER_REGION
txt_fdiry  = file.path(FD_RES, "region", txt_folder)

### collect every path in that folder whose file name contains "bed"
txt_fglob     = file.path(txt_fdiry, "*bed*")
vec_txt_fpath = Sys.glob(txt_fglob)

### show the matched file names with the directory part stripped
vec_txt_fname = basename(vec_txt_fpath)
print(vec_txt_fname)
[1] "K562.hg38.hic_intact.Loop_A.bed.gz" "K562.hg38.hic_intact.Loop_B.bed.gz"
[3] "K562.hg38.hic_intact.TAD.bed.gz"   
Code
### set directory
txt_folder = TXT_FOLDER_REGION
txt_fdiry  = file.path(FD_RES, "region", txt_folder)
txt_fglob  = file.path(txt_fdiry, "*bed*")

### get file names
vec_txt_fpath = Sys.glob(txt_fglob)
vec_txt_fname = basename(vec_txt_fpath)

### fail early with a readable message when nothing matched; otherwise
### data.frame() below stops with an opaque "differing number of rows"
### error because `Folder` has length 1 while the file vectors are empty
if (length(vec_txt_fpath) == 0) {
    stop("No region files matched: ", txt_fglob, call. = FALSE)
}

### init info table (one row per matched file)
dat = data.frame(
    "Folder" = txt_folder,
    "FName"  = vec_txt_fname,
    "FPath"  = vec_txt_fpath
)

### arrange table
### the file name is split on "." into six fields:
###     <Biosample>.<Genome>.<Assay>.<Label>.<File_Type>.<File_Ext>
### and the region label is rebuilt as <assay in lower case>_<Label>_<index>
txt_index = TXT_INDEX
dat = dat %>% tidyr::separate(
        FName,
        c("Biosample", "Genome", "Assay", "Label", "File_Type", "File_Ext"),
        sep = "\\.",
        remove = FALSE
    ) %>%
    dplyr::mutate(Label = paste(tolower(Assay), Label, txt_index, sep="_")) %>%
    dplyr::select(Folder, FName, Label, FPath)

### assign and show
dat_region_label = dat
fun_display_table(dat)
Folder FName Label FPath
hic_intact_K562_ENCSR479XDG K562.hg38.hic_intact.Loop_A.bed.gz hic_intact_Loop_A_ENCSR479XDG /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region/hic_intact_K562_ENCSR479XDG/K562.hg38.hic_intact.Loop_A.bed.gz
hic_intact_K562_ENCSR479XDG K562.hg38.hic_intact.Loop_B.bed.gz hic_intact_Loop_B_ENCSR479XDG /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region/hic_intact_K562_ENCSR479XDG/K562.hg38.hic_intact.Loop_B.bed.gz
hic_intact_K562_ENCSR479XDG K562.hg38.hic_intact.TAD.bed.gz hic_intact_TAD_ENCSR479XDG /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region/hic_intact_K562_ENCSR479XDG/K562.hg38.hic_intact.TAD.bed.gz

Save results

Code
### output location: <FD_RES>/region/<folder>/summary/description.tsv
txt_folder = TXT_FOLDER_REGION
txt_fdiry  = file.path(FD_RES, "region", txt_folder, "summary")
txt_fname  = "description.tsv"
txt_fpath  = file.path(txt_fdiry, txt_fname)

### make sure the summary folder exists; recursive = TRUE also creates any
### missing parent folder instead of silently failing (warnings are muted)
### and leaving the write below to fail on a non-existent directory
dir.create(txt_fdiry, showWarnings = FALSE, recursive = TRUE)

### write the column description table
dat = dat_cname
write_tsv(dat, txt_fpath)
Code
### output location: <FD_RES>/region/<folder>/summary/metadata.label.tsv
txt_folder = TXT_FOLDER_REGION
txt_fdiry  = file.path(FD_RES, "region", txt_folder, "summary")
txt_fname  = "metadata.label.tsv"
txt_fpath  = file.path(txt_fdiry, txt_fname)

### make sure the summary folder exists; recursive = TRUE also creates any
### missing parent folder instead of silently failing (warnings are muted)
### and leaving the write below to fail on a non-existent directory
dir.create(txt_fdiry, showWarnings = FALSE, recursive = TRUE)

### write the file-to-label table
dat = dat_region_label
write_tsv(dat, txt_fpath)