Calculate GC% of ATAC regions 01

Run bedtools nuc

Set environment

Code
# Load the project configuration. The path is relative, so it is resolved
# against the current working directory of the notebook kernel.
source ../run_config_project.sh
# Print the environment summary shown below (show_env is presumably defined
# by the sourced config; its definition is not visible in this notebook).
show_env
You are working on             Duke Server: RCC
BASE DIRECTORY (FD_BASE):      /data/reddylab/Kuei
REPO DIRECTORY (FD_REPO):      /data/reddylab/Kuei/repo
WORK DIRECTORY (FD_WORK):      /data/reddylab/Kuei/work
DATA DIRECTORY (FD_DATA):      /data/reddylab/Kuei/data
CONTAINER DIR. (FD_SING):      /data/reddylab/Kuei/container

You are working with           ENCODE FCC
PATH OF PROJECT (FD_PRJ):      /data/reddylab/Kuei/repo/Proj_ENCODE_FCC
PROJECT RESULTS (FD_RES):      /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results
PROJECT SCRIPTS (FD_EXE):      /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/scripts
PROJECT DATA    (FD_DAT):      /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/data
PROJECT NOTE    (FD_NBK):      /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/notebooks
PROJECT DOCS    (FD_DOC):      /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/docs
PROJECT LOG     (FD_LOG):      /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/log
PROJECT REF     (FD_REF):      /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/references
PROJECT IMAGE   (FP_PRJ_SIF):  /data/reddylab/Kuei/container/project/singularity_proj_encode_fcc.sif
PROJECT CONF.   (FP_CNF):      /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/scripts/config_project.sh

Preview

Check genome file

Code
# Show the genome annotation directory and list its entries, one per line.
# Quoted so an unset/empty FD_GEN fails loudly instead of listing the cwd.
echo "${FD_GEN}"
ls -1 "${FD_GEN}"
/data/reddylab/Kuei/data/annotation/genome/hg38
fasta
hg38.chrom.sizes
hg38.chrom.sizes.chr1
hg38.chrom.sizes.chr10
hg38.chrom.sizes.chr11
hg38.chrom.sizes.chr12
hg38.chrom.sizes.chr13
hg38.chrom.sizes.chr14
hg38.chrom.sizes.chr15
hg38.chrom.sizes.chr16
hg38.chrom.sizes.chr17
hg38.chrom.sizes.chr18
hg38.chrom.sizes.chr19
hg38.chrom.sizes.chr2
hg38.chrom.sizes.chr20
hg38.chrom.sizes.chr21
hg38.chrom.sizes.chr22
hg38.chrom.sizes.chr3
hg38.chrom.sizes.chr4
hg38.chrom.sizes.chr5
hg38.chrom.sizes.chr6
hg38.chrom.sizes.chr7
hg38.chrom.sizes.chr8
hg38.chrom.sizes.chr9
hg38.chrom.sizes.chrX
hg38.chrom.sizes.chrY
hg38.chrom.sizes.rm
Code
# List the chr1 and whole-genome FASTA files (and their .fai indexes).
# Only the variable is quoted; the glob stays outside the quotes so it expands.
echo "${FD_GEN}"
ls -1 "${FD_GEN}"/fasta/chr1.*
ls -1 "${FD_GEN}"/fasta/hg38*
/data/reddylab/Kuei/data/annotation/genome/hg38
/data/reddylab/Kuei/data/annotation/genome/hg38/fasta/chr1.fa
/data/reddylab/Kuei/data/annotation/genome/hg38/fasta/chr1.fa.fai
/data/reddylab/Kuei/data/annotation/genome/hg38/fasta/hg38.fa
/data/reddylab/Kuei/data/annotation/genome/hg38/fasta/hg38.fa.fai
Code
# Build the full path of the whole-genome FASTA (FP_GEN) and confirm it exists.
echo "${FD_GEN}"
FN_GEN=fasta/hg38.fa
FP_GEN="${FD_GEN}/${FN_GEN}"

ls "${FP_GEN}"
/data/reddylab/Kuei/data/annotation/genome/hg38
/data/reddylab/Kuei/data/annotation/genome/hg38/fasta/hg38.fa

Check region file

Code
# Path of the region metadata table (columns: Folder, FName, Label) that the
# loops below iterate over; print it to check its content.
FP_REGION_LABEL="${FD_RES}/region/summary/metadata.label.astarr_macs.tsv"
cat "${FP_REGION_LABEL}"
Folder  FName   Label
fcc_astarr_macs K562.hg38.ASTARR.macs.KS91.input.rep_all.max_overlaps.q5.bed.gz fcc_astarr_macs_input_overlap
fcc_astarr_macs K562.hg38.ASTARR.macs.KS91.input.rep_all.union.q5.bed.gz    fcc_astarr_macs_input_union
Code
# Build the path of one region BED file (FP_INP) and confirm it exists.
FD_INP="${FD_RES}/region/fcc_astarr_macs"
FN_INP="K562.hg38.ASTARR.macs.KS91.input.rep_all.max_overlaps.q5.bed.gz"
FP_INP="${FD_INP}/${FN_INP}"

ls "${FP_INP}"
/data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region/fcc_astarr_macs/K562.hg38.ASTARR.macs.KS91.input.rep_all.max_overlaps.q5.bed.gz

Check loop

Code
### Loop region A (dry run)
### Walks the region table and prints the input/output/log paths each row
### resolves to. Nothing is created or submitted in this cell.
### Each data row supplies three fields: Folder, FName, Label.
### `read -r` keeps backslashes literal; all expansions are quoted so paths
### are never word-split or glob-expanded.
while read -r FOLDER_REG FNAME_REG LABEL_REG; do

    ### Set input
    FD_INP="${FD_RES}/region/${FOLDER_REG}"
    FN_INP="${FNAME_REG}"
    FP_INP="${FD_INP}/${FN_INP}"
    FOLDER="${FOLDER_REG}"

    ### Set output (same folder and file name, under region_nuc)
    FD_OUT="${FD_RES}/region_nuc/${FOLDER}"
    FN_OUT="${FNAME_REG}"
    FP_OUT="${FD_OUT}/${FN_OUT}"

    ### setup log file
    FN_LOG="region.nuc.${LABEL_REG}.txt"
    FP_LOG="${FD_LOG}/${FN_LOG}"

    ### Set script
    FP_EXE="${FD_EXE}/run_bedtools_nuc.sh"

    ### show progress
    echo ==============================
    echo "Input  FDiry: ${FD_INP}"
    echo "Input  FName: ${FN_INP}"
    echo
    echo "Output FDiry: ${FD_OUT}"
    echo "Output FName: ${FN_OUT}"
    echo
    ### the log directory is printed as the literal text ${FD_LOG}, not expanded
    echo "Log    FPath: \${FD_LOG}/${FN_LOG}"
    echo
### skip the header row of the table (NR >= 2)
done < <(awk 'NR >= 2' "${FP_REGION_LABEL}")
==============================
Input  FDiry: /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region/fcc_astarr_macs
Input  FName: K562.hg38.ASTARR.macs.KS91.input.rep_all.max_overlaps.q5.bed.gz

Output FDiry: /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region_nuc/fcc_astarr_macs
Output FName: K562.hg38.ASTARR.macs.KS91.input.rep_all.max_overlaps.q5.bed.gz

Log    FPath: ${FD_LOG}/region.nuc.fcc_astarr_macs_input_overlap.txt

==============================
Input  FDiry: /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region/fcc_astarr_macs
Input  FName: K562.hg38.ASTARR.macs.KS91.input.rep_all.union.q5.bed.gz

Output FDiry: /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region_nuc/fcc_astarr_macs
Output FName: K562.hg38.ASTARR.macs.KS91.input.rep_all.union.q5.bed.gz

Log    FPath: ${FD_LOG}/region.nuc.fcc_astarr_macs_input_union.txt

Check config file

Code
# Show the config file path that is handed to the batch script below.
echo "${FP_CNF}"
/data/reddylab/Kuei/repo/Proj_ENCODE_FCC/scripts/config_project.sh

Execute

Code
### Loop region A (submit)
### For every data row of the region table (fields: Folder, FName, Label),
### create the output directory and submit one run_bedtools_nuc.sh job with
### the arguments: config file, genome FASTA, input BED, output path.
### `read -r` keeps backslashes literal; all expansions are quoted so paths
### are never word-split or glob-expanded.
while read -r FOLDER_REG FNAME_REG LABEL_REG; do

    ### Set input
    FD_INP="${FD_RES}/region/${FOLDER_REG}"
    FN_INP="${FNAME_REG}"
    FP_INP="${FD_INP}/${FN_INP}"

    ### Set output (same folder and file name, under region_nuc)
    FD_OUT="${FD_RES}/region_nuc/${FOLDER_REG}"
    FN_OUT="${FNAME_REG}"
    FP_OUT="${FD_OUT}/${FN_OUT}"

    ### setup log file
    FN_LOG="region.nuc.${LABEL_REG}.txt"
    FP_LOG="${FD_LOG}/${FN_LOG}"

    ### Set script
    FP_EXE="${FD_EXE}/run_bedtools_nuc.sh"

    ### show progress
    echo ==============================
    echo "Input  FDiry: ${FD_INP}"
    echo "Input  FName: ${FN_INP}"
    echo
    echo "Output FDiry: ${FD_OUT}"
    echo "Output FName: ${FN_OUT}"
    echo
    ### the log directory is printed as the literal text ${FD_LOG}, not expanded
    echo "Log    FPath: \${FD_LOG}/${FN_LOG}"
    echo

    ### execute
    ### do not submit a job whose output directory could not be created
    if ! mkdir -p "${FD_OUT}"; then
        echo "ERROR: cannot create output directory: ${FD_OUT}" >&2
        continue
    fi
    sbatch \
        --cpus-per-task 4 \
        --mem 4G \
        --output "${FP_LOG}" \
        "${FP_EXE}" "${FP_CNF}" "${FP_GEN}" "${FP_INP}" "${FP_OUT}"
    echo
### skip the header row of the table (NR >= 2)
done < <(awk 'NR >= 2' "${FP_REGION_LABEL}")
==============================
Input  FDiry: /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region/fcc_astarr_macs
Input  FName: K562.hg38.ASTARR.macs.KS91.input.rep_all.max_overlaps.q5.bed.gz

Output FDiry: /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region_nuc/fcc_astarr_macs
Output FName: K562.hg38.ASTARR.macs.KS91.input.rep_all.max_overlaps.q5.bed.gz

Log    FPath: ${FD_LOG}/region.nuc.fcc_astarr_macs_input_overlap.txt

Submitted batch job 275494

==============================
Input  FDiry: /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region/fcc_astarr_macs
Input  FName: K562.hg38.ASTARR.macs.KS91.input.rep_all.union.q5.bed.gz

Output FDiry: /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region_nuc/fcc_astarr_macs
Output FName: K562.hg38.ASTARR.macs.KS91.input.rep_all.union.q5.bed.gz

Log    FPath: ${FD_LOG}/region.nuc.fcc_astarr_macs_input_union.txt

Submitted batch job 275495

Review

Code
# Inspect the job log for the max_overlaps region set.
cat "${FD_LOG}/region.nuc.fcc_astarr_macs_input_overlap.txt"
Hostname:           plp-rcc-node-02
Slurm Array Index: 
Time Stamp:         05-20-25+08:34:47

Genome:  /data/reddylab/Kuei/data/annotation/genome/hg38/fasta/hg38.fa

Input:  /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region/fcc_astarr_macs/K562.hg38.ASTARR.macs.KS91.input.rep_all.max_overlaps.q5.bed.gz

show first few lines of input
chr1    10038   10405   chr1:10038-10405
chr1    14282   14614   chr1:14282-14614
chr1    16025   16338   chr1:16025-16338
chr1    17288   17689   chr1:17288-17689
chr1    28934   29499   chr1:28934-29499
chr1    115429  115969  chr1:115429-115969
chr1    136201  137353  chr1:136201-137353
chr1    137748  138049  chr1:137748-138049
chr1    138321  139517  chr1:138321-139517
chr1    181005  181854  chr1:181005-181854


Output:  /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region_nuc/fcc_astarr_macs/K562.hg38.ASTARR.macs.KS91.input.rep_all.max_overlaps.q5.bed.gz

show first few lines of output:
#1_usercol  2_usercol   3_usercol   4_usercol   5_pct_at    6_pct_gc    7_num_A 8_num_C 9_num_G 10_num_T    11_num_N    12_num_oth  13_seq_len
chr1    10038   10405   chr1:10038-10405    0.476839    0.523161    122 192 0   53  0   0   367
chr1    14282   14614   chr1:14282-14614    0.421687    0.578313    65  102 90  75  0   0   332
chr1    16025   16338   chr1:16025-16338    0.412141    0.587859    62  89  95  67  0   0   313
chr1    17288   17689   chr1:17288-17689    0.374065    0.625935    69  144 107 81  0   0   401
chr1    28934   29499   chr1:28934-29499    0.228319    0.771681    66  247 189 63  0   0   565
chr1    115429  115969  chr1:115429-115969  0.618519    0.381481    160 119 87  174 0   0   540
chr1    136201  137353  chr1:136201-137353  0.305556    0.694444    166 296 504 186 0   0   1152
chr1    137748  138049  chr1:137748-138049  0.365449    0.634551    69  68  123 41  0   0   301
chr1    138321  139517  chr1:138321-139517  0.413880    0.586120    282 238 463 213 0   0   1196


Done!
Run Time: 2 seconds
Code
# Inspect the job log for the union region set.
cat "${FD_LOG}/region.nuc.fcc_astarr_macs_input_union.txt"
Hostname:           plp-rcc-node-02
Slurm Array Index: 
Time Stamp:         05-20-25+08:34:47

Genome:  /data/reddylab/Kuei/data/annotation/genome/hg38/fasta/hg38.fa

Input:  /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region/fcc_astarr_macs/K562.hg38.ASTARR.macs.KS91.input.rep_all.union.q5.bed.gz

show first few lines of input
chr1    10015   10442   chr1:10015-10442
chr1    14253   14645   chr1:14253-14645
chr1    16015   16477   chr1:16015-16477
chr1    17237   17772   chr1:17237-17772
chr1    28903   29613   chr1:28903-29613
chr1    30803   31072   chr1:30803-31072
chr1    101603  101849  chr1:101603-101849
chr1    115411  115986  chr1:115411-115986
chr1    118518  118743  chr1:118518-118743
chr1    136071  137429  chr1:136071-137429


Output:  /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region_nuc/fcc_astarr_macs/K562.hg38.ASTARR.macs.KS91.input.rep_all.union.q5.bed.gz

show first few lines of output:
#1_usercol  2_usercol   3_usercol   4_usercol   5_pct_at    6_pct_gc    7_num_A 8_num_C 9_num_G 10_num_T    11_num_N    12_num_oth  13_seq_len
chr1    10015   10442   chr1:10015-10442    0.477752    0.522248    141 223 0   63  0   0   427
chr1    14253   14645   chr1:14253-14645    0.426020    0.573980    75  124 101 92  0   0   392
chr1    16015   16477   chr1:16015-16477    0.458874    0.541126    102 124 126 110 0   0   462
chr1    17237   17772   chr1:17237-17772    0.385047    0.614953    94  182 147 112 0   0   535
chr1    28903   29613   chr1:28903-29613    0.240845    0.759155    85  303 236 86  0   0   710
chr1    30803   31072   chr1:30803-31072    0.498141    0.501859    47  84  51  87  0   0   269
chr1    101603  101849  chr1:101603-101849  0.646341    0.353659    107 45  42  52  0   0   246
chr1    115411  115986  chr1:115411-115986  0.612174    0.387826    168 125 98  184 0   0   575
chr1    118518  118743  chr1:118518-118743  0.688889    0.311111    87  31  39  68  0   0   225


Done!
Run Time: 4 seconds
Code
### Single-job submission of run_bedtools_nuc.sh (no loop, no captured output).
### NOTE(review): this cell passes ${FD_PRJ} as the first argument, whereas the
### loop in the Execute section passes ${FP_CNF} to the same script; confirm
### which one the current run_bedtools_nuc.sh expects before re-running.
### ${NODE} is quoted so that an empty value is rejected by sbatch instead of
### letting -p swallow the next option (--exclude=dl-01) as the partition name.
FN_LOG=bedtools.nuc.region.atac.txt
FP_LOG="${FD_LOG}/${FN_LOG}"

sbatch -p "${NODE}" \
    --exclude=dl-01 \
    --cpus-per-task 4 \
    --mem 4G \
    --output "${FP_LOG}" \
    "${FD_EXE}/run_bedtools_nuc.sh" "${FD_PRJ}" "${FP_GEN}" "${FP_INP}" "${FP_OUT}"
Code
### execute
### NOTE(review): FP_INP_A / FP_INP_B are not set anywhere in the visible part
### of this notebook, and the indentation suggests this snippet was pasted from
### inside a loop; presumably a leftover template. Confirm before running.
        mkdir -p ${FD_OUT}
        sbatch \
            --cpus-per-task 4 \
            --mem 4G \
            --output ${FP_LOG} \
            ${FP_EXE} ${FP_CNF} ${FP_INP_A} ${FP_INP_B} ${FP_OUT}
        echo
Code
# Print the test batch script.
cat "${FD_EXE}/run_test.sh"
#!/bin/bash
#
# Prints a start banner (host, Slurm array index, time stamp), loads the
# project configuration, and prints the elapsed run time.
#
# Arguments:
#   $1 - project root directory (FD_PRJ); the configuration is read from
#        ${FD_PRJ}/scripts/config_project.sh
# Exit status:
#   non-zero if $1 is missing or the configuration file is not readable

### print start message
timer_start=$(date +%s)
echo "Hostname:          " "$(hostname)"
### left unquoted on purpose: adds nothing to the line when the variable is unset
echo "Slurm Array Index: " ${SLURM_ARRAY_TASK_ID}
echo "Time Stamp:        " "$(date +"%m-%d-%y+%T")"
echo

### setup env:
###     get project root path
###     load helper functions
FD_PRJ=${1:?usage: run_test.sh <project root directory>}
FD_EXE="${FD_PRJ}/scripts"
if [[ ! -r "${FD_EXE}/config_project.sh" ]]; then
    echo "ERROR: cannot read ${FD_EXE}/config_project.sh" >&2
    exit 1
fi
source "${FD_EXE}/config_project.sh"

### print end message
### both timers are whole seconds, so shell arithmetic replaces the call to bc
timer=$(date +%s)
runtime=$(( timer - timer_start ))
echo
echo 'Done!'
### displaytime is presumably one of the helpers loaded from config_project.sh
echo "Run Time: $(displaytime "${runtime}")"
Code
# Print the bedtools test batch script.
cat "${FD_EXE}/run_bedtools_test.sh"
#!/bin/bash
#
# Prints a start banner, loads the project configuration, shows the first
# lines of the input file, prints the bedtools help text, and reports the
# elapsed run time.
#
# Arguments:
#   $1 - project root directory (FD_PRJ); the configuration is read from
#        ${FD_PRJ}/scripts/config_project.sh
#   $2 - input file to preview (FP_INP)
# Exit status:
#   non-zero if an argument is missing or the configuration is not readable

### print start message
timer_start=$(date +%s)
echo "Hostname:          " "$(hostname)"
### left unquoted on purpose: adds nothing to the line when the variable is unset
echo "Slurm Array Index: " ${SLURM_ARRAY_TASK_ID}
echo "Time Stamp:        " "$(date +"%m-%d-%y+%T")"
echo

### setup env:
###     get project root path
###     load helper functions
FD_PRJ=${1:?usage: run_bedtools_test.sh <project root directory> <input file>}
FD_EXE="${FD_PRJ}/scripts"
if [[ ! -r "${FD_EXE}/config_project.sh" ]]; then
    echo "ERROR: cannot read ${FD_EXE}/config_project.sh" >&2
    exit 1
fi
source "${FD_EXE}/config_project.sh"

### setup input and output
FP_INP=${2:?usage: run_bedtools_test.sh <project root directory> <input file>}

### show input file
echo "Input: " "${FP_INP}"
echo
echo "show first few lines of input"
### fun_cat is presumably one of the helpers loaded from config_project.sh
fun_cat "${FP_INP}" | head
echo

### execute
bedtools --help
echo

### print end message
### both timers are whole seconds, so shell arithmetic replaces the call to bc
timer=$(date +%s)
runtime=$(( timer - timer_start ))
echo
echo 'Done!'
### displaytime is presumably one of the helpers loaded from config_project.sh
echo "Run Time: $(displaytime "${runtime}")"
Code
# Show the genome annotation directory and list its entries, one per line.
echo "${FD_GEN}"
ls -1 "${FD_GEN}"
/data/reddylab/Kuei/data/annotation/genome/hg38
fasta
hg38.chrom.sizes
hg38.chrom.sizes.chr1
hg38.chrom.sizes.chr10
hg38.chrom.sizes.chr11
hg38.chrom.sizes.chr12
hg38.chrom.sizes.chr13
hg38.chrom.sizes.chr14
hg38.chrom.sizes.chr15
hg38.chrom.sizes.chr16
hg38.chrom.sizes.chr17
hg38.chrom.sizes.chr18
hg38.chrom.sizes.chr19
hg38.chrom.sizes.chr2
hg38.chrom.sizes.chr20
hg38.chrom.sizes.chr21
hg38.chrom.sizes.chr22
hg38.chrom.sizes.chr3
hg38.chrom.sizes.chr4
hg38.chrom.sizes.chr5
hg38.chrom.sizes.chr6
hg38.chrom.sizes.chr7
hg38.chrom.sizes.chr8
hg38.chrom.sizes.chr9
hg38.chrom.sizes.chrX
hg38.chrom.sizes.chrY
hg38.chrom.sizes.rm
Code
# List the chr1 and whole-genome FASTA files; the glob stays outside the quotes.
echo "${FD_GEN}"
ls -1 "${FD_GEN}"/fasta/chr1.*
ls -1 "${FD_GEN}"/fasta/hg38*
/data/reddylab/Kuei/data/annotation/genome/hg38
/data/reddylab/Kuei/data/annotation/genome/hg38/fasta/chr1.fa
/data/reddylab/Kuei/data/annotation/genome/hg38/fasta/chr1.fa.fai
/data/reddylab/Kuei/data/annotation/genome/hg38/fasta/hg38.fa
/data/reddylab/Kuei/data/annotation/genome/hg38/fasta/hg38.fa.fai
Code
# Show the results directory and list the ASTARR MACS region folder.
echo "${FD_RES}"
ls -1 "${FD_RES}/region/fcc_astarr_macs"
/data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results
K562.hg38.ASTARR.macs.KS91.input.rep_all.max_overlaps.q5.bed.gz
K562.hg38.ASTARR.macs.KS91.input.rep_all.union.q5.bed.gz
summary
tmp
Code
# Build the path of the max_overlaps region BED file and confirm it exists.
FD_INP="${FD_RES}/region/fcc_astarr_macs"
FN_INP="K562.hg38.ASTARR.macs.KS91.input.rep_all.max_overlaps.q5.bed.gz"
FP_INP="${FD_INP}/${FN_INP}"

ls "${FP_INP}"
/data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region/fcc_astarr_macs/K562.hg38.ASTARR.macs.KS91.input.rep_all.max_overlaps.q5.bed.gz
Code
# The output keeps the input file name (FN_INP, set in a previous cell) under
# the region_nuc folder; create that folder if it does not exist yet.
FD_OUT="${FD_RES}/region_nuc/fcc_astarr_macs"
FN_OUT="${FN_INP}"
FP_OUT="${FD_OUT}/${FN_OUT}"

mkdir -p "${FD_OUT}"
Code
# Build the full path of the whole-genome FASTA (FP_GEN) and confirm it exists.
echo "${FD_GEN}"
FN_GEN=fasta/hg38.fa
FP_GEN="${FD_GEN}/${FN_GEN}"

ls "${FP_GEN}"
/data/reddylab/Kuei/data/annotation/genome/hg38
/data/reddylab/Kuei/data/annotation/genome/hg38/fasta/hg38.fa
Code
### Single-job submission of run_bedtools_nuc.sh for the region file set above.
### NOTE(review): the first argument here is ${FD_PRJ}, whereas the loop in the
### Execute section passes ${FP_CNF} to the same script; confirm which one the
### current run_bedtools_nuc.sh expects before re-running.
### ${NODE} is quoted so that an empty value is rejected by sbatch instead of
### letting -p swallow the next option (--exclude=dl-01) as the partition name.
FN_LOG=bedtools.nuc.region.atac.txt
FP_LOG="${FD_LOG}/${FN_LOG}"

sbatch -p "${NODE}" \
    --exclude=dl-01 \
    --cpus-per-task 4 \
    --mem 4G \
    --output "${FP_LOG}" \
    "${FD_EXE}/run_bedtools_nuc.sh" "${FD_PRJ}" "${FP_GEN}" "${FP_INP}" "${FP_OUT}"
Submitted batch job 35135993
Code
# Inspect the log of the job submitted above.
cat "${FP_LOG}"
Hostname:           x2-08-1.genome.duke.edu
Slurm Array Index: 
Time Stamp:         09-24-24+17:44:18

Genome:  /data/reddylab/Kuei/data/annotation/genome/hg38/fasta/hg38.fa

Input:   /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region/fcc_astarr_macs/K562.hg38.ASTARR.macs.KS91.input.rep_all.max_overlaps.q5.bed.gz

show first few lines of input
chr1    10038   10405
chr1    14282   14614
chr1    16025   16338
chr1    17288   17689
chr1    28934   29499
chr1    115429  115969
chr1    136201  137353
chr1    137748  138049
chr1    138321  139517
chr1    181005  181854


Output:  /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region_nuc/fcc_astarr_macs/K562.hg38.ASTARR.macs.KS91.input.rep_all.max_overlaps.q5.bed.gz

show first few lines of output:
#1_usercol  2_usercol   3_usercol   4_pct_at    5_pct_gc    6_num_A 7_num_C 8_num_G 9_num_T 10_num_N    11_num_oth  12_seq_len
chr1    10038   10405   0.476839    0.523161    122 192 0   53  0   0   367
chr1    14282   14614   0.421687    0.578313    65  102 90  75  0   0   332
chr1    16025   16338   0.412141    0.587859    62  89  95  67  0   0   313
chr1    17288   17689   0.374065    0.625935    69  144 107 81  0   0   401
chr1    28934   29499   0.228319    0.771681    66  247 189 63  0   0   565
chr1    115429  115969  0.618519    0.381481    160 119 87  174 0   0   540
chr1    136201  137353  0.305556    0.694444    166 296 504 186 0   0   1152
chr1    137748  138049  0.365449    0.634551    69  68  123 41  0   0   301
chr1    138321  139517  0.413880    0.586120    282 238 463 213 0   0   1196


Done!
Run Time: 6 seconds
Code
### Submit run_bedtools_test.sh with the project root and one input file.
### The log file name is the same one used by the run_bedtools_nuc.sh cell
### (bedtools.nuc.region.atac.txt), so the two jobs write to the same log path.
### ${NODE} is quoted so that an empty value is rejected by sbatch instead of
### letting -p swallow the next option (--exclude=dl-01) as the partition name.
FN_LOG=bedtools.nuc.region.atac.txt
FP_LOG="${FD_LOG}/${FN_LOG}"

sbatch -p "${NODE}" \
    --exclude=dl-01 \
    --cpus-per-task 4 \
    --mem 4G \
    --output "${FP_LOG}" \
    "${FD_EXE}/run_bedtools_test.sh" "${FD_PRJ}" "${FP_INP}"
Submitted batch job 35135992
Code
# Inspect the log of the job submitted above.
cat "${FP_LOG}"
Hostname:           x2-08-1.genome.duke.edu
Slurm Array Index: 
Time Stamp:         09-24-24+17:39:49

Input:  /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region/fcc_astarr_macs/K562.hg38.ASTARR.macs.KS91.input.rep_all.max_overlaps.q5.bed.gz

show first few lines of input
chr1    10038   10405
chr1    14282   14614
chr1    16025   16338
chr1    17288   17689
chr1    28934   29499
chr1    115429  115969
chr1    136201  137353
chr1    137748  138049
chr1    138321  139517
chr1    181005  181854

bedtools is a powerful toolset for genome arithmetic.

Version:   v2.31.1
About:     developed in the quinlanlab.org and by many contributors worldwide.
Docs:      http://bedtools.readthedocs.io/
Code:      https://github.com/arq5x/bedtools2
Mail:      https://groups.google.com/forum/#!forum/bedtools-discuss

Usage:     bedtools <subcommand> [options]

The bedtools sub-commands include:

[ Genome arithmetic ]
    intersect     Find overlapping intervals in various ways.
    window        Find overlapping intervals within a window around an interval.
    closest       Find the closest, potentially non-overlapping interval.
    coverage      Compute the coverage over defined intervals.
    map           Apply a function to a column for each overlapping interval.
    genomecov     Compute the coverage over an entire genome.
    merge         Combine overlapping/nearby intervals into a single interval.
    cluster       Cluster (but don't merge) overlapping/nearby intervals.
    complement    Extract intervals _not_ represented by an interval file.
    shift         Adjust the position of intervals.
    subtract      Remove intervals based on overlaps b/w two files.
    slop          Adjust the size of intervals.
    flank         Create new intervals from the flanks of existing intervals.
    sort          Order the intervals in a file.
    random        Generate random intervals in a genome.
    shuffle       Randomly redistribute intervals in a genome.
    sample        Sample random records from file using reservoir sampling.
    spacing       Report the gap lengths between intervals in a file.
    annotate      Annotate coverage of features from multiple files.

[ Multi-way file comparisons ]
    multiinter    Identifies common intervals among multiple interval files.
    unionbedg     Combines coverage intervals from multiple BEDGRAPH files.

[ Paired-end manipulation ]
    pairtobed     Find pairs that overlap intervals in various ways.
    pairtopair    Find pairs that overlap other pairs in various ways.

[ Format conversion ]
    bamtobed      Convert BAM alignments to BED (& other) formats.
    bedtobam      Convert intervals to BAM records.
    bamtofastq    Convert BAM records to FASTQ records.
    bedpetobam    Convert BEDPE intervals to BAM records.
    bed12tobed6   Breaks BED12 intervals into discrete BED6 intervals.

[ Fasta manipulation ]
    getfasta      Use intervals to extract sequences from a FASTA file.
    maskfasta     Use intervals to mask sequences from a FASTA file.
    nuc           Profile the nucleotide content of intervals in a FASTA file.

[ BAM focused tools ]
    multicov      Counts coverage from multiple BAMs at specific intervals.
    tag           Tag BAM alignments based on overlaps with interval files.

[ Statistical relationships ]
    jaccard       Calculate the Jaccard statistic b/w two sets of intervals.
    reldist       Calculate the distribution of relative distances b/w two files.
    fisher        Calculate Fisher statistic b/w two feature files.

[ Miscellaneous tools ]
    overlap       Computes the amount of overlap from two intervals.
    igv           Create an IGV snapshot batch script.
    links         Create a HTML page of links to UCSC locations.
    makewindows   Make interval "windows" across a genome.
    groupby       Group by common cols. & summarize oth. cols. (~ SQL "groupBy")
    expand        Replicate lines based on lists of values in columns.
    split         Split a file into multiple files with equal records or base pairs.
    summary       Statistical summary of intervals in a file.

[ General Parameters ]
     --cram-ref    Reference used by a CRAM input

[ General help ]
    --help        Print this help menu.
    --version     What version of bedtools are you using?.
    --contact     Feature requests, bugs, mailing lists, etc.



Done!
Run Time: 1 seconds
Code
# Show which bedtools is picked up; `command -v` is the shell builtin
# equivalent of `which` and does not depend on an external program.
command -v bedtools
/data/reddylab/Kuei/repo/Proj_ENCODE_FCC/scripts/app_duke_hardac/bedtools
Code
# List the hg38 annotation directory (hard-coded absolute path).
ls /data/reddylab/Kuei/data/annotation/genome/hg38
fasta                   hg38.chrom.sizes.chr16  hg38.chrom.sizes.chr4
hg38.chrom.sizes        hg38.chrom.sizes.chr17  hg38.chrom.sizes.chr5
hg38.chrom.sizes.chr1   hg38.chrom.sizes.chr18  hg38.chrom.sizes.chr6
hg38.chrom.sizes.chr10  hg38.chrom.sizes.chr19  hg38.chrom.sizes.chr7
hg38.chrom.sizes.chr11  hg38.chrom.sizes.chr2   hg38.chrom.sizes.chr8
hg38.chrom.sizes.chr12  hg38.chrom.sizes.chr20  hg38.chrom.sizes.chr9
hg38.chrom.sizes.chr13  hg38.chrom.sizes.chr21  hg38.chrom.sizes.chrX
hg38.chrom.sizes.chr14  hg38.chrom.sizes.chr22  hg38.chrom.sizes.chrY
hg38.chrom.sizes.chr15  hg38.chrom.sizes.chr3   hg38.chrom.sizes.rm
Code
# List every FASTA file and index under the hg38 fasta directory
# (hard-coded absolute path).
ls /data/reddylab/Kuei/data/annotation/genome/hg38/fasta
chr10.fa                    chr3_KI270777v1_alt.fa
chr10.fa.fai                chr3_KI270778v1_alt.fa
chr10_GL383545v1_alt.fa     chr3_KI270779v1_alt.fa
chr10_GL383546v1_alt.fa     chr3_KI270780v1_alt.fa
chr10_KI270824v1_alt.fa     chr3_KI270781v1_alt.fa
chr10_KI270825v1_alt.fa     chr3_KI270782v1_alt.fa
chr11.fa                    chr3_KI270783v1_alt.fa
chr11.fa.fai                chr3_KI270784v1_alt.fa
chr11_GL383547v1_alt.fa     chr3_KI270895v1_alt.fa
chr11_JH159136v1_alt.fa     chr3_KI270924v1_alt.fa
chr11_JH159137v1_alt.fa     chr3_KI270934v1_alt.fa
chr11_KI270721v1_random.fa  chr3_KI270935v1_alt.fa
chr11_KI270826v1_alt.fa     chr3_KI270936v1_alt.fa
chr11_KI270827v1_alt.fa     chr3_KI270937v1_alt.fa
chr11_KI270829v1_alt.fa     chr4.fa
chr11_KI270830v1_alt.fa     chr4.fa.fai
chr11_KI270831v1_alt.fa     chr4_GL000008v2_random.fa
chr11_KI270832v1_alt.fa     chr4_GL000257v2_alt.fa
chr11_KI270902v1_alt.fa     chr4_GL383527v1_alt.fa
chr11_KI270903v1_alt.fa     chr4_GL383528v1_alt.fa
chr11_KI270927v1_alt.fa     chr4_KI270785v1_alt.fa
chr12.fa                    chr4_KI270786v1_alt.fa
chr12.fa.fai                chr4_KI270787v1_alt.fa
chr12_GL383549v1_alt.fa     chr4_KI270788v1_alt.fa
chr12_GL383550v2_alt.fa     chr4_KI270789v1_alt.fa
chr12_GL383551v1_alt.fa     chr4_KI270790v1_alt.fa
chr12_GL383552v1_alt.fa     chr4_KI270896v1_alt.fa
chr12_GL383553v2_alt.fa     chr4_KI270925v1_alt.fa
chr12_GL877875v1_alt.fa     chr5.fa
chr12_GL877876v1_alt.fa     chr5.fa.fai
chr12_KI270833v1_alt.fa     chr5_GL000208v1_random.fa
chr12_KI270834v1_alt.fa     chr5_GL339449v2_alt.fa
chr12_KI270835v1_alt.fa     chr5_GL383530v1_alt.fa
chr12_KI270836v1_alt.fa     chr5_GL383531v1_alt.fa
chr12_KI270837v1_alt.fa     chr5_GL383532v1_alt.fa
chr12_KI270904v1_alt.fa     chr5_GL949742v1_alt.fa
chr13.fa                    chr5_KI270791v1_alt.fa
chr13.fa.fai                chr5_KI270792v1_alt.fa
chr13_KI270838v1_alt.fa     chr5_KI270793v1_alt.fa
chr13_KI270839v1_alt.fa     chr5_KI270794v1_alt.fa
chr13_KI270840v1_alt.fa     chr5_KI270795v1_alt.fa
chr13_KI270841v1_alt.fa     chr5_KI270796v1_alt.fa
chr13_KI270842v1_alt.fa     chr5_KI270897v1_alt.fa
chr13_KI270843v1_alt.fa     chr5_KI270898v1_alt.fa
chr14.fa                    chr6.fa
chr14.fa.fai                chr6.fa.fai
chr14_GL000009v2_random.fa  chr6_GL000250v2_alt.fa
chr14_GL000194v1_random.fa  chr6_GL000251v2_alt.fa
chr14_GL000225v1_random.fa  chr6_GL000252v2_alt.fa
chr14_KI270722v1_random.fa  chr6_GL000253v2_alt.fa
chr14_KI270723v1_random.fa  chr6_GL000254v2_alt.fa
chr14_KI270724v1_random.fa  chr6_GL000255v2_alt.fa
chr14_KI270725v1_random.fa  chr6_GL000256v2_alt.fa
chr14_KI270726v1_random.fa  chr6_GL383533v1_alt.fa
chr14_KI270844v1_alt.fa     chr6_KB021644v2_alt.fa
chr14_KI270845v1_alt.fa     chr6_KI270758v1_alt.fa
chr14_KI270846v1_alt.fa     chr6_KI270797v1_alt.fa
chr14_KI270847v1_alt.fa     chr6_KI270798v1_alt.fa
chr15.fa                    chr6_KI270799v1_alt.fa
chr15.fa.fai                chr6_KI270800v1_alt.fa
chr15_GL383554v1_alt.fa     chr6_KI270801v1_alt.fa
chr15_GL383555v2_alt.fa     chr6_KI270802v1_alt.fa
chr15_KI270727v1_random.fa  chr7.fa
chr15_KI270848v1_alt.fa     chr7.fa.fai
chr15_KI270849v1_alt.fa     chr7_GL383534v2_alt.fa
chr15_KI270850v1_alt.fa     chr7_KI270803v1_alt.fa
chr15_KI270851v1_alt.fa     chr7_KI270804v1_alt.fa
chr15_KI270852v1_alt.fa     chr7_KI270805v1_alt.fa
chr15_KI270905v1_alt.fa     chr7_KI270806v1_alt.fa
chr15_KI270906v1_alt.fa     chr7_KI270807v1_alt.fa
chr16.fa                    chr7_KI270808v1_alt.fa
chr16.fa.fai                chr7_KI270809v1_alt.fa
chr16_GL383556v1_alt.fa     chr7_KI270899v1_alt.fa
chr16_GL383557v1_alt.fa     chr8.fa
chr16_KI270728v1_random.fa  chr8.fa.fai
chr16_KI270853v1_alt.fa     chr8_KI270810v1_alt.fa
chr16_KI270854v1_alt.fa     chr8_KI270811v1_alt.fa
chr16_KI270855v1_alt.fa     chr8_KI270812v1_alt.fa
chr16_KI270856v1_alt.fa     chr8_KI270813v1_alt.fa
chr17.fa                    chr8_KI270814v1_alt.fa
chr17.fa.fai                chr8_KI270815v1_alt.fa
chr17_GL000205v2_random.fa  chr8_KI270816v1_alt.fa
chr17_GL000258v2_alt.fa     chr8_KI270817v1_alt.fa
chr17_GL383563v3_alt.fa     chr8_KI270818v1_alt.fa
chr17_GL383564v2_alt.fa     chr8_KI270819v1_alt.fa
chr17_GL383565v1_alt.fa     chr8_KI270820v1_alt.fa
chr17_GL383566v1_alt.fa     chr8_KI270821v1_alt.fa
chr17_JH159146v1_alt.fa     chr8_KI270822v1_alt.fa
chr17_JH159147v1_alt.fa     chr8_KI270900v1_alt.fa
chr17_JH159148v1_alt.fa     chr8_KI270901v1_alt.fa
chr17_KI270729v1_random.fa  chr8_KI270926v1_alt.fa
chr17_KI270730v1_random.fa  chr9.fa
chr17_KI270857v1_alt.fa     chr9.fa.fai
chr17_KI270858v1_alt.fa     chr9_GL383539v1_alt.fa
chr17_KI270859v1_alt.fa     chr9_GL383540v1_alt.fa
chr17_KI270860v1_alt.fa     chr9_GL383541v1_alt.fa
chr17_KI270861v1_alt.fa     chr9_GL383542v1_alt.fa
chr17_KI270862v1_alt.fa     chr9_KI270717v1_random.fa
chr17_KI270907v1_alt.fa     chr9_KI270718v1_random.fa
chr17_KI270908v1_alt.fa     chr9_KI270719v1_random.fa
chr17_KI270909v1_alt.fa     chr9_KI270720v1_random.fa
chr17_KI270910v1_alt.fa     chr9_KI270823v1_alt.fa
chr18.fa                    chrM.fa
chr18.fa.fai                chrUn_GL000195v1.fa
chr18_GL383567v1_alt.fa     chrUn_GL000213v1.fa
chr18_GL383568v1_alt.fa     chrUn_GL000214v1.fa
chr18_GL383569v1_alt.fa     chrUn_GL000216v2.fa
chr18_GL383570v1_alt.fa     chrUn_GL000218v1.fa
chr18_GL383571v1_alt.fa     chrUn_GL000219v1.fa
chr18_GL383572v1_alt.fa     chrUn_GL000220v1.fa
chr18_KI270863v1_alt.fa     chrUn_GL000224v1.fa
chr18_KI270864v1_alt.fa     chrUn_GL000226v1.fa
chr18_KI270911v1_alt.fa     chrUn_KI270302v1.fa
chr18_KI270912v1_alt.fa     chrUn_KI270303v1.fa
chr19.fa                    chrUn_KI270304v1.fa
chr19.fa.fai                chrUn_KI270305v1.fa
chr19_GL000209v2_alt.fa     chrUn_KI270310v1.fa
chr19_GL383573v1_alt.fa     chrUn_KI270311v1.fa
chr19_GL383574v1_alt.fa     chrUn_KI270312v1.fa
chr19_GL383575v2_alt.fa     chrUn_KI270315v1.fa
chr19_GL383576v1_alt.fa     chrUn_KI270316v1.fa
chr19_GL949746v1_alt.fa     chrUn_KI270317v1.fa
chr19_GL949747v2_alt.fa     chrUn_KI270320v1.fa
chr19_GL949748v2_alt.fa     chrUn_KI270322v1.fa
chr19_GL949749v2_alt.fa     chrUn_KI270329v1.fa
chr19_GL949750v2_alt.fa     chrUn_KI270330v1.fa
chr19_GL949751v2_alt.fa     chrUn_KI270333v1.fa
chr19_GL949752v1_alt.fa     chrUn_KI270334v1.fa
chr19_GL949753v2_alt.fa     chrUn_KI270335v1.fa
chr19_KI270865v1_alt.fa     chrUn_KI270336v1.fa
chr19_KI270866v1_alt.fa     chrUn_KI270337v1.fa
chr19_KI270867v1_alt.fa     chrUn_KI270338v1.fa
chr19_KI270868v1_alt.fa     chrUn_KI270340v1.fa
chr19_KI270882v1_alt.fa     chrUn_KI270362v1.fa
chr19_KI270883v1_alt.fa     chrUn_KI270363v1.fa
chr19_KI270884v1_alt.fa     chrUn_KI270364v1.fa
chr19_KI270885v1_alt.fa     chrUn_KI270366v1.fa
chr19_KI270886v1_alt.fa     chrUn_KI270371v1.fa
chr19_KI270887v1_alt.fa     chrUn_KI270372v1.fa
chr19_KI270888v1_alt.fa     chrUn_KI270373v1.fa
chr19_KI270889v1_alt.fa     chrUn_KI270374v1.fa
chr19_KI270890v1_alt.fa     chrUn_KI270375v1.fa
chr19_KI270891v1_alt.fa     chrUn_KI270376v1.fa
chr19_KI270914v1_alt.fa     chrUn_KI270378v1.fa
chr19_KI270915v1_alt.fa     chrUn_KI270379v1.fa
chr19_KI270916v1_alt.fa     chrUn_KI270381v1.fa
chr19_KI270917v1_alt.fa     chrUn_KI270382v1.fa
chr19_KI270918v1_alt.fa     chrUn_KI270383v1.fa
chr19_KI270919v1_alt.fa     chrUn_KI270384v1.fa
chr19_KI270920v1_alt.fa     chrUn_KI270385v1.fa
chr19_KI270921v1_alt.fa     chrUn_KI270386v1.fa
chr19_KI270922v1_alt.fa     chrUn_KI270387v1.fa
chr19_KI270923v1_alt.fa     chrUn_KI270388v1.fa
chr19_KI270929v1_alt.fa     chrUn_KI270389v1.fa
chr19_KI270930v1_alt.fa     chrUn_KI270390v1.fa
chr19_KI270931v1_alt.fa     chrUn_KI270391v1.fa
chr19_KI270932v1_alt.fa     chrUn_KI270392v1.fa
chr19_KI270933v1_alt.fa     chrUn_KI270393v1.fa
chr19_KI270938v1_alt.fa     chrUn_KI270394v1.fa
chr1.fa                     chrUn_KI270395v1.fa
chr1.fa.fai                 chrUn_KI270396v1.fa
chr1_GL383518v1_alt.fa      chrUn_KI270411v1.fa
chr1_GL383519v1_alt.fa      chrUn_KI270412v1.fa
chr1_GL383520v2_alt.fa      chrUn_KI270414v1.fa
chr1_KI270706v1_random.fa   chrUn_KI270417v1.fa
chr1_KI270707v1_random.fa   chrUn_KI270418v1.fa
chr1_KI270708v1_random.fa   chrUn_KI270419v1.fa
chr1_KI270709v1_random.fa   chrUn_KI270420v1.fa
chr1_KI270710v1_random.fa   chrUn_KI270422v1.fa
chr1_KI270711v1_random.fa   chrUn_KI270423v1.fa
chr1_KI270712v1_random.fa   chrUn_KI270424v1.fa
chr1_KI270713v1_random.fa   chrUn_KI270425v1.fa
chr1_KI270714v1_random.fa   chrUn_KI270429v1.fa
chr1_KI270759v1_alt.fa      chrUn_KI270435v1.fa
chr1_KI270760v1_alt.fa      chrUn_KI270438v1.fa
chr1_KI270761v1_alt.fa      chrUn_KI270442v1.fa
chr1_KI270762v1_alt.fa      chrUn_KI270448v1.fa
chr1_KI270763v1_alt.fa      chrUn_KI270465v1.fa
chr1_KI270764v1_alt.fa      chrUn_KI270466v1.fa
chr1_KI270765v1_alt.fa      chrUn_KI270467v1.fa
chr1_KI270766v1_alt.fa      chrUn_KI270468v1.fa
chr1_KI270892v1_alt.fa      chrUn_KI270507v1.fa
chr20.fa                    chrUn_KI270508v1.fa
chr20.fa.fai                chrUn_KI270509v1.fa
chr20_GL383577v2_alt.fa     chrUn_KI270510v1.fa
chr20_KI270869v1_alt.fa     chrUn_KI270511v1.fa
chr20_KI270870v1_alt.fa     chrUn_KI270512v1.fa
chr20_KI270871v1_alt.fa     chrUn_KI270515v1.fa
chr21.fa                    chrUn_KI270516v1.fa
chr21.fa.fai                chrUn_KI270517v1.fa
chr21_GL383578v2_alt.fa     chrUn_KI270518v1.fa
chr21_GL383579v2_alt.fa     chrUn_KI270519v1.fa
chr21_GL383580v2_alt.fa     chrUn_KI270521v1.fa
chr21_GL383581v2_alt.fa     chrUn_KI270522v1.fa
chr21_KI270872v1_alt.fa     chrUn_KI270528v1.fa
chr21_KI270873v1_alt.fa     chrUn_KI270529v1.fa
chr21_KI270874v1_alt.fa     chrUn_KI270530v1.fa
chr22.fa                    chrUn_KI270538v1.fa
chr22.fa.fai                chrUn_KI270539v1.fa
chr22_GL383582v2_alt.fa     chrUn_KI270544v1.fa
chr22_GL383583v2_alt.fa     chrUn_KI270548v1.fa
chr22_KB663609v1_alt.fa     chrUn_KI270579v1.fa
chr22_KI270731v1_random.fa  chrUn_KI270580v1.fa
chr22_KI270732v1_random.fa  chrUn_KI270581v1.fa
chr22_KI270733v1_random.fa  chrUn_KI270582v1.fa
chr22_KI270734v1_random.fa  chrUn_KI270583v1.fa
chr22_KI270735v1_random.fa  chrUn_KI270584v1.fa
chr22_KI270736v1_random.fa  chrUn_KI270587v1.fa
chr22_KI270737v1_random.fa  chrUn_KI270588v1.fa
chr22_KI270738v1_random.fa  chrUn_KI270589v1.fa
chr22_KI270739v1_random.fa  chrUn_KI270590v1.fa
chr22_KI270875v1_alt.fa     chrUn_KI270591v1.fa
chr22_KI270876v1_alt.fa     chrUn_KI270593v1.fa
chr22_KI270877v1_alt.fa     chrUn_KI270741v1.fa
chr22_KI270878v1_alt.fa     chrUn_KI270742v1.fa
chr22_KI270879v1_alt.fa     chrUn_KI270743v1.fa
chr22_KI270928v1_alt.fa     chrUn_KI270744v1.fa
chr2.fa                     chrUn_KI270745v1.fa
chr2.fa.fai                 chrUn_KI270746v1.fa
chr2_GL383521v1_alt.fa      chrUn_KI270747v1.fa
chr2_GL383522v1_alt.fa      chrUn_KI270748v1.fa
chr2_GL582966v2_alt.fa      chrUn_KI270749v1.fa
chr2_KI270715v1_random.fa   chrUn_KI270750v1.fa
chr2_KI270716v1_random.fa   chrUn_KI270751v1.fa
chr2_KI270767v1_alt.fa      chrUn_KI270752v1.fa
chr2_KI270768v1_alt.fa      chrUn_KI270753v1.fa
chr2_KI270769v1_alt.fa      chrUn_KI270754v1.fa
chr2_KI270770v1_alt.fa      chrUn_KI270755v1.fa
chr2_KI270771v1_alt.fa      chrUn_KI270756v1.fa
chr2_KI270772v1_alt.fa      chrUn_KI270757v1.fa
chr2_KI270773v1_alt.fa      chrX.fa
chr2_KI270774v1_alt.fa      chrX.fa.fai
chr2_KI270775v1_alt.fa      chrX_KI270880v1_alt.fa
chr2_KI270776v1_alt.fa      chrX_KI270881v1_alt.fa
chr2_KI270893v1_alt.fa      chrX_KI270913v1_alt.fa
chr2_KI270894v1_alt.fa      chrY.fa
chr3.fa                     chrY.fa.fai
chr3.fa.fai                 chrY_KI270740v1_random.fa
chr3_GL000221v1_random.fa   hg38.fa
chr3_GL383526v1_alt.fa      hg38.fa.fai
chr3_JH636055v2_alt.fa
Code
# Count the lines of the chr1 FASTA.
# NOTE: this cell points FD_GEN at the fasta subdirectory, unlike the cells
# where FD_GEN is the hg38 directory and FN_GEN carries the fasta/ prefix.
FD_GEN=/data/reddylab/Kuei/data/annotation/genome/hg38/fasta
FN_GEN=chr1.fa
FP_GEN="${FD_GEN}/${FN_GEN}"

wc -l "${FP_GEN}"
4979130 /data/reddylab/Kuei/data/annotation/genome/hg38/fasta/chr1.fa
Code
# Peek at the first five lines of the FASTA selected above.
head -n 5 "${FP_GEN}"
>chr1
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
Code
# Count the lines of the whole-genome FASTA.
# NOTE: this cell points FD_GEN at the fasta subdirectory, unlike the cells
# where FD_GEN is the hg38 directory and FN_GEN carries the fasta/ prefix.
FD_GEN=/data/reddylab/Kuei/data/annotation/genome/hg38/fasta
FN_GEN=hg38.fa
FP_GEN="${FD_GEN}/${FN_GEN}"

wc -l "${FP_GEN}"
64186394 /data/reddylab/Kuei/data/annotation/genome/hg38/fasta/hg38.fa
Code
# List the ASTARR MACS region folder.
ls "${FD_RES}/region/fcc_astarr_macs"
ASTARRseq_K562_KS91.hg38.Input.rep_all.max_overlaps.q5.bed.gz
ASTARRseq_K562_KS91.hg38.Input.rep_all.union.q5.bed.gz
K562.hg38.ASTARR.macs.KS91.input.rep_all.max_overlaps.q5.bed.gz
K562.hg38.ASTARR.macs.KS91.input.rep_all.union.q5.bed.gz
summary
Code
# Write the first three records of the max_overlaps region file to test.bed
# (created in the current working directory) for a quick interactive check.
FD_BED="${FD_RES}/region/fcc_astarr_macs"
FN_BED=K562.hg38.ASTARR.macs.KS91.input.rep_all.max_overlaps.q5.bed.gz
FP_BED="${FD_BED}/${FN_BED}"

zcat "${FP_BED}" | head -n 3 > test.bed
Code
# Show the three-record test file written by the previous cell.
cat test.bed
chr1    10038   10405
chr1    14282   14614
chr1    16025   16338
Code
# Profile nucleotide content (AT/GC fraction and base counts) of the test
# intervals against the FASTA in FP_GEN; the result goes to stdout.
bedtools nuc \
    -fi  "${FP_GEN}" \
    -bed test.bed
#1_usercol  2_usercol   3_usercol   4_pct_at    5_pct_gc    6_num_A 7_num_C 8_num_G 9_num_T 10_num_N    11_num_oth  12_seq_len
chr1    10038   10405   0.476839    0.523161    122 192 0   53  0   0   367
chr1    14282   14614   0.421687    0.578313    65  102 90  75  0   0   332
chr1    16025   16338   0.412141    0.587859    62  89  95  67  0   0   313
Code
# Print the base directory, then list its contents.
# Quoted: an unset FD_BASE now makes ls fail instead of silently listing
# the current directory.
echo "${FD_BASE}"
ls   "${FD_BASE}"
/data/reddylab/Kuei
backup  bin  container  data  repo  tmp  venv  work
Code
# Print the data directory, then list its contents.
# Quoted: an unset FD_DATA now makes ls fail instead of silently listing
# the current directory.
echo "${FD_DATA}"
ls   "${FD_DATA}"
/data/reddylab/Kuei/data
annotation  scratch  source
Code
# Print the data root, then list its annotation subdirectory.
# Note: the echoed path is the root (FD_DATA), not the directory being listed.
# Quoted so the paths are passed as single arguments.
echo "${FD_DATA}"
ls   "${FD_DATA}/annotation"
/data/reddylab/Kuei/data
genome  log  motif_cluster_jvierstra  snp
Code
# Print the data root, then list the hg38 genome annotation directory.
# Note: the echoed path is the root (FD_DATA), not the directory being listed.
# Quoted so the paths are passed as single arguments.
echo "${FD_DATA}"
ls   "${FD_DATA}/annotation/genome/hg38"
/data/reddylab/Kuei/data
fasta                   hg38.chrom.sizes.chr16  hg38.chrom.sizes.chr4
hg38.chrom.sizes        hg38.chrom.sizes.chr17  hg38.chrom.sizes.chr5
hg38.chrom.sizes.chr1   hg38.chrom.sizes.chr18  hg38.chrom.sizes.chr6
hg38.chrom.sizes.chr10  hg38.chrom.sizes.chr19  hg38.chrom.sizes.chr7
hg38.chrom.sizes.chr11  hg38.chrom.sizes.chr2   hg38.chrom.sizes.chr8
hg38.chrom.sizes.chr12  hg38.chrom.sizes.chr20  hg38.chrom.sizes.chr9
hg38.chrom.sizes.chr13  hg38.chrom.sizes.chr21  hg38.chrom.sizes.chrX
hg38.chrom.sizes.chr14  hg38.chrom.sizes.chr22  hg38.chrom.sizes.chrY
hg38.chrom.sizes.chr15  hg38.chrom.sizes.chr3   hg38.chrom.sizes.rm
Code
# Print the data root, then show the chr1 chromosome-sizes file.
# Note: the echoed path is the root (FD_DATA), not the file being printed.
# Quoted so the paths are passed as single arguments.
echo "${FD_DATA}"
cat  "${FD_DATA}/annotation/genome/hg38/hg38.chrom.sizes.chr1"
/data/reddylab/Kuei/data
chr1    248956422
Code
# Print the data root, then list the hg38 fasta directory.
# Note: the echoed path is the root (FD_DATA), not the directory being listed.
# Quoted so the paths are passed as single arguments.
echo "${FD_DATA}"
ls   "${FD_DATA}/annotation/genome/hg38/fasta"
/data/reddylab/Kuei/data
chr10.fa                    chr3_KI270777v1_alt.fa
chr10.fa.fai                chr3_KI270778v1_alt.fa
chr10_GL383545v1_alt.fa     chr3_KI270779v1_alt.fa
chr10_GL383546v1_alt.fa     chr3_KI270780v1_alt.fa
chr10_KI270824v1_alt.fa     chr3_KI270781v1_alt.fa
chr10_KI270825v1_alt.fa     chr3_KI270782v1_alt.fa
chr11.fa                    chr3_KI270783v1_alt.fa
chr11.fa.fai                chr3_KI270784v1_alt.fa
chr11_GL383547v1_alt.fa     chr3_KI270895v1_alt.fa
chr11_JH159136v1_alt.fa     chr3_KI270924v1_alt.fa
chr11_JH159137v1_alt.fa     chr3_KI270934v1_alt.fa
chr11_KI270721v1_random.fa  chr3_KI270935v1_alt.fa
chr11_KI270826v1_alt.fa     chr3_KI270936v1_alt.fa
chr11_KI270827v1_alt.fa     chr3_KI270937v1_alt.fa
chr11_KI270829v1_alt.fa     chr4.fa
chr11_KI270830v1_alt.fa     chr4.fa.fai
chr11_KI270831v1_alt.fa     chr4_GL000008v2_random.fa
chr11_KI270832v1_alt.fa     chr4_GL000257v2_alt.fa
chr11_KI270902v1_alt.fa     chr4_GL383527v1_alt.fa
chr11_KI270903v1_alt.fa     chr4_GL383528v1_alt.fa
chr11_KI270927v1_alt.fa     chr4_KI270785v1_alt.fa
chr12.fa                    chr4_KI270786v1_alt.fa
chr12.fa.fai                chr4_KI270787v1_alt.fa
chr12_GL383549v1_alt.fa     chr4_KI270788v1_alt.fa
chr12_GL383550v2_alt.fa     chr4_KI270789v1_alt.fa
chr12_GL383551v1_alt.fa     chr4_KI270790v1_alt.fa
chr12_GL383552v1_alt.fa     chr4_KI270896v1_alt.fa
chr12_GL383553v2_alt.fa     chr4_KI270925v1_alt.fa
chr12_GL877875v1_alt.fa     chr5.fa
chr12_GL877876v1_alt.fa     chr5.fa.fai
chr12_KI270833v1_alt.fa     chr5_GL000208v1_random.fa
chr12_KI270834v1_alt.fa     chr5_GL339449v2_alt.fa
chr12_KI270835v1_alt.fa     chr5_GL383530v1_alt.fa
chr12_KI270836v1_alt.fa     chr5_GL383531v1_alt.fa
chr12_KI270837v1_alt.fa     chr5_GL383532v1_alt.fa
chr12_KI270904v1_alt.fa     chr5_GL949742v1_alt.fa
chr13.fa                    chr5_KI270791v1_alt.fa
chr13.fa.fai                chr5_KI270792v1_alt.fa
chr13_KI270838v1_alt.fa     chr5_KI270793v1_alt.fa
chr13_KI270839v1_alt.fa     chr5_KI270794v1_alt.fa
chr13_KI270840v1_alt.fa     chr5_KI270795v1_alt.fa
chr13_KI270841v1_alt.fa     chr5_KI270796v1_alt.fa
chr13_KI270842v1_alt.fa     chr5_KI270897v1_alt.fa
chr13_KI270843v1_alt.fa     chr5_KI270898v1_alt.fa
chr14.fa                    chr6.fa
chr14.fa.fai                chr6.fa.fai
chr14_GL000009v2_random.fa  chr6_GL000250v2_alt.fa
chr14_GL000194v1_random.fa  chr6_GL000251v2_alt.fa
chr14_GL000225v1_random.fa  chr6_GL000252v2_alt.fa
chr14_KI270722v1_random.fa  chr6_GL000253v2_alt.fa
chr14_KI270723v1_random.fa  chr6_GL000254v2_alt.fa
chr14_KI270724v1_random.fa  chr6_GL000255v2_alt.fa
chr14_KI270725v1_random.fa  chr6_GL000256v2_alt.fa
chr14_KI270726v1_random.fa  chr6_GL383533v1_alt.fa
chr14_KI270844v1_alt.fa     chr6_KB021644v2_alt.fa
chr14_KI270845v1_alt.fa     chr6_KI270758v1_alt.fa
chr14_KI270846v1_alt.fa     chr6_KI270797v1_alt.fa
chr14_KI270847v1_alt.fa     chr6_KI270798v1_alt.fa
chr15.fa                    chr6_KI270799v1_alt.fa
chr15.fa.fai                chr6_KI270800v1_alt.fa
chr15_GL383554v1_alt.fa     chr6_KI270801v1_alt.fa
chr15_GL383555v2_alt.fa     chr6_KI270802v1_alt.fa
chr15_KI270727v1_random.fa  chr7.fa
chr15_KI270848v1_alt.fa     chr7.fa.fai
chr15_KI270849v1_alt.fa     chr7_GL383534v2_alt.fa
chr15_KI270850v1_alt.fa     chr7_KI270803v1_alt.fa
chr15_KI270851v1_alt.fa     chr7_KI270804v1_alt.fa
chr15_KI270852v1_alt.fa     chr7_KI270805v1_alt.fa
chr15_KI270905v1_alt.fa     chr7_KI270806v1_alt.fa
chr15_KI270906v1_alt.fa     chr7_KI270807v1_alt.fa
chr16.fa                    chr7_KI270808v1_alt.fa
chr16.fa.fai                chr7_KI270809v1_alt.fa
chr16_GL383556v1_alt.fa     chr7_KI270899v1_alt.fa
chr16_GL383557v1_alt.fa     chr8.fa
chr16_KI270728v1_random.fa  chr8.fa.fai
chr16_KI270853v1_alt.fa     chr8_KI270810v1_alt.fa
chr16_KI270854v1_alt.fa     chr8_KI270811v1_alt.fa
chr16_KI270855v1_alt.fa     chr8_KI270812v1_alt.fa
chr16_KI270856v1_alt.fa     chr8_KI270813v1_alt.fa
chr17.fa                    chr8_KI270814v1_alt.fa
chr17.fa.fai                chr8_KI270815v1_alt.fa
chr17_GL000205v2_random.fa  chr8_KI270816v1_alt.fa
chr17_GL000258v2_alt.fa     chr8_KI270817v1_alt.fa
chr17_GL383563v3_alt.fa     chr8_KI270818v1_alt.fa
chr17_GL383564v2_alt.fa     chr8_KI270819v1_alt.fa
chr17_GL383565v1_alt.fa     chr8_KI270820v1_alt.fa
chr17_GL383566v1_alt.fa     chr8_KI270821v1_alt.fa
chr17_JH159146v1_alt.fa     chr8_KI270822v1_alt.fa
chr17_JH159147v1_alt.fa     chr8_KI270900v1_alt.fa
chr17_JH159148v1_alt.fa     chr8_KI270901v1_alt.fa
chr17_KI270729v1_random.fa  chr8_KI270926v1_alt.fa
chr17_KI270730v1_random.fa  chr9.fa
chr17_KI270857v1_alt.fa     chr9.fa.fai
chr17_KI270858v1_alt.fa     chr9_GL383539v1_alt.fa
chr17_KI270859v1_alt.fa     chr9_GL383540v1_alt.fa
chr17_KI270860v1_alt.fa     chr9_GL383541v1_alt.fa
chr17_KI270861v1_alt.fa     chr9_GL383542v1_alt.fa
chr17_KI270862v1_alt.fa     chr9_KI270717v1_random.fa
chr17_KI270907v1_alt.fa     chr9_KI270718v1_random.fa
chr17_KI270908v1_alt.fa     chr9_KI270719v1_random.fa
chr17_KI270909v1_alt.fa     chr9_KI270720v1_random.fa
chr17_KI270910v1_alt.fa     chr9_KI270823v1_alt.fa
chr18.fa                    chrM.fa
chr18.fa.fai                chrUn_GL000195v1.fa
chr18_GL383567v1_alt.fa     chrUn_GL000213v1.fa
chr18_GL383568v1_alt.fa     chrUn_GL000214v1.fa
chr18_GL383569v1_alt.fa     chrUn_GL000216v2.fa
chr18_GL383570v1_alt.fa     chrUn_GL000218v1.fa
chr18_GL383571v1_alt.fa     chrUn_GL000219v1.fa
chr18_GL383572v1_alt.fa     chrUn_GL000220v1.fa
chr18_KI270863v1_alt.fa     chrUn_GL000224v1.fa
chr18_KI270864v1_alt.fa     chrUn_GL000226v1.fa
chr18_KI270911v1_alt.fa     chrUn_KI270302v1.fa
chr18_KI270912v1_alt.fa     chrUn_KI270303v1.fa
chr19.fa                    chrUn_KI270304v1.fa
chr19.fa.fai                chrUn_KI270305v1.fa
chr19_GL000209v2_alt.fa     chrUn_KI270310v1.fa
chr19_GL383573v1_alt.fa     chrUn_KI270311v1.fa
chr19_GL383574v1_alt.fa     chrUn_KI270312v1.fa
chr19_GL383575v2_alt.fa     chrUn_KI270315v1.fa
chr19_GL383576v1_alt.fa     chrUn_KI270316v1.fa
chr19_GL949746v1_alt.fa     chrUn_KI270317v1.fa
chr19_GL949747v2_alt.fa     chrUn_KI270320v1.fa
chr19_GL949748v2_alt.fa     chrUn_KI270322v1.fa
chr19_GL949749v2_alt.fa     chrUn_KI270329v1.fa
chr19_GL949750v2_alt.fa     chrUn_KI270330v1.fa
chr19_GL949751v2_alt.fa     chrUn_KI270333v1.fa
chr19_GL949752v1_alt.fa     chrUn_KI270334v1.fa
chr19_GL949753v2_alt.fa     chrUn_KI270335v1.fa
chr19_KI270865v1_alt.fa     chrUn_KI270336v1.fa
chr19_KI270866v1_alt.fa     chrUn_KI270337v1.fa
chr19_KI270867v1_alt.fa     chrUn_KI270338v1.fa
chr19_KI270868v1_alt.fa     chrUn_KI270340v1.fa
chr19_KI270882v1_alt.fa     chrUn_KI270362v1.fa
chr19_KI270883v1_alt.fa     chrUn_KI270363v1.fa
chr19_KI270884v1_alt.fa     chrUn_KI270364v1.fa
chr19_KI270885v1_alt.fa     chrUn_KI270366v1.fa
chr19_KI270886v1_alt.fa     chrUn_KI270371v1.fa
chr19_KI270887v1_alt.fa     chrUn_KI270372v1.fa
chr19_KI270888v1_alt.fa     chrUn_KI270373v1.fa
chr19_KI270889v1_alt.fa     chrUn_KI270374v1.fa
chr19_KI270890v1_alt.fa     chrUn_KI270375v1.fa
chr19_KI270891v1_alt.fa     chrUn_KI270376v1.fa
chr19_KI270914v1_alt.fa     chrUn_KI270378v1.fa
chr19_KI270915v1_alt.fa     chrUn_KI270379v1.fa
chr19_KI270916v1_alt.fa     chrUn_KI270381v1.fa
chr19_KI270917v1_alt.fa     chrUn_KI270382v1.fa
chr19_KI270918v1_alt.fa     chrUn_KI270383v1.fa
chr19_KI270919v1_alt.fa     chrUn_KI270384v1.fa
chr19_KI270920v1_alt.fa     chrUn_KI270385v1.fa
chr19_KI270921v1_alt.fa     chrUn_KI270386v1.fa
chr19_KI270922v1_alt.fa     chrUn_KI270387v1.fa
chr19_KI270923v1_alt.fa     chrUn_KI270388v1.fa
chr19_KI270929v1_alt.fa     chrUn_KI270389v1.fa
chr19_KI270930v1_alt.fa     chrUn_KI270390v1.fa
chr19_KI270931v1_alt.fa     chrUn_KI270391v1.fa
chr19_KI270932v1_alt.fa     chrUn_KI270392v1.fa
chr19_KI270933v1_alt.fa     chrUn_KI270393v1.fa
chr19_KI270938v1_alt.fa     chrUn_KI270394v1.fa
chr1.fa                     chrUn_KI270395v1.fa
chr1.fa.fai                 chrUn_KI270396v1.fa
chr1_GL383518v1_alt.fa      chrUn_KI270411v1.fa
chr1_GL383519v1_alt.fa      chrUn_KI270412v1.fa
chr1_GL383520v2_alt.fa      chrUn_KI270414v1.fa
chr1_KI270706v1_random.fa   chrUn_KI270417v1.fa
chr1_KI270707v1_random.fa   chrUn_KI270418v1.fa
chr1_KI270708v1_random.fa   chrUn_KI270419v1.fa
chr1_KI270709v1_random.fa   chrUn_KI270420v1.fa
chr1_KI270710v1_random.fa   chrUn_KI270422v1.fa
chr1_KI270711v1_random.fa   chrUn_KI270423v1.fa
chr1_KI270712v1_random.fa   chrUn_KI270424v1.fa
chr1_KI270713v1_random.fa   chrUn_KI270425v1.fa
chr1_KI270714v1_random.fa   chrUn_KI270429v1.fa
chr1_KI270759v1_alt.fa      chrUn_KI270435v1.fa
chr1_KI270760v1_alt.fa      chrUn_KI270438v1.fa
chr1_KI270761v1_alt.fa      chrUn_KI270442v1.fa
chr1_KI270762v1_alt.fa      chrUn_KI270448v1.fa
chr1_KI270763v1_alt.fa      chrUn_KI270465v1.fa
chr1_KI270764v1_alt.fa      chrUn_KI270466v1.fa
chr1_KI270765v1_alt.fa      chrUn_KI270467v1.fa
chr1_KI270766v1_alt.fa      chrUn_KI270468v1.fa
chr1_KI270892v1_alt.fa      chrUn_KI270507v1.fa
chr20.fa                    chrUn_KI270508v1.fa
chr20.fa.fai                chrUn_KI270509v1.fa
chr20_GL383577v2_alt.fa     chrUn_KI270510v1.fa
chr20_KI270869v1_alt.fa     chrUn_KI270511v1.fa
chr20_KI270870v1_alt.fa     chrUn_KI270512v1.fa
chr20_KI270871v1_alt.fa     chrUn_KI270515v1.fa
chr21.fa                    chrUn_KI270516v1.fa
chr21.fa.fai                chrUn_KI270517v1.fa
chr21_GL383578v2_alt.fa     chrUn_KI270518v1.fa
chr21_GL383579v2_alt.fa     chrUn_KI270519v1.fa
chr21_GL383580v2_alt.fa     chrUn_KI270521v1.fa
chr21_GL383581v2_alt.fa     chrUn_KI270522v1.fa
chr21_KI270872v1_alt.fa     chrUn_KI270528v1.fa
chr21_KI270873v1_alt.fa     chrUn_KI270529v1.fa
chr21_KI270874v1_alt.fa     chrUn_KI270530v1.fa
chr22.fa                    chrUn_KI270538v1.fa
chr22.fa.fai                chrUn_KI270539v1.fa
chr22_GL383582v2_alt.fa     chrUn_KI270544v1.fa
chr22_GL383583v2_alt.fa     chrUn_KI270548v1.fa
chr22_KB663609v1_alt.fa     chrUn_KI270579v1.fa
chr22_KI270731v1_random.fa  chrUn_KI270580v1.fa
chr22_KI270732v1_random.fa  chrUn_KI270581v1.fa
chr22_KI270733v1_random.fa  chrUn_KI270582v1.fa
chr22_KI270734v1_random.fa  chrUn_KI270583v1.fa
chr22_KI270735v1_random.fa  chrUn_KI270584v1.fa
chr22_KI270736v1_random.fa  chrUn_KI270587v1.fa
chr22_KI270737v1_random.fa  chrUn_KI270588v1.fa
chr22_KI270738v1_random.fa  chrUn_KI270589v1.fa
chr22_KI270739v1_random.fa  chrUn_KI270590v1.fa
chr22_KI270875v1_alt.fa     chrUn_KI270591v1.fa
chr22_KI270876v1_alt.fa     chrUn_KI270593v1.fa
chr22_KI270877v1_alt.fa     chrUn_KI270741v1.fa
chr22_KI270878v1_alt.fa     chrUn_KI270742v1.fa
chr22_KI270879v1_alt.fa     chrUn_KI270743v1.fa
chr22_KI270928v1_alt.fa     chrUn_KI270744v1.fa
chr2.fa                     chrUn_KI270745v1.fa
chr2.fa.fai                 chrUn_KI270746v1.fa
chr2_GL383521v1_alt.fa      chrUn_KI270747v1.fa
chr2_GL383522v1_alt.fa      chrUn_KI270748v1.fa
chr2_GL582966v2_alt.fa      chrUn_KI270749v1.fa
chr2_KI270715v1_random.fa   chrUn_KI270750v1.fa
chr2_KI270716v1_random.fa   chrUn_KI270751v1.fa
chr2_KI270767v1_alt.fa      chrUn_KI270752v1.fa
chr2_KI270768v1_alt.fa      chrUn_KI270753v1.fa
chr2_KI270769v1_alt.fa      chrUn_KI270754v1.fa
chr2_KI270770v1_alt.fa      chrUn_KI270755v1.fa
chr2_KI270771v1_alt.fa      chrUn_KI270756v1.fa
chr2_KI270772v1_alt.fa      chrUn_KI270757v1.fa
chr2_KI270773v1_alt.fa      chrX.fa
chr2_KI270774v1_alt.fa      chrX.fa.fai
chr2_KI270775v1_alt.fa      chrX_KI270880v1_alt.fa
chr2_KI270776v1_alt.fa      chrX_KI270881v1_alt.fa
chr2_KI270893v1_alt.fa      chrX_KI270913v1_alt.fa
chr2_KI270894v1_alt.fa      chrY.fa
chr3.fa                     chrY.fa.fai
chr3.fa.fai                 chrY_KI270740v1_random.fa
chr3_GL000221v1_random.fa   hg38.fa
chr3_GL383526v1_alt.fa      hg38.fa.fai
chr3_JH636055v2_alt.fa