...
Code Block |
---|
|
# Collapse a per-position coverage file into a proper bedGraph.
#
# Reads tab-separated lines (contig, start, end, count) on stdin and writes
# one line per run of adjacent positions that share the same count on stdout.
# Runs whose count is 0 are dropped.
# Assumes the input is grouped by contig and ordered by position: each line
# is only ever compared with the line immediately before it.
collapse_bedgraph() {
  awk '
    BEGIN { FS = OFS = "\t"; chr = ""; start = -1; end = -1; count = 0 }
    {
      if (chr != $1) {
        # new contig; finish previous region (nothing to print before line 1)
        if (count > 0) { print chr, start, end, count }
        chr = $1; start = $2; end = $3; count = $4
      } else if (($2 == end || $2 == end + 1) && ($4 == count)) {
        # same or adjacent position with same count: just extend the region
        # NOTE(review): the end+1 case also bridges a one-base gap -- confirm
        # this is wanted if the input can skip positions
        end = $3
      } else {
        # new region on same contig; finish previous
        if (count > 0) { print chr, start, end, count }
        start = $2; end = $3; count = $4
      }
    }
    END {
      # finish last region
      if (count > 0) { print chr, start, end, count }
    }
  '
}

collapse_bedgraph < yeast_mrna.gene_coverage.almost.bedGraph > yeast_mrna.gene_coverage.bedGraph

# NOTE(review): the expected line count on the original page was garbled
# ("1241,048,591510"); re-run and record the real value here.
wc -l yeast_mrna.gene_coverage.bedGraph # far fewer lines than the .almost.bedGraph input -- much better! |
Make sure the total counts match!
Code Block |
---|
|
# Sanity check: all three files must add up to the same total count.

# Raw coverage table: the count is in column 8.
awk 'BEGIN{tot=0}{tot=tot+$8}END{print tot}' \
  yeast_mrna.gene_coverage.txt # should be 86703686

# Per-position file: the count is in column 4.
awk 'BEGIN{tot=0}{tot=tot+$4}END{print tot}' \
  yeast_mrna.gene_coverage.almost.bedGraph # should also be 86703686

# Collapsed bedGraph: each line spans ($3-$2) positions that share the count
# in column 4, so weight the count by the region length.
awk 'BEGIN{tot=0}{tot=tot+$4*($3-$2)}END{print tot}' \
  yeast_mrna.gene_coverage.bedGraph # should also be 86703686 |
Now our yeast_mrna.gene_coverage.bedGraph file is a proper bedGraph, whose first lines look like this:
Code Block |
---|
chrI 7250 7271 1
chrI 7271 7274 2
chrI 7274 7278 3
chrI 7278 7310 4
chrI 7310 7317 3
chrI 7317 7349 2
chrI 7349 7353 1
chrI 7500 7556 1
chrI 8851 8891 1
chrI 11919 11951 1 |