Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

...

Code Block
language: bash
# Collapse a tab-separated coverage table (contig, start, end, count) read from
# stdin into bedGraph intervals written to stdout.
#
# Each row is compared only with the row before it, so the input is expected to
# be ordered by contig and then by start. A row extends the currently open
# interval when it is on the same contig, begins at (or one past) the open
# interval's end, and has the same count; otherwise the open interval is
# printed and a new one begins. Intervals whose count is 0 are never printed.
#
# NOTE(review): the "$2 == end + 1" alternative also joins rows separated by a
# one-base gap when coordinates are half-open; confirm this tolerance is wanted.
merge_same_count_intervals() {
  awk '
  BEGIN { FS = OFS = "\t"; chr = ""; start = -1; end = -1; count = 0 }
  {
    if (chr != $1) {                       # new contig; finish previous
      if (count > 0) { print chr, start, end, count }
      chr = $1; start = $2; end = $3; count = $4
    } else if (($2 == end || $2 == end + 1) && ($4 == count)) {
      # same or adjacent position with same count: extend the open interval
      end = $3
    } else {                               # new region on same contig; finish prev
      if (count > 0) { print chr, start, end, count }
      start = $2; end = $3; count = $4
    }
  }
  END {                                    # finish last
    if (count > 0) { print chr, start, end, count }
  }'
}

merge_same_count_intervals \
  < yeast_mrna.gene_coverage.almost.bedGraph \
  > yeast_mrna.gene_coverage.bedGraph

# Line count of the merged file -- much better!
# (The number recorded here was garbled in the page diff; it reads as either
#  241,510 or 1,048,591. TODO: confirm against a fresh run.)
wc -l yeast_mrna.gene_coverage.bedGraph

Make sure the total counts match!

Code Block
language: bash
# Sanity check: the three files should all report the same total.
# The first sums column 8 of the original coverage table, the second sums
# column 4 of the intermediate file, and the third weights each merged
# interval's count (column 4) by its length (end - start).
awk 'BEGIN{tot=0}{tot=tot+$8}END{print tot}' \
  yeast_mrna.gene_coverage.txt                # should be 86703686
awk 'BEGIN{tot=0}{tot=tot+$4}END{print tot}' \
  yeast_mrna.gene_coverage.almost.bedGraph    # should also be 86703686
awk 'BEGIN{tot=0}{tot=tot+$4*($3-$2)}END{print tot}' \
  yeast_mrna.gene_coverage.bedGraph           # should also be 86703686

Now our yeast_mrna.gene_coverage.bedGraph file is a proper bedGraph, whose first lines look like this:

Code Block
chrI    7250    7271    1
chrI    7271    7274    2
chrI    7274    7278    3
chrI    7278    7310    4
chrI    7310    7317    3
chrI    7317    7349    2
chrI    7349    7353    1
chrI    7500    7556    1
chrI    8851    8891    1
chrI    11919   11951   1