# Report the total sequence length of every record in a FASTA file.
#
# Input:  mysequences.fa (current directory)
# Output: dist.csv, one "<id><TAB><length>" line per record ID. The file is
#         tab-separated even though it carries a .csv extension.
#
# Lines that appear before the first header are summed under an empty ID, and
# records sharing the same ID are summed together.
gawk '
  # Header line: drop the leading ">" and keep the first whitespace-delimited
  # word as the record ID. Assigning to $0 re-splits the fields, so $1 is
  # that first word. The pattern is anchored so that only lines starting
  # with ">" count as headers.
  /^>/ { $0 = substr($0, 2); sid = $1; next }

  # Sequence line: add its length to the running total of the current record.
  { sl[sid] += length($0) }

  # for-in walks the array in an unspecified order, so the order of the
  # output lines is arbitrary; pipe through sort if a stable order is needed.
  END { for (i in sl) printf "%s\t%d\n", i, sl[i] }
' mysequences.fa > dist.csv