#!/usr/local/bin/perl5 -w
###################################################################
##
## Paul Garrett, Sep 02, 1997
##
## Counts frequencies of trigrams (runs of three consecutive
## characters) in the files named on the command line, or in
## standard input when no files are given.
##
## Only ASCII letters and the space character are counted: every
## other character (including the line terminator) is dropped and
## letters are folded to lower case first.  Trigrams therefore never
## span two input lines.
##
## Output is one "<trigram> <percentage>" line per distinct trigram,
## with the percentage truncated to five decimal places.
##
## Switch -rank: if invoked will rank occurrences by frequency
##
## To count digrams instead, set NGRAM_LENGTH to 2.
##
###################################################################

use strict;
use warnings;

## Length of the character runs being counted (3 = trigrams).
use constant NGRAM_LENGTH => 3;

## Run as a program only when executed directly, so the helper subs
## below can be loaded (e.g. with "require") without reading input.
main() unless caller;

## Entry point: parses the optional -rank switch, counts the n-grams
## of every input line and prints the frequency report.
sub main {
    my $ranked = 0;
    if ($ARGV[0] && $ARGV[0] eq '-rank') {
        ## Explicit @ARGV: inside a sub a bare "shift" would act on @_.
        shift @ARGV;
        $ranked = 1;
    }

    my %counter = ();
    my $total   = 0;
    while (my $line = <>) {
        $total += count_ngrams($line, \%counter);
    }

    print format_report(\%counter, $total, $ranked);
    return;
}

## count_ngrams($line, \%counter [, $length])
##
## Normalizes $line (keeps ASCII letters and spaces, lower-cases them)
## and increments $counter->{$ngram} for every run of $length
## consecutive characters.  $length defaults to NGRAM_LENGTH.
## Returns the number of n-grams added (0 if the line is too short).
sub count_ngrams {
    my ($line, $counter, $length) = @_;
    $length = NGRAM_LENGTH unless defined $length;
    return 0 if $length < 1;

    ## Delete everything except letters and spaces; this also removes
    ## the trailing newline.
    (my $text = $line) =~ tr/a-zA-Z //cd;
    $text = lc $text;
    ## $text =~ s/\s+//g;    ## for "with spaces removed"

    ## Slide a window of $length characters along the text.
    my $windows = length($text) - $length + 1;
    return 0 if $windows < 1;
    for my $start (0 .. $windows - 1) {
        $counter->{ substr($text, $start, $length) }++;
    }
    return $windows;
}

## to_percentage($count, $total)
##
## Returns $count as a percentage of $total, truncated (not rounded)
## to five decimal places.  Returns 0 when $total is 0 so that an
## empty input can never cause a division by zero.
sub to_percentage {
    my ($count, $total) = @_;
    return 0 unless $total;
    return int($count / $total * 10_000_000) / 100_000;
}

## format_report(\%counter, $total, $ranked)
##
## Returns the list of output lines "<ngram> <percentage>\n".
## Unranked: sorted alphabetically by n-gram.
## Ranked:   sorted by descending percentage; equal percentages are
##           ordered alphabetically so the output is reproducible.
sub format_report {
    my ($counter, $total, $ranked) = @_;

    my %percentage_of = ();
    for my $key (keys %$counter) {
        $percentage_of{$key} = to_percentage($counter->{$key}, $total);
    }

    my @keys = sort keys %percentage_of;
    if ($ranked) {
        @keys = sort {
            $percentage_of{$b} <=> $percentage_of{$a} or $a cmp $b
        } @keys;
    }

    return map { "$_ $percentage_of{$_}\n" } @keys;
}