#!/usr/local/bin/perl5 -w
################################################################
##
## Paul Garrett, garrett@math.umn.edu, 18 Sept 1997
##
## Script to compute _average_ version of Friedman's
## "Index of Coincidence" of a string and "inner product"
## with a reference string
##
## Usage:
##    $0 < file
##
## Output (each value is a percentage truncated to two decimals):
##    Comparison  sum over letters of freq(input) * freq(reference)
##    Self-avg    sum over letters of freq(input) ** 2
##    Reference   sum over letters of freq(reference) ** 2
##
################################################################

use strict;
use warnings;

## Reference relative frequency of each letter a-z. The input text is
## compared against this table. (Presumably derived from an English
## sample -- the source of the numbers is not recorded here.)
my %reference_freq_of = (
    'e' => 0.117011318185686,
    't' => 0.0993106969619607,
    'o' => 0.0828865628457153,
    'a' => 0.0742064505148498,
    'i' => 0.0703769891924092,
    'n' => 0.0697812952089184,
    's' => 0.0639945536550081,
    'r' => 0.061611777721045,
    'h' => 0.0448472470428049,
    'l' => 0.043485660794826,
    'u' => 0.0376989192409157,
    'd' => 0.0321674751085014,
    'c' => 0.0307207897200238,
    'm' => 0.0291890051910476,
    'y' => 0.0229767679346439,
    'f' => 0.0204237937196834,
    'p' => 0.0203386945791847,
    'w' => 0.0182963152072164,
    'b' => 0.0171049272402349,
    'g' => 0.0162539358352481,
    'v' => 0.0119138796698153,
    'k' => 0.00808441834737469,
    'x' => 0.00331886647944856,
    'j' => 0.00238277593396307,
    'q' => 0.00102118968598417,
    'z' => 0.000595693983490767,
);

## Lowercase the text and delete every character that is not a-z,
## so the result is all lowercase sans spaces, digits and punctuation.
sub normalize_text {
    my ($text) = @_;
    my $letters = lc $text;
    $letters =~ tr/a-z//cd;
    return $letters;
}

## Return a hash ref mapping each letter that occurs in $letters to its
## relative frequency (occurrences / total length), so the values sum to 1.
## An empty string yields an empty hash instead of dividing by zero.
sub letter_frequencies {
    my ($letters) = @_;
    my %frequency_of;
    my $total = length $letters;
    return \%frequency_of if $total == 0;

    my %count_of;
    $count_of{$_}++ for split //, $letters;

    # Divide by the number of letters, not by the last index ($#array),
    # which is one too small and is zero for a one-letter input.
    $frequency_of{$_} = $count_of{$_} / $total for keys %count_of;
    return \%frequency_of;
}

## Sum of $left->{k} * $right->{k} over the keys of %$left.  Keys missing
## from %$right contribute zero.  Keys are visited in sorted order so the
## floating-point sum is the same on every run.
sub inner_product {
    my ($left, $right) = @_;
    my $sum = 0;
    for my $letter (sort keys %$left) {
        my $other = defined $right->{$letter} ? $right->{$letter} : 0;
        $sum += $left->{$letter} * $other;
    }
    return $sum;
}

## Convert a fraction to a percentage truncated (not rounded) to two
## decimal places, e.g. 0.06789 -> 6.78.
sub as_percent {
    my ($fraction) = @_;
    return int(10000 * $fraction) / 100;
}

## Read all of standard input, compute the three statistics and print them.
sub main {
    my $text = join '', <STDIN>;
    my $frequency_of = letter_frequencies(normalize_text($text));

    my $ip      = as_percent(inner_product($frequency_of, \%reference_freq_of));
    my $avg_ix  = as_percent(inner_product($frequency_of, $frequency_of));
    my $ref_avg = as_percent(inner_product(\%reference_freq_of, \%reference_freq_of));

    print "\n";
    print "Comparison is $ip\n";
    print "Self-avg is $avg_ix\n";
    print "Reference = $ref_avg\n";
    print "\n";
    return;
}

# Run only when executed as a script, so the subs above can be loaded
# (e.g. with require) without consuming standard input.
main() unless caller;

#####################################