#!/usr/bin/perl
# bigram.pl - count character bigrams (adjacent character pairs) in a file.
#
# Usage: bigram.pl <input-file> <output-file>
#
# The whole input file is read, newlines and tabs are discarded, and every
# pair of adjacent characters in what remains is counted.  Because line
# breaks are dropped, the last character of one line pairs with the first
# character of the next.
#
# The program then prompts on standard output and reads a minimum count
# from standard input.  Every pair of printable ASCII characters (space
# through tilde) whose count is at least that minimum is written to the
# output file as "<pair><TAB><count>", four entries per line.

use strict;
use warnings;

defined($ARGV[1])
    or die "Usage: bigram.pl <input-file> <output-file>\n";
my ($input_path, $output_path) = @ARGV;

# Read the input before touching the output so that an unreadable input
# does not leave behind an empty or truncated output file.
open(my $in, '<', $input_path) or die "Sorry. Couldn't open: $!\n";
my $text = do { local $/; <$in> };    # undef $/ => read the whole file
close($in);
$text = '' unless defined $text;

open(my $out, '>', $output_path) or die "Output file open failed: $!\n";

# Ask for the minimum count a pair needs in order to be reported.
print "\nresults from : ";
my $threshold = <STDIN>;
$threshold = '' unless defined $threshold;    # STDIN already at EOF
chomp $threshold;
{
    # Apply the same numeric coercion a bare ">=" comparison would, so
    # blank or non-numeric input is treated as 0 rather than warning on
    # every comparison.
    no warnings 'numeric';
    $threshold += 0;
}

write_report($out, count_bigrams($text), $threshold);

print "\n";
close($out) or die "Output file close failed: $!\n";

# count_bigrams($text)
#
# Returns a reference to a hash mapping each two-character string that
# occurs in $text (after newlines and tabs have been removed) to the number
# of times it occurs.  Overlapping pairs are counted: "abc" yields "ab" and
# "bc".  Text shorter than two characters yields an empty hash.
sub count_bigrams {
    my ($text) = @_;

    # Line breaks and tabs never take part in a pair.
    $text =~ tr/\n\t//d;

    my %count_of;
    for my $start (0 .. length($text) - 2) {
        $count_of{ substr($text, $start, 2) }++;
    }
    return \%count_of;
}

# write_report($out, $count_of, $threshold)
#
# Writes to filehandle $out every pair of printable ASCII characters whose
# count in %$count_of is at least $threshold.  Pairs are visited in
# character-code order (first character, then second).  Each entry is
# "<pair><TAB><count>"; entries are separated by a tab, and a newline
# replaces the tab after every fourth entry.
sub write_report {
    my ($out, $count_of, $threshold) = @_;

    my $lowest  = ord(' ');
    my $highest = ord('~');
    my $entries_in_row = 0;

    for my $first ($lowest .. $highest) {
        for my $second ($lowest .. $highest) {
            my $pair  = chr($first) . chr($second);
            my $count = $count_of->{$pair} || 0;
            next if $count < $threshold;

            print {$out} $pair, "\t", $count;

            # Four entries per output line.
            $entries_in_row++;
            if ($entries_in_row == 4) {
                $entries_in_row = 0;
                print {$out} "\n";
            }
            else {
                print {$out} "\t";
            }
        }
    }
    return;
}