#!/usr/bin/perl
#
# trigram.pl - count overlapping three-character sequences (tri-grams) in a file.
#
# Usage: trigram.pl <input-file> <output-file>
#
# The input is read as raw bytes. Newline and tab characters are dropped, and
# the remaining text is padded with spaces up to a multiple of three characters
# before every three-character window is counted. The script then prompts on
# standard output ("results from : ") and reads a minimum count from standard
# input. Every tri-gram made only of printable ASCII characters (space through
# "~") whose count is at least that minimum is written to the output file as
# "<tri-gram>\t<count>", four entries per line, entries separated by tabs.
#
# TODO:
#   1. input file on command line                        **DONE**
#   2. checking for case where length of file % 3 == 1   **DONE**
#   3. output to file                                    **DONE**

use strict;
use warnings;

use Scalar::Util qw(looks_like_number);

defined($ARGV[1]) or die "Usage: trigram.pl <input-file> <output-file>\n";
my ($input_path, $output_path) = @ARGV;

my $text     = read_text($input_path);
my $count_of = count_trigrams($text);

# The prompt has no trailing newline, so turn on autoflush to make sure it is
# written before the script starts waiting for input.
$| = 1;
print "\nresults from : ";

my $threshold = <STDIN>;
defined $threshold
    or die "No minimum count was supplied on standard input\n";
chomp $threshold;
looks_like_number($threshold)
    or die "Minimum count must be a number, got '$threshold'\n";

# The output file is opened only now, so that an unreadable input file or a
# bad threshold does not leave behind a freshly truncated output file.
open(my $out, '>', $output_path) or die "Output file open failed: $!\n";
write_report($out, $count_of, $threshold);

# Buffered write errors only surface when the handle is closed.
close($out) or die "Output file close failed: $!\n";

print "\n";

# read_text($path)
#
# Returns the contents of $path as a byte string with every newline and tab
# removed and with trailing spaces appended so that the length is a multiple
# of three. An empty file yields an empty string. Dies if the file cannot be
# opened.
sub read_text {
    my ($path) = @_;

    # ':raw' keeps one character per byte, matching byte-wise counting.
    open(my $in, '<:raw', $path) or die "Sorry. Couldn't open: $!\n";
    my $text = do { local $/; <$in> };
    close($in);

    $text = '' unless defined $text;

    # Newlines and tabs never take part in a tri-gram.
    $text =~ tr/\n\t//d;

    my $remainder = length($text) % 3;
    $text .= ' ' x (3 - $remainder) if $remainder;

    return $text;
}

# count_trigrams($text)
#
# Returns a reference to a hash mapping each three-character substring of
# $text to the number of (overlapping) positions at which it occurs. Text
# shorter than three characters produces an empty hash.
sub count_trigrams {
    my ($text) = @_;

    my %count_of;
    for my $start (0 .. length($text) - 3) {
        $count_of{ substr($text, $start, 3) }++;
    }

    return \%count_of;
}

# write_report($out, $count_of, $threshold)
#
# Walks every combination of three printable ASCII characters in ascending
# character-code order and prints to the filehandle $out each one whose count
# in %$count_of is >= $threshold. Tri-grams that never occurred count as 0.
# Each entry is "<tri-gram>\t<count>"; every fourth entry is followed by a
# newline and every other entry by a tab.
sub write_report {
    my ($out, $count_of, $threshold) = @_;

    my @printable = map { chr } (ord(' ') .. ord('~'));
    my $on_line   = 0;

    for my $first (@printable) {
        for my $second (@printable) {
            for my $third (@printable) {
                my $trigram = $first . $second . $third;
                my $seen = exists $count_of->{$trigram}
                         ? $count_of->{$trigram}
                         : 0;
                next if $seen < $threshold;

                print {$out} $trigram, "\t", $seen;

                $on_line++;
                if ($on_line == 4) {
                    $on_line = 0;
                    print {$out} "\n";
                }
                else {
                    print {$out} "\t";
                }
            }
        }
    }

    return;
}