#!/usr/bin/perl
# This program counts the character trigrams (overlapping three-character sequences) in a file.
# TODO:
# 1. input file on command line **DONE**
# 2. checking for case where length of file % 3 == 1 **DONE**
# 3. output to file **DONE**
use strict;
use warnings;

# Count overlapping character trigrams in an input file and write a table of
# the printable-ASCII trigrams whose count reaches a threshold.
#
# Usage:  trigram.pl <input-file> <output-file>
#
# The threshold is read from STDIN after the prompt "results from : ".
# An empty answer (or end of input) means 0, i.e. every printable trigram is
# listed, including those that never occurred (with a count of 0).
#
# Newlines and tabs are removed from the input before counting, so trigrams
# are formed across line breaks. No padding is added at the end of the input:
# only windows made of three real characters are counted.
#
# Output format: "<trigram>\t<count>" entries, four per line, entries on the
# same line separated by a tab. Entries appear in ascending character order.

defined($ARGV[1]) or die "Usage: trigram.pl <input-file> <output-file>\n";
my ($in_path, $out_path) = @ARGV;

# Open the input before touching the output so that a bad input path does not
# leave a truncated output file behind. Three-argument open keeps characters
# such as '>' or '|' in a file name from being interpreted as an open mode.
open(my $in, '<', $in_path) or die "Sorry. Couldn't open: $!\n";

print("\nresults from : ");
my $threshold = <STDIN>;
$threshold = '' unless defined $threshold;    # STDIN already at end of input
chomp($threshold);
$threshold =~ s/\A\s+//;
$threshold =~ s/\s+\z//;
$threshold = 0 if $threshold eq '';
$threshold =~ /\A[-+]?(?:\d+\.?\d*|\.\d+)\z/
    or die "Threshold must be a number, got '$threshold'\n";

my $count_of = count_trigrams($in);
close($in);

open(my $out, '>', $out_path) or die "Output file open failed: $!\n";
write_trigram_table($out, $count_of, $threshold);
# Buffered write errors only surface when the handle is closed.
close($out) or die "Output file close failed: $!\n";

print("\n");

# Read everything from the filehandle $in and return a reference to a hash
# mapping each three-character window to the number of times it occurs.
# Newline and tab characters are dropped before the windows are formed.
sub count_trigrams {
    my ($fh) = @_;

    my %seen;
    my $tail = '';    # last (up to) two characters of the previous chunk

    while (1) {
        my $chunk;
        my $got = read($fh, $chunk, 65536);
        defined $got or die "Read from input file failed: $!\n";
        last if $got == 0;

        $chunk =~ tr/\n\t//d;

        # Prefix the carried-over characters so that windows spanning a
        # chunk boundary are counted exactly once.
        my $window = $tail . $chunk;
        for my $start (0 .. length($window) - 3) {
            $seen{ substr($window, $start, 3) }++;
        }
        $tail = length($window) > 2 ? substr($window, -2) : $window;
    }

    return \%seen;
}

# Write to filehandle $fh every trigram made of printable ASCII characters
# (' ' through '~') whose count in %$counts is at least $min_count.
# Trigrams that never occurred are treated as having a count of 0.
sub write_trigram_table {
    my ($fh, $counts, $min_count) = @_;

    my @printable = map { chr($_) } ord(" ") .. ord("~");
    my $in_row = 0;    # entries written on the current output line

    for my $c1 (@printable) {
        for my $c2 (@printable) {
            my $prefix = $c1 . $c2;
            for my $c3 (@printable) {
                my $trigram = $prefix . $c3;
                my $n = exists $counts->{$trigram} ? $counts->{$trigram} : 0;
                next if $n < $min_count;

                # Every fourth entry ends the line; the others end with a tab.
                $in_row++;
                my $terminator = "\t";
                if ($in_row == 4) {
                    $in_row     = 0;
                    $terminator = "\n";
                }
                print {$fh} $trigram, "\t", $n, $terminator
                    or die "Write to output file failed: $!\n";
            }
        }
    }
    return;
}