#!/usr/bin/perl
# This program counts bigrams (pairs of adjacent characters) in a file.
# TODO:
# 1. input file on command line
# 2. checking for case where length of file % 3 == 1
# 3. output to file
use strict;
use warnings;
use constant CHUNK_SIZE => 65_536;    # bytes requested per read() call
use Scalar::Util qw(looks_like_number);

# Count character bigrams in an input file and write the frequent ones
# to an output file.
#
# Usage: bigram.pl <input-file> <output-file>
#
# The minimum count a bigram needs in order to be reported is read from
# STDIN after the "results from : " prompt.  Newlines and tabs in the
# input are dropped before counting, so a bigram may span a line break.
# Only bigrams made of two printable ASCII characters (' ' .. '~') are
# reported, four per output line, as "<pair>\t<count>" fields separated
# by tabs.

@ARGV >= 2 or die "Usage: bigram.pl <input-file> <output-file>\n";
my ($input_path, $output_path) = @ARGV;

# Open the input before touching the output so that a bad input path
# does not truncate an existing output file.
open(my $in, '<', $input_path)
    or die "Sorry. Couldn't open $input_path: $!\n";

print("\nresults from : ");
my $threshold = read_threshold(\*STDIN);

my $count_of = count_bigrams($in);
close($in);

open(my $out, '>', $output_path)
    or die "Output file open failed: $!\n";
write_report($out, $count_of, $threshold);
# Buffered write errors only surface at close, so check it.
close($out) or die "Output file close failed: $!\n";

print("\n");

# Read the reporting threshold from the given handle.
# Returns 0 for an empty answer or EOF; dies if the answer is not a number.
sub read_threshold {
    my ($fh) = @_;

    my $answer = <$fh>;
    $answer = '' unless defined $answer;
    $answer =~ s/^\s+//;
    $answer =~ s/\s+$//;

    return 0 if $answer eq '';
    looks_like_number($answer)
        or die "Threshold must be a number, got '$answer'\n";
    return $answer;
}

# Count every pair of adjacent characters read from the given handle,
# ignoring newlines and tabs.  Returns a reference to a hash that maps
# each two-character string to its number of occurrences.
sub count_bigrams {
    my ($fh) = @_;

    my %count_of;
    my $carry = '';    # last kept character of the previous chunk
    my $chunk;

    while (1) {
        my $got = read($fh, $chunk, CHUNK_SIZE);
        defined $got or die "Read failed: $!\n";
        last if $got == 0;

        $chunk =~ tr/\n\t//d;

        # Prefix the carried character so pairs that straddle two
        # chunks are counted exactly once.
        my $text = $carry . $chunk;
        my $last = length($text) - 1;
        for my $i (0 .. $last - 1) {
            $count_of{ substr($text, $i, 2) }++;
        }
        $carry = substr($text, -1) if $last >= 0;
    }

    return \%count_of;
}

# Write every printable-ASCII bigram whose count is at least $threshold.
# Each entry is "<pair>\t<count>"; entries are separated by a tab and a
# newline is written after every fourth entry.  Pairs that never occurred
# count as 0, so they are only listed when the threshold is 0 or less.
sub write_report {
    my ($fh, $count_of, $threshold) = @_;

    my $in_row = 0;    # entries written on the current output line
    for my $first (ord(' ') .. ord('~')) {
        for my $second (ord(' ') .. ord('~')) {
            my $pair = chr($first) . chr($second);
            my $seen = $count_of->{$pair} || 0;
            next if $seen < $threshold;

            $in_row++;
            print {$fh} $pair, "\t", $seen;
            if ($in_row == 4) {
                $in_row = 0;
                print {$fh} "\n";
            }
            else {
                print {$fh} "\t";
            }
        }
    }
    return;
}