#!/usr/bin/perl

# Find word frequencies in a document

$/ = ""; # paragraph mode.
$* = 1;  # enable multiline patterns

@mytextone = ("what the hell", " do you think you are doing",
           "with the last of the element's"); 

@mytexttwo = ("if you ever do", "that to me",
"again", "I'll not be friends with you");


%wordcount = &countwords (@mytextone);

# print out the entries in the wordcount array

foreach $word (sort keys(%wordcount))
{
printf "%-20s %d\n", $word, $wordcount{$word};
}

%morewords = &countwords (@mytexttwo);

foreach $word (sort keys(%morewords))
{
printf "%-20s %d\n", $word, $morewords{$word};
}

print      "\nSubcommand here \n\n";
@common =  commonkeys( \%wordcount, \%morewords, \%morewords);
print " @common \n";


# The countwords routine implemented as a subroutine.

sub countwords{

my %wordcount;
     
foreach(@_)
{
s/-\n//g;    # de-hyphenate
s/-/ /g;     # get rid of text "-"
s/ly\b//g;
s/est\b//g;
s/ing\b//g;
s/ed\b//g;


tr/A-Z/a-z/;   # everything goes to lower case,
               # TRanslates characters in a search list to characters
               # in a replacement list

my @words = split(/\W*\s+\W*/, $_); # splits the current paragraph into an array
                                 # containing the words. \W is a non-word
                                 #                       \s is whitespace       

foreach $word (@words)
{
$wordcount{$word}++;      # $word is the key into the associative array
                          # %wordcount. The value is incremented eachtime.
}


}
return(%wordcount);
}


# SUBROUTINE to find common keys of the hashes.

sub commonkeys {
   my ($k, $href, %seen); # locals
   foreach $href (@_){
   while ( ($k) = each %$href) {
       $seen{$k}++;
        }
      }
   return grep { $seen{$_} == @_ } keys %seen;
}