#!/usr/bin/perl # Find word frequencies in a document $/ = ""; # paragraph mode. $* = 1; # enable multiline patterns @mytextone = ("what the hell", " do you think you are doing", "with the last of the element's"); @mytexttwo = ("if you ever do", "that to me", "again", "I'll not be friends with you"); %wordcount = &countwords (@mytextone); # print out the entries in the wordcount array foreach $word (sort keys(%wordcount)) { printf "%-20s %d\n", $word, $wordcount{$word}; } %morewords = &countwords (@mytexttwo); foreach $word (sort keys(%morewords)) { printf "%-20s %d\n", $word, $morewords{$word}; } print "\nSubcommand here \n\n"; @common = commonkeys( \%wordcount, \%morewords, \%morewords); print " @common \n"; # The countwords routine implemented as a subroutine. sub countwords{ my %wordcount; foreach(@_) { s/-\n//g; # de-hyphenate s/-/ /g; # get rid of text "-" s/ly\b//g; s/est\b//g; s/ing\b//g; s/ed\b//g; tr/A-Z/a-z/; # everything goes to lower case, # TRanslates characters in a search list to characters # in a replacement list my @words = split(/\W*\s+\W*/, $_); # splits the current paragraph into an array # containing the words. \W is a non-word # \s is whitespace foreach $word (@words) { $wordcount{$word}++; # $word is the key into the associative array # %wordcount. The value is incremented eachtime. } } return(%wordcount); } # SUBROUTINE to find common keys of the hashes. sub commonkeys { my ($k, $href, %seen); # locals foreach $href (@_){ while ( ($k) = each %$href) { $seen{$k}++; } } return grep { $seen{$_} == @_ } keys %seen; }