#!/usr/bin/perl

#Author George Mpouras, 2015
#comp.lang.perl.misc 

use strict;
use warnings;

# Search-term lists.  Each inner array is one list; a list is identified by
# its position in this array (0, 1, 2, ...).
my @ListOfPossibleMatches = (
    [ 'be well', 'kidney' ],
    [ qw(cancer gene third) ],
    [ qw(cancer nearly blood) ],
);

# Sample document to be scanned.  The here-doc is single-quoted so the body
# is taken literally (it contains nothing that needs interpolation).
my $text = <<'EOTEXT';
The drug, already taken by millions of patients to control blood sugar
  levels, is thought to be capable of starving some cancer cells to death.
  
New research suggests it can slash the risk of developing liver
  cancer by an astonishing 78 per cent, breast cancer by a third,
  pancreatic cancer by 46 per cent and bowel cancer by nearly a quarter.
EOTEXT
  
## Build the inverted index: search term => [ index of every list holding it ]
my %Result;     # populated by the scan below: "list<N>" => { term => hits }
my %Reverse;
for my $list_no ( 0 .. $#ListOfPossibleMatches ) {
    for my $term ( @{ $ListOfPossibleMatches[$list_no] } ) {
        push @{ $Reverse{$term} }, $list_no;
    }
}


# Scan the document and count, per list, how often each search term occurs.
# Results go to %Result as "list<N>" => { term => number of hits }.
#
# Step 1 - single-word terms, one pass over the whole text.  Every maximal
# run of word characters is a candidate token.  Scanning the text directly
# (instead of line by line with a mandatory trailing line terminator) also
# covers a final line that does not end with a newline.
while ( $text =~ /(\w+)/g ) {
    my $word = $1;

    # Skip everything that is not a search term
    next unless exists $Reverse{$word};

    # Credit the hit to every list that contains this word
    for my $list_no ( @{ $Reverse{$word} } ) {
        $Result{"list$list_no"}{$word}++;
    }
}

# Step 2 - terms containing a non-word character (e.g. "be well").  These
# can never equal a single token from step 1, so they are counted with a
# whole-phrase search.  Any run of whitespace in the text (including a line
# break) matches the whitespace inside the phrase.
for my $phrase ( grep { /\W/ } keys %Reverse ) {
    my $pattern = join '\s+', map { quotemeta } split ' ', $phrase;
    next unless length $pattern;    # term consisted of whitespace only

    # Lookarounds keep the phrase from matching inside a longer word
    my $hits = () = $text =~ /(?<!\w)$pattern(?!\w)/g;
    next unless $hits;

    for my $list_no ( @{ $Reverse{$phrase} } ) {
        $Result{"list$list_no"}{$phrase} += $hits;
    }
}


# use Data::Dumper;
# print Dumper \%Result;
# Report one line per (list, term) pair in the form "<list> <term> <count>".
# The inner hash reference is dereferenced explicitly: calling keys() directly
# on a reference was an experimental feature and is a compile-time error
# since Perl 5.24.  Keys are sorted so the output order is reproducible.
for my $list ( sort keys %Result ) {
    my $count_of = $Result{$list};
    for my $term ( sort keys %{$count_of} ) {
        print "$list $term $count_of->{$term}\n";
    }
}

