#!/usr/bin/perl

use warnings;
use strict;

unless (@ARGV) {
    print "Uso: $0 <data_file>\n\ndata_file es un fichero de dos columnas numéricas separadas por espacios.\n";
}

my $DEBUG = 0;

open my $IN, "<", $ARGV[0] or die "Cannot open $ARGV[0]: $!";
my $count=0;
my (@x,@y);
print "Datos:\n" if $DEBUG;
while (<$IN>){
    next if /^#/;
    chomp;
    ($x[$count],$y[$count]) = split /\s+/, $_;
    print "$y[$count]\n" if $DEBUG;
    $count++;
}
close $IN;
print "-" x 40, "\n";
print "Lag\t\tR2\n";
my $max = 0;
for (0..scalar(@y)-1-14){
    if ($_ > 0) {
	shift @y;
	pop @x;
    }
    my $result = regresion(\@x, \@y);
    print "$_\t\t $result \n" if $DEBUG;
    my $R2adj = $result;
    if ($R2adj > $max) {
	$max = $R2adj;
	print "$_\t\t$R2adj adjusted R2\n";
    }
}

exit 2;


sub regresion {
    my ($x, $y) = @_;
    my @x = @$x;
    my @y = @$y;
    my ($sumxy, $sumx, $sumy, $sumx2);
    for (0..@y-1) {
    	$sumxy+=$x[$_]*$y[$_];
	$sumx+= $x[$_];
	$sumy+= $y[$_]; 
	$sumx2 += $x[$_]**2;
    }

    my $count = scalar @y;
    my $beta = ($count*$sumxy-$sumx*$sumy) / ($count*$sumx2-$sumx**2);
    my $alfa = $sumy/$count - $beta*$sumx/$count;

# print "-" x 70,"\n";
# print "y = $alfa + $beta x\n";
# print "-" x 70,"\n";

    my $media = $sumy/$count;
# print "\tD\tF\tAD\tSE\n";
    my ($ra, $rb, $j, $ad, $mad, $R2);
    for my $i (@x){
	$j++;
	my $f = $alfa+$beta*$i;
	$ad=abs($y[$i]-$f);
#	$se=($dd-$f)**2;
#        printf "%i)\t%.02f\t%.2f\t%.2f\t%.2f\n",$i,$dd,$f,$ad,$se;
	$ra += ($f-$media)**2;
	$rb += ($y[$j-1]-$media)**2;
	$mad+=$ad;
#	$mse+=$se;
    }
    if ($rb != 0){
	my $R2 = $ra/$rb;
    }else{
	die "ERROR: No regression posible";
    }
    $mad /= $count;
# $mse /= $count;
    print "MAD = $mad\n" if $DEBUG; 
# print "R2=$r2\tMAD=$mad\tMSE=$mse\n";
# for $i ($count+1..$count+10){
# 	print "$i)\t\t", $alfa+$beta*$i, "\n";
# }
#    return $R2;
    my $npredictors = 1;  # número de variables independientes a predecir
    my $nsample = $count; # sample size
    my $R2adjusted = 1 - (1-$R2) * ($nsample -1) / ($nsample - $npredictors -1);

    return $R2adjusted; 
}
__END__	
