#!/usr/bin/perl use 5.040; # RDEA stands for 'randomized DEA', similar to 'random forests' applied # to DMUs. (c) Jesús Lozano Mosterín, 2024, 2025. my $version = 0.005; say "\nRandomized Data Envelopment Analysis, RDEA v. $version\n"; # bug hunting + embellishment of index lun 06 ene 2025 13:07:45 CET # more iterations vie 03 ene 2025 06:36:17 CET # bug hunting vie 27 dic 2024 22:52:18 CET # STEP 1: read the data file my @datafile; if (@ARGV){ die "ERROR in datafile. Does not exists." unless (-e $ARGV[0]); warn "Warning: $ARGV[0] seems not a plain text file." if (-B $ARGV[0]); open my $in, '<', $ARGV[0] or die $!; @datafile = <$in>; close $in; }else{ say ""; say "Usage: [perl] $0 "; say ""; say "- where in the first row are number of inputs number of outputs"; say "- rest of the rows contains data for each DMU, values of inputs and outputs"; say "- the separator between data is the 'space' or one comma"; say "- comments in a row are avilable if the row begins with #"; say "- DMU stands for 'decision making units' whatever it can be"; say ""; say "version $version\n"; exit 4; } my $header = 0; my (@input, @output); my ($nin, $nout, $rest, $ndmu); for (@datafile){ next if /^#/; chomp; if ($header == 0){ ($nin, $nout, $rest) = split /\s+|\,/, $_; sure_d( $nin ); no_negative( $nin ); sure_d( $nout ); no_negative( $nout ); $header = 1; next; }else{ $ndmu++; } my @list = split /\s+|\,/, $_; die "ERROR in number of data DMU number $ndmu" if (scalar(@list) != $nin+$nout); for my $i (0 .. $nin-1){ $input[$ndmu][$i] = shift @list; sure_d( $input[$ndmu][$i] ); no_negative( $input[$ndmu][$i] ); } for my $i (0 .. $nout-1){ $output[$ndmu][$i] = shift @list; sure_d( $output[$ndmu][$i] ); no_negative( $output[$ndmu][$i] ); } } # STEP 2: generate a set of weigths and shuffle that a number of times my $nloop = 100_000; my $vueltas = (factorial_bound($nin+$nout) < 200_000) ? factorial_bound($nin+$nout) : 200_000 ; my $countfail = 0; # STEP 3: Las restricciones normales (DEA - Charnes, Cooper, Rhodes) son: # - suma denominador == 1 --> Con RDEA, se podría normalizar ambos, pero creo que con solo cumplir lo siguiente vale # Es decir, el denominador=1 lo puso la transformación de Charnes para convertir program fraccionario en LP. No es el caso. # - numerador <= denominador --> no problem # - testar para todos los dmus --> no problem # STEP 4: probar un número bajo de permutaciones de un set 100_000 # - cambiar de set 30*60 veces # STEP 5: # - sub evaluate { se debe almacenar las veces que una dmu obtiene el premio MAX, pero el rdo. sería un ranking } # - sub evaluateplus { si se pone un threshold normal, como 0.001, o un redondeo, se puede poner a la dmu en una # categoría (lista) de eficiencia. Lo contrario no existiría. } # STEP 6: # - Mirar cuáles son eficientes según evaluateplus # - Listar en ranking y porcentual ordenadas todas las DMUs my @RDEA; my @effs; my @prog = qw( | / - \ | / - \ ); my $p = scalar(@prog)-1; $|++; for my $i (1 .. $nloop) { print "\r", $prog[ $i % $p ], " ", "=" x int( 100*$i /$nloop ), ">", " " x int( 100*($nloop-$i)/$nloop ), "| $i / $nloop"; my @set; for (1 .. $nin+$nout){ # problema básico: rand() devuelve en intervalo [0,1) y lo que se # necesita es (0,1] ¡justo lo contrario! mersenne-twister ofrece opciones # sobre el mismo generador... Otra opción podría ser tomar (1-rand) # Vale, aprobada la moción. push @set, 1-rand; # sprintf ("%.03f", 1-rand); No, to not have 0.000 } for (1 .. $vueltas){ my @weigths = jlm_shuffle(@set); # AQUI VA LO IMPORTANTE my $RDEA_MAX = 0; my $candidate; my $tmp; for my $d (1 .. $ndmu){ my ($numerador, $denominador); for my $o ( 0 .. $nout-1 ){ $numerador += $weigths[$o] * $output[$d][$o]; } for my $i ($nout .. $nin + $nout -1){ $denominador += $weigths[$i] * $input[$d][$i-$nout]; } if ($numerador > $denominador){ undef $candidate; $countfail++; last; }elsif ($denominador > 0) { my $tmp = ($numerador / $denominador); # / ($nloop * $vueltas); mejor abajo, descontando fallos if ($tmp > $RDEA_MAX){ $RDEA_MAX = $tmp; $candidate = $d; } }else{ # Aquí hay un error porque el denominador es 0, y esto no puede pasar # porque las ponderaciones son no nulas. Luego hay inputs negativos. No vale. } } if ($candidate){ $RDEA[$candidate] += $RDEA_MAX; # / ($nloop * $vueltas - $countfail) ); FALLO, al tener solo contador de fallos bueno al final push @effs, $candidate if defined $candidate; }else{ # huh ? } } } # FINAL REPORT say "\n"; my @counter; for my $i (@effs){ if ($i){ $counter[$i]++; $counter[0]++; } } for my $j (1 .. $ndmu) { unless (defined $counter[$j]) { $counter[$j] = 0; } unless ( defined $RDEA[$j] ) { $RDEA[$j] = 0; }else{ $RDEA[$j] /= ($nloop * $vueltas - $countfail); } } for my $i (sort { $counter[$b] <=> $counter[$a] || $RDEA[$b] <=> $RDEA[$a] } 1..$ndmu){ if ($i){ say "DMU $i) counter $counter[$i] corrected percent ", 100*($counter[$i]/$counter[0] - 1/$ndmu), "% index = $RDEA[$i]"; } } say ""; exit 3; sub sure_d { my $n = shift; return if (0+$n eq $n); die "Not numeric -> $n"; } sub no_negative { my $n = shift; if ($n < 0){ die "ERROR: Strict Negative values (< 0) are not allowed in this program."; } } sub factorial_bound { my $n = shift; return 200_111 if $n >= 200_000; return 1 if $n <= 1; return $n * factorial_bound($n-1); } sub jlm_shuffle { my @deck = @_; my @deck2; my $n = scalar(@deck); while ($n){ my $t = int $n*rand; push @deck2, splice(@deck,$t,1); --$n; } return @deck2; } __END__ Original article CCR type DEA [1978] CHARNES, COOPER & RHODES, "Measuring the efficiency of decision making units", European Journal of Operational Research, vol. 2, pp. 429-444.