#!/usr/bin/perl -w use 5.032; # use Compress::Zstd; # local $/ = undef; my @cont; my @dest; my $content; my $file = $ARGV[0] // "AMillionRandomDigits.bin"; if (-s $file > 16777216) { die "Tamaño no soportado"; } open IN, '<' , $file or die $!; while (){ $content .= $_; } close IN; my $l0 = length($content); say "origen $l0"; goto C; goto B; A: say "FASE A"; my $p = int $l0/10; my $r = $l0 - $p*10; my ($sum, $sum2); my @dest2; for my $i (0..9){ $cont[$i] = substr $content, 0, $p, ""; if ($i == 9) { $cont[$i] .= $content; } say "origen $i = ", length($cont[$i]); # system "echo $cont[$i] > temp.bin"; open OUT, '>', "temp.bin" or die $!; print OUT $cont[$i]; close OUT; $dest2[$i] = `xz -9e --stdout temp.bin`; $dest[$i] = compress($cont[$i], 100); say "destino $i = ", length($dest[$i]); say "destino2 $i = ", length($dest2[$i]); $sum += length($dest[$i]); $sum2 += length($dest2[$i]); } say "suma = $sum"; say "suma2 = $sum2"; # my $dest = compress($content, 100); say "FASE B"; B: goto C; my %hash; my @eq; my $tmp; system "xxd AMillionRandomDigits.bin > hex.txt"; open IN, '<', "hex.txt" or die $!; while (my $line = ){ chomp $line; my ($counter, $p1, $p2, $p3, $p4, $p5, $p6, $p7, $p8, $tmp) = split /\s/, $line; $counter =~ s/://; $counter =~ s/^0+//; my $c = 0; for ($p1, $p2, $p3, $p4, $p5, $p6, $p7, $p8){ $c++; if (defined $_){ $hash{$_}++; push @{ $eq[hex $_] }, $counter.$c; # (hex + no. 1-8) } } } close IN; for my $i (keys %hash){ say $hash{$i}; say "@{ $eq[hex $i] }"; # say ""; } # while (my ($i,$j) = each %hash){ # my @oeq = sort { scalar(@$a) <=> scalar(@$b) } @eq; # for my $j (@oeq){ # for my $k (@$j){ # say $k; # } # say ""; # } open OUT, '>', "position.txt" or die $!; for my $i (keys %hash){ if ($hash{$i} >= 2){ my $k = 0; for my $j (@{ $eq[hex $i] }){ if ($j ne @{ $eq[hex $i] }[0]){ print OUT hex($j)-hex(@{ $eq[hex $i] }[$k]), " "; $k++; }else{ print OUT $j," "; } } say OUT ""; }else{ say OUT @{ $eq[hex $i] }[0]; } } close OUT; C: say "FASE C"; # my $pos = 0; my $DEBUG = 1; my $SIZE = 10; # importante my %hc; my %ho; # my $s; my $copia = $content; while (my $chunk = substr ($content, 0, $SIZE, '')){ # extraigo 5*8 = 40 bits $hc{$chunk}++; if (length($chunk) < $SIZE){ $hc{$chunk} = 0; # esto es importante para que el cacho del final, que es más corto, quede siempre } # en último lugar de la ordenación. Si no, corrompe las cadenas } # for my $i (keys %hc){ # $s += $hc{$i}; # } # say "$s chunks" if $DEBUG; my @odict = sort { $hc{$b} <=> $hc{$a} } keys %hc; # DICCIONARIO PRINCIPAL open OUT, ">", "millionbits9.dict" or die $!; for (@odict){ print OUT $_; } close OUT; say scalar(@odict), " number of ordered chunks" if $DEBUG; ### compresión my $kounter = 0; for (@odict){ # say bin($kounter) if $DEBUG; # $ho{$_} = pack "L", ($kounter); $ho{$_} = part($kounter); # 2 niveles de compresión: en %ho no están todos los chunk, sino solo los que no se repiten $kounter++; # el segundo nivel es que el binario asociado sea más corto para los más frecuentes. } open OUT, ">", "millionbits9.comp" or die $!; while (my $chunk = substr ($copia, 0, $SIZE, '')){ print OUT $ho{$chunk}; # escribo números de 24 bits en caractéres } close OUT; ### descompresión use Fcntl; my @dict; my $bytes; my $k = 0; sysopen IN, "millionbits9.dict", O_RDONLY or die $!; do { sysread IN, $bytes, $SIZE; $dict[$k] = $bytes; $k++; }until (length($bytes) < $SIZE); close IN; open IN, "<", "millionbits9.comp" or die $!; my $comp; while (){ $comp .= $_; } close IN; my $num; open OUT, ">", "millionbits9" or die $!; while (my $chunk = substr ($comp, 0, 3, '')){ # debo leer números de 24 bits (3*8 = 24) # $num = unpack "L" , $chunk; if (length($chunk)<3){ $chunk = "\0" x (3-length($chunk)) . $chunk; } $num = unpart($chunk); print OUT $dict[$num]; } close OUT; if ($DEBUG){ say "OK" if (1230 == unpart(part(1230))); } exit 1; sub bin{ # esto debe sustituir a pack/unpack "L" que es 4 bytes, por algo de solo 3 bytes my $n = shift; # posteriormente en caractéres pack "CCC" my $bin = ""; for (0..23){ if ($n & 2**$_){ $bin .= "0"; }else{ $bin .= "1"; } last if $n <= 2**$_; } return $bin; } sub part { my $n = shift; my $n1 = int ($n / (256**2)); my $r1 = $n - $n1*(256**2); die "Número desbocado" if $n1 > 255; my $n2 = int ($r1 / 256); my $r2 = $n -$n1*256**2 - $n2*256; my $n3 = $r2 / 1 ; return chr($n1).chr($n2).chr($n3); } sub unpart { my $s = shift; my $n1 = 256**2 * ord(substr ($s, 0, 1)); my $n2 = 256 * ord(substr ($s, 1, 1)); my $n3 = ord(substr ($s, 2, 1)); return $n1+$n2+$n3; }