| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201 |
- # -*- perl -*-
- #
- # Perfect Minimal Hash Generator written in Perl, which produces
- # C output.
- #
- require 'random_sv_vectors.ph';
- require 'crc64.ph';
- #
- # Compute the prehash for a key
- #
- # prehash(key, sv, N)
- #
- sub prehash($$$) {
- my($key, $n, $sv) = @_;
- my @c = crc64($sv, $key);
- # Create a bipartite graph...
- $k1 = (($c[1] & ($n-1)) << 1) + 0; # low word
- $k2 = (($c[0] & ($n-1)) << 1) + 1; # high word
- return ($k1, $k2);
- }
- #
- # Walk the assignment graph, return true on success
- #
- sub walk_graph($$$$) {
- my($nodeval,$nodeneigh,$n,$v) = @_;
- my $nx;
- # print STDERR "Vertex $n value $v\n";
- $$nodeval[$n] = $v;
- foreach $nx (@{$$nodeneigh[$n]}) {
- # $nx -> [neigh, hash]
- my ($o, $e) = @$nx;
- # print STDERR "Edge $n,$o value $e: ";
- my $ov;
- if (defined($ov = $$nodeval[$o])) {
- if ($v+$ov != $e) {
- # Cyclic graph with collision
- # print STDERR "error, should be ", $v+$ov, "\n";
- return 0;
- } else {
- # print STDERR "ok\n";
- }
- } else {
- return 0 unless (walk_graph($nodeval, $nodeneigh, $o, $e-$v));
- }
- }
- return 1;
- }
- #
- # Generate the function assuming a given N.
- #
- # gen_hash_n(N, sv, \%data, run)
- #
- sub gen_hash_n($$$$) {
- my($n, $sv, $href, $run) = @_;
- my @keys = keys(%{$href});
- my $i, $sv;
- my $gr;
- my $k, $v;
- my $gsize = 2*$n;
- my @nodeval;
- my @nodeneigh;
- my %edges;
- for ($i = 0; $i < $gsize; $i++) {
- $nodeneigh[$i] = [];
- }
- %edges = ();
- foreach $k (@keys) {
- my ($pf1, $pf2) = prehash($k, $n, $sv);
- ($pf1,$pf2) = ($pf2,$pf1) if ($pf1 > $pf2); # Canonicalize order
- my $pf = "$pf1,$pf2";
- my $e = ${$href}{$k};
- my $xkey;
- if (defined($xkey = $edges{$pf})) {
- next if ($e == ${$href}{$xkey}); # Duplicate hash, safe to ignore
- if (defined($run)) {
- print STDERR "$run: Collision: $pf: $k with $xkey\n";
- }
- return;
- }
- # print STDERR "Edge $pf value $e from $k\n";
- $edges{$pf} = $k;
- push(@{$nodeneigh[$pf1]}, [$pf2, $e]);
- push(@{$nodeneigh[$pf2]}, [$pf1, $e]);
- }
- # Now we need to assign values to each vertex, so that for each
- # edge, the sum of the values for the two vertices give the value
- # for the edge (which is our hash index.) If we find an impossible
- # sitation, the graph was cyclic.
- @nodeval = (undef) x $gsize;
- for ($i = 0; $i < $gsize; $i++) {
- if (scalar(@{$nodeneigh[$i]})) {
- # This vertex has neighbors (is used)
- if (!defined($nodeval[$i])) {
- # First vertex in a cluster
- unless (walk_graph(\@nodeval, \@nodeneigh, $i, 0)) {
- if (defined($run)) {
- print STDERR "$run: Graph is cyclic\n";
- }
- return;
- }
- }
- }
- }
- # for ($i = 0; $i < $n; $i++) {
- # print STDERR "Vertex ", $i, ": ", $g[$i], "\n";
- # }
- if (defined($run)) {
- printf STDERR "$run: Done: n = $n, sv = [0x%08x, 0x%08x]\n",
- $$sv[0], $$sv[1];
- }
- return ($n, $sv, \@nodeval);
- }
- #
- # Driver for generating the function
- #
- # gen_perfect_hash(\%data)
- #
- sub gen_perfect_hash($) {
- my($href) = @_;
- my @keys = keys(%{$href});
- my @hashinfo;
- my $n, $i, $j, $sv, $maxj;
- my $run = 1;
- # Minimal power of 2 value for N with enough wiggle room.
- # The scaling constant must be larger than 0.5 in order for the
- # algorithm to ever terminate.
- my $room = int(scalar(@keys)*0.8);
- $n = 1;
- while ($n < $room) {
- $n <<= 1;
- }
- # Number of times to try...
- $maxj = scalar @random_sv_vectors;
- for ($i = 0; $i < 4; $i++) {
- printf STDERR "%d vectors, trying n = %d...\n",
- scalar @keys, $n;
- for ($j = 0; $j < $maxj; $j++) {
- $sv = $random_sv_vectors[$j];
- @hashinfo = gen_hash_n($n, $sv, $href, $run++);
- return @hashinfo if (@hashinfo);
- }
- $n <<= 1;
- }
- return;
- }
- #
- # Verify that the hash table is actually correct...
- #
- sub verify_hash_table($$)
- {
- my ($href, $hashinfo) = @_;
- my ($n, $sv, $g) = @{$hashinfo};
- my $k;
- my $err = 0;
- foreach $k (keys(%$href)) {
- my ($pf1, $pf2) = prehash($k, $n, $sv);
- my $g1 = ${$g}[$pf1];
- my $g2 = ${$g}[$pf2];
- if ($g1+$g2 != ${$href}{$k}) {
- printf STDERR "%s(%d,%d): %d+%d = %d != %d\n",
- $k, $pf1, $pf2, $g1, $g2, $g1+$g2, ${$href}{$k};
- $err = 1;
- } else {
- # printf STDERR "%s: %d+%d = %d ok\n",
- # $k, $g1, $g2, $g1+$g2;
- }
- }
- die "$0: hash validation error\n" if ($err);
- }
- 1;
|