| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363 |
- #!/usr/bin/perl
- ## --------------------------------------------------------------------------
- ##
- ## Copyright 1996-2017 The NASM Authors - All Rights Reserved
- ## See the file AUTHORS included with the NASM distribution for
- ## the specific copyright holders.
- ##
- ## Redistribution and use in source and binary forms, with or without
- ## modification, are permitted provided that the following
- ## conditions are met:
- ##
- ## * Redistributions of source code must retain the above copyright
- ## notice, this list of conditions and the following disclaimer.
- ## * Redistributions in binary form must reproduce the above
- ## copyright notice, this list of conditions and the following
- ## disclaimer in the documentation and/or other materials provided
- ## with the distribution.
- ##
- ## THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
- ## CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
- ## INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- ## MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- ## DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
- ## CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- ## SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- ## NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- ## LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- ## HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- ## CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
- ## OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
- ## EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- ##
- ## --------------------------------------------------------------------------
- #
- # Generate a perfect hash for general case-insensitive string-to-enum
- # lookup. This generates an enum and the corresponding hash, but
- # relies on a common function to parse the hash.
- #
- # Usage:
- # perfhash.pl h foohash.dat foohash.h (to generate C header)
- # perfhash.pl c foohash.dat foohash.c (to generate C source)
- #
- use strict;
- require 'phash.ph';
- sub basename($) {
- my($s) = @_;
- $s =~ s/^.*[^-[:alnum:]_\.]//; # Remove path component as best we can
- return $s;
- }
- sub intval($) {
- my($s) = @_;
- if ($s =~ /^0/) {
- return oct($s); # Handles octal or hexadecimal
- } elsif ($s =~ /^\-(0.*)$/) {
- return -oct($1);
- } else {
- return $s + 0; # Forcibly convert to number
- }
- }
- my($output, $infile, $outfile) = @ARGV;
- my $me = basename($0);
- # The following special things are allowed in the input file:
- # #<space> or ; begins a comment
- # #include filename
- # #name str
- # The name of the hash
- # #prefix str
- # Defines the prefix before enum
- # #guard str
- # Defines the header guard string
- # #special str [= value]
- # Generate an enum value without a corresponding string; not capitalized.
- # #header str
- # Indicates the name of the .h file to include from the .c file
- # #errval str
- # Define the value to be returned if a string is not found
- # (defaults to -1). This can be any constant C expression,
- # including one of the enum values.
- #
- # Regular lines are just str [= value]
- #
- # Enumeration is generated in the order listed in the file, just as in C
- # specifying a value causes the values to increase by 1 from that point on
- # unless specified.
- my $name;
- my $prefix;
- my $guard;
- my $hfile;
- my %strings = ();
- my %specials = ();
- my $next_value = 0;
- my $errval = '-1';
- my @incstack = ();
- my @filenames = ($infile);
- my @linenums = (0);
- my $dd = undef;
- my $err = 0;
- while (scalar(@filenames)) {
- if (!defined($dd)) {
- open($dd, '<', $filenames[-1])
- or die "$0: cannot open: $filenames[-1]: $!\n";
- }
- my $line = <$dd>;
- if (!defined($line)) {
- close($dd);
- $dd = pop @incstack;
- pop @filenames;
- pop @linenums;
- next;
- }
- $linenums[-1]++;
- chomp $line;
- $line =~ s/\s*(|\;.*|\#\s.*|\#)$//; # Remove comments and trailing space
- $line =~ s/^\s+//; # Remove leading space
- if ($line eq '') {
- # Do nothing
- } elsif ($line =~ /^\#name\s+([[:alnum:]_]+)$/) {
- $name = $1;
- } elsif ($line =~ /^\#prefix\s+([[:alnum:]_]+)$/) {
- $prefix = $1;
- } elsif ($line =~ /^\#guard\s+([[:alnum:]_]+)$/) {
- $guard = $1;
- } elsif ($line =~ /^\#errval\s+(\S.*)$/) {
- $errval = $1;
- } elsif ($line =~ /^\#header\s+(\"(.+)\"|\S+)$/) {
- $hfile = ($2 ne '') ? $2 : $1;
- } elsif ($line =~ /^\#include\s+(\"(.+)\"|\S+)$/) {
- push @incstack, $dd;
- push @filenames, (($2 ne '') ? $2 : $1);
- push @linenums, 0;
- undef $dd; # Open a new file
- } elsif ($line =~ /^(|\#special\s+)(\S+)\s*(|=\s*(\-?(0[Xx][[:xdigit:]]+|0[0-7]*|[0-9]+)))$/) {
- $next_value = intval($4) if ($4 ne '');
- if ($1 eq '') {
- $strings{$2} = $next_value++;
- } else {
- $specials{$2} = $next_value++;
- }
- } else {
- printf STDERR "%s:%d:%s syntax error: \"%s\"\n",
- $filenames[-1], $linenums[-1],
- (scalar(@incstack) == 1) ? '' : "(from $infile)", $line;
- $err++;
- }
- }
- exit 1 if ($err);
- # Default name, prefix, and header guard name
- if (!defined($name)) {
- $name = basename($infile);
- $name =~ s/(\..*)$//; # Strip extension, if any
- }
- if (!defined($prefix)) {
- $prefix = "\U${name}\E_";
- }
- if (!defined($hfile)) {
- $hfile = $outfile;
- $hfile =~ s/\.c$/\.h/;
- }
- if (!defined($guard)) {
- $guard = basename($hfile);
- $guard =~ s/[^[:alnum:]_]/_/g;
- $guard =~ s/__+/_/g;
- $guard = "\U$guard";
- }
- # Verify input. We can't have more than one constant with the same
- # enumeration value, nor the same enumeration string.
- if (scalar(keys(%strings)) == 0) {
- die "$0: $infile: no strings to hash!\n";
- }
- my %enums;
- my %enumvals;
- my %stringbyval;
- my $max_enum;
- my $tbllen = 0;
- my $tbloffs;
- foreach my $s (keys(%strings)) {
- my $es = "${prefix}\U${s}";
- $es =~ s/[^[:alnum:]_]/_/g;
- $es =~ s/__+/_/g;
- my $v = $strings{$s};
- $stringbyval{$v} = $s;
- if (defined($enums{$es})) {
- printf STDERR "%s: string \"%s\" duplicates existing enum %s\n",
- $infile, $s, $es;
- $err++;
- } else {
- $enums{$es} = $v;
- }
- if (defined($enumvals{$v})) {
- printf STDERR "%s: string \"%s\" duplicates existing enum constant %d\n", $v;
- $err++;
- } else {
- $enumvals{$v} = $es;
- }
- $max_enum = $v if ($v > $max_enum || !defined($max_enum));
- $tbloffs = $v if ($v < $tbloffs || !defined($tbloffs));
- $tbllen = $v+1 if ($v >= $tbllen || !defined($tbllen));
- }
- foreach my $s (keys(%specials)) {
- my $es = $prefix . $s; # No string mangling here
- my $v = $specials{$s};
- if (defined($enums{$es})) {
- printf STDERR "%s: special \"%s\" duplicates existing enum %s\n",
- $infile, $s, $es;
- $err++;
- } else {
- $enums{$es} = $v;
- }
- if (defined ($enumvals{$v})) {
- printf STDERR "%s: special \"%s\" duplicates existing enum constant %d\n", $v;
- $err++;
- } else {
- $enumvals{$v} = $es;
- }
- $max_enum = $v if ($v > $max_enum || !defined($max_enum));
- }
- $tbllen -= $tbloffs;
- if ($tbllen > 65536) {
- printf STDERR "%s: span of enumeration values too large\n";
- $err++;
- }
- exit 1 if ($err);
- open(F, '>', $outfile)
- or die "$0: cannot create: ${outfile}: $!\n";
- if ($output eq 'h') {
- print F "/*\n";
- print F " * This file is generated from $infile\n";
- print F " * by $me; do not edit.\n";
- print F " */\n";
- print F "\n";
- print F "#ifndef $guard\n";
- print F "#define $guard 1\n\n";
- print F "#include \"perfhash.h\"\n\n";
- my $c = '{';
- $next_value = 0;
- print F "enum ${name} ";
- foreach my $v (sort { $a <=> $b } keys(%enumvals)) {
- my $s = $enumvals{$v};
- print F "$c\n $s";
- print F " = $v" if ($v != $next_value);
- $next_value = $v + 1;
- $c = ',';
- }
- print F "\n};\n\n";
- print F "extern const struct perfect_hash ${name}_hash;\n";
- printf F "extern const char * const %s_tbl[%d];\n", $name, $tbllen;
- print F "\nstatic inline enum ${name} ${name}_find(const char *str)\n";
- print F "{\n";
- print F " return perfhash_find(&${name}_hash, str);\n";
- print F "}\n";
- print F "\nstatic inline const char * ${name}_name(enum ${name} x)\n";
- print F "{\n";
- printf F " size_t ix = (size_t)x - (%d);\n", $tbloffs;
- printf F " if (ix >= %d)\n", $tbllen;
- print F " return NULL;\n";
- print F " return ${name}_tbl[ix];\n";
- print F "}\n";
- print F "\nstatic inline const char * ${name}_dname(enum ${name} x)\n";
- print F "{\n";
- print F " const char *y = ${name}_name(x);\n";
- print F " return y ? y : invalid_enum_str(x);\n";
- print F "}\n";
- print F "\n#endif /* $guard */\n";
- } elsif ($output eq 'c') {
- # The strings we hash must all be lower case, even if the string
- # table doesn't contain them that way.
- my %lcstrings;
- foreach my $s (keys(%strings)) {
- my $ls = "\L$s";
- if (defined($lcstrings{$ls})) {
- printf STDERR "%s: strings \"%s\" and \"%s\" differ only in case\n",
- $infile, $s, $strings{$lcstrings{$s}};
- } else {
- $lcstrings{$ls} = $strings{$s} - $tbloffs;
- }
- }
- my @hashinfo = gen_perfect_hash(\%lcstrings);
- if (!@hashinfo) {
- die "$0: no hash found\n";
- }
- # Paranoia...
- verify_hash_table(\%lcstrings, \@hashinfo);
- my ($n, $sv, $g) = @hashinfo;
- die if ($n & ($n-1));
- print F "/*\n";
- print F " * This file is generated from $infile\n";
- print F " * by $me; do not edit.\n";
- print F " */\n";
- print F "\n";
- print F "#include \"$hfile\"\n\n";
- printf F "const char * const %s_tbl[%d] = ", $name, $tbllen;
- my $c = '{';
- for (my $i = $tbloffs; $i < $tbloffs+$tbllen; $i++) {
- printf F "%s\n %s", $c,
- defined($stringbyval{$i}) ? '"'.$stringbyval{$i}.'"' : 'NULL';
- $c = ',';
- }
- print F "\n};\n\n";
- print F "#define UNUSED (65536/3)\n\n";
- printf F "static const int16_t %s_hashvals[%d] = ", $name, $n*2;
- $c = '{';
- for (my $i = 0; $i < $n; $i++) {
- my $h = ${$g}[$i*2+0];
- print F "$c\n ", defined($h) ? $h : 'UNUSED';
- $c = ',';
- }
- for (my $i = 0; $i < $n; $i++) {
- my $h = ${$g}[$i*2+1];
- print F "$c\n ", defined($h) ? $h : 'UNUSED';
- $c = ',';
- }
- print F "\n};\n\n";
- print F "const struct perfect_hash ${name}_hash = {\n";
- printf F " UINT64_C(0x%08x%08x),\n", $$sv[0], $$sv[1]; # crcinit
- printf F " UINT32_C(0x%x),\n", $n-1; # hashmask
- printf F " UINT32_C(%u),\n", $tbllen; # tbllen
- printf F " %d,\n", $tbloffs; # tbloffs
- printf F " (%s),\n", $errval; # errval
- printf F " ${name}_hashvals,\n"; # hashvals
- printf F " ${name}_tbl\n"; # strings
- print F "};\n";
- }
|