perfhash.pl 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363
  1. #!/usr/bin/perl
  2. ## --------------------------------------------------------------------------
  3. ##
  4. ## Copyright 1996-2017 The NASM Authors - All Rights Reserved
  5. ## See the file AUTHORS included with the NASM distribution for
  6. ## the specific copyright holders.
  7. ##
  8. ## Redistribution and use in source and binary forms, with or without
  9. ## modification, are permitted provided that the following
  10. ## conditions are met:
  11. ##
  12. ## * Redistributions of source code must retain the above copyright
  13. ## notice, this list of conditions and the following disclaimer.
  14. ## * Redistributions in binary form must reproduce the above
  15. ## copyright notice, this list of conditions and the following
  16. ## disclaimer in the documentation and/or other materials provided
  17. ## with the distribution.
  18. ##
  19. ## THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  20. ## CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  21. ## INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  22. ## MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  23. ## DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  24. ## CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  25. ## SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  26. ## NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  27. ## LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  28. ## HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  29. ## CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  30. ## OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
  31. ## EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  32. ##
  33. ## --------------------------------------------------------------------------
  34. #
  35. # Generate a perfect hash for general case-insensitive string-to-enum
  36. # lookup. This generates an enum and the corresponding hash, but
  37. # relies on a common function to parse the hash.
  38. #
  39. # Usage:
  40. # perfhash.pl h foohash.dat foohash.h (to generate C header)
  41. # perfhash.pl c foohash.dat foohash.c (to generate C source)
  42. #
  43. use strict;
  44. require 'phash.ph';
  45. sub basename($) {
  46. my($s) = @_;
  47. $s =~ s/^.*[^-[:alnum:]_\.]//; # Remove path component as best we can
  48. return $s;
  49. }
  50. sub intval($) {
  51. my($s) = @_;
  52. if ($s =~ /^0/) {
  53. return oct($s); # Handles octal or hexadecimal
  54. } elsif ($s =~ /^\-(0.*)$/) {
  55. return -oct($1);
  56. } else {
  57. return $s + 0; # Forcibly convert to number
  58. }
  59. }
  60. my($output, $infile, $outfile) = @ARGV;
  61. my $me = basename($0);
  62. # The following special things are allowed in the input file:
  63. # #<space> or ; begins a comment
  64. # #include filename
  65. # #name str
  66. # The name of the hash
  67. # #prefix str
  68. # Defines the prefix before enum
  69. # #guard str
  70. # Defines the header guard string
  71. # #special str [= value]
  72. # Generate an enum value without a corresponding string; not capitalized.
  73. # #header str
  74. # Indicates the name of the .h file to include from the .c file
  75. # #errval str
  76. # Define the value to be returned if a string is not found
  77. # (defaults to -1). This can be any constant C expression,
  78. # including one of the enum values.
  79. #
  80. # Regular lines are just str [= value]
  81. #
  82. # Enumeration is generated in the order listed in the file, just as in C
  83. # specifying a value causes the values to increase by 1 from that point on
  84. # unless specified.
  85. my $name;
  86. my $prefix;
  87. my $guard;
  88. my $hfile;
  89. my %strings = ();
  90. my %specials = ();
  91. my $next_value = 0;
  92. my $errval = '-1';
  93. my @incstack = ();
  94. my @filenames = ($infile);
  95. my @linenums = (0);
  96. my $dd = undef;
  97. my $err = 0;
  98. while (scalar(@filenames)) {
  99. if (!defined($dd)) {
  100. open($dd, '<', $filenames[-1])
  101. or die "$0: cannot open: $filenames[-1]: $!\n";
  102. }
  103. my $line = <$dd>;
  104. if (!defined($line)) {
  105. close($dd);
  106. $dd = pop @incstack;
  107. pop @filenames;
  108. pop @linenums;
  109. next;
  110. }
  111. $linenums[-1]++;
  112. chomp $line;
  113. $line =~ s/\s*(|\;.*|\#\s.*|\#)$//; # Remove comments and trailing space
  114. $line =~ s/^\s+//; # Remove leading space
  115. if ($line eq '') {
  116. # Do nothing
  117. } elsif ($line =~ /^\#name\s+([[:alnum:]_]+)$/) {
  118. $name = $1;
  119. } elsif ($line =~ /^\#prefix\s+([[:alnum:]_]+)$/) {
  120. $prefix = $1;
  121. } elsif ($line =~ /^\#guard\s+([[:alnum:]_]+)$/) {
  122. $guard = $1;
  123. } elsif ($line =~ /^\#errval\s+(\S.*)$/) {
  124. $errval = $1;
  125. } elsif ($line =~ /^\#header\s+(\"(.+)\"|\S+)$/) {
  126. $hfile = ($2 ne '') ? $2 : $1;
  127. } elsif ($line =~ /^\#include\s+(\"(.+)\"|\S+)$/) {
  128. push @incstack, $dd;
  129. push @filenames, (($2 ne '') ? $2 : $1);
  130. push @linenums, 0;
  131. undef $dd; # Open a new file
  132. } elsif ($line =~ /^(|\#special\s+)(\S+)\s*(|=\s*(\-?(0[Xx][[:xdigit:]]+|0[0-7]*|[0-9]+)))$/) {
  133. $next_value = intval($4) if ($4 ne '');
  134. if ($1 eq '') {
  135. $strings{$2} = $next_value++;
  136. } else {
  137. $specials{$2} = $next_value++;
  138. }
  139. } else {
  140. printf STDERR "%s:%d:%s syntax error: \"%s\"\n",
  141. $filenames[-1], $linenums[-1],
  142. (scalar(@incstack) == 1) ? '' : "(from $infile)", $line;
  143. $err++;
  144. }
  145. }
  146. exit 1 if ($err);
  147. # Default name, prefix, and header guard name
  148. if (!defined($name)) {
  149. $name = basename($infile);
  150. $name =~ s/(\..*)$//; # Strip extension, if any
  151. }
  152. if (!defined($prefix)) {
  153. $prefix = "\U${name}\E_";
  154. }
  155. if (!defined($hfile)) {
  156. $hfile = $outfile;
  157. $hfile =~ s/\.c$/\.h/;
  158. }
  159. if (!defined($guard)) {
  160. $guard = basename($hfile);
  161. $guard =~ s/[^[:alnum:]_]/_/g;
  162. $guard =~ s/__+/_/g;
  163. $guard = "\U$guard";
  164. }
  165. # Verify input. We can't have more than one constant with the same
  166. # enumeration value, nor the same enumeration string.
  167. if (scalar(keys(%strings)) == 0) {
  168. die "$0: $infile: no strings to hash!\n";
  169. }
  170. my %enums;
  171. my %enumvals;
  172. my %stringbyval;
  173. my $max_enum;
  174. my $tbllen = 0;
  175. my $tbloffs;
  176. foreach my $s (keys(%strings)) {
  177. my $es = "${prefix}\U${s}";
  178. $es =~ s/[^[:alnum:]_]/_/g;
  179. $es =~ s/__+/_/g;
  180. my $v = $strings{$s};
  181. $stringbyval{$v} = $s;
  182. if (defined($enums{$es})) {
  183. printf STDERR "%s: string \"%s\" duplicates existing enum %s\n",
  184. $infile, $s, $es;
  185. $err++;
  186. } else {
  187. $enums{$es} = $v;
  188. }
  189. if (defined($enumvals{$v})) {
  190. printf STDERR "%s: string \"%s\" duplicates existing enum constant %d\n", $v;
  191. $err++;
  192. } else {
  193. $enumvals{$v} = $es;
  194. }
  195. $max_enum = $v if ($v > $max_enum || !defined($max_enum));
  196. $tbloffs = $v if ($v < $tbloffs || !defined($tbloffs));
  197. $tbllen = $v+1 if ($v >= $tbllen || !defined($tbllen));
  198. }
  199. foreach my $s (keys(%specials)) {
  200. my $es = $prefix . $s; # No string mangling here
  201. my $v = $specials{$s};
  202. if (defined($enums{$es})) {
  203. printf STDERR "%s: special \"%s\" duplicates existing enum %s\n",
  204. $infile, $s, $es;
  205. $err++;
  206. } else {
  207. $enums{$es} = $v;
  208. }
  209. if (defined ($enumvals{$v})) {
  210. printf STDERR "%s: special \"%s\" duplicates existing enum constant %d\n", $v;
  211. $err++;
  212. } else {
  213. $enumvals{$v} = $es;
  214. }
  215. $max_enum = $v if ($v > $max_enum || !defined($max_enum));
  216. }
  217. $tbllen -= $tbloffs;
  218. if ($tbllen > 65536) {
  219. printf STDERR "%s: span of enumeration values too large\n";
  220. $err++;
  221. }
  222. exit 1 if ($err);
  223. open(F, '>', $outfile)
  224. or die "$0: cannot create: ${outfile}: $!\n";
  225. if ($output eq 'h') {
  226. print F "/*\n";
  227. print F " * This file is generated from $infile\n";
  228. print F " * by $me; do not edit.\n";
  229. print F " */\n";
  230. print F "\n";
  231. print F "#ifndef $guard\n";
  232. print F "#define $guard 1\n\n";
  233. print F "#include \"perfhash.h\"\n\n";
  234. my $c = '{';
  235. $next_value = 0;
  236. print F "enum ${name} ";
  237. foreach my $v (sort { $a <=> $b } keys(%enumvals)) {
  238. my $s = $enumvals{$v};
  239. print F "$c\n $s";
  240. print F " = $v" if ($v != $next_value);
  241. $next_value = $v + 1;
  242. $c = ',';
  243. }
  244. print F "\n};\n\n";
  245. print F "extern const struct perfect_hash ${name}_hash;\n";
  246. printf F "extern const char * const %s_tbl[%d];\n", $name, $tbllen;
  247. print F "\nstatic inline enum ${name} ${name}_find(const char *str)\n";
  248. print F "{\n";
  249. print F " return perfhash_find(&${name}_hash, str);\n";
  250. print F "}\n";
  251. print F "\nstatic inline const char * ${name}_name(enum ${name} x)\n";
  252. print F "{\n";
  253. printf F " size_t ix = (size_t)x - (%d);\n", $tbloffs;
  254. printf F " if (ix >= %d)\n", $tbllen;
  255. print F " return NULL;\n";
  256. print F " return ${name}_tbl[ix];\n";
  257. print F "}\n";
  258. print F "\nstatic inline const char * ${name}_dname(enum ${name} x)\n";
  259. print F "{\n";
  260. print F " const char *y = ${name}_name(x);\n";
  261. print F " return y ? y : invalid_enum_str(x);\n";
  262. print F "}\n";
  263. print F "\n#endif /* $guard */\n";
  264. } elsif ($output eq 'c') {
  265. # The strings we hash must all be lower case, even if the string
  266. # table doesn't contain them that way.
  267. my %lcstrings;
  268. foreach my $s (keys(%strings)) {
  269. my $ls = "\L$s";
  270. if (defined($lcstrings{$ls})) {
  271. printf STDERR "%s: strings \"%s\" and \"%s\" differ only in case\n",
  272. $infile, $s, $strings{$lcstrings{$s}};
  273. } else {
  274. $lcstrings{$ls} = $strings{$s} - $tbloffs;
  275. }
  276. }
  277. my @hashinfo = gen_perfect_hash(\%lcstrings);
  278. if (!@hashinfo) {
  279. die "$0: no hash found\n";
  280. }
  281. # Paranoia...
  282. verify_hash_table(\%lcstrings, \@hashinfo);
  283. my ($n, $sv, $g) = @hashinfo;
  284. die if ($n & ($n-1));
  285. print F "/*\n";
  286. print F " * This file is generated from $infile\n";
  287. print F " * by $me; do not edit.\n";
  288. print F " */\n";
  289. print F "\n";
  290. print F "#include \"$hfile\"\n\n";
  291. printf F "const char * const %s_tbl[%d] = ", $name, $tbllen;
  292. my $c = '{';
  293. for (my $i = $tbloffs; $i < $tbloffs+$tbllen; $i++) {
  294. printf F "%s\n %s", $c,
  295. defined($stringbyval{$i}) ? '"'.$stringbyval{$i}.'"' : 'NULL';
  296. $c = ',';
  297. }
  298. print F "\n};\n\n";
  299. print F "#define UNUSED (65536/3)\n\n";
  300. printf F "static const int16_t %s_hashvals[%d] = ", $name, $n*2;
  301. $c = '{';
  302. for (my $i = 0; $i < $n; $i++) {
  303. my $h = ${$g}[$i*2+0];
  304. print F "$c\n ", defined($h) ? $h : 'UNUSED';
  305. $c = ',';
  306. }
  307. for (my $i = 0; $i < $n; $i++) {
  308. my $h = ${$g}[$i*2+1];
  309. print F "$c\n ", defined($h) ? $h : 'UNUSED';
  310. $c = ',';
  311. }
  312. print F "\n};\n\n";
  313. print F "const struct perfect_hash ${name}_hash = {\n";
  314. printf F " UINT64_C(0x%08x%08x),\n", $$sv[0], $$sv[1]; # crcinit
  315. printf F " UINT32_C(0x%x),\n", $n-1; # hashmask
  316. printf F " UINT32_C(%u),\n", $tbllen; # tbllen
  317. printf F " %d,\n", $tbloffs; # tbloffs
  318. printf F " (%s),\n", $errval; # errval
  319. printf F " ${name}_hashvals,\n"; # hashvals
  320. printf F " ${name}_tbl\n"; # strings
  321. print F "};\n";
  322. }