tokhash.pl 8.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285
  1. #!/usr/bin/perl
  2. ## --------------------------------------------------------------------------
  3. ##
  4. ## Copyright 1996-2014 The NASM Authors - All Rights Reserved
  5. ## See the file AUTHORS included with the NASM distribution for
  6. ## the specific copyright holders.
  7. ##
  8. ## Redistribution and use in source and binary forms, with or without
  9. ## modification, are permitted provided that the following
  10. ## conditions are met:
  11. ##
  12. ## * Redistributions of source code must retain the above copyright
  13. ## notice, this list of conditions and the following disclaimer.
  14. ## * Redistributions in binary form must reproduce the above
  15. ## copyright notice, this list of conditions and the following
  16. ## disclaimer in the documentation and/or other materials provided
  17. ## with the distribution.
  18. ##
  19. ## THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  20. ## CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  21. ## INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  22. ## MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  23. ## DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  24. ## CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  25. ## SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  26. ## NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  27. ## LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  28. ## HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  29. ## CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  30. ## OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
  31. ## EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  32. ##
  33. ## --------------------------------------------------------------------------
  34. #
  35. # Generate a perfect hash for token parsing
  36. #
  37. # Usage: tokhash.pl {h|c} insns.dat regs.dat tokens.dat
  38. #
  39. require 'phash.ph';
  40. my($output, $insns_dat, $regs_dat, $tokens_dat) = @ARGV;
  41. %tokens = ();
  42. @tokendata = ();
  43. #
  44. # List of condition codes
  45. #
  46. @conditions = ('a', 'ae', 'b', 'be', 'c', 'e', 'g', 'ge', 'l', 'le',
  47. 'na', 'nae', 'nb', 'nbe', 'nc', 'ne', 'ng', 'nge', 'nl',
  48. 'nle', 'no', 'np', 'ns', 'nz', 'o', 'p', 'pe', 'po', 's', 'z');
  49. #
  50. # Read insns.dat
  51. #
  52. open(ID, '<', $insns_dat) or die "$0: cannot open $insns_dat: $!\n";
  53. while (defined($line = <ID>)) {
  54. if ($line =~ /^([A-Z0-9_]+)(|cc)\s/) {
  55. $insn = $1.$2;
  56. ($token = $1) =~ tr/A-Z/a-z/;
  57. if ($2 eq '') {
  58. # Single instruction token
  59. if (!defined($tokens{$token})) {
  60. $tokens{$token} = scalar @tokendata;
  61. push(@tokendata, "\"${token}\", TOKEN_INSN, C_none, 0, I_${insn}");
  62. }
  63. } else {
  64. # Conditional instruction
  65. foreach $cc (@conditions) {
  66. if (!defined($tokens{$token.$cc})) {
  67. $tokens{$token.$cc} = scalar @tokendata;
  68. push(@tokendata, "\"${token}${cc}\", TOKEN_INSN, C_\U$cc\E, 0, I_${insn}");
  69. }
  70. }
  71. }
  72. }
  73. }
  74. close(ID);
  75. #
  76. # Read regs.dat
  77. #
  78. open(RD, '<', $regs_dat) or die "$0: cannot open $regs_dat: $!\n";
  79. while (defined($line = <RD>)) {
  80. if ($line =~ /^([a-z0-9_-]+)\s*\S+\s*\S+\s*[0-9]+\s*(\S*)/) {
  81. $reg = $1;
  82. $reg_flag = $2;
  83. if ($reg =~ /^(.*[^0-9])([0-9]+)\-([0-9]+)(|[^0-9].*)$/) {
  84. $nregs = $3-$2+1;
  85. $reg = $1.$2.$4;
  86. $reg_nr = $2;
  87. $reg_prefix = $1;
  88. $reg_suffix = $4;
  89. } else {
  90. $nregs = 1;
  91. undef $reg_prefix, $reg_suffix;
  92. }
  93. while ($nregs--) {
  94. if (defined($tokens{$reg})) {
  95. die "Duplicate definition: $reg\n";
  96. }
  97. $tokens{$reg} = scalar @tokendata;
  98. if ($reg_flag eq '') {
  99. push(@tokendata, "\"${reg}\", TOKEN_REG, 0, 0, R_\U${reg}\E");
  100. } else {
  101. push(@tokendata, "\"${reg}\", TOKEN_REG, 0, ${reg_flag}, R_\U${reg}\E");
  102. }
  103. if (defined($reg_prefix)) {
  104. $reg_nr++;
  105. $reg = sprintf("%s%u%s", $reg_prefix, $reg_nr, $reg_suffix);
  106. } else {
  107. # Not a dashed sequence
  108. die if ($nregs);
  109. }
  110. }
  111. }
  112. }
  113. close(RD);
  114. #
  115. # Read tokens.dat
  116. #
  117. open(TD, '<', $tokens_dat) or die "$0: cannot open $tokens_dat: $!\n";
  118. while (defined($line = <TD>)) {
  119. if ($line =~ /^\%\s+(.*)$/) {
  120. $pattern = $1;
  121. } elsif ($line =~ /^([a-z0-9_-]+)/) {
  122. $token = $1;
  123. if (defined($tokens{$token})) {
  124. die "Duplicate definition: $token\n";
  125. }
  126. $tokens{$token} = scalar @tokendata;
  127. $data = $pattern;
  128. if ($data =~ /^(.*)\{(.*)\}(.*)$/) {
  129. my $head = $1, $tail = $3;
  130. my $px = $2;
  131. $px =~ s/\*/(.*)/g;
  132. if ($token =~ /$px/i) {
  133. $data = $head."\U$1".$tail;
  134. } else {
  135. die "$0: token $token doesn't match $px\n";
  136. }
  137. }
  138. $data =~ s/\*/\U$token/g;
  139. push(@tokendata, "\"$token\", $data");
  140. }
  141. }
  142. close(TD);
  143. if ($output eq 'h') {
  144. #
  145. # tokens.h
  146. #
  147. $max_len = 0;
  148. foreach $token (keys(%tokens)) {
  149. if (length($token) > $max_len) {
  150. $max_len = length($token);
  151. }
  152. }
  153. print "/*\n";
  154. print " * This file is generated from insns.dat, regs.dat and token.dat\n";
  155. print " * by tokhash.pl; do not edit.\n";
  156. print " */\n";
  157. print "\n";
  158. print "#ifndef NASM_TOKENS_H\n";
  159. print "#define NASM_TOKENS_H\n";
  160. print "\n";
  161. print "#define MAX_KEYWORD $max_len /* length of longest keyword */\n";
  162. print "\n";
  163. print "#endif /* NASM_TOKENS_H */\n";
  164. } elsif ($output eq 'c') {
  165. #
  166. # tokhash.c
  167. #
  168. @hashinfo = gen_perfect_hash(\%tokens);
  169. if (!@hashinfo) {
  170. die "$0: no hash found\n";
  171. }
  172. # Paranoia...
  173. verify_hash_table(\%tokens, \@hashinfo);
  174. ($n, $sv, $g) = @hashinfo;
  175. $sv2 = $sv+2;
  176. die if ($n & ($n-1));
  177. print "/*\n";
  178. print " * This file is generated from insns.dat, regs.dat and token.dat\n";
  179. print " * by tokhash.pl; do not edit.\n";
  180. print " */\n";
  181. print "\n";
  182. print "#include \"compiler.h\"\n";
  183. print "#include <string.h>\n";
  184. print "#include \"nasm.h\"\n";
  185. print "#include \"hashtbl.h\"\n";
  186. print "#include \"insns.h\"\n";
  187. print "#include \"stdscan.h\"\n";
  188. print "\n";
  189. # These somewhat odd sizes and ordering thereof are due to the
  190. # relative ranges of the types; this makes it fit in 16 bytes on
  191. # 64-bit machines and 12 bytes on 32-bit machines.
  192. print "struct tokendata {\n";
  193. print " const char *string;\n";
  194. print " int16_t tokentype;\n";
  195. print " int8_t aux;\n";
  196. print " int8_t tokflag;\n";
  197. print " int32_t num;\n";
  198. print "};\n";
  199. print "\n";
  200. print "int nasm_token_hash(const char *token, struct tokenval *tv)\n";
  201. print "{\n";
  202. # Put a large value in unused slots. This makes it extremely unlikely
  203. # that any combination that involves unused slot will pass the range test.
  204. # This speeds up rejection of unrecognized tokens, i.e. identifiers.
  205. print "#define UNUSED (65535/3)\n";
  206. print " static const int16_t hash1[$n] = {\n";
  207. for ($i = 0; $i < $n; $i++) {
  208. my $h = ${$g}[$i*2+0];
  209. print " ", defined($h) ? $h : 'UNUSED', ",\n";
  210. }
  211. print " };\n";
  212. print " static const int16_t hash2[$n] = {\n";
  213. for ($i = 0; $i < $n; $i++) {
  214. my $h = ${$g}[$i*2+1];
  215. print " ", defined($h) ? $h : 'UNUSED', ",\n";
  216. }
  217. print " };\n";
  218. printf " static const struct tokendata tokendata[%d] = {\n", scalar(@tokendata);
  219. foreach $d (@tokendata) {
  220. print " { ", $d, " },\n";
  221. }
  222. print " };\n";
  223. print " uint32_t k1, k2;\n";
  224. print " uint64_t crc;\n";
  225. # For correct overflow behavior, "ix" should be unsigned of the same
  226. # width as the hash arrays.
  227. print " uint16_t ix;\n";
  228. print " const struct tokendata *data;\n";
  229. print "\n";
  230. printf " tv->t_flag = 0;\n";
  231. printf " crc = crc64(UINT64_C(0x%08x%08x), token);\n",
  232. $$sv[0], $$sv[1];
  233. print " k1 = (uint32_t)crc;\n";
  234. print " k2 = (uint32_t)(crc >> 32);\n";
  235. print "\n";
  236. printf " ix = hash1[k1 & 0x%x] + hash2[k2 & 0x%x];\n", $n-1, $n-1;
  237. printf " if (ix >= %d)\n", scalar(@tokendata);
  238. print " return tv->t_type = TOKEN_ID;\n";
  239. print "\n";
  240. print " data = &tokendata[ix];\n";
  241. print " if (strcmp(data->string, token))\n";
  242. print " return tv->t_type = TOKEN_ID;\n";
  243. print "\n";
  244. print " tv->t_integer = data->num;\n";
  245. print " tv->t_inttwo = data->aux;\n";
  246. print " tv->t_flag = data->tokflag;\n";
  247. print " return tv->t_type = data->tokentype;\n";
  248. print "}\n";
  249. }