insns.pl 33 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049
  1. #!/usr/bin/perl
  2. ## --------------------------------------------------------------------------
  3. ##
  4. ## Copyright 1996-2017 The NASM Authors - All Rights Reserved
  5. ## See the file AUTHORS included with the NASM distribution for
  6. ## the specific copyright holders.
  7. ##
  8. ## Redistribution and use in source and binary forms, with or without
  9. ## modification, are permitted provided that the following
  10. ## conditions are met:
  11. ##
  12. ## * Redistributions of source code must retain the above copyright
  13. ## notice, this list of conditions and the following disclaimer.
  14. ## * Redistributions in binary form must reproduce the above
  15. ## copyright notice, this list of conditions and the following
  16. ## disclaimer in the documentation and/or other materials provided
  17. ## with the distribution.
  18. ##
  19. ## THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  20. ## CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  21. ## INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  22. ## MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  23. ## DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  24. ## CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  25. ## SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  26. ## NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  27. ## LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  28. ## HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  29. ## CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  30. ## OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
  31. ## EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  32. ##
  33. ## --------------------------------------------------------------------------
  34. #
  35. # insns.pl
  36. #
  37. # Parse insns.dat and produce generated source code files
  38. require 'x86/insns-iflags.ph';
  # Opcode prefixes which need their own opcode tables.
  # LONGER PREFIXES FIRST!
  @disasm_prefixes = qw(0F24 0F25 0F38 0F3A 0F7A 0FA6 0FA7 0F);

  # This should match MAX_OPERANDS from nasm.h
  $MAX_OPERANDS = 5;

  # VEX-style prefix classes.  The position of a name in @vex_class is its
  # class number; %vexmap is the reverse (name -> number) lookup.
  @vex_class   = ('vex', 'xop', 'evex');
  $vex_classes = scalar(@vex_class);
  %vexmap      = map { $vex_class[$_] => $_ } 0 .. $#vex_class;

  # One pseudo-prefix name per (class, map 0..31, pp 0..3) combination,
  # formatted as <class><map as 2 hex digits><pp as 1 hex digit>.
  @vexlist = ();
  foreach my $class (@vex_class) {
      foreach my $map (0 .. 31) {
          push(@vexlist,
               map { sprintf("%s%02X%01X", $class, $map, $_) } 0 .. 3);
      }
  }

  # The VEX-style names are searched before the legacy byte prefixes
  @disasm_prefixes = (@vexlist, @disasm_prefixes);

  # Usage counter for each of the 256 possible bytecode values
  @bytecode_count = (0) x 256;
  print STDERR "Reading insns.dat...\n";

  # Command line handling.
  #
  # Arguments starting with '-' select the kind of output to generate
  # (-a, -b, -d, -i, -n, -fh or -fc); if several are given the last one wins.
  # Exactly two other arguments are required: the input file name and the
  # output file name, in that order.
  @args = ();
  undef $output;
  foreach my $arg (@ARGV) {
      if ($arg =~ /^\-/) {
          if ($arg =~ /^\-([abdin]|f[hc])$/) {
              $output = $1;
          } else {
              die "$0: Unknown option: ${arg}\n";
          }
      } else {
          push(@args, $arg);
      }
  }

  # Say what is expected instead of exiting with a bare "Died"
  if (scalar(@args) != 2) {
      die "Usage: $0 [-a|-b|-d|-i|-n|-fh|-fc] input-file output-file\n";
  }
  ($fname, $oname) = @args;
  # Read the instruction database.
  #
  # Every non-comment line must consist of four whitespace separated fields:
  # opcode name, operands, code string and flags.  An operand ending in '*'
  # is optional ("relaxed"); one additional template is generated for every
  # non-empty subset of the optional operands being omitted, and the bit mask
  # of the omitted operands is passed along as a fifth field.
  #
  # Results are accumulated in package globals that the output stages read:
  #   @aa_<OPCODE>   formatted templates per opcode (symbolic array names)
  #   @big           formatted templates visible to the disassembler (no ND)
  #   %dinstables    start sequence -> list of indices into @big
  #   %k_opcodes, %k_opcodes_cc
  #                  opcode name -> ordinal, in order of first appearance
  open(my $insns_fh, '<', $fname)
      or die "$0: unable to open $fname: $!\n";

  %dinstables = ();
  @bytecode_list = ();
  $line = 0;                  # also used in the error messages of the parser
  $insns = 0;
  $n_opcodes = $n_opcodes_cc = 0;

  while (<$insns_fh>) {
      $line++;
      chomp;
      next if ( /^\s*(\;.*|)$/ );   # comments or blank lines

      unless (/^\s*(\S+)\s+(\S+)\s+(\S+|\[.*\])\s+(\S+)\s*$/) {
          warn "line $line does not contain four fields\n";
          next;
      }
      @fields = ($1, $2, $3, $4);
      @field_list = ([@fields, 0]);

      if ($fields[1] =~ /\*/) {
          # This instruction has relaxed form(s)
          if ($fields[2] !~ /^\[/) {
              warn "line $line has an * operand but uses raw bytecodes\n";
              next;
          }

          # Bit N of $opmask is set when operand N may be omitted
          my $opmask = 0;
          my @ops = split(/,/, $fields[1]);
          for (my $oi = 0; $oi < scalar @ops; $oi++) {
              if ($ops[$oi] =~ /\*$/) {
                  if ($oi == 0) {
                      warn "line $line has a first operand with a *\n";
                      next;   # only skips marking this operand as optional
                  }
                  $opmask |= 1 << $oi;
              }
          }

          # Every non-zero bit pattern that only uses optional operands
          # describes one relaxed form
          for (my $oi = 1; $oi < (1 << scalar @ops); $oi++) {
              if (($oi & ~$opmask) == 0) {
                  my @xops = ();
                  my $omask = ~$oi;
                  for (my $oj = 0; $oj < scalar(@ops); $oj++) {
                      if ($omask & 1) {
                          push(@xops, $ops[$oj]);
                      }
                      $omask >>= 1;
                  }
                  push(@field_list, [$fields[0], join(',', @xops),
                                     $fields[2], $fields[3], $oi]);
              }
          }
      }

      foreach my $fptr (@field_list) {
          @fields = @$fptr;
          my ($formatted, $nd) = format_insn(@fields);
          if ($formatted) {
              $insns++;
              # Symbolic reference: the templates of opcode FOO are kept in
              # the package array @aa_FOO
              my $aname = "aa_$fields[0]";
              push @$aname, $formatted;
          }
          if ( $fields[0] =~ /cc$/ ) {
              # Conditional instruction
              if (!defined($k_opcodes_cc{$fields[0]})) {
                  $k_opcodes_cc{$fields[0]} = $n_opcodes_cc++;
              }
          } else {
              # Unconditional instruction
              if (!defined($k_opcodes{$fields[0]})) {
                  $k_opcodes{$fields[0]} = $n_opcodes++;
              }
          }
          if ($formatted && !$nd) {
              push @big, $formatted;
              my @sseq = startseq($fields[2], $fields[4]);
              foreach my $start (@sseq) {
                  if (!defined($dinstables{$start})) {
                      $dinstables{$start} = [];
                  }
                  push(@{$dinstables{$start}}, $#big);
              }
          }
      }
  }

  close($insns_fh);
  #
  # Generate the bytecode array.  At this point, @bytecode_list contains
  # the full set of bytecodes.
  #
  # Longer sequences are placed first.  When a sequence is placed, every one
  # of its suffixes is recorded in %bytecode_pos as well, so a later sequence
  # that is equal to the tail of an already placed one reuses that storage.
  #
  @bytecode_array = ();
  %bytecode_pos = ();
  $bytecode_next = 0;

  foreach my $seq (sort { scalar(@$b) <=> scalar(@$a) } @bytecode_list) {
      my $hex = hexstr(@$seq);
      next if (defined($bytecode_pos{$hex}));

      push(@bytecode_array, $seq);

      # Two hex digits per byte: suffix N starts at string offset 2*N
      for (my $off = 0; $off < length($hex); $off += 2) {
          $bytecode_pos{substr($hex, $off)} = $bytecode_next++;
      }
  }
  undef @bytecode_list;

  # Opcode names in the order in which they were first seen
  @opcodes = sort { $k_opcodes{$a} <=> $k_opcodes{$b} } keys(%k_opcodes);
  @opcodes_cc = sort { $k_opcodes_cc{$a} <=> $k_opcodes_cc{$b} }
      keys(%k_opcodes_cc);
  # -b: write the C source defining nasm_bytecodes[], followed by a comment
  # with the usage count of each bytecode value.
  if ( $output eq 'b') {
      print STDERR "Writing $oname...\n";

      open(my $out, '>', $oname)
          or die "$0: unable to open $oname for writing: $!\n";

      print $out "/* This file auto-generated from insns.dat by insns.pl" .
          " - don't edit it */\n\n";

      print $out "#include \"nasm.h\"\n";
      print $out "#include \"insns.h\"\n\n";

      print $out "const uint8_t nasm_bytecodes[$bytecode_next] = {\n";

      # One output line per stored sequence, labelled with its start offset
      my $pos = 0;
      foreach my $seq (@bytecode_array) {
          printf $out " /* %5d */ ", $pos;
          foreach my $byte (@$seq) {
              printf $out "%#o,", $byte;
              $pos++;
          }
          printf $out "\n";
      }
      print $out "};\n";

      print $out "\n";
      print $out "/*\n";
      print $out " * Bytecode frequencies (including reuse):\n";
      print $out " *\n";
      # 32 rows of 8 columns; column K of row R shows bytecode R + 32*K
      for (my $row = 0; $row < 32; $row++) {
          print $out " *";
          for (my $col = 0; $col < 256; $col += 32) {
              print $out " |" if ($col);
              printf $out " %3o:%4d", $row+$col, $bytecode_count[$row+$col];
          }
          print $out "\n";
      }
      print $out " */\n";

      # Buffered write errors only show up when the file is closed
      close($out) or die "$0: error writing $oname: $!\n";
  }
  # -a: write the C source with one instrux_<OPCODE>[] template array per
  # opcode plus the nasm_instructions[] table that points at them.
  if ( $output eq 'a' ) {
      print STDERR "Writing $oname...\n";

      open(my $out, '>', $oname)
          or die "$0: unable to open $oname for writing: $!\n";

      print $out "/* This file auto-generated from insns.dat by insns.pl" .
          " - don't edit it */\n\n";

      print $out "#include \"nasm.h\"\n";
      print $out "#include \"insns.h\"\n\n";

      foreach my $i (@opcodes, @opcodes_cc) {
          print $out "static const struct itemplate instrux_${i}[] = {\n";
          # Symbolic reference to the package array @aa_<OPCODE> filled in
          # while the input file was read
          my $aname = "aa_$i";
          foreach my $tmpl (@$aname) {
              print $out " ", codesubst($tmpl), "\n";
          }
          print $out " ITEMPLATE_END\n};\n\n";
      }

      print $out "const struct itemplate * const nasm_instructions[] = {\n";
      foreach my $i (@opcodes, @opcodes_cc) {
          print $out " instrux_${i},\n";
      }
      print $out "};\n";

      # Buffered write errors only show up when the file is closed
      close($out) or die "$0: error writing $oname: $!\n";
  }
  # -d: write the C source with the disassembler tables: the flat instrux[]
  # template array, one itable_<start sequence>[] list per start sequence,
  # a 256-entry index per opcode prefix and the itable_vex[][][] lookup.
  if ( $output eq 'd' ) {
      print STDERR "Writing $oname...\n";

      open(my $out, '>', $oname)
          or die "$0: unable to open $oname for writing: $!\n";

      print $out "/* This file auto-generated from insns.dat by insns.pl" .
          " - don't edit it */\n\n";

      print $out "#include \"nasm.h\"\n";
      print $out "#include \"insns.h\"\n\n";

      print $out "static const struct itemplate instrux[] = {\n";
      my $n = 0;
      foreach my $tmpl (@big) {
          printf $out " /* %4d */ %s\n", $n++, codesubst($tmpl);
      }
      print $out "};\n";

      foreach my $h (sort(keys(%dinstables))) {
          next if ($h eq '');   # Skip pseudo-instructions
          print $out "\nstatic const struct itemplate * const itable_${h}[] = {\n";
          foreach my $idx (@{$dinstables{$h}}) {
              print $out " instrux + $idx,\n";
          }
          print $out "};\n";
      }

      # A prefix needs a table of its own if at least one byte following it
      # either starts an instruction or is itself a (longer) prefix.  The
      # list is ordered longest prefix first, so the longer prefixes have
      # been marked in %is_prefix by the time the shorter ones are examined.
      @prefix_list = ();
      foreach my $h (@disasm_prefixes, '') {
          for (my $c = 0; $c < 256; $c++) {
              my $nn = sprintf("%s%02X", $h, $c);
              if ($is_prefix{$nn} || defined($dinstables{$nn})) {
                  # At least one entry in this prefix table
                  push(@prefix_list, $h);
                  $is_prefix{$h} = 1;
                  last;
              }
          }
      }

      foreach my $h (@prefix_list) {
          # The table for the empty prefix is the only one with external
          # linkage and is simply called "itable"
          my $table = ($h eq '') ? 'itable' : "itable_$h";

          print $out "\n";
          print $out "static " unless ($h eq '');
          print $out "const struct disasm_index ";
          print $out $table;
          print $out "[256] = {\n";
          for (my $c = 0; $c < 256; $c++) {
              my $nn = sprintf("%s%02X", $h, $c);
              if ($is_prefix{$nn}) {
                  die "$fname: ambiguous decoding of $nn\n"
                      if (defined($dinstables{$nn}));
                  printf $out " /* 0x%02x */ { itable_%s, -1 },\n", $c, $nn;
              } elsif (defined($dinstables{$nn})) {
                  printf $out " /* 0x%02x */ { itable_%s, %u },\n", $c,
                      $nn, scalar(@{$dinstables{$nn}});
              } else {
                  printf $out " /* 0x%02x */ { NULL, 0 },\n", $c;
              }
          }
          print $out "};\n";
      }

      printf $out "\nconst struct disasm_index * const itable_vex[NASM_VEX_CLASSES][32][4] =\n";
      print $out "{\n";
      for (my $c = 0; $c < $vex_classes; $c++) {
          print $out " {\n";
          for (my $m = 0; $m < 32; $m++) {
              print $out " { ";
              for (my $p = 0; $p < 4; $p++) {
                  my $vp = sprintf("%s%02X%01X", $vex_class[$c], $m, $p);
                  printf $out "%-15s",
                      ($is_prefix{$vp} ? sprintf("itable_%s,", $vp) : 'NULL,');
              }
              print $out "},\n";
          }
          print $out " },\n";
      }
      print $out "};\n";

      # Buffered write errors only show up when the file is closed
      close($out) or die "$0: error writing $oname: $!\n";
  }
  # -i: write the C header with the opcode enumeration and the constants
  # derived from the instruction list.
  if ( $output eq 'i' ) {
      print STDERR "Writing $oname...\n";

      open(my $out, '>', $oname)
          or die "$0: unable to open $oname for writing: $!\n";

      print $out "/* This file is auto-generated from insns.dat by insns.pl" .
          " - don't edit it */\n\n";
      print $out "/* This file in included by nasm.h */\n\n";

      print $out "/* Instruction names */\n\n";
      print $out "#ifndef NASM_INSNSI_H\n";
      print $out "#define NASM_INSNSI_H 1\n\n";
      print $out "enum opcode {\n";

      # Longest mnemonic, used for MAX_INSLEN below
      my $maxlen = 0;
      foreach my $i (@opcodes, @opcodes_cc) {
          print $out "\tI_${i},\n";
          my $len = length($i);
          $len++ if ( $i =~ /cc$/ );  # Condition codes can be 3 characters long
          $maxlen = $len if ( $len > $maxlen );
      }
      print $out "\tI_none = -1\n";
      print $out "};\n\n";

      print $out "#define MAX_INSLEN ", $maxlen, "\n";
      print $out "#define NASM_VEX_CLASSES ", $vex_classes, "\n";
      print $out "#define NO_DECORATOR\t{", join(',',(0) x $MAX_OPERANDS), "}\n";
      print $out "#define FIRST_COND_OPCODE I_", $opcodes_cc[0], "\n\n";
      print $out "#endif /* NASM_INSNSI_H */\n";

      # Buffered write errors only show up when the file is closed
      close($out) or die "$0: error writing $oname: $!\n";
  }
  # -n: write the C source with the table of lower-case instruction names,
  # in the same order as the opcode enumeration.
  if ( $output eq 'n' ) {
      print STDERR "Writing $oname...\n";

      open(my $out, '>', $oname)
          or die "$0: unable to open $oname for writing: $!\n";

      print $out "/* This file is auto-generated from insns.dat by insns.pl" .
          " - don't edit it */\n\n";
      print $out "#include \"tables.h\"\n\n";

      print $out "const char * const nasm_insn_names[] = {";
      my $first = 1;
      foreach my $i (@opcodes, @opcodes_cc) {
          print $out "," if ( !$first );   # separator before all but the first
          $first = 0;
          my $ilower = $i;
          $ilower =~ s/cc$//;              # Remove conditional cc suffix
          $ilower =~ tr/A-Z/a-z/;          # Change to lower case (Perl 4 compatible)
          print $out "\n\t\"${ilower}\"";
      }
      print $out "\n};\n";

      # Buffered write errors only show up when the file is closed
      close($out) or die "$0: error writing $oname: $!\n";
  }
  # -fh / -fc: the instruction flag outputs are produced by helper routines
  # that are not defined in this file (presumably they come from the
  # required x86/insns-iflags.ph -- verify there).
  if ( $output eq 'fh' ) {
      write_iflaggen_h();
  } elsif ( $output eq 'fc' ) {
      write_iflag_c();
  }

  # Final statistics line
  printf STDERR ("Done: %d instructions\n", $insns);
  # Count primary bytecodes, for statistics.
  #
  # Walks the bytecode sequence given as arguments and bumps the matching
  # slot of the global @bytecode_count for every directive byte.  Bytes that
  # are merely arguments of the preceding directive (literal opcode bytes,
  # control bytes) are stepped over and not counted.
  sub count_bytecodes(@) {
      my $pending = 0;        # argument bytes still to be stepped over

      for my $code (@_) {
          if ($pending) {
              $pending--;
              next;
          }

          $bytecode_count[$code]++;

          # Number of argument bytes that follow this directive
          if ($code >= 01 && $code <= 04) {
              $pending = $code;       # that many literal bytes
          } elsif (($code & ~3) == 0260 || $code == 0270) {
              $pending = 2;           # VEX
          } elsif (($code & ~3) == 0240 || $code == 0250) {
              $pending = 3;           # EVEX
          } elsif (($code & ~03) == 010
                   || ($code & ~013) == 0144
                   || $code == 0172 || $code == 0173
                   || $code == 0330) {
              $pending = 1;           # one opcode or control byte
          }
      }
  }
  # Turn one instruction definition into the text of a C itemplate
  # initializer.
  #
  # Arguments: opcode name, operand specification, code string, flag list
  # and the bit mask of omitted optional operands (passed on to decodify).
  #
  # Returns a two element list: the initializer text and a true value if the
  # instruction carries the ND flag.  (undef, undef) is returned when the
  # operand field is "ignore".  The bytecode part of the initializer is a
  # @@CODES-<hex>@@ placeholder that codesubst() resolves later.
  #
  # Side effects: the compiled bytecode is appended to @bytecode_list and
  # counted through count_bytecodes().  Dies if the flags are rejected by
  # insns_flag_index().
  sub format_insn($$$$$) {
      my ($opcode, $operands, $codes, $flags, $relax) = @_;
      # Note: "my $a, $b;" would only declare $a, hence the parentheses
      my ($num, $rawflags, $flagsindex, $decorators);
      my $nd = 0;
      my @bytecode;
      my (@ops, @decos);

      return (undef, undef) if $operands eq "ignore";

      # format the operands
      $operands =~ s/\*//g;
      $operands =~ s/:/|colon,/g;
      @ops = ();
      @decos = ();
      if ($operands ne 'void') {
          foreach my $op (split(/,/, $operands)) {
              my @opx = ();
              my @opevex = ();
              foreach my $opp (split(/\|/, $op)) {
                  my @oppx = ();
                  # EVEX decorators are collected separately
                  if ($opp =~ s/^(b(32|64)|mask|z|er|sae)$//) {
                      push(@opevex, $1);
                  }

                  # A trailing size becomes a separate bitsNN term
                  if ($opp =~ s/(?<!\d)(8|16|32|64|80|128|256|512)$//) {
                      push(@oppx, "bits$1");
                  }
                  $opp =~ s/^mem$/memory/;
                  $opp =~ s/^memory_offs$/mem_offs/;
                  $opp =~ s/^imm$/immediate/;
                  $opp =~ s/^([a-z]+)rm$/rm_$1/;
                  $opp =~ s/^rm$/rm_gpr/;
                  $opp =~ s/^reg$/reg_gpr/;
                  # only for evex insns, high-16 regs are allowed
                  if ($codes !~ /(^|\s)evex\./) {
                      $opp =~ s/^(rm_[xyz]mm)$/$1_l16/;
                      $opp =~ s/^([xyz]mm)reg$/$1_l16/;
                  }
                  push(@opx, $opp, @oppx) if $opp;
              }
              push(@ops, join('|', @opx));
              push(@decos, (@opevex ? join('|', @opevex) : '0'));
          }
      }

      # Pad to the fixed operand count of the C structure
      $num = scalar(@ops);
      while (scalar(@ops) < $MAX_OPERANDS) {
          push(@ops, '0');
          push(@decos, '0');
      }
      $operands = join(',', @ops);
      $operands =~ tr/a-z/A-Z/;

      $decorators = "{" . join(',', @decos) . "}";
      if ($decorators =~ /^\{(0,)+0\}$/) {
          $decorators = "NO_DECORATOR";
      }
      $decorators =~ tr/a-z/A-Z/;

      # format the flags
      $nd = 1 if $flags =~ /(^|\,)ND($|\,)/;
      $flags =~ s/(^|\,)ND($|\,)/${1}/g;
      $flags =~ s/(^|\,)X64($|\,)/${1}LONG,X86_64${2}/g;
      if ($codes =~ /evex\./) {
          $flags .= ",EVEX";
      } elsif ($codes =~ /(vex|xop)\./) {
          $flags .= ",VEX";
      }
      $rawflags = $flags;
      $flagsindex = insns_flag_index(split(',',$flags));

      die "Error in flags $rawflags" if not defined($flagsindex);

      # The compiled sequence is always terminated by a zero byte
      @bytecode = (decodify($codes, $relax), 0);
      push(@bytecode_list, [@bytecode]);
      $codes = hexstr(@bytecode);
      count_bytecodes(@bytecode);

      return ("{I_$opcode, $num, {$operands}, $decorators, \@\@CODES-$codes\@\@, $flagsindex},", $nd);
  }
  #
  # Replace every @@CODES-xxx@@ placeholder in the given string with the
  # matching "nasm_bytecodes+<offset>" expression and return the result.
  # The offsets are taken from the global %bytecode_pos; a placeholder
  # without an assigned offset is a fatal error.
  #
  sub codesubst($) {
      my ($text) = @_;

      $text =~ s{\@\@CODES-([0-9A-F]+)\@\@}{
          my $offset = $bytecode_pos{$1};
          die "$fname: no position assigned to byte code $1\n"
              if (!defined($offset));
          "nasm_bytecodes+${offset}";
      }ge;

      return $text;
  }
  # Return the list of table names obtained by appending each of the given
  # byte values, as two upper-case hex digits, to the prefix string.
  sub addprefix ($@) {
      my ($prefix, @bytes) = @_;
      return map { sprintf("%s%02X", $prefix, $_) } @bytes;
  }
  #
  # Turn a code string into a sequence of bytes.
  #
  # A string enclosed in [...] is handed to byte_code_compile() together
  # with the relax mask.  Anything else is a C-syntax string which by
  # convention has only octal escapes (for directives) and hexadecimal
  # escapes (for verbatim bytes).  The string "ignore" gives an empty list;
  # any other unparsable text is a fatal error.
  #
  sub decodify($$) {
      my ($codestr, $relax) = @_;

      return byte_code_compile($1, $relax)
          if ($codestr =~ /^\s*\[([^\]]*)\]\s*$/);

      my @bytes = ();
      return @bytes if ($codestr eq 'ignore');

      # Peel one escape at a time off the front of the string
      my $rest = $codestr;
      until ($rest eq '') {
          my $value;
          if ($rest =~ /^\\x([0-9a-f]+)(.*)$/i) {
              ($value, $rest) = (hex($1), $2);
          } elsif ($rest =~ /^\\([0-7]{1,3})(.*)$/) {
              ($value, $rest) = (oct($1), $2);
          } else {
              die "$fname: unknown code format in \"$codestr\"\n";
          }
          push(@bytes, $value);
      }
      return @bytes;
  }
  # Turn a numeric list into a hex string: two upper-case hex digits per
  # element, concatenated without separators.
  sub hexstr(@) {
      return join('', map { sprintf("%02X", $_) } @_);
  }
  # Here we determine the range of possible starting bytes for a given
  # instruction. We need only consider the codes:
  # \[1234] mean literal bytes, of course
  # \1[0123] mean byte plus register value
  # \330 means byte plus condition code
  # \0 or \340 mean give up and return empty set
  # \34[4567] mean PUSH/POP of segment registers: special case
  # \17[23] skip is4 control byte
  # \26x \270 skip VEX control bytes
  # \24x \250 skip EVEX control bytes
  #
  # Arguments: the code string of the instruction and the relax mask (both
  # are passed to decodify).  Returns the list of disassembler table names
  # (prefix followed by the first opcode byte as two hex digits) under which
  # the instruction has to be filed; when no opcode byte is found, the
  # prefix collected so far (possibly the empty string) is returned.
  sub startseq($$) {
      my ($codestr, $relax) = @_;
      # Note: "my $a, $b;" would only declare $a, hence the parentheses
      my ($c0, $c1);
      my $prefix = '';
      my @codes = decodify($codestr, $relax);

      while ($c0 = shift(@codes)) {
          $c1 = $codes[0];
          if ($c0 >= 01 && $c0 <= 04) {
              # Fixed byte string; consecutive literal runs are merged
              my $fbs = $prefix;
              while (1) {
                  if ($c0 >= 01 && $c0 <= 04) {
                      while ($c0--) {
                          $fbs .= sprintf("%02X", shift(@codes));
                      }
                  } else {
                      last;
                  }
                  $c0 = shift(@codes);
              }

              # Split off the longest known prefix
              foreach my $pfx (@disasm_prefixes) {
                  if (substr($fbs, 0, length($pfx)) eq $pfx) {
                      $prefix = $pfx;
                      $fbs = substr($fbs, length($pfx));
                      last;
                  }
              }

              if ($fbs ne '') {
                  return ($prefix.substr($fbs,0,2));
              }

              # Only prefix bytes so far: put the code that ended the
              # literal run back and keep scanning
              unshift(@codes, $c0);
          } elsif ($c0 >= 010 && $c0 <= 013) {
              return addprefix($prefix, $c1..($c1+7));
          } elsif (($c0 & ~013) == 0144) {
              return addprefix($prefix, $c1, $c1|2);
          } elsif ($c0 == 0330) {
              return addprefix($prefix, $c1..($c1+15));
          } elsif ($c0 == 0 || $c0 == 0340) {
              return $prefix;
          } elsif (($c0 & ~3) == 0260 || $c0 == 0270 ||
                   ($c0 & ~3) == 0240 || $c0 == 0250) {
              # VEX-style prefix: the two control bytes give the class
              # (top bits of the first), the map and the pp field
              my ($c, $m, $wlp);
              $m = shift(@codes);
              $wlp = shift(@codes);
              $c = ($m >> 6);
              $m = $m & 31;
              $prefix .= sprintf('%s%02X%01X', $vex_class[$c], $m, $wlp & 3);
              if ($c0 < 0260) {
                  shift(@codes);      # EVEX only: skip the tuple type byte
              }
          } elsif ($c0 >= 0172 && $c0 <= 0173) {
              # Both bounds are octal; with a decimal upper bound of 173
              # unrelated codes up to 0255 would swallow the byte after them
              shift(@codes);          # Skip is4 control byte
          } else {
              # We really need to be able to distinguish "forbidden"
              # and "ignorable" codes here
          }
      }
      return $prefix;
  }
  # EVEX tuple types offset is 0300. e.g. 0301 is for full vector(fv).
  #
  # Maps a tuple type name (the empty string means no tuple type) to its
  # bytecode.  Dies on an unknown name.
  sub tupletype($) {
      my ($tuplestr) = @_;

      # The position of a name in this list is its tuple code (000..020)
      my @tuple_names = (
          '',      'fv',    'hv',    'fvm',
          't1s8',  't1s16', 't1s',   't1f32',
          't1f64', 't2',    't4',    't8',
          'hvm',   'qvm',   'ovm',   'm128',
          'dup',
      );
      my %tuple_codes;
      @tuple_codes{@tuple_names} = (0 .. $#tuple_names);

      die "Undefined tuple type : $tuplestr\n"
          unless (defined $tuple_codes{$tuplestr});

      return 0300 + $tuple_codes{$tuplestr};
  }
  #
  # This function takes a series of byte codes in a format which is more
  # typical of the Intel documentation, and encodes it.
  #
  # The format looks like:
  #
  # [operands: opcodes]
  #
  # The operands word lists the order of the operands:
  #
  # r = register field in the modr/m
  # m = modr/m
  # v = VEX "v" field
  # i = immediate
  # s = register field of is4/imz2 field
  # - = implicit (unencoded) operand
  # x = indeX register of mib. 014..017 bytecodes are used.
  #
  # For an operand that should be filled into more than one field,
  # enter it as e.g. "r+v".
  #
  # An optional second word between the operands and the opcodes
  # ("operands:tuple: opcodes") names the EVEX tuple type.
  #
  # Arguments: the text between the brackets and the relax mask, in which
  # bit N is set if optional operand N has been omitted from this form of
  # the instruction.  Returns the list of bytecodes, without a terminator.
  # Dies with a message naming the input file and line on any
  # specification error.
  #
  sub byte_code_compile($$) {
      my($str, $relax) = @_;
      my $opr;
      my $opc;
      my $tuple;
      my $tup;
      my @codes = ();
      my $litix = undef;      # index in @codes of the current literal run
      my %oppos = ();         # operand letter -> operand number
      my $i;
      my $opex;

      my %imm_codes = (
          'ib' => 020,        # imm8
          'ib,u' => 024,      # Unsigned imm8
          'iw' => 030,        # imm16
          'ib,s' => 0274,     # imm8 sign-extended to opsize or bits
          'iwd' => 034,       # imm16 or imm32, depending on opsize
          'id' => 040,        # imm32
          'id,s' => 0254,     # imm32 sign-extended to 64 bits
          'iwdq' => 044,      # imm16/32/64, depending on addrsize
          'rel8' => 050,
          'iq' => 054,
          'rel16' => 060,
          'rel' => 064,       # 16 or 32 bit relative operand
          'rel32' => 070,
          'seg' => 074,
      );
      my %plain_codes = (
          'o16' => 0320,      # 16-bit operand size
          'o32' => 0321,      # 32-bit operand size
          'odf' => 0322,      # Operand size is default
          'o64' => 0324,      # 64-bit operand size requiring REX.W
          'o64nw' => 0323,    # Implied 64-bit operand size (no REX.W)
          'a16' => 0310,
          'a32' => 0311,
          'adf' => 0312,      # Address size is default
          'a64' => 0313,
          '!osp' => 0364,
          '!asp' => 0365,
          'f2i' => 0332,      # F2 prefix, but 66 for operand size is OK
          'f3i' => 0333,      # F3 prefix, but 66 for operand size is OK
          'mustrep' => 0336,
          'mustrepne' => 0337,
          'rex.l' => 0334,
          'norexb' => 0314,
          'norexx' => 0315,
          'norexr' => 0316,
          'norexw' => 0317,
          'repe' => 0335,
          'nohi' => 0325,     # Use spl/bpl/sil/dil even without REX
          'nof3' => 0326,     # No REP 0xF3 prefix permitted
          'norep' => 0331,    # No REP prefix permitted
          'wait' => 0341,     # Needs a wait prefix
          'resb' => 0340,
          'np' => 0360,       # No prefix
          'jcc8' => 0370,     # Match only if Jcc possible with single byte
          'jmp8' => 0371,     # Match only if JMP possible with single byte
          'jlen' => 0373,     # Length of jump
          'hlexr' => 0271,
          'hlenl' => 0272,
          'hle' => 0273,

          # This instruction takes XMM VSIB
          'vsibx' => 0374,
          'vm32x' => 0374,
          'vm64x' => 0374,

          # This instruction takes YMM VSIB
          'vsiby' => 0375,
          'vm32y' => 0375,
          'vm64y' => 0375,

          # This instruction takes ZMM VSIB
          'vsibz' => 0376,
          'vm32z' => 0376,
          'vm64z' => 0376,
      );

      unless ($str =~ /^(([^\s:]*)\:*([^\s:]*)\:|)\s*(.*\S)\s*$/) {
          die "$fname: $line: cannot parse: [$str]\n";
      }
      $opr = "\L$2";
      $tuple = "\L$3";        # Tuple type for AVX512
      $opc = "\L$4";

      # Assign operand numbers to the letters of the operands word.  A '+'
      # makes the next letter share the number of the previous one; an
      # operand omitted in this relaxed form shares the number of the
      # operand in front of it.
      my $opnum = 0;
      for ($i = 0; $i < length($opr); $i++) {
          my $c = substr($opr,$i,1);
          if ($c eq '+') {
              $opnum--;
          } else {
              if ($relax & 1) {
                  $opnum--;
              }
              $relax >>= 1;
              $oppos{$c} = $opnum++;
          }
      }
      $tup = tupletype($tuple);

      my $last_imm = 'h';     # incremented to 'i', then 'j', per immediate
      my $prefix_ok = 1;      # 66/F2/F3 still count as mandatory prefixes
      foreach my $op (split(/\s*(?:\s|(?=[\/\\]))/, $opc)) {
          my $pc = $plain_codes{$op};

          if (defined $pc) {
              # Plain code
              push(@codes, $pc);
          } elsif ($prefix_ok && $op =~ /^(66|f2|f3)$/) {
              # 66/F2/F3 prefix used as an opcode extension
              if ($op eq '66') {
                  push(@codes, 0361);
              } elsif ($op eq 'f2') {
                  push(@codes, 0332);
              } else {
                  push(@codes, 0333);
              }
          } elsif ($op =~ /^[0-9a-f]{2}$/) {
              # Literal byte: extend the current literal run if it is the
              # last thing emitted and still shorter than 4 bytes
              if (defined($litix) && $litix+$codes[$litix]+1 == scalar @codes &&
                  $codes[$litix] < 4) {
                  $codes[$litix]++;
                  push(@codes, hex $op);
              } else {
                  $litix = scalar(@codes);
                  push(@codes, 01, hex $op);
              }
              $prefix_ok = 0;
          } elsif ($op eq '/r') {
              if (!defined($oppos{'r'}) || !defined($oppos{'m'})) {
                  die "$fname: $line: $op requires r and m operands\n";
              }
              # Operand numbers above 3 need an extension code first
              $opex = (($oppos{'m'} & 4) ? 06 : 0) |
                  (($oppos{'r'} & 4) ? 05 : 0);
              push(@codes, $opex) if ($opex);
              # if mib is composed with two separate operands - ICC style
              push(@codes, 014 + ($oppos{'x'} & 3)) if (defined($oppos{'x'}));
              push(@codes, 0100 + (($oppos{'m'} & 3) << 3) + ($oppos{'r'} & 3));
              $prefix_ok = 0;
          } elsif ($op =~ m:^/([0-7])$:) {
              if (!defined($oppos{'m'})) {
                  die "$fname: $line: $op requires m operand\n";
              }
              push(@codes, 06) if ($oppos{'m'} & 4);
              push(@codes, 0200 + (($oppos{'m'} & 3) << 3) + $1);
              $prefix_ok = 0;
          } elsif ($op =~ /^(vex|xop)(|\..*)$/) {
              my $vexname = $1;
              my $c = $vexmap{$vexname};
              my ($m,$w,$l,$p) = (undef,2,undef,0);
              my $has_nds = 0;
              my @subops = split(/\./, $op);
              shift @subops;  # Drop prefix
              foreach my $oq (@subops) {
                  if ($oq eq '128' || $oq eq 'l0' || $oq eq 'lz') {
                      $l = 0;
                  } elsif ($oq eq '256' || $oq eq 'l1') {
                      $l = 1;
                  } elsif ($oq eq 'lig') {
                      $l = 2;
                  } elsif ($oq eq 'w0') {
                      $w = 0;
                  } elsif ($oq eq 'w1') {
                      $w = 1;
                  } elsif ($oq eq 'wig') {
                      $w = 2;
                  } elsif ($oq eq 'ww') {
                      $w = 3;
                  } elsif ($oq eq 'p0') {
                      $p = 0;
                  } elsif ($oq eq '66' || $oq eq 'p1') {
                      $p = 1;
                  } elsif ($oq eq 'f3' || $oq eq 'p2') {
                      $p = 2;
                  } elsif ($oq eq 'f2' || $oq eq 'p3') {
                      $p = 3;
                  } elsif ($oq eq '0f') {
                      $m = 1;
                  } elsif ($oq eq '0f38') {
                      $m = 2;
                  } elsif ($oq eq '0f3a') {
                      $m = 3;
                  } elsif ($oq =~ /^m([0-9]+)$/) {
                      $m = $1+0;
                  } elsif ($oq eq 'nds' || $oq eq 'ndd' || $oq eq 'dds') {
                      if (!defined($oppos{'v'})) {
                          die "$fname: $line: $vexname.$oq without 'v' operand\n";
                      }
                      $has_nds = 1;
                  } else {
                      die "$fname: $line: undefined \U$vexname\E subcode: $oq\n";
                  }
              }
              if (!defined($m) || !defined($w) || !defined($l) || !defined($p)) {
                  die "$fname: $line: missing fields in \U$vexname\E specification\n";
              }
              if (defined($oppos{'v'}) && !$has_nds) {
                  die "$fname: $line: 'v' operand without ${vexname}.nds or ${vexname}.ndd\n";
              }
              my $minmap = ($c == 1) ? 8 : 0; # 0-31 for VEX, 8-31 for XOP
              if ($m < $minmap || $m > 31) {
                  die "$fname: $line: Only maps ${minmap}-31 are valid for \U${vexname}\n";
              }
              # Directive, then class+map byte, then W/L/pp byte
              push(@codes, defined($oppos{'v'}) ? 0260+($oppos{'v'} & 3) : 0270,
                   ($c << 6)+$m, ($w << 4)+($l << 2)+$p);
              $prefix_ok = 0;
          } elsif ($op =~ /^(evex)(|\..*)$/) {
              my $c = $vexmap{$1};
              my ($m,$w,$l,$p) = (undef,2,undef,0);
              my $has_nds = 0;
              my @subops = split(/\./, $op);
              shift @subops;  # Drop prefix
              foreach my $oq (@subops) {
                  if ($oq eq '128' || $oq eq 'l0' || $oq eq 'lz' || $oq eq 'lig') {
                      $l = 0;
                  } elsif ($oq eq '256' || $oq eq 'l1') {
                      $l = 1;
                  } elsif ($oq eq '512' || $oq eq 'l2') {
                      $l = 2;
                  } elsif ($oq eq 'w0') {
                      $w = 0;
                  } elsif ($oq eq 'w1') {
                      $w = 1;
                  } elsif ($oq eq 'wig') {
                      $w = 2;
                  } elsif ($oq eq 'ww') {
                      $w = 3;
                  } elsif ($oq eq 'p0') {
                      $p = 0;
                  } elsif ($oq eq '66' || $oq eq 'p1') {
                      $p = 1;
                  } elsif ($oq eq 'f3' || $oq eq 'p2') {
                      $p = 2;
                  } elsif ($oq eq 'f2' || $oq eq 'p3') {
                      $p = 3;
                  } elsif ($oq eq '0f') {
                      $m = 1;
                  } elsif ($oq eq '0f38') {
                      $m = 2;
                  } elsif ($oq eq '0f3a') {
                      $m = 3;
                  } elsif ($oq =~ /^m([0-9]+)$/) {
                      $m = $1+0;
                  } elsif ($oq eq 'nds' || $oq eq 'ndd' || $oq eq 'dds') {
                      if (!defined($oppos{'v'})) {
                          die "$fname: $line: evex.$oq without 'v' operand\n";
                      }
                      $has_nds = 1;
                  } else {
                      die "$fname: $line: undefined EVEX subcode: $oq\n";
                  }
              }
              if (!defined($m) || !defined($w) || !defined($l) || !defined($p)) {
                  die "$fname: $line: missing fields in EVEX specification\n";
              }
              if (defined($oppos{'v'}) && !$has_nds) {
                  die "$fname: $line: 'v' operand without evex.nds or evex.ndd\n";
              }
              if ($m > 15) {
                  die "$fname: $line: Only maps 0-15 are valid for EVEX\n";
              }
              # Same layout as VEX plus a trailing tuple type byte
              push(@codes, defined($oppos{'v'}) ? 0240+($oppos{'v'} & 3) : 0250,
                   ($c << 6)+$m, ($w << 4)+($l << 2)+$p, $tup);
              $prefix_ok = 0;
          } elsif (defined $imm_codes{$op}) {
              if ($op eq 'seg') {
                  # seg refers to the most recent immediate operand
                  if ($last_imm lt 'i') {
                      die "$fname: $line: seg without an immediate operand\n";
                  }
              } else {
                  $last_imm++;
                  if ($last_imm gt 'j') {
                      die "$fname: $line: too many immediate operands\n";
                  }
              }
              if (!defined($oppos{$last_imm})) {
                  die "$fname: $line: $op without '$last_imm' operand\n";
              }
              push(@codes, 05) if ($oppos{$last_imm} & 4);
              push(@codes, $imm_codes{$op} + ($oppos{$last_imm} & 3));
              $prefix_ok = 0;
          } elsif ($op eq '/is4') {
              if (!defined($oppos{'s'})) {
                  die "$fname: $line: $op without 's' operand\n";
              }
              if (defined($oppos{'i'})) {
                  push(@codes, 0172, ($oppos{'s'} << 3)+$oppos{'i'});
              } else {
                  push(@codes, 05) if ($oppos{'s'} & 4);
                  push(@codes, 0174+($oppos{'s'} & 3));
              }
              $prefix_ok = 0;
          } elsif ($op =~ /^\/is4\=([0-9]+)$/) {
              my $imm = $1;
              if (!defined($oppos{'s'})) {
                  die "$fname: $line: $op without 's' operand\n";
              }
              if ($imm < 0 || $imm > 15) {
                  die "$fname: $line: invalid imm4 value for $op: $imm\n";
              }
              push(@codes, 0173, ($oppos{'s'} << 4) + $imm);
              $prefix_ok = 0;
          } elsif ($op =~ /^([0-9a-f]{2})\+c$/) {
              push(@codes, 0330, hex $1);
              $prefix_ok = 0;
          } elsif ($op =~ /^([0-9a-f]{2})\+r$/) {
              if (!defined($oppos{'r'})) {
                  die "$fname: $line: $op without 'r' operand\n";
              }
              push(@codes, 05) if ($oppos{'r'} & 4);
              push(@codes, 010 + ($oppos{'r'} & 3), hex $1);
              $prefix_ok = 0;
          } elsif ($op =~ /^\\([0-7]+|x[0-9a-f]{2})$/) {
              # Escape to enter literal bytecodes
              push(@codes, oct $1);
          } else {
              die "$fname: $line: unknown operation: $op\n";
          }
      }

      return @codes;
  }