parser.c 41 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209
  1. /* ----------------------------------------------------------------------- *
  2. *
  3. * Copyright 1996-2018 The NASM Authors - All Rights Reserved
  4. * See the file AUTHORS included with the NASM distribution for
  5. * the specific copyright holders.
  6. *
  7. * Redistribution and use in source and binary forms, with or without
  8. * modification, are permitted provided that the following
  9. * conditions are met:
  10. *
  11. * * Redistributions of source code must retain the above copyright
  12. * notice, this list of conditions and the following disclaimer.
  13. * * Redistributions in binary form must reproduce the above
  14. * copyright notice, this list of conditions and the following
  15. * disclaimer in the documentation and/or other materials provided
  16. * with the distribution.
  17. *
  18. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  19. * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  20. * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  21. * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  22. * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  23. * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  24. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  25. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  26. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  27. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  28. * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  29. * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
  30. * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31. *
  32. * ----------------------------------------------------------------------- */
  33. /*
  34. * parser.c source line parser for the Netwide Assembler
  35. */
  36. #include "compiler.h"
  37. #include <stdio.h>
  38. #include <stdlib.h>
  39. #include <stddef.h>
  40. #include <string.h>
  41. #include <ctype.h>
  42. #include "nasm.h"
  43. #include "insns.h"
  44. #include "nasmlib.h"
  45. #include "error.h"
  46. #include "stdscan.h"
  47. #include "eval.h"
  48. #include "parser.h"
  49. #include "float.h"
  50. #include "assemble.h"
  51. #include "tables.h"
  52. static int is_comma_next(void);
  53. static struct tokenval tokval;
  54. static int prefix_slot(int prefix)
  55. {
  56. switch (prefix) {
  57. case P_WAIT:
  58. return PPS_WAIT;
  59. case R_CS:
  60. case R_DS:
  61. case R_SS:
  62. case R_ES:
  63. case R_FS:
  64. case R_GS:
  65. return PPS_SEG;
  66. case P_LOCK:
  67. return PPS_LOCK;
  68. case P_REP:
  69. case P_REPE:
  70. case P_REPZ:
  71. case P_REPNE:
  72. case P_REPNZ:
  73. case P_XACQUIRE:
  74. case P_XRELEASE:
  75. case P_BND:
  76. case P_NOBND:
  77. return PPS_REP;
  78. case P_O16:
  79. case P_O32:
  80. case P_O64:
  81. case P_OSP:
  82. return PPS_OSIZE;
  83. case P_A16:
  84. case P_A32:
  85. case P_A64:
  86. case P_ASP:
  87. return PPS_ASIZE;
  88. case P_EVEX:
  89. case P_VEX3:
  90. case P_VEX2:
  91. return PPS_VEX;
  92. default:
  93. nasm_panic(0, "Invalid value %d passed to prefix_slot()", prefix);
  94. return -1;
  95. }
  96. }
  97. static void process_size_override(insn *result, operand *op)
  98. {
  99. if (tasm_compatible_mode) {
  100. switch (tokval.t_integer) {
  101. /* For TASM compatibility a size override inside the
  102. * brackets changes the size of the operand, not the
  103. * address type of the operand as it does in standard
  104. * NASM syntax. Hence:
  105. *
  106. * mov eax,[DWORD val]
  107. *
  108. * is valid syntax in TASM compatibility mode. Note that
  109. * you lose the ability to override the default address
  110. * type for the instruction, but we never use anything
  111. * but 32-bit flat model addressing in our code.
  112. */
  113. case S_BYTE:
  114. op->type |= BITS8;
  115. break;
  116. case S_WORD:
  117. op->type |= BITS16;
  118. break;
  119. case S_DWORD:
  120. case S_LONG:
  121. op->type |= BITS32;
  122. break;
  123. case S_QWORD:
  124. op->type |= BITS64;
  125. break;
  126. case S_TWORD:
  127. op->type |= BITS80;
  128. break;
  129. case S_OWORD:
  130. op->type |= BITS128;
  131. break;
  132. default:
  133. nasm_error(ERR_NONFATAL,
  134. "invalid operand size specification");
  135. break;
  136. }
  137. } else {
  138. /* Standard NASM compatible syntax */
  139. switch (tokval.t_integer) {
  140. case S_NOSPLIT:
  141. op->eaflags |= EAF_TIMESTWO;
  142. break;
  143. case S_REL:
  144. op->eaflags |= EAF_REL;
  145. break;
  146. case S_ABS:
  147. op->eaflags |= EAF_ABS;
  148. break;
  149. case S_BYTE:
  150. op->disp_size = 8;
  151. op->eaflags |= EAF_BYTEOFFS;
  152. break;
  153. case P_A16:
  154. case P_A32:
  155. case P_A64:
  156. if (result->prefixes[PPS_ASIZE] &&
  157. result->prefixes[PPS_ASIZE] != tokval.t_integer)
  158. nasm_error(ERR_NONFATAL,
  159. "conflicting address size specifications");
  160. else
  161. result->prefixes[PPS_ASIZE] = tokval.t_integer;
  162. break;
  163. case S_WORD:
  164. op->disp_size = 16;
  165. op->eaflags |= EAF_WORDOFFS;
  166. break;
  167. case S_DWORD:
  168. case S_LONG:
  169. op->disp_size = 32;
  170. op->eaflags |= EAF_WORDOFFS;
  171. break;
  172. case S_QWORD:
  173. op->disp_size = 64;
  174. op->eaflags |= EAF_WORDOFFS;
  175. break;
  176. default:
  177. nasm_error(ERR_NONFATAL, "invalid size specification in"
  178. " effective address");
  179. break;
  180. }
  181. }
  182. }
  183. /*
  184. * Brace decorators are are parsed here. opmask and zeroing
  185. * decorators can be placed in any order. e.g. zmm1 {k2}{z} or zmm2
  186. * {z}{k3} decorator(s) are placed at the end of an operand.
  187. */
  188. static bool parse_braces(decoflags_t *decoflags)
  189. {
  190. int i, j;
  191. i = tokval.t_type;
  192. while (true) {
  193. switch (i) {
  194. case TOKEN_OPMASK:
  195. if (*decoflags & OPMASK_MASK) {
  196. nasm_error(ERR_NONFATAL,
  197. "opmask k%"PRIu64" is already set",
  198. *decoflags & OPMASK_MASK);
  199. *decoflags &= ~OPMASK_MASK;
  200. }
  201. *decoflags |= VAL_OPMASK(nasm_regvals[tokval.t_integer]);
  202. break;
  203. case TOKEN_DECORATOR:
  204. j = tokval.t_integer;
  205. switch (j) {
  206. case BRC_Z:
  207. *decoflags |= Z_MASK;
  208. break;
  209. case BRC_1TO2:
  210. case BRC_1TO4:
  211. case BRC_1TO8:
  212. case BRC_1TO16:
  213. *decoflags |= BRDCAST_MASK | VAL_BRNUM(j - BRC_1TO2);
  214. break;
  215. default:
  216. nasm_error(ERR_NONFATAL,
  217. "{%s} is not an expected decorator",
  218. tokval.t_charptr);
  219. break;
  220. }
  221. break;
  222. case ',':
  223. case TOKEN_EOS:
  224. return false;
  225. default:
  226. nasm_error(ERR_NONFATAL,
  227. "only a series of valid decorators expected");
  228. return true;
  229. }
  230. i = stdscan(NULL, &tokval);
  231. }
  232. }
  233. static int parse_mref(operand *op, const expr *e)
  234. {
  235. int b, i, s; /* basereg, indexreg, scale */
  236. int64_t o; /* offset */
  237. b = i = -1;
  238. o = s = 0;
  239. op->segment = op->wrt = NO_SEG;
  240. if (e->type && e->type <= EXPR_REG_END) { /* this bit's a register */
  241. bool is_gpr = is_class(REG_GPR,nasm_reg_flags[e->type]);
  242. if (is_gpr && e->value == 1)
  243. b = e->type; /* It can be basereg */
  244. else /* No, it has to be indexreg */
  245. i = e->type, s = e->value;
  246. e++;
  247. }
  248. if (e->type && e->type <= EXPR_REG_END) { /* it's a 2nd register */
  249. bool is_gpr = is_class(REG_GPR,nasm_reg_flags[e->type]);
  250. if (b != -1) /* If the first was the base, ... */
  251. i = e->type, s = e->value; /* second has to be indexreg */
  252. else if (!is_gpr || e->value != 1) {
  253. /* If both want to be index */
  254. nasm_error(ERR_NONFATAL,
  255. "invalid effective address: two index registers");
  256. return -1;
  257. } else
  258. b = e->type;
  259. e++;
  260. }
  261. if (e->type) { /* is there an offset? */
  262. if (e->type <= EXPR_REG_END) { /* in fact, is there an error? */
  263. nasm_error(ERR_NONFATAL,
  264. "invalid effective address: impossible register");
  265. return -1;
  266. } else {
  267. if (e->type == EXPR_UNKNOWN) {
  268. op->opflags |= OPFLAG_UNKNOWN;
  269. o = 0; /* doesn't matter what */
  270. while (e->type)
  271. e++; /* go to the end of the line */
  272. } else {
  273. if (e->type == EXPR_SIMPLE) {
  274. o = e->value;
  275. e++;
  276. }
  277. if (e->type == EXPR_WRT) {
  278. op->wrt = e->value;
  279. e++;
  280. }
  281. /*
  282. * Look for a segment base type.
  283. */
  284. for (; e->type; e++) {
  285. if (!e->value)
  286. continue;
  287. if (e->type <= EXPR_REG_END) {
  288. nasm_error(ERR_NONFATAL,
  289. "invalid effective address: too many registers");
  290. return -1;
  291. } else if (e->type < EXPR_SEGBASE) {
  292. nasm_error(ERR_NONFATAL,
  293. "invalid effective address: bad subexpression type");
  294. return -1;
  295. } else if (e->value == 1) {
  296. if (op->segment != NO_SEG) {
  297. nasm_error(ERR_NONFATAL,
  298. "invalid effective address: multiple base segments");
  299. return -1;
  300. }
  301. op->segment = e->type - EXPR_SEGBASE;
  302. } else if (e->value == -1 &&
  303. e->type == location.segment + EXPR_SEGBASE &&
  304. !(op->opflags & OPFLAG_RELATIVE)) {
  305. op->opflags |= OPFLAG_RELATIVE;
  306. } else {
  307. nasm_error(ERR_NONFATAL,
  308. "invalid effective address: impossible segment base multiplier");
  309. return -1;
  310. }
  311. }
  312. }
  313. }
  314. }
  315. nasm_assert(!e->type); /* We should be at the end */
  316. op->basereg = b;
  317. op->indexreg = i;
  318. op->scale = s;
  319. op->offset = o;
  320. return 0;
  321. }
  322. static void mref_set_optype(operand *op)
  323. {
  324. int b = op->basereg;
  325. int i = op->indexreg;
  326. int s = op->scale;
  327. /* It is memory, but it can match any r/m operand */
  328. op->type |= MEMORY_ANY;
  329. if (b == -1 && (i == -1 || s == 0)) {
  330. int is_rel = globalbits == 64 &&
  331. !(op->eaflags & EAF_ABS) &&
  332. ((globalrel &&
  333. !(op->eaflags & EAF_FSGS)) ||
  334. (op->eaflags & EAF_REL));
  335. op->type |= is_rel ? IP_REL : MEM_OFFS;
  336. }
  337. if (i != -1) {
  338. opflags_t iclass = nasm_reg_flags[i];
  339. if (is_class(XMMREG,iclass))
  340. op->type |= XMEM;
  341. else if (is_class(YMMREG,iclass))
  342. op->type |= YMEM;
  343. else if (is_class(ZMMREG,iclass))
  344. op->type |= ZMEM;
  345. }
  346. }
  347. /*
  348. * Convert an expression vector returned from evaluate() into an
  349. * extop structure. Return zero on success.
  350. */
  351. static int value_to_extop(expr * vect, extop *eop, int32_t myseg)
  352. {
  353. eop->type = EOT_DB_NUMBER;
  354. eop->offset = 0;
  355. eop->segment = eop->wrt = NO_SEG;
  356. eop->relative = false;
  357. for (; vect->type; vect++) {
  358. if (!vect->value) /* zero term, safe to ignore */
  359. continue;
  360. if (vect->type <= EXPR_REG_END) /* false if a register is present */
  361. return -1;
  362. if (vect->type == EXPR_UNKNOWN) /* something we can't resolve yet */
  363. return 0;
  364. if (vect->type == EXPR_SIMPLE) {
  365. /* Simple number expression */
  366. eop->offset += vect->value;
  367. continue;
  368. }
  369. if (eop->wrt == NO_SEG && !eop->relative && vect->type == EXPR_WRT) {
  370. /* WRT term */
  371. eop->wrt = vect->value;
  372. continue;
  373. }
  374. if (!eop->relative &&
  375. vect->type == EXPR_SEGBASE + myseg && vect->value == -1) {
  376. /* Expression of the form: foo - $ */
  377. eop->relative = true;
  378. continue;
  379. }
  380. if (eop->segment == NO_SEG && vect->type >= EXPR_SEGBASE &&
  381. vect->value == 1) {
  382. eop->segment = vect->type - EXPR_SEGBASE;
  383. continue;
  384. }
  385. /* Otherwise, badness */
  386. return -1;
  387. }
  388. /* We got to the end and it was all okay */
  389. return 0;
  390. }
  391. insn *parse_line(int pass, char *buffer, insn *result)
  392. {
  393. bool insn_is_label = false;
  394. struct eval_hints hints;
  395. int opnum;
  396. int critical;
  397. bool first;
  398. bool recover;
  399. int i;
  400. nasm_static_assert(P_none == 0);
  401. restart_parse:
  402. first = true;
  403. result->forw_ref = false;
  404. stdscan_reset();
  405. stdscan_set(buffer);
  406. i = stdscan(NULL, &tokval);
  407. memset(result->prefixes, P_none, sizeof(result->prefixes));
  408. result->times = 1; /* No TIMES either yet */
  409. result->label = NULL; /* Assume no label */
  410. result->eops = NULL; /* must do this, whatever happens */
  411. result->operands = 0; /* must initialize this */
  412. result->evex_rm = 0; /* Ensure EVEX rounding mode is reset */
  413. result->evex_brerop = -1; /* Reset EVEX broadcasting/ER op position */
  414. /* Ignore blank lines */
  415. if (i == TOKEN_EOS)
  416. goto fail;
  417. if (i != TOKEN_ID &&
  418. i != TOKEN_INSN &&
  419. i != TOKEN_PREFIX &&
  420. (i != TOKEN_REG || !IS_SREG(tokval.t_integer))) {
  421. nasm_error(ERR_NONFATAL,
  422. "label or instruction expected at start of line");
  423. goto fail;
  424. }
  425. if (i == TOKEN_ID || (insn_is_label && i == TOKEN_INSN)) {
  426. /* there's a label here */
  427. first = false;
  428. result->label = tokval.t_charptr;
  429. i = stdscan(NULL, &tokval);
  430. if (i == ':') { /* skip over the optional colon */
  431. i = stdscan(NULL, &tokval);
  432. } else if (i == 0) {
  433. nasm_error(ERR_WARNING | WARN_OL | ERR_PASS1,
  434. "label alone on a line without a colon might be in error");
  435. }
  436. if (i != TOKEN_INSN || tokval.t_integer != I_EQU) {
  437. /*
  438. * FIXME: location.segment could be NO_SEG, in which case
  439. * it is possible we should be passing 'absolute.segment'. Look into this.
  440. * Work out whether that is *really* what we should be doing.
  441. * Generally fix things. I think this is right as it is, but
  442. * am still not certain.
  443. */
  444. define_label(result->label,
  445. in_absolute ? absolute.segment : location.segment,
  446. location.offset, true);
  447. }
  448. }
  449. /* Just a label here */
  450. if (i == TOKEN_EOS)
  451. goto fail;
  452. while (i == TOKEN_PREFIX ||
  453. (i == TOKEN_REG && IS_SREG(tokval.t_integer))) {
  454. first = false;
  455. /*
  456. * Handle special case: the TIMES prefix.
  457. */
  458. if (i == TOKEN_PREFIX && tokval.t_integer == P_TIMES) {
  459. expr *value;
  460. i = stdscan(NULL, &tokval);
  461. value = evaluate(stdscan, NULL, &tokval, NULL, pass0, NULL);
  462. i = tokval.t_type;
  463. if (!value) /* Error in evaluator */
  464. goto fail;
  465. if (!is_simple(value)) {
  466. nasm_error(ERR_NONFATAL,
  467. "non-constant argument supplied to TIMES");
  468. result->times = 1L;
  469. } else {
  470. result->times = value->value;
  471. if (value->value < 0) {
  472. nasm_error(ERR_NONFATAL|ERR_PASS2, "TIMES value %"PRId64" is negative", value->value);
  473. result->times = 0;
  474. }
  475. }
  476. } else {
  477. int slot = prefix_slot(tokval.t_integer);
  478. if (result->prefixes[slot]) {
  479. if (result->prefixes[slot] == tokval.t_integer)
  480. nasm_error(ERR_WARNING | ERR_PASS1,
  481. "instruction has redundant prefixes");
  482. else
  483. nasm_error(ERR_NONFATAL,
  484. "instruction has conflicting prefixes");
  485. }
  486. result->prefixes[slot] = tokval.t_integer;
  487. i = stdscan(NULL, &tokval);
  488. }
  489. }
  490. if (i != TOKEN_INSN) {
  491. int j;
  492. enum prefixes pfx;
  493. for (j = 0; j < MAXPREFIX; j++) {
  494. if ((pfx = result->prefixes[j]) != P_none)
  495. break;
  496. }
  497. if (i == 0 && pfx != P_none) {
  498. /*
  499. * Instruction prefixes are present, but no actual
  500. * instruction. This is allowed: at this point we
  501. * invent a notional instruction of RESB 0.
  502. */
  503. result->opcode = I_RESB;
  504. result->operands = 1;
  505. nasm_zero(result->oprs);
  506. result->oprs[0].type = IMMEDIATE;
  507. result->oprs[0].offset = 0L;
  508. result->oprs[0].segment = result->oprs[0].wrt = NO_SEG;
  509. return result;
  510. } else {
  511. nasm_error(ERR_NONFATAL, "parser: instruction expected");
  512. goto fail;
  513. }
  514. }
  515. result->opcode = tokval.t_integer;
  516. result->condition = tokval.t_inttwo;
  517. /*
  518. * INCBIN cannot be satisfied with incorrectly
  519. * evaluated operands, since the correct values _must_ be known
  520. * on the first pass. Hence, even in pass one, we set the
  521. * `critical' flag on calling evaluate(), so that it will bomb
  522. * out on undefined symbols.
  523. */
  524. if (result->opcode == I_INCBIN) {
  525. critical = (pass0 < 2 ? 1 : 2);
  526. } else
  527. critical = (pass == 2 ? 2 : 0);
  528. if (opcode_is_db(result->opcode) || result->opcode == I_INCBIN) {
  529. extop *eop, **tail = &result->eops, **fixptr;
  530. int oper_num = 0;
  531. int32_t sign;
  532. result->eops_float = false;
  533. /*
  534. * Begin to read the DB/DW/DD/DQ/DT/DO/DY/DZ/INCBIN operands.
  535. */
  536. while (1) {
  537. i = stdscan(NULL, &tokval);
  538. if (i == TOKEN_EOS)
  539. break;
  540. else if (first && i == ':') {
  541. insn_is_label = true;
  542. goto restart_parse;
  543. }
  544. first = false;
  545. fixptr = tail;
  546. eop = *tail = nasm_malloc(sizeof(extop));
  547. tail = &eop->next;
  548. eop->next = NULL;
  549. eop->type = EOT_NOTHING;
  550. oper_num++;
  551. sign = +1;
  552. /*
  553. * is_comma_next() here is to distinguish this from
  554. * a string used as part of an expression...
  555. */
  556. if (i == TOKEN_STR && is_comma_next()) {
  557. eop->type = EOT_DB_STRING;
  558. eop->stringval = tokval.t_charptr;
  559. eop->stringlen = tokval.t_inttwo;
  560. i = stdscan(NULL, &tokval); /* eat the comma */
  561. } else if (i == TOKEN_STRFUNC) {
  562. bool parens = false;
  563. const char *funcname = tokval.t_charptr;
  564. enum strfunc func = tokval.t_integer;
  565. i = stdscan(NULL, &tokval);
  566. if (i == '(') {
  567. parens = true;
  568. i = stdscan(NULL, &tokval);
  569. }
  570. if (i != TOKEN_STR) {
  571. nasm_error(ERR_NONFATAL,
  572. "%s must be followed by a string constant",
  573. funcname);
  574. eop->type = EOT_NOTHING;
  575. } else {
  576. eop->type = EOT_DB_STRING_FREE;
  577. eop->stringlen =
  578. string_transform(tokval.t_charptr, tokval.t_inttwo,
  579. &eop->stringval, func);
  580. if (eop->stringlen == (size_t)-1) {
  581. nasm_error(ERR_NONFATAL, "invalid string for transform");
  582. eop->type = EOT_NOTHING;
  583. }
  584. }
  585. if (parens && i && i != ')') {
  586. i = stdscan(NULL, &tokval);
  587. if (i != ')') {
  588. nasm_error(ERR_NONFATAL, "unterminated %s function",
  589. funcname);
  590. }
  591. }
  592. if (i && i != ',')
  593. i = stdscan(NULL, &tokval);
  594. } else if (i == '-' || i == '+') {
  595. char *save = stdscan_get();
  596. int token = i;
  597. sign = (i == '-') ? -1 : 1;
  598. i = stdscan(NULL, &tokval);
  599. if (i != TOKEN_FLOAT) {
  600. stdscan_set(save);
  601. i = tokval.t_type = token;
  602. goto is_expression;
  603. } else {
  604. goto is_float;
  605. }
  606. } else if (i == TOKEN_FLOAT) {
  607. is_float:
  608. eop->type = EOT_DB_STRING;
  609. result->eops_float = true;
  610. eop->stringlen = db_bytes(result->opcode);
  611. if (eop->stringlen > 16) {
  612. nasm_error(ERR_NONFATAL, "floating-point constant"
  613. " encountered in DY or DZ instruction");
  614. eop->stringlen = 0;
  615. } else if (eop->stringlen < 1) {
  616. nasm_error(ERR_NONFATAL, "floating-point constant"
  617. " encountered in unknown instruction");
  618. /*
  619. * fix suggested by Pedro Gimeno... original line was:
  620. * eop->type = EOT_NOTHING;
  621. */
  622. eop->stringlen = 0;
  623. }
  624. eop = nasm_realloc(eop, sizeof(extop) + eop->stringlen);
  625. tail = &eop->next;
  626. *fixptr = eop;
  627. eop->stringval = (char *)eop + sizeof(extop);
  628. if (!eop->stringlen ||
  629. !float_const(tokval.t_charptr, sign,
  630. (uint8_t *)eop->stringval, eop->stringlen))
  631. eop->type = EOT_NOTHING;
  632. i = stdscan(NULL, &tokval); /* eat the comma */
  633. } else {
  634. /* anything else, assume it is an expression */
  635. expr *value;
  636. is_expression:
  637. value = evaluate(stdscan, NULL, &tokval, NULL,
  638. critical, NULL);
  639. i = tokval.t_type;
  640. if (!value) /* Error in evaluator */
  641. goto fail;
  642. if (value_to_extop(value, eop, location.segment)) {
  643. nasm_error(ERR_NONFATAL,
  644. "operand %d: expression is not simple or relocatable",
  645. oper_num);
  646. }
  647. }
  648. /*
  649. * We're about to call stdscan(), which will eat the
  650. * comma that we're currently sitting on between
  651. * arguments. However, we'd better check first that it
  652. * _is_ a comma.
  653. */
  654. if (i == TOKEN_EOS) /* also could be EOL */
  655. break;
  656. if (i != ',') {
  657. nasm_error(ERR_NONFATAL, "comma expected after operand %d",
  658. oper_num);
  659. goto fail;
  660. }
  661. }
  662. if (result->opcode == I_INCBIN) {
  663. /*
  664. * Correct syntax for INCBIN is that there should be
  665. * one string operand, followed by one or two numeric
  666. * operands.
  667. */
  668. if (!result->eops || result->eops->type != EOT_DB_STRING)
  669. nasm_error(ERR_NONFATAL, "`incbin' expects a file name");
  670. else if (result->eops->next &&
  671. result->eops->next->type != EOT_DB_NUMBER)
  672. nasm_error(ERR_NONFATAL, "`incbin': second parameter is"
  673. " non-numeric");
  674. else if (result->eops->next && result->eops->next->next &&
  675. result->eops->next->next->type != EOT_DB_NUMBER)
  676. nasm_error(ERR_NONFATAL, "`incbin': third parameter is"
  677. " non-numeric");
  678. else if (result->eops->next && result->eops->next->next &&
  679. result->eops->next->next->next)
  680. nasm_error(ERR_NONFATAL,
  681. "`incbin': more than three parameters");
  682. else
  683. return result;
  684. /*
  685. * If we reach here, one of the above errors happened.
  686. * Throw the instruction away.
  687. */
  688. goto fail;
  689. } else /* DB ... */ if (oper_num == 0)
  690. nasm_error(ERR_WARNING | ERR_PASS1,
  691. "no operand for data declaration");
  692. else
  693. result->operands = oper_num;
  694. return result;
  695. }
  696. /*
  697. * Now we begin to parse the operands. There may be up to four
  698. * of these, separated by commas, and terminated by a zero token.
  699. */
  700. for (opnum = 0; opnum < MAX_OPERANDS; opnum++) {
  701. operand *op = &result->oprs[opnum];
  702. expr *value; /* used most of the time */
  703. bool mref; /* is this going to be a memory ref? */
  704. bool bracket; /* is it a [] mref, or a & mref? */
  705. bool mib; /* compound (mib) mref? */
  706. int setsize = 0;
  707. decoflags_t brace_flags = 0; /* flags for decorators in braces */
  708. op->disp_size = 0; /* have to zero this whatever */
  709. op->eaflags = 0; /* and this */
  710. op->opflags = 0;
  711. op->decoflags = 0;
  712. i = stdscan(NULL, &tokval);
  713. if (i == TOKEN_EOS)
  714. break; /* end of operands: get out of here */
  715. else if (first && i == ':') {
  716. insn_is_label = true;
  717. goto restart_parse;
  718. }
  719. first = false;
  720. op->type = 0; /* so far, no override */
  721. while (i == TOKEN_SPECIAL) { /* size specifiers */
  722. switch (tokval.t_integer) {
  723. case S_BYTE:
  724. if (!setsize) /* we want to use only the first */
  725. op->type |= BITS8;
  726. setsize = 1;
  727. break;
  728. case S_WORD:
  729. if (!setsize)
  730. op->type |= BITS16;
  731. setsize = 1;
  732. break;
  733. case S_DWORD:
  734. case S_LONG:
  735. if (!setsize)
  736. op->type |= BITS32;
  737. setsize = 1;
  738. break;
  739. case S_QWORD:
  740. if (!setsize)
  741. op->type |= BITS64;
  742. setsize = 1;
  743. break;
  744. case S_TWORD:
  745. if (!setsize)
  746. op->type |= BITS80;
  747. setsize = 1;
  748. break;
  749. case S_OWORD:
  750. if (!setsize)
  751. op->type |= BITS128;
  752. setsize = 1;
  753. break;
  754. case S_YWORD:
  755. if (!setsize)
  756. op->type |= BITS256;
  757. setsize = 1;
  758. break;
  759. case S_ZWORD:
  760. if (!setsize)
  761. op->type |= BITS512;
  762. setsize = 1;
  763. break;
  764. case S_TO:
  765. op->type |= TO;
  766. break;
  767. case S_STRICT:
  768. op->type |= STRICT;
  769. break;
  770. case S_FAR:
  771. op->type |= FAR;
  772. break;
  773. case S_NEAR:
  774. op->type |= NEAR;
  775. break;
  776. case S_SHORT:
  777. op->type |= SHORT;
  778. break;
  779. default:
  780. nasm_error(ERR_NONFATAL, "invalid operand size specification");
  781. }
  782. i = stdscan(NULL, &tokval);
  783. }
  784. if (i == '[' || i == '&') { /* memory reference */
  785. mref = true;
  786. bracket = (i == '[');
  787. i = stdscan(NULL, &tokval); /* then skip the colon */
  788. while (i == TOKEN_SPECIAL || i == TOKEN_PREFIX) {
  789. process_size_override(result, op);
  790. i = stdscan(NULL, &tokval);
  791. }
  792. /* when a comma follows an opening bracket - [ , eax*4] */
  793. if (i == ',') {
  794. /* treat as if there is a zero displacement virtually */
  795. tokval.t_type = TOKEN_NUM;
  796. tokval.t_integer = 0;
  797. stdscan_set(stdscan_get() - 1); /* rewind the comma */
  798. }
  799. } else { /* immediate operand, or register */
  800. mref = false;
  801. bracket = false; /* placate optimisers */
  802. }
  803. if ((op->type & FAR) && !mref &&
  804. result->opcode != I_JMP && result->opcode != I_CALL) {
  805. nasm_error(ERR_NONFATAL, "invalid use of FAR operand specifier");
  806. }
  807. value = evaluate(stdscan, NULL, &tokval,
  808. &op->opflags, critical, &hints);
  809. i = tokval.t_type;
  810. if (op->opflags & OPFLAG_FORWARD) {
  811. result->forw_ref = true;
  812. }
  813. if (!value) /* Error in evaluator */
  814. goto fail;
  815. if (i == ':' && mref) { /* it was seg:offset */
  816. /*
  817. * Process the segment override.
  818. */
  819. if (value[1].type != 0 ||
  820. value->value != 1 ||
  821. !IS_SREG(value->type))
  822. nasm_error(ERR_NONFATAL, "invalid segment override");
  823. else if (result->prefixes[PPS_SEG])
  824. nasm_error(ERR_NONFATAL,
  825. "instruction has conflicting segment overrides");
  826. else {
  827. result->prefixes[PPS_SEG] = value->type;
  828. if (IS_FSGS(value->type))
  829. op->eaflags |= EAF_FSGS;
  830. }
  831. i = stdscan(NULL, &tokval); /* then skip the colon */
  832. while (i == TOKEN_SPECIAL || i == TOKEN_PREFIX) {
  833. process_size_override(result, op);
  834. i = stdscan(NULL, &tokval);
  835. }
  836. value = evaluate(stdscan, NULL, &tokval,
  837. &op->opflags, critical, &hints);
  838. i = tokval.t_type;
  839. if (op->opflags & OPFLAG_FORWARD) {
  840. result->forw_ref = true;
  841. }
  842. /* and get the offset */
  843. if (!value) /* Error in evaluator */
  844. goto fail;
  845. }
  846. mib = false;
  847. if (mref && bracket && i == ',') {
  848. /* [seg:base+offset,index*scale] syntax (mib) */
  849. operand o1, o2; /* Partial operands */
  850. if (parse_mref(&o1, value))
  851. goto fail;
  852. i = stdscan(NULL, &tokval); /* Eat comma */
  853. value = evaluate(stdscan, NULL, &tokval, &op->opflags,
  854. critical, &hints);
  855. i = tokval.t_type;
  856. if (!value)
  857. goto fail;
  858. if (parse_mref(&o2, value))
  859. goto fail;
  860. if (o2.basereg != -1 && o2.indexreg == -1) {
  861. o2.indexreg = o2.basereg;
  862. o2.scale = 1;
  863. o2.basereg = -1;
  864. }
  865. if (o1.indexreg != -1 || o2.basereg != -1 || o2.offset != 0 ||
  866. o2.segment != NO_SEG || o2.wrt != NO_SEG) {
  867. nasm_error(ERR_NONFATAL, "invalid mib expression");
  868. goto fail;
  869. }
  870. op->basereg = o1.basereg;
  871. op->indexreg = o2.indexreg;
  872. op->scale = o2.scale;
  873. op->offset = o1.offset;
  874. op->segment = o1.segment;
  875. op->wrt = o1.wrt;
  876. if (op->basereg != -1) {
  877. op->hintbase = op->basereg;
  878. op->hinttype = EAH_MAKEBASE;
  879. } else if (op->indexreg != -1) {
  880. op->hintbase = op->indexreg;
  881. op->hinttype = EAH_NOTBASE;
  882. } else {
  883. op->hintbase = -1;
  884. op->hinttype = EAH_NOHINT;
  885. }
  886. mib = true;
  887. }
  888. recover = false;
  889. if (mref && bracket) { /* find ] at the end */
  890. if (i != ']') {
  891. nasm_error(ERR_NONFATAL, "parser: expecting ]");
  892. recover = true;
  893. } else { /* we got the required ] */
  894. i = stdscan(NULL, &tokval);
  895. if (i == TOKEN_DECORATOR || i == TOKEN_OPMASK) {
  896. /* parse opmask (and zeroing) after an operand */
  897. recover = parse_braces(&brace_flags);
  898. i = tokval.t_type;
  899. }
  900. if (i != 0 && i != ',') {
  901. nasm_error(ERR_NONFATAL, "comma or end of line expected");
  902. recover = true;
  903. }
  904. }
  905. } else { /* immediate operand */
  906. if (i != 0 && i != ',' && i != ':' &&
  907. i != TOKEN_DECORATOR && i != TOKEN_OPMASK) {
  908. nasm_error(ERR_NONFATAL, "comma, colon, decorator or end of "
  909. "line expected after operand");
  910. recover = true;
  911. } else if (i == ':') {
  912. op->type |= COLON;
  913. } else if (i == TOKEN_DECORATOR || i == TOKEN_OPMASK) {
  914. /* parse opmask (and zeroing) after an operand */
  915. recover = parse_braces(&brace_flags);
  916. }
  917. }
  918. if (recover) {
  919. do { /* error recovery */
  920. i = stdscan(NULL, &tokval);
  921. } while (i != 0 && i != ',');
  922. }
  923. /*
  924. * now convert the exprs returned from evaluate()
  925. * into operand descriptions...
  926. */
  927. op->decoflags |= brace_flags;
  928. if (mref) { /* it's a memory reference */
  929. /* A mib reference was fully parsed already */
  930. if (!mib) {
  931. if (parse_mref(op, value))
  932. goto fail;
  933. op->hintbase = hints.base;
  934. op->hinttype = hints.type;
  935. }
  936. mref_set_optype(op);
  937. } else { /* it's not a memory reference */
  938. if (is_just_unknown(value)) { /* it's immediate but unknown */
  939. op->type |= IMMEDIATE;
  940. op->opflags |= OPFLAG_UNKNOWN;
  941. op->offset = 0; /* don't care */
  942. op->segment = NO_SEG; /* don't care again */
  943. op->wrt = NO_SEG; /* still don't care */
  944. if(optimizing.level >= 0 && !(op->type & STRICT)) {
  945. /* Be optimistic */
  946. op->type |=
  947. UNITY | SBYTEWORD | SBYTEDWORD | UDWORD | SDWORD;
  948. }
  949. } else if (is_reloc(value)) { /* it's immediate */
  950. uint64_t n = reloc_value(value);
  951. op->type |= IMMEDIATE;
  952. op->offset = n;
  953. op->segment = reloc_seg(value);
  954. op->wrt = reloc_wrt(value);
  955. op->opflags |= is_self_relative(value) ? OPFLAG_RELATIVE : 0;
  956. if (is_simple(value)) {
  957. if (n == 1)
  958. op->type |= UNITY;
  959. if (optimizing.level >= 0 && !(op->type & STRICT)) {
  960. if ((uint32_t) (n + 128) <= 255)
  961. op->type |= SBYTEDWORD;
  962. if ((uint16_t) (n + 128) <= 255)
  963. op->type |= SBYTEWORD;
  964. if (n <= UINT64_C(0xFFFFFFFF))
  965. op->type |= UDWORD;
  966. if (n + UINT64_C(0x80000000) <= UINT64_C(0xFFFFFFFF))
  967. op->type |= SDWORD;
  968. }
  969. }
  970. } else if (value->type == EXPR_RDSAE) {
  971. /*
  972. * it's not an operand but a rounding or SAE decorator.
  973. * put the decorator information in the (opflag_t) type field
  974. * of previous operand.
  975. */
  976. opnum--; op--;
  977. switch (value->value) {
  978. case BRC_RN:
  979. case BRC_RU:
  980. case BRC_RD:
  981. case BRC_RZ:
  982. case BRC_SAE:
  983. op->decoflags |= (value->value == BRC_SAE ? SAE : ER);
  984. result->evex_rm = value->value;
  985. break;
  986. default:
  987. nasm_error(ERR_NONFATAL, "invalid decorator");
  988. break;
  989. }
  990. } else { /* it's a register */
  991. opflags_t rs;
  992. uint64_t regset_size = 0;
  993. if (value->type >= EXPR_SIMPLE || value->value != 1) {
  994. nasm_error(ERR_NONFATAL, "invalid operand type");
  995. goto fail;
  996. }
  997. /*
  998. * We do not allow any kind of expression, except for
  999. * reg+value in which case it is a register set.
  1000. */
  1001. for (i = 1; value[i].type; i++) {
  1002. if (!value[i].value)
  1003. continue;
  1004. switch (value[i].type) {
  1005. case EXPR_SIMPLE:
  1006. if (!regset_size) {
  1007. regset_size = value[i].value + 1;
  1008. break;
  1009. }
  1010. /* fallthrough */
  1011. default:
  1012. nasm_error(ERR_NONFATAL, "invalid operand type");
  1013. goto fail;
  1014. }
  1015. }
  1016. if ((regset_size & (regset_size - 1)) ||
  1017. regset_size >= (UINT64_C(1) << REGSET_BITS)) {
  1018. nasm_error(ERR_NONFATAL | ERR_PASS2,
  1019. "invalid register set size");
  1020. regset_size = 0;
  1021. }
  1022. /* clear overrides, except TO which applies to FPU regs */
  1023. if (op->type & ~TO) {
  1024. /*
  1025. * we want to produce a warning iff the specified size
  1026. * is different from the register size
  1027. */
  1028. rs = op->type & SIZE_MASK;
  1029. } else {
  1030. rs = 0;
  1031. }
  1032. /*
  1033. * Make sure we're not out of nasm_reg_flags, still
  1034. * probably this should be fixed when we're defining
  1035. * the label.
  1036. *
  1037. * An easy trigger is
  1038. *
  1039. * e equ 0x80000000:0
  1040. * pshufw word e-0
  1041. *
  1042. */
  1043. if (value->type < EXPR_REG_START ||
  1044. value->type > EXPR_REG_END) {
  1045. nasm_error(ERR_NONFATAL, "invalid operand type");
  1046. goto fail;
  1047. }
  1048. op->type &= TO;
  1049. op->type |= REGISTER;
  1050. op->type |= nasm_reg_flags[value->type];
  1051. op->type |= (regset_size >> 1) << REGSET_SHIFT;
  1052. op->decoflags |= brace_flags;
  1053. op->basereg = value->type;
  1054. if (rs && (op->type & SIZE_MASK) != rs)
  1055. nasm_error(ERR_WARNING | ERR_PASS1,
  1056. "register size specification ignored");
  1057. }
  1058. }
  1059. /* remember the position of operand having broadcasting/ER mode */
  1060. if (op->decoflags & (BRDCAST_MASK | ER | SAE))
  1061. result->evex_brerop = opnum;
  1062. }
  1063. result->operands = opnum; /* set operand count */
  1064. /* clear remaining operands */
  1065. while (opnum < MAX_OPERANDS)
  1066. result->oprs[opnum++].type = 0;
  1067. /*
  1068. * Transform RESW, RESD, RESQ, REST, RESO, RESY, RESZ into RESB.
  1069. */
  1070. if (opcode_is_resb(result->opcode)) {
  1071. result->oprs[0].offset *= resb_bytes(result->opcode);
  1072. result->oprs[0].offset *= result->times;
  1073. result->times = 1;
  1074. result->opcode = I_RESB;
  1075. }
  1076. return result;
  1077. fail:
  1078. result->opcode = I_none;
  1079. return result;
  1080. }
  1081. static int is_comma_next(void)
  1082. {
  1083. struct tokenval tv;
  1084. char *p;
  1085. int i;
  1086. p = stdscan_get();
  1087. i = stdscan(NULL, &tv);
  1088. stdscan_set(p);
  1089. return (i == ',' || i == ';' || !i);
  1090. }
  1091. void cleanup_insn(insn * i)
  1092. {
  1093. extop *e;
  1094. while ((e = i->eops)) {
  1095. i->eops = e->next;
  1096. if (e->type == EOT_DB_STRING_FREE)
  1097. nasm_free(e->stringval);
  1098. nasm_free(e);
  1099. }
  1100. }