stdscan.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346
  1. /* ----------------------------------------------------------------------- *
  2. *
  3. * Copyright 1996-2016 The NASM Authors - All Rights Reserved
  4. * See the file AUTHORS included with the NASM distribution for
  5. * the specific copyright holders.
  6. *
  7. * Redistribution and use in source and binary forms, with or without
  8. * modification, are permitted provided that the following
  9. * conditions are met:
  10. *
  11. * * Redistributions of source code must retain the above copyright
  12. * notice, this list of conditions and the following disclaimer.
  13. * * Redistributions in binary form must reproduce the above
  14. * copyright notice, this list of conditions and the following
  15. * disclaimer in the documentation and/or other materials provided
  16. * with the distribution.
  17. *
  18. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  19. * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  20. * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  21. * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  22. * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  23. * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  24. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  25. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  26. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  27. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  28. * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  29. * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
  30. * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31. *
  32. * ----------------------------------------------------------------------- */
  33. #include "compiler.h"
  34. #include <stdio.h>
  35. #include <stdlib.h>
  36. #include <string.h>
  37. #include <ctype.h>
  38. #include "nasm.h"
  39. #include "nasmlib.h"
  40. #include "error.h"
  41. #include "quote.h"
  42. #include "stdscan.h"
  43. #include "insns.h"
  44. /*
  45. * Standard scanner routine used by parser.c and some output
  46. * formats. It keeps a succession of temporary-storage strings in
  47. * stdscan_tempstorage, which can be cleared using stdscan_reset.
  48. */
  49. static char *stdscan_bufptr = NULL;
  50. static char **stdscan_tempstorage = NULL;
  51. static int stdscan_tempsize = 0, stdscan_templen = 0;
  52. #define STDSCAN_TEMP_DELTA 256
  53. void stdscan_set(char *str)
  54. {
  55. stdscan_bufptr = str;
  56. }
  57. char *stdscan_get(void)
  58. {
  59. return stdscan_bufptr;
  60. }
  61. static void stdscan_pop(void)
  62. {
  63. nasm_free(stdscan_tempstorage[--stdscan_templen]);
  64. }
  65. void stdscan_reset(void)
  66. {
  67. while (stdscan_templen > 0)
  68. stdscan_pop();
  69. }
  70. /*
  71. * Unimportant cleanup is done to avoid confusing people who are trying
  72. * to debug real memory leaks
  73. */
  74. void stdscan_cleanup(void)
  75. {
  76. stdscan_reset();
  77. nasm_free(stdscan_tempstorage);
  78. }
  79. static char *stdscan_copy(char *p, int len)
  80. {
  81. char *text;
  82. text = nasm_malloc(len + 1);
  83. memcpy(text, p, len);
  84. text[len] = '\0';
  85. if (stdscan_templen >= stdscan_tempsize) {
  86. stdscan_tempsize += STDSCAN_TEMP_DELTA;
  87. stdscan_tempstorage = nasm_realloc(stdscan_tempstorage,
  88. stdscan_tempsize *
  89. sizeof(char *));
  90. }
  91. stdscan_tempstorage[stdscan_templen++] = text;
  92. return text;
  93. }
  94. /*
  95. * a token is enclosed with braces. proper token type will be assigned
  96. * accordingly with the token flag.
  97. */
  98. static int stdscan_handle_brace(struct tokenval *tv)
  99. {
  100. if (!(tv->t_flag & TFLAG_BRC_ANY)) {
  101. /* invalid token is put inside braces */
  102. nasm_error(ERR_NONFATAL,
  103. "`%s' is not a valid decorator with braces", tv->t_charptr);
  104. tv->t_type = TOKEN_INVALID;
  105. } else if (tv->t_flag & TFLAG_BRC_OPT) {
  106. if (is_reg_class(OPMASKREG, tv->t_integer)) {
  107. /* within braces, opmask register is now used as a mask */
  108. tv->t_type = TOKEN_OPMASK;
  109. }
  110. }
  111. return tv->t_type;
  112. }
  113. int stdscan(void *private_data, struct tokenval *tv)
  114. {
  115. char ourcopy[MAX_KEYWORD + 1], *r, *s;
  116. (void)private_data; /* Don't warn that this parameter is unused */
  117. stdscan_bufptr = nasm_skip_spaces(stdscan_bufptr);
  118. if (!*stdscan_bufptr)
  119. return tv->t_type = TOKEN_EOS;
  120. /* we have a token; either an id, a number or a char */
  121. if (isidstart(*stdscan_bufptr) ||
  122. (*stdscan_bufptr == '$' && isidstart(stdscan_bufptr[1]))) {
  123. /* now we've got an identifier */
  124. bool is_sym = false;
  125. int token_type;
  126. if (*stdscan_bufptr == '$') {
  127. is_sym = true;
  128. stdscan_bufptr++;
  129. }
  130. r = stdscan_bufptr++;
  131. /* read the entire buffer to advance the buffer pointer but... */
  132. while (isidchar(*stdscan_bufptr))
  133. stdscan_bufptr++;
  134. /* ... copy only up to IDLEN_MAX-1 characters */
  135. tv->t_charptr = stdscan_copy(r, stdscan_bufptr - r < IDLEN_MAX ?
  136. stdscan_bufptr - r : IDLEN_MAX - 1);
  137. if (is_sym || stdscan_bufptr - r > MAX_KEYWORD)
  138. return tv->t_type = TOKEN_ID; /* bypass all other checks */
  139. for (s = tv->t_charptr, r = ourcopy; *s; s++)
  140. *r++ = nasm_tolower(*s);
  141. *r = '\0';
  142. /* right, so we have an identifier sitting in temp storage. now,
  143. * is it actually a register or instruction name, or what? */
  144. token_type = nasm_token_hash(ourcopy, tv);
  145. if (unlikely(tv->t_flag & TFLAG_WARN)) {
  146. nasm_error(ERR_WARNING|ERR_PASS1|WARN_PTR,
  147. "`%s' is not a NASM keyword", tv->t_charptr);
  148. }
  149. if (likely(!(tv->t_flag & TFLAG_BRC))) {
  150. /* most of the tokens fall into this case */
  151. return token_type;
  152. } else {
  153. return tv->t_type = TOKEN_ID;
  154. }
  155. } else if (*stdscan_bufptr == '$' && !isnumchar(stdscan_bufptr[1])) {
  156. /*
  157. * It's a $ sign with no following hex number; this must
  158. * mean it's a Here token ($), evaluating to the current
  159. * assembly location, or a Base token ($$), evaluating to
  160. * the base of the current segment.
  161. */
  162. stdscan_bufptr++;
  163. if (*stdscan_bufptr == '$') {
  164. stdscan_bufptr++;
  165. return tv->t_type = TOKEN_BASE;
  166. }
  167. return tv->t_type = TOKEN_HERE;
  168. } else if (isnumstart(*stdscan_bufptr)) { /* now we've got a number */
  169. bool rn_error;
  170. bool is_hex = false;
  171. bool is_float = false;
  172. bool has_e = false;
  173. char c;
  174. r = stdscan_bufptr;
  175. if (*stdscan_bufptr == '$') {
  176. stdscan_bufptr++;
  177. is_hex = true;
  178. }
  179. for (;;) {
  180. c = *stdscan_bufptr++;
  181. if (!is_hex && (c == 'e' || c == 'E')) {
  182. has_e = true;
  183. if (*stdscan_bufptr == '+' || *stdscan_bufptr == '-') {
  184. /*
  185. * e can only be followed by +/- if it is either a
  186. * prefixed hex number or a floating-point number
  187. */
  188. is_float = true;
  189. stdscan_bufptr++;
  190. }
  191. } else if (c == 'H' || c == 'h' || c == 'X' || c == 'x') {
  192. is_hex = true;
  193. } else if (c == 'P' || c == 'p') {
  194. is_float = true;
  195. if (*stdscan_bufptr == '+' || *stdscan_bufptr == '-')
  196. stdscan_bufptr++;
  197. } else if (isnumchar(c))
  198. ; /* just advance */
  199. else if (c == '.')
  200. is_float = true;
  201. else
  202. break;
  203. }
  204. stdscan_bufptr--; /* Point to first character beyond number */
  205. if (has_e && !is_hex) {
  206. /* 1e13 is floating-point, but 1e13h is not */
  207. is_float = true;
  208. }
  209. if (is_float) {
  210. tv->t_charptr = stdscan_copy(r, stdscan_bufptr - r);
  211. return tv->t_type = TOKEN_FLOAT;
  212. } else {
  213. r = stdscan_copy(r, stdscan_bufptr - r);
  214. tv->t_integer = readnum(r, &rn_error);
  215. stdscan_pop();
  216. if (rn_error) {
  217. /* some malformation occurred */
  218. return tv->t_type = TOKEN_ERRNUM;
  219. }
  220. tv->t_charptr = NULL;
  221. return tv->t_type = TOKEN_NUM;
  222. }
  223. } else if (*stdscan_bufptr == '\'' || *stdscan_bufptr == '"' ||
  224. *stdscan_bufptr == '`') {
  225. /* a quoted string */
  226. char start_quote = *stdscan_bufptr;
  227. tv->t_charptr = stdscan_bufptr;
  228. tv->t_inttwo = nasm_unquote(tv->t_charptr, &stdscan_bufptr);
  229. if (*stdscan_bufptr != start_quote)
  230. return tv->t_type = TOKEN_ERRSTR;
  231. stdscan_bufptr++; /* Skip final quote */
  232. return tv->t_type = TOKEN_STR;
  233. } else if (*stdscan_bufptr == '{') {
  234. /* now we've got a decorator */
  235. int token_len;
  236. stdscan_bufptr = nasm_skip_spaces(stdscan_bufptr);
  237. r = ++stdscan_bufptr;
  238. /*
  239. * read the entire buffer to advance the buffer pointer
  240. * {rn-sae}, {rd-sae}, {ru-sae}, {rz-sae} contain '-' in tokens.
  241. */
  242. while (isbrcchar(*stdscan_bufptr))
  243. stdscan_bufptr++;
  244. token_len = stdscan_bufptr - r;
  245. /* ... copy only up to DECOLEN_MAX-1 characters */
  246. tv->t_charptr = stdscan_copy(r, token_len < DECOLEN_MAX ?
  247. token_len : DECOLEN_MAX - 1);
  248. stdscan_bufptr = nasm_skip_spaces(stdscan_bufptr);
  249. /* if brace is not closed properly or token is too long */
  250. if ((*stdscan_bufptr != '}') || (token_len > MAX_KEYWORD)) {
  251. nasm_error(ERR_NONFATAL,
  252. "invalid decorator token inside braces");
  253. return tv->t_type = TOKEN_INVALID;
  254. }
  255. stdscan_bufptr++; /* skip closing brace */
  256. for (s = tv->t_charptr, r = ourcopy; *s; s++)
  257. *r++ = nasm_tolower(*s);
  258. *r = '\0';
  259. /* right, so we have a decorator sitting in temp storage. */
  260. nasm_token_hash(ourcopy, tv);
  261. /* handle tokens inside braces */
  262. return stdscan_handle_brace(tv);
  263. } else if (*stdscan_bufptr == ';') {
  264. /* a comment has happened - stay */
  265. return tv->t_type = TOKEN_EOS;
  266. } else if (stdscan_bufptr[0] == '>' && stdscan_bufptr[1] == '>') {
  267. stdscan_bufptr += 2;
  268. return tv->t_type = TOKEN_SHR;
  269. } else if (stdscan_bufptr[0] == '<' && stdscan_bufptr[1] == '<') {
  270. stdscan_bufptr += 2;
  271. return tv->t_type = TOKEN_SHL;
  272. } else if (stdscan_bufptr[0] == '/' && stdscan_bufptr[1] == '/') {
  273. stdscan_bufptr += 2;
  274. return tv->t_type = TOKEN_SDIV;
  275. } else if (stdscan_bufptr[0] == '%' && stdscan_bufptr[1] == '%') {
  276. stdscan_bufptr += 2;
  277. return tv->t_type = TOKEN_SMOD;
  278. } else if (stdscan_bufptr[0] == '=' && stdscan_bufptr[1] == '=') {
  279. stdscan_bufptr += 2;
  280. return tv->t_type = TOKEN_EQ;
  281. } else if (stdscan_bufptr[0] == '<' && stdscan_bufptr[1] == '>') {
  282. stdscan_bufptr += 2;
  283. return tv->t_type = TOKEN_NE;
  284. } else if (stdscan_bufptr[0] == '!' && stdscan_bufptr[1] == '=') {
  285. stdscan_bufptr += 2;
  286. return tv->t_type = TOKEN_NE;
  287. } else if (stdscan_bufptr[0] == '<' && stdscan_bufptr[1] == '=') {
  288. stdscan_bufptr += 2;
  289. return tv->t_type = TOKEN_LE;
  290. } else if (stdscan_bufptr[0] == '>' && stdscan_bufptr[1] == '=') {
  291. stdscan_bufptr += 2;
  292. return tv->t_type = TOKEN_GE;
  293. } else if (stdscan_bufptr[0] == '&' && stdscan_bufptr[1] == '&') {
  294. stdscan_bufptr += 2;
  295. return tv->t_type = TOKEN_DBL_AND;
  296. } else if (stdscan_bufptr[0] == '^' && stdscan_bufptr[1] == '^') {
  297. stdscan_bufptr += 2;
  298. return tv->t_type = TOKEN_DBL_XOR;
  299. } else if (stdscan_bufptr[0] == '|' && stdscan_bufptr[1] == '|') {
  300. stdscan_bufptr += 2;
  301. return tv->t_type = TOKEN_DBL_OR;
  302. } else /* just an ordinary char */
  303. return tv->t_type = (uint8_t)(*stdscan_bufptr++);
  304. }