quote.c 9.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480
  1. /* ----------------------------------------------------------------------- *
  2. *
  3. * Copyright 1996-2016 The NASM Authors - All Rights Reserved
  4. * See the file AUTHORS included with the NASM distribution for
  5. * the specific copyright holders.
  6. *
  7. * Redistribution and use in source and binary forms, with or without
  8. * modification, are permitted provided that the following
  9. * conditions are met:
  10. *
  11. * * Redistributions of source code must retain the above copyright
  12. * notice, this list of conditions and the following disclaimer.
  13. * * Redistributions in binary form must reproduce the above
  14. * copyright notice, this list of conditions and the following
  15. * disclaimer in the documentation and/or other materials provided
  16. * with the distribution.
  17. *
  18. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  19. * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  20. * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  21. * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  22. * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  23. * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  24. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  25. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  26. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  27. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  28. * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  29. * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
  30. * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31. *
  32. * ----------------------------------------------------------------------- */
  33. /*
  34. * quote.c
  35. */
  36. #include "compiler.h"
  37. #include <stdlib.h>
  38. #include "nasmlib.h"
  39. #include "quote.h"
  40. char *nasm_quote(const char *str, size_t len)
  41. {
  42. const char *p, *ep;
  43. char c, c1, *q, *nstr;
  44. unsigned char uc;
  45. bool sq_ok, dq_ok;
  46. size_t qlen;
  47. sq_ok = dq_ok = true;
  48. ep = str+len;
  49. qlen = 0; /* Length if we need `...` quotes */
  50. for (p = str; p < ep; p++) {
  51. c = *p;
  52. switch (c) {
  53. case '\'':
  54. sq_ok = false;
  55. qlen++;
  56. break;
  57. case '\"':
  58. dq_ok = false;
  59. qlen++;
  60. break;
  61. case '`':
  62. case '\\':
  63. qlen += 2;
  64. break;
  65. default:
  66. if (c < ' ' || c > '~') {
  67. sq_ok = dq_ok = false;
  68. switch (c) {
  69. case '\a':
  70. case '\b':
  71. case '\t':
  72. case '\n':
  73. case '\v':
  74. case '\f':
  75. case '\r':
  76. case 27:
  77. qlen += 2;
  78. break;
  79. default:
  80. c1 = (p+1 < ep) ? p[1] : 0;
  81. if (c1 >= '0' && c1 <= '7')
  82. uc = 0377; /* Must use the full form */
  83. else
  84. uc = c;
  85. if (uc > 077)
  86. qlen++;
  87. if (uc > 07)
  88. qlen++;
  89. qlen += 2;
  90. break;
  91. }
  92. } else {
  93. qlen++;
  94. }
  95. break;
  96. }
  97. }
  98. if (sq_ok || dq_ok) {
  99. /* Use '...' or "..." */
  100. nstr = nasm_malloc(len+3);
  101. nstr[0] = nstr[len+1] = sq_ok ? '\'' : '\"';
  102. nstr[len+2] = '\0';
  103. if (len > 0)
  104. memcpy(nstr+1, str, len);
  105. } else {
  106. /* Need to use `...` quoted syntax */
  107. nstr = nasm_malloc(qlen+3);
  108. q = nstr;
  109. *q++ = '`';
  110. for (p = str; p < ep; p++) {
  111. c = *p;
  112. switch (c) {
  113. case '`':
  114. case '\\':
  115. *q++ = '\\';
  116. *q++ = c;
  117. break;
  118. case 7:
  119. *q++ = '\\';
  120. *q++ = 'a';
  121. break;
  122. case 8:
  123. *q++ = '\\';
  124. *q++ = 'b';
  125. break;
  126. case 9:
  127. *q++ = '\\';
  128. *q++ = 't';
  129. break;
  130. case 10:
  131. *q++ = '\\';
  132. *q++ = 'n';
  133. break;
  134. case 11:
  135. *q++ = '\\';
  136. *q++ = 'v';
  137. break;
  138. case 12:
  139. *q++ = '\\';
  140. *q++ = 'f';
  141. break;
  142. case 13:
  143. *q++ = '\\';
  144. *q++ = 'r';
  145. break;
  146. case 27:
  147. *q++ = '\\';
  148. *q++ = 'e';
  149. break;
  150. default:
  151. if (c < ' ' || c > '~') {
  152. c1 = (p+1 < ep) ? p[1] : 0;
  153. if (c1 >= '0' && c1 <= '7')
  154. uc = 0377; /* Must use the full form */
  155. else
  156. uc = c;
  157. *q++ = '\\';
  158. if (uc > 077)
  159. *q++ = ((unsigned char)c >> 6) + '0';
  160. if (uc > 07)
  161. *q++ = (((unsigned char)c >> 3) & 7) + '0';
  162. *q++ = ((unsigned char)c & 7) + '0';
  163. break;
  164. } else {
  165. *q++ = c;
  166. }
  167. break;
  168. }
  169. }
  170. *q++ = '`';
  171. *q++ = '\0';
  172. nasm_assert((size_t)(q-nstr) == qlen+3);
  173. }
  174. return nstr;
  175. }
  176. static char *emit_utf8(char *q, int32_t v)
  177. {
  178. if (v < 0) {
  179. /* Impossible - do nothing */
  180. } else if (v <= 0x7f) {
  181. *q++ = v;
  182. } else if (v <= 0x000007ff) {
  183. *q++ = 0xc0 | (v >> 6);
  184. *q++ = 0x80 | (v & 63);
  185. } else if (v <= 0x0000ffff) {
  186. *q++ = 0xe0 | (v >> 12);
  187. *q++ = 0x80 | ((v >> 6) & 63);
  188. *q++ = 0x80 | (v & 63);
  189. } else if (v <= 0x001fffff) {
  190. *q++ = 0xf0 | (v >> 18);
  191. *q++ = 0x80 | ((v >> 12) & 63);
  192. *q++ = 0x80 | ((v >> 6) & 63);
  193. *q++ = 0x80 | (v & 63);
  194. } else if (v <= 0x03ffffff) {
  195. *q++ = 0xf8 | (v >> 24);
  196. *q++ = 0x80 | ((v >> 18) & 63);
  197. *q++ = 0x80 | ((v >> 12) & 63);
  198. *q++ = 0x80 | ((v >> 6) & 63);
  199. *q++ = 0x80 | (v & 63);
  200. } else {
  201. *q++ = 0xfc | (v >> 30);
  202. *q++ = 0x80 | ((v >> 24) & 63);
  203. *q++ = 0x80 | ((v >> 18) & 63);
  204. *q++ = 0x80 | ((v >> 12) & 63);
  205. *q++ = 0x80 | ((v >> 6) & 63);
  206. *q++ = 0x80 | (v & 63);
  207. }
  208. return q;
  209. }
  210. /*
  211. * Do an *in-place* dequoting of the specified string, returning the
  212. * resulting length (which may be containing embedded nulls.)
  213. *
  214. * In-place replacement is possible since the unquoted length is always
  215. * shorter than or equal to the quoted length.
  216. *
  217. * *ep points to the final quote, or to the null if improperly quoted.
  218. */
  219. size_t nasm_unquote(char *str, char **ep)
  220. {
  221. char bq;
  222. char *p, *q;
  223. char *escp = NULL;
  224. char c;
  225. enum unq_state {
  226. st_start,
  227. st_backslash,
  228. st_hex,
  229. st_oct,
  230. st_ucs
  231. } state;
  232. int ndig = 0;
  233. int32_t nval = 0;
  234. p = q = str;
  235. bq = *p++;
  236. if (!bq)
  237. return 0;
  238. switch (bq) {
  239. case '\'':
  240. case '\"':
  241. /* '...' or "..." string */
  242. while ((c = *p) && c != bq) {
  243. p++;
  244. *q++ = c;
  245. }
  246. *q = '\0';
  247. break;
  248. case '`':
  249. /* `...` string */
  250. state = st_start;
  251. while ((c = *p)) {
  252. p++;
  253. switch (state) {
  254. case st_start:
  255. switch (c) {
  256. case '\\':
  257. state = st_backslash;
  258. break;
  259. case '`':
  260. p--;
  261. goto out;
  262. default:
  263. *q++ = c;
  264. break;
  265. }
  266. break;
  267. case st_backslash:
  268. state = st_start;
  269. escp = p; /* Beginning of argument sequence */
  270. nval = 0;
  271. switch (c) {
  272. case 'a':
  273. *q++ = 7;
  274. break;
  275. case 'b':
  276. *q++ = 8;
  277. break;
  278. case 'e':
  279. *q++ = 27;
  280. break;
  281. case 'f':
  282. *q++ = 12;
  283. break;
  284. case 'n':
  285. *q++ = 10;
  286. break;
  287. case 'r':
  288. *q++ = 13;
  289. break;
  290. case 't':
  291. *q++ = 9;
  292. break;
  293. case 'u':
  294. state = st_ucs;
  295. ndig = 4;
  296. break;
  297. case 'U':
  298. state = st_ucs;
  299. ndig = 8;
  300. break;
  301. case 'v':
  302. *q++ = 11;
  303. break;
  304. case 'x':
  305. case 'X':
  306. state = st_hex;
  307. ndig = 2;
  308. break;
  309. case '0':
  310. case '1':
  311. case '2':
  312. case '3':
  313. case '4':
  314. case '5':
  315. case '6':
  316. case '7':
  317. state = st_oct;
  318. ndig = 2; /* Up to two more digits */
  319. nval = c - '0';
  320. break;
  321. default:
  322. *q++ = c;
  323. break;
  324. }
  325. break;
  326. case st_oct:
  327. if (c >= '0' && c <= '7') {
  328. nval = (nval << 3) + (c - '0');
  329. if (!--ndig) {
  330. *q++ = nval;
  331. state = st_start;
  332. }
  333. } else {
  334. p--; /* Process this character again */
  335. *q++ = nval;
  336. state = st_start;
  337. }
  338. break;
  339. case st_hex:
  340. if ((c >= '0' && c <= '9') ||
  341. (c >= 'A' && c <= 'F') ||
  342. (c >= 'a' && c <= 'f')) {
  343. nval = (nval << 4) + numvalue(c);
  344. if (!--ndig) {
  345. *q++ = nval;
  346. state = st_start;
  347. }
  348. } else {
  349. p--; /* Process this character again */
  350. *q++ = (p > escp) ? nval : escp[-1];
  351. state = st_start;
  352. }
  353. break;
  354. case st_ucs:
  355. if ((c >= '0' && c <= '9') ||
  356. (c >= 'A' && c <= 'F') ||
  357. (c >= 'a' && c <= 'f')) {
  358. nval = (nval << 4) + numvalue(c);
  359. if (!--ndig) {
  360. q = emit_utf8(q, nval);
  361. state = st_start;
  362. }
  363. } else {
  364. p--; /* Process this character again */
  365. if (p > escp)
  366. q = emit_utf8(q, nval);
  367. else
  368. *q++ = escp[-1];
  369. state = st_start;
  370. }
  371. break;
  372. }
  373. }
  374. switch (state) {
  375. case st_start:
  376. case st_backslash:
  377. break;
  378. case st_oct:
  379. *q++ = nval;
  380. break;
  381. case st_hex:
  382. *q++ = (p > escp) ? nval : escp[-1];
  383. break;
  384. case st_ucs:
  385. if (p > escp)
  386. q = emit_utf8(q, nval);
  387. else
  388. *q++ = escp[-1];
  389. break;
  390. }
  391. out:
  392. break;
  393. default:
  394. /* Not a quoted string, just return the input... */
  395. p = q = strchr(str, '\0');
  396. break;
  397. }
  398. if (ep)
  399. *ep = p;
  400. return q-str;
  401. }
  402. /*
  403. * Find the end of a quoted string; returns the pointer to the terminating
  404. * character (either the ending quote or the null character, if unterminated.)
  405. */
  406. char *nasm_skip_string(char *str)
  407. {
  408. char bq;
  409. char *p;
  410. char c;
  411. enum unq_state {
  412. st_start,
  413. st_backslash
  414. } state;
  415. bq = str[0];
  416. if (bq == '\'' || bq == '\"') {
  417. /* '...' or "..." string */
  418. for (p = str+1; *p && *p != bq; p++)
  419. ;
  420. return p;
  421. } else if (bq == '`') {
  422. /* `...` string */
  423. state = st_start;
  424. p = str+1;
  425. if (!*p)
  426. return p;
  427. while ((c = *p++)) {
  428. switch (state) {
  429. case st_start:
  430. switch (c) {
  431. case '\\':
  432. state = st_backslash;
  433. break;
  434. case '`':
  435. return p-1; /* Found the end */
  436. default:
  437. break;
  438. }
  439. break;
  440. case st_backslash:
  441. /*
  442. * Note: for the purpose of finding the end of the string,
  443. * all successor states to st_backslash are functionally
  444. * equivalent to st_start, since either a backslash or
  445. * a backquote will force a return to the st_start state.
  446. */
  447. state = st_start;
  448. break;
  449. }
  450. }
  451. return p-1; /* Unterminated string... */
  452. } else {
  453. return str; /* Not a string... */
  454. }
  455. }