| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360 |
- /* ----------------------------------------------------------------------- *
- *
- * Copyright 1996-2009 The NASM Authors - All Rights Reserved
- * See the file AUTHORS included with the NASM distribution for
- * the specific copyright holders.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following
- * conditions are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials provided
- * with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
- * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
- * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
- * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
- * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * ----------------------------------------------------------------------- */
- /*
- * strfunc.c
- *
- * String transformation functions
- */
- #include "nasmlib.h"
- #include "nasm.h"
- /*
- * Convert a string in UTF-8 format to UTF-16LE
- */
- static size_t utf8_to_16le(uint8_t *str, size_t len, char *op)
- {
- #define EMIT(x) do { if (op) { WRITESHORT(op,x); } outlen++; } while(0)
- size_t outlen = 0;
- int expect = 0;
- uint8_t c;
- uint32_t v = 0, vmin = 0;
- while (len--) {
- c = *str++;
- if (expect) {
- if ((c & 0xc0) != 0x80) {
- expect = 0;
- return -1;
- } else {
- v = (v << 6) | (c & 0x3f);
- if (!--expect) {
- if (v < vmin || v > 0x10ffff ||
- (v >= 0xd800 && v <= 0xdfff)) {
- return -1;
- } else if (v > 0xffff) {
- v -= 0x10000;
- EMIT(0xd800 | (v >> 10));
- EMIT(0xdc00 | (v & 0x3ff));
- } else {
- EMIT(v);
- }
- }
- continue;
- }
- }
- if (c < 0x80) {
- EMIT(c);
- } else if (c < 0xc0 || c >= 0xfe) {
- /* Invalid UTF-8 */
- return -1;
- } else if (c < 0xe0) {
- v = c & 0x1f;
- expect = 1;
- vmin = 0x80;
- } else if (c < 0xf0) {
- v = c & 0x0f;
- expect = 2;
- vmin = 0x800;
- } else if (c < 0xf8) {
- v = c & 0x07;
- expect = 3;
- vmin = 0x10000;
- } else if (c < 0xfc) {
- v = c & 0x03;
- expect = 4;
- vmin = 0x200000;
- } else {
- v = c & 0x01;
- expect = 5;
- vmin = 0x4000000;
- }
- }
- return expect ? (size_t)-1 : outlen << 1;
- #undef EMIT
- }
- /*
- * Convert a string in UTF-8 format to UTF-16BE
- */
- static size_t utf8_to_16be(uint8_t *str, size_t len, char *op)
- {
- #define EMIT(x) \
- do { \
- uint16_t _y = (x); \
- if (op) { \
- WRITECHAR(op, _y >> 8); \
- WRITECHAR(op, _y); \
- } \
- outlen++; \
- } while (0) \
- size_t outlen = 0;
- int expect = 0;
- uint8_t c;
- uint32_t v = 0, vmin = 0;
- while (len--) {
- c = *str++;
- if (expect) {
- if ((c & 0xc0) != 0x80) {
- expect = 0;
- return -1;
- } else {
- v = (v << 6) | (c & 0x3f);
- if (!--expect) {
- if (v < vmin || v > 0x10ffff ||
- (v >= 0xd800 && v <= 0xdfff)) {
- return -1;
- } else if (v > 0xffff) {
- v -= 0x10000;
- EMIT(0xdc00 | (v & 0x3ff));
- EMIT(0xd800 | (v >> 10));
- } else {
- EMIT(v);
- }
- }
- continue;
- }
- }
- if (c < 0x80) {
- EMIT(c);
- } else if (c < 0xc0 || c >= 0xfe) {
- /* Invalid UTF-8 */
- return -1;
- } else if (c < 0xe0) {
- v = c & 0x1f;
- expect = 1;
- vmin = 0x80;
- } else if (c < 0xf0) {
- v = c & 0x0f;
- expect = 2;
- vmin = 0x800;
- } else if (c < 0xf8) {
- v = c & 0x07;
- expect = 3;
- vmin = 0x10000;
- } else if (c < 0xfc) {
- v = c & 0x03;
- expect = 4;
- vmin = 0x200000;
- } else {
- v = c & 0x01;
- expect = 5;
- vmin = 0x4000000;
- }
- }
- return expect ? (size_t)-1 : outlen << 1;
- #undef EMIT
- }
- /*
- * Convert a string in UTF-8 format to UTF-32LE
- */
- static size_t utf8_to_32le(uint8_t *str, size_t len, char *op)
- {
- #define EMIT(x) do { if (op) { WRITELONG(op,x); } outlen++; } while(0)
- size_t outlen = 0;
- int expect = 0;
- uint8_t c;
- uint32_t v = 0, vmin = 0;
- while (len--) {
- c = *str++;
- if (expect) {
- if ((c & 0xc0) != 0x80) {
- return -1;
- } else {
- v = (v << 6) | (c & 0x3f);
- if (!--expect) {
- if (v < vmin || (v >= 0xd800 && v <= 0xdfff)) {
- return -1;
- } else {
- EMIT(v);
- }
- }
- continue;
- }
- }
- if (c < 0x80) {
- EMIT(c);
- } else if (c < 0xc0 || c >= 0xfe) {
- /* Invalid UTF-8 */
- return -1;
- } else if (c < 0xe0) {
- v = c & 0x1f;
- expect = 1;
- vmin = 0x80;
- } else if (c < 0xf0) {
- v = c & 0x0f;
- expect = 2;
- vmin = 0x800;
- } else if (c < 0xf8) {
- v = c & 0x07;
- expect = 3;
- vmin = 0x10000;
- } else if (c < 0xfc) {
- v = c & 0x03;
- expect = 4;
- vmin = 0x200000;
- } else {
- v = c & 0x01;
- expect = 5;
- vmin = 0x4000000;
- }
- }
- return expect ? (size_t)-1 : outlen << 2;
- #undef EMIT
- }
- /*
- * Convert a string in UTF-8 format to UTF-32BE
- */
- static size_t utf8_to_32be(uint8_t *str, size_t len, char *op)
- {
- #define EMIT(x) \
- do { \
- uint32_t _y = (x); \
- if (op) { \
- WRITECHAR(op,_y >> 24); \
- WRITECHAR(op,_y >> 16); \
- WRITECHAR(op,_y >> 8); \
- WRITECHAR(op,_y); \
- } \
- outlen++; \
- } while (0)
- size_t outlen = 0;
- int expect = 0;
- uint8_t c;
- uint32_t v = 0, vmin = 0;
- while (len--) {
- c = *str++;
- if (expect) {
- if ((c & 0xc0) != 0x80) {
- return -1;
- } else {
- v = (v << 6) | (c & 0x3f);
- if (!--expect) {
- if (v < vmin || (v >= 0xd800 && v <= 0xdfff)) {
- return -1;
- } else {
- EMIT(v);
- }
- }
- continue;
- }
- }
- if (c < 0x80) {
- EMIT(c);
- } else if (c < 0xc0 || c >= 0xfe) {
- /* Invalid UTF-8 */
- return -1;
- } else if (c < 0xe0) {
- v = c & 0x1f;
- expect = 1;
- vmin = 0x80;
- } else if (c < 0xf0) {
- v = c & 0x0f;
- expect = 2;
- vmin = 0x800;
- } else if (c < 0xf8) {
- v = c & 0x07;
- expect = 3;
- vmin = 0x10000;
- } else if (c < 0xfc) {
- v = c & 0x03;
- expect = 4;
- vmin = 0x200000;
- } else {
- v = c & 0x01;
- expect = 5;
- vmin = 0x4000000;
- }
- }
- return expect ? (size_t)-1 : outlen << 2;
- #undef EMIT
- }
- typedef size_t (*transform_func)(uint8_t *, size_t, char *);
- /*
- * Apply a specific string transform and return it in a nasm_malloc'd
- * buffer, returning the length. On error, returns (size_t)-1 and no
- * buffer is allocated.
- */
- size_t string_transform(char *str, size_t len, char **out, enum strfunc func)
- {
- /* This should match enum strfunc in nasm.h */
- static const transform_func str_transforms[] = {
- utf8_to_16le,
- utf8_to_16le,
- utf8_to_16be,
- utf8_to_32le,
- utf8_to_32le,
- utf8_to_32be,
- };
- transform_func transform = str_transforms[func];
- size_t outlen;
- uint8_t *s = (uint8_t *)str;
- char *buf;
- outlen = transform(s, len, NULL);
- if (outlen == (size_t)-1)
- return -1;
- *out = buf = nasm_malloc(outlen+1);
- buf[outlen] = '\0'; /* Forcibly null-terminate the buffer */
- return transform(s, len, buf);
- }
|