diff options
Diffstat (limited to 'gcc/c-lex.c')
-rw-r--r-- | gcc/c-lex.c | 320 |
1 files changed, 209 insertions, 111 deletions
diff --git a/gcc/c-lex.c b/gcc/c-lex.c index fb9133c8992..51f985d091d 100644 --- a/gcc/c-lex.c +++ b/gcc/c-lex.c @@ -19,15 +19,8 @@ the Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ #include "config.h" - -#include <stdio.h> -#include <errno.h> +#include "system.h" #include <setjmp.h> - -#if HAVE_LIMITS_H -# include <limits.h> -#endif - #include "rtl.h" #include "tree.h" #include "input.h" @@ -38,24 +31,12 @@ Boston, MA 02111-1307, USA. */ #include "c-pragma.h" #include "intl.h" -#ifdef MAP_CHARACTER -#include <ctype.h> -#endif - -/* MULTIBYTE_CHARS support only works for native compilers. - ??? Ideally what we want is to model widechar support after - the current floating point support. */ -#ifdef CROSS_COMPILE -#undef MULTIBYTE_CHARS -#endif - #ifdef MULTIBYTE_CHARS -#include <stdlib.h> +#include "mbchar.h" #include <locale.h> +#ifndef GET_ENVIRONMENT +#define GET_ENVIRONMENT(ENV_VALUE,ENV_NAME) ((ENV_VALUE) = getenv (ENV_NAME)) #endif - -#ifndef errno -extern int errno; #endif #if USE_CPPLIB @@ -242,6 +223,7 @@ init_lex () #ifdef MULTIBYTE_CHARS /* Change to the native locale for multibyte conversions. */ setlocale (LC_CTYPE, ""); + GET_ENVIRONMENT (literal_codeset, "LANG"); #endif maxtoken = 40; @@ -1348,8 +1330,8 @@ yylex () int parts[TOTAL_PARTS]; int overflow = 0; - enum anon1 { NOT_FLOAT, AFTER_POINT, TOO_MANY_POINTS} floatflag - = NOT_FLOAT; + enum anon1 { NOT_FLOAT, AFTER_POINT, TOO_MANY_POINTS, AFTER_EXPON} + floatflag = NOT_FLOAT; for (count = 0; count < TOTAL_PARTS; count++) parts[count] = 0; @@ -1385,12 +1367,12 @@ yylex () { if (c == '.') { - if (base == 16) - error ("floating constant may not be in radix 16"); + if (base == 16 && pedantic) + pedwarn ("floating constant may not be in radix 16"); if (floatflag == TOO_MANY_POINTS) /* We have already emitted an error. Don't need another. */ ; - else if (floatflag == AFTER_POINT) + else if (floatflag == AFTER_POINT || floatflag == AFTER_EXPON) { error ("malformed floating constant"); floatflag = TOO_MANY_POINTS; @@ -1401,7 +1383,8 @@ yylex () else floatflag = AFTER_POINT; - base = 10; + if (base == 8) + base = 10; *p++ = c = GETC(); /* Accept '.' as the start of a floating-point number only when it is followed by a digit. @@ -1440,12 +1423,17 @@ yylex () if (c == 'e' || c == 'E') { base = 10; - floatflag = AFTER_POINT; + floatflag = AFTER_EXPON; break; /* start of exponent */ } error ("nondigits in number and not hexadecimal"); c = 0; } + else if (base == 16 && (c == 'p' || c == 'P')) + { + floatflag = AFTER_EXPON; + break; /* start of exponent */ + } else if (c >= 'a') { c = c - 'a' + 10; @@ -1502,7 +1490,8 @@ yylex () /* Read explicit exponent if any, and put it in tokenbuf. */ - if ((c == 'e') || (c == 'E')) + if ((base == 10 && ((c == 'e') || (c == 'E'))) + || (base == 16 && (c == 'p' || c == 'P'))) { if (p >= token_buffer + maxtoken - 3) p = extend_token_buffer (p); @@ -1523,6 +1512,8 @@ yylex () c = GETC(); } } + if (base == 16 && floatflag != AFTER_EXPON) + error ("hexadecimal floating constant has no exponent"); *p = 0; @@ -1595,10 +1586,13 @@ yylex () type = float_type_node; errno = 0; - value = REAL_VALUE_ATOF (copy, TYPE_MODE (type)); + if (base == 16) + value = REAL_VALUE_HTOF (copy, TYPE_MODE (type)); + else + value = REAL_VALUE_ATOF (copy, TYPE_MODE (type)); conversion_errno = errno; /* A diagnostic is required here by some ANSI C testsuites. - This is not pedwarn, become some people don't want + This is not pedwarn, because some people don't want an error for this. */ if (REAL_VALUE_ISINF (value) && pedantic) warning ("floating point number exceeds range of `float'"); @@ -1607,7 +1601,10 @@ yylex () { type = long_double_type_node; errno = 0; - value = REAL_VALUE_ATOF (copy, TYPE_MODE (type)); + if (base == 16) + value = REAL_VALUE_HTOF (copy, TYPE_MODE (type)); + else + value = REAL_VALUE_ATOF (copy, TYPE_MODE (type)); conversion_errno = errno; if (REAL_VALUE_ISINF (value) && pedantic) warning ("floating point number exceeds range of `long double'"); @@ -1615,7 +1612,10 @@ yylex () else { errno = 0; - value = REAL_VALUE_ATOF (copy, TYPE_MODE (type)); + if (base == 16) + value = REAL_VALUE_HTOF (copy, TYPE_MODE (type)); + else + value = REAL_VALUE_ATOF (copy, TYPE_MODE (type)); conversion_errno = errno; if (REAL_VALUE_ISINF (value) && pedantic) warning ("floating point number exceeds range of `double'"); @@ -1842,30 +1842,27 @@ yylex () { register int result = 0; register int num_chars = 0; + int chars_seen = 0; unsigned width = TYPE_PRECISION (char_type_node); int max_chars; - - if (wide_flag) - { - width = WCHAR_TYPE_SIZE; #ifdef MULTIBYTE_CHARS - max_chars = MB_CUR_MAX; -#else - max_chars = 1; + int longest_char = local_mb_cur_max (); + (void) local_mbtowc (NULL_PTR, NULL_PTR, 0); #endif - } - else - max_chars = TYPE_PRECISION (integer_type_node) / width; + + max_chars = TYPE_PRECISION (integer_type_node) / width; + if (wide_flag) + width = WCHAR_TYPE_SIZE; while (1) { tryagain: - c = GETC(); if (c == '\'' || c == EOF) break; + ++chars_seen; if (c == '\\') { int ignore = 0; @@ -1886,18 +1883,76 @@ yylex () pedwarn ("ANSI C forbids newline in character constant"); lineno++; } -#ifdef MAP_CHARACTER else - c = MAP_CHARACTER (c); + { +#ifdef MULTIBYTE_CHARS + wchar_t wc; + int i; + int char_len = -1; + for (i = 1; i <= longest_char; ++i) + { + if (i > maxtoken - 4) + extend_token_buffer (token_buffer); + + token_buffer[i] = c; + char_len = local_mbtowc (& wc, + token_buffer + 1, + i); + if (char_len != -1) + break; + c = GETC (); + } + if (char_len > 1) + { + /* mbtowc sometimes needs an extra char before accepting */ + if (char_len < i) + UNGETC (c); + if (! wide_flag) + { + /* Merge character into result; ignore excess chars. */ + for (i = 1; i <= char_len; ++i) + { + if (i > max_chars) + break; + if (width < HOST_BITS_PER_INT) + result = (result << width) + | (token_buffer[i] + & ((1 << width) - 1)); + else + result = token_buffer[i]; + } + num_chars += char_len; + goto tryagain; + } + c = wc; + } + else + { + if (char_len == -1) + warning ("Ignoring invalid multibyte character"); + if (wide_flag) + c = wc; +#ifdef MAP_CHARACTER + else + c = MAP_CHARACTER (c); #endif + } +#else /* ! MULTIBYTE_CHARS */ +#ifdef MAP_CHARACTER + c = MAP_CHARACTER (c); +#endif +#endif /* ! MULTIBYTE_CHARS */ + } - num_chars++; - if (num_chars > maxtoken - 4) - extend_token_buffer (token_buffer); - - token_buffer[num_chars] = c; + if (wide_flag) + { + if (chars_seen == 1) /* only keep the first one */ + result = c; + goto tryagain; + } /* Merge character into result; ignore excess chars. */ + num_chars += (width / TYPE_PRECISION (char_type_node)); if (num_chars < max_chars + 1) { if (width < HOST_BITS_PER_INT) @@ -1907,19 +1962,16 @@ yylex () } } - token_buffer[num_chars + 1] = '\''; - token_buffer[num_chars + 2] = 0; - if (c != '\'') - error ("malformatted character constant"); - else if (num_chars == 0) + error ("malformed character constant"); + else if (chars_seen == 0) error ("empty character constant"); else if (num_chars > max_chars) { num_chars = max_chars; error ("character constant too long"); } - else if (num_chars != 1 && ! flag_traditional) + else if (chars_seen != 1 && ! flag_traditional) warning ("multi-character character constant"); /* If char type is signed, sign-extend the constant. */ @@ -1944,22 +1996,6 @@ yylex () } else { -#ifdef MULTIBYTE_CHARS - /* Set the initial shift state and convert the next sequence. */ - result = 0; - /* In all locales L'\0' is zero and mbtowc will return zero, - so don't use it. */ - if (num_chars > 1 - || (num_chars == 1 && token_buffer[1] != '\0')) - { - wchar_t wc; - (void) mbtowc (NULL_PTR, NULL_PTR, 0); - if (mbtowc (& wc, token_buffer + 1, num_chars) == num_chars) - result = wc; - else - warning ("Ignoring invalid multibyte character"); - } -#endif yylval.ttype = build_int_2 (result, 0); TREE_TYPE (yylval.ttype) = wchar_type_node; } @@ -1971,7 +2007,13 @@ yylex () case '"': string_constant: { - c = GETC(); + unsigned width = wide_flag ? WCHAR_TYPE_SIZE + : TYPE_PRECISION (char_type_node); +#ifdef MULTIBYTE_CHARS + int longest_char = local_mb_cur_max (); + (void) local_mbtowc (NULL_PTR, NULL_PTR, 0); +#endif + c = GETC (); p = token_buffer + 1; while (c != '"' && c >= 0) @@ -1982,9 +2024,8 @@ yylex () c = readescape (&ignore); if (ignore) goto skipnewline; - if (!wide_flag - && TYPE_PRECISION (char_type_node) < HOST_BITS_PER_INT - && c >= (1 << TYPE_PRECISION (char_type_node))) + if (width < HOST_BITS_PER_INT + && (unsigned) c >= (1 << width)) pedwarn ("escape sequence out of range for character"); } else if (c == '\n') @@ -1993,15 +2034,94 @@ yylex () pedwarn ("ANSI C forbids newline in string constant"); lineno++; } + else + { +#ifdef MULTIBYTE_CHARS + wchar_t wc; + int i; + int char_len = -1; + for (i = 0; i < longest_char; ++i) + { + if (p + i == token_buffer + maxtoken) + p = extend_token_buffer (p); + p[i] = c; - if (p == token_buffer + maxtoken) - p = extend_token_buffer (p); - *p++ = c; + char_len = local_mbtowc (& wc, p, i + 1); + if (char_len != -1) + break; + c = GETC (); + } + if (char_len == -1) + warning ("Ignoring invalid multibyte character"); + else + { + /* mbtowc sometimes needs an extra char before accepting */ + if (char_len <= i) + UNGETC (c); + if (wide_flag) + { + *(wchar_t *)p = wc; + p += sizeof (wc); + } + else + p += (i + 1); + c = GETC (); + continue; + } +#endif /* MULTIBYTE_CHARS */ + } + + /* Add this single character into the buffer either as a wchar_t + or as a single byte. */ + if (wide_flag) + { + unsigned width = TYPE_PRECISION (char_type_node); + unsigned bytemask = (1 << width) - 1; + int byte; + + if (p + WCHAR_BYTES >= token_buffer + maxtoken) + p = extend_token_buffer (p); + + for (byte = 0; byte < WCHAR_BYTES; ++byte) + { + int value; + if (byte >= sizeof (c)) + value = 0; + else + value = (c >> (byte * width)) & bytemask; + if (BYTES_BIG_ENDIAN) + p[WCHAR_BYTES - byte - 1] = value; + else + p[byte] = value; + } + p += WCHAR_BYTES; + } + else + { + if (p == token_buffer + maxtoken) + p = extend_token_buffer (p); + *p++ = c; + } skipnewline: - c = GETC(); + c = GETC (); + } + + /* Terminate the string value, either with a single byte zero + or with a wide zero. */ + if (wide_flag) + { + if (p + WCHAR_BYTES >= token_buffer + maxtoken) + p = extend_token_buffer (p); + bzero (p, WCHAR_BYTES); + p += WCHAR_BYTES; + } + else + { + if (p == token_buffer + maxtoken) + p = extend_token_buffer (p); + *p++ = 0; } - *p = 0; if (c < 0) error ("Unterminated string constant"); @@ -2011,31 +2131,8 @@ yylex () if (wide_flag) { - /* If this is a L"..." wide-string, convert the multibyte string - to a wide character string. */ - char *widep = (char *) alloca ((p - token_buffer) * WCHAR_BYTES); - int len; - -#ifdef MULTIBYTE_CHARS - len = mbstowcs ((wchar_t *) widep, token_buffer + 1, p - token_buffer); - if (len < 0 || len >= (p - token_buffer)) - { - warning ("Ignoring invalid multibyte string"); - len = 0; - } - bzero (widep + (len * WCHAR_BYTES), WCHAR_BYTES); -#else - { - char *wp, *cp; - - wp = widep + (BYTES_BIG_ENDIAN ? WCHAR_BYTES - 1 : 0); - bzero (widep, (p - token_buffer) * WCHAR_BYTES); - for (cp = token_buffer + 1; cp < p; cp++) - *wp = *cp, wp += WCHAR_BYTES; - len = p - token_buffer - 1; - } -#endif - yylval.ttype = build_string ((len + 1) * WCHAR_BYTES, widep); + yylval.ttype = build_string (p - (token_buffer + 1), + token_buffer + 1); TREE_TYPE (yylval.ttype) = wchar_array_type_node; value = STRING; } @@ -2043,14 +2140,15 @@ yylex () { extern tree build_objc_string(); /* Return an Objective-C @"..." constant string object. */ - yylval.ttype = build_objc_string (p - token_buffer, + yylval.ttype = build_objc_string (p - (token_buffer + 1), token_buffer + 1); TREE_TYPE (yylval.ttype) = char_array_type_node; value = OBJC_STRING; } else { - yylval.ttype = build_string (p - token_buffer, token_buffer + 1); + yylval.ttype = build_string (p - (token_buffer + 1), + token_buffer + 1); TREE_TYPE (yylval.ttype) = char_array_type_node; value = STRING; } |