about summary refs log tree commit diff
path: root/gcc/c-lex.c
diff options
context:
space:
mode:
author Zack Weinberg <zack@codesourcery.com> 2003-07-05 00:24:00 +0000
committer Zack Weinberg <zack@codesourcery.com> 2003-07-05 00:24:00 +0000
commit 30c029af349bc6516a6712ecce5213715d372d1b (patch)
tree 34c7734f7acee49beff2b3d99cbdf53576456697 /gcc/c-lex.c
parent 4caad22833f36dc62cc1e9802b1d6ddbc7a87e56 (diff)
* cpplib.h (CPP_AT_NAME, CPP_OBJC_STRING): New token types.
(struct cpp_options): Add narrow_charset, wide_charset, bytes_big_endian fields. Remove EBCDIC field. (cpp_init_iconv, cpp_interpret_string): New external interfaces. * cpphash.h: Include <iconv.h> if we have it, otherwise provide a dummy definition of iconv_t. (struct cpp_reader): Add narrow_cset_desc and wide_cset_desc fields. (_cpp_valid_ucn): Update prototype. (_cpp_destroy_iconv): New prototype. * doc/cpp.texi: Document character set handling. * doc/cppopts.texi: Document -fexec-charset= and -fwide-exec-charset=. * doc/extend.texi: Delete entire section on multiline strings. Rewrite section on __FUNCTION__ etc now that these are variables in C. * cppucnid.tab, cppucnid.pl: New files. * cppucnid.h: New generated file. * cppcharset.c: Include cppucnid.h. Lots of commentary added. (iconv_open, iconv, iconv_close): Provide dummy definitions if !HAVE_ICONV. (SOURCE_CHARSET, struct strbuf, init_iconv_desc, cpp_init_iconv, _cpp_destroy_iconv, convert_cset, width_to_mask, convert_ucn, emit_numeric_escape, convert_hex, convert_oct, convert_escape, cpp_interpret_string, narrow_str_to_charconst, wide_str_to_charconst): New. (ucn_valid_in_identifier): Use a binary search through the ucnranges table defined in cppucnid.h, not a long chain of if statements. (_cpp_valid_ucn): Add a limit pointer. Downgrade "universal character names are only valid in C++ and C99" to a warning. Issue the "meaning of \[uU] is different in traditional C" warning here. Take care not to let iconv see an invalid UCS value if we get a malformed UCN. Issue an error if we don't have iconv. (cpp_interpret_charconst): Moved here from cpplex.c. Use cpp_interpret_string to do the heavy lifting. * cppinit.c (cpp_create_reader): Initialize bytes_big_endian, narrow_charset, wide_charset fields of options structure. (cpp_destroy): Call _cpp_destroy_iconv. * cpplex.c (forms_identifier_p): Adjust call to _cpp_valid_ucn. (maybe_read_ucn, hex_digit_value, cpp_parse_escape): Delete. 
(cpp_interpret_charconst): Moved to cppcharset.c. * cpplib.c (dequote_string): Delete. (interpret_string_notranslate): New. (do_line, do_linemarker): Use interpret_string_notranslate. * Makefile.in (cppcharset.o): Depend on cppucnid.h. * c-common.c (fname_string, combine_strings): Delete. * c-common.h (fname_string, combine_strings): Delete prototypes. * c-lex.c (ignore_escape_flag): Delete. (cb_ident): Use cpp_interpret_string, not lex_string. (get_nonpadding_token): New function. (c_lex): Handle Objective-C @-prefixed identifiers and strings here. Adjust calls to lex_string. Don't write *value twice. (lex_string): Now handles string constant concatenation. Most of the work handed off to cpp_interpret_string. Call fix_string_type here. * c-parse.in (STRING_FUNC_NAME, VAR_FUNC_NAME): Replace with FUNC_NAME, throughout. (OBJC_STRING): New token type. (primary:STRING): No need to call fix_string_type here. (primary:objc_string): Make that OBJC_STRING. (objc_string nonterminal): Delete. (yylexname): Delete code to handle fake string constants. (yylexstring): Delete entirely. (_yylex): Handle CPP_AT_NAME and CPP_OBJC_STRING. No need to handle CPP_ATSIGN. * c.opt (-fexec-charset=, -fwide-exec-charset=): New options. * c-opts.c (missing_arg, c_common_handle_option): Handle OPT_fexec_charset_ and OPT_fwide_exec_charset_. (c_common_init): Set cpp_opts->bytes_big_endian, not cpp_opts->EBCDIC. Call cpp_init_iconv. (print_help): Document -fexec-charset= and -fwide-exec-charset=. (TARGET_EBCDIC): Delete default definition. * objc/objc-act.c (build_objc_string_object): No need to handle string constant concatenation. cp: * parser.c (cp_lexer_read_token): No need to handle string constant concatenation. testsuite: * gcc.c-torture/execute/wchar_t-1.x: New file; XFAIL wchar_t-1.c everywhere. * gcc.dg/concat.c: Concatenation of string constants with __FUNCTION__ / __PRETTY_FUNCTION__ is now a hard error. * gcc.dg/wtr-strcat-1.c: Loosen dg-warning regexp. 
* gcc.dg/cpp/escape-2.c: Use wide character constants where necessary to avoid multi-character character constant warning. * gcc.dg/cpp/escape.c: Likewise. * gcc.dg/cpp/ucs.c: Likewise. Remove backslashes from dg-bogus comments, as they confuse Tcl. Fix a typo. libstdc++-v3: * testsuite/22_locale/collate/compare/wchar_t/2.cc * testsuite/22_locale/collate/compare/wchar_t/wrapped_env.cc * testsuite/22_locale/collate/compare/wchar_t/wrapped_locale.cc * testsuite/22_locale/collate/hash/wchar_t/2.cc * testsuite/22_locale/collate/hash/wchar_t/wrapped_env.cc * testsuite/22_locale/collate/hash/wchar_t/wrapped_locale.cc * testsuite/22_locale/collate/transform/wchar_t/2.cc * testsuite/22_locale/collate/transform/wchar_t/wrapped_env.cc * testsuite/22_locale/collate/transform/wchar_t/wrapped_locale.cc: XFAIL on all targets. git-svn-id: https://gcc.gnu.org/svn/gcc/trunk@68952 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/c-lex.c')
-rw-r--r-- gcc/c-lex.c 195
1 files changed, 132 insertions, 63 deletions
diff --git a/gcc/c-lex.c b/gcc/c-lex.c
index 2cca2313c2f..f5733604a5a 100644
--- a/gcc/c-lex.c
+++ b/gcc/c-lex.c
@@ -61,16 +61,13 @@ static splay_tree file_info_tree;
int pending_lang_change; /* If we need to switch languages - C++ only */
int c_header_level; /* depth in C headers - C++ only */
-/* Nonzero tells yylex to ignore \ in string constants. */
-static int ignore_escape_flag;
-
static tree interpret_integer (const cpp_token *, unsigned int);
static tree interpret_float (const cpp_token *, unsigned int);
static enum integer_type_kind
narrowest_unsigned_type (tree, unsigned int);
static enum integer_type_kind
narrowest_signed_type (tree, unsigned int);
-static tree lex_string (const cpp_string *);
+static enum cpp_ttype lex_string (const cpp_token *, tree *, bool);
static tree lex_charconst (const cpp_token *);
static void update_header_times (const char *);
static int dump_one_header (splay_tree_node, void *);
@@ -184,8 +181,12 @@ cb_ident (cpp_reader *pfile ATTRIBUTE_UNUSED,
if (! flag_no_ident)
{
/* Convert escapes in the string. */
- tree value ATTRIBUTE_UNUSED = lex_string (str);
- ASM_OUTPUT_IDENT (asm_out_file, TREE_STRING_POINTER (value));
+ cpp_string cstr = { 0, 0 };
+ if (cpp_interpret_string (pfile, str, 1, &cstr, false))
+ {
+ ASM_OUTPUT_IDENT (asm_out_file, cstr.text);
+ free ((void *)cstr.text);
+ }
}
#endif
}
@@ -296,12 +297,10 @@ cb_undef (cpp_reader *pfile ATTRIBUTE_UNUSED, unsigned int line,
(const char *) NODE_NAME (node));
}
-int
-c_lex (tree *value)
+static inline const cpp_token *
+get_nonpadding_token (void)
{
const cpp_token *tok;
-
- retry:
timevar_push (TV_CPP);
do
tok = cpp_get_token (parse_in);
@@ -310,10 +309,22 @@ c_lex (tree *value)
/* The C++ front end does horrible things with the current line
number. To ensure an accurate line number, we must reset it
- every time we return a token. */
+ every time we advance a token. */
input_line = src_lineno;
- *value = NULL_TREE;
+ return tok;
+}
+
+int
+c_lex (tree *value)
+{
+ const cpp_token *tok;
+ location_t atloc;
+
+ retry:
+ tok = get_nonpadding_token ();
+
+ retry_after_at:
switch (tok->type)
{
case CPP_NAME:
@@ -345,6 +356,37 @@ c_lex (tree *value)
}
break;
+ case CPP_ATSIGN:
+ /* An @ may give the next token special significance in Objective-C. */
+ atloc = input_location;
+ tok = get_nonpadding_token ();
+ if (c_dialect_objc ())
+ {
+ tree val;
+ switch (tok->type)
+ {
+ case CPP_NAME:
+ val = HT_IDENT_TO_GCC_IDENT (HT_NODE (tok->val.node));
+ if (C_IS_RESERVED_WORD (val)
+ && OBJC_IS_AT_KEYWORD (C_RID_CODE (val)))
+ {
+ *value = val;
+ return CPP_AT_NAME;
+ }
+ break;
+
+ case CPP_STRING:
+ case CPP_WSTRING:
+ return lex_string (tok, value, true);
+
+ default: break;
+ }
+ }
+
+ /* ... or not. */
+ error ("%Hstray '@' in program", &atloc);
+ goto retry_after_at;
+
case CPP_OTHER:
{
cppchar_t c = tok->val.str.text[0];
@@ -365,7 +407,7 @@ c_lex (tree *value)
case CPP_STRING:
case CPP_WSTRING:
- *value = lex_string (&tok->val.str);
+ return lex_string (tok, value, false);
break;
/* These tokens should not be visible outside cpplib. */
@@ -374,7 +416,9 @@ c_lex (tree *value)
case CPP_MACRO_ARG:
abort ();
- default: break;
+ default:
+ *value = NULL_TREE;
+ break;
}
return tok->type;
@@ -571,75 +615,100 @@ interpret_float (const cpp_token *token, unsigned int flags)
return value;
}
-static tree
-lex_string (const cpp_string *str)
+/* Convert a series of STRING and/or WSTRING tokens into a tree,
+ performing string constant concatenation. TOK is the first of
+ these. VALP is the location to write the string into. OBJC_STRING
+ indicates whether an '@' token preceded the incoming token.
+ Returns the CPP token type of the result (CPP_STRING, CPP_WSTRING,
+ or CPP_OBJC_STRING).
+
+ This is unfortunately more work than it should be. If any of the
+ strings in the series has an L prefix, the result is a wide string
+ (6.4.5p4). Whether or not the result is a wide string affects the
+ meaning of octal and hexadecimal escapes (6.4.4.4p6,9). But escape
+ sequences do not continue across the boundary between two strings in
+ a series (6.4.5p7), so we must not lose the boundaries. Therefore
+ cpp_interpret_string takes a vector of cpp_string structures, which
+ we must arrange to provide. */
+
+static enum cpp_ttype
+lex_string (const cpp_token *tok, tree *valp, bool objc_string)
{
- bool wide;
tree value;
- char *buf, *q;
- cppchar_t c;
- const unsigned char *p, *limit;
+ bool wide = false;
+ size_t count = 1;
+ struct obstack str_ob;
+ cpp_string istr;
- wide = str->text[0] == 'L';
- p = str->text + 1 + wide;
- limit = str->text + str->len - 1;
- q = buf = alloca ((str->len + 1) * (wide ? WCHAR_BYTES : 1));
+ /* Try to avoid the overhead of creating and destroying an obstack
+ for the common case of just one string. */
+ cpp_string str = tok->val.str;
+ cpp_string *strs = &str;
- while (p < limit)
- {
- c = *p++;
+ if (tok->type == CPP_WSTRING)
+ wide = true;
- if (c == '\\' && !ignore_escape_flag)
- c = cpp_parse_escape (parse_in, &p, limit, wide);
+ tok = get_nonpadding_token ();
+ if (c_dialect_objc () && tok->type == CPP_ATSIGN)
+ {
+ objc_string = true;
+ tok = get_nonpadding_token ();
+ }
+ if (tok->type == CPP_STRING || tok->type == CPP_WSTRING)
+ {
+ gcc_obstack_init (&str_ob);
+ obstack_grow (&str_ob, &str, sizeof (cpp_string));
- /* Add this single character into the buffer either as a wchar_t,
- a multibyte sequence, or as a single byte. */
- if (wide)
+ do
{
- unsigned charwidth = TYPE_PRECISION (char_type_node);
- unsigned bytemask = (1 << charwidth) - 1;
- int byte;
-
- for (byte = 0; byte < WCHAR_BYTES; ++byte)
+ count++;
+ if (tok->type == CPP_WSTRING)
+ wide = true;
+ obstack_grow (&str_ob, &tok->val.str, sizeof (cpp_string));
+
+ tok = get_nonpadding_token ();
+ if (c_dialect_objc () && tok->type == CPP_ATSIGN)
{
- int n;
- if (byte >= (int) sizeof (c))
- n = 0;
- else
- n = (c >> (byte * charwidth)) & bytemask;
- if (BYTES_BIG_ENDIAN)
- q[WCHAR_BYTES - byte - 1] = n;
- else
- q[byte] = n;
+ objc_string = true;
+ tok = get_nonpadding_token ();
}
- q += WCHAR_BYTES;
- }
- else
- {
- *q++ = c;
}
+ while (tok->type == CPP_STRING || tok->type == CPP_WSTRING);
+ strs = obstack_finish (&str_ob);
}
- /* Terminate the string value, either with a single byte zero
- or with a wide zero. */
+ /* We have read one more token than we want. */
+ _cpp_backup_tokens (parse_in, 1);
+
+ if (count > 1 && !objc_string && warn_traditional && !in_system_header)
+ warning ("traditional C rejects string constant concatenation");
- if (wide)
+ if (cpp_interpret_string (parse_in, strs, count, &istr, wide))
{
- memset (q, 0, WCHAR_BYTES);
- q += WCHAR_BYTES;
+ value = build_string (istr.len, (char *)istr.text);
+ free ((void *)istr.text);
}
else
{
- *q++ = '\0';
+ /* Callers cannot generally handle error_mark_node in this context,
+ so return the empty string instead. cpp_interpret_string has
+ issued an error. */
+ if (wide)
+ value = build_string (TYPE_PRECISION (wchar_type_node)
+ / TYPE_PRECISION (char_type_node),
+ "\0\0\0"); /* widest supported wchar_t
+ is 32 bits */
+ else
+ value = build_string (1, "");
}
- value = build_string (q - buf, buf);
+ TREE_TYPE (value) = wide ? wchar_array_type_node : char_array_type_node;
+ *valp = fix_string_type (value);
- if (wide)
- TREE_TYPE (value) = wchar_array_type_node;
- else
- TREE_TYPE (value) = char_array_type_node;
- return value;
+ if (strs != &str)
+ obstack_free (&str_ob, 0);
+
+ return objc_string ? CPP_OBJC_STRING : wide ? CPP_WSTRING : CPP_STRING;
}
/* Converts a (possibly wide) character constant token into a tree. */