diff options
Diffstat (limited to 'gcc/cpplex.c')
-rw-r--r-- | gcc/cpplex.c | 673 |
1 files changed, 372 insertions, 301 deletions
diff --git a/gcc/cpplex.c b/gcc/cpplex.c index 674db90e608..b8a1b071161 100644 --- a/gcc/cpplex.c +++ b/gcc/cpplex.c @@ -25,11 +25,16 @@ Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ #include "cpplib.h" #include "cpphash.h" -#define PEEKN(N) (CPP_BUFFER (pfile)->rlimit - CPP_BUFFER (pfile)->cur >= (N) \ - ? CPP_BUFFER (pfile)->cur[N] : EOF) -#define FORWARD(N) CPP_FORWARD (CPP_BUFFER (pfile), (N)) -#define GETC() CPP_BUF_GET (CPP_BUFFER (pfile)) -#define PEEKC() CPP_BUF_PEEK (CPP_BUFFER (pfile)) +#define PEEKBUF(BUFFER, N) \ + ((BUFFER)->rlimit - (BUFFER)->cur > (N) ? (BUFFER)->cur[N] : EOF) +#define GETBUF(BUFFER) \ + ((BUFFER)->cur < (BUFFER)->rlimit ? *(BUFFER)->cur++ : EOF) +#define FORWARDBUF(BUFFER, N) ((BUFFER)->cur += (N)) + +#define PEEKN(N) PEEKBUF (CPP_BUFFER (pfile), N) +#define FORWARD(N) FORWARDBUF (CPP_BUFFER (pfile), (N)) +#define GETC() GETBUF (CPP_BUFFER (pfile)) +#define PEEKC() PEEKBUF (CPP_BUFFER (pfile), 0) static void skip_block_comment PARAMS ((cpp_reader *)); static void skip_line_comment PARAMS ((cpp_reader *)); @@ -40,6 +45,7 @@ static void skip_string PARAMS ((cpp_reader *, int)); static void parse_string PARAMS ((cpp_reader *, int)); static U_CHAR *find_position PARAMS ((U_CHAR *, U_CHAR *, unsigned long *)); static int null_cleanup PARAMS ((cpp_buffer *, cpp_reader *)); +static void null_warning PARAMS ((cpp_reader *, unsigned int)); /* Re-allocates PFILE->token_buffer so it will hold at least N more chars. */ @@ -87,9 +93,9 @@ cpp_push_buffer (pfile, buffer, length) new->if_stack = pfile->if_stack; new->cleanup = null_cleanup; new->buf = new->cur = buffer; - new->alimit = new->rlimit = buffer + length; + new->rlimit = buffer + length; new->prev = buf; - new->mark = -1; + new->mark = NULL; new->line_base = NULL; CPP_BUFFER (pfile) = new; @@ -119,7 +125,7 @@ cpp_scan_buffer (pfile) { cpp_buffer *buffer = CPP_BUFFER (pfile); enum cpp_token token; - if (CPP_OPTIONS (pfile)->no_output) + if (CPP_OPTION (pfile, no_output)) { long old_written = CPP_WRITTEN (pfile); /* In no-output mode, we can ignore everything but directives. */ @@ -193,12 +199,12 @@ cpp_expand_to_buffer (pfile, buf, length) ip->has_escapes = 1; /* Scan the input, create the output. */ - save_no_output = CPP_OPTIONS (pfile)->no_output; - CPP_OPTIONS (pfile)->no_output = 0; - CPP_OPTIONS (pfile)->no_line_commands++; + save_no_output = CPP_OPTION (pfile, no_output); + CPP_OPTION (pfile, no_output) = 0; + CPP_OPTION (pfile, no_line_commands)++; cpp_scan_buffer (pfile); - CPP_OPTIONS (pfile)->no_line_commands--; - CPP_OPTIONS (pfile)->no_output = save_no_output; + CPP_OPTION (pfile, no_line_commands)--; + CPP_OPTION (pfile, no_output) = save_no_output; CPP_NUL_TERMINATE (pfile); } @@ -264,7 +270,7 @@ skip_block_comment (pfile) else if (c == '/' && prev_c == '*') return; else if (c == '*' && prev_c == '/' - && CPP_OPTIONS (pfile)->warn_comments) + && CPP_OPTION (pfile, warn_comments)) cpp_warning (pfile, "`/*' within comment"); prev_c = c; @@ -294,7 +300,7 @@ skip_line_comment (pfile) /* \r cannot be a macro escape marker here. */ if (!ACTIVE_MARK_P (pfile)) CPP_BUMP_LINE (pfile); - if (CPP_OPTIONS (pfile)->warn_comments) + if (CPP_OPTION (pfile, warn_comments)) cpp_warning (pfile, "backslash-newline within line comment"); } } @@ -325,9 +331,9 @@ skip_comment (pfile, m) skip_line_comment (pfile); return ' '; } - else if (CPP_OPTIONS (pfile)->cplusplus_comments) + else if (CPP_OPTION (pfile, cplusplus_comments)) { - if (CPP_OPTIONS (pfile)->c89 + if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile) && ! CPP_BUFFER (pfile)->warned_cplusplus_comments) { @@ -344,7 +350,7 @@ skip_comment (pfile, m) return m; } else if (m == '-' && PEEKC() == '-' - && CPP_OPTIONS (pfile)->chill) + && CPP_OPTION (pfile, chill)) { skip_line_comment (pfile); return ' '; @@ -376,23 +382,38 @@ copy_comment (pfile, m) return ' '; } +static void +null_warning (pfile, count) + cpp_reader *pfile; + unsigned int count; +{ + if (count == 1) + cpp_warning (pfile, "embedded null character ignored"); + else + cpp_warning (pfile, "embedded null characters ignored"); +} + /* Skip whitespace \-newline and comments. Does not macro-expand. */ void _cpp_skip_hspace (pfile) cpp_reader *pfile; { + unsigned int null_count = 0; int c; + while (1) { c = GETC(); if (c == EOF) - return; + goto out; else if (is_hspace(c)) { if ((c == '\f' || c == '\v') && CPP_PEDANTIC (pfile)) cpp_pedwarn (pfile, "%s in preprocessing directive", c == '\f' ? "formfeed" : "vertical tab"); + else if (c == '\0') + null_count++; } else if (c == '\r') { @@ -418,6 +439,9 @@ _cpp_skip_hspace (pfile) break; } FORWARD(-1); + out: + if (null_count) + null_warning (pfile, null_count); } /* Read and discard the rest of the current line. */ @@ -500,8 +524,9 @@ skip_string (pfile, c) int c; { long start_line, start_column; - cpp_buf_line_and_col (cpp_file_buffer (pfile), &start_line, &start_column); + unsigned int null_count = 0; + cpp_buf_line_and_col (cpp_file_buffer (pfile), &start_line, &start_column); while (1) { int cc = GETC(); @@ -516,19 +541,23 @@ skip_string (pfile, c) pfile->multiline_string_line, -1, "possible real start of unterminated constant"); pfile->multiline_string_line = 0; - return; + goto out; + case '\0': + null_count++; + break; + case '\n': CPP_BUMP_LINE (pfile); /* In Fortran and assembly language, silently terminate strings of either variety at end of line. This is a kludge around not knowing where comments are in these languages. */ - if (CPP_OPTIONS (pfile)->lang_fortran - || CPP_OPTIONS (pfile)->lang_asm) + if (CPP_OPTION (pfile, lang_fortran) + || CPP_OPTION (pfile, lang_asm)) { FORWARD(-1); - return; + goto out; } /* Character constants may not extend over multiple lines. In Standard C, neither may strings. We accept multiline @@ -538,7 +567,7 @@ skip_string (pfile, c) cpp_error_with_line (pfile, start_line, start_column, "unterminated character constant"); FORWARD(-1); - return; + goto out; } if (CPP_PEDANTIC (pfile) && pfile->multiline_string_line == 0) cpp_pedwarn_with_line (pfile, start_line, start_column, @@ -565,10 +594,16 @@ skip_string (pfile, c) case '\"': case '\'': if (cc == c) - return; + goto out; break; } } + + out: + if (null_count == 1) + cpp_warning (pfile, "null character in string or character constant"); + else if (null_count > 1) + cpp_warning (pfile, "null characters in string or character constant"); } /* Parse a string and copy it to the output. */ @@ -604,7 +639,12 @@ _cpp_parse_assertion (pfile) int c, dropwhite; _cpp_skip_hspace (pfile); c = PEEKC(); - if (! is_idstart(c)) + if (c == '\n') + { + cpp_error (pfile, "assertion without predicate"); + return 0; + } + else if (! is_idstart(c)) { cpp_error (pfile, "assertion predicate is not an identifier"); return 0; @@ -662,7 +702,6 @@ _cpp_parse_assertion (pfile) else CPP_PUTC (pfile, ')'); - CPP_NUL_TERMINATE (pfile); return 2; } @@ -675,7 +714,6 @@ _cpp_lex_token (pfile) { register int c, c2, c3; enum cpp_token token; - struct cpp_options *opts = CPP_OPTIONS (pfile); get_next: c = GETC(); @@ -689,7 +727,7 @@ _cpp_lex_token (pfile) goto op2; comment: - if (opts->discard_comments) + if (CPP_OPTION (pfile, discard_comments)) c = skip_comment (pfile, c); else c = copy_comment (pfile, c); @@ -698,8 +736,16 @@ _cpp_lex_token (pfile) /* Comments are equivalent to spaces. For -traditional, a comment is equivalent to nothing. */ - if (opts->traditional || !opts->discard_comments) + if (!CPP_OPTION (pfile, discard_comments)) return CPP_COMMENT; + else if (CPP_TRADITIONAL (pfile) + && ! is_space (PEEKC ())) + { + if (pfile->parsing_define_directive) + return CPP_COMMENT; + else + goto get_next; + } else { CPP_PUTC (pfile, c); @@ -709,9 +755,9 @@ _cpp_lex_token (pfile) case '#': if (pfile->parsing_if_directive) { - _cpp_skip_hspace (pfile); - _cpp_parse_assertion (pfile); - return CPP_ASSERTION; + if (_cpp_parse_assertion (pfile)) + return CPP_ASSERTION; + goto randomchar; } if (pfile->parsing_define_directive && ! CPP_TRADITIONAL (pfile)) @@ -730,27 +776,20 @@ _cpp_lex_token (pfile) if (!pfile->only_seen_white) goto randomchar; - /* -traditional directives are recognized only with the # in - column 1. - XXX Layering violation. */ - if (CPP_TRADITIONAL (pfile) - && CPP_BUFFER (pfile)->cur - CPP_BUFFER (pfile)->line_base != 1) - goto randomchar; return CPP_DIRECTIVE; case '\"': case '\'': parse_string (pfile, c); - pfile->only_seen_white = 0; return c == '\'' ? CPP_CHAR : CPP_STRING; case '$': - if (!opts->dollars_in_ident) + if (!CPP_OPTION (pfile, dollars_in_ident)) goto randomchar; goto letter; case ':': - if (opts->cplusplus && PEEKC () == ':') + if (CPP_OPTION (pfile, cplusplus) && PEEKC () == ':') goto op2; goto randomchar; @@ -775,7 +814,7 @@ _cpp_lex_token (pfile) c2 = PEEKC (); if (c2 == '-') { - if (opts->chill) + if (CPP_OPTION (pfile, chill)) goto comment; /* Chill style comment */ else goto op2; @@ -784,11 +823,10 @@ _cpp_lex_token (pfile) goto op2; else if (c2 == '>') { - if (opts->cplusplus && PEEKN (1) == '*') + if (CPP_OPTION (pfile, cplusplus) && PEEKN (1) == '*') { /* In C++, there's a ->* operator. */ token = CPP_OTHER; - pfile->only_seen_white = 0; CPP_RESERVE (pfile, 4); CPP_PUTC_Q (pfile, c); CPP_PUTC_Q (pfile, GETC ()); @@ -842,7 +880,7 @@ _cpp_lex_token (pfile) if (c2 == '=') goto op2; /* GNU C++ supports MIN and MAX operators <? and >?. */ - if (c2 != c && (!opts->cplusplus || c2 != '?')) + if (c2 != c && (!CPP_OPTION (pfile, cplusplus) || c2 != '?')) goto randomchar; FORWARD(1); CPP_RESERVE (pfile, 4); @@ -852,7 +890,6 @@ _cpp_lex_token (pfile) if (c3 == '=') CPP_PUTC_Q (pfile, GETC ()); CPP_NUL_TERMINATE_Q (pfile); - pfile->only_seen_white = 0; return CPP_OTHER; case '.': @@ -866,7 +903,7 @@ _cpp_lex_token (pfile) } /* In C++ there's a .* operator. */ - if (opts->cplusplus && c2 == '*') + if (CPP_OPTION (pfile, cplusplus) && c2 == '*') goto op2; if (c2 == '.' && PEEKN(1) == '.') @@ -877,14 +914,12 @@ _cpp_lex_token (pfile) CPP_PUTC_Q (pfile, '.'); FORWARD (2); CPP_NUL_TERMINATE_Q (pfile); - pfile->only_seen_white = 0; return CPP_3DOTS; } goto randomchar; op2: token = CPP_OTHER; - pfile->only_seen_white = 0; CPP_RESERVE(pfile, 3); CPP_PUTC_Q (pfile, c); CPP_PUTC_Q (pfile, GETC ()); @@ -898,7 +933,6 @@ _cpp_lex_token (pfile) CPP_PUTC (pfile, c); c = GETC (); parse_string (pfile, c); - pfile->only_seen_white = 0; return c == '\'' ? CPP_WCHAR : CPP_WSTRING; } goto letter; @@ -917,20 +951,18 @@ _cpp_lex_token (pfile) if (!is_numchar(c) && c != '.' && ((c2 != 'e' && c2 != 'E' && ((c2 != 'p' && c2 != 'P') - || CPP_OPTIONS (pfile)->c89)) + || CPP_OPTION (pfile, c89))) || (c != '+' && c != '-'))) break; FORWARD(1); c2= c; } CPP_NUL_TERMINATE_Q (pfile); - pfile->only_seen_white = 0; return CPP_NUMBER; case 'b': case 'c': case 'd': case 'h': case 'o': case 'B': case 'C': case 'D': case 'H': case 'O': - if (opts->chill && PEEKC () == '\'') + if (CPP_OPTION (pfile, chill) && PEEKC () == '\'') { - pfile->only_seen_white = 0; CPP_RESERVE (pfile, 2); CPP_PUTC_Q (pfile, c); CPP_PUTC_Q (pfile, '\''); @@ -971,20 +1003,28 @@ _cpp_lex_token (pfile) case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': case 'Y': case 'Z': letter: - pfile->only_seen_white = 0; _cpp_parse_name (pfile, c); return CPP_MACRO; - case ' ': case '\t': case '\v': - for (;;) - { - CPP_PUTC (pfile, c); - c = PEEKC (); - if (c == EOF || !is_hspace(c)) - break; - FORWARD(1); - } - return CPP_HSPACE; + case ' ': case '\t': case '\v': case '\f': case '\0': + { + int null_count = 0; + + for (;;) + { + if (c == '\0') + null_count++; + else + CPP_PUTC (pfile, c); + c = PEEKC (); + if (c == EOF || !is_hspace(c)) + break; + FORWARD(1); + } + if (null_count) + null_warning (pfile, null_count); + return CPP_HSPACE; + } case '\r': if (CPP_BUFFER (pfile)->has_escapes) @@ -999,6 +1039,8 @@ _cpp_lex_token (pfile) } else if (c == ' ') { + /* "\r " means a space, but only if necessary to prevent + accidental token concatenation. */ CPP_RESERVE (pfile, 2); if (pfile->output_escapes) CPP_PUTC_Q (pfile, '\r'); @@ -1020,15 +1062,6 @@ _cpp_lex_token (pfile) case '\n': CPP_PUTC (pfile, c); - if (pfile->only_seen_white == 0) - pfile->only_seen_white = 1; - CPP_BUMP_LINE (pfile); - if (! CPP_OPTIONS (pfile)->no_line_commands) - { - pfile->lineno++; - if (CPP_BUFFER (pfile)->lineno != pfile->lineno) - _cpp_output_line_command (pfile, same_file); - } return CPP_VSPACE; case '(': token = CPP_LPAREN; goto char1; @@ -1042,7 +1075,6 @@ _cpp_lex_token (pfile) default: token = CPP_OTHER; char1: - pfile->only_seen_white = 0; CPP_PUTC (pfile, c); return token; } @@ -1076,6 +1108,13 @@ maybe_macroexpand (pfile, written) } return 0; } + if (hp->type == T_EMPTY) + { + /* Special case optimization: macro expands to nothing. */ + CPP_SET_WRITTEN (pfile, written); + CPP_PUTC_Q (pfile, ' '); + return 1; + } /* If macro wants an arglist, verify that a '(' follows. */ if (hp->type == T_MACRO && hp->value.defn->nargs >= 0) @@ -1147,9 +1186,28 @@ cpp_get_token (pfile) switch (token) { default: + pfile->potential_control_macro = 0; + pfile->only_seen_white = 0; + return token; + + case CPP_VSPACE: + if (pfile->only_seen_white == 0) + pfile->only_seen_white = 1; + CPP_BUMP_LINE (pfile); + if (! CPP_OPTION (pfile, no_line_commands)) + { + pfile->lineno++; + if (CPP_BUFFER (pfile)->lineno != pfile->lineno) + _cpp_output_line_command (pfile, same_file); + } + return token; + + case CPP_HSPACE: + case CPP_COMMENT: return token; case CPP_DIRECTIVE: + pfile->potential_control_macro = 0; if (_cpp_handle_directive (pfile)) return CPP_DIRECTIVE; pfile->only_seen_white = 0; @@ -1157,6 +1215,8 @@ cpp_get_token (pfile) return CPP_OTHER; case CPP_MACRO: + pfile->potential_control_macro = 0; + pfile->only_seen_white = 0; if (! pfile->no_macro_expand && maybe_macroexpand (pfile, written)) goto get_next; @@ -1193,57 +1253,96 @@ cpp_get_non_space_token (pfile) for (;;) { enum cpp_token token = cpp_get_token (pfile); - if (token != CPP_COMMENT && token != CPP_POP - && token != CPP_HSPACE && token != CPP_VSPACE) + if (token != CPP_COMMENT && token != CPP_HSPACE && token != CPP_VSPACE) return token; CPP_SET_WRITTEN (pfile, old_written); } } -/* Like cpp_get_token, except that it does not read past end-of-line. - Also, horizontal space is skipped, and macros are popped. */ +/* Like cpp_get_token, except that it does not execute directives, + does not consume vertical space, and automatically pops off macro + buffers. + XXX This function will exist only till collect_expansion doesn't + need to see whitespace anymore, then it'll be merged with + _cpp_get_directive_token (below). */ enum cpp_token -_cpp_get_directive_token (pfile) +_cpp_get_define_token (pfile) cpp_reader *pfile; { - long old_written = CPP_WRITTEN (pfile); + long old_written; enum cpp_token token; - for (;;) + get_next: + old_written = CPP_WRITTEN (pfile); + token = _cpp_lex_token (pfile); + switch (token) { - _cpp_skip_hspace (pfile); - if (PEEKC () == '\n') - return CPP_VSPACE; + default: + return token; - token = cpp_get_token (pfile); - /* token could be hspace at the beginning of a macro. */ - if (token == CPP_HSPACE || token == CPP_COMMENT) - { - CPP_SET_WRITTEN (pfile, old_written); - continue; - } + case CPP_VSPACE: + /* Put it back and return VSPACE. */ + FORWARD(-1); + CPP_ADJUST_WRITTEN (pfile, -1); + return CPP_VSPACE; - /* token cannot be vspace, it would have been caught above. */ - if (token == CPP_VSPACE) + case CPP_HSPACE: + if (CPP_PEDANTIC (pfile)) { - cpp_ice (pfile, "VSPACE in get_directive_token"); - return token; + U_CHAR *p, *limit; + p = pfile->token_buffer + old_written; + limit = CPP_PWRITTEN (pfile); + while (p < limit) + { + if (*p == '\v' || *p == '\f') + cpp_pedwarn (pfile, "%s in preprocessing directive", + *p == '\f' ? "formfeed" : "vertical tab"); + p++; + } } + return CPP_HSPACE; - /* token cannot be POP unless the buffer is a macro buffer. */ - if (token != CPP_POP) - return token; + case CPP_DIRECTIVE: + /* Don't execute the directive, but don't smash it to OTHER either. */ + CPP_PUTC (pfile, '#'); + return CPP_DIRECTIVE; + + case CPP_MACRO: + if (! pfile->no_macro_expand + && maybe_macroexpand (pfile, old_written)) + goto get_next; + return CPP_NAME; - if (! CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile))) + case CPP_EOF: + if (CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile))) { - cpp_ice (pfile, "POP of file buffer in get_directive_token"); - return token; + cpp_pop_buffer (pfile); + goto get_next; } + else + /* This can happen for files that don't end with a newline, + and for cpp_define and friends. Pretend they do, so + callers don't have to deal. A warning will be issued by + someone else, if necessary. */ + return CPP_VSPACE; + } +} - /* We must pop the buffer by hand, or else cpp_get_token might - hand us white space or newline on the next invocation. */ - cpp_pop_buffer (pfile); +/* Just like _cpp_get_define_token except that it discards horizontal + whitespace. */ + +enum cpp_token +_cpp_get_directive_token (pfile) + cpp_reader *pfile; +{ + int old_written = CPP_WRITTEN (pfile); + for (;;) + { + enum cpp_token token = _cpp_get_define_token (pfile); + if (token != CPP_COMMENT && token != CPP_HSPACE) + return token; + CPP_SET_WRITTEN (pfile, old_written); } } @@ -1269,6 +1368,51 @@ find_position (start, limit, linep) return lbase; } +/* The following table is used by _cpp_read_and_prescan. If we have + designated initializers, it can be constant data; otherwise, it is + set up at runtime by _cpp_init_input_buffer. */ + +#ifndef UCHAR_MAX +#define UCHAR_MAX 255 /* assume 8-bit bytes */ +#endif + +#if (GCC_VERSION >= 2007) || (__STDC_VERSION__ >= 199901L) +#define init_chartab() /* nothing */ +#define CHARTAB static const unsigned char chartab[UCHAR_MAX + 1] = { +#define END }; +#define s(p, v) [p] = v, +#else +#define CHARTAB static unsigned char chartab[UCHAR_MAX + 1] = { 0 }; \ + static void init_chartab PARAMS ((void)) { \ + unsigned char *x = chartab; +#define END } +#define s(p, v) x[p] = v; +#endif + +/* Table of characters that can't be handled in the inner loop. + Also contains the mapping between trigraph third characters and their + replacements. */ +#define SPECCASE_CR 1 +#define SPECCASE_BACKSLASH 2 +#define SPECCASE_QUESTION 3 + +CHARTAB + s('\r', SPECCASE_CR) + s('\\', SPECCASE_BACKSLASH) + s('?', SPECCASE_QUESTION) + + s('=', '#') s(')', ']') s('!', '|') + s('(', '[') s('\'', '^') s('>', '}') + s('/', '\\') s('<', '{') s('-', '~') +END + +#undef CHARTAB +#undef END +#undef s + +#define NORMAL(c) ((chartab[c]) == 0 || (chartab[c]) > SPECCASE_QUESTION) +#define NONTRI(c) ((c) <= SPECCASE_QUESTION) + /* Read the entire contents of file DESC into buffer BUF. LEN is how much memory to allocate initially; more will be allocated if necessary. Convert end-of-line markers (\n, \r, \r\n, \n\r) to @@ -1292,17 +1436,19 @@ find_position (start, limit, linep) at the end of reload1.c is about 60%. (reload1.c is 329k.) If your file has more than one kind of end-of-line marker, you - will get messed-up line numbering. */ - -/* Table of characters that can't be handled in the inner loop. - Keep these contiguous to optimize the performance of the code generated - for the switch that uses them. */ -#define SPECCASE_EMPTY 0 -#define SPECCASE_NUL 1 -#define SPECCASE_CR 2 -#define SPECCASE_BACKSLASH 3 -#define SPECCASE_QUESTION 4 - + will get messed-up line numbering. + + So that the cases of the switch statement do not have to concern + themselves with the complications of reading beyond the end of the + buffer, the buffer is guaranteed to have at least 3 characters in + it (or however many are left in the file, if less) on entry to the + switch. This is enough to handle trigraphs and the "\\\n\r" and + "\\\r\n" cases. + + The end of the buffer is marked by a '\\', which, being a special + character, guarantees we will exit the fast-scan loops and perform + a refill. */ + long _cpp_read_and_prescan (pfile, fp, desc, len) cpp_reader *pfile; @@ -1313,215 +1459,176 @@ _cpp_read_and_prescan (pfile, fp, desc, len) U_CHAR *buf = (U_CHAR *) xmalloc (len); U_CHAR *ip, *op, *line_base; U_CHAR *ibase; - U_CHAR *speccase = pfile->input_speccase; unsigned long line; unsigned int deferred_newlines; - int count; size_t offset; + int count = 0; offset = 0; + deferred_newlines = 0; op = buf; line_base = buf; line = 1; - ibase = pfile->input_buffer + 2; - deferred_newlines = 0; + ibase = pfile->input_buffer + 3; + ip = ibase; + ip[-1] = '\0'; /* Guarantee no match with \n for SPECCASE_CR */ for (;;) { - read_next: + U_CHAR *near_buff_end; + + /* Copy previous char plus unprocessed (at most 2) chars + to beginning of buffer, refill it with another + read(), and continue processing */ + memcpy(ip - count - 1, ip - 1, 3); + ip -= count; - count = read (desc, pfile->input_buffer + 2, pfile->input_buffer_len); + count = read (desc, ibase, pfile->input_buffer_len); if (count < 0) goto error; - else if (count == 0) - break; - - offset += count; - ip = ibase; - ibase = pfile->input_buffer + 2; - ibase[count] = ibase[count+1] = '\0'; - - if (offset > len) + + ibase[count] = '\\'; /* Marks end of buffer */ + if (count) { - size_t delta_op; - size_t delta_line_base; - len *= 2; + near_buff_end = pfile->input_buffer + count; + offset += count; if (offset > len) - /* len overflowed. - This could happen if the file is larger than half the - maximum address space of the machine. */ - goto too_big; - - delta_op = op - buf; - delta_line_base = line_base - buf; - buf = (U_CHAR *) xrealloc (buf, len); - op = buf + delta_op; - line_base = buf + delta_line_base; + { + size_t delta_op; + size_t delta_line_base; + len *= 2; + if (offset > len) + /* len overflowed. + This could happen if the file is larger than half the + maximum address space of the machine. */ + goto too_big; + + delta_op = op - buf; + delta_line_base = line_base - buf; + buf = (U_CHAR *) xrealloc (buf, len); + op = buf + delta_op; + line_base = buf + delta_line_base; + } + } + else + { + if (ip == ibase) + break; + /* Allow normal processing of the (at most 2) remaining + characters. The end-of-buffer marker is still present + and prevents false matches within the switch. */ + near_buff_end = ibase - 1; } for (;;) { - unsigned int span = 0; + unsigned int span; - /* Deal with \-newline in the middle of a token. */ + /* Deal with \-newline, potentially in the middle of a token. */ if (deferred_newlines) { - while (speccase[ip[span]] == SPECCASE_EMPTY - && ip[span] != '\n' - && ip[span] != '\t' - && ip[span] != ' ') - span++; - memcpy (op, ip, span); - op += span; - ip += span; - /* If ip[0] is SPECCASE_EMPTY, we have hit white space. - Dump out the remaining deferred \-newlines. */ - if (speccase[ip[0]] == SPECCASE_EMPTY) - while (deferred_newlines) - deferred_newlines--, *op++ = '\r'; - span = 0; + if (op != buf && ! is_space (op[-1]) && op[-1] != '\r') + { + /* Previous was not white space. Skip to white + space, if we can, before outputting the \r's */ + span = 0; + while (ip[span] != ' ' + && ip[span] != '\t' + && ip[span] != '\n' + && NORMAL(ip[span])) + span++; + memcpy (op, ip, span); + op += span; + ip += span; + if (! NORMAL(ip[0])) + goto do_speccase; + } + while (deferred_newlines) + deferred_newlines--, *op++ = '\r'; } /* Copy as much as we can without special treatment. */ - while (speccase[ip[span]] == SPECCASE_EMPTY) span++; + span = 0; + while (NORMAL (ip[span])) span++; memcpy (op, ip, span); op += span; ip += span; - switch (speccase[*ip++]) + do_speccase: + if (ip > near_buff_end) /* Do we have enough chars? */ + break; + switch (chartab[*ip++]) { - case SPECCASE_NUL: /* \0 */ - ibase[-1] = op[-1]; - goto read_next; - case SPECCASE_CR: /* \r */ - if (ip[-2] == '\n') - continue; - else if (*ip == '\n') - ip++; - else if (*ip == '\0') + if (ip[-2] != '\n') { - *--ibase = '\r'; - goto read_next; + if (*ip == '\n') + ip++; + *op++ = '\n'; } - *op++ = '\n'; break; case SPECCASE_BACKSLASH: /* \ */ - backslash: - { - /* If we're at the end of the intermediate buffer, - we have to shift the backslash down to the start - and come back next pass. */ - if (*ip == '\0') - { - *--ibase = '\\'; - goto read_next; - } - else if (*ip == '\n') + if (*ip == '\n') { + deferred_newlines++; ip++; if (*ip == '\r') ip++; - if (*ip == '\n' || *ip == '\t' || *ip == ' ') - *op++ = '\r'; - else if (op[-1] == '\t' || op[-1] == ' ' - || op[-1] == '\r' || op[-1] == '\n') - *op++ = '\r'; - else - deferred_newlines++; } else if (*ip == '\r') { + deferred_newlines++; ip++; if (*ip == '\n') ip++; - else if (*ip == '\0') - { - *--ibase = '\r'; - *--ibase = '\\'; - goto read_next; - } - else if (*ip == '\r' || *ip == '\t' || *ip == ' ') - *op++ = '\r'; - else - deferred_newlines++; } else *op++ = '\\'; - } - break; + break; case SPECCASE_QUESTION: /* ? */ { unsigned int d, t; - /* If we're at the end of the intermediate buffer, - we have to shift the ?'s down to the start and - come back next pass. */ - d = ip[0]; - if (d == '\0') - { - *--ibase = '?'; - goto read_next; - } - if (d != '?') - { - *op++ = '?'; - break; - } + + *op++ = '?'; /* Normal non-trigraph case */ + if (ip[0] != '?') + break; + d = ip[1]; - if (d == '\0') - { - *--ibase = '?'; - *--ibase = '?'; - goto read_next; - } + t = chartab[d]; + if (NONTRI (t)) + break; - /* Trigraph map: - * from to from to from to - * ?? = # ?? ) ] ?? ! | - * ?? ( [ ?? ' ^ ?? > } - * ?? / \ ?? < { ?? - ~ - */ - if (d == '=') t = '#'; - else if (d == ')') t = ']'; - else if (d == '!') t = '|'; - else if (d == '(') t = '['; - else if (d == '\'') t = '^'; - else if (d == '>') t = '}'; - else if (d == '/') t = '\\'; - else if (d == '<') t = '{'; - else if (d == '-') t = '~'; - else - { - *op++ = '?'; - break; - } - ip += 2; - if (CPP_OPTIONS (pfile)->warn_trigraphs) + if (CPP_OPTION (pfile, warn_trigraphs)) { unsigned long col; line_base = find_position (line_base, op, &line); col = op - line_base + 1; - if (CPP_OPTIONS (pfile)->trigraphs) + if (CPP_OPTION (pfile, trigraphs)) cpp_warning_with_line (pfile, line, col, - "trigraph ??%c converted to %c", d, t); + "trigraph ??%c converted to %c", d, t); else cpp_warning_with_line (pfile, line, col, - "trigraph ??%c ignored", d); + "trigraph ??%c ignored", d); } - if (CPP_OPTIONS (pfile)->trigraphs) + + ip += 2; + if (CPP_OPTION (pfile, trigraphs)) { + op[-1] = t; /* Overwrite '?' */ if (t == '\\') - goto backslash; - else - *op++ = t; + { + op--; + *--ip = '\\'; + goto do_speccase; /* May need buffer refill */ + } } else { *op++ = '?'; - *op++ = '?'; *op++ = d; } } + break; } } } @@ -1529,28 +1636,6 @@ _cpp_read_and_prescan (pfile, fp, desc, len) if (offset == 0) return 0; - /* Deal with pushed-back chars at true EOF. - This may be any of: ?? ? \ \r \n \\r \\n. - \r must become \n, \\r or \\n must become \r. - We know we have space already. */ - if (ibase == pfile->input_buffer) - { - if (*ibase == '?') - { - *op++ = '?'; - *op++ = '?'; - } - else - *op++ = '\r'; - } - else if (ibase == pfile->input_buffer + 1) - { - if (*ibase == '\r') - *op++ = '\n'; - else - *op++ = *ibase; - } - if (op[-1] != '\n') { unsigned long col; @@ -1582,39 +1667,25 @@ _cpp_read_and_prescan (pfile, fp, desc, len) return -1; } -/* Initialize the `input_buffer' and `input_speccase' tables. - These are only used by read_and_prescan, but they're large and - somewhat expensive to set up, so we want them allocated once for - the duration of the cpp run. */ - +/* Allocate pfile->input_buffer, and initialize chartab[] + if it hasn't happened already. */ + void _cpp_init_input_buffer (pfile) cpp_reader *pfile; { U_CHAR *tmp; - /* Table of characters that cannot be handled by the - read_and_prescan inner loop. The number of non-EMPTY entries - should be as small as humanly possible. */ - - tmp = (U_CHAR *) xmalloc (1 << CHAR_BIT); - memset (tmp, SPECCASE_EMPTY, 1 << CHAR_BIT); - tmp['\0'] = SPECCASE_NUL; - tmp['\r'] = SPECCASE_CR; - tmp['\\'] = SPECCASE_BACKSLASH; - if (CPP_OPTIONS (pfile)->trigraphs || CPP_OPTIONS (pfile)->warn_trigraphs) - tmp['?'] = SPECCASE_QUESTION; - - pfile->input_speccase = tmp; + init_chartab (); /* Determine the appropriate size for the input buffer. Normal C source files are smaller than eight K. */ - /* 8Kbytes of buffer proper, 2 to detect running off the end without - address arithmetic all the time, and 2 for pushback in the case - there's a potential trigraph or end-of-line digraph at the end of - a block. */ + /* 8Kbytes of buffer proper, 1 to detect running off the end without + address arithmetic all the time, and 3 for pushback during buffer + refill, in case there's a potential trigraph or end-of-line + digraph at the end of a block. */ - tmp = (U_CHAR *) xmalloc (8192 + 2 + 2); + tmp = (U_CHAR *) xmalloc (8192 + 1 + 3); pfile->input_buffer = tmp; pfile->input_buffer_len = 8192; } |