1f08c3bdfSopenharmony_ci/* 2f08c3bdfSopenharmony_ci * This is a really stupid C tokenizer. It doesn't do any include 3f08c3bdfSopenharmony_ci * files or anything complex at all. That's the preprocessor. 4f08c3bdfSopenharmony_ci * 5f08c3bdfSopenharmony_ci * Copyright (C) 2003 Transmeta Corp. 6f08c3bdfSopenharmony_ci * 2003 Linus Torvalds 7f08c3bdfSopenharmony_ci * 8f08c3bdfSopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a copy 9f08c3bdfSopenharmony_ci * of this software and associated documentation files (the "Software"), to deal 10f08c3bdfSopenharmony_ci * in the Software without restriction, including without limitation the rights 11f08c3bdfSopenharmony_ci * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12f08c3bdfSopenharmony_ci * copies of the Software, and to permit persons to whom the Software is 13f08c3bdfSopenharmony_ci * furnished to do so, subject to the following conditions: 14f08c3bdfSopenharmony_ci * 15f08c3bdfSopenharmony_ci * The above copyright notice and this permission notice shall be included in 16f08c3bdfSopenharmony_ci * all copies or substantial portions of the Software. 17f08c3bdfSopenharmony_ci * 18f08c3bdfSopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19f08c3bdfSopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20f08c3bdfSopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21f08c3bdfSopenharmony_ci * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22f08c3bdfSopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23f08c3bdfSopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24f08c3bdfSopenharmony_ci * THE SOFTWARE. 25f08c3bdfSopenharmony_ci */ 26f08c3bdfSopenharmony_ci#include <stdio.h> 27f08c3bdfSopenharmony_ci#include <stdlib.h> 28f08c3bdfSopenharmony_ci#include <stdarg.h> 29f08c3bdfSopenharmony_ci#include <stddef.h> 30f08c3bdfSopenharmony_ci#include <string.h> 31f08c3bdfSopenharmony_ci#include <ctype.h> 32f08c3bdfSopenharmony_ci#include <unistd.h> 33f08c3bdfSopenharmony_ci#include <stdint.h> 34f08c3bdfSopenharmony_ci 35f08c3bdfSopenharmony_ci#include "lib.h" 36f08c3bdfSopenharmony_ci#include "allocate.h" 37f08c3bdfSopenharmony_ci#include "token.h" 38f08c3bdfSopenharmony_ci#include "symbol.h" 39f08c3bdfSopenharmony_ci 40f08c3bdfSopenharmony_ci#define EOF (-1) 41f08c3bdfSopenharmony_ci 42f08c3bdfSopenharmony_ciint input_stream_nr = 0; 43f08c3bdfSopenharmony_cistruct stream *input_streams; 44f08c3bdfSopenharmony_cistatic int input_streams_allocated; 45f08c3bdfSopenharmony_ciunsigned int tabstop = 8; 46f08c3bdfSopenharmony_ci 47f08c3bdfSopenharmony_ci#define BUFSIZE (8192) 48f08c3bdfSopenharmony_ci 49f08c3bdfSopenharmony_citypedef struct { 50f08c3bdfSopenharmony_ci int fd, offset, size; 51f08c3bdfSopenharmony_ci int pos, line, nr; 52f08c3bdfSopenharmony_ci int newline, whitespace; 53f08c3bdfSopenharmony_ci struct token **tokenlist; 54f08c3bdfSopenharmony_ci struct token *token; 55f08c3bdfSopenharmony_ci unsigned char *buffer; 56f08c3bdfSopenharmony_ci} stream_t; 57f08c3bdfSopenharmony_ci 58f08c3bdfSopenharmony_ciconst char *stream_name(int stream) 59f08c3bdfSopenharmony_ci{ 60f08c3bdfSopenharmony_ci if (stream < 0 || stream > input_stream_nr) 61f08c3bdfSopenharmony_ci return "<bad stream>"; 62f08c3bdfSopenharmony_ci return input_streams[stream].name; 63f08c3bdfSopenharmony_ci} 64f08c3bdfSopenharmony_ci 65f08c3bdfSopenharmony_ciint stream_prev(int stream) 66f08c3bdfSopenharmony_ci{ 67f08c3bdfSopenharmony_ci if (stream < 0 || stream > input_stream_nr) 68f08c3bdfSopenharmony_ci return -1; 69f08c3bdfSopenharmony_ci stream = input_streams[stream].pos.stream; 70f08c3bdfSopenharmony_ci if (stream > input_stream_nr) 71f08c3bdfSopenharmony_ci return -1; 72f08c3bdfSopenharmony_ci return stream; 73f08c3bdfSopenharmony_ci} 74f08c3bdfSopenharmony_ci 75f08c3bdfSopenharmony_cistatic struct position stream_pos(stream_t *stream) 76f08c3bdfSopenharmony_ci{ 77f08c3bdfSopenharmony_ci struct position pos; 78f08c3bdfSopenharmony_ci pos.type = 0; 79f08c3bdfSopenharmony_ci pos.stream = stream->nr; 80f08c3bdfSopenharmony_ci pos.newline = stream->newline; 81f08c3bdfSopenharmony_ci pos.whitespace = stream->whitespace; 82f08c3bdfSopenharmony_ci pos.pos = stream->pos; 83f08c3bdfSopenharmony_ci pos.line = stream->line; 84f08c3bdfSopenharmony_ci pos.noexpand = 0; 85f08c3bdfSopenharmony_ci return pos; 86f08c3bdfSopenharmony_ci} 87f08c3bdfSopenharmony_ci 88f08c3bdfSopenharmony_ciconst char *show_special(int val) 89f08c3bdfSopenharmony_ci{ 90f08c3bdfSopenharmony_ci static char buffer[4]; 91f08c3bdfSopenharmony_ci 92f08c3bdfSopenharmony_ci buffer[0] = val; 93f08c3bdfSopenharmony_ci buffer[1] = 0; 94f08c3bdfSopenharmony_ci if (val >= SPECIAL_BASE) 95f08c3bdfSopenharmony_ci strcpy(buffer, (char *) combinations[val - SPECIAL_BASE]); 96f08c3bdfSopenharmony_ci return buffer; 97f08c3bdfSopenharmony_ci} 98f08c3bdfSopenharmony_ci 99f08c3bdfSopenharmony_ciconst char *show_ident(const struct ident *ident) 100f08c3bdfSopenharmony_ci{ 101f08c3bdfSopenharmony_ci static char buff[4][256]; 102f08c3bdfSopenharmony_ci static int n; 103f08c3bdfSopenharmony_ci char *buffer; 104f08c3bdfSopenharmony_ci 105f08c3bdfSopenharmony_ci if (!ident) 106f08c3bdfSopenharmony_ci return "<noident>"; 107f08c3bdfSopenharmony_ci buffer = buff[3 & ++n]; 108f08c3bdfSopenharmony_ci sprintf(buffer, "%.*s", ident->len, ident->name); 109f08c3bdfSopenharmony_ci return buffer; 110f08c3bdfSopenharmony_ci} 111f08c3bdfSopenharmony_ci 112f08c3bdfSopenharmony_cistatic char *charstr(char *ptr, unsigned char c, unsigned char escape, unsigned char next) 113f08c3bdfSopenharmony_ci{ 114f08c3bdfSopenharmony_ci if (isprint(c)) { 115f08c3bdfSopenharmony_ci if (c == escape || c == '\\') 116f08c3bdfSopenharmony_ci *ptr++ = '\\'; 117f08c3bdfSopenharmony_ci *ptr++ = c; 118f08c3bdfSopenharmony_ci return ptr; 119f08c3bdfSopenharmony_ci } 120f08c3bdfSopenharmony_ci *ptr++ = '\\'; 121f08c3bdfSopenharmony_ci switch (c) { 122f08c3bdfSopenharmony_ci case '\n': 123f08c3bdfSopenharmony_ci *ptr++ = 'n'; 124f08c3bdfSopenharmony_ci return ptr; 125f08c3bdfSopenharmony_ci case '\t': 126f08c3bdfSopenharmony_ci *ptr++ = 't'; 127f08c3bdfSopenharmony_ci return ptr; 128f08c3bdfSopenharmony_ci } 129f08c3bdfSopenharmony_ci if (!isdigit(next)) 130f08c3bdfSopenharmony_ci return ptr + sprintf(ptr, "%o", c); 131f08c3bdfSopenharmony_ci 132f08c3bdfSopenharmony_ci return ptr + sprintf(ptr, "%03o", c); 133f08c3bdfSopenharmony_ci} 134f08c3bdfSopenharmony_ci 135f08c3bdfSopenharmony_ciconst char *show_string(const struct string *string) 136f08c3bdfSopenharmony_ci{ 137f08c3bdfSopenharmony_ci static char buffer[4 * MAX_STRING + 3]; 138f08c3bdfSopenharmony_ci char *ptr; 139f08c3bdfSopenharmony_ci int i; 140f08c3bdfSopenharmony_ci 141f08c3bdfSopenharmony_ci if (!string || !string->length) 142f08c3bdfSopenharmony_ci return "<bad_string>"; 143f08c3bdfSopenharmony_ci ptr = buffer; 144f08c3bdfSopenharmony_ci *ptr++ = '"'; 145f08c3bdfSopenharmony_ci for (i = 0; i < string->length-1; i++) { 146f08c3bdfSopenharmony_ci const char *p = string->data + i; 147f08c3bdfSopenharmony_ci ptr = charstr(ptr, p[0], '"', p[1]); 148f08c3bdfSopenharmony_ci } 149f08c3bdfSopenharmony_ci *ptr++ = '"'; 150f08c3bdfSopenharmony_ci *ptr = '\0'; 151f08c3bdfSopenharmony_ci return buffer; 152f08c3bdfSopenharmony_ci} 153f08c3bdfSopenharmony_ci 154f08c3bdfSopenharmony_cistatic const char *show_char(const char *s, size_t len, char prefix, char delim) 155f08c3bdfSopenharmony_ci{ 156f08c3bdfSopenharmony_ci static char buffer[MAX_STRING + 4]; 157f08c3bdfSopenharmony_ci char *p = buffer; 158f08c3bdfSopenharmony_ci if (prefix) 159f08c3bdfSopenharmony_ci *p++ = prefix; 160f08c3bdfSopenharmony_ci *p++ = delim; 161f08c3bdfSopenharmony_ci memcpy(p, s, len); 162f08c3bdfSopenharmony_ci p += len; 163f08c3bdfSopenharmony_ci *p++ = delim; 164f08c3bdfSopenharmony_ci *p++ = '\0'; 165f08c3bdfSopenharmony_ci return buffer; 166f08c3bdfSopenharmony_ci} 167f08c3bdfSopenharmony_ci 168f08c3bdfSopenharmony_cistatic const char *quote_char(const char *s, size_t len, char prefix, char delim) 169f08c3bdfSopenharmony_ci{ 170f08c3bdfSopenharmony_ci static char buffer[2*MAX_STRING + 6]; 171f08c3bdfSopenharmony_ci size_t i; 172f08c3bdfSopenharmony_ci char *p = buffer; 173f08c3bdfSopenharmony_ci if (prefix) 174f08c3bdfSopenharmony_ci *p++ = prefix; 175f08c3bdfSopenharmony_ci if (delim == '"') 176f08c3bdfSopenharmony_ci *p++ = '\\'; 177f08c3bdfSopenharmony_ci *p++ = delim; 178f08c3bdfSopenharmony_ci for (i = 0; i < len; i++) { 179f08c3bdfSopenharmony_ci if (s[i] == '"' || s[i] == '\\') 180f08c3bdfSopenharmony_ci *p++ = '\\'; 181f08c3bdfSopenharmony_ci *p++ = s[i]; 182f08c3bdfSopenharmony_ci } 183f08c3bdfSopenharmony_ci if (delim == '"') 184f08c3bdfSopenharmony_ci *p++ = '\\'; 185f08c3bdfSopenharmony_ci *p++ = delim; 186f08c3bdfSopenharmony_ci *p++ = '\0'; 187f08c3bdfSopenharmony_ci return buffer; 188f08c3bdfSopenharmony_ci} 189f08c3bdfSopenharmony_ci 190f08c3bdfSopenharmony_ciconst char *show_token(const struct token *token) 191f08c3bdfSopenharmony_ci{ 192f08c3bdfSopenharmony_ci static char buffer[256]; 193f08c3bdfSopenharmony_ci 194f08c3bdfSopenharmony_ci if (!token) 195f08c3bdfSopenharmony_ci return "<no token>"; 196f08c3bdfSopenharmony_ci switch (token_type(token)) { 197f08c3bdfSopenharmony_ci case TOKEN_ERROR: 198f08c3bdfSopenharmony_ci return "syntax error"; 199f08c3bdfSopenharmony_ci 200f08c3bdfSopenharmony_ci case TOKEN_EOF: 201f08c3bdfSopenharmony_ci return "end-of-input"; 202f08c3bdfSopenharmony_ci 203f08c3bdfSopenharmony_ci case TOKEN_IDENT: 204f08c3bdfSopenharmony_ci return show_ident(token->ident); 205f08c3bdfSopenharmony_ci 206f08c3bdfSopenharmony_ci case TOKEN_NUMBER: 207f08c3bdfSopenharmony_ci return token->number; 208f08c3bdfSopenharmony_ci 209f08c3bdfSopenharmony_ci case TOKEN_SPECIAL: 210f08c3bdfSopenharmony_ci return show_special(token->special); 211f08c3bdfSopenharmony_ci 212f08c3bdfSopenharmony_ci case TOKEN_CHAR: 213f08c3bdfSopenharmony_ci return show_char(token->string->data, 214f08c3bdfSopenharmony_ci token->string->length - 1, 0, '\''); 215f08c3bdfSopenharmony_ci case TOKEN_CHAR_EMBEDDED_0 ... TOKEN_CHAR_EMBEDDED_3: 216f08c3bdfSopenharmony_ci return show_char(token->embedded, 217f08c3bdfSopenharmony_ci token_type(token) - TOKEN_CHAR, 0, '\''); 218f08c3bdfSopenharmony_ci case TOKEN_WIDE_CHAR: 219f08c3bdfSopenharmony_ci return show_char(token->string->data, 220f08c3bdfSopenharmony_ci token->string->length - 1, 'L', '\''); 221f08c3bdfSopenharmony_ci case TOKEN_WIDE_CHAR_EMBEDDED_0 ... TOKEN_WIDE_CHAR_EMBEDDED_3: 222f08c3bdfSopenharmony_ci return show_char(token->embedded, 223f08c3bdfSopenharmony_ci token_type(token) - TOKEN_WIDE_CHAR, 'L', '\''); 224f08c3bdfSopenharmony_ci case TOKEN_STRING: 225f08c3bdfSopenharmony_ci return show_char(token->string->data, 226f08c3bdfSopenharmony_ci token->string->length - 1, 0, '"'); 227f08c3bdfSopenharmony_ci case TOKEN_WIDE_STRING: 228f08c3bdfSopenharmony_ci return show_char(token->string->data, 229f08c3bdfSopenharmony_ci token->string->length - 1, 'L', '"'); 230f08c3bdfSopenharmony_ci 231f08c3bdfSopenharmony_ci case TOKEN_STREAMBEGIN: 232f08c3bdfSopenharmony_ci sprintf(buffer, "<beginning of '%s'>", stream_name(token->pos.stream)); 233f08c3bdfSopenharmony_ci return buffer; 234f08c3bdfSopenharmony_ci 235f08c3bdfSopenharmony_ci case TOKEN_STREAMEND: 236f08c3bdfSopenharmony_ci sprintf(buffer, "<end of '%s'>", stream_name(token->pos.stream)); 237f08c3bdfSopenharmony_ci return buffer; 238f08c3bdfSopenharmony_ci 239f08c3bdfSopenharmony_ci case TOKEN_UNTAINT: 240f08c3bdfSopenharmony_ci sprintf(buffer, "<untaint>"); 241f08c3bdfSopenharmony_ci return buffer; 242f08c3bdfSopenharmony_ci 243f08c3bdfSopenharmony_ci case TOKEN_ARG_COUNT: 244f08c3bdfSopenharmony_ci sprintf(buffer, "<argcnt>"); 245f08c3bdfSopenharmony_ci return buffer; 246f08c3bdfSopenharmony_ci 247f08c3bdfSopenharmony_ci default: 248f08c3bdfSopenharmony_ci sprintf(buffer, "unhandled token type '%d' ", token_type(token)); 249f08c3bdfSopenharmony_ci return buffer; 250f08c3bdfSopenharmony_ci } 251f08c3bdfSopenharmony_ci} 252f08c3bdfSopenharmony_ci 253f08c3bdfSopenharmony_ciconst char *quote_token(const struct token *token) 254f08c3bdfSopenharmony_ci{ 255f08c3bdfSopenharmony_ci static char buffer[256]; 256f08c3bdfSopenharmony_ci 257f08c3bdfSopenharmony_ci switch (token_type(token)) { 258f08c3bdfSopenharmony_ci case TOKEN_ERROR: 259f08c3bdfSopenharmony_ci return "syntax error"; 260f08c3bdfSopenharmony_ci 261f08c3bdfSopenharmony_ci case TOKEN_IDENT: 262f08c3bdfSopenharmony_ci return show_ident(token->ident); 263f08c3bdfSopenharmony_ci 264f08c3bdfSopenharmony_ci case TOKEN_NUMBER: 265f08c3bdfSopenharmony_ci return token->number; 266f08c3bdfSopenharmony_ci 267f08c3bdfSopenharmony_ci case TOKEN_SPECIAL: 268f08c3bdfSopenharmony_ci return show_special(token->special); 269f08c3bdfSopenharmony_ci 270f08c3bdfSopenharmony_ci case TOKEN_CHAR: 271f08c3bdfSopenharmony_ci return quote_char(token->string->data, 272f08c3bdfSopenharmony_ci token->string->length - 1, 0, '\''); 273f08c3bdfSopenharmony_ci case TOKEN_CHAR_EMBEDDED_0 ... TOKEN_CHAR_EMBEDDED_3: 274f08c3bdfSopenharmony_ci return quote_char(token->embedded, 275f08c3bdfSopenharmony_ci token_type(token) - TOKEN_CHAR, 0, '\''); 276f08c3bdfSopenharmony_ci case TOKEN_WIDE_CHAR: 277f08c3bdfSopenharmony_ci return quote_char(token->string->data, 278f08c3bdfSopenharmony_ci token->string->length - 1, 'L', '\''); 279f08c3bdfSopenharmony_ci case TOKEN_WIDE_CHAR_EMBEDDED_0 ... TOKEN_WIDE_CHAR_EMBEDDED_3: 280f08c3bdfSopenharmony_ci return quote_char(token->embedded, 281f08c3bdfSopenharmony_ci token_type(token) - TOKEN_WIDE_CHAR, 'L', '\''); 282f08c3bdfSopenharmony_ci case TOKEN_STRING: 283f08c3bdfSopenharmony_ci return quote_char(token->string->data, 284f08c3bdfSopenharmony_ci token->string->length - 1, 0, '"'); 285f08c3bdfSopenharmony_ci case TOKEN_WIDE_STRING: 286f08c3bdfSopenharmony_ci return quote_char(token->string->data, 287f08c3bdfSopenharmony_ci token->string->length - 1, 'L', '"'); 288f08c3bdfSopenharmony_ci default: 289f08c3bdfSopenharmony_ci sprintf(buffer, "unhandled token type '%d' ", token_type(token)); 290f08c3bdfSopenharmony_ci return buffer; 291f08c3bdfSopenharmony_ci } 292f08c3bdfSopenharmony_ci} 293f08c3bdfSopenharmony_ci 294f08c3bdfSopenharmony_ci#define HASHED_INPUT_BITS (6) 295f08c3bdfSopenharmony_ci#define HASHED_INPUT (1 << HASHED_INPUT_BITS) 296f08c3bdfSopenharmony_ci#define HASH_PRIME 0x9e370001UL 297f08c3bdfSopenharmony_ci 298f08c3bdfSopenharmony_cistatic int input_stream_hashes[HASHED_INPUT] = { [0 ... HASHED_INPUT-1] = -1 }; 299f08c3bdfSopenharmony_ci 300f08c3bdfSopenharmony_ciint *hash_stream(const char *name) 301f08c3bdfSopenharmony_ci{ 302f08c3bdfSopenharmony_ci uint32_t hash = 0; 303f08c3bdfSopenharmony_ci unsigned char c; 304f08c3bdfSopenharmony_ci 305f08c3bdfSopenharmony_ci while ((c = *name++) != 0) 306f08c3bdfSopenharmony_ci hash = (hash + (c << 4) + (c >> 4)) * 11; 307f08c3bdfSopenharmony_ci 308f08c3bdfSopenharmony_ci hash *= HASH_PRIME; 309f08c3bdfSopenharmony_ci hash >>= 32 - HASHED_INPUT_BITS; 310f08c3bdfSopenharmony_ci return input_stream_hashes + hash; 311f08c3bdfSopenharmony_ci} 312f08c3bdfSopenharmony_ci 313f08c3bdfSopenharmony_ciint init_stream(const struct position *pos, const char *name, int fd, const char **next_path) 314f08c3bdfSopenharmony_ci{ 315f08c3bdfSopenharmony_ci int stream = input_stream_nr, *hash; 316f08c3bdfSopenharmony_ci struct stream *current; 317f08c3bdfSopenharmony_ci 318f08c3bdfSopenharmony_ci if (stream >= input_streams_allocated) { 319f08c3bdfSopenharmony_ci int newalloc = stream * 4 / 3 + 10; 320f08c3bdfSopenharmony_ci input_streams = realloc(input_streams, newalloc * sizeof(struct stream)); 321f08c3bdfSopenharmony_ci if (!input_streams) 322f08c3bdfSopenharmony_ci die("Unable to allocate more streams space"); 323f08c3bdfSopenharmony_ci input_streams_allocated = newalloc; 324f08c3bdfSopenharmony_ci } 325f08c3bdfSopenharmony_ci current = input_streams + stream; 326f08c3bdfSopenharmony_ci memset(current, 0, sizeof(*current)); 327f08c3bdfSopenharmony_ci current->name = name; 328f08c3bdfSopenharmony_ci current->fd = fd; 329f08c3bdfSopenharmony_ci current->next_path = next_path; 330f08c3bdfSopenharmony_ci current->path = NULL; 331f08c3bdfSopenharmony_ci current->constant = CONSTANT_FILE_MAYBE; 332f08c3bdfSopenharmony_ci if (pos) 333f08c3bdfSopenharmony_ci current->pos = *pos; 334f08c3bdfSopenharmony_ci else 335f08c3bdfSopenharmony_ci current->pos.stream = -1; 336f08c3bdfSopenharmony_ci input_stream_nr = stream+1; 337f08c3bdfSopenharmony_ci hash = hash_stream(name); 338f08c3bdfSopenharmony_ci current->next_stream = *hash; 339f08c3bdfSopenharmony_ci *hash = stream; 340f08c3bdfSopenharmony_ci return stream; 341f08c3bdfSopenharmony_ci} 342f08c3bdfSopenharmony_ci 343f08c3bdfSopenharmony_cistatic struct token * alloc_token(stream_t *stream) 344f08c3bdfSopenharmony_ci{ 345f08c3bdfSopenharmony_ci struct token *token = __alloc_token(0); 346f08c3bdfSopenharmony_ci token->pos = stream_pos(stream); 347f08c3bdfSopenharmony_ci return token; 348f08c3bdfSopenharmony_ci} 349f08c3bdfSopenharmony_ci 350f08c3bdfSopenharmony_ci/* 351f08c3bdfSopenharmony_ci * Argh... That was surprisingly messy - handling '\r' complicates the 352f08c3bdfSopenharmony_ci * things a _lot_. 353f08c3bdfSopenharmony_ci */ 354f08c3bdfSopenharmony_cistatic int nextchar_slow(stream_t *stream) 355f08c3bdfSopenharmony_ci{ 356f08c3bdfSopenharmony_ci int offset = stream->offset; 357f08c3bdfSopenharmony_ci int size = stream->size; 358f08c3bdfSopenharmony_ci int c; 359f08c3bdfSopenharmony_ci int spliced = 0, had_cr, had_backslash; 360f08c3bdfSopenharmony_ci 361f08c3bdfSopenharmony_cirestart: 362f08c3bdfSopenharmony_ci had_cr = had_backslash = 0; 363f08c3bdfSopenharmony_ci 364f08c3bdfSopenharmony_cirepeat: 365f08c3bdfSopenharmony_ci if (offset >= size) { 366f08c3bdfSopenharmony_ci if (stream->fd < 0) 367f08c3bdfSopenharmony_ci goto got_eof; 368f08c3bdfSopenharmony_ci size = read(stream->fd, stream->buffer, BUFSIZE); 369f08c3bdfSopenharmony_ci if (size <= 0) 370f08c3bdfSopenharmony_ci goto got_eof; 371f08c3bdfSopenharmony_ci stream->size = size; 372f08c3bdfSopenharmony_ci stream->offset = offset = 0; 373f08c3bdfSopenharmony_ci } 374f08c3bdfSopenharmony_ci 375f08c3bdfSopenharmony_ci c = stream->buffer[offset++]; 376f08c3bdfSopenharmony_ci if (had_cr) 377f08c3bdfSopenharmony_ci goto check_lf; 378f08c3bdfSopenharmony_ci 379f08c3bdfSopenharmony_ci if (c == '\r') { 380f08c3bdfSopenharmony_ci had_cr = 1; 381f08c3bdfSopenharmony_ci goto repeat; 382f08c3bdfSopenharmony_ci } 383f08c3bdfSopenharmony_ci 384f08c3bdfSopenharmony_cinorm: 385f08c3bdfSopenharmony_ci if (!had_backslash) { 386f08c3bdfSopenharmony_ci switch (c) { 387f08c3bdfSopenharmony_ci case '\t': 388f08c3bdfSopenharmony_ci stream->pos += tabstop - stream->pos % tabstop; 389f08c3bdfSopenharmony_ci break; 390f08c3bdfSopenharmony_ci case '\n': 391f08c3bdfSopenharmony_ci stream->line++; 392f08c3bdfSopenharmony_ci stream->pos = 0; 393f08c3bdfSopenharmony_ci stream->newline = 1; 394f08c3bdfSopenharmony_ci break; 395f08c3bdfSopenharmony_ci case '\\': 396f08c3bdfSopenharmony_ci had_backslash = 1; 397f08c3bdfSopenharmony_ci stream->pos++; 398f08c3bdfSopenharmony_ci goto repeat; 399f08c3bdfSopenharmony_ci default: 400f08c3bdfSopenharmony_ci stream->pos++; 401f08c3bdfSopenharmony_ci } 402f08c3bdfSopenharmony_ci } else { 403f08c3bdfSopenharmony_ci if (c == '\n') { 404f08c3bdfSopenharmony_ci stream->line++; 405f08c3bdfSopenharmony_ci stream->pos = 0; 406f08c3bdfSopenharmony_ci spliced = 1; 407f08c3bdfSopenharmony_ci goto restart; 408f08c3bdfSopenharmony_ci } 409f08c3bdfSopenharmony_ci offset--; 410f08c3bdfSopenharmony_ci c = '\\'; 411f08c3bdfSopenharmony_ci } 412f08c3bdfSopenharmony_ciout: 413f08c3bdfSopenharmony_ci stream->offset = offset; 414f08c3bdfSopenharmony_ci 415f08c3bdfSopenharmony_ci return c; 416f08c3bdfSopenharmony_ci 417f08c3bdfSopenharmony_cicheck_lf: 418f08c3bdfSopenharmony_ci if (c != '\n') 419f08c3bdfSopenharmony_ci offset--; 420f08c3bdfSopenharmony_ci c = '\n'; 421f08c3bdfSopenharmony_ci goto norm; 422f08c3bdfSopenharmony_ci 423f08c3bdfSopenharmony_cigot_eof: 424f08c3bdfSopenharmony_ci if (had_backslash) { 425f08c3bdfSopenharmony_ci c = '\\'; 426f08c3bdfSopenharmony_ci goto out; 427f08c3bdfSopenharmony_ci } 428f08c3bdfSopenharmony_ci if (stream->pos & Wnewline_eof) 429f08c3bdfSopenharmony_ci warning(stream_pos(stream), "no newline at end of file"); 430f08c3bdfSopenharmony_ci else if (spliced) 431f08c3bdfSopenharmony_ci warning(stream_pos(stream), "backslash-newline at end of file"); 432f08c3bdfSopenharmony_ci return EOF; 433f08c3bdfSopenharmony_ci} 434f08c3bdfSopenharmony_ci 435f08c3bdfSopenharmony_ci/* 436f08c3bdfSopenharmony_ci * We want that as light as possible while covering all normal cases. 437f08c3bdfSopenharmony_ci * Slow path (including the logics with line-splicing and EOF sanity 438f08c3bdfSopenharmony_ci * checks) is in nextchar_slow(). 439f08c3bdfSopenharmony_ci */ 440f08c3bdfSopenharmony_cistatic inline int nextchar(stream_t *stream) 441f08c3bdfSopenharmony_ci{ 442f08c3bdfSopenharmony_ci int offset = stream->offset; 443f08c3bdfSopenharmony_ci 444f08c3bdfSopenharmony_ci if (offset < stream->size) { 445f08c3bdfSopenharmony_ci int c = stream->buffer[offset++]; 446f08c3bdfSopenharmony_ci static const char special[256] = { 447f08c3bdfSopenharmony_ci ['\t'] = 1, ['\r'] = 1, ['\n'] = 1, ['\\'] = 1 448f08c3bdfSopenharmony_ci }; 449f08c3bdfSopenharmony_ci if (!special[c]) { 450f08c3bdfSopenharmony_ci stream->offset = offset; 451f08c3bdfSopenharmony_ci stream->pos++; 452f08c3bdfSopenharmony_ci return c; 453f08c3bdfSopenharmony_ci } 454f08c3bdfSopenharmony_ci } 455f08c3bdfSopenharmony_ci return nextchar_slow(stream); 456f08c3bdfSopenharmony_ci} 457f08c3bdfSopenharmony_ci 458f08c3bdfSopenharmony_cistruct token eof_token_entry; 459f08c3bdfSopenharmony_ci 460f08c3bdfSopenharmony_cistatic struct token *mark_eof(stream_t *stream) 461f08c3bdfSopenharmony_ci{ 462f08c3bdfSopenharmony_ci struct token *end; 463f08c3bdfSopenharmony_ci 464f08c3bdfSopenharmony_ci end = alloc_token(stream); 465f08c3bdfSopenharmony_ci eof_token_entry.pos = end->pos; 466f08c3bdfSopenharmony_ci token_type(end) = TOKEN_STREAMEND; 467f08c3bdfSopenharmony_ci end->pos.newline = 1; 468f08c3bdfSopenharmony_ci 469f08c3bdfSopenharmony_ci eof_token_entry.next = &eof_token_entry; 470f08c3bdfSopenharmony_ci eof_token_entry.pos.newline = 1; 471f08c3bdfSopenharmony_ci 472f08c3bdfSopenharmony_ci end->next = &eof_token_entry; 473f08c3bdfSopenharmony_ci *stream->tokenlist = end; 474f08c3bdfSopenharmony_ci stream->tokenlist = NULL; 475f08c3bdfSopenharmony_ci return end; 476f08c3bdfSopenharmony_ci} 477f08c3bdfSopenharmony_ci 478f08c3bdfSopenharmony_cistatic void add_token(stream_t *stream) 479f08c3bdfSopenharmony_ci{ 480f08c3bdfSopenharmony_ci struct token *token = stream->token; 481f08c3bdfSopenharmony_ci 482f08c3bdfSopenharmony_ci stream->token = NULL; 483f08c3bdfSopenharmony_ci token->next = NULL; 484f08c3bdfSopenharmony_ci *stream->tokenlist = token; 485f08c3bdfSopenharmony_ci stream->tokenlist = &token->next; 486f08c3bdfSopenharmony_ci} 487f08c3bdfSopenharmony_ci 488f08c3bdfSopenharmony_cistatic void drop_token(stream_t *stream) 489f08c3bdfSopenharmony_ci{ 490f08c3bdfSopenharmony_ci stream->newline |= stream->token->pos.newline; 491f08c3bdfSopenharmony_ci stream->whitespace |= stream->token->pos.whitespace; 492f08c3bdfSopenharmony_ci stream->token = NULL; 493f08c3bdfSopenharmony_ci} 494f08c3bdfSopenharmony_ci 495f08c3bdfSopenharmony_cienum { 496f08c3bdfSopenharmony_ci Letter = 1, 497f08c3bdfSopenharmony_ci Digit = 2, 498f08c3bdfSopenharmony_ci Hex = 4, 499f08c3bdfSopenharmony_ci Exp = 8, 500f08c3bdfSopenharmony_ci Dot = 16, 501f08c3bdfSopenharmony_ci ValidSecond = 32, 502f08c3bdfSopenharmony_ci Quote = 64, 503f08c3bdfSopenharmony_ci}; 504f08c3bdfSopenharmony_ci 505f08c3bdfSopenharmony_cistatic const char cclass[257] = { 506f08c3bdfSopenharmony_ci ['0' + 1 ... '9' + 1] = Digit | Hex, 507f08c3bdfSopenharmony_ci ['A' + 1 ... 'D' + 1] = Letter | Hex, 508f08c3bdfSopenharmony_ci ['E' + 1] = Letter | Hex | Exp, /* E<exp> */ 509f08c3bdfSopenharmony_ci ['F' + 1] = Letter | Hex, 510f08c3bdfSopenharmony_ci ['G' + 1 ... 'O' + 1] = Letter, 511f08c3bdfSopenharmony_ci ['P' + 1] = Letter | Exp, /* P<exp> */ 512f08c3bdfSopenharmony_ci ['Q' + 1 ... 'Z' + 1] = Letter, 513f08c3bdfSopenharmony_ci ['a' + 1 ... 'd' + 1] = Letter | Hex, 514f08c3bdfSopenharmony_ci ['e' + 1] = Letter | Hex | Exp, /* e<exp> */ 515f08c3bdfSopenharmony_ci ['f' + 1] = Letter | Hex, 516f08c3bdfSopenharmony_ci ['g' + 1 ... 'o' + 1] = Letter, 517f08c3bdfSopenharmony_ci ['p' + 1] = Letter | Exp, /* p<exp> */ 518f08c3bdfSopenharmony_ci ['q' + 1 ... 'z' + 1] = Letter, 519f08c3bdfSopenharmony_ci ['_' + 1] = Letter, 520f08c3bdfSopenharmony_ci ['.' + 1] = Dot | ValidSecond, 521f08c3bdfSopenharmony_ci ['=' + 1] = ValidSecond, 522f08c3bdfSopenharmony_ci ['+' + 1] = ValidSecond, 523f08c3bdfSopenharmony_ci ['-' + 1] = ValidSecond, 524f08c3bdfSopenharmony_ci ['>' + 1] = ValidSecond, 525f08c3bdfSopenharmony_ci ['<' + 1] = ValidSecond, 526f08c3bdfSopenharmony_ci ['&' + 1] = ValidSecond, 527f08c3bdfSopenharmony_ci ['|' + 1] = ValidSecond, 528f08c3bdfSopenharmony_ci ['#' + 1] = ValidSecond, 529f08c3bdfSopenharmony_ci ['\'' + 1] = Quote, 530f08c3bdfSopenharmony_ci ['"' + 1] = Quote, 531f08c3bdfSopenharmony_ci}; 532f08c3bdfSopenharmony_ci 533f08c3bdfSopenharmony_ci/* 534f08c3bdfSopenharmony_ci * pp-number: 535f08c3bdfSopenharmony_ci * digit 536f08c3bdfSopenharmony_ci * . digit 537f08c3bdfSopenharmony_ci * pp-number digit 538f08c3bdfSopenharmony_ci * pp-number identifier-nodigit 539f08c3bdfSopenharmony_ci * pp-number e sign 540f08c3bdfSopenharmony_ci * pp-number E sign 541f08c3bdfSopenharmony_ci * pp-number p sign 542f08c3bdfSopenharmony_ci * pp-number P sign 543f08c3bdfSopenharmony_ci * pp-number . 544f08c3bdfSopenharmony_ci */ 545f08c3bdfSopenharmony_cistatic int get_one_number(int c, int next, stream_t *stream) 546f08c3bdfSopenharmony_ci{ 547f08c3bdfSopenharmony_ci struct token *token; 548f08c3bdfSopenharmony_ci static char buffer[4095]; 549f08c3bdfSopenharmony_ci char *p = buffer, *buffer_end = buffer + sizeof (buffer); 550f08c3bdfSopenharmony_ci 551f08c3bdfSopenharmony_ci *p++ = c; 552f08c3bdfSopenharmony_ci for (;;) { 553f08c3bdfSopenharmony_ci long class = cclass[next + 1]; 554f08c3bdfSopenharmony_ci if (!(class & (Dot | Digit | Letter))) 555f08c3bdfSopenharmony_ci break; 556f08c3bdfSopenharmony_ci if (p != buffer_end) 557f08c3bdfSopenharmony_ci *p++ = next; 558f08c3bdfSopenharmony_ci next = nextchar(stream); 559f08c3bdfSopenharmony_ci if (class & Exp) { 560f08c3bdfSopenharmony_ci if (next == '-' || next == '+') { 561f08c3bdfSopenharmony_ci if (p != buffer_end) 562f08c3bdfSopenharmony_ci *p++ = next; 563f08c3bdfSopenharmony_ci next = nextchar(stream); 564f08c3bdfSopenharmony_ci } 565f08c3bdfSopenharmony_ci } 566f08c3bdfSopenharmony_ci } 567f08c3bdfSopenharmony_ci 568f08c3bdfSopenharmony_ci if (p == buffer_end) { 569f08c3bdfSopenharmony_ci sparse_error(stream_pos(stream), "number token exceeds %td characters", 570f08c3bdfSopenharmony_ci buffer_end - buffer); 571f08c3bdfSopenharmony_ci // Pretend we saw just "1". 572f08c3bdfSopenharmony_ci buffer[0] = '1'; 573f08c3bdfSopenharmony_ci p = buffer + 1; 574f08c3bdfSopenharmony_ci } 575f08c3bdfSopenharmony_ci 576f08c3bdfSopenharmony_ci *p++ = 0; 577f08c3bdfSopenharmony_ci token = stream->token; 578f08c3bdfSopenharmony_ci token_type(token) = TOKEN_NUMBER; 579f08c3bdfSopenharmony_ci token->number = xmemdup(buffer, p - buffer); 580f08c3bdfSopenharmony_ci add_token(stream); 581f08c3bdfSopenharmony_ci 582f08c3bdfSopenharmony_ci return next; 583f08c3bdfSopenharmony_ci} 584f08c3bdfSopenharmony_ci 585f08c3bdfSopenharmony_cistatic int eat_string(int next, stream_t *stream, enum token_type type) 586f08c3bdfSopenharmony_ci{ 587f08c3bdfSopenharmony_ci static char buffer[MAX_STRING]; 588f08c3bdfSopenharmony_ci struct string *string; 589f08c3bdfSopenharmony_ci struct token *token = stream->token; 590f08c3bdfSopenharmony_ci int len = 0; 591f08c3bdfSopenharmony_ci int escape; 592f08c3bdfSopenharmony_ci int want_hex = 0; 593f08c3bdfSopenharmony_ci char delim = type < TOKEN_STRING ? '\'' : '"'; 594f08c3bdfSopenharmony_ci 595f08c3bdfSopenharmony_ci for (escape = 0; escape || next != delim; next = nextchar(stream)) { 596f08c3bdfSopenharmony_ci if (len < MAX_STRING) 597f08c3bdfSopenharmony_ci buffer[len] = next; 598f08c3bdfSopenharmony_ci len++; 599f08c3bdfSopenharmony_ci if (next == '\n') { 600f08c3bdfSopenharmony_ci warning(stream_pos(stream), 601f08c3bdfSopenharmony_ci "missing terminating %c character", delim); 602f08c3bdfSopenharmony_ci /* assume delimiter is lost */ 603f08c3bdfSopenharmony_ci break; 604f08c3bdfSopenharmony_ci } 605f08c3bdfSopenharmony_ci if (next == EOF) { 606f08c3bdfSopenharmony_ci warning(stream_pos(stream), 607f08c3bdfSopenharmony_ci "End of file in middle of string"); 608f08c3bdfSopenharmony_ci return next; 609f08c3bdfSopenharmony_ci } 610f08c3bdfSopenharmony_ci if (!escape) { 611f08c3bdfSopenharmony_ci if (want_hex && !(cclass[next + 1] & Hex)) 612f08c3bdfSopenharmony_ci warning(stream_pos(stream), 613f08c3bdfSopenharmony_ci "\\x used with no following hex digits"); 614f08c3bdfSopenharmony_ci want_hex = 0; 615f08c3bdfSopenharmony_ci escape = next == '\\'; 616f08c3bdfSopenharmony_ci } else { 617f08c3bdfSopenharmony_ci escape = 0; 618f08c3bdfSopenharmony_ci want_hex = next == 'x'; 619f08c3bdfSopenharmony_ci } 620f08c3bdfSopenharmony_ci } 621f08c3bdfSopenharmony_ci if (want_hex) 622f08c3bdfSopenharmony_ci warning(stream_pos(stream), 623f08c3bdfSopenharmony_ci "\\x used with no following hex digits"); 624f08c3bdfSopenharmony_ci if (len > MAX_STRING) { 625f08c3bdfSopenharmony_ci warning(stream_pos(stream), "string too long (%d bytes, %d bytes max)", len, MAX_STRING); 626f08c3bdfSopenharmony_ci len = MAX_STRING; 627f08c3bdfSopenharmony_ci } 628f08c3bdfSopenharmony_ci if (delim == '\'' && len && len <= 4) { 629f08c3bdfSopenharmony_ci token_type(token) = type + len; 630f08c3bdfSopenharmony_ci memset(buffer + len, '\0', 4 - len); 631f08c3bdfSopenharmony_ci memcpy(token->embedded, buffer, 4); 632f08c3bdfSopenharmony_ci } else { 633f08c3bdfSopenharmony_ci token_type(token) = type; 634f08c3bdfSopenharmony_ci string = __alloc_string(len+1); 635f08c3bdfSopenharmony_ci memcpy(string->data, buffer, len); 636f08c3bdfSopenharmony_ci string->data[len] = '\0'; 637f08c3bdfSopenharmony_ci string->length = len+1; 638f08c3bdfSopenharmony_ci token->string = string; 639f08c3bdfSopenharmony_ci } 640f08c3bdfSopenharmony_ci 641f08c3bdfSopenharmony_ci /* Pass it on.. */ 642f08c3bdfSopenharmony_ci token = stream->token; 643f08c3bdfSopenharmony_ci add_token(stream); 644f08c3bdfSopenharmony_ci return nextchar(stream); 645f08c3bdfSopenharmony_ci} 646f08c3bdfSopenharmony_ci 647f08c3bdfSopenharmony_cistatic int drop_stream_eoln(stream_t *stream) 648f08c3bdfSopenharmony_ci{ 649f08c3bdfSopenharmony_ci drop_token(stream); 650f08c3bdfSopenharmony_ci for (;;) { 651f08c3bdfSopenharmony_ci switch (nextchar(stream)) { 652f08c3bdfSopenharmony_ci case EOF: 653f08c3bdfSopenharmony_ci return EOF; 654f08c3bdfSopenharmony_ci case '\n': 655f08c3bdfSopenharmony_ci return nextchar(stream); 656f08c3bdfSopenharmony_ci } 657f08c3bdfSopenharmony_ci } 658f08c3bdfSopenharmony_ci} 659f08c3bdfSopenharmony_ci 660f08c3bdfSopenharmony_cistatic int drop_stream_comment(stream_t *stream) 661f08c3bdfSopenharmony_ci{ 662f08c3bdfSopenharmony_ci int newline; 663f08c3bdfSopenharmony_ci int next; 664f08c3bdfSopenharmony_ci drop_token(stream); 665f08c3bdfSopenharmony_ci newline = stream->newline; 666f08c3bdfSopenharmony_ci 667f08c3bdfSopenharmony_ci next = nextchar(stream); 668f08c3bdfSopenharmony_ci for (;;) { 669f08c3bdfSopenharmony_ci int curr = next; 670f08c3bdfSopenharmony_ci if (curr == EOF) { 671f08c3bdfSopenharmony_ci warning(stream_pos(stream), "End of file in the middle of a comment"); 672f08c3bdfSopenharmony_ci return curr; 673f08c3bdfSopenharmony_ci } 674f08c3bdfSopenharmony_ci next = nextchar(stream); 675f08c3bdfSopenharmony_ci if (curr == '*' && next == '/') 676f08c3bdfSopenharmony_ci break; 677f08c3bdfSopenharmony_ci } 678f08c3bdfSopenharmony_ci stream->newline = newline; 679f08c3bdfSopenharmony_ci return nextchar(stream); 680f08c3bdfSopenharmony_ci} 681f08c3bdfSopenharmony_ci 682f08c3bdfSopenharmony_ciunsigned char combinations[][4] = COMBINATION_STRINGS; 683f08c3bdfSopenharmony_ci 684f08c3bdfSopenharmony_ci#define NR_COMBINATIONS (SPECIAL_ARG_SEPARATOR - SPECIAL_BASE) 685f08c3bdfSopenharmony_ci 686f08c3bdfSopenharmony_ci/* hash function for two-character punctuators - all give unique values */ 687f08c3bdfSopenharmony_ci#define special_hash(c0, c1) (((c0*8+c1*2)+((c0*8+c1*2)>>5))&31) 688f08c3bdfSopenharmony_ci 689f08c3bdfSopenharmony_ci/* 690f08c3bdfSopenharmony_ci * note that we won't get false positives - special_hash(0,0) is 0 and 691f08c3bdfSopenharmony_ci * entry 0 is filled (by +=), so all the missing ones are OK. 692f08c3bdfSopenharmony_ci */ 693f08c3bdfSopenharmony_cistatic unsigned char hash_results[32][2] = { 694f08c3bdfSopenharmony_ci#define RES(c0, c1) [special_hash(c0, c1)] = {c0, c1} 695f08c3bdfSopenharmony_ci RES('+', '='), /* 00 */ 696f08c3bdfSopenharmony_ci RES('/', '='), /* 01 */ 697f08c3bdfSopenharmony_ci RES('^', '='), /* 05 */ 698f08c3bdfSopenharmony_ci RES('&', '&'), /* 07 */ 699f08c3bdfSopenharmony_ci RES('#', '#'), /* 08 */ 700f08c3bdfSopenharmony_ci RES('<', '<'), /* 0a */ 701f08c3bdfSopenharmony_ci RES('<', '='), /* 0c */ 702f08c3bdfSopenharmony_ci RES('!', '='), /* 0e */ 703f08c3bdfSopenharmony_ci RES('%', '='), /* 0f */ 704f08c3bdfSopenharmony_ci RES('-', '-'), /* 10 */ 705f08c3bdfSopenharmony_ci RES('-', '='), /* 11 */ 706f08c3bdfSopenharmony_ci RES('-', '>'), /* 13 */ 707f08c3bdfSopenharmony_ci RES('=', '='), /* 15 */ 708f08c3bdfSopenharmony_ci RES('&', '='), /* 17 */ 709f08c3bdfSopenharmony_ci RES('*', '='), /* 18 */ 710f08c3bdfSopenharmony_ci RES('.', '.'), /* 1a */ 711f08c3bdfSopenharmony_ci RES('+', '+'), /* 1b */ 712f08c3bdfSopenharmony_ci RES('|', '='), /* 1c */ 713f08c3bdfSopenharmony_ci RES('>', '='), /* 1d */ 714f08c3bdfSopenharmony_ci RES('|', '|'), /* 1e */ 715f08c3bdfSopenharmony_ci RES('>', '>') /* 1f */ 716f08c3bdfSopenharmony_ci#undef RES 717f08c3bdfSopenharmony_ci}; 718f08c3bdfSopenharmony_cistatic int code[32] = { 719f08c3bdfSopenharmony_ci#define CODE(c0, c1, value) [special_hash(c0, c1)] = value 720f08c3bdfSopenharmony_ci CODE('+', '=', SPECIAL_ADD_ASSIGN), /* 00 */ 721f08c3bdfSopenharmony_ci CODE('/', '=', SPECIAL_DIV_ASSIGN), /* 01 */ 722f08c3bdfSopenharmony_ci CODE('^', '=', SPECIAL_XOR_ASSIGN), /* 05 */ 723f08c3bdfSopenharmony_ci CODE('&', '&', SPECIAL_LOGICAL_AND), /* 07 */ 724f08c3bdfSopenharmony_ci CODE('#', '#', SPECIAL_HASHHASH), /* 08 */ 725f08c3bdfSopenharmony_ci CODE('<', '<', SPECIAL_LEFTSHIFT), /* 0a */ 726f08c3bdfSopenharmony_ci CODE('<', '=', SPECIAL_LTE), /* 0c */ 727f08c3bdfSopenharmony_ci CODE('!', '=', SPECIAL_NOTEQUAL), /* 0e */ 728f08c3bdfSopenharmony_ci CODE('%', '=', SPECIAL_MOD_ASSIGN), /* 0f */ 729f08c3bdfSopenharmony_ci CODE('-', '-', SPECIAL_DECREMENT), /* 10 */ 730f08c3bdfSopenharmony_ci CODE('-', '=', SPECIAL_SUB_ASSIGN), /* 11 */ 731f08c3bdfSopenharmony_ci CODE('-', '>', SPECIAL_DEREFERENCE), /* 13 */ 732f08c3bdfSopenharmony_ci CODE('=', '=', SPECIAL_EQUAL), /* 15 */ 733f08c3bdfSopenharmony_ci CODE('&', '=', SPECIAL_AND_ASSIGN), /* 17 */ 734f08c3bdfSopenharmony_ci CODE('*', '=', SPECIAL_MUL_ASSIGN), /* 18 */ 735f08c3bdfSopenharmony_ci CODE('.', '.', SPECIAL_DOTDOT), /* 1a */ 736f08c3bdfSopenharmony_ci CODE('+', '+', SPECIAL_INCREMENT), /* 1b */ 737f08c3bdfSopenharmony_ci CODE('|', '=', SPECIAL_OR_ASSIGN), /* 1c */ 738f08c3bdfSopenharmony_ci CODE('>', '=', SPECIAL_GTE), /* 1d */ 739f08c3bdfSopenharmony_ci CODE('|', '|', SPECIAL_LOGICAL_OR), /* 1e */ 740f08c3bdfSopenharmony_ci CODE('>', '>', SPECIAL_RIGHTSHIFT) /* 1f */ 741f08c3bdfSopenharmony_ci#undef CODE 742f08c3bdfSopenharmony_ci}; 743f08c3bdfSopenharmony_ci 744f08c3bdfSopenharmony_cistatic int get_one_special(int c, stream_t *stream) 745f08c3bdfSopenharmony_ci{ 746f08c3bdfSopenharmony_ci struct token *token; 747f08c3bdfSopenharmony_ci int next, value, i; 748f08c3bdfSopenharmony_ci 749f08c3bdfSopenharmony_ci next = nextchar(stream); 750f08c3bdfSopenharmony_ci 751f08c3bdfSopenharmony_ci /* 752f08c3bdfSopenharmony_ci * Check for numbers, strings, character constants, and comments 753f08c3bdfSopenharmony_ci */ 754f08c3bdfSopenharmony_ci switch (c) { 755f08c3bdfSopenharmony_ci case '.': 756f08c3bdfSopenharmony_ci if (next >= '0' && next <= '9') 757f08c3bdfSopenharmony_ci return get_one_number(c, next, stream); 758f08c3bdfSopenharmony_ci break; 759f08c3bdfSopenharmony_ci case '"': 760f08c3bdfSopenharmony_ci return eat_string(next, stream, TOKEN_STRING); 761f08c3bdfSopenharmony_ci case '\'': 762f08c3bdfSopenharmony_ci return eat_string(next, stream, TOKEN_CHAR); 763f08c3bdfSopenharmony_ci case '/': 764f08c3bdfSopenharmony_ci if (next == '/') 765f08c3bdfSopenharmony_ci return drop_stream_eoln(stream); 766f08c3bdfSopenharmony_ci if (next == '*') 767f08c3bdfSopenharmony_ci return drop_stream_comment(stream); 768f08c3bdfSopenharmony_ci } 769f08c3bdfSopenharmony_ci 770f08c3bdfSopenharmony_ci /* 771f08c3bdfSopenharmony_ci * Check for combinations 772f08c3bdfSopenharmony_ci */ 773f08c3bdfSopenharmony_ci value = c; 774f08c3bdfSopenharmony_ci if (cclass[next + 1] & ValidSecond) { 775f08c3bdfSopenharmony_ci i = special_hash(c, next); 776f08c3bdfSopenharmony_ci if (hash_results[i][0] == c && hash_results[i][1] == next) { 777f08c3bdfSopenharmony_ci value = code[i]; 778f08c3bdfSopenharmony_ci next = nextchar(stream); 779f08c3bdfSopenharmony_ci if (value >= SPECIAL_LEFTSHIFT && 780f08c3bdfSopenharmony_ci next == "==."[value - SPECIAL_LEFTSHIFT]) { 781f08c3bdfSopenharmony_ci value += 3; 782f08c3bdfSopenharmony_ci next = nextchar(stream); 783f08c3bdfSopenharmony_ci } 784f08c3bdfSopenharmony_ci } 785f08c3bdfSopenharmony_ci } 786f08c3bdfSopenharmony_ci 787f08c3bdfSopenharmony_ci /* Pass it on.. */ 788f08c3bdfSopenharmony_ci token = stream->token; 789f08c3bdfSopenharmony_ci token_type(token) = TOKEN_SPECIAL; 790f08c3bdfSopenharmony_ci token->special = value; 791f08c3bdfSopenharmony_ci add_token(stream); 792f08c3bdfSopenharmony_ci return next; 793f08c3bdfSopenharmony_ci} 794f08c3bdfSopenharmony_ci 795f08c3bdfSopenharmony_ci#define IDENT_HASH_BITS (13) 796f08c3bdfSopenharmony_ci#define IDENT_HASH_SIZE (1<<IDENT_HASH_BITS) 797f08c3bdfSopenharmony_ci#define IDENT_HASH_MASK (IDENT_HASH_SIZE-1) 798f08c3bdfSopenharmony_ci 799f08c3bdfSopenharmony_ci#define ident_hash_init(c) (c) 800f08c3bdfSopenharmony_ci#define ident_hash_add(oldhash,c) ((oldhash)*11 + (c)) 801f08c3bdfSopenharmony_ci#define ident_hash_end(hash) ((((hash) >> IDENT_HASH_BITS) + (hash)) & IDENT_HASH_MASK) 802f08c3bdfSopenharmony_ci 803f08c3bdfSopenharmony_cistatic struct ident *hash_table[IDENT_HASH_SIZE]; 804f08c3bdfSopenharmony_cistatic int ident_hit, ident_miss, idents; 805f08c3bdfSopenharmony_ci 806f08c3bdfSopenharmony_civoid show_identifier_stats(void) 807f08c3bdfSopenharmony_ci{ 808f08c3bdfSopenharmony_ci int i; 809f08c3bdfSopenharmony_ci int distribution[100]; 810f08c3bdfSopenharmony_ci 811f08c3bdfSopenharmony_ci fprintf(stderr, "identifiers: %d hits, %d misses\n", 812f08c3bdfSopenharmony_ci ident_hit, ident_miss); 813f08c3bdfSopenharmony_ci 814f08c3bdfSopenharmony_ci for (i = 0; i < 100; i++) 815f08c3bdfSopenharmony_ci distribution[i] = 0; 816f08c3bdfSopenharmony_ci 817f08c3bdfSopenharmony_ci for (i = 0; i < IDENT_HASH_SIZE; i++) { 818f08c3bdfSopenharmony_ci struct ident * ident = hash_table[i]; 819f08c3bdfSopenharmony_ci int count = 0; 820f08c3bdfSopenharmony_ci 821f08c3bdfSopenharmony_ci while (ident) { 822f08c3bdfSopenharmony_ci count++; 823f08c3bdfSopenharmony_ci ident = ident->next; 824f08c3bdfSopenharmony_ci } 825f08c3bdfSopenharmony_ci if (count > 99) 826f08c3bdfSopenharmony_ci count = 99; 827f08c3bdfSopenharmony_ci distribution[count]++; 828f08c3bdfSopenharmony_ci } 829f08c3bdfSopenharmony_ci 830f08c3bdfSopenharmony_ci for (i = 0; i < 100; i++) { 831f08c3bdfSopenharmony_ci if (distribution[i]) 832f08c3bdfSopenharmony_ci fprintf(stderr, "%2d: %d buckets\n", i, distribution[i]); 833f08c3bdfSopenharmony_ci } 834f08c3bdfSopenharmony_ci} 835f08c3bdfSopenharmony_ci 836f08c3bdfSopenharmony_cistatic struct ident *alloc_ident(const char *name, int len) 837f08c3bdfSopenharmony_ci{ 838f08c3bdfSopenharmony_ci struct ident *ident = __alloc_ident(len); 839f08c3bdfSopenharmony_ci ident->symbols = NULL; 840f08c3bdfSopenharmony_ci ident->len = len; 841f08c3bdfSopenharmony_ci ident->tainted = 0; 842f08c3bdfSopenharmony_ci memcpy(ident->name, name, len); 843f08c3bdfSopenharmony_ci return ident; 844f08c3bdfSopenharmony_ci} 845f08c3bdfSopenharmony_ci 846f08c3bdfSopenharmony_cistatic struct ident * insert_hash(struct ident *ident, unsigned long hash) 847f08c3bdfSopenharmony_ci{ 848f08c3bdfSopenharmony_ci ident->next = hash_table[hash]; 849f08c3bdfSopenharmony_ci hash_table[hash] = ident; 850f08c3bdfSopenharmony_ci ident_miss++; 851f08c3bdfSopenharmony_ci return ident; 852f08c3bdfSopenharmony_ci} 853f08c3bdfSopenharmony_ci 854f08c3bdfSopenharmony_cistatic struct ident *create_hashed_ident(const char *name, int len, unsigned long hash) 855f08c3bdfSopenharmony_ci{ 856f08c3bdfSopenharmony_ci struct ident *ident; 857f08c3bdfSopenharmony_ci struct ident **p; 858f08c3bdfSopenharmony_ci 859f08c3bdfSopenharmony_ci p = &hash_table[hash]; 860f08c3bdfSopenharmony_ci while ((ident = *p) != NULL) { 861f08c3bdfSopenharmony_ci if (ident->len == (unsigned char) len) { 862f08c3bdfSopenharmony_ci if (strncmp(name, ident->name, len) != 0) 863f08c3bdfSopenharmony_ci goto next; 864f08c3bdfSopenharmony_ci 865f08c3bdfSopenharmony_ci ident_hit++; 866f08c3bdfSopenharmony_ci return ident; 867f08c3bdfSopenharmony_ci } 868f08c3bdfSopenharmony_cinext: 869f08c3bdfSopenharmony_ci //misses++; 870f08c3bdfSopenharmony_ci p = &ident->next; 871f08c3bdfSopenharmony_ci } 872f08c3bdfSopenharmony_ci ident = alloc_ident(name, len); 873f08c3bdfSopenharmony_ci *p = ident; 874f08c3bdfSopenharmony_ci ident->next = NULL; 875f08c3bdfSopenharmony_ci ident_miss++; 876f08c3bdfSopenharmony_ci idents++; 877f08c3bdfSopenharmony_ci return ident; 878f08c3bdfSopenharmony_ci} 879f08c3bdfSopenharmony_ci 880f08c3bdfSopenharmony_cistatic unsigned long hash_name(const char *name, int len) 881f08c3bdfSopenharmony_ci{ 882f08c3bdfSopenharmony_ci unsigned long hash; 883f08c3bdfSopenharmony_ci const unsigned char *p = (const unsigned char *)name; 884f08c3bdfSopenharmony_ci 885f08c3bdfSopenharmony_ci hash = ident_hash_init(*p++); 886f08c3bdfSopenharmony_ci while (--len) { 887f08c3bdfSopenharmony_ci unsigned int i = *p++; 888f08c3bdfSopenharmony_ci hash = ident_hash_add(hash, i); 889f08c3bdfSopenharmony_ci } 890f08c3bdfSopenharmony_ci return ident_hash_end(hash); 891f08c3bdfSopenharmony_ci} 892f08c3bdfSopenharmony_ci 893f08c3bdfSopenharmony_cistruct ident *hash_ident(struct ident *ident) 894f08c3bdfSopenharmony_ci{ 895f08c3bdfSopenharmony_ci return insert_hash(ident, hash_name(ident->name, ident->len)); 896f08c3bdfSopenharmony_ci} 897f08c3bdfSopenharmony_ci 898f08c3bdfSopenharmony_cistruct ident *built_in_ident(const char *name) 899f08c3bdfSopenharmony_ci{ 900f08c3bdfSopenharmony_ci int len = strlen(name); 901f08c3bdfSopenharmony_ci return create_hashed_ident(name, len, hash_name(name, len)); 902f08c3bdfSopenharmony_ci} 903f08c3bdfSopenharmony_ci 904f08c3bdfSopenharmony_cistruct token *built_in_token(int stream, struct ident *ident) 905f08c3bdfSopenharmony_ci{ 906f08c3bdfSopenharmony_ci struct token *token; 907f08c3bdfSopenharmony_ci 908f08c3bdfSopenharmony_ci token = __alloc_token(0); 909f08c3bdfSopenharmony_ci token->pos.stream = stream; 910f08c3bdfSopenharmony_ci token_type(token) = TOKEN_IDENT; 911f08c3bdfSopenharmony_ci token->ident = ident; 912f08c3bdfSopenharmony_ci return token; 913f08c3bdfSopenharmony_ci} 914f08c3bdfSopenharmony_ci 915f08c3bdfSopenharmony_cistatic int get_one_identifier(int c, stream_t *stream) 916f08c3bdfSopenharmony_ci{ 917f08c3bdfSopenharmony_ci struct token *token; 918f08c3bdfSopenharmony_ci struct ident *ident; 919f08c3bdfSopenharmony_ci unsigned long hash; 920f08c3bdfSopenharmony_ci char buf[256]; 921f08c3bdfSopenharmony_ci int len = 1; 922f08c3bdfSopenharmony_ci int next; 923f08c3bdfSopenharmony_ci 924f08c3bdfSopenharmony_ci hash = ident_hash_init(c); 925f08c3bdfSopenharmony_ci buf[0] = c; 926f08c3bdfSopenharmony_ci for (;;) { 927f08c3bdfSopenharmony_ci next = nextchar(stream); 928f08c3bdfSopenharmony_ci if (!(cclass[next + 1] & (Letter | Digit))) 929f08c3bdfSopenharmony_ci break; 930f08c3bdfSopenharmony_ci if (len >= sizeof(buf)) 931f08c3bdfSopenharmony_ci break; 932f08c3bdfSopenharmony_ci hash = ident_hash_add(hash, next); 933f08c3bdfSopenharmony_ci buf[len] = next; 934f08c3bdfSopenharmony_ci len++; 935f08c3bdfSopenharmony_ci }; 936f08c3bdfSopenharmony_ci if (cclass[next + 1] & Quote) { 937f08c3bdfSopenharmony_ci if (len == 1 && buf[0] == 'L') { 938f08c3bdfSopenharmony_ci if (next == '\'') 939f08c3bdfSopenharmony_ci return eat_string(nextchar(stream), stream, 940f08c3bdfSopenharmony_ci TOKEN_WIDE_CHAR); 941f08c3bdfSopenharmony_ci else 942f08c3bdfSopenharmony_ci return eat_string(nextchar(stream), stream, 943f08c3bdfSopenharmony_ci TOKEN_WIDE_STRING); 944f08c3bdfSopenharmony_ci } 945f08c3bdfSopenharmony_ci } 946f08c3bdfSopenharmony_ci hash = ident_hash_end(hash); 947f08c3bdfSopenharmony_ci ident = create_hashed_ident(buf, len, hash); 948f08c3bdfSopenharmony_ci 949f08c3bdfSopenharmony_ci /* Pass it on.. */ 950f08c3bdfSopenharmony_ci token = stream->token; 951f08c3bdfSopenharmony_ci token_type(token) = TOKEN_IDENT; 952f08c3bdfSopenharmony_ci token->ident = ident; 953f08c3bdfSopenharmony_ci add_token(stream); 954f08c3bdfSopenharmony_ci return next; 955f08c3bdfSopenharmony_ci} 956f08c3bdfSopenharmony_ci 957f08c3bdfSopenharmony_cistatic int get_one_token(int c, stream_t *stream) 958f08c3bdfSopenharmony_ci{ 959f08c3bdfSopenharmony_ci long class = cclass[c + 1]; 960f08c3bdfSopenharmony_ci if (class & Digit) 961f08c3bdfSopenharmony_ci return get_one_number(c, nextchar(stream), stream); 962f08c3bdfSopenharmony_ci if (class & Letter) 963f08c3bdfSopenharmony_ci return get_one_identifier(c, stream); 964f08c3bdfSopenharmony_ci return get_one_special(c, stream); 965f08c3bdfSopenharmony_ci} 966f08c3bdfSopenharmony_ci 967f08c3bdfSopenharmony_cistatic struct token *setup_stream(stream_t *stream, int idx, int fd, 968f08c3bdfSopenharmony_ci unsigned char *buf, unsigned int buf_size) 969f08c3bdfSopenharmony_ci{ 970f08c3bdfSopenharmony_ci struct token *begin; 971f08c3bdfSopenharmony_ci 972f08c3bdfSopenharmony_ci stream->nr = idx; 973f08c3bdfSopenharmony_ci stream->line = 1; 974f08c3bdfSopenharmony_ci stream->newline = 1; 975f08c3bdfSopenharmony_ci stream->whitespace = 0; 976f08c3bdfSopenharmony_ci stream->pos = 0; 977f08c3bdfSopenharmony_ci 978f08c3bdfSopenharmony_ci stream->token = NULL; 979f08c3bdfSopenharmony_ci stream->fd = fd; 980f08c3bdfSopenharmony_ci stream->offset = 0; 981f08c3bdfSopenharmony_ci stream->size = buf_size; 982f08c3bdfSopenharmony_ci stream->buffer = buf; 983f08c3bdfSopenharmony_ci 984f08c3bdfSopenharmony_ci begin = alloc_token(stream); 985f08c3bdfSopenharmony_ci token_type(begin) = TOKEN_STREAMBEGIN; 986f08c3bdfSopenharmony_ci stream->tokenlist = &begin->next; 987f08c3bdfSopenharmony_ci return begin; 988f08c3bdfSopenharmony_ci} 989f08c3bdfSopenharmony_ci 990f08c3bdfSopenharmony_cistatic struct token *tokenize_stream(stream_t *stream) 991f08c3bdfSopenharmony_ci{ 992f08c3bdfSopenharmony_ci int c = nextchar(stream); 993f08c3bdfSopenharmony_ci while (c != EOF) { 994f08c3bdfSopenharmony_ci if (!isspace(c)) { 995f08c3bdfSopenharmony_ci struct token *token = alloc_token(stream); 996f08c3bdfSopenharmony_ci stream->token = token; 997f08c3bdfSopenharmony_ci stream->newline = 0; 998f08c3bdfSopenharmony_ci stream->whitespace = 0; 999f08c3bdfSopenharmony_ci c = get_one_token(c, stream); 1000f08c3bdfSopenharmony_ci continue; 1001f08c3bdfSopenharmony_ci } 1002f08c3bdfSopenharmony_ci stream->whitespace = 1; 1003f08c3bdfSopenharmony_ci c = nextchar(stream); 1004f08c3bdfSopenharmony_ci } 1005f08c3bdfSopenharmony_ci return mark_eof(stream); 1006f08c3bdfSopenharmony_ci} 1007f08c3bdfSopenharmony_ci 1008f08c3bdfSopenharmony_cistruct token * tokenize_buffer(void *buffer, unsigned long size, struct token **endtoken) 1009f08c3bdfSopenharmony_ci{ 1010f08c3bdfSopenharmony_ci stream_t stream; 1011f08c3bdfSopenharmony_ci struct token *begin; 1012f08c3bdfSopenharmony_ci 1013f08c3bdfSopenharmony_ci begin = setup_stream(&stream, 0, -1, buffer, size); 1014f08c3bdfSopenharmony_ci *endtoken = tokenize_stream(&stream); 1015f08c3bdfSopenharmony_ci return begin; 1016f08c3bdfSopenharmony_ci} 1017f08c3bdfSopenharmony_ci 1018f08c3bdfSopenharmony_cistruct token * tokenize(const struct position *pos, const char *name, int fd, struct token *endtoken, const char **next_path) 1019f08c3bdfSopenharmony_ci{ 1020f08c3bdfSopenharmony_ci struct token *begin, *end; 1021f08c3bdfSopenharmony_ci stream_t stream; 1022f08c3bdfSopenharmony_ci unsigned char buffer[BUFSIZE]; 1023f08c3bdfSopenharmony_ci int idx; 1024f08c3bdfSopenharmony_ci 1025f08c3bdfSopenharmony_ci idx = init_stream(pos, name, fd, next_path); 1026f08c3bdfSopenharmony_ci if (idx < 0) { 1027f08c3bdfSopenharmony_ci // info(endtoken->pos, "File %s is const", name); 1028f08c3bdfSopenharmony_ci return endtoken; 1029f08c3bdfSopenharmony_ci } 1030f08c3bdfSopenharmony_ci 1031f08c3bdfSopenharmony_ci begin = setup_stream(&stream, idx, fd, buffer, 0); 1032f08c3bdfSopenharmony_ci end = tokenize_stream(&stream); 1033f08c3bdfSopenharmony_ci if (endtoken) 1034f08c3bdfSopenharmony_ci end->next = endtoken; 1035f08c3bdfSopenharmony_ci return begin; 1036f08c3bdfSopenharmony_ci} 1037