1f08c3bdfSopenharmony_ci/*
2f08c3bdfSopenharmony_ci * This is a really stupid C tokenizer. It doesn't do any include
3f08c3bdfSopenharmony_ci * files or anything complex at all. That's the preprocessor.
4f08c3bdfSopenharmony_ci *
5f08c3bdfSopenharmony_ci * Copyright (C) 2003 Transmeta Corp.
6f08c3bdfSopenharmony_ci *               2003 Linus Torvalds
7f08c3bdfSopenharmony_ci *
8f08c3bdfSopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a copy
9f08c3bdfSopenharmony_ci * of this software and associated documentation files (the "Software"), to deal
10f08c3bdfSopenharmony_ci * in the Software without restriction, including without limitation the rights
11f08c3bdfSopenharmony_ci * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12f08c3bdfSopenharmony_ci * copies of the Software, and to permit persons to whom the Software is
13f08c3bdfSopenharmony_ci * furnished to do so, subject to the following conditions:
14f08c3bdfSopenharmony_ci *
15f08c3bdfSopenharmony_ci * The above copyright notice and this permission notice shall be included in
16f08c3bdfSopenharmony_ci * all copies or substantial portions of the Software.
17f08c3bdfSopenharmony_ci *
18f08c3bdfSopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19f08c3bdfSopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20f08c3bdfSopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21f08c3bdfSopenharmony_ci * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22f08c3bdfSopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23f08c3bdfSopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24f08c3bdfSopenharmony_ci * THE SOFTWARE.
25f08c3bdfSopenharmony_ci */
26f08c3bdfSopenharmony_ci#include <stdio.h>
27f08c3bdfSopenharmony_ci#include <stdlib.h>
28f08c3bdfSopenharmony_ci#include <stdarg.h>
29f08c3bdfSopenharmony_ci#include <stddef.h>
30f08c3bdfSopenharmony_ci#include <string.h>
31f08c3bdfSopenharmony_ci#include <ctype.h>
32f08c3bdfSopenharmony_ci#include <unistd.h>
33f08c3bdfSopenharmony_ci#include <stdint.h>
34f08c3bdfSopenharmony_ci
35f08c3bdfSopenharmony_ci#include "lib.h"
36f08c3bdfSopenharmony_ci#include "allocate.h"
37f08c3bdfSopenharmony_ci#include "token.h"
38f08c3bdfSopenharmony_ci#include "symbol.h"
39f08c3bdfSopenharmony_ci
/* Value the character readers in this file return at end of input. */
#define EOF (-1)
/* Number of bytes requested from read() on each buffer refill. */
#define BUFSIZE (8192)

/* Number of entries of input_streams[] that are in use. */
int input_stream_nr = 0;
/* Table of stream descriptors, grown on demand by init_stream(). */
struct stream *input_streams;
/* Number of entries input_streams[] currently has room for. */
static int input_streams_allocated;
/* Column multiple a tab advances to when tracking positions. */
unsigned int tabstop = 8;
48f08c3bdfSopenharmony_ci
/*
 * Per-file tokenizer state: where the bytes come from, how far we have
 * read, and the token list being built.
 */
typedef struct {
	int fd;			/* descriptor to read() from; negative means no refill */
	int offset;		/* index of the next unread byte in buffer */
	int size;		/* number of valid bytes in buffer */
	int pos;		/* current column */
	int line;		/* current line */
	int nr;			/* stream number copied into token positions */
	int newline;		/* flag copied into the next token's position */
	int whitespace;		/* flag copied into the next token's position */
	struct token **tokenlist;	/* where the next finished token gets linked */
	struct token *token;		/* token currently under construction */
	unsigned char *buffer;		/* raw input bytes */
} stream_t;
57f08c3bdfSopenharmony_ci
58f08c3bdfSopenharmony_ciconst char *stream_name(int stream)
59f08c3bdfSopenharmony_ci{
60f08c3bdfSopenharmony_ci	if (stream < 0 || stream > input_stream_nr)
61f08c3bdfSopenharmony_ci		return "<bad stream>";
62f08c3bdfSopenharmony_ci	return input_streams[stream].name;
63f08c3bdfSopenharmony_ci}
64f08c3bdfSopenharmony_ci
65f08c3bdfSopenharmony_ciint stream_prev(int stream)
66f08c3bdfSopenharmony_ci{
67f08c3bdfSopenharmony_ci	if (stream < 0 || stream > input_stream_nr)
68f08c3bdfSopenharmony_ci		return -1;
69f08c3bdfSopenharmony_ci	stream = input_streams[stream].pos.stream;
70f08c3bdfSopenharmony_ci	if (stream > input_stream_nr)
71f08c3bdfSopenharmony_ci		return -1;
72f08c3bdfSopenharmony_ci	return stream;
73f08c3bdfSopenharmony_ci}
74f08c3bdfSopenharmony_ci
75f08c3bdfSopenharmony_cistatic struct position stream_pos(stream_t *stream)
76f08c3bdfSopenharmony_ci{
77f08c3bdfSopenharmony_ci	struct position pos;
78f08c3bdfSopenharmony_ci	pos.type = 0;
79f08c3bdfSopenharmony_ci	pos.stream = stream->nr;
80f08c3bdfSopenharmony_ci	pos.newline = stream->newline;
81f08c3bdfSopenharmony_ci	pos.whitespace = stream->whitespace;
82f08c3bdfSopenharmony_ci	pos.pos = stream->pos;
83f08c3bdfSopenharmony_ci	pos.line = stream->line;
84f08c3bdfSopenharmony_ci	pos.noexpand = 0;
85f08c3bdfSopenharmony_ci	return pos;
86f08c3bdfSopenharmony_ci}
87f08c3bdfSopenharmony_ci
88f08c3bdfSopenharmony_ciconst char *show_special(int val)
89f08c3bdfSopenharmony_ci{
90f08c3bdfSopenharmony_ci	static char buffer[4];
91f08c3bdfSopenharmony_ci
92f08c3bdfSopenharmony_ci	buffer[0] = val;
93f08c3bdfSopenharmony_ci	buffer[1] = 0;
94f08c3bdfSopenharmony_ci	if (val >= SPECIAL_BASE)
95f08c3bdfSopenharmony_ci		strcpy(buffer, (char *) combinations[val - SPECIAL_BASE]);
96f08c3bdfSopenharmony_ci	return buffer;
97f08c3bdfSopenharmony_ci}
98f08c3bdfSopenharmony_ci
99f08c3bdfSopenharmony_ciconst char *show_ident(const struct ident *ident)
100f08c3bdfSopenharmony_ci{
101f08c3bdfSopenharmony_ci	static char buff[4][256];
102f08c3bdfSopenharmony_ci	static int n;
103f08c3bdfSopenharmony_ci	char *buffer;
104f08c3bdfSopenharmony_ci
105f08c3bdfSopenharmony_ci	if (!ident)
106f08c3bdfSopenharmony_ci		return "<noident>";
107f08c3bdfSopenharmony_ci	buffer = buff[3 & ++n];
108f08c3bdfSopenharmony_ci	sprintf(buffer, "%.*s", ident->len, ident->name);
109f08c3bdfSopenharmony_ci	return buffer;
110f08c3bdfSopenharmony_ci}
111f08c3bdfSopenharmony_ci
/*
 * Append a source-style rendering of byte @c at @ptr and return the
 * position just past what was written (no NUL is appended for printable
 * characters or the \n / \t escapes).
 *
 * Printable characters are copied, with a backslash in front of @escape
 * and of backslash itself.  Newline and tab become \n and \t.  Anything
 * else becomes an octal escape, padded to three digits when @next is a
 * digit so the following character cannot be read as part of it.
 */
static char *charstr(char *ptr, unsigned char c, unsigned char escape, unsigned char next)
{
	int width;

	if (!isprint(c)) {
		*ptr++ = '\\';
		if (c == '\n' || c == '\t') {
			*ptr++ = (c == '\n') ? 'n' : 't';
			return ptr;
		}
		width = isdigit(next) ? 3 : 1;
		ptr += sprintf(ptr, "%0*o", width, c);
		return ptr;
	}

	if (c == '\\' || c == escape)
		*ptr++ = '\\';
	*ptr++ = c;
	return ptr;
}
134f08c3bdfSopenharmony_ci
135f08c3bdfSopenharmony_ciconst char *show_string(const struct string *string)
136f08c3bdfSopenharmony_ci{
137f08c3bdfSopenharmony_ci	static char buffer[4 * MAX_STRING + 3];
138f08c3bdfSopenharmony_ci	char *ptr;
139f08c3bdfSopenharmony_ci	int i;
140f08c3bdfSopenharmony_ci
141f08c3bdfSopenharmony_ci	if (!string || !string->length)
142f08c3bdfSopenharmony_ci		return "<bad_string>";
143f08c3bdfSopenharmony_ci	ptr = buffer;
144f08c3bdfSopenharmony_ci	*ptr++ = '"';
145f08c3bdfSopenharmony_ci	for (i = 0; i < string->length-1; i++) {
146f08c3bdfSopenharmony_ci		const char *p = string->data + i;
147f08c3bdfSopenharmony_ci		ptr = charstr(ptr, p[0], '"', p[1]);
148f08c3bdfSopenharmony_ci	}
149f08c3bdfSopenharmony_ci	*ptr++ = '"';
150f08c3bdfSopenharmony_ci	*ptr = '\0';
151f08c3bdfSopenharmony_ci	return buffer;
152f08c3bdfSopenharmony_ci}
153f08c3bdfSopenharmony_ci
154f08c3bdfSopenharmony_cistatic const char *show_char(const char *s, size_t len, char prefix, char delim)
155f08c3bdfSopenharmony_ci{
156f08c3bdfSopenharmony_ci	static char buffer[MAX_STRING + 4];
157f08c3bdfSopenharmony_ci	char *p = buffer;
158f08c3bdfSopenharmony_ci	if (prefix)
159f08c3bdfSopenharmony_ci		*p++ = prefix;
160f08c3bdfSopenharmony_ci	*p++ = delim;
161f08c3bdfSopenharmony_ci	memcpy(p, s, len);
162f08c3bdfSopenharmony_ci	p += len;
163f08c3bdfSopenharmony_ci	*p++ = delim;
164f08c3bdfSopenharmony_ci	*p++ = '\0';
165f08c3bdfSopenharmony_ci	return buffer;
166f08c3bdfSopenharmony_ci}
167f08c3bdfSopenharmony_ci
168f08c3bdfSopenharmony_cistatic const char *quote_char(const char *s, size_t len, char prefix, char delim)
169f08c3bdfSopenharmony_ci{
170f08c3bdfSopenharmony_ci	static char buffer[2*MAX_STRING + 6];
171f08c3bdfSopenharmony_ci	size_t i;
172f08c3bdfSopenharmony_ci	char *p = buffer;
173f08c3bdfSopenharmony_ci	if (prefix)
174f08c3bdfSopenharmony_ci		*p++ = prefix;
175f08c3bdfSopenharmony_ci	if (delim == '"')
176f08c3bdfSopenharmony_ci		*p++ = '\\';
177f08c3bdfSopenharmony_ci	*p++ = delim;
178f08c3bdfSopenharmony_ci	for (i = 0; i < len; i++) {
179f08c3bdfSopenharmony_ci		if (s[i] == '"' || s[i] == '\\')
180f08c3bdfSopenharmony_ci			*p++ = '\\';
181f08c3bdfSopenharmony_ci		*p++ = s[i];
182f08c3bdfSopenharmony_ci	}
183f08c3bdfSopenharmony_ci	if (delim == '"')
184f08c3bdfSopenharmony_ci		*p++ = '\\';
185f08c3bdfSopenharmony_ci	*p++ = delim;
186f08c3bdfSopenharmony_ci	*p++ = '\0';
187f08c3bdfSopenharmony_ci	return buffer;
188f08c3bdfSopenharmony_ci}
189f08c3bdfSopenharmony_ci
190f08c3bdfSopenharmony_ciconst char *show_token(const struct token *token)
191f08c3bdfSopenharmony_ci{
192f08c3bdfSopenharmony_ci	static char buffer[256];
193f08c3bdfSopenharmony_ci
194f08c3bdfSopenharmony_ci	if (!token)
195f08c3bdfSopenharmony_ci		return "<no token>";
196f08c3bdfSopenharmony_ci	switch (token_type(token)) {
197f08c3bdfSopenharmony_ci	case TOKEN_ERROR:
198f08c3bdfSopenharmony_ci		return "syntax error";
199f08c3bdfSopenharmony_ci
200f08c3bdfSopenharmony_ci	case TOKEN_EOF:
201f08c3bdfSopenharmony_ci		return "end-of-input";
202f08c3bdfSopenharmony_ci
203f08c3bdfSopenharmony_ci	case TOKEN_IDENT:
204f08c3bdfSopenharmony_ci		return show_ident(token->ident);
205f08c3bdfSopenharmony_ci
206f08c3bdfSopenharmony_ci	case TOKEN_NUMBER:
207f08c3bdfSopenharmony_ci		return token->number;
208f08c3bdfSopenharmony_ci
209f08c3bdfSopenharmony_ci	case TOKEN_SPECIAL:
210f08c3bdfSopenharmony_ci		return show_special(token->special);
211f08c3bdfSopenharmony_ci
212f08c3bdfSopenharmony_ci	case TOKEN_CHAR:
213f08c3bdfSopenharmony_ci		return show_char(token->string->data,
214f08c3bdfSopenharmony_ci			token->string->length - 1, 0, '\'');
215f08c3bdfSopenharmony_ci	case TOKEN_CHAR_EMBEDDED_0 ... TOKEN_CHAR_EMBEDDED_3:
216f08c3bdfSopenharmony_ci		return show_char(token->embedded,
217f08c3bdfSopenharmony_ci			token_type(token) - TOKEN_CHAR, 0, '\'');
218f08c3bdfSopenharmony_ci	case TOKEN_WIDE_CHAR:
219f08c3bdfSopenharmony_ci		return show_char(token->string->data,
220f08c3bdfSopenharmony_ci			token->string->length - 1, 'L', '\'');
221f08c3bdfSopenharmony_ci	case TOKEN_WIDE_CHAR_EMBEDDED_0 ... TOKEN_WIDE_CHAR_EMBEDDED_3:
222f08c3bdfSopenharmony_ci		return show_char(token->embedded,
223f08c3bdfSopenharmony_ci			token_type(token) - TOKEN_WIDE_CHAR, 'L', '\'');
224f08c3bdfSopenharmony_ci	case TOKEN_STRING:
225f08c3bdfSopenharmony_ci		return show_char(token->string->data,
226f08c3bdfSopenharmony_ci			token->string->length - 1, 0, '"');
227f08c3bdfSopenharmony_ci	case TOKEN_WIDE_STRING:
228f08c3bdfSopenharmony_ci		return show_char(token->string->data,
229f08c3bdfSopenharmony_ci			token->string->length - 1, 'L', '"');
230f08c3bdfSopenharmony_ci
231f08c3bdfSopenharmony_ci	case TOKEN_STREAMBEGIN:
232f08c3bdfSopenharmony_ci		sprintf(buffer, "<beginning of '%s'>", stream_name(token->pos.stream));
233f08c3bdfSopenharmony_ci		return buffer;
234f08c3bdfSopenharmony_ci
235f08c3bdfSopenharmony_ci	case TOKEN_STREAMEND:
236f08c3bdfSopenharmony_ci		sprintf(buffer, "<end of '%s'>", stream_name(token->pos.stream));
237f08c3bdfSopenharmony_ci		return buffer;
238f08c3bdfSopenharmony_ci
239f08c3bdfSopenharmony_ci	case TOKEN_UNTAINT:
240f08c3bdfSopenharmony_ci		sprintf(buffer, "<untaint>");
241f08c3bdfSopenharmony_ci		return buffer;
242f08c3bdfSopenharmony_ci
243f08c3bdfSopenharmony_ci	case TOKEN_ARG_COUNT:
244f08c3bdfSopenharmony_ci		sprintf(buffer, "<argcnt>");
245f08c3bdfSopenharmony_ci		return buffer;
246f08c3bdfSopenharmony_ci
247f08c3bdfSopenharmony_ci	default:
248f08c3bdfSopenharmony_ci		sprintf(buffer, "unhandled token type '%d' ", token_type(token));
249f08c3bdfSopenharmony_ci		return buffer;
250f08c3bdfSopenharmony_ci	}
251f08c3bdfSopenharmony_ci}
252f08c3bdfSopenharmony_ci
253f08c3bdfSopenharmony_ciconst char *quote_token(const struct token *token)
254f08c3bdfSopenharmony_ci{
255f08c3bdfSopenharmony_ci	static char buffer[256];
256f08c3bdfSopenharmony_ci
257f08c3bdfSopenharmony_ci	switch (token_type(token)) {
258f08c3bdfSopenharmony_ci	case TOKEN_ERROR:
259f08c3bdfSopenharmony_ci		return "syntax error";
260f08c3bdfSopenharmony_ci
261f08c3bdfSopenharmony_ci	case TOKEN_IDENT:
262f08c3bdfSopenharmony_ci		return show_ident(token->ident);
263f08c3bdfSopenharmony_ci
264f08c3bdfSopenharmony_ci	case TOKEN_NUMBER:
265f08c3bdfSopenharmony_ci		return token->number;
266f08c3bdfSopenharmony_ci
267f08c3bdfSopenharmony_ci	case TOKEN_SPECIAL:
268f08c3bdfSopenharmony_ci		return show_special(token->special);
269f08c3bdfSopenharmony_ci
270f08c3bdfSopenharmony_ci	case TOKEN_CHAR:
271f08c3bdfSopenharmony_ci		return quote_char(token->string->data,
272f08c3bdfSopenharmony_ci			token->string->length - 1, 0, '\'');
273f08c3bdfSopenharmony_ci	case TOKEN_CHAR_EMBEDDED_0 ... TOKEN_CHAR_EMBEDDED_3:
274f08c3bdfSopenharmony_ci		return quote_char(token->embedded,
275f08c3bdfSopenharmony_ci			token_type(token) - TOKEN_CHAR, 0, '\'');
276f08c3bdfSopenharmony_ci	case TOKEN_WIDE_CHAR:
277f08c3bdfSopenharmony_ci		return quote_char(token->string->data,
278f08c3bdfSopenharmony_ci			token->string->length - 1, 'L', '\'');
279f08c3bdfSopenharmony_ci	case TOKEN_WIDE_CHAR_EMBEDDED_0 ... TOKEN_WIDE_CHAR_EMBEDDED_3:
280f08c3bdfSopenharmony_ci		return quote_char(token->embedded,
281f08c3bdfSopenharmony_ci			token_type(token) - TOKEN_WIDE_CHAR, 'L', '\'');
282f08c3bdfSopenharmony_ci	case TOKEN_STRING:
283f08c3bdfSopenharmony_ci		return quote_char(token->string->data,
284f08c3bdfSopenharmony_ci			token->string->length - 1, 0, '"');
285f08c3bdfSopenharmony_ci	case TOKEN_WIDE_STRING:
286f08c3bdfSopenharmony_ci		return quote_char(token->string->data,
287f08c3bdfSopenharmony_ci			token->string->length - 1, 'L', '"');
288f08c3bdfSopenharmony_ci	default:
289f08c3bdfSopenharmony_ci		sprintf(buffer, "unhandled token type '%d' ", token_type(token));
290f08c3bdfSopenharmony_ci		return buffer;
291f08c3bdfSopenharmony_ci	}
292f08c3bdfSopenharmony_ci}
293f08c3bdfSopenharmony_ci
#define HASHED_INPUT_BITS (6)
#define HASHED_INPUT (1 << HASHED_INPUT_BITS)
#define HASH_PRIME 0x9e370001UL

/* Head of each hash chain: a stream number, or -1 for an empty bucket. */
static int input_stream_hashes[HASHED_INPUT] = { [0 ... HASHED_INPUT-1] = -1 };

/*
 * Map the stream name @name to its bucket in input_stream_hashes[] and
 * return a pointer to that bucket, so the caller can both read the
 * chain head and replace it.
 */
int *hash_stream(const char *name)
{
	const unsigned char *cursor = (const unsigned char *) name;
	uint32_t acc = 0;

	for (; *cursor; cursor++) {
		unsigned int byte = *cursor;

		/* mix in the byte with its nibbles swapped, then spread */
		acc += (byte << 4) + (byte >> 4);
		acc *= 11;
	}

	/* multiplicative hash: keep the top HASHED_INPUT_BITS bits */
	acc *= HASH_PRIME;
	return &input_stream_hashes[acc >> (32 - HASHED_INPUT_BITS)];
}
312f08c3bdfSopenharmony_ci
313f08c3bdfSopenharmony_ciint init_stream(const struct position *pos, const char *name, int fd, const char **next_path)
314f08c3bdfSopenharmony_ci{
315f08c3bdfSopenharmony_ci	int stream = input_stream_nr, *hash;
316f08c3bdfSopenharmony_ci	struct stream *current;
317f08c3bdfSopenharmony_ci
318f08c3bdfSopenharmony_ci	if (stream >= input_streams_allocated) {
319f08c3bdfSopenharmony_ci		int newalloc = stream * 4 / 3 + 10;
320f08c3bdfSopenharmony_ci		input_streams = realloc(input_streams, newalloc * sizeof(struct stream));
321f08c3bdfSopenharmony_ci		if (!input_streams)
322f08c3bdfSopenharmony_ci			die("Unable to allocate more streams space");
323f08c3bdfSopenharmony_ci		input_streams_allocated = newalloc;
324f08c3bdfSopenharmony_ci	}
325f08c3bdfSopenharmony_ci	current = input_streams + stream;
326f08c3bdfSopenharmony_ci	memset(current, 0, sizeof(*current));
327f08c3bdfSopenharmony_ci	current->name = name;
328f08c3bdfSopenharmony_ci	current->fd = fd;
329f08c3bdfSopenharmony_ci	current->next_path = next_path;
330f08c3bdfSopenharmony_ci	current->path = NULL;
331f08c3bdfSopenharmony_ci	current->constant = CONSTANT_FILE_MAYBE;
332f08c3bdfSopenharmony_ci	if (pos)
333f08c3bdfSopenharmony_ci		current->pos = *pos;
334f08c3bdfSopenharmony_ci	else
335f08c3bdfSopenharmony_ci		current->pos.stream = -1;
336f08c3bdfSopenharmony_ci	input_stream_nr = stream+1;
337f08c3bdfSopenharmony_ci	hash = hash_stream(name);
338f08c3bdfSopenharmony_ci	current->next_stream = *hash;
339f08c3bdfSopenharmony_ci	*hash = stream;
340f08c3bdfSopenharmony_ci	return stream;
341f08c3bdfSopenharmony_ci}
342f08c3bdfSopenharmony_ci
343f08c3bdfSopenharmony_cistatic struct token * alloc_token(stream_t *stream)
344f08c3bdfSopenharmony_ci{
345f08c3bdfSopenharmony_ci	struct token *token = __alloc_token(0);
346f08c3bdfSopenharmony_ci	token->pos = stream_pos(stream);
347f08c3bdfSopenharmony_ci	return token;
348f08c3bdfSopenharmony_ci}
349f08c3bdfSopenharmony_ci
350f08c3bdfSopenharmony_ci/*
351f08c3bdfSopenharmony_ci *  Argh...  That was surprisingly messy - handling '\r' complicates the
352f08c3bdfSopenharmony_ci *  things a _lot_.
353f08c3bdfSopenharmony_ci */
354f08c3bdfSopenharmony_cistatic int nextchar_slow(stream_t *stream)
355f08c3bdfSopenharmony_ci{
356f08c3bdfSopenharmony_ci	int offset = stream->offset;
357f08c3bdfSopenharmony_ci	int size = stream->size;
358f08c3bdfSopenharmony_ci	int c;
359f08c3bdfSopenharmony_ci	int spliced = 0, had_cr, had_backslash;
360f08c3bdfSopenharmony_ci
361f08c3bdfSopenharmony_cirestart:
362f08c3bdfSopenharmony_ci	had_cr = had_backslash = 0;
363f08c3bdfSopenharmony_ci
364f08c3bdfSopenharmony_cirepeat:
365f08c3bdfSopenharmony_ci	if (offset >= size) {
366f08c3bdfSopenharmony_ci		if (stream->fd < 0)
367f08c3bdfSopenharmony_ci			goto got_eof;
368f08c3bdfSopenharmony_ci		size = read(stream->fd, stream->buffer, BUFSIZE);
369f08c3bdfSopenharmony_ci		if (size <= 0)
370f08c3bdfSopenharmony_ci			goto got_eof;
371f08c3bdfSopenharmony_ci		stream->size = size;
372f08c3bdfSopenharmony_ci		stream->offset = offset = 0;
373f08c3bdfSopenharmony_ci	}
374f08c3bdfSopenharmony_ci
375f08c3bdfSopenharmony_ci	c = stream->buffer[offset++];
376f08c3bdfSopenharmony_ci	if (had_cr)
377f08c3bdfSopenharmony_ci		goto check_lf;
378f08c3bdfSopenharmony_ci
379f08c3bdfSopenharmony_ci	if (c == '\r') {
380f08c3bdfSopenharmony_ci		had_cr = 1;
381f08c3bdfSopenharmony_ci		goto repeat;
382f08c3bdfSopenharmony_ci	}
383f08c3bdfSopenharmony_ci
384f08c3bdfSopenharmony_cinorm:
385f08c3bdfSopenharmony_ci	if (!had_backslash) {
386f08c3bdfSopenharmony_ci		switch (c) {
387f08c3bdfSopenharmony_ci		case '\t':
388f08c3bdfSopenharmony_ci			stream->pos += tabstop - stream->pos % tabstop;
389f08c3bdfSopenharmony_ci			break;
390f08c3bdfSopenharmony_ci		case '\n':
391f08c3bdfSopenharmony_ci			stream->line++;
392f08c3bdfSopenharmony_ci			stream->pos = 0;
393f08c3bdfSopenharmony_ci			stream->newline = 1;
394f08c3bdfSopenharmony_ci			break;
395f08c3bdfSopenharmony_ci		case '\\':
396f08c3bdfSopenharmony_ci			had_backslash = 1;
397f08c3bdfSopenharmony_ci			stream->pos++;
398f08c3bdfSopenharmony_ci			goto repeat;
399f08c3bdfSopenharmony_ci		default:
400f08c3bdfSopenharmony_ci			stream->pos++;
401f08c3bdfSopenharmony_ci		}
402f08c3bdfSopenharmony_ci	} else {
403f08c3bdfSopenharmony_ci		if (c == '\n') {
404f08c3bdfSopenharmony_ci			stream->line++;
405f08c3bdfSopenharmony_ci			stream->pos = 0;
406f08c3bdfSopenharmony_ci			spliced = 1;
407f08c3bdfSopenharmony_ci			goto restart;
408f08c3bdfSopenharmony_ci		}
409f08c3bdfSopenharmony_ci		offset--;
410f08c3bdfSopenharmony_ci		c = '\\';
411f08c3bdfSopenharmony_ci	}
412f08c3bdfSopenharmony_ciout:
413f08c3bdfSopenharmony_ci	stream->offset = offset;
414f08c3bdfSopenharmony_ci
415f08c3bdfSopenharmony_ci	return c;
416f08c3bdfSopenharmony_ci
417f08c3bdfSopenharmony_cicheck_lf:
418f08c3bdfSopenharmony_ci	if (c != '\n')
419f08c3bdfSopenharmony_ci		offset--;
420f08c3bdfSopenharmony_ci	c = '\n';
421f08c3bdfSopenharmony_ci	goto norm;
422f08c3bdfSopenharmony_ci
423f08c3bdfSopenharmony_cigot_eof:
424f08c3bdfSopenharmony_ci	if (had_backslash) {
425f08c3bdfSopenharmony_ci		c = '\\';
426f08c3bdfSopenharmony_ci		goto out;
427f08c3bdfSopenharmony_ci	}
428f08c3bdfSopenharmony_ci	if (stream->pos & Wnewline_eof)
429f08c3bdfSopenharmony_ci		warning(stream_pos(stream), "no newline at end of file");
430f08c3bdfSopenharmony_ci	else if (spliced)
431f08c3bdfSopenharmony_ci		warning(stream_pos(stream), "backslash-newline at end of file");
432f08c3bdfSopenharmony_ci	return EOF;
433f08c3bdfSopenharmony_ci}
434f08c3bdfSopenharmony_ci
435f08c3bdfSopenharmony_ci/*
436f08c3bdfSopenharmony_ci *  We want that as light as possible while covering all normal cases.
437f08c3bdfSopenharmony_ci *  Slow path (including the logics with line-splicing and EOF sanity
438f08c3bdfSopenharmony_ci *  checks) is in nextchar_slow().
439f08c3bdfSopenharmony_ci */
440f08c3bdfSopenharmony_cistatic inline int nextchar(stream_t *stream)
441f08c3bdfSopenharmony_ci{
442f08c3bdfSopenharmony_ci	int offset = stream->offset;
443f08c3bdfSopenharmony_ci
444f08c3bdfSopenharmony_ci	if (offset < stream->size) {
445f08c3bdfSopenharmony_ci		int c = stream->buffer[offset++];
446f08c3bdfSopenharmony_ci		static const char special[256] = {
447f08c3bdfSopenharmony_ci			['\t'] = 1, ['\r'] = 1, ['\n'] = 1, ['\\'] = 1
448f08c3bdfSopenharmony_ci		};
449f08c3bdfSopenharmony_ci		if (!special[c]) {
450f08c3bdfSopenharmony_ci			stream->offset = offset;
451f08c3bdfSopenharmony_ci			stream->pos++;
452f08c3bdfSopenharmony_ci			return c;
453f08c3bdfSopenharmony_ci		}
454f08c3bdfSopenharmony_ci	}
455f08c3bdfSopenharmony_ci	return nextchar_slow(stream);
456f08c3bdfSopenharmony_ci}
457f08c3bdfSopenharmony_ci
458f08c3bdfSopenharmony_cistruct token eof_token_entry;
459f08c3bdfSopenharmony_ci
460f08c3bdfSopenharmony_cistatic struct token *mark_eof(stream_t *stream)
461f08c3bdfSopenharmony_ci{
462f08c3bdfSopenharmony_ci	struct token *end;
463f08c3bdfSopenharmony_ci
464f08c3bdfSopenharmony_ci	end = alloc_token(stream);
465f08c3bdfSopenharmony_ci	eof_token_entry.pos = end->pos;
466f08c3bdfSopenharmony_ci	token_type(end) = TOKEN_STREAMEND;
467f08c3bdfSopenharmony_ci	end->pos.newline = 1;
468f08c3bdfSopenharmony_ci
469f08c3bdfSopenharmony_ci	eof_token_entry.next = &eof_token_entry;
470f08c3bdfSopenharmony_ci	eof_token_entry.pos.newline = 1;
471f08c3bdfSopenharmony_ci
472f08c3bdfSopenharmony_ci	end->next =  &eof_token_entry;
473f08c3bdfSopenharmony_ci	*stream->tokenlist = end;
474f08c3bdfSopenharmony_ci	stream->tokenlist = NULL;
475f08c3bdfSopenharmony_ci	return end;
476f08c3bdfSopenharmony_ci}
477f08c3bdfSopenharmony_ci
478f08c3bdfSopenharmony_cistatic void add_token(stream_t *stream)
479f08c3bdfSopenharmony_ci{
480f08c3bdfSopenharmony_ci	struct token *token = stream->token;
481f08c3bdfSopenharmony_ci
482f08c3bdfSopenharmony_ci	stream->token = NULL;
483f08c3bdfSopenharmony_ci	token->next = NULL;
484f08c3bdfSopenharmony_ci	*stream->tokenlist = token;
485f08c3bdfSopenharmony_ci	stream->tokenlist = &token->next;
486f08c3bdfSopenharmony_ci}
487f08c3bdfSopenharmony_ci
488f08c3bdfSopenharmony_cistatic void drop_token(stream_t *stream)
489f08c3bdfSopenharmony_ci{
490f08c3bdfSopenharmony_ci	stream->newline |= stream->token->pos.newline;
491f08c3bdfSopenharmony_ci	stream->whitespace |= stream->token->pos.whitespace;
492f08c3bdfSopenharmony_ci	stream->token = NULL;
493f08c3bdfSopenharmony_ci}
494f08c3bdfSopenharmony_ci
/* Character-class bits stored in cclass[]. */
enum {
	Letter      = 1 << 0,
	Digit       = 1 << 1,
	Hex         = 1 << 2,
	Exp         = 1 << 3,
	Dot         = 1 << 4,
	ValidSecond = 1 << 5,
	Quote       = 1 << 6,
};

/*
 * Class bits per input character.  The table is indexed by (c + 1) so
 * that EOF (-1) maps to entry 0, which carries no bits.
 */
static const char cclass[257] = {
	/* quote characters */
	['"' + 1] = Quote,
	['\'' + 1] = Quote,

	/* punctuation flagged ValidSecond */
	['#' + 1] = ValidSecond,
	['&' + 1] = ValidSecond,
	['+' + 1] = ValidSecond,
	['-' + 1] = ValidSecond,
	['.' + 1] = Dot | ValidSecond,
	['<' + 1] = ValidSecond,
	['=' + 1] = ValidSecond,
	['>' + 1] = ValidSecond,
	['|' + 1] = ValidSecond,

	/* digits */
	['0' + 1 ... '9' + 1] = Digit | Hex,

	/* upper-case letters */
	['A' + 1 ... 'D' + 1] = Letter | Hex,
	['E' + 1] = Letter | Hex | Exp,	/* E<exp> */
	['F' + 1] = Letter | Hex,
	['G' + 1 ... 'O' + 1] = Letter,
	['P' + 1] = Letter | Exp,	/* P<exp> */
	['Q' + 1 ... 'Z' + 1] = Letter,

	['_' + 1] = Letter,

	/* lower-case letters */
	['a' + 1 ... 'd' + 1] = Letter | Hex,
	['e' + 1] = Letter | Hex | Exp,	/* e<exp> */
	['f' + 1] = Letter | Hex,
	['g' + 1 ... 'o' + 1] = Letter,
	['p' + 1] = Letter | Exp,	/* p<exp> */
	['q' + 1 ... 'z' + 1] = Letter,
};
532f08c3bdfSopenharmony_ci
533f08c3bdfSopenharmony_ci/*
534f08c3bdfSopenharmony_ci * pp-number:
535f08c3bdfSopenharmony_ci *	digit
536f08c3bdfSopenharmony_ci *	. digit
537f08c3bdfSopenharmony_ci *	pp-number digit
538f08c3bdfSopenharmony_ci *	pp-number identifier-nodigit
539f08c3bdfSopenharmony_ci *	pp-number e sign
540f08c3bdfSopenharmony_ci *	pp-number E sign
541f08c3bdfSopenharmony_ci *	pp-number p sign
542f08c3bdfSopenharmony_ci *	pp-number P sign
543f08c3bdfSopenharmony_ci *	pp-number .
544f08c3bdfSopenharmony_ci */
545f08c3bdfSopenharmony_cistatic int get_one_number(int c, int next, stream_t *stream)
546f08c3bdfSopenharmony_ci{
547f08c3bdfSopenharmony_ci	struct token *token;
548f08c3bdfSopenharmony_ci	static char buffer[4095];
549f08c3bdfSopenharmony_ci	char *p = buffer, *buffer_end = buffer + sizeof (buffer);
550f08c3bdfSopenharmony_ci
551f08c3bdfSopenharmony_ci	*p++ = c;
552f08c3bdfSopenharmony_ci	for (;;) {
553f08c3bdfSopenharmony_ci		long class =  cclass[next + 1];
554f08c3bdfSopenharmony_ci		if (!(class & (Dot | Digit | Letter)))
555f08c3bdfSopenharmony_ci			break;
556f08c3bdfSopenharmony_ci		if (p != buffer_end)
557f08c3bdfSopenharmony_ci			*p++ = next;
558f08c3bdfSopenharmony_ci		next = nextchar(stream);
559f08c3bdfSopenharmony_ci		if (class & Exp) {
560f08c3bdfSopenharmony_ci			if (next == '-' || next == '+') {
561f08c3bdfSopenharmony_ci				if (p != buffer_end)
562f08c3bdfSopenharmony_ci					*p++ = next;
563f08c3bdfSopenharmony_ci				next = nextchar(stream);
564f08c3bdfSopenharmony_ci			}
565f08c3bdfSopenharmony_ci		}
566f08c3bdfSopenharmony_ci	}
567f08c3bdfSopenharmony_ci
568f08c3bdfSopenharmony_ci	if (p == buffer_end) {
569f08c3bdfSopenharmony_ci		sparse_error(stream_pos(stream), "number token exceeds %td characters",
570f08c3bdfSopenharmony_ci		      buffer_end - buffer);
571f08c3bdfSopenharmony_ci		// Pretend we saw just "1".
572f08c3bdfSopenharmony_ci		buffer[0] = '1';
573f08c3bdfSopenharmony_ci		p = buffer + 1;
574f08c3bdfSopenharmony_ci	}
575f08c3bdfSopenharmony_ci
576f08c3bdfSopenharmony_ci	*p++ = 0;
577f08c3bdfSopenharmony_ci	token = stream->token;
578f08c3bdfSopenharmony_ci	token_type(token) = TOKEN_NUMBER;
579f08c3bdfSopenharmony_ci	token->number = xmemdup(buffer, p - buffer);
580f08c3bdfSopenharmony_ci	add_token(stream);
581f08c3bdfSopenharmony_ci
582f08c3bdfSopenharmony_ci	return next;
583f08c3bdfSopenharmony_ci}
584f08c3bdfSopenharmony_ci
585f08c3bdfSopenharmony_cistatic int eat_string(int next, stream_t *stream, enum token_type type)
586f08c3bdfSopenharmony_ci{
587f08c3bdfSopenharmony_ci	static char buffer[MAX_STRING];
588f08c3bdfSopenharmony_ci	struct string *string;
589f08c3bdfSopenharmony_ci	struct token *token = stream->token;
590f08c3bdfSopenharmony_ci	int len = 0;
591f08c3bdfSopenharmony_ci	int escape;
592f08c3bdfSopenharmony_ci	int want_hex = 0;
593f08c3bdfSopenharmony_ci	char delim = type < TOKEN_STRING ? '\'' : '"';
594f08c3bdfSopenharmony_ci
595f08c3bdfSopenharmony_ci	for (escape = 0; escape || next != delim; next = nextchar(stream)) {
596f08c3bdfSopenharmony_ci		if (len < MAX_STRING)
597f08c3bdfSopenharmony_ci			buffer[len] = next;
598f08c3bdfSopenharmony_ci		len++;
599f08c3bdfSopenharmony_ci		if (next == '\n') {
600f08c3bdfSopenharmony_ci			warning(stream_pos(stream),
601f08c3bdfSopenharmony_ci				"missing terminating %c character", delim);
602f08c3bdfSopenharmony_ci			/* assume delimiter is lost */
603f08c3bdfSopenharmony_ci			break;
604f08c3bdfSopenharmony_ci		}
605f08c3bdfSopenharmony_ci		if (next == EOF) {
606f08c3bdfSopenharmony_ci			warning(stream_pos(stream),
607f08c3bdfSopenharmony_ci				"End of file in middle of string");
608f08c3bdfSopenharmony_ci			return next;
609f08c3bdfSopenharmony_ci		}
610f08c3bdfSopenharmony_ci		if (!escape) {
611f08c3bdfSopenharmony_ci			if (want_hex && !(cclass[next + 1] & Hex))
612f08c3bdfSopenharmony_ci				warning(stream_pos(stream),
613f08c3bdfSopenharmony_ci					"\\x used with no following hex digits");
614f08c3bdfSopenharmony_ci			want_hex = 0;
615f08c3bdfSopenharmony_ci			escape = next == '\\';
616f08c3bdfSopenharmony_ci		} else {
617f08c3bdfSopenharmony_ci			escape = 0;
618f08c3bdfSopenharmony_ci			want_hex = next == 'x';
619f08c3bdfSopenharmony_ci		}
620f08c3bdfSopenharmony_ci	}
621f08c3bdfSopenharmony_ci	if (want_hex)
622f08c3bdfSopenharmony_ci		warning(stream_pos(stream),
623f08c3bdfSopenharmony_ci			"\\x used with no following hex digits");
624f08c3bdfSopenharmony_ci	if (len > MAX_STRING) {
625f08c3bdfSopenharmony_ci		warning(stream_pos(stream), "string too long (%d bytes, %d bytes max)", len, MAX_STRING);
626f08c3bdfSopenharmony_ci		len = MAX_STRING;
627f08c3bdfSopenharmony_ci	}
628f08c3bdfSopenharmony_ci	if (delim == '\'' && len && len <= 4) {
629f08c3bdfSopenharmony_ci		token_type(token) = type + len;
630f08c3bdfSopenharmony_ci		memset(buffer + len, '\0', 4 - len);
631f08c3bdfSopenharmony_ci		memcpy(token->embedded, buffer, 4);
632f08c3bdfSopenharmony_ci	} else {
633f08c3bdfSopenharmony_ci		token_type(token) = type;
634f08c3bdfSopenharmony_ci		string = __alloc_string(len+1);
635f08c3bdfSopenharmony_ci		memcpy(string->data, buffer, len);
636f08c3bdfSopenharmony_ci		string->data[len] = '\0';
637f08c3bdfSopenharmony_ci		string->length = len+1;
638f08c3bdfSopenharmony_ci		token->string = string;
639f08c3bdfSopenharmony_ci	}
640f08c3bdfSopenharmony_ci
641f08c3bdfSopenharmony_ci	/* Pass it on.. */
642f08c3bdfSopenharmony_ci	token = stream->token;
643f08c3bdfSopenharmony_ci	add_token(stream);
644f08c3bdfSopenharmony_ci	return nextchar(stream);
645f08c3bdfSopenharmony_ci}
646f08c3bdfSopenharmony_ci
647f08c3bdfSopenharmony_cistatic int drop_stream_eoln(stream_t *stream)
648f08c3bdfSopenharmony_ci{
649f08c3bdfSopenharmony_ci	drop_token(stream);
650f08c3bdfSopenharmony_ci	for (;;) {
651f08c3bdfSopenharmony_ci		switch (nextchar(stream)) {
652f08c3bdfSopenharmony_ci		case EOF:
653f08c3bdfSopenharmony_ci			return EOF;
654f08c3bdfSopenharmony_ci		case '\n':
655f08c3bdfSopenharmony_ci			return nextchar(stream);
656f08c3bdfSopenharmony_ci		}
657f08c3bdfSopenharmony_ci	}
658f08c3bdfSopenharmony_ci}
659f08c3bdfSopenharmony_ci
660f08c3bdfSopenharmony_cistatic int drop_stream_comment(stream_t *stream)
661f08c3bdfSopenharmony_ci{
662f08c3bdfSopenharmony_ci	int newline;
663f08c3bdfSopenharmony_ci	int next;
664f08c3bdfSopenharmony_ci	drop_token(stream);
665f08c3bdfSopenharmony_ci	newline = stream->newline;
666f08c3bdfSopenharmony_ci
667f08c3bdfSopenharmony_ci	next = nextchar(stream);
668f08c3bdfSopenharmony_ci	for (;;) {
669f08c3bdfSopenharmony_ci		int curr = next;
670f08c3bdfSopenharmony_ci		if (curr == EOF) {
671f08c3bdfSopenharmony_ci			warning(stream_pos(stream), "End of file in the middle of a comment");
672f08c3bdfSopenharmony_ci			return curr;
673f08c3bdfSopenharmony_ci		}
674f08c3bdfSopenharmony_ci		next = nextchar(stream);
675f08c3bdfSopenharmony_ci		if (curr == '*' && next == '/')
676f08c3bdfSopenharmony_ci			break;
677f08c3bdfSopenharmony_ci	}
678f08c3bdfSopenharmony_ci	stream->newline = newline;
679f08c3bdfSopenharmony_ci	return nextchar(stream);
680f08c3bdfSopenharmony_ci}
681f08c3bdfSopenharmony_ci
/*
 * Table with 4 bytes per entry, filled from the COMBINATION_STRINGS
 * initializer (defined outside this part of the file).
 * NOTE(review): presumably the spellings of the multi-character
 * punctuators, indexed by (special code - SPECIAL_BASE) - confirm
 * against the header that defines COMBINATION_STRINGS.
 */
unsigned char combinations[][4] = COMBINATION_STRINGS;

/* Distance between SPECIAL_BASE and SPECIAL_ARG_SEPARATOR in the SPECIAL_* numbering. */
#define NR_COMBINATIONS (SPECIAL_ARG_SEPARATOR - SPECIAL_BASE)
685f08c3bdfSopenharmony_ci
/*
 * Hash function for two-character punctuators.  Every pair listed in
 * hash_results[] below maps to a distinct slot in the range 0..31.
 *
 * Both arguments are parenthesized so that expression arguments
 * (e.g. special_hash(c + 1, next)) expand correctly.  Each argument is
 * evaluated more than once, so do not pass expressions with side effects.
 */
#define special_hash(c0, c1) \
	(((((c0) * 8) + ((c1) * 2)) + ((((c0) * 8) + ((c1) * 2)) >> 5)) & 31)

/*
 * For each hash slot, the exact character pair that owns it; slots that
 * no punctuator hashes to stay {0, 0}.  A lookup hashes its two
 * characters and then compares them against the stored pair.
 *
 * Note that we won't get false positives from the empty slots:
 * special_hash(0, 0) is 0 and entry 0 is filled (by "+="), so an all-zero
 * entry can never be the slot that a {0, 0} pair would hash to.
 *
 * The table is only ever read, hence const.
 */
static const unsigned char hash_results[32][2] = {
#define RES(c0, c1) [special_hash(c0, c1)] = {c0, c1}
	RES('+', '='), /* 00 */
	RES('/', '='), /* 01 */
	RES('^', '='), /* 05 */
	RES('&', '&'), /* 07 */
	RES('#', '#'), /* 08 */
	RES('<', '<'), /* 0a */
	RES('<', '='), /* 0c */
	RES('!', '='), /* 0e */
	RES('%', '='), /* 0f */
	RES('-', '-'), /* 10 */
	RES('-', '='), /* 11 */
	RES('-', '>'), /* 13 */
	RES('=', '='), /* 15 */
	RES('&', '='), /* 17 */
	RES('*', '='), /* 18 */
	RES('.', '.'), /* 1a */
	RES('+', '+'), /* 1b */
	RES('|', '='), /* 1c */
	RES('>', '='), /* 1d */
	RES('|', '|'), /* 1e */
	RES('>', '>')  /* 1f */
#undef RES
};
718f08c3bdfSopenharmony_cistatic int code[32] = {
719f08c3bdfSopenharmony_ci#define CODE(c0, c1, value) [special_hash(c0, c1)] = value
720f08c3bdfSopenharmony_ci	CODE('+', '=', SPECIAL_ADD_ASSIGN), /* 00 */
721f08c3bdfSopenharmony_ci	CODE('/', '=', SPECIAL_DIV_ASSIGN), /* 01 */
722f08c3bdfSopenharmony_ci	CODE('^', '=', SPECIAL_XOR_ASSIGN), /* 05 */
723f08c3bdfSopenharmony_ci	CODE('&', '&', SPECIAL_LOGICAL_AND), /* 07 */
724f08c3bdfSopenharmony_ci	CODE('#', '#', SPECIAL_HASHHASH), /* 08 */
725f08c3bdfSopenharmony_ci	CODE('<', '<', SPECIAL_LEFTSHIFT), /* 0a */
726f08c3bdfSopenharmony_ci	CODE('<', '=', SPECIAL_LTE), /* 0c */
727f08c3bdfSopenharmony_ci	CODE('!', '=', SPECIAL_NOTEQUAL), /* 0e */
728f08c3bdfSopenharmony_ci	CODE('%', '=', SPECIAL_MOD_ASSIGN), /* 0f */
729f08c3bdfSopenharmony_ci	CODE('-', '-', SPECIAL_DECREMENT), /* 10 */
730f08c3bdfSopenharmony_ci	CODE('-', '=', SPECIAL_SUB_ASSIGN), /* 11 */
731f08c3bdfSopenharmony_ci	CODE('-', '>', SPECIAL_DEREFERENCE), /* 13 */
732f08c3bdfSopenharmony_ci	CODE('=', '=', SPECIAL_EQUAL), /* 15 */
733f08c3bdfSopenharmony_ci	CODE('&', '=', SPECIAL_AND_ASSIGN), /* 17 */
734f08c3bdfSopenharmony_ci	CODE('*', '=', SPECIAL_MUL_ASSIGN), /* 18 */
735f08c3bdfSopenharmony_ci	CODE('.', '.', SPECIAL_DOTDOT), /* 1a */
736f08c3bdfSopenharmony_ci	CODE('+', '+', SPECIAL_INCREMENT), /* 1b */
737f08c3bdfSopenharmony_ci	CODE('|', '=', SPECIAL_OR_ASSIGN), /* 1c */
738f08c3bdfSopenharmony_ci	CODE('>', '=', SPECIAL_GTE), /* 1d */
739f08c3bdfSopenharmony_ci	CODE('|', '|', SPECIAL_LOGICAL_OR), /* 1e */
740f08c3bdfSopenharmony_ci	CODE('>', '>', SPECIAL_RIGHTSHIFT)  /* 1f */
741f08c3bdfSopenharmony_ci#undef CODE
742f08c3bdfSopenharmony_ci};
743f08c3bdfSopenharmony_ci
744f08c3bdfSopenharmony_cistatic int get_one_special(int c, stream_t *stream)
745f08c3bdfSopenharmony_ci{
746f08c3bdfSopenharmony_ci	struct token *token;
747f08c3bdfSopenharmony_ci	int next, value, i;
748f08c3bdfSopenharmony_ci
749f08c3bdfSopenharmony_ci	next = nextchar(stream);
750f08c3bdfSopenharmony_ci
751f08c3bdfSopenharmony_ci	/*
752f08c3bdfSopenharmony_ci	 * Check for numbers, strings, character constants, and comments
753f08c3bdfSopenharmony_ci	 */
754f08c3bdfSopenharmony_ci	switch (c) {
755f08c3bdfSopenharmony_ci	case '.':
756f08c3bdfSopenharmony_ci		if (next >= '0' && next <= '9')
757f08c3bdfSopenharmony_ci			return get_one_number(c, next, stream);
758f08c3bdfSopenharmony_ci		break;
759f08c3bdfSopenharmony_ci	case '"':
760f08c3bdfSopenharmony_ci		return eat_string(next, stream, TOKEN_STRING);
761f08c3bdfSopenharmony_ci	case '\'':
762f08c3bdfSopenharmony_ci		return eat_string(next, stream, TOKEN_CHAR);
763f08c3bdfSopenharmony_ci	case '/':
764f08c3bdfSopenharmony_ci		if (next == '/')
765f08c3bdfSopenharmony_ci			return drop_stream_eoln(stream);
766f08c3bdfSopenharmony_ci		if (next == '*')
767f08c3bdfSopenharmony_ci			return drop_stream_comment(stream);
768f08c3bdfSopenharmony_ci	}
769f08c3bdfSopenharmony_ci
770f08c3bdfSopenharmony_ci	/*
771f08c3bdfSopenharmony_ci	 * Check for combinations
772f08c3bdfSopenharmony_ci	 */
773f08c3bdfSopenharmony_ci	value = c;
774f08c3bdfSopenharmony_ci	if (cclass[next + 1] & ValidSecond) {
775f08c3bdfSopenharmony_ci		i = special_hash(c, next);
776f08c3bdfSopenharmony_ci		if (hash_results[i][0] == c && hash_results[i][1] == next) {
777f08c3bdfSopenharmony_ci			value = code[i];
778f08c3bdfSopenharmony_ci			next = nextchar(stream);
779f08c3bdfSopenharmony_ci			if (value >= SPECIAL_LEFTSHIFT &&
780f08c3bdfSopenharmony_ci			    next == "==."[value - SPECIAL_LEFTSHIFT]) {
781f08c3bdfSopenharmony_ci				value += 3;
782f08c3bdfSopenharmony_ci				next = nextchar(stream);
783f08c3bdfSopenharmony_ci			}
784f08c3bdfSopenharmony_ci		}
785f08c3bdfSopenharmony_ci	}
786f08c3bdfSopenharmony_ci
787f08c3bdfSopenharmony_ci	/* Pass it on.. */
788f08c3bdfSopenharmony_ci	token = stream->token;
789f08c3bdfSopenharmony_ci	token_type(token) = TOKEN_SPECIAL;
790f08c3bdfSopenharmony_ci	token->special = value;
791f08c3bdfSopenharmony_ci	add_token(stream);
792f08c3bdfSopenharmony_ci	return next;
793f08c3bdfSopenharmony_ci}
794f08c3bdfSopenharmony_ci
/* The identifier hash table has 2^IDENT_HASH_BITS buckets. */
#define IDENT_HASH_BITS 13
#define IDENT_HASH_SIZE (1 << IDENT_HASH_BITS)
#define IDENT_HASH_MASK (IDENT_HASH_SIZE - 1)

/*
 * Incremental identifier hash: start from the first character, fold in
 * each following character, then reduce the result to a bucket index.
 */
#define ident_hash_init(c)		(c)
#define ident_hash_add(oldhash,c)	((c) + (oldhash) * 11)
#define ident_hash_end(hash)		(((hash) + ((hash) >> IDENT_HASH_BITS)) & IDENT_HASH_MASK)

/* Bucket heads; identifiers in one bucket are chained through ->next. */
static struct ident *hash_table[IDENT_HASH_SIZE];

/* Lookups that found an existing identifier. */
static int ident_hit;
/* Identifiers added to the table (by insertion or by a failed lookup). */
static int ident_miss;
/* Identifiers allocated by create_hashed_ident(). */
static int idents;
805f08c3bdfSopenharmony_ci
806f08c3bdfSopenharmony_civoid show_identifier_stats(void)
807f08c3bdfSopenharmony_ci{
808f08c3bdfSopenharmony_ci	int i;
809f08c3bdfSopenharmony_ci	int distribution[100];
810f08c3bdfSopenharmony_ci
811f08c3bdfSopenharmony_ci	fprintf(stderr, "identifiers: %d hits, %d misses\n",
812f08c3bdfSopenharmony_ci		ident_hit, ident_miss);
813f08c3bdfSopenharmony_ci
814f08c3bdfSopenharmony_ci	for (i = 0; i < 100; i++)
815f08c3bdfSopenharmony_ci		distribution[i] = 0;
816f08c3bdfSopenharmony_ci
817f08c3bdfSopenharmony_ci	for (i = 0; i < IDENT_HASH_SIZE; i++) {
818f08c3bdfSopenharmony_ci		struct ident * ident = hash_table[i];
819f08c3bdfSopenharmony_ci		int count = 0;
820f08c3bdfSopenharmony_ci
821f08c3bdfSopenharmony_ci		while (ident) {
822f08c3bdfSopenharmony_ci			count++;
823f08c3bdfSopenharmony_ci			ident = ident->next;
824f08c3bdfSopenharmony_ci		}
825f08c3bdfSopenharmony_ci		if (count > 99)
826f08c3bdfSopenharmony_ci			count = 99;
827f08c3bdfSopenharmony_ci		distribution[count]++;
828f08c3bdfSopenharmony_ci	}
829f08c3bdfSopenharmony_ci
830f08c3bdfSopenharmony_ci	for (i = 0; i < 100; i++) {
831f08c3bdfSopenharmony_ci		if (distribution[i])
832f08c3bdfSopenharmony_ci			fprintf(stderr, "%2d: %d buckets\n", i, distribution[i]);
833f08c3bdfSopenharmony_ci	}
834f08c3bdfSopenharmony_ci}
835f08c3bdfSopenharmony_ci
836f08c3bdfSopenharmony_cistatic struct ident *alloc_ident(const char *name, int len)
837f08c3bdfSopenharmony_ci{
838f08c3bdfSopenharmony_ci	struct ident *ident = __alloc_ident(len);
839f08c3bdfSopenharmony_ci	ident->symbols = NULL;
840f08c3bdfSopenharmony_ci	ident->len = len;
841f08c3bdfSopenharmony_ci	ident->tainted = 0;
842f08c3bdfSopenharmony_ci	memcpy(ident->name, name, len);
843f08c3bdfSopenharmony_ci	return ident;
844f08c3bdfSopenharmony_ci}
845f08c3bdfSopenharmony_ci
846f08c3bdfSopenharmony_cistatic struct ident * insert_hash(struct ident *ident, unsigned long hash)
847f08c3bdfSopenharmony_ci{
848f08c3bdfSopenharmony_ci	ident->next = hash_table[hash];
849f08c3bdfSopenharmony_ci	hash_table[hash] = ident;
850f08c3bdfSopenharmony_ci	ident_miss++;
851f08c3bdfSopenharmony_ci	return ident;
852f08c3bdfSopenharmony_ci}
853f08c3bdfSopenharmony_ci
854f08c3bdfSopenharmony_cistatic struct ident *create_hashed_ident(const char *name, int len, unsigned long hash)
855f08c3bdfSopenharmony_ci{
856f08c3bdfSopenharmony_ci	struct ident *ident;
857f08c3bdfSopenharmony_ci	struct ident **p;
858f08c3bdfSopenharmony_ci
859f08c3bdfSopenharmony_ci	p = &hash_table[hash];
860f08c3bdfSopenharmony_ci	while ((ident = *p) != NULL) {
861f08c3bdfSopenharmony_ci		if (ident->len == (unsigned char) len) {
862f08c3bdfSopenharmony_ci			if (strncmp(name, ident->name, len) != 0)
863f08c3bdfSopenharmony_ci				goto next;
864f08c3bdfSopenharmony_ci
865f08c3bdfSopenharmony_ci			ident_hit++;
866f08c3bdfSopenharmony_ci			return ident;
867f08c3bdfSopenharmony_ci		}
868f08c3bdfSopenharmony_cinext:
869f08c3bdfSopenharmony_ci		//misses++;
870f08c3bdfSopenharmony_ci		p = &ident->next;
871f08c3bdfSopenharmony_ci	}
872f08c3bdfSopenharmony_ci	ident = alloc_ident(name, len);
873f08c3bdfSopenharmony_ci	*p = ident;
874f08c3bdfSopenharmony_ci	ident->next = NULL;
875f08c3bdfSopenharmony_ci	ident_miss++;
876f08c3bdfSopenharmony_ci	idents++;
877f08c3bdfSopenharmony_ci	return ident;
878f08c3bdfSopenharmony_ci}
879f08c3bdfSopenharmony_ci
/*
 * Compute the hash bucket for the 'len' bytes at 'name'.
 *
 * The bytes are treated as unsigned char: the first one seeds the hash
 * via ident_hash_init(), each later one is folded in with
 * ident_hash_add(), and ident_hash_end() reduces the result to a
 * bucket index.
 *
 * A non-positive 'len' yields the bucket of an all-zero hash without
 * touching 'name'.  Without this guard the first byte would be read
 * from an empty name and the "--len" loop would start from a
 * non-positive counter, running far beyond the end of the buffer.
 */
static unsigned long hash_name(const char *name, int len)
{
	const unsigned char *p = (const unsigned char *)name;
	unsigned long hash;

	if (len <= 0)
		return ident_hash_end(0UL);

	hash = ident_hash_init(*p++);
	while (--len) {
		unsigned int i = *p++;
		hash = ident_hash_add(hash, i);
	}
	return ident_hash_end(hash);
}
892f08c3bdfSopenharmony_ci
893f08c3bdfSopenharmony_cistruct ident *hash_ident(struct ident *ident)
894f08c3bdfSopenharmony_ci{
895f08c3bdfSopenharmony_ci	return insert_hash(ident, hash_name(ident->name, ident->len));
896f08c3bdfSopenharmony_ci}
897f08c3bdfSopenharmony_ci
/*
 * Return the identifier for the NUL-terminated string 'name', creating
 * and hashing it if it is not in the table yet.
 */
struct ident *built_in_ident(const char *name)
{
	int len = strlen(name);
	unsigned long bucket = hash_name(name, len);

	return create_hashed_ident(name, len, bucket);
}
903f08c3bdfSopenharmony_ci
904f08c3bdfSopenharmony_cistruct token *built_in_token(int stream, struct ident *ident)
905f08c3bdfSopenharmony_ci{
906f08c3bdfSopenharmony_ci	struct token *token;
907f08c3bdfSopenharmony_ci
908f08c3bdfSopenharmony_ci	token = __alloc_token(0);
909f08c3bdfSopenharmony_ci	token->pos.stream = stream;
910f08c3bdfSopenharmony_ci	token_type(token) = TOKEN_IDENT;
911f08c3bdfSopenharmony_ci	token->ident = ident;
912f08c3bdfSopenharmony_ci	return token;
913f08c3bdfSopenharmony_ci}
914f08c3bdfSopenharmony_ci
915f08c3bdfSopenharmony_cistatic int get_one_identifier(int c, stream_t *stream)
916f08c3bdfSopenharmony_ci{
917f08c3bdfSopenharmony_ci	struct token *token;
918f08c3bdfSopenharmony_ci	struct ident *ident;
919f08c3bdfSopenharmony_ci	unsigned long hash;
920f08c3bdfSopenharmony_ci	char buf[256];
921f08c3bdfSopenharmony_ci	int len = 1;
922f08c3bdfSopenharmony_ci	int next;
923f08c3bdfSopenharmony_ci
924f08c3bdfSopenharmony_ci	hash = ident_hash_init(c);
925f08c3bdfSopenharmony_ci	buf[0] = c;
926f08c3bdfSopenharmony_ci	for (;;) {
927f08c3bdfSopenharmony_ci		next = nextchar(stream);
928f08c3bdfSopenharmony_ci		if (!(cclass[next + 1] & (Letter | Digit)))
929f08c3bdfSopenharmony_ci			break;
930f08c3bdfSopenharmony_ci		if (len >= sizeof(buf))
931f08c3bdfSopenharmony_ci			break;
932f08c3bdfSopenharmony_ci		hash = ident_hash_add(hash, next);
933f08c3bdfSopenharmony_ci		buf[len] = next;
934f08c3bdfSopenharmony_ci		len++;
935f08c3bdfSopenharmony_ci	};
936f08c3bdfSopenharmony_ci	if (cclass[next + 1] & Quote) {
937f08c3bdfSopenharmony_ci		if (len == 1 && buf[0] == 'L') {
938f08c3bdfSopenharmony_ci			if (next == '\'')
939f08c3bdfSopenharmony_ci				return eat_string(nextchar(stream), stream,
940f08c3bdfSopenharmony_ci							TOKEN_WIDE_CHAR);
941f08c3bdfSopenharmony_ci			else
942f08c3bdfSopenharmony_ci				return eat_string(nextchar(stream), stream,
943f08c3bdfSopenharmony_ci							TOKEN_WIDE_STRING);
944f08c3bdfSopenharmony_ci		}
945f08c3bdfSopenharmony_ci	}
946f08c3bdfSopenharmony_ci	hash = ident_hash_end(hash);
947f08c3bdfSopenharmony_ci	ident = create_hashed_ident(buf, len, hash);
948f08c3bdfSopenharmony_ci
949f08c3bdfSopenharmony_ci	/* Pass it on.. */
950f08c3bdfSopenharmony_ci	token = stream->token;
951f08c3bdfSopenharmony_ci	token_type(token) = TOKEN_IDENT;
952f08c3bdfSopenharmony_ci	token->ident = ident;
953f08c3bdfSopenharmony_ci	add_token(stream);
954f08c3bdfSopenharmony_ci	return next;
955f08c3bdfSopenharmony_ci}
956f08c3bdfSopenharmony_ci
957f08c3bdfSopenharmony_cistatic int get_one_token(int c, stream_t *stream)
958f08c3bdfSopenharmony_ci{
959f08c3bdfSopenharmony_ci	long class = cclass[c + 1];
960f08c3bdfSopenharmony_ci	if (class & Digit)
961f08c3bdfSopenharmony_ci		return get_one_number(c, nextchar(stream), stream);
962f08c3bdfSopenharmony_ci	if (class & Letter)
963f08c3bdfSopenharmony_ci		return get_one_identifier(c, stream);
964f08c3bdfSopenharmony_ci	return get_one_special(c, stream);
965f08c3bdfSopenharmony_ci}
966f08c3bdfSopenharmony_ci
967f08c3bdfSopenharmony_cistatic struct token *setup_stream(stream_t *stream, int idx, int fd,
968f08c3bdfSopenharmony_ci	unsigned char *buf, unsigned int buf_size)
969f08c3bdfSopenharmony_ci{
970f08c3bdfSopenharmony_ci	struct token *begin;
971f08c3bdfSopenharmony_ci
972f08c3bdfSopenharmony_ci	stream->nr = idx;
973f08c3bdfSopenharmony_ci	stream->line = 1;
974f08c3bdfSopenharmony_ci	stream->newline = 1;
975f08c3bdfSopenharmony_ci	stream->whitespace = 0;
976f08c3bdfSopenharmony_ci	stream->pos = 0;
977f08c3bdfSopenharmony_ci
978f08c3bdfSopenharmony_ci	stream->token = NULL;
979f08c3bdfSopenharmony_ci	stream->fd = fd;
980f08c3bdfSopenharmony_ci	stream->offset = 0;
981f08c3bdfSopenharmony_ci	stream->size = buf_size;
982f08c3bdfSopenharmony_ci	stream->buffer = buf;
983f08c3bdfSopenharmony_ci
984f08c3bdfSopenharmony_ci	begin = alloc_token(stream);
985f08c3bdfSopenharmony_ci	token_type(begin) = TOKEN_STREAMBEGIN;
986f08c3bdfSopenharmony_ci	stream->tokenlist = &begin->next;
987f08c3bdfSopenharmony_ci	return begin;
988f08c3bdfSopenharmony_ci}
989f08c3bdfSopenharmony_ci
990f08c3bdfSopenharmony_cistatic struct token *tokenize_stream(stream_t *stream)
991f08c3bdfSopenharmony_ci{
992f08c3bdfSopenharmony_ci	int c = nextchar(stream);
993f08c3bdfSopenharmony_ci	while (c != EOF) {
994f08c3bdfSopenharmony_ci		if (!isspace(c)) {
995f08c3bdfSopenharmony_ci			struct token *token = alloc_token(stream);
996f08c3bdfSopenharmony_ci			stream->token = token;
997f08c3bdfSopenharmony_ci			stream->newline = 0;
998f08c3bdfSopenharmony_ci			stream->whitespace = 0;
999f08c3bdfSopenharmony_ci			c = get_one_token(c, stream);
1000f08c3bdfSopenharmony_ci			continue;
1001f08c3bdfSopenharmony_ci		}
1002f08c3bdfSopenharmony_ci		stream->whitespace = 1;
1003f08c3bdfSopenharmony_ci		c = nextchar(stream);
1004f08c3bdfSopenharmony_ci	}
1005f08c3bdfSopenharmony_ci	return mark_eof(stream);
1006f08c3bdfSopenharmony_ci}
1007f08c3bdfSopenharmony_ci
1008f08c3bdfSopenharmony_cistruct token * tokenize_buffer(void *buffer, unsigned long size, struct token **endtoken)
1009f08c3bdfSopenharmony_ci{
1010f08c3bdfSopenharmony_ci	stream_t stream;
1011f08c3bdfSopenharmony_ci	struct token *begin;
1012f08c3bdfSopenharmony_ci
1013f08c3bdfSopenharmony_ci	begin = setup_stream(&stream, 0, -1, buffer, size);
1014f08c3bdfSopenharmony_ci	*endtoken = tokenize_stream(&stream);
1015f08c3bdfSopenharmony_ci	return begin;
1016f08c3bdfSopenharmony_ci}
1017f08c3bdfSopenharmony_ci
1018f08c3bdfSopenharmony_cistruct token * tokenize(const struct position *pos, const char *name, int fd, struct token *endtoken, const char **next_path)
1019f08c3bdfSopenharmony_ci{
1020f08c3bdfSopenharmony_ci	struct token *begin, *end;
1021f08c3bdfSopenharmony_ci	stream_t stream;
1022f08c3bdfSopenharmony_ci	unsigned char buffer[BUFSIZE];
1023f08c3bdfSopenharmony_ci	int idx;
1024f08c3bdfSopenharmony_ci
1025f08c3bdfSopenharmony_ci	idx = init_stream(pos, name, fd, next_path);
1026f08c3bdfSopenharmony_ci	if (idx < 0) {
1027f08c3bdfSopenharmony_ci		// info(endtoken->pos, "File %s is const", name);
1028f08c3bdfSopenharmony_ci		return endtoken;
1029f08c3bdfSopenharmony_ci	}
1030f08c3bdfSopenharmony_ci
1031f08c3bdfSopenharmony_ci	begin = setup_stream(&stream, idx, fd, buffer, 0);
1032f08c3bdfSopenharmony_ci	end = tokenize_stream(&stream);
1033f08c3bdfSopenharmony_ci	if (endtoken)
1034f08c3bdfSopenharmony_ci		end->next = endtoken;
1035f08c3bdfSopenharmony_ci	return begin;
1036f08c3bdfSopenharmony_ci}
1037