libcpp, v2: Add support for gnu::base64 #embed parameter

This patch adds another #embed extension, gnu::base64.

As mentioned in the documentation, this extension is primarily
intended for use by the preprocessor, so that for larger embeds (say 32+
or 64+ bytes long) it doesn't have to emit tens of thousands or
millions of comma-separated integer literals, which would be very expensive
to parse again, but can instead emit
 #embed "." __gnu__::__base64__( \
 "Tm9uIGVyYW0gbsOpc2NpdXMsIEJydXRlLCBjdW0sIHF1w6Ygc3VtbWlzIGluZ8OpbmlpcyBleHF1" \
 "aXNpdMOhcXVlIGRvY3Ryw61uYSBwaGlsw7Nzb3BoaSBHcsOmY28gc2VybcOzbmUgdHJhY3RhdsOt" \
 "c3NlbnQsIGVhIExhdMOtbmlzIGzDrXR0ZXJpcyBtYW5kYXLDqW11cywgZm9yZSB1dCBoaWMgbm9z" \
 "dGVyIGxhYm9yIGluIHbDoXJpYXMgcmVwcmVoZW5zacOzbmVzIGluY8O6cnJlcmV0LiBuYW0gcXVp" \
 "YsO6c2RhbSwgZXQgaWlzIHF1aWRlbSBub24gw6FkbW9kdW0gaW5kw7NjdGlzLCB0b3R1bSBob2Mg" \
 "ZMOtc3BsaWNldCBwaGlsb3NvcGjDoXJpLiBxdWlkYW0gYXV0ZW0gbm9uIHRhbSBpZCByZXByZWjD" \
 "qW5kdW50LCBzaSByZW3DrXNzaXVzIGFnw6F0dXIsIHNlZCB0YW50dW0gc3TDumRpdW0gdGFtcXVl" \
 "IG11bHRhbSDDs3BlcmFtIHBvbsOpbmRhbSBpbiBlbyBub24gYXJiaXRyw6FudHVyLiBlcnVudCDD" \
 "qXRpYW0sIGV0IGlpIHF1aWRlbSBlcnVkw610aSBHcsOmY2lzIGzDrXR0ZXJpcywgY29udGVtbsOp" \
 "bnRlcyBMYXTDrW5hcywgcXVpIHNlIGRpY2FudCBpbiBHcsOmY2lzIGxlZ8OpbmRpcyDDs3BlcmFt" \
 "IG1hbGxlIGNvbnPDum1lcmUuIHBvc3Ryw6ltbyDDoWxpcXVvcyBmdXTDunJvcyBzw7pzcGljb3Is" \
 "IHF1aSBtZSBhZCDDoWxpYXMgbMOtdHRlcmFzIHZvY2VudCwgZ2VudXMgaG9jIHNjcmliw6luZGks" \
 "IGV0c2kgc2l0IGVsw6lnYW5zLCBwZXJzw7Nuw6YgdGFtZW4gZXQgZGlnbml0w6F0aXMgZXNzZSBu" \
 "ZWdlbnQu")
with the meaning: don't actually load any file; instead, base64 decode
(RFC4648 with A-Za-z0-9+/ chars and = padding, no newlines in between)
the string and use that as the data.  This form is chosen because it should
be -pedantic-errors clean and fairly cheap to decode, and an optimizing
compiler could then handle it as a binary blob similar to a normal #embed,
while the data isn't left somewhere on the disk, so distcc/ccache etc.
can move the preprocessed source without issues.
It makes no sense to support limit and gnu::offset parameters together
with it IMHO; why would somebody bother providing the full data and then
throw some of it away?  prefix/suffix/if_empty are supported as usual,
but are not intended to be used by the preprocessor.

This patch adds just the extension side, not the actual emitting of this
during -E or -E -fdirectives-only for now, that will be included in the
upcoming patch.

Compared to the earlier posted version of this extension, this patch
allows the string concatenation in the parameter argument (but still
doesn't allow escapes in the string, why would anyone use them when
only A-Za-z0-9+/= are valid).  The patch also adds support for parsing
this even in -fpreprocessed compilation.

2024-09-12  Jakub Jelinek  <jakub@redhat.com>

libcpp/
	* internal.h (struct cpp_embed_params): Add base64 member.
	(_cpp_free_embed_params_tokens): Declare.
	* directives.cc (DIRECTIVE_TABLE): Add IN_I flag to T_EMBED.
	(save_token_for_embed, _cpp_free_embed_params_tokens): New functions.
	(EMBED_PARAMS): Add gnu::base64 entry.
	(_cpp_parse_embed_params): Parse gnu::base64 parameter.  If
	-fpreprocessed without -fdirectives-only, require #embed to have
	gnu::base64 parameter.  Diagnose conflict between gnu::base64 and
	limit or gnu::offset parameters.
	(do_embed): Use _cpp_free_embed_params_tokens.
	* files.cc (finish_embed, base64_dec_fn): New functions.
	(base64_dec): New array.
	(B64D0, B64D1, B64D2, B64D3): Define.
	(finish_base64_embed): New function.
	(_cpp_stack_embed): Use finish_embed.  Handle params->base64
	using finish_base64_embed.
	* macro.cc (builtin_has_embed): Call _cpp_free_embed_params_tokens.
gcc/
	* doc/cpp.texi (Binary Resource Inclusion): Document gnu::base64
	parameter.
gcc/testsuite/
	* c-c++-common/cpp/embed-17.c: New test.
	* c-c++-common/cpp/embed-18.c: New test.
	* c-c++-common/cpp/embed-19.c: New test.
	* c-c++-common/cpp/embed-27.c: New test.
	* gcc.dg/cpp/embed-6.c: New test.
	* gcc.dg/cpp/embed-7.c: New test.
This commit is contained in:
Jakub Jelinek
2024-09-12 18:17:05 +02:00
committed by Jakub Jelinek
parent c5009eb887
commit ce0aecc7df
11 changed files with 709 additions and 183 deletions

View File

@@ -159,7 +159,7 @@ static void cpp_pop_definition (cpp_reader *, struct def_pragma_macro *);
D(error, T_ERROR, STDC89, 0) \
D(pragma, T_PRAGMA, STDC89, IN_I) \
D(warning, T_WARNING, STDC23, 0) \
D(embed, T_EMBED, STDC23, INCL | EXPAND) \
D(embed, T_EMBED, STDC23, IN_I | INCL | EXPAND) \
D(include_next, T_INCLUDE_NEXT, EXTENSION, INCL | EXPAND) \
D(ident, T_IDENT, EXTENSION, IN_I) \
D(import, T_IMPORT, EXTENSION, INCL | EXPAND) /* ObjC */ \
@@ -941,6 +941,50 @@ do_include_next (cpp_reader *pfile)
do_include_common (pfile, type);
}
/* Append a copy of *TOKEN to the saved token list *SAVE.  The first token
   run is set up lazily on the first call and a fresh run is chained on
   whenever the current one is full.  The stored copy gets NO_EXPAND set.
   Used by skip_balanced_token_seq and _cpp_parse_embed_params.  */
static void
save_token_for_embed (cpp_embed_params_tokens *save, const cpp_token *token)
{
  if (!save->count)
    {
      /* Nothing saved so far: initialize the run embedded in *SAVE.  */
      _cpp_init_tokenrun (&save->base_run, 4);
      save->cur_run = &save->base_run;
      save->cur_token = save->cur_run->base;
    }
  else if (save->cur_run->limit == save->cur_token)
    {
      /* The current run is exhausted: link a new one after it.  */
      tokenrun *fresh = XNEW (tokenrun);
      save->cur_run->next = fresh;
      fresh->prev = save->cur_run;
      _cpp_init_tokenrun (fresh, 4);
      save->cur_run = fresh;
      save->cur_token = fresh->base;
    }

  cpp_token *slot = save->cur_token++;
  *slot = *token;
  slot->flags |= NO_EXPAND;
  ++save->count;
}
/* Release the storage held by the tokens saved in *SAVE and mark the list
   as empty again.  Does nothing if no tokens were saved.  */
void
_cpp_free_embed_params_tokens (cpp_embed_params_tokens *save)
{
  if (save->count != 0)
    {
      tokenrun *run = &save->base_run;
      while (run)
        {
          tokenrun *following = run->next;
          XDELETEVEC (run->base);
          /* The base run is embedded in *SAVE itself; only the chained
             runs were allocated separately.  */
          if (run != &save->base_run)
            XDELETE (run);
          run = following;
        }
      save->count = 0;
    }
}
/* Skip over balanced preprocessing tokens until END is found.
If SAVE is non-NULL, remember the parsed tokens in it. NESTED is
false in the outermost invocation of the function and true
@@ -970,26 +1014,7 @@ skip_balanced_token_seq (cpp_reader *pfile, cpp_ttype end,
if (save
&& (token->type != CPP_PADDING || save->count)
&& (token->type != end || nested))
{
if (save->count == 0)
{
_cpp_init_tokenrun (&save->base_run, 4);
save->cur_run = &save->base_run;
save->cur_token = save->base_run.base;
}
else if (save->cur_token == save->cur_run->limit)
{
save->cur_run->next = XNEW (tokenrun);
save->cur_run->next->prev = save->cur_run;
_cpp_init_tokenrun (save->cur_run->next, 4);
save->cur_run = save->cur_run->next;
save->cur_token = save->cur_run->base;
}
*save->cur_token = *token;
save->cur_token->flags |= NO_EXPAND;
save->cur_token++;
save->count++;
}
save_token_for_embed (save, token);
if (token->type == end)
return;
switch (token->type)
@@ -1024,6 +1049,7 @@ skip_balanced_token_seq (cpp_reader *pfile, cpp_ttype end,
EMBED_PARAM (PREFIX, "prefix") \
EMBED_PARAM (SUFFIX, "suffix") \
EMBED_PARAM (IF_EMPTY, "if_empty") \
EMBED_PARAM (GNU_BASE64, "base64") \
EMBED_PARAM (GNU_OFFSET, "offset")
enum embed_param_kind {
@@ -1067,12 +1093,33 @@ _cpp_parse_embed_params (cpp_reader *pfile, struct cpp_embed_params *params)
cpp_error (pfile, CPP_DL_ERROR, "expected ')'");
return false;
}
return ret;
}
else if (token->type == CPP_CLOSE_PAREN && params->has_embed)
return ret;
cpp_error (pfile, CPP_DL_ERROR, "expected parameter name");
return false;
else if (token->type != CPP_CLOSE_PAREN || !params->has_embed)
{
cpp_error (pfile, CPP_DL_ERROR, "expected parameter name");
return false;
}
if (params->base64.count
&& (seen & ((1 << EMBED_PARAM_LIMIT)
| (1 << EMBED_PARAM_GNU_OFFSET))) != 0)
{
ret = false;
if (!params->has_embed)
cpp_error_with_line (pfile, CPP_DL_ERROR,
params->base64.base_run.base->src_loc, 0,
"'gnu::base64' parameter conflicts with "
"'limit' or 'gnu::offset' parameters");
}
else if (params->base64.count == 0
&& CPP_OPTION (pfile, preprocessed))
{
ret = false;
if (!params->has_embed)
cpp_error_with_line (pfile, CPP_DL_ERROR, params->loc, 0,
"'gnu::base64' parameter required in "
"preprocessed source");
}
return ret;
}
param_name = NODE_NAME (token->val.node.spelling);
param_name_len = NODE_LEN (token->val.node.spelling);
@@ -1197,6 +1244,53 @@ _cpp_parse_embed_params (cpp_reader *pfile, struct cpp_embed_params *params)
}
token = _cpp_get_token_no_padding (pfile);
}
else if (param_kind == EMBED_PARAM_GNU_BASE64)
{
token = _cpp_get_token_no_padding (pfile);
while (token->type == CPP_OTHER
&& CPP_OPTION (pfile, preprocessed)
&& !CPP_OPTION (pfile, directives_only)
&& token->val.str.len == 1
&& token->val.str.text[0] == '\\')
{
/* Allow backslash newline inside of gnu::base64 argument
for -fpreprocessed, so that it doesn't have to be
megabytes long line. */
pfile->state.in_directive = 0;
token = _cpp_get_token_no_padding (pfile);
pfile->state.in_directive = 3;
}
if (token->type == CPP_STRING)
{
do
{
save_token_for_embed (&params->base64, token);
token = _cpp_get_token_no_padding (pfile);
while (token->type == CPP_OTHER
&& CPP_OPTION (pfile, preprocessed)
&& !CPP_OPTION (pfile, directives_only)
&& token->val.str.len == 1
&& token->val.str.text[0] == '\\')
{
pfile->state.in_directive = 0;
token = _cpp_get_token_no_padding (pfile);
pfile->state.in_directive = 3;
}
}
while (token->type == CPP_STRING);
if (token->type != CPP_CLOSE_PAREN)
cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, 0,
"expected ')'");
}
else
{
cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, 0,
"expected character string literal");
if (token->type != CPP_CLOSE_PAREN)
token = _cpp_get_token_no_padding (pfile);
}
token = _cpp_get_token_no_padding (pfile);
}
else if (token->type == CPP_OPEN_PAREN)
{
cpp_embed_params_tokens *save = NULL;
@@ -1277,26 +1371,10 @@ do_embed (cpp_reader *pfile)
if (ok)
_cpp_stack_embed (pfile, fname, angle_brackets, &params);
for (int i = 0; i < 3; ++i)
{
cpp_embed_params_tokens *p;
if (i == 0)
p = &params.prefix;
else if (i == 1)
p = &params.suffix;
else
p = &params.if_empty;
if (p->count == 0)
continue;
tokenrun *n;
for (tokenrun *t = &p->base_run; t; t = n)
{
n = t->next;
XDELETEVEC (t->base);
if (t != &p->base_run)
XDELETE (t);
}
}
_cpp_free_embed_params_tokens (&params.prefix);
_cpp_free_embed_params_tokens (&params.suffix);
_cpp_free_embed_params_tokens (&params.if_empty);
_cpp_free_embed_params_tokens (&params.base64);
done:
XDELETEVEC (fname);

View File

@@ -1220,6 +1220,320 @@ cpp_probe_header_unit (cpp_reader *pfile, const char *name, bool angle,
return nullptr;
}
/* Helper function for _cpp_stack_embed. Finish #embed/__has_embed processing
after a file is found and data loaded into buffer.

FILE supplies the loaded bytes (file->buffer, file->limit) and PARAMS the
offset/limit values and the saved prefix/suffix/if_empty tokens.  The first
resulting token is stored in pfile->directive_result; any further tokens
are placed in a newly obtained buffer and pushed as a token context.

Returns 0 after diagnosing an error, 1 if at least one byte of data is
emitted and 2 if the resulting data is empty.  */
static int
finish_embed (cpp_reader *pfile, _cpp_file *file,
struct cpp_embed_params *params)
{
const uchar *buffer = file->buffer;
size_t limit = file->limit;
/* Skip forward by the distance between the requested offset and FILE's
own offset; if that is more than the loaded data, nothing is left.  */
if (params->offset - file->offset > limit)
limit = 0;
else
{
buffer += params->offset - file->offset;
limit -= params->offset - file->offset;
}
/* Clamp to the requested limit parameter.  */
if (params->limit < limit)
limit = params->limit;
/* For sizes larger than say 64 bytes, this is just a temporary
solution, we should emit a single new token which the FEs will
handle as an optimization. */
/* Make sure the number of tokens to create (LIMIT numbers, LIMIT - 1
commas, plus prefix and suffix, or just if_empty) times
sizeof (cpp_token) cannot overflow size_t.  */
size_t max = INTTYPE_MAXIMUM (size_t) / sizeof (cpp_token);
if (limit > max / 2
|| (limit
? (params->prefix.count > max
|| params->suffix.count > max
|| (limit * 2 - 1 + params->prefix.count
+ params->suffix.count > max))
: params->if_empty.count > max))
{
cpp_error_at (pfile, CPP_DL_ERROR, params->loc,
"%s is too large", file->path);
return 0;
}
/* Compute in LEN the storage needed for the decimal spelling of every
data byte, each followed by one extra byte (the NUL that sprintf
writes below).  */
size_t len = 0;
for (size_t i = 0; i < limit; ++i)
{
if (buffer[i] < 10)
len += 2;
else if (buffer[i] < 100)
len += 3;
#if UCHAR_MAX == 255
else
len += 4;
#else
else if (buffer[i] < 1000)
len += 4;
else
{
char buf[64];
len += sprintf (buf, "%d", buffer[i]) + 1;
}
#endif
if (len > INTTYPE_MAXIMUM (ssize_t))
{
cpp_error_at (pfile, CPP_DL_ERROR, params->loc,
"%s is too large", file->path);
return 0;
}
}
uchar *s = len ? _cpp_unaligned_alloc (pfile, len) : NULL;
_cpp_buff *tok_buff = NULL;
/* TOK starts at pfile->directive_result, which receives the very first
token; everything after it goes into TOKS.  */
cpp_token *toks = NULL, *tok = &pfile->directive_result;
/* COUNT is the number of tokens beyond the first one.  With data there are
LIMIT numbers and LIMIT - 1 commas between the prefix and suffix tokens;
without data only the if_empty tokens are used.  */
size_t count = 0;
if (limit)
count = (params->prefix.count + limit * 2 - 1
+ params->suffix.count) - 1;
else if (params->if_empty.count)
count = params->if_empty.count - 1;
if (count)
{
tok_buff = _cpp_get_buff (pfile, count * sizeof (cpp_token));
toks = (cpp_token *) tok_buff->base;
}
/* The leading tokens are the prefix when there is data and the if_empty
tokens otherwise.  */
cpp_embed_params_tokens *prefix
= limit ? &params->prefix : &params->if_empty;
if (prefix->count)
{
/* First saved token goes to directive_result, the remaining ones are
copied run by run into TOKS.  */
*tok = *prefix->base_run.base;
tok = toks;
tokenrun *cur_run = &prefix->base_run;
while (cur_run)
{
/* Only the last run is partially filled (up to cur_token).  */
size_t cnt = (cur_run->next ? cur_run->limit
: prefix->cur_token) - cur_run->base;
cpp_token *t = cur_run->base;
if (cur_run == &prefix->base_run)
{
/* Skip the token already stored in directive_result.  */
t++;
cnt--;
}
memcpy (tok, t, cnt * sizeof (cpp_token));
tok += cnt;
cur_run = cur_run->next;
}
}
/* Emit one CPP_NUMBER token per data byte, separated by CPP_COMMA tokens.  */
for (size_t i = 0; i < limit; ++i)
{
tok->src_loc = params->loc;
tok->type = CPP_NUMBER;
tok->flags = NO_EXPAND;
if (i == 0)
tok->flags |= PREV_WHITE;
tok->val.str.text = s;
tok->val.str.len = sprintf ((char *) s, "%d", buffer[i]);
s += tok->val.str.len + 1;
/* After filling directive_result continue in the allocated array.  */
if (tok == &pfile->directive_result)
tok = toks;
else
tok++;
if (i < limit - 1)
{
tok->src_loc = params->loc;
tok->type = CPP_COMMA;
tok->flags = NO_EXPAND;
tok++;
}
}
if (limit && params->suffix.count)
{
tokenrun *cur_run = &params->suffix.base_run;
cpp_token *orig_tok = tok;
while (cur_run)
{
size_t cnt = (cur_run->next ? cur_run->limit
: params->suffix.cur_token) - cur_run->base;
cpp_token *t = cur_run->base;
memcpy (tok, t, cnt * sizeof (cpp_token));
tok += cnt;
cur_run = cur_run->next;
}
/* Mark the first suffix token as preceded by whitespace.  */
orig_tok->flags |= PREV_WHITE;
}
pfile->directive_result.flags |= PREV_WHITE;
if (count)
{
_cpp_push_token_context (pfile, NULL, toks, count);
pfile->context->buff = tok_buff;
}
return limit ? 1 : 2;
}
/* Helper function for initialization of base64_dec table.
Can't rely on ASCII compatibility, so check each letter
separately.

Returns the 6-bit value (0 to 63) of character C in the base64 alphabet
'A'-'Z', 'a'-'z', '0'-'9', '+', '/', or -1 for any other character.  Note
that the padding character '=' is not part of the alphabet here and
therefore also yields -1.  */
constexpr signed char
base64_dec_fn (unsigned char c)
{
return (c == 'A' ? 0 : c == 'B' ? 1 : c == 'C' ? 2 : c == 'D' ? 3
: c == 'E' ? 4 : c == 'F' ? 5 : c == 'G' ? 6 : c == 'H' ? 7
: c == 'I' ? 8 : c == 'J' ? 9 : c == 'K' ? 10 : c == 'L' ? 11
: c == 'M' ? 12 : c == 'N' ? 13 : c == 'O' ? 14 : c == 'P' ? 15
: c == 'Q' ? 16 : c == 'R' ? 17 : c == 'S' ? 18 : c == 'T' ? 19
: c == 'U' ? 20 : c == 'V' ? 21 : c == 'W' ? 22 : c == 'X' ? 23
: c == 'Y' ? 24 : c == 'Z' ? 25
: c == 'a' ? 26 : c == 'b' ? 27 : c == 'c' ? 28 : c == 'd' ? 29
: c == 'e' ? 30 : c == 'f' ? 31 : c == 'g' ? 32 : c == 'h' ? 33
: c == 'i' ? 34 : c == 'j' ? 35 : c == 'k' ? 36 : c == 'l' ? 37
: c == 'm' ? 38 : c == 'n' ? 39 : c == 'o' ? 40 : c == 'p' ? 41
: c == 'q' ? 42 : c == 'r' ? 43 : c == 's' ? 44 : c == 't' ? 45
: c == 'u' ? 46 : c == 'v' ? 47 : c == 'w' ? 48 : c == 'x' ? 49
: c == 'y' ? 50 : c == 'z' ? 51
: c == '0' ? 52 : c == '1' ? 53 : c == '2' ? 54 : c == '3' ? 55
: c == '4' ? 56 : c == '5' ? 57 : c == '6' ? 58 : c == '7' ? 59
: c == '8' ? 60 : c == '9' ? 61 : c == '+' ? 62 : c == '/' ? 63
: -1);
}
/* base64 decoding table.  Entry N holds base64_dec_fn (N), i.e. the 6-bit
value of character code N or -1 if N is not in the base64 alphabet.
B64D0 yields one entry and each B64Dn (n > 0) expands to four B64D(n-1)
groups, so B64D3 covers 64 consecutive codes and the four B64D3 uses
below cover codes 0 through 255.  */
static constexpr signed char base64_dec[] = {
#define B64D0(x) base64_dec_fn (x)
#define B64D1(x) B64D0 (x), B64D0 (x + 1), B64D0 (x + 2), B64D0 (x + 3)
#define B64D2(x) B64D1 (x), B64D1 (x + 4), B64D1 (x + 8), B64D1 (x + 12)
#define B64D3(x) B64D2 (x), B64D2 (x + 16), B64D2 (x + 32), B64D2 (x + 48)
B64D3 (0), B64D3 (64), B64D3 (128), B64D3 (192)
};
/* Helper function for _cpp_stack_embed. Handle #embed/__has_embed with
gnu::base64 parameter.

FNAME and ANGLE describe the resource name as written; only the quoted
form "." is accepted.  The string literal tokens saved in PARAMS->base64
are concatenated and decoded as base64.

Returns 0 after an error.  For __has_embed (PARAMS->has_embed) returns 1
if the decoded data is non-empty and 2 if it is empty, without keeping
the decoded bytes.  Otherwise wraps the decoded bytes in a new _cpp_file
and returns the result of finish_embed.  */
static int
finish_base64_embed (cpp_reader *pfile, const char *fname, bool angle,
struct cpp_embed_params *params)
{
size_t len, end, i, j, base64_len = 0, cnt;
uchar *buf = NULL, *q, pbuf[4], qbuf[3];
const uchar *base64_str;
if (angle || strcmp (fname, "."))
{
if (!params->has_embed)
cpp_error_at (pfile, CPP_DL_ERROR, params->loc,
"'gnu::base64' parameter can be only used with \".\"");
return 0;
}
/* First pass: check that every saved token is spelled as a plain
double-quoted string and sum up the payload lengths (spelling minus
the two quotes) in BASE64_LEN.  */
tokenrun *cur_run = &params->base64.base_run;
cpp_token *tend, *tok;
while (cur_run)
{
tend = cur_run->next ? cur_run->limit : params->base64.cur_token;
for (tok = cur_run->base; tok < tend; ++tok)
{
if (tok->val.str.len < 2
|| tok->val.str.text[0] != '"'
|| tok->val.str.text[tok->val.str.len - 1] != '"')
{
/* Common error exit, also reached by goto from the code further
below; BUF is still NULL if nothing has been allocated yet.  */
fail:
cpp_error_at (pfile, CPP_DL_ERROR, params->loc,
"'gnu::base64' argument not valid base64 "
"encoded string");
free (buf);
return 0;
}
/* Guard the running total against size_t overflow.  */
if (tok->val.str.len - 2 > (~(size_t) 0) - base64_len)
goto fail;
base64_len += tok->val.str.len - 2;
}
cur_run = cur_run->next;
}
/* Encoded data always comes in groups of four characters.  */
if ((base64_len & 3) != 0)
goto fail;
/* Upper bound of the decoded size; reduced below if padding is found.  */
len = base64_len / 4 * 3;
end = len;
/* For __has_embed the data only needs to be validated, so each group is
decoded into the small scratch buffer QBUF; otherwise allocate the
real output buffer.  */
if (params->has_embed)
q = qbuf;
else
{
buf = XNEWVEC (uchar, len ? len : 1);
q = buf;
}
/* Second pass: set up a cursor (BASE64_STR, CNT) over the payload of the
first token; TOK points to the next token to be consumed.  */
cur_run = &params->base64.base_run;
tend = cur_run->next ? cur_run->limit : params->base64.cur_token;
tok = cur_run->base;
base64_str = tok->val.str.text + 1;
cnt = tok->val.str.len - 2;
++tok;
for (i = 0; i < end; i += 3)
{
/* Gather the next four encoded characters into PBUF, moving on to
following tokens and token runs whenever the current string is
used up.  */
for (j = 0; j < 4; ++j)
{
while (cnt == 0)
{
if (tok == tend)
{
cur_run = cur_run->next;
tend = (cur_run->next ? cur_run->limit
: params->base64.cur_token);
tok = cur_run->base;
}
base64_str = tok->val.str.text + 1;
cnt = tok->val.str.len - 2;
++tok;
}
pbuf[j] = *base64_str;
base64_str++;
--cnt;
}
/* '=' padding is only accepted in the last group.  END becomes the
number of bytes coming from complete groups and LEN the real decoded
length, which is END + 2 for one '=' and END + 1 for two.  */
if (pbuf[3] == '=' && i + 3 >= end)
{
end = len - 3;
--len;
if (pbuf[2] == '=')
--len;
break;
}
int a = base64_dec[pbuf[0]];
int b = base64_dec[pbuf[1]];
int c = base64_dec[pbuf[2]];
int d = base64_dec[pbuf[3]];
if (a == -1 || b == -1 || c == -1 || d == -1)
goto fail;
/* Repack four 6-bit values into three bytes.  */
q[0] = (a << 2) | (b >> 4);
q[1] = (b << 4) | (c >> 2);
q[2] = (c << 6) | d;
/* In the __has_embed case Q keeps pointing at the scratch buffer.  */
if (!params->has_embed)
q += 3;
}
/* Decode the padded final group left in PBUF, if any.  */
if (len != end)
{
int a = base64_dec[pbuf[0]];
int b = base64_dec[pbuf[1]];
if (a == -1 || b == -1)
goto fail;
q[0] = (a << 2) | (b >> 4);
if (len - end == 2)
{
int c = base64_dec[pbuf[2]];
if (c == -1)
goto fail;
q[1] = (b << 4) | (c >> 2);
/* The bits of the last character that do not contribute to the
output must be zero.  */
if ((c & 3) != 0)
goto fail;
}
else if ((b & 15) != 0)
goto fail;
}
if (params->has_embed)
return len ? 1 : 2;
/* Create a file entry that owns the decoded buffer, link it into
pfile->all_files and let finish_embed build the tokens.  */
_cpp_file *file = make_cpp_file (NULL, "");
file->embed = 1;
file->next_file = pfile->all_files;
pfile->all_files = file;
/* NOTE(review): -1 presumably converts to the largest cpp_num_part value,
i.e. no limit -- confirm that cpp_num_part is unsigned.  */
params->limit = -1;
params->offset = 0;
file->limit = len;
file->buffer = buf;
file->path = xstrdup ("<base64>");
return finish_embed (pfile, file, params);
}
/* Try to load FNAME with #embed/__has_embed parameters PARAMS.
If !PARAMS->has_embed, return new token in pfile->directive_result
(first token) and rest in a pushed non-macro context.
@@ -1230,6 +1544,8 @@ int
_cpp_stack_embed (cpp_reader *pfile, const char *fname, bool angle,
struct cpp_embed_params *params)
{
if (params->base64.count)
return finish_base64_embed (pfile, fname, angle, params);
cpp_dir *dir = search_path_head (pfile, fname, angle, IT_EMBED,
params->has_embed);
if (!dir)
@@ -1449,141 +1765,7 @@ _cpp_stack_embed (cpp_reader *pfile, const char *fname, bool angle,
return limit && params->limit ? 1 : 2;
}
const uchar *buffer = file->buffer;
size_t limit = file->limit;
if (params->offset - file->offset > limit)
limit = 0;
else
{
buffer += params->offset - file->offset;
limit -= params->offset - file->offset;
}
if (params->limit < limit)
limit = params->limit;
/* For sizes larger than say 64 bytes, this is just a temporary
solution, we should emit a single new token which the FEs will
handle as an optimization. */
size_t max = INTTYPE_MAXIMUM (size_t) / sizeof (cpp_token);
if (limit > max / 2
|| (limit
? (params->prefix.count > max
|| params->suffix.count > max
|| (limit * 2 + params->prefix.count
+ params->suffix.count > max))
: params->if_empty.count > max))
{
cpp_error_at (pfile, CPP_DL_ERROR, params->loc,
"%s is too large", file->path);
return 0;
}
size_t len = 0;
for (size_t i = 0; i < limit; ++i)
{
if (buffer[i] < 10)
len += 2;
else if (buffer[i] < 100)
len += 3;
#if UCHAR_MAX == 255
else
len += 4;
#else
else if (buffer[i] < 1000)
len += 4;
else
{
char buf[64];
len += sprintf (buf, "%d", buffer[i]) + 1;
}
#endif
if (len > INTTYPE_MAXIMUM (ssize_t))
{
cpp_error_at (pfile, CPP_DL_ERROR, params->loc,
"%s is too large", file->path);
return 0;
}
}
uchar *s = len ? _cpp_unaligned_alloc (pfile, len) : NULL;
_cpp_buff *tok_buff = NULL;
cpp_token *toks = NULL, *tok = &pfile->directive_result;
size_t count = 0;
if (limit)
count = (params->prefix.count + limit * 2 - 1
+ params->suffix.count) - 1;
else if (params->if_empty.count)
count = params->if_empty.count - 1;
if (count)
{
tok_buff = _cpp_get_buff (pfile, count * sizeof (cpp_token));
toks = (cpp_token *) tok_buff->base;
}
cpp_embed_params_tokens *prefix
= limit ? &params->prefix : &params->if_empty;
if (prefix->count)
{
*tok = *prefix->base_run.base;
tok = toks;
tokenrun *cur_run = &prefix->base_run;
while (cur_run)
{
size_t cnt = (cur_run->next ? cur_run->limit
: prefix->cur_token) - cur_run->base;
cpp_token *t = cur_run->base;
if (cur_run == &prefix->base_run)
{
t++;
cnt--;
}
memcpy (tok, t, cnt * sizeof (cpp_token));
tok += cnt;
cur_run = cur_run->next;
}
}
for (size_t i = 0; i < limit; ++i)
{
tok->src_loc = params->loc;
tok->type = CPP_NUMBER;
tok->flags = NO_EXPAND;
if (i == 0)
tok->flags |= PREV_WHITE;
tok->val.str.text = s;
tok->val.str.len = sprintf ((char *) s, "%d", buffer[i]);
s += tok->val.str.len + 1;
if (tok == &pfile->directive_result)
tok = toks;
else
tok++;
if (i < limit - 1)
{
tok->src_loc = params->loc;
tok->type = CPP_COMMA;
tok->flags = NO_EXPAND;
tok++;
}
}
if (limit && params->suffix.count)
{
tokenrun *cur_run = &params->suffix.base_run;
cpp_token *orig_tok = tok;
while (cur_run)
{
size_t cnt = (cur_run->next ? cur_run->limit
: params->suffix.cur_token) - cur_run->base;
cpp_token *t = cur_run->base;
memcpy (tok, t, cnt * sizeof (cpp_token));
tok += cnt;
cur_run = cur_run->next;
}
orig_tok->flags |= PREV_WHITE;
}
pfile->directive_result.flags |= PREV_WHITE;
if (count)
{
_cpp_push_token_context (pfile, NULL, toks, count);
pfile->context->buff = tok_buff;
}
return limit ? 1 : 2;
return finish_embed (pfile, file, params);
}
/* Retrofit the just-entered main file asif it was an include. This

View File

@@ -638,7 +638,7 @@ struct cpp_embed_params
location_t loc;
bool has_embed;
cpp_num_part limit, offset;
cpp_embed_params_tokens prefix, suffix, if_empty;
cpp_embed_params_tokens prefix, suffix, if_empty, base64;
};
/* Character classes. Based on the more primitive macros in safe-ctype.h.
@@ -812,6 +812,7 @@ extern void _cpp_restore_pragma_names (cpp_reader *, char **);
extern int _cpp_do__Pragma (cpp_reader *, location_t);
extern void _cpp_init_directives (cpp_reader *);
extern void _cpp_init_internal_pragmas (cpp_reader *);
extern void _cpp_free_embed_params_tokens (cpp_embed_params_tokens *);
extern bool _cpp_parse_embed_params (cpp_reader *, struct cpp_embed_params *);
extern void _cpp_do_file_change (cpp_reader *, enum lc_reason, const char *,
linenum_type, unsigned int);

View File

@@ -505,6 +505,8 @@ builtin_has_embed (cpp_reader *pfile)
if (ok && !pfile->state.skip_eval)
result = _cpp_stack_embed (pfile, fname, bracket, &params);
_cpp_free_embed_params_tokens (&params.base64);
XDELETEVEC (fname);
}
else if (paren)