preprocessor: Adjust C rules on UCNs for C23 [PR117162]

As noted in bug 117162, C23 changed some rules on UCNs to match C++
(this was a late change agreed in the resolution to CD2 comment
US-032, implementing changes from N3124), which we need to implement.

Allow UCNs below 0xa0 outside identifiers for C, with a
pedwarn-if-pedantic before C23 (and a warning with -Wc11-c23-compat)
except for the always-allowed cases of UCNs for $ @ `.  Also as part
of that change, do not allow \u0024 in identifiers as equivalent to $
for C23.

Bootstrapped with no regressions for x86_64-pc-linux-gnu.

	PR c/117162

libcpp/
	* include/cpplib.h (struct cpp_options): Add low_ucns.
	* init.cc (struct lang_flags, lang_defaults): Add low_ucns.
	(cpp_set_lang): Set low_ucns
	* charset.cc (_cpp_valid_ucn): For C, allow UCNs below 0xa0
	outside identifiers, with a pedwarn if pedantic before C23 or a
	warning with -Wc11-c23-compat.  Do not allow \u0024 in identifiers
	for C23.

gcc/testsuite/
	* gcc.dg/cpp/c17-ucn-1.c, gcc.dg/cpp/c17-ucn-2.c,
	gcc.dg/cpp/c17-ucn-3.c, gcc.dg/cpp/c17-ucn-4.c,
	gcc.dg/cpp/c23-ucn-2.c, gcc.dg/cpp/c23-ucnid-2.c: New tests.
	* c-c++-common/cpp/delimited-escape-seq-3.c,
	c-c++-common/cpp/named-universal-char-escape-3.c,
	gcc.dg/cpp/c23-ucn-1.c, gcc.dg/cpp/c2y-delimited-escape-seq-3.c:
	Update expected messages
	* gcc.dg/cpp/ucs.c: Use -pedantic-errors.  Update expected
	messages.
This commit is contained in:
Joseph Myers
2024-12-03 13:01:58 +00:00
parent af9a3fe6a5
commit f3b5de944a
14 changed files with 5688 additions and 681 deletions

View File

@@ -113,44 +113,45 @@ struct lang_flags
unsigned int true_false : 1;
unsigned int embed : 1;
unsigned int imaginary_constants : 1;
unsigned int low_ucns : 1;
};
static const struct lang_flags lang_defaults[] = {
/* u e w n
b d 8 l a a t
x u i i c v s s i r d m o r e
x i d u r d n g t h a c z f n e e c u m i
c c n x c d s i l l l c s r l o o d l d d l d t f b m
9 + u i 1 i t g i i i s e i i p p f i e i i u a a e a
9 + m d 1 d d r t t t t p g t t e p t f r m c l l d g */
/* GNUC89 */ { 0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0 },
/* GNUC99 */ { 1,0,1,1,0,0,0,1,1,1,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0 },
/* GNUC11 */ { 1,0,1,1,1,0,0,1,1,1,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0 },
/* GNUC17 */ { 1,0,1,1,1,0,0,1,1,1,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0 },
/* GNUC23 */ { 1,0,1,1,1,1,0,1,1,1,0,1,1,0,1,1,1,1,0,1,1,0,0,0,1,1,0 },
/* GNUC2Y */ { 1,0,1,1,1,1,0,1,1,1,0,1,1,0,1,1,1,1,0,1,1,1,0,1,1,1,1 },
/* STDC89 */ { 0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0 },
/* STDC94 */ { 0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0 },
/* STDC99 */ { 1,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0 },
/* STDC11 */ { 1,0,1,1,1,0,1,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0 },
/* STDC17 */ { 1,0,1,1,1,0,1,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0 },
/* STDC23 */ { 1,0,1,1,1,1,1,1,1,0,0,1,1,0,1,1,1,1,0,1,1,0,0,0,1,1,0 },
/* STDC2Y */ { 1,0,1,1,1,1,1,1,1,0,0,1,1,0,1,1,1,1,0,1,1,1,0,1,1,1,1 },
/* GNUCXX */ { 0,1,1,1,0,1,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0 },
/* CXX98 */ { 0,1,0,1,0,1,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0 },
/* GNUCXX11 */ { 1,1,1,1,1,1,0,1,1,1,1,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0 },
/* CXX11 */ { 1,1,0,1,1,1,1,1,1,1,1,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0 },
/* GNUCXX14 */ { 1,1,1,1,1,1,0,1,1,1,1,1,1,0,0,1,1,0,0,0,0,0,0,0,1,0,0 },
/* CXX14 */ { 1,1,0,1,1,1,1,1,1,1,1,1,1,1,0,0,1,0,0,0,0,0,0,0,1,0,0 },
/* GNUCXX17 */ { 1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,0,0,1,0,0 },
/* CXX17 */ { 1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,0,0,0,0,0,0,0,1,0,0 },
/* GNUCXX20 */ { 1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,0,0,1,0,0 },
/* CXX20 */ { 1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,0,0,1,0,0 },
/* GNUCXX23 */ { 1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,1,0,1,0,0 },
/* CXX23 */ { 1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,1,0,1,0,0 },
/* GNUCXX26 */ { 1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,1,0,1,0,0 },
/* CXX26 */ { 1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,1,0,1,0,0 },
/* ASM */ { 0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }
b d 8 l a a t l
x u i i c v s s i r d m o r e o
x i d u r d n g t h a c z f n e e c u m i w
c c n x c d s i l l l c s r l o o d l d d l d t f b m u
9 + u i 1 i t g i i i s e i i p p f i e i i u a a e a c
9 + m d 1 d d r t t t t p g t t e p t f r m c l l d g n */
/* GNUC89 */ { 0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0 },
/* GNUC99 */ { 1,0,1,1,0,0,0,1,1,1,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0 },
/* GNUC11 */ { 1,0,1,1,1,0,0,1,1,1,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0 },
/* GNUC17 */ { 1,0,1,1,1,0,0,1,1,1,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0 },
/* GNUC23 */ { 1,0,1,1,1,1,0,1,1,1,0,1,1,0,1,1,1,1,0,1,1,0,0,0,1,1,0,1 },
/* GNUC2Y */ { 1,0,1,1,1,1,0,1,1,1,0,1,1,0,1,1,1,1,0,1,1,1,0,1,1,1,1,1 },
/* STDC89 */ { 0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0 },
/* STDC94 */ { 0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0 },
/* STDC99 */ { 1,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0 },
/* STDC11 */ { 1,0,1,1,1,0,1,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0 },
/* STDC17 */ { 1,0,1,1,1,0,1,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0 },
/* STDC23 */ { 1,0,1,1,1,1,1,1,1,0,0,1,1,0,1,1,1,1,0,1,1,0,0,0,1,1,0,1 },
/* STDC2Y */ { 1,0,1,1,1,1,1,1,1,0,0,1,1,0,1,1,1,1,0,1,1,1,0,1,1,1,1,1 },
/* GNUCXX */ { 0,1,1,1,0,1,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,1 },
/* CXX98 */ { 0,1,0,1,0,1,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,1 },
/* GNUCXX11 */ { 1,1,1,1,1,1,0,1,1,1,1,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,1 },
/* CXX11 */ { 1,1,0,1,1,1,1,1,1,1,1,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,1 },
/* GNUCXX14 */ { 1,1,1,1,1,1,0,1,1,1,1,1,1,0,0,1,1,0,0,0,0,0,0,0,1,0,0,1 },
/* CXX14 */ { 1,1,0,1,1,1,1,1,1,1,1,1,1,1,0,0,1,0,0,0,0,0,0,0,1,0,0,1 },
/* GNUCXX17 */ { 1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,0,0,1,0,0,1 },
/* CXX17 */ { 1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1 },
/* GNUCXX20 */ { 1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,0,0,1,0,0,1 },
/* CXX20 */ { 1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,0,0,1,0,0,1 },
/* GNUCXX23 */ { 1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,1,0,1,0,0,1 },
/* CXX23 */ { 1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,1,0,1,0,0,1 },
/* GNUCXX26 */ { 1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,1,0,1,0,0,1 },
/* CXX26 */ { 1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,1,0,1,0,0,1 },
/* ASM */ { 0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }
};
/* Sets internal flags correctly for a given language. */
@@ -188,6 +189,7 @@ cpp_set_lang (cpp_reader *pfile, enum c_lang lang)
CPP_OPTION (pfile, true_false) = l->true_false;
CPP_OPTION (pfile, embed) = l->embed;
CPP_OPTION (pfile, imaginary_constants) = l->imaginary_constants;
CPP_OPTION (pfile, low_ucns) = l->low_ucns;
}
/* Initialize library global state. */