libcpp: Update to Unicode 15

The following pseudo-patch regenerates the libcpp tables with Unicode 15.0.0
which added 4489 new characters.

As mentioned previously, this isn't just a matter of running the
two libcpp/make*.cc programs on the new Unicode files, but one needs
to manually update a table inside of makeuname2c.cc according to
a table in Unicode text (which is partially reflected in the text
files, but e.g. in Unicode 14.0.0 not 100% accurately, in 15.0.0
actually accurately).
I've also added some randomly chosen subset of those 4489 new
characters to a testcase.

2022-11-04  Jakub Jelinek  <jakub@redhat.com>

gcc/testsuite/
	* c-c++-common/cpp/named-universal-char-escape-1.c: Add tests for some
	characters newly added in Unicode 15.0.0.
libcpp/
	* makeuname2c.cc (struct generated): Update from Unicode 15.0.0
	table 4-8.
	* ucnid.h: Regenerated for Unicode 15.0.0.
	* uname2c.h: Likewise.
This commit is contained in:
Jakub Jelinek
2022-11-04 18:18:42 +01:00
parent 26d2db895b
commit 2662d537b0
4 changed files with 17225 additions and 16992 deletions

View File

@@ -69,7 +69,7 @@ struct entry { const char *name; unsigned long codepoint; };
static struct entry *entries;
static unsigned long num_allocated, num_entries;
/* Unicode 14 Table 4-8. */
/* Unicode 15 Table 4-8. */
struct generated {
const char *prefix;
/* max_high is a workaround for UnicodeData.txt inconsistencies
@@ -81,13 +81,14 @@ struct generated {
static struct generated generated_ranges[] =
{ { "HANGUL SYLLABLE ", 0xac00, 0xd7a3, 0, 0, 0 }, /* NR1 rule */
{ "CJK UNIFIED IDEOGRAPH-", 0x3400, 0x4dbf, 0, 1, 0 }, /* NR2 rules */
{ "CJK UNIFIED IDEOGRAPH-", 0x4e00, 0x9ffc, 0x9fff, 1, 0 },
{ "CJK UNIFIED IDEOGRAPH-", 0x20000, 0x2a6dd, 0x2a6df, 1, 0 },
{ "CJK UNIFIED IDEOGRAPH-", 0x2a700, 0x2b734, 0x2b738, 1, 0 },
{ "CJK UNIFIED IDEOGRAPH-", 0x4e00, 0x9fff, 0, 1, 0 },
{ "CJK UNIFIED IDEOGRAPH-", 0x20000, 0x2a6df, 0, 1, 0 },
{ "CJK UNIFIED IDEOGRAPH-", 0x2a700, 0x2b739, 0, 1, 0 },
{ "CJK UNIFIED IDEOGRAPH-", 0x2b740, 0x2b81d, 0, 1, 0 },
{ "CJK UNIFIED IDEOGRAPH-", 0x2b820, 0x2cea1, 0, 1, 0 },
{ "CJK UNIFIED IDEOGRAPH-", 0x2ceb0, 0x2ebe0, 0, 1, 0 },
{ "CJK UNIFIED IDEOGRAPH-", 0x30000, 0x3134a, 0, 1, 0 },
{ "CJK UNIFIED IDEOGRAPH-", 0x31350, 0x323af, 0, 1, 0 },
{ "TANGUT IDEOGRAPH-", 0x17000, 0x187f7, 0, 2, 0 },
{ "TANGUT IDEOGRAPH-", 0x18d00, 0x18d08, 0, 2, 0 },
{ "KHITAN SMALL SCRIPT CHARACTER-", 0x18b00, 0x18cd5, 0, 3, 0 },