mirror of
https://gcc.gnu.org/git/gcc.git
synced 2026-02-22 12:00:03 -05:00
This patch adds support to GCC's diagnostic subsystem for escaping certain
bytes and Unicode characters when quoting source code.
Specifically, this patch adds a new flag rich_location::m_escape_on_output
which is a hint from a diagnostic that non-ASCII bytes in the pertinent
lines of the user's source code should be escaped when printed.
The patch sets this for the following diagnostics:
- when complaining about stray bytes in the program (when these
are non-printable)
- when complaining about "null character(s) ignored"
- for -Wnormalized= (and generate source ranges for such warnings)
The escaping is controlled by a new option:
-fdiagnostics-escape-format=[unicode|bytes]
For example, consider a diagnostic involving a source line containing the
string "before" followed by the Unicode character U+03C0 ("GREEK SMALL
LETTER PI", with UTF-8 encoding 0xCF 0x80) followed by the byte 0xBF
(a stray UTF-8 trailing byte), followed by the string "after", where the
diagnostic highlights the U+03C0 character.
By default, this line will be printed verbatim to the user when
reporting a diagnostic at it, as:
beforeπXafter
^
(using X for the stray byte to avoid putting invalid UTF-8 in this
commit message)
If the diagnostic sets the "escape" flag, it will be printed as:
before<U+03C0><BF>after
^~~~~~~~
with -fdiagnostics-escape-format=unicode (the default), or as:
before<CF><80><BF>after
^~~~~~~~
if the user supplies -fdiagnostics-escape-format=bytes.
This only affects how the source is printed; it does not affect
how column numbers are printed (as per -fdiagnostics-column-unit=
and -fdiagnostics-column-origin=).
gcc/c-family/ChangeLog:
* c-lex.c (c_lex_with_flags): When complaining about non-printable
CPP_OTHER tokens, set the "escape on output" flag.
gcc/ChangeLog:
* common.opt (fdiagnostics-escape-format=): New.
(diagnostics_escape_format): New enum.
(DIAGNOSTICS_ESCAPE_FORMAT_UNICODE): New enum value.
(DIAGNOSTICS_ESCAPE_FORMAT_BYTES): Likewise.
* diagnostic-format-json.cc (json_end_diagnostic): Add
"escape-source" attribute.
* diagnostic-show-locus.c
(exploc_with_display_col::exploc_with_display_col): Replace
"tabstop" param with a cpp_char_column_policy and add an "aspect"
param. Use these to compute m_display_col accordingly.
(struct char_display_policy): New struct.
(layout::m_policy): New field.
(layout::m_escape_on_output): New field.
(def_policy): New function.
(make_range): Update for changes to exploc_with_display_col ctor.
(default_print_decoded_ch): New.
(width_per_escaped_byte): New.
(escape_as_bytes_width): New.
(escape_as_bytes_print): New.
(escape_as_unicode_width): New.
(escape_as_unicode_print): New.
(make_policy): New.
(layout::layout): Initialize new fields. Update m_exploc ctor
call for above change to ctor.
(layout::maybe_add_location_range): Update for changes to
exploc_with_display_col ctor.
(layout::calculate_x_offset_display): Update for change to
cpp_display_width.
(layout::print_source_line): Pass policy
to cpp_display_width_computation. Capture cpp_decoded_char when
calling process_next_codepoint. Move printing of source code to
m_policy.m_print_cb.
(line_label::line_label): Pass in policy rather than context.
(layout::print_any_labels): Update for change to line_label ctor.
(get_affected_range): Pass in policy rather than context, updating
calls to location_compute_display_column accordingly.
(get_printed_columns): Likewise, also for cpp_display_width.
(correction::correction): Pass in policy rather than tabstop.
(correction::compute_display_cols): Pass m_policy rather than
m_tabstop to cpp_display_width.
(correction::m_tabstop): Replace with...
(correction::m_policy): ...this.
(line_corrections::line_corrections): Pass in policy rather than
context.
(line_corrections::m_context): Replace with...
(line_corrections::m_policy): ...this.
(line_corrections::add_hint): Update to use m_policy rather than
m_context.
(line_corrections::add_hint): Likewise.
(layout::print_trailing_fixits): Likewise.
(selftest::test_display_widths): New.
(selftest::test_layout_x_offset_display_utf8): Update to use
policy rather than tabstop.
(selftest::test_one_liner_labels_utf8): Add test of escaping
source lines.
(selftest::test_diagnostic_show_locus_one_liner_utf8): Update to
use policy rather than tabstop.
(selftest::test_overlapped_fixit_printing): Likewise.
(selftest::test_overlapped_fixit_printing_utf8): Likewise.
(selftest::test_overlapped_fixit_printing_2): Likewise.
(selftest::test_tab_expansion): Likewise.
(selftest::test_escaping_bytes_1): New.
(selftest::test_escaping_bytes_2): New.
(selftest::diagnostic_show_locus_c_tests): Call the new tests.
* diagnostic.c (diagnostic_initialize): Initialize
context->escape_format.
(convert_column_unit): Update to use default character width policy.
(selftest::test_diagnostic_get_location_text): Likewise.
* diagnostic.h (enum diagnostics_escape_format): New enum.
(diagnostic_context::escape_format): New field.
* doc/invoke.texi (-fdiagnostics-escape-format=): New option.
(-fdiagnostics-format=): Add "escape-source" attribute to examples
of JSON output, and document it.
* input.c (location_compute_display_column): Pass in "policy"
rather than "tabstop", passing to
cpp_byte_column_to_display_column.
(selftest::test_cpp_utf8): Update to use cpp_char_column_policy.
* input.h (class cpp_char_column_policy): New forward decl.
(location_compute_display_column): Pass in "policy" rather than
"tabstop".
* opts.c (common_handle_option): Handle
OPT_fdiagnostics_escape_format_.
* selftest.c (temp_source_file::temp_source_file): New ctor
overload taking a size_t.
* selftest.h (temp_source_file::temp_source_file): Likewise.
gcc/testsuite/ChangeLog:
* c-c++-common/diagnostic-format-json-1.c: Add regexp to consume
"escape-source" attribute.
* c-c++-common/diagnostic-format-json-2.c: Likewise.
* c-c++-common/diagnostic-format-json-3.c: Likewise.
* c-c++-common/diagnostic-format-json-4.c: Likewise, twice.
* c-c++-common/diagnostic-format-json-5.c: Likewise.
* gcc.dg/cpp/warn-normalized-4-bytes.c: New test.
* gcc.dg/cpp/warn-normalized-4-unicode.c: New test.
* gcc.dg/encoding-issues-bytes.c: New test.
* gcc.dg/encoding-issues-unicode.c: New test.
* gfortran.dg/diagnostic-format-json-1.F90: Add regexp to consume
"escape-source" attribute.
* gfortran.dg/diagnostic-format-json-2.F90: Likewise.
* gfortran.dg/diagnostic-format-json-3.F90: Likewise.
libcpp/ChangeLog:
* charset.c (convert_escape): Use encoding_rich_location when
complaining about nonprintable unknown escape sequences.
(cpp_display_width_computation::cpp_display_width_computation):
Pass in policy rather than tabstop.
(cpp_display_width_computation::process_next_codepoint): Add "out"
param and populate *out if non-NULL.
(cpp_display_width_computation::advance_display_cols): Pass NULL
to process_next_codepoint.
(cpp_byte_column_to_display_column): Pass in policy rather than
tabstop. Pass NULL to process_next_codepoint.
(cpp_display_column_to_byte_column): Pass in policy rather than
tabstop.
* errors.c (cpp_diagnostic_get_current_location): New function,
splitting out the logic from...
(cpp_diagnostic): ...here.
(cpp_warning_at): New function.
(cpp_pedwarning_at): New function.
* include/cpplib.h (cpp_warning_at): New decl for rich_location.
(cpp_pedwarning_at): Likewise.
(struct cpp_decoded_char): New.
(struct cpp_char_column_policy): New.
(cpp_display_width_computation::cpp_display_width_computation):
Replace "tabstop" param with "policy".
(cpp_display_width_computation::process_next_codepoint): Add "out"
param.
(cpp_display_width_computation::m_tabstop): Replace with...
(cpp_display_width_computation::m_policy): ...this.
(cpp_byte_column_to_display_column): Replace "tabstop" param with
"policy".
(cpp_display_width): Likewise.
(cpp_display_column_to_byte_column): Likewise.
* include/line-map.h (rich_location::escape_on_output_p): New.
(rich_location::set_escape_on_output): New.
(rich_location::m_escape_on_output): New.
* internal.h (cpp_diagnostic_get_current_location): New decl.
(class encoding_rich_location): New.
* lex.c (skip_whitespace): Use encoding_rich_location when
complaining about null characters.
(warn_about_normalization): Generate a source range when
complaining about improperly normalized tokens, rather than just a
point, and use encoding_rich_location so that the source code
is escaped on printing.
* line-map.c (rich_location::rich_location): Initialize
m_escape_on_output.
Signed-off-by: David Malcolm <dmalcolm@redhat.com>
353 lines
8.5 KiB
C
353 lines
8.5 KiB
C
/* Default error handlers for CPP Library.
|
|
Copyright (C) 1986-2021 Free Software Foundation, Inc.
|
|
Written by Per Bothner, 1994.
|
|
Based on CCCP program by Paul Rubin, June 1986
|
|
Adapted to ANSI C, Richard Stallman, Jan 1987
|
|
|
|
This program is free software; you can redistribute it and/or modify it
|
|
under the terms of the GNU General Public License as published by the
|
|
Free Software Foundation; either version 3, or (at your option) any
|
|
later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; see the file COPYING3. If not see
|
|
<http://www.gnu.org/licenses/>.
|
|
|
|
In other words, you are welcome to use, share and improve this program.
|
|
You are forbidden to forbid anyone else to use, share and improve
|
|
what you give them. Help stamp out software-hoarding! */
|
|
|
|
#include "config.h"
|
|
#include "system.h"
|
|
#include "cpplib.h"
|
|
#include "internal.h"
|
|
|
|
/* Get a location_t for the current location in PFILE,
|
|
generally that of the previously lexed token. */
|
|
|
|
location_t
|
|
cpp_diagnostic_get_current_location (cpp_reader *pfile)
|
|
{
|
|
if (CPP_OPTION (pfile, traditional))
|
|
{
|
|
if (pfile->state.in_directive)
|
|
return pfile->directive_line;
|
|
else
|
|
return pfile->line_table->highest_line;
|
|
}
|
|
/* We don't want to refer to a token before the beginning of the
|
|
current run -- that is invalid. */
|
|
else if (pfile->cur_token == pfile->cur_run->base)
|
|
{
|
|
return 0;
|
|
}
|
|
else
|
|
{
|
|
return pfile->cur_token[-1].src_loc;
|
|
}
|
|
}
|
|
|
|
/* Print a diagnostic at the given location. */
|
|
|
|
ATTRIBUTE_FPTR_PRINTF(5,0)
|
|
static bool
|
|
cpp_diagnostic_at (cpp_reader * pfile, enum cpp_diagnostic_level level,
|
|
enum cpp_warning_reason reason, rich_location *richloc,
|
|
const char *msgid, va_list *ap)
|
|
{
|
|
bool ret;
|
|
|
|
if (!pfile->cb.diagnostic)
|
|
abort ();
|
|
ret = pfile->cb.diagnostic (pfile, level, reason, richloc, _(msgid), ap);
|
|
|
|
return ret;
|
|
}
|
|
|
|
/* Print a diagnostic at the location of the previously lexed token. */
|
|
|
|
ATTRIBUTE_FPTR_PRINTF(4,0)
|
|
static bool
|
|
cpp_diagnostic (cpp_reader * pfile, enum cpp_diagnostic_level level,
|
|
enum cpp_warning_reason reason,
|
|
const char *msgid, va_list *ap)
|
|
{
|
|
location_t src_loc = cpp_diagnostic_get_current_location (pfile);
|
|
rich_location richloc (pfile->line_table, src_loc);
|
|
return cpp_diagnostic_at (pfile, level, reason, &richloc, msgid, ap);
|
|
}
|
|
|
|
/* Print a warning or error, depending on the value of LEVEL. */
|
|
|
|
bool
|
|
cpp_error (cpp_reader * pfile, enum cpp_diagnostic_level level,
|
|
const char *msgid, ...)
|
|
{
|
|
va_list ap;
|
|
bool ret;
|
|
|
|
va_start (ap, msgid);
|
|
|
|
ret = cpp_diagnostic (pfile, level, CPP_W_NONE, msgid, &ap);
|
|
|
|
va_end (ap);
|
|
return ret;
|
|
}
|
|
|
|
/* Print a warning. The warning reason may be given in REASON. */
|
|
|
|
bool
|
|
cpp_warning (cpp_reader * pfile, enum cpp_warning_reason reason,
|
|
const char *msgid, ...)
|
|
{
|
|
va_list ap;
|
|
bool ret;
|
|
|
|
va_start (ap, msgid);
|
|
|
|
ret = cpp_diagnostic (pfile, CPP_DL_WARNING, reason, msgid, &ap);
|
|
|
|
va_end (ap);
|
|
return ret;
|
|
}
|
|
|
|
/* Print a pedantic warning. The warning reason may be given in REASON. */
|
|
|
|
bool
|
|
cpp_pedwarning (cpp_reader * pfile, enum cpp_warning_reason reason,
|
|
const char *msgid, ...)
|
|
{
|
|
va_list ap;
|
|
bool ret;
|
|
|
|
va_start (ap, msgid);
|
|
|
|
ret = cpp_diagnostic (pfile, CPP_DL_PEDWARN, reason, msgid, &ap);
|
|
|
|
va_end (ap);
|
|
return ret;
|
|
}
|
|
|
|
/* Print a warning, including system headers. The warning reason may be
|
|
given in REASON. */
|
|
|
|
bool
|
|
cpp_warning_syshdr (cpp_reader * pfile, enum cpp_warning_reason reason,
|
|
const char *msgid, ...)
|
|
{
|
|
va_list ap;
|
|
bool ret;
|
|
|
|
va_start (ap, msgid);
|
|
|
|
ret = cpp_diagnostic (pfile, CPP_DL_WARNING_SYSHDR, reason, msgid, &ap);
|
|
|
|
va_end (ap);
|
|
return ret;
|
|
}
|
|
|
|
/* As cpp_warning above, but use RICHLOC as the location of the diagnostic. */
|
|
|
|
bool cpp_warning_at (cpp_reader *pfile, enum cpp_warning_reason reason,
|
|
rich_location *richloc, const char *msgid, ...)
|
|
{
|
|
va_list ap;
|
|
bool ret;
|
|
|
|
va_start (ap, msgid);
|
|
|
|
ret = cpp_diagnostic_at (pfile, CPP_DL_WARNING, reason, richloc,
|
|
msgid, &ap);
|
|
|
|
va_end (ap);
|
|
return ret;
|
|
|
|
}
|
|
|
|
/* As cpp_pedwarning above, but use RICHLOC as the location of the
|
|
diagnostic. */
|
|
|
|
bool
|
|
cpp_pedwarning_at (cpp_reader * pfile, enum cpp_warning_reason reason,
|
|
rich_location *richloc, const char *msgid, ...)
|
|
{
|
|
va_list ap;
|
|
bool ret;
|
|
|
|
va_start (ap, msgid);
|
|
|
|
ret = cpp_diagnostic_at (pfile, CPP_DL_PEDWARN, reason, richloc,
|
|
msgid, &ap);
|
|
|
|
va_end (ap);
|
|
return ret;
|
|
}
|
|
|
|
/* Print a diagnostic at a specific location. */
|
|
|
|
ATTRIBUTE_FPTR_PRINTF(6,0)
|
|
static bool
|
|
cpp_diagnostic_with_line (cpp_reader * pfile, enum cpp_diagnostic_level level,
|
|
enum cpp_warning_reason reason,
|
|
location_t src_loc, unsigned int column,
|
|
const char *msgid, va_list *ap)
|
|
{
|
|
bool ret;
|
|
|
|
if (!pfile->cb.diagnostic)
|
|
abort ();
|
|
rich_location richloc (pfile->line_table, src_loc);
|
|
if (column)
|
|
richloc.override_column (column);
|
|
ret = pfile->cb.diagnostic (pfile, level, reason, &richloc, _(msgid), ap);
|
|
|
|
return ret;
|
|
}
|
|
|
|
/* Print a warning or error, depending on the value of LEVEL. */
|
|
|
|
bool
|
|
cpp_error_with_line (cpp_reader *pfile, enum cpp_diagnostic_level level,
|
|
location_t src_loc, unsigned int column,
|
|
const char *msgid, ...)
|
|
{
|
|
va_list ap;
|
|
bool ret;
|
|
|
|
va_start (ap, msgid);
|
|
|
|
ret = cpp_diagnostic_with_line (pfile, level, CPP_W_NONE, src_loc,
|
|
column, msgid, &ap);
|
|
|
|
va_end (ap);
|
|
return ret;
|
|
}
|
|
|
|
/* Print a warning. The warning reason may be given in REASON. */
|
|
|
|
bool
|
|
cpp_warning_with_line (cpp_reader *pfile, enum cpp_warning_reason reason,
|
|
location_t src_loc, unsigned int column,
|
|
const char *msgid, ...)
|
|
{
|
|
va_list ap;
|
|
bool ret;
|
|
|
|
va_start (ap, msgid);
|
|
|
|
ret = cpp_diagnostic_with_line (pfile, CPP_DL_WARNING, reason, src_loc,
|
|
column, msgid, &ap);
|
|
|
|
va_end (ap);
|
|
return ret;
|
|
}
|
|
|
|
/* Print a pedantic warning. The warning reason may be given in REASON. */
|
|
|
|
bool
|
|
cpp_pedwarning_with_line (cpp_reader *pfile, enum cpp_warning_reason reason,
|
|
location_t src_loc, unsigned int column,
|
|
const char *msgid, ...)
|
|
{
|
|
va_list ap;
|
|
bool ret;
|
|
|
|
va_start (ap, msgid);
|
|
|
|
ret = cpp_diagnostic_with_line (pfile, CPP_DL_PEDWARN, reason, src_loc,
|
|
column, msgid, &ap);
|
|
|
|
va_end (ap);
|
|
return ret;
|
|
}
|
|
|
|
/* Print a warning, including system headers. The warning reason may be
|
|
given in REASON. */
|
|
|
|
bool
|
|
cpp_warning_with_line_syshdr (cpp_reader *pfile, enum cpp_warning_reason reason,
|
|
location_t src_loc, unsigned int column,
|
|
const char *msgid, ...)
|
|
{
|
|
va_list ap;
|
|
bool ret;
|
|
|
|
va_start (ap, msgid);
|
|
|
|
ret = cpp_diagnostic_with_line (pfile, CPP_DL_WARNING_SYSHDR, reason, src_loc,
|
|
column, msgid, &ap);
|
|
|
|
va_end (ap);
|
|
return ret;
|
|
}
|
|
|
|
/* As cpp_error, but use SRC_LOC as the location of the error, without
|
|
a column override. */
|
|
|
|
bool
|
|
cpp_error_at (cpp_reader * pfile, enum cpp_diagnostic_level level,
|
|
location_t src_loc, const char *msgid, ...)
|
|
{
|
|
va_list ap;
|
|
bool ret;
|
|
|
|
va_start (ap, msgid);
|
|
|
|
rich_location richloc (pfile->line_table, src_loc);
|
|
ret = cpp_diagnostic_at (pfile, level, CPP_W_NONE, &richloc,
|
|
msgid, &ap);
|
|
|
|
va_end (ap);
|
|
return ret;
|
|
}
|
|
|
|
/* As cpp_error, but use RICHLOC as the location of the error, without
|
|
a column override. */
|
|
|
|
bool
|
|
cpp_error_at (cpp_reader * pfile, enum cpp_diagnostic_level level,
|
|
rich_location *richloc, const char *msgid, ...)
|
|
{
|
|
va_list ap;
|
|
bool ret;
|
|
|
|
va_start (ap, msgid);
|
|
|
|
ret = cpp_diagnostic_at (pfile, level, CPP_W_NONE, richloc,
|
|
msgid, &ap);
|
|
|
|
va_end (ap);
|
|
return ret;
|
|
}
|
|
|
|
/* Print a warning or error, depending on the value of LEVEL. Include
|
|
information from errno. */
|
|
|
|
bool
|
|
cpp_errno (cpp_reader *pfile, enum cpp_diagnostic_level level,
|
|
const char *msgid)
|
|
{
|
|
return cpp_error (pfile, level, "%s: %s", _(msgid), xstrerror (errno));
|
|
}
|
|
|
|
/* Print a warning or error, depending on the value of LEVEL. Include
|
|
information from errno. Unlike cpp_errno, the argument is a filename
|
|
that is not localized, but "" is replaced with localized "stdout". */
|
|
|
|
bool
|
|
cpp_errno_filename (cpp_reader *pfile, enum cpp_diagnostic_level level,
|
|
const char *filename,
|
|
location_t loc)
|
|
{
|
|
if (filename[0] == '\0')
|
|
filename = _("stdout");
|
|
|
|
return cpp_error_at (pfile, level, loc, "%s: %s", filename,
|
|
xstrerror (errno));
|
|
}
|