mirror of
https://forge.sourceware.org/marek/gcc.git
synced 2026-02-22 03:47:02 -05:00
Introduce 45 warning options, integrated with dialects. Update documentation with warning options and syntax supported by each dialect. gcc/cobol/ChangeLog: PR cobol/119329 PR cobol/119331 PR cobol/120786 * Make-lang.in: Add cobol/messages.o to cobol1 sources. * cbldiag.h (yywarn): Remove function. (struct cbl_loc_t): Introduce new location type independent of Bison. (enum cbl_diag_id_t): Enumerate diagnostic messages. (cbl_message): New function. (dialect_ok): Test for dialect, emit standard message. (dialect_not_ok): Emit standard message if syntax excluded by dialect. (dialect_error): Remove function. (cbl_unimplementedw): Use cbl_diag_id_t. (cbl_unimplemented): Whitespace. * cdf.y: Update token values. * cobol1.cc (enable_exceptions): Use cbl_message. (cobol_warning): Declare function. (cobol_langhook_handle_option): Add 44 new warning options. (cobol_langhook_type_for_mode): Remove function. * except.cc (cbl_enabled_exception_t::dump): Remove function. * exceptg.h (class exception_turn_t): Use cbl_diag_id_t. * gcobol.1: Document dialect syntax and new warning options. * genapi.cc (parser_label_label): Remove unused warning. * gengen.cc (gg_find_field_in_struct): Use cbl_internal_error. (gg_printf): Same. (gg_fprintf): Same. (gg_define_function): Same. (gg_get_function_decl): Same. (gg_call_expr): Same. (gg_call): Same. * lang-specs.h: Add warning options. * lang.opt: Add ISO dialect and options. * lexio.cc (parse_replacing_term): Use cbl_message. (parse_replacing_pair): Same. (preprocess_filter_add): Same. (cdftext::echo_input): Same. (cdftext::lex_open): Same. (cdftext::open_input): Same. * messages.cc: New file implements cbl_message. * parse.y: Use cbl_message. * parse_ante.h (dialect_proscribed): Remove function. (parser_move_carefully): Use dialect_ok. (goodnight_gracie): Convert warning to debug message. * scan.l: Use dialect_ok. * scan_ante.h (scanner_parsing): Use cbl_diag_id_t. (scanner_parsing_toggle): Same. (scanner_parsing_pop): Same. (verify_ws): Same. (level_of): Same. (typed_name): Same. (integer_of): Same. * scan_post.h (datetime_format_of): Use cbl_internal_error. (prelex): Emit only debug messages. * show_parse.h: Use cbl_internal_error. * symbols.cc (symbols_update): Remove dialect test because parser's problem. (cbl_field_t::internalize): Use cbl_message. * symbols.h (enum cbl_dialect_t): Add ISO to cbl_dialect_t. (cbl_dialect_str): Recognize ISO dialect. (dialect_has): New function. (cbl_diagnostic_kind): New function. (cbl_dialect_kind): New function. (struct cbl_alphabet_t): Emit only debug message. * token_names.h: Regenerate. * util.cc (gb4): Emit only debug message. (current_token_location): Add overload to set token_location. (yywarn): Remove function. (cobol_fileline_set): Use cbl_message. (cobol_parse_files): Same. (cbl_message): New diagnostic message function uses cbl_diag_id_t. (cbl_diagnostic_kind): New function. (cbl_diagnostic_option): New function. (cbl_unimplementedw): Use cbl_diag_id_t. (dialect_error): Remove function. * util.h (cbl_message): Remove obsolete prototype for cbl_message.
414 lines
12 KiB
C++
414 lines
12 KiB
C++
/*
|
|
* Copyright (c) 2021-2025 Symas Corporation
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions are
|
|
* met:
|
|
*
|
|
* * Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* * Redistributions in binary form must reproduce the above
|
|
* copyright notice, this list of conditions and the following disclaimer
|
|
* in the documentation and/or other materials provided with the
|
|
* distribution.
|
|
* * Neither the name of the Symas Corporation nor the names of its
|
|
* contributors may be used to endorse or promote products derived from
|
|
* this software without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
static const char *
|
|
start_condition_str( int sc ) {
|
|
const char *state = "???";
|
|
switch(sc) {
|
|
case INITIAL: state = "INITIAL"; break;
|
|
case addr_of: state = "addr_of"; break;
|
|
case basis: state = "basis"; break;
|
|
case bool_state: state = "bool_state"; break;
|
|
case cdf_state: state = "cdf_state"; break;
|
|
case classify: state = "classify"; break;
|
|
case cobol_words: state = "cobol_words"; break;
|
|
case comment_entries: state = "comment_entries"; break;
|
|
case copy_state: state = "copy_state"; break;
|
|
case date_state: state = "date_state"; break;
|
|
case datetime_fmt: state = "datetime_fmt"; break;
|
|
case dot_state: state = "dot_state"; break;
|
|
case exception: state = "exception"; break;
|
|
case field_level: state = "field_level"; break;
|
|
case field_state: state = "field_state"; break;
|
|
case function: state = "function"; break;
|
|
case hex_state: state = "hex_state"; break;
|
|
case ident_state: state = "ident_state"; break;
|
|
case integer_count: state = "integer_count"; break;
|
|
case name_state: state = "name_state"; break;
|
|
case numeric_state: state = "numeric_state"; break;
|
|
case numstr_state: state = "numstr_state"; break;
|
|
case partial_name: state = "partial_name"; break;
|
|
case picture: state = "picture"; break;
|
|
case picture_count: state = "picture_count"; break;
|
|
case procedure_div: state = "procedure_div"; break;
|
|
case program_id_state: state = "program_id_state"; break;
|
|
case quoted1: state = "quoted1"; break;
|
|
case quoted2: state = "quoted2"; break;
|
|
case quoteq: state = "quoteq"; break;
|
|
case raising: state = "raising"; break;
|
|
case sort_state: state = "sort_state"; break;
|
|
case subscripts: state = "subscripts"; break;
|
|
}
|
|
return state;
|
|
}
|
|
|
|
static const char *
|
|
start_condition_is() { return start_condition_str( YY_START ); }
|
|
|
|
/*
|
|
* Match datetime constants.
|
|
*
|
|
* A 78 or CONSTANT could have a special literal for formatted
|
|
* date/time functions.
|
|
*/
|
|
|
|
static int
|
|
datetime_format_of( const char input[] ) {
|
|
|
|
static const char date_fmt_b[] = "YYYYMMDD|YYYYDDD|YYYYWwwD";
|
|
static const char date_fmt_e[] = "YYYY-MM-DD|YYYY-DDD|YYYY-Www-D";
|
|
|
|
static const char time_fmt_b[] =
|
|
"hhmmss([.,]s+)?|hhmmss([.,]s+)?Z|hhmmss([.,]s+)?[+]hhmm|";
|
|
static const char time_fmt_e[] =
|
|
"hh:mm:ss([.,]s+)?|hh:mm:ss([.,]s+)?Z|hh:mm:ss([.,]s+)?[+]hh:mm";
|
|
|
|
static char date_pattern[ 3 * sizeof(date_fmt_e) ];
|
|
static char time_pattern[ 3 * sizeof(time_fmt_e) ];
|
|
static char datetime_pattern[ 6 * sizeof(time_fmt_e) ];
|
|
|
|
static struct pattern_t {
|
|
regex_t re;
|
|
const char *regex;
|
|
int token;
|
|
} patterns[] = {
|
|
{ {}, datetime_pattern, DATETIME_FMT },
|
|
{ {}, date_pattern, DATE_FMT },
|
|
{ {}, time_pattern, TIME_FMT },
|
|
}, * eopatterns = patterns + COUNT_OF(patterns);;
|
|
|
|
// compile patterns
|
|
if( ! date_pattern[0] ) {
|
|
sprintf(date_pattern, "%s|%s", date_fmt_b, date_fmt_e);
|
|
sprintf(time_pattern, "%s|%s", time_fmt_b, time_fmt_e);
|
|
|
|
sprintf(datetime_pattern, "(%sT%s)|(%sT%s)",
|
|
date_fmt_b, time_fmt_b,
|
|
date_fmt_e, time_fmt_e);
|
|
|
|
for( auto p = patterns; p < eopatterns; p++ ) {
|
|
static const int cflags = REG_EXTENDED | REG_ICASE;
|
|
int erc;
|
|
|
|
if( 0 != (erc = regcomp(&p->re, p->regex, cflags)) ) {
|
|
static char msg[80];
|
|
regerror(erc, &p->re, msg, sizeof(msg));
|
|
cbl_internal_error("%s:%d: %s: %s", __func__, __LINE__,
|
|
keyword_str(p->token), msg);
|
|
}
|
|
}
|
|
}
|
|
|
|
// applies only in the datetime_fmt start condition
|
|
if( datetime_fmt == YY_START ) {
|
|
yy_pop_state();
|
|
if( input == NULL ) return 0;
|
|
|
|
// See if the input is a date, time, or datetime pattern string.
|
|
static const int nmatch = 3;
|
|
regmatch_t matches[nmatch];
|
|
|
|
auto p = std::find_if( patterns, eopatterns,
|
|
[input, &matches]( auto& pattern ) {
|
|
auto erc = regexec( &pattern.re, input,
|
|
COUNT_OF(matches), matches, 0 );
|
|
return erc == 0;
|
|
} );
|
|
|
|
return p != eopatterns? p->token : 0;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
|
|
/*
|
|
* >>DEFINE, >>IF, and >>EVALUATE
|
|
*/
|
|
|
|
static bool
|
|
is_cdf_token( int token ) {
|
|
switch(token) {
|
|
case CDF_DEFINE:
|
|
case CDF_DISPLAY:
|
|
case CDF_IF: case CDF_ELSE: case CDF_END_IF:
|
|
case CDF_EVALUATE: case CDF_WHEN: case CDF_END_EVALUATE:
|
|
case CDF_PUSH:
|
|
case CDF_POP:
|
|
return true;
|
|
case CALL_COBOL:
|
|
case CALL_VERBATIM:
|
|
case COPY:
|
|
case TURN:
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
static bool
|
|
is_cdf_condition_token( int token ) {
|
|
switch(token) {
|
|
case CDF_IF: case CDF_ELSE: case CDF_END_IF:
|
|
case CDF_EVALUATE: case CDF_WHEN: case CDF_END_EVALUATE:
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/*
|
|
* IF and EVALUATE are partially parsed in cdf.y. ELSE and WHEN, etc., are
|
|
* valid only in context.
|
|
*/
|
|
static bool
|
|
valid_conditional_context( int token ) {
|
|
switch(token) {
|
|
case CDF_DEFINE:
|
|
case CDF_IF:
|
|
case CDF_EVALUATE:
|
|
return true;
|
|
case CDF_ELSE:
|
|
case CDF_END_IF:
|
|
return scanner_token() == CDF_IF;
|
|
case CDF_WHEN:
|
|
case CDF_END_EVALUATE:
|
|
return scanner_token() == CDF_EVALUATE;
|
|
}
|
|
return true; // all other CDF tokens valid regardless of context
|
|
}
|
|
|
|
static bool
|
|
run_cdf( int token ) {
|
|
if( ! valid_conditional_context(token) ) {
|
|
error_msg(yylloc, "CDF syntax error at '%s'", keyword_str(token));
|
|
return false;
|
|
}
|
|
|
|
parsing.inject_token(token); // because it will be needed by CDF parser
|
|
|
|
if( yy_flex_debug ) dbgmsg("CDF parser start with '%s'", keyword_str(token));
|
|
|
|
parsing.parser_save(ydfparse);
|
|
|
|
int erc = ydfparse(); // Parse the CDF directive.
|
|
|
|
parsing.parser_restore();
|
|
|
|
if( YY_START == cdf_state ) yy_pop_state();
|
|
|
|
if( yy_flex_debug ) {
|
|
dbgmsg("CDF parser returned %d, scanner SC <%s>", erc, start_condition_is());
|
|
}
|
|
|
|
return 0 == erc;
|
|
}
|
|
|
|
#include <queue>
|
|
struct pending_token_t {
|
|
int token;
|
|
YYSTYPE value;
|
|
pending_token_t( int token, YYSTYPE value ) : token(token), value(value) {}
|
|
};
|
|
#define PENDING(T) pending_token_t( (T), yylval )
|
|
|
|
static std::queue<pending_token_t> pending_tokens;
|
|
|
|
int next_token() {
|
|
int token = lexer();
|
|
return token;
|
|
}
|
|
|
|
extern int ydfchar;
|
|
bool in_procedure_division(void);
|
|
|
|
// act on CDF tokens
|
|
int
|
|
prelex() {
|
|
static bool in_cdf = false;
|
|
int token = next_token();
|
|
|
|
if( in_cdf ) { return token; }
|
|
if( ! is_cdf_token(token) ) { return token; }
|
|
|
|
in_cdf = true;
|
|
|
|
assert(is_cdf_token(token));
|
|
|
|
while( is_cdf_token(token) ) {
|
|
|
|
if( ! run_cdf(token) ) {
|
|
dbgmsg( ">>CDF parser failed, ydfchar %d", ydfchar );
|
|
}
|
|
// Return the CDF's discarded lookahead token, if extant.
|
|
token = ydfchar > 0? ydfchar : next_token();
|
|
if( token == NO_CONDITION && parsing.at_eof() ) {
|
|
return YYEOF;
|
|
}
|
|
|
|
// Reenter cdf parser only if next token could affect parsing state.
|
|
if( ! parsing.on() && ! is_cdf_condition_token(token) ) break;
|
|
}
|
|
|
|
if( yy_flex_debug ) {
|
|
dbgmsg("scanner SC <%s>", start_condition_is());
|
|
}
|
|
|
|
if( YY_START == copy_state || YY_START == cdf_state ) {
|
|
if( token == NAME ) {
|
|
auto tok = keyword_tok(ydflval.string);
|
|
if( tok ) token = tok;
|
|
}
|
|
yy_pop_state();
|
|
dbgmsg("scanner SC <%s>, token now %s",
|
|
start_condition_is(), keyword_str(token));
|
|
}
|
|
|
|
/*
|
|
* The final, rejected CDF token might be a LEVEL number.
|
|
*/
|
|
if( YY_START == field_state && level_needed() ) {
|
|
switch( token ) {
|
|
case NUMSTR:
|
|
dbgmsg("final token is NUMSTR");
|
|
yylval.number = level_of(yylval.numstr.string);
|
|
token = LEVEL;
|
|
break;
|
|
case YDF_NUMBER:
|
|
dbgmsg("final token is YDF_NUMBER");
|
|
yylval.number = ydflval.number;
|
|
token = LEVEL;
|
|
break;
|
|
}
|
|
if( token == LEVEL ) {
|
|
switch(yylval.number) {
|
|
case 66:
|
|
token = LEVEL66;
|
|
break;
|
|
case 78:
|
|
token = LEVEL78;
|
|
break;
|
|
case 88:
|
|
token = LEVEL78;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
dbgmsg( ">>CDF parser done, %s returning "
|
|
"%s (because final_token %s, lookhead %d) on line %d", __func__,
|
|
keyword_str(token), keyword_str(final_token),
|
|
ydfchar, yylineno );
|
|
in_cdf = false;
|
|
return token;
|
|
}
|
|
|
|
/* There are 2 parsers and one scanner.
|
|
* yyparse calls yylex.
|
|
* yylex calls prelex
|
|
* prelex calls lexer, the scanner produced by flex.
|
|
* lexer reads input from yyin via lexer_input.
|
|
*
|
|
* prelex intercepts CDF statements, each of which it parses with ydfparse.
|
|
* ydfparse affects CDF variables, which may affect how yylex treats
|
|
* the input stream.
|
|
*
|
|
* Because the lexer is called recursively:
|
|
*
|
|
* yyparse -> yylex -> ydfparse -> yylex
|
|
*
|
|
* the global state of the scanner has changed when ydfparse returns. Part of
|
|
* that state is the unused lookahead token that ydfparse discarded, stored in
|
|
* final_token. prelex then returns final_token as its own, which is duly
|
|
* returned to yyparse.
|
|
*/
|
|
|
|
int
|
|
yylex(void) {
|
|
static bool produce_next_sentence_target = false;
|
|
int token = parsing.pending_token();
|
|
|
|
if( parsing.at_eof() ) return YYEOF;
|
|
if( token ) return token;
|
|
|
|
/*
|
|
* NEXT SENTENCE jumps to an implied CONTINUE at the next dot ('.').
|
|
* Documentation says variously that the implied CONTINUE is before or after
|
|
* that dot, but the meaning is one: after the statement that precedes the
|
|
* dot.
|
|
*
|
|
* When the lexer encounters the dot, it returns it to the parser, which may
|
|
* use it as a look-ahead token to decide the grammar production. By the
|
|
* time it returns to the lexer looking for its next token, the parser will
|
|
* have taken whatever actions the dot decided. At that point, the lexer
|
|
* injects the label that NEXT SENTENCE jumps to.
|
|
*/
|
|
if( produce_next_sentence_target ) {
|
|
next_sentence_label(next_sentence);
|
|
produce_next_sentence_target = false;
|
|
}
|
|
|
|
do {
|
|
token = prelex();
|
|
if( yy_flex_debug ) {
|
|
if( parsing.in_cdf() ) {
|
|
dbgmsg( "%s:%d: <%s> routing %s to CDF parser", __func__, __LINE__,
|
|
start_condition_is(), keyword_str(token) );
|
|
} else if( !parsing.on() ) {
|
|
dbgmsg( "eating %s because conditional compilation is FALSE",
|
|
keyword_str(token) );
|
|
}
|
|
}
|
|
|
|
} while( token && ! parsing.feed_a_parser() );
|
|
|
|
if( next_sentence && token == '.' ) {
|
|
produce_next_sentence_target = true;
|
|
}
|
|
|
|
if( parsing.normal() ) {
|
|
final_token = token;
|
|
}
|
|
|
|
if( token == YYEOF && parsing.in_cdf() ) {
|
|
if( yy_flex_debug) dbgmsg("deflecting EOF");
|
|
parsing.at_eof(true);
|
|
return NO_CONDITION;
|
|
}
|
|
|
|
return token;
|
|
}
|
|
|
|
/*
|
|
* Token name<->string utilities
|
|
*/
|
|
|
|
// tokens.h is generated as needed from parse.h with tokens.h.gen
|
|
current_tokens_t::tokenset_t::tokenset_t() {
|
|
#include "token_names.h"
|
|
};
|