Files
gcc-reflection/gcc/cobol/scan_post.h
James K. Lowden d9a64bf6a6 cobol: Expose warnings as command-line options.
Introduce 45 warning options, integrated with dialects.
	Update documentation with warning options and syntax supported by
	each dialect.

gcc/cobol/ChangeLog:

	PR cobol/119329
	PR cobol/119331
	PR cobol/120786
	* Make-lang.in: Add cobol/messages.o to cobol1 sources.
	* cbldiag.h (yywarn): Remove function.
	(struct cbl_loc_t): Introduce new location type independent of Bison.
	(enum cbl_diag_id_t): Enumerate diagnostic messages.
	(cbl_message): New function.
	(dialect_ok): Test for dialect, emit standard message.
	(dialect_not_ok): Emit standard message if syntax excluded by dialect.
	(dialect_error): Remove function.
	(cbl_unimplementedw): Use cbl_diag_id_t.
	(cbl_unimplemented): Whitespace.
	* cdf.y: Update token values.
	* cobol1.cc (enable_exceptions): Use cbl_message.
	(cobol_warning): Declare function.
	(cobol_langhook_handle_option): Add 44 new warning options.
	(cobol_langhook_type_for_mode): Remove function.
	* except.cc (cbl_enabled_exception_t::dump): Remove function.
	* exceptg.h (class exception_turn_t): Use cbl_diag_id_t.
	* gcobol.1: Document dialect syntax and new warning options.
	* genapi.cc (parser_label_label): Remove unused warning.
	* gengen.cc (gg_find_field_in_struct): Use cbl_internal_error.
	(gg_printf): Same.
	(gg_fprintf): Same.
	(gg_define_function): Same.
	(gg_get_function_decl): Same.
	(gg_call_expr): Same.
	(gg_call): Same.
	* lang-specs.h: Add warning options.
	* lang.opt: Add ISO dialect and options.
	* lexio.cc (parse_replacing_term): Use cbl_message.
	(parse_replacing_pair): Same.
	(preprocess_filter_add): Same.
	(cdftext::echo_input): Same.
	(cdftext::lex_open): Same.
	(cdftext::open_input): Same.
	* messages.cc: New file implements cbl_message.
	* parse.y: Use cbl_message.
	* parse_ante.h (dialect_proscribed): Remove function.
	(parser_move_carefully): Use dialect_ok.
	(goodnight_gracie): Convert warning to debug message.
	* scan.l: Use dialect_ok.
	* scan_ante.h (scanner_parsing): Use cbl_diag_id_t.
	(scanner_parsing_toggle): Same.
	(scanner_parsing_pop): Same.
	(verify_ws): Same.
	(level_of): Same.
	(typed_name): Same.
	(integer_of): Same.
	* scan_post.h (datetime_format_of): Use cbl_internal_error.
	(prelex): Emit only debug messages.
	* show_parse.h: Use cbl_internal_error.
	* symbols.cc (symbols_update): Remove dialect test because parser's problem.
	(cbl_field_t::internalize): Use cbl_message.
	* symbols.h (enum cbl_dialect_t): Add ISO to cbl_dialect_t.
	(cbl_dialect_str): Recognize ISO dialect.
	(dialect_has): New function.
	(cbl_diagnostic_kind): New function.
	(cbl_dialect_kind): New function.
	(struct cbl_alphabet_t): Emit only debug message.
	* token_names.h: Regenerate.
	* util.cc (gb4): Emit only debug message.
	(current_token_location): Add overload to set token_location.
	(yywarn): Remove function.
	(cobol_fileline_set): Use cbl_message.
	(cobol_parse_files): Same.
	(cbl_message): New diagnostic message function uses cbl_diag_id_t.
	(cbl_diagnostic_kind): New function.
	(cbl_diagnostic_option): New function.
	(cbl_unimplementedw): Use cbl_diag_id_t.
	(dialect_error): Remove function.
	* util.h (cbl_message): Remove obsolete prototype for cbl_message.
2025-12-01 16:35:54 -05:00

414 lines
12 KiB
C++

/*
* Copyright (c) 2021-2025 Symas Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following disclaimer
* in the documentation and/or other materials provided with the
* distribution.
* * Neither the name of the Symas Corporation nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
static const char *
start_condition_str( int sc ) {
const char *state = "???";
switch(sc) {
case INITIAL: state = "INITIAL"; break;
case addr_of: state = "addr_of"; break;
case basis: state = "basis"; break;
case bool_state: state = "bool_state"; break;
case cdf_state: state = "cdf_state"; break;
case classify: state = "classify"; break;
case cobol_words: state = "cobol_words"; break;
case comment_entries: state = "comment_entries"; break;
case copy_state: state = "copy_state"; break;
case date_state: state = "date_state"; break;
case datetime_fmt: state = "datetime_fmt"; break;
case dot_state: state = "dot_state"; break;
case exception: state = "exception"; break;
case field_level: state = "field_level"; break;
case field_state: state = "field_state"; break;
case function: state = "function"; break;
case hex_state: state = "hex_state"; break;
case ident_state: state = "ident_state"; break;
case integer_count: state = "integer_count"; break;
case name_state: state = "name_state"; break;
case numeric_state: state = "numeric_state"; break;
case numstr_state: state = "numstr_state"; break;
case partial_name: state = "partial_name"; break;
case picture: state = "picture"; break;
case picture_count: state = "picture_count"; break;
case procedure_div: state = "procedure_div"; break;
case program_id_state: state = "program_id_state"; break;
case quoted1: state = "quoted1"; break;
case quoted2: state = "quoted2"; break;
case quoteq: state = "quoteq"; break;
case raising: state = "raising"; break;
case sort_state: state = "sort_state"; break;
case subscripts: state = "subscripts"; break;
}
return state;
}
static const char *
start_condition_is() { return start_condition_str( YY_START ); }
/*
* Match datetime constants.
*
* A 78 or CONSTANT could have a special literal for formatted
* date/time functions.
*/
static int
datetime_format_of( const char input[] ) {
static const char date_fmt_b[] = "YYYYMMDD|YYYYDDD|YYYYWwwD";
static const char date_fmt_e[] = "YYYY-MM-DD|YYYY-DDD|YYYY-Www-D";
static const char time_fmt_b[] =
"hhmmss([.,]s+)?|hhmmss([.,]s+)?Z|hhmmss([.,]s+)?[+]hhmm|";
static const char time_fmt_e[] =
"hh:mm:ss([.,]s+)?|hh:mm:ss([.,]s+)?Z|hh:mm:ss([.,]s+)?[+]hh:mm";
static char date_pattern[ 3 * sizeof(date_fmt_e) ];
static char time_pattern[ 3 * sizeof(time_fmt_e) ];
static char datetime_pattern[ 6 * sizeof(time_fmt_e) ];
static struct pattern_t {
regex_t re;
const char *regex;
int token;
} patterns[] = {
{ {}, datetime_pattern, DATETIME_FMT },
{ {}, date_pattern, DATE_FMT },
{ {}, time_pattern, TIME_FMT },
}, * eopatterns = patterns + COUNT_OF(patterns);;
// compile patterns
if( ! date_pattern[0] ) {
sprintf(date_pattern, "%s|%s", date_fmt_b, date_fmt_e);
sprintf(time_pattern, "%s|%s", time_fmt_b, time_fmt_e);
sprintf(datetime_pattern, "(%sT%s)|(%sT%s)",
date_fmt_b, time_fmt_b,
date_fmt_e, time_fmt_e);
for( auto p = patterns; p < eopatterns; p++ ) {
static const int cflags = REG_EXTENDED | REG_ICASE;
int erc;
if( 0 != (erc = regcomp(&p->re, p->regex, cflags)) ) {
static char msg[80];
regerror(erc, &p->re, msg, sizeof(msg));
cbl_internal_error("%s:%d: %s: %s", __func__, __LINE__,
keyword_str(p->token), msg);
}
}
}
// applies only in the datetime_fmt start condition
if( datetime_fmt == YY_START ) {
yy_pop_state();
if( input == NULL ) return 0;
// See if the input is a date, time, or datetime pattern string.
static const int nmatch = 3;
regmatch_t matches[nmatch];
auto p = std::find_if( patterns, eopatterns,
[input, &matches]( auto& pattern ) {
auto erc = regexec( &pattern.re, input,
COUNT_OF(matches), matches, 0 );
return erc == 0;
} );
return p != eopatterns? p->token : 0;
}
return 0;
}
/*
* >>DEFINE, >>IF, and >>EVALUATE
*/
static bool
is_cdf_token( int token ) {
switch(token) {
case CDF_DEFINE:
case CDF_DISPLAY:
case CDF_IF: case CDF_ELSE: case CDF_END_IF:
case CDF_EVALUATE: case CDF_WHEN: case CDF_END_EVALUATE:
case CDF_PUSH:
case CDF_POP:
return true;
case CALL_COBOL:
case CALL_VERBATIM:
case COPY:
case TURN:
return true;
}
return false;
}
static bool
is_cdf_condition_token( int token ) {
switch(token) {
case CDF_IF: case CDF_ELSE: case CDF_END_IF:
case CDF_EVALUATE: case CDF_WHEN: case CDF_END_EVALUATE:
return true;
}
return false;
}
/*
* IF and EVALUATE are partially parsed in cdf.y. ELSE and WHEN, etc., are
* valid only in context.
*/
static bool
valid_conditional_context( int token ) {
switch(token) {
case CDF_DEFINE:
case CDF_IF:
case CDF_EVALUATE:
return true;
case CDF_ELSE:
case CDF_END_IF:
return scanner_token() == CDF_IF;
case CDF_WHEN:
case CDF_END_EVALUATE:
return scanner_token() == CDF_EVALUATE;
}
return true; // all other CDF tokens valid regardless of context
}
static bool
run_cdf( int token ) {
if( ! valid_conditional_context(token) ) {
error_msg(yylloc, "CDF syntax error at '%s'", keyword_str(token));
return false;
}
parsing.inject_token(token); // because it will be needed by CDF parser
if( yy_flex_debug ) dbgmsg("CDF parser start with '%s'", keyword_str(token));
parsing.parser_save(ydfparse);
int erc = ydfparse(); // Parse the CDF directive.
parsing.parser_restore();
if( YY_START == cdf_state ) yy_pop_state();
if( yy_flex_debug ) {
dbgmsg("CDF parser returned %d, scanner SC <%s>", erc, start_condition_is());
}
return 0 == erc;
}
#include <queue>
struct pending_token_t {
int token;
YYSTYPE value;
pending_token_t( int token, YYSTYPE value ) : token(token), value(value) {}
};
#define PENDING(T) pending_token_t( (T), yylval )
static std::queue<pending_token_t> pending_tokens;
int next_token() {
int token = lexer();
return token;
}
extern int ydfchar;
bool in_procedure_division(void);
// act on CDF tokens
int
prelex() {
static bool in_cdf = false;
int token = next_token();
if( in_cdf ) { return token; }
if( ! is_cdf_token(token) ) { return token; }
in_cdf = true;
assert(is_cdf_token(token));
while( is_cdf_token(token) ) {
if( ! run_cdf(token) ) {
dbgmsg( ">>CDF parser failed, ydfchar %d", ydfchar );
}
// Return the CDF's discarded lookahead token, if extant.
token = ydfchar > 0? ydfchar : next_token();
if( token == NO_CONDITION && parsing.at_eof() ) {
return YYEOF;
}
// Reenter cdf parser only if next token could affect parsing state.
if( ! parsing.on() && ! is_cdf_condition_token(token) ) break;
}
if( yy_flex_debug ) {
dbgmsg("scanner SC <%s>", start_condition_is());
}
if( YY_START == copy_state || YY_START == cdf_state ) {
if( token == NAME ) {
auto tok = keyword_tok(ydflval.string);
if( tok ) token = tok;
}
yy_pop_state();
dbgmsg("scanner SC <%s>, token now %s",
start_condition_is(), keyword_str(token));
}
/*
* The final, rejected CDF token might be a LEVEL number.
*/
if( YY_START == field_state && level_needed() ) {
switch( token ) {
case NUMSTR:
dbgmsg("final token is NUMSTR");
yylval.number = level_of(yylval.numstr.string);
token = LEVEL;
break;
case YDF_NUMBER:
dbgmsg("final token is YDF_NUMBER");
yylval.number = ydflval.number;
token = LEVEL;
break;
}
if( token == LEVEL ) {
switch(yylval.number) {
case 66:
token = LEVEL66;
break;
case 78:
token = LEVEL78;
break;
case 88:
token = LEVEL78;
break;
}
}
}
dbgmsg( ">>CDF parser done, %s returning "
"%s (because final_token %s, lookhead %d) on line %d", __func__,
keyword_str(token), keyword_str(final_token),
ydfchar, yylineno );
in_cdf = false;
return token;
}
/* There are 2 parsers and one scanner.
* yyparse calls yylex.
* yylex calls prelex
* prelex calls lexer, the scanner produced by flex.
* lexer reads input from yyin via lexer_input.
*
* prelex intercepts CDF statements, each of which it parses with ydfparse.
* ydfparse affects CDF variables, which may affect how yylex treats
* the input stream.
*
* Because the lexer is called recursively:
*
* yyparse -> yylex -> ydfparse -> yylex
*
* the global state of the scanner has changed when ydfparse returns. Part of
* that state is the unused lookahead token that ydfparse discarded, stored in
* final_token. prelex then returns final_token as its own, which is duly
* returned to yyparse.
*/
int
yylex(void) {
static bool produce_next_sentence_target = false;
int token = parsing.pending_token();
if( parsing.at_eof() ) return YYEOF;
if( token ) return token;
/*
* NEXT SENTENCE jumps to an implied CONTINUE at the next dot ('.').
* Documentation says variously that the implied CONTINUE is before or after
* that dot, but the meaning is one: after the statement that precedes the
* dot.
*
* When the lexer encounters the dot, it returns it to the parser, which may
* use it as a look-ahead token to decide the grammar production. By the
* time it returns to the lexer looking for its next token, the parser will
* have taken whatever actions the dot decided. At that point, the lexer
* injects the label that NEXT SENTENCE jumps to.
*/
if( produce_next_sentence_target ) {
next_sentence_label(next_sentence);
produce_next_sentence_target = false;
}
do {
token = prelex();
if( yy_flex_debug ) {
if( parsing.in_cdf() ) {
dbgmsg( "%s:%d: <%s> routing %s to CDF parser", __func__, __LINE__,
start_condition_is(), keyword_str(token) );
} else if( !parsing.on() ) {
dbgmsg( "eating %s because conditional compilation is FALSE",
keyword_str(token) );
}
}
} while( token && ! parsing.feed_a_parser() );
if( next_sentence && token == '.' ) {
produce_next_sentence_target = true;
}
if( parsing.normal() ) {
final_token = token;
}
if( token == YYEOF && parsing.in_cdf() ) {
if( yy_flex_debug) dbgmsg("deflecting EOF");
parsing.at_eof(true);
return NO_CONDITION;
}
return token;
}
/*
* Token name<->string utilities
*/
// tokens.h is generated as needed from parse.h with tokens.h.gen
current_tokens_t::tokenset_t::tokenset_t() {
#include "token_names.h"
};