Files
gcc-reflection/libgcobol/valconv.h
Robert Dubner 0e95ebf465 cobol: Convert to individual variable character-set encoding.
Prior to this "patch", the GCOBOL compiler was capable of producing binaries
that operated internally in either ASCII or EBCDIC.  The COBOL specification,
however, allows for the concurrent presence of two encodings, known as
"alphanumeric" and "national".

In order to support this capability, we have chosen to establish an "encoding"
characteristic that gets carried along with every variable,

This change affected many parts of the COBOL front end compiler.  If anybody
looks at any of the changes listed below, they will find they fall into two
classes:

1) Removing the dependence on a global ASCII vs EBCDIC determination.

2) Adding a dependence on a new ::encoding characteristic of the compile time
and run time cbl_field_t and cblc_field_t variable structures.  Those
structures now contain the cbl_encoding_t ::encoding members, which drive the
use of the iconv(3) function in moving back and forth between variable
encodings.

Although the effort is not complete, these changes represent the bulk of what
needs to be done.  With these changes in place, all of our current ASCII and
EBCDIC tests run properly.

gcc/cobol/ChangeLog:

	* cdf.y: In support of the described changes.
	* gcobol.1: Likewise.
	* genapi.cc (level_88_helper): Likewise.
	(get_level_88_domain): Likewise.
	(get_class_condition_string): Likewise.
	(initialize_variable_internal): Likewise.
	(gg_default_qualification): Likewise.
	(cobol_compare): Likewise.
	(move_tree): Likewise.
	(move_tree_to_field): Likewise.
	(psa_FldBlob): Likewise.
	(parser_accept_date_yymmdd): Likewise.
	(parser_accept_date_yyyymmdd): Likewise.
	(parser_accept_date_yyddd): Likewise.
	(parser_accept_date_yyyyddd): Likewise.
	(parser_accept_date_dow): Likewise.
	(parser_accept_date_hhmmssff): Likewise.
	(parser_alphabet): Likewise.
	(parser_alphabet_use): Likewise.
	(parser_display_internal): Likewise.
	(parser_display): Likewise.
	(is_valuable): Likewise.
	(parser_division): Likewise.
	(parser_relop_long): Likewise.
	(parser_setop): Likewise.
	(parser_set_conditional88): Likewise.
	(parser_file_add): Likewise.
	(parser_file_open): Likewise.
	(create_and_call): Likewise.
	(parser_call): Likewise.
	(mh_identical): Likewise.
	(mh_source_is_literalN): Likewise.
	(picky_memcpy): Likewise.
	(mh_numeric_display): Likewise.
	(mh_source_is_group): Likewise.
	(mh_source_is_literalA): Likewise.
	(move_helper): Likewise.
	(initial_from_initial): Likewise.
	(actually_create_the_static_field): Likewise.
	(psa_FldLiteralA): Likewise.
	(parser_symbol_add): Likewise.
	* genmath.cc (arithmetic_operation): Likewise.
	* genutil.cc (get_binary_value): Likewise.
	(get_literal_string): Likewise.
	* genutil.h (EBCDIC_MINUS): Likewise.
	(EBCDIC_PLUS): Likewise.
	(EBCDIC_ZERO): Likewise.
	(EBCDIC_NINE): Likewise.
	* parse.y: Likewise.
	* parse_ante.h (name_of): Likewise.
	(class prog_descr_t): Likewise.
	(current_encoding): Likewise.
	(needs_picture): Likewise.
	(is_callable): Likewise.
	(field_attr_str): Likewise.
	(value_encoding_check): Likewise.
	(field_alloc): Likewise.
	(file_add): Likewise.
	* scan.l: Likewise.
	* structs.cc (create_cblc_field_t): Likewise.
	* symbols.cc (elementize): Likewise.
	(cbl_field_attr_str): Likewise.
	(is_variable_length): Likewise.
	(field_str): Likewise.
	(extend_66_capacity): Likewise.
	(assert): Likewise.
	(symbols_update): Likewise.
	(symbol_field_parent_set): Likewise.
	(add_token): Likewise.
	(symbol_table_init): Likewise.
	(symbol_field_add): Likewise.
	(symbol_field_forward_add): Likewise.
	(symbol_field_same_as): Likewise.
	(cbl_alphabet_t::reencode): Likewise.
	(new_temporary_impl): Likewise.
	(parser_symbol_add2): Likewise.
	(new_literal_add): Likewise.
	(temporaries_t::literal): Likewise.
	(new_literal): Likewise.
	(standard_internal): Likewise.
	(new_temporary): Likewise.
	(cbl_field_t::holds_ascii): Likewise.
	(cbl_field_t::is_ascii): Likewise.
	(cbl_field_t::internalize): Likewise.
	(symbol_label_add): Likewise.
	(symbol_label_section_exists): Likewise.
	(cbl_occurs_t::subscript_ok): Likewise.
	(cbl_file_t::deforward): Likewise.
	(has_value): Likewise.
	* symbols.h (is_numeric): Likewise.
	(__gg__encoding_iconv_name): Likewise.
	(current_encoding): Likewise.
	(struct cbl_field_t): Likewise.
	(new_literal): Likewise.
	(class temporaries_t): Likewise.
	(struct function_descr_t): Likewise.
	(hex_decode): Likewise.
	(struct cbl_alphabet_t): Likewise.
	(struct cbl_file_t): Likewise.
	* symfind.cc (field_structure): Likewise.
	(erase_symbol_map_fwds): Likewise.
	(symbol_find): Likewise.
	* token_names.h: Likewise.
	* util.cc (cbl_field_type_str): Likewise.
	(is_elementary): Likewise.
	(symbol_field_type_update): Likewise.
	(cbl_field_t::report_invalid_initial_value): Likewise.
	(valid_move): Likewise.
	(valid_picture): Likewise.
	(type_capacity): Likewise.
	(gcc_location_set_impl): Likewise.
	(cbl_unimplementedw): Likewise.

libgcobol/ChangeLog:

	* charmaps.cc (raw_is_SBC): Likewise.
	(extract_next_code_point): Likewise.
	(flipper): Likewise.
	(__gg__ascii_to_ascii_chr): Likewise.
	(__gg__ascii_to_ebcdic_chr): Likewise.
	(__gg__raw_to_ascii): Likewise.
	(__gg__raw_to_ebcdic): Likewise.
	(convert_cp1252_to_utf8): Likewise.
	(__gg__text_conversion_override): Likewise.
	(__gg__ascii_to_ascii): Likewise.
	(__gg__encoding_iconv_name): Likewise.
	(__gg__encoding_iconv_type): Likewise.
	(__gg__ascii_to_ebcdic): Likewise.
	(__gg__iconverter): Likewise.
	(__gg__ebcdic_to_ascii): Likewise.
	(__gg__ascii_to_console): Likewise.
	(__gg__ebcdic_to_console): Likewise.
	(__gg__console_to_ascii): Likewise.
	(__gg__console_to_ebcdic): Likewise.
	(_to_ctype): Likewise.
	(_from_ctype): Likewise.
	(__gg__get_charmap): Likewise.
	* charmaps.h (internal_is_ebcdic): Likewise.
	(internal_space): Likewise.
	(internal_zero): Likewise.
	(internal_period): Likewise.
	(internal_comma): Likewise.
	(internal_dquote): Likewise.
	(internal_asterisk): Likewise.
	(internal_plus): Likewise.
	(internal_minus): Likewise.
	(internal_cr): Likewise.
	(internal_ff): Likewise.
	(internal_newline): Likewise.
	(internal_return): Likewise.
	(internal_0): Likewise.
	(internal_1): Likewise.
	(internal_2): Likewise.
	(internal_3): Likewise.
	(internal_4): Likewise.
	(internal_5): Likewise.
	(internal_6): Likewise.
	(internal_7): Likewise.
	(internal_8): Likewise.
	(internal_9): Likewise.
	(internal_colon): Likewise.
	(internal_query): Likewise.
	(internal_A): Likewise.
	(internal_B): Likewise.
	(internal_C): Likewise.
	(internal_D): Likewise.
	(internal_E): Likewise.
	(internal_F): Likewise.
	(internal_G): Likewise.
	(internal_H): Likewise.
	(internal_I): Likewise.
	(internal_J): Likewise.
	(internal_K): Likewise.
	(internal_L): Likewise.
	(internal_M): Likewise.
	(internal_N): Likewise.
	(internal_O): Likewise.
	(internal_P): Likewise.
	(internal_Q): Likewise.
	(internal_R): Likewise.
	(internal_S): Likewise.
	(internal_T): Likewise.
	(internal_U): Likewise.
	(internal_V): Likewise.
	(internal_W): Likewise.
	(internal_X): Likewise.
	(internal_Y): Likewise.
	(internal_Z): Likewise.
	(internal_a): Likewise.
	(internal_b): Likewise.
	(internal_c): Likewise.
	(internal_d): Likewise.
	(internal_e): Likewise.
	(internal_f): Likewise.
	(internal_g): Likewise.
	(internal_h): Likewise.
	(internal_i): Likewise.
	(internal_j): Likewise.
	(internal_k): Likewise.
	(internal_l): Likewise.
	(internal_m): Likewise.
	(internal_n): Likewise.
	(internal_o): Likewise.
	(internal_p): Likewise.
	(internal_q): Likewise.
	(internal_r): Likewise.
	(internal_s): Likewise.
	(internal_t): Likewise.
	(internal_u): Likewise.
	(internal_v): Likewise.
	(internal_w): Likewise.
	(internal_x): Likewise.
	(internal_y): Likewise.
	(internal_z): Likewise.
	(enum text_codeset_t): Likewise.
	(__gg__ascii_to_ascii_chr): Likewise.
	(__gg__ascii_to_ebcdic_chr): Likewise.
	(ascii_to_internal): Likewise.
	(__gg__ascii_to_ascii): Likewise.
	(__gg__ascii_to_ebcdic): Likewise.
	(ascii_to_internal_str): Likewise.
	(__gg__raw_to_ascii): Likewise.
	(__gg__raw_to_ebcdic): Likewise.
	(raw_to_internal): Likewise.
	(__gg__ascii_to_console): Likewise.
	(__gg__ebcdic_to_console): Likewise.
	(internal_to_console): Likewise.
	(__gg__console_to_ascii): Likewise.
	(__gg__console_to_ebcdic): Likewise.
	(console_to_internal): Likewise.
	(__gg__ebcdic_to_ascii): Likewise.
	(internal_to_ascii): Likewise.
	(__gg__encoding_iconv_name): Likewise.
	(__gg__encoding_iconv_type): Likewise.
	(__gg__iconverter): Likewise.
	(DEFAULT_CHARMAP_SOURCE): Likewise.
	(class charmap_t): Likewise.
	(__gg__get_charmap): Likewise.
	* common-defs.h (EBCDIC_MINUS): Likewise.
	(EBCDIC_PLUS): Likewise.
	(EBCDIC_ZERO): Likewise.
	(EBCDIC_NINE): Likewise.
	(PACKED_NYBBLE_PLUS): Likewise.
	(PACKED_NYBBLE_MINUS): Likewise.
	(PACKED_NYBBLE_UNSIGNED): Likewise.
	(NUMERIC_DISPLAY_SIGN_BIT_EBCDIC): Likewise.
	(NUMERIC_DISPLAY_SIGN_BIT): Likewise.
	(SEPARATE_PLUS): Likewise.
	(SEPARATE_MINUS): Likewise.
	(ZONED_ZERO): Likewise.
	(ZONE_SIGNED_EBCDIC): Likewise.
	(enum cbl_field_type_t): Likewise.
	(enum cbl_field_attr_t): Likewise.
	(enum cbl_figconst_t): Likewise.
	(enum cbl_encoding_t): Likewise.
	* constants.cc (struct cblc_field_t): Likewise.
	(X): Likewise.
	(S9): Likewise.
	* gcobolio.h: Likewise.
	* gfileio.cc (get_filename): Likewise.
	(relative_file_delete): Likewise.
	(relative_file_start): Likewise.
	(relative_file_rewrite): Likewise.
	(relative_file_write_varying): Likewise.
	(relative_file_write): Likewise.
	(sequential_file_write): Likewise.
	(line_sequential_file_read): Likewise.
	(sequential_file_read): Likewise.
	(relative_file_read): Likewise.
	(file_indexed_open): Likewise.
	(__gg__file_reopen): Likewise.
	(__io__file_open): Likewise.
	(__io__file_close): Likewise.
	(__gg__file_open): Likewise.
	* intrinsic.cc (trim_trailing_spaces): Likewise.
	(is_zulu_format): Likewise.
	(string_to_dest): Likewise.
	(get_all_time): Likewise.
	(ftime_replace): Likewise.
	(__gg__char): Likewise.
	(__gg__current_date): Likewise.
	(__gg__seconds_past_midnight): Likewise.
	(__gg__formatted_current_date): Likewise.
	(__gg__formatted_date): Likewise.
	(__gg__formatted_datetime): Likewise.
	(__gg__formatted_time): Likewise.
	(__gg__lower_case): Likewise.
	(numval): Likewise.
	(numval_c): Likewise.
	(__gg__ord): Likewise.
	(__gg__trim): Likewise.
	(__gg__random): Likewise.
	(__gg__random_next): Likewise.
	(__gg__reverse): Likewise.
	(__gg__upper_case): Likewise.
	(__gg__when_compiled): Likewise.
	(gets_int): Likewise.
	(gets_year): Likewise.
	(gets_month): Likewise.
	(gets_day): Likewise.
	(gets_day_of_week): Likewise.
	(gets_day_of_year): Likewise.
	(gets_week): Likewise.
	(gets_hours): Likewise.
	(gets_minutes): Likewise.
	(gets_seconds): Likewise.
	(gets_nanoseconds): Likewise.
	(fill_cobol_tm): Likewise.
	(__gg__hex_of): Likewise.
	(floating_format_tester): Likewise.
	(__gg__numval_f): Likewise.
	(__gg__test_numval_f): Likewise.
	(strcasestr): Likewise.
	(strlaststr): Likewise.
	(__gg__locale_compare): Likewise.
	(__gg__locale_date): Likewise.
	(__gg__locale_time): Likewise.
	(__gg__locale_time_from_seconds): Likewise.
	* libgcobol.cc (struct program_state): Likewise.
	(turn_sign_bit_on): Likewise.
	(turn_sign_bit_off): Likewise.
	(is_sign_bit_on): Likewise.
	(__gg__string_to_alpha_edited_ascii): Likewise.
	(int128_to_field): Likewise.
	(edited_to_binary): Likewise.
	(get_binary_value_local): Likewise.
	(__gg__get_date_yymmdd): Likewise.
	(__gg__get_date_yyyymmdd): Likewise.
	(__gg__get_date_yyddd): Likewise.
	(__gg__get_yyyyddd): Likewise.
	(__gg__get_date_dow): Likewise.
	(__gg__get_date_hhmmssff): Likewise.
	(__gg__dirty_to_binary_internal): Likewise.
	(__gg__dirty_to_binary): Likewise.
	(__gg__dirty_to_float): Likewise.
	(psz_to_internal): Likewise.
	(get_scaled_rdigits): Likewise.
	(format_for_display_internal): Likewise.
	(format_for_display_local): Likewise.
	(compare_88): Likewise.
	(compare_field_class): Likewise.
	(compare_strings): Likewise.
	(__gg__compare_2): Likewise.
	(init_var_both): Likewise.
	(alpha_to_alpha_move_from_location): Likewise.
	(alpha_to_alpha_move): Likewise.
	(__gg__move): Likewise.
	(__gg__move_literala): Likewise.
	(normalize_id): Likewise.
	(inspect_backward_format_1): Likewise.
	(__gg__inspect_format_1): Likewise.
	(inspect_backward_format_2): Likewise.
	(__gg__inspect_format_2): Likewise.
	(normalize_for_inspect_format_4): Likewise.
	(__gg__inspect_format_4): Likewise.
	(move_string): Likewise.
	(brute_force_trim): Likewise.
	(__gg__string): Likewise.
	(display_both): Likewise.
	(__gg__display_string): Likewise.
	(not_mangled_core): Likewise.
	(__gg__accept): Likewise.
	(__gg__set_initial_switch_value): Likewise.
	(__gg__onetime_initialization): Likewise.
	(is_numeric_display_numeric): Likewise.
	(is_alpha_a_number): Likewise.
	(__gg__classify): Likewise.
	(__gg__convert_encoding): Likewise.
	(__gg__convert_encoding_length): Likewise.
	(accept_envar): Likewise.
	(__gg__accept_envar): Likewise.
	(__gg__set_envar): Likewise.
	(__gg__get_argc): Likewise.
	(__gg__get_argv): Likewise.
	(__gg__get_command_line): Likewise.
	(__gg__alphabet_use): Likewise.
	(__gg__ascii_to_internal_field): Likewise.
	(__gg__ascii_to_internal): Likewise.
	(__gg__console_to_internal): Likewise.
	(__gg__parser_set_conditional): Likewise.
	(__gg__internal_to_console_in_place): Likewise.
	(__gg__literaln_alpha_compare): Likewise.
	(__gg__unstring): Likewise.
	(struct cbl_exception_t): Likewise.
	(__gg__codeset_figurative_constants): Likewise.
	(__gg__function_handle_from_cobpath): Likewise.
	(__gg__just_mangle_name): Likewise.
	(__gg__function_handle_from_name): Likewise.
	(get_the_byte): Likewise.
	(__gg__set_env_name): Likewise.
	(__gg__get_env_name): Likewise.
	(__gg__get_env_value): Likewise.
	(__gg__set_env_value): Likewise.
	(__gg__fprintf_stderr): Likewise.
	(__gg__accept_arg_value): Likewise.
	(__gg__fc_char): Likewise.
	* libgcobol.h (__gg__dirty_to_binary_internal): Likewise.
	(__gg__dirty_to_binary): Likewise.
	(__gg__internal_to_console_in_place): Likewise.
	(__gg__fc_char): Likewise.
	(__gg__convert_encoding): Likewise.
	(__gg__convert_encoding_length): Likewise.
	* stringbin.cc (string_from_combined): Likewise.
	(__gg__binary_to_string_internal): Likewise.
	(__gg__binary_to_string_encoded): Likewise.
	(__gg__numeric_display_to_binary): Likewise.
	(__gg__packed_to_binary): Likewise.
	* stringbin.h (__gg__binary_to_string_internal): Likewise.
	(__gg__binary_to_string_encoded): Likewise.
	(__gg__numeric_display_to_binary): Likewise.
	* valconv.cc (__gg__alphabet_create): Likewise.
	(__gg__string_to_numeric_edited): Likewise.
	(__gg__string_to_alpha_edited): Likewise.
	(__gg__remove_trailing_zeroes): Likewise.
	* valconv.h (__VALCONV_H): Likewise.
	* encodings.h: New file.

gcc/testsuite/ChangeLog:

	* cobol.dg/group1/check_88.cob: Likewise.
2025-10-10 13:19:34 -04:00

72 lines
3.1 KiB
C++

/*
* Copyright (c) 2021-2025 Symas Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following disclaimer
* in the documentation and/or other materials provided with the
* distribution.
* * Neither the name of the Symas Corporation nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __VALCONV_H
#define __VALCONV_H
// All "ordinals" are zero-based ordinals. The COBOL spec's ordinal values
// for ordinary ASCII/EBCDIC ranger from 1 to 256, so we call them zero through
// 255. We use unsigned ints so that when an custom alphabet is described, we
// can make every unmentioned character have an ordinal greater than the final
// ordinal of the custom list.
struct alphabet_state
{
unsigned short collation[256];
unsigned char low_char;
unsigned char high_char;
};
extern std::unordered_map<size_t, alphabet_state> __gg__alphabet_states;
extern "C"
{
void __gg__realloc_if_necessary(char **dest, size_t *dest_size, size_t new_size);
void __gg__alphabet_create(cbl_encoding_t encoding,
size_t alphabet_index,
const unsigned char *alphabet,
int low_char,
int high_char );
bool __gg__string_to_numeric_edited(char * const dest,
const char *source, // ASCII
int rdigits,
int is_negative,
const char *picture);
void __gg__string_to_alpha_edited(char *dest,
cbl_encoding_t dest_encoding,
const char *source,
int slength,
const char *picture);
void __gg__currency_sign_init();
void __gg__currency_sign(int symbol, const char *sign);
void __gg__remove_trailing_zeroes(char *p);
}
#endif