Files
gcc-reflection/libgcobol/xmlparse.cc
Robert Dubner e9757133bb cobol: Corrected FUNCTION CHAR and FUNCTION ORD.
The functions CHAR and ORD have been changed to correctly report on
character positions within the collation sequence.

The use of the LOW-VALUE and HIGH-VALUE figurative constants has been
corrected.

Some establishment of DISPLAY and NATIONAL encodings has been done
in anticipation of changes soon to come.

Some new testsuite tests have been added.

gcc/cobol/ChangeLog:

	* genapi.cc (parser_alphabet): Alphabet encoding.
	(parser_alphabet_use): Likewise.
	(parser_xml_parse): Use correct debugging macro; encoding.
	(parser_xml_on_exception): Likewise.
	(parser_xml_not_exception): Likewise.
	(parser_xml_end): Likewise.
	(initialize_the_data): Encoding.
	(parser_label_label): Debugging macros.
	(parser_label_goto): Likewise.
	(parser_file_add): Encoding.
	(parser_intrinsic_call_1): Special handling for __gg__char.
	(parser_intrinsic_call_2): Formatting.
	* parse.y: Response from FUNCTION ORD is flagged "unsigned".
	* symbols.cc (cbl_alphabet_t::reencode): Establish
	low_char & high_char.
	* symbols.h (struct cbl_alphabet_t): Likewise.

libgcobol/ChangeLog:

	* charmaps.cc: Encoding.
	* charmaps.h (class charmap_t): Encoding.
	* intrinsic.cc (__gg__char): Report the character at the
	collation position.
	(__gg__ord): Report the collation position of a character.
	* libgcobol.cc (struct program_state): Add encodings;
	Remove obsolete defines.
	(__gg__current_collation): New function for encoding/collation.
	(__gg__pop_program_state): Encoding.
	(__gg__init_program_state): Encoding.
	(format_for_display_internal): Handle LOW-VALUE and HIGH-VALUE.
	(__gg__compare_2): Encoding.
	(__gg__alphabet_use): Likewise.
	* libgcobol.h (__gg__current_collation): New declaration.
	* xmlparse.cc (__gg__xml_parse): Make a parameter const.

gcc/testsuite/ChangeLog:

	* cobol.dg/group2/Length_overflow__2_.out: Updated test result.
	* cobol.dg/group2/Length_overflow_with_offset__1_.out: Likewise.
	* cobol.dg/group2/Offset_overflow.out: Likewise.
	* cobol.dg/group2/CALL_with_OCCURS_DEPENDING_ON.cob: New test.
	* cobol.dg/group2/CALL_with_OCCURS_DEPENDING_ON.out: New test.
	* cobol.dg/group2/CHAR_and_ORD_with_COLLATING_sequence_-_ASCII.cob: New test.
	* cobol.dg/group2/CHAR_and_ORD_with_COLLATING_sequence_-_ASCII.out: New test.
	* cobol.dg/group2/CHAR_and_ORD_with_COLLATING_sequence_-_EBCDIC.cob: New test.
	* cobol.dg/group2/CHAR_and_ORD_with_COLLATING_sequence_-_EBCDIC.out: New test.
	* cobol.dg/group2/EC-BOUND-REF-MOD_checking_process_termination.cob: New test.
	* cobol.dg/group2/EC-BOUND-REF-MOD_checking_process_termination.out: New test.
	* cobol.dg/group2/Intrinsics_without_FUNCTION_keyword__3_.cob: New test.
	* cobol.dg/group2/Occurs_DEPENDING_ON__source_and_dest.cob: New test.
	* cobol.dg/group2/Occurs_DEPENDING_ON__source_and_dest.out: New test.
	* cobol.dg/group2/Recursive_subscripts.cob: New test.
	* cobol.dg/group2/Recursive_subscripts.out: New test.
	* cobol.dg/group2/SEARCH_ALL_with_OCCURS_DEPENDING_ON.cob: New test.
	* cobol.dg/group2/SEARCH_ALL_with_OCCURS_DEPENDING_ON.out: New test.
	* cobol.dg/group2/Subscript_by_arithmetic_expression.cob: New test.
	* cobol.dg/group2/Subscript_out_of_bounds__1_.cob: New test.
	* cobol.dg/group2/Subscript_out_of_bounds__1_.out: New test.
	* cobol.dg/group2/Subscript_out_of_bounds__2_.cob: New test.
	* cobol.dg/group2/Subscript_out_of_bounds__2_.out: New test.
	* cobol.dg/group2/Subscripted_refmods.cob: New test.
	* cobol.dg/group2/Subscripted_refmods.out: New test.
	* cobol.dg/group2/length_of_ODO_Rules_7__8A__and_8B.cob: New test.
	* cobol.dg/group2/length_of_ODO_Rules_7__8A__and_8B.out: New test.
	* cobol.dg/group2/length_of_ODO_w_-_reference_modification.cob: New test.
2025-10-23 14:51:16 -04:00

593 lines
23 KiB
C++

/*
* Copyright (c) 2021-2025 Symas Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following disclaimer
* in the documentation and/or other materials provided with the
* distribution.
* * Neither the name of the Symas Corporation nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <fcntl.h>
#include <unistd.h>
#include <cctype>
#include <cerrno>
#include <cmath>
#include <cfenv>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <algorithm>
#include <vector>
#include <libxml/SAX2.h>
#include <libxml/parser.h>
#include "config.h"
#include "libgcobol-fp.h"
#include "ec.h"
#include "common-defs.h"
#include "io.h"
#include "gcobolio.h"
#include "libgcobol.h"
// Number of elements in a true array X (not valid for pointers).
#define COUNT_OF(X) (sizeof(X) / sizeof((X)[0]))
/*
 * Debug trace helper.  Prints to stderr only when the XMLPARSE environment
 * variable is set.
 *
 *   func, line  location to report (normally __func__ and __LINE__)
 *   len         0:  print a bare "was here" marker, data is ignored
 *               -1: data is a NUL-terminated string
 *               otherwise: passed to printf as the precision for data
 *   data        text to print; may be null, in which case "(null)" is
 *               printed instead of handing a null pointer to %s (which is
 *               undefined behavior).
 */
void sayso( const char func[], int line,
            int len = 0 , const unsigned char data[] = nullptr ) {
  if( ! getenv("XMLPARSE") ) {
    return;
  }

  if( len == 0 ) {
    fprintf(stderr, "%s:%d Kilroy was here\n", func, line);
    return;
  }

  // libxml2 hands some callbacks null strings; never pass those to %s.
  const char *text = data ? reinterpret_cast<const char *>(data) : "(null)";

  if( len == -1 ) {
    fprintf(stderr, "%s:%d: '%s'\n", func, line, text);
  } else {
    fprintf(stderr, "%s:%d: '%.*s'\n", func, line, len, text);
  }
}
// Trace helpers that stamp the caller's function name and line number.
//   SAYSO()            location marker only
//   SAYSO_DATAZ(S)     S is a NUL-terminated string
//   SAYSO_DATA(N, S)   N is passed to sayso() as the length of S
#define SAYSO()          sayso(__func__, __LINE__)
#define SAYSO_DATAZ(S)   sayso(__func__, __LINE__, -1, (S))
#define SAYSO_DATA(N, S) sayso(__func__, __LINE__, (N), (S))
/*
 * Table of XML PARSE exception codes and their message text.
 * xml_ec_values is searched by ibm_code (see xml_ec_value_of);
 * eoxml_ec_values points one past the last entry.
 */
struct xml_ec_value_t {
int ibm_code;        // numeric exception code
const char msg[80];  // message text, NUL-terminated within 80 bytes
} xml_ec_values[] = {
// Table 73. XML PARSE exceptions that allow continuation
{ 1, "invalid character between elements" },
{ 2, "invalid start before element content" },
{ 3, "duplicate attribute" },
{ 4, "markup character '<' in an attribute value" },
{ 5, "start/end tag mismatch" },
{ 6, "invalid character in element" },
{ 7, "invalid start in element content. " },
{ 8, "CDATA closing character sequence ']]>' not opened" },
{ 10, "comment the character sequence '--' without '>'" },
{ 11, "invalid character in a processing instruction" },
{ 12, "XML declaration was not start of document" },
{ 13, "invalid digit in a hexadecimal character reference" },
{ 14, "invalid digit in a decimal character reference" },
{ 15, "encoding declaration value name must start with [a-zA-Z] character" },
{ 16, "character reference did not refer to a legal XML character" },
{ 17, "invalid character in an entity reference name" },
{ 70, "EBCDIC document, supported EBCDIC page, unsupported declaration" },
{ 71, "EBCDIC document, unsupported EBCDIC page " },
{ 72, "EBCDIC document, unsupported EBCDIC page, unsupported declaration" },
{ 73, "EBCDIC document, unsupported EBCDIC page and declaration " },
{ 80, "ASCII document, supported ASCII page, unsupported declaration" },
{ 81, "ASCII document, unsupported ASCII page " },
{ 82, "ASCII document, unsupported ASCII page, unsupported declaration" },
{ 83, "ASCII document, unsupported ASCII page and declaration " },
{ 84, "ASCII document, invalid UTF-8, external UTF-8, no declaration. " },
{ 85, "ASCII document, invalid UTF-8, external UTF-8, invalid declaration" },
{ 86, "ASCII document, invalid UTF-8, external ASCII" },
{ 87, "ASCII document, invalid UTF-8, external and document UTF-8" },
{ 88, "ASCII document, invalid UTF-8, unsupported ASCII/UTF-8, UTF-8 declaration" },
{ 89, "ASCII document, invalid UTF-8, external UTF-8, ASCII declaration" },
{ 92, "alphanumeric document expected, document is UTF-16. " },
// XML PARSE exceptions that allow continuation (continued)
// The two ranges below have no table entries; xml_ec_value_of()
// synthesizes a record for them.
//// 100,001 - 165,535 EBCDIC document encoding does not match code page
//// 200,001 - 265,535 ASCII document encoding does not match code page
// XML PARSE exceptions that do not allow continuation
{ 100, "end of document before start of XML declaration" },
{ 101, "end of document before end of XML declaration" },
{ 102, "end of document before root element" },
{ 103, "end of document before version information in XML declaration" },
{ 104, "end of document before version information value in XML declaration" },
{ 106, "end of document before encoding declaration value in XML declaration" },
{ 108, "end of document before standalone declaration value in XML declaration" },
{ 109, "end of document before attribute name" },
{ 110, "end of document before attribute value" },
{ 111, "end of document before character/entity reference in attribute value" },
{ 112, "end of document before empty element tag" },
{ 113, "end of document before root element name" },
{ 114, "end of document before element name" },
{ 115, "end of document before character data in element content" },
{ 116, "end of document before processing instruction in element content" },
{ 117, "end of document before comment or CDATA section in element content" },
{ 118, "end of document before comment in element content" },
{ 119, "end of document before CDATA section in element content" },
{ 120, "end of document before character/entity reference in element content" },
{ 121, "end of document before after close of root element" },
{ 122, "possible invalid start of a document type" },
{ 123, "duplicate document type" },
{ 124, "root element name must start with [A-Za-z_:]" },
{ 125, "first attribute name must start with [A-Za-z_:]" },
{ 126, "invalid character in or after element name" },
{ 127, "attribute name not followed by '=' " },
{ 128, "invalid attribute value delimiter" },
{ 130, "attribute name must start with [A-Za-z_:]" },
{ 131, "invalid character in or after attribute name" },
{ 132, "empty element tag not terminated with '/>'" },
{ 133, "element end tag name name must start with [A-Za-z_:]" },
{ 134, "element end tag not terminated with '>'" },
{ 135, "element name must start with [A-Za-z_:]" },
{ 136, "invalid start of comment/CDATA in element" },
{ 137, "invalid start of comment" },
{ 138, "processing instruction target name must start with [A-Za-z_:]" },
{ 139, "invalid character in/afterprocessing instruction target name" },
{ 140, "processing instruction not terminated with '?>'" },
{ 141, "invalid character following '&' in a character/entity reference" },
{ 142, "missing version information in XML declaration" },
{ 143, "missing '=' after 'version' in XML declaration " },
{ 144, "missing XML version declaration " },
{ 145, "invalid character in XML version information" },
{ 146, "invalid character following XML version information value " },
{ 147, "invalid attribute in XML declaration" },
{ 148, "missing '=' after 'encoding' in XML declaration" },
{ 149, "missing XML encoding declaration value" },
{ 150, "invalid XML encoding declaration value" },
{ 151, "invalid character afer XML declaration" },
{ 152, "invalid attribute XML declaration" },
{ 153, "missing '=' after standalone XML declaration" },
{ 154, "missing standalone XML declaration value" },
{ 155, "standalone declaration must be 'yes' or 'no'" },
{ 156, "invalid standalone XML declaration value" },
{ 157, "invalid character following XML standalone declaration value" },
{ 158, "unterminated XML declaration " },
{ 159, "start of document type declaration after end of root element" },
{ 160, "start of element after end of root element" },
{ 161, "invalid UTF-8 byte sequence" },
{ 162, "UTF-8 character that has a Unicode code point above x'FFFF'" },
{ 315, "UTF-16 document little-endian unsupported" },
{ 316, "UCS4 document unsupported" },
{ 317, "unrecognized document encoding" },
{ 318, "UTF-8 document unsupported " },
{ 320, "mismatched national document data item to document encoding EBCDIC" },
{ 321, "mismatched national document data item to document encoding ASCII" },
{ 322, "mismatched native alphanumeric document data item to document encoding EBCDIC" },
{ 323, "mismatched host alphanumeric document data item to document encoding ASCII" },
{ 324, "mismatched national document data item to document encoding UTF-8" },
{ 325, "mismatched host alphanumeric document datat to document encoding UTF-8" },
{ 500, "internal error" },
}, *eoxml_ec_values = xml_ec_values + COUNT_OF(xml_ec_values);
/*
 * Look up the exception record for ibm_code.
 *
 * Codes strictly between 100000 and 200000, and strictly between 200000 and
 * 300000, are "encoding does not match code page" exceptions.  They have no
 * table entry; a function-local static record is stamped with the code and
 * returned, so the result is overwritten by the next call for that range.
 *
 * Returns a pointer to the matching record, or nullptr if the code is
 * not known.
 */
static const xml_ec_value_t *
xml_ec_value_of( int ibm_code ) {
  if( ibm_code > 100000 && ibm_code < 200000 ) {
    static xml_ec_value_t not_ebcdic{ 0, "EBCDIC document encoding "
                                         "does not match code page" };
    not_ebcdic.ibm_code = ibm_code;
    return &not_ebcdic;
  }

  if( ibm_code > 200000 && ibm_code < 300000 ) {
    static xml_ec_value_t not_ascii{ 0, "ASCII document encoding "
                                        "does not match code page" };
    not_ascii.ibm_code = ibm_code;
    return &not_ascii;
  }

  // Linear scan of the table; the first entry with a matching code wins.
  for( const xml_ec_value_t *entry = xml_ec_values;
       entry != eoxml_ec_values;
       ++entry ) {
    if( entry->ibm_code == ibm_code ) {
      return entry;
    }
  }
  return nullptr;
}
const char *
xml_ec_value_str( int ibm_code ) {
auto p = xml_ec_value_of(ibm_code);
return p? p->msg : nullptr;
}
#if NEEDED
/*
 * Report whether ibm_code is treated as fatal here.
 * Codes below 100 and codes above 100000 return false.  Anything else
 * returns true and is asserted to be below 1000.
 */
static bool
xml_fatal( int ibm_code ) {
  const bool continuable = ibm_code < 100 || ibm_code > 100000;
  if( continuable ) {
    return false;
  }
  assert(ibm_code < 1000);
  return true;
}
#endif
// Processing procedure supplied by the caller of __gg__xml_parse().
// Set by initialize_handlers() and invoked by xml_event() for every event.
static callback_t *cobol_callback;
/*
* Internal handler functions
*/
///////////////
/*
ATTRIBUTE-CHARACTER The single character that corresponds with the predefined entity reference in the attribute value
ATTRIBUTE-CHARACTERS The value within quotation marks or apostrophes. This can be a substring of the attribute value if the value includes an entity reference.
ATTRIBUTE-NAME The attribute name; the string to the left of the equal sign
ATTRIBUTE-NATIONAL-CHARACTER Regardless of the type of the XML document specified by identifier-1 in the XML PARSE statement, XML-TEXT is empty with length zero and XML-NTEXT contains the single national character that corresponds with the numeric character reference.
CONTENT-CHARACTER The single character that corresponds with the predefined entity reference in the element content
CONTENT-NATIONAL-CHARACTER Regardless of the type of the XML document specified by identifier-1 in the XML PARSE statement, XML-TEXT is empty with length zero and XML-NTEXT contains the single national character that corresponds with the numeric character reference.
DOCUMENT-TYPE-DECLARATION The entire document type declaration, including the opening and closing character sequences "<!DOCTYPE" and ">"
ENCODING-DECLARATION The value, between quotes or apostrophes, of the encoding declaration in the XML declaration
END-OF-CDATA-SECTION The string "]]>"
END-OF-DOCUMENT Empty with length zero
EXCEPTION The part of the document that was successfully scanned, up to and including the point at which the exception was detected. Special register XML-CODE contains the unique error code that identifies the exception.
PROCESSING-INSTRUCTION-TARGET The processing instruction target name, which occurs immediately after the processing instruction opening sequence, "<?"
STANDALONE-DECLARATION The value, between quotation marks or apostrophes ("yes" or "no"), of the stand-alone declaration in the XML declaration
START-OF-CDATA-SECTION The string "<![CDATA["
START-OF-DOCUMENT The entire document
UNKNOWN-REFERENCE-IN-CONTENT The entity reference name, not including the "&" and ";" delimiters
UNKNOWN-REFERENCE-IN-ATTRIBUTE The entity reference name, not including the "&" and ";" delimiters
VERSION-INFORMATION The value, between quotation marks or apostrophes, of the version information in the XML declaration
*/
///////////////
// Runtime fields defined elsewhere (presumably the COBOL XML-EVENT, XML-CODE,
// XML-TEXT and XML-NTEXT special registers -- confirm against their definition).
// xml_event() updates the first three before each callback; the ntext field is
// not touched in this part of the file.
extern cblc_field_t __ggsr__xml_event;
extern cblc_field_t __ggsr__xml_code;
extern cblc_field_t __ggsr__xml_text;
extern cblc_field_t __ggsr__xml_ntext;
/*
 * Publish one event and run the processing procedure.
 *
 *   event_name  NUL-terminated event name; must be shorter than the
 *               allocated size of __ggsr__xml_event (asserted)
 *   len         number of bytes of text to expose
 *   text        event text; __ggsr__xml_text is pointed at it directly,
 *               nothing is copied
 */
static void
xml_event( const char event_name[], size_t len, char text[] ) {
  const size_t name_len = strlen(event_name);
  assert(name_len < __ggsr__xml_event.allocated);

  // Store the event name, then pad the rest of the field with spaces.
  auto name_end = std::copy( event_name, event_name + name_len,
                             PTRCAST(char, __ggsr__xml_event.data) );
  auto field_end = __ggsr__xml_event.data + __ggsr__xml_event.allocated;
  std::fill(PTRCAST(unsigned char, name_end), field_end, 0x20);

  // Expose the caller's buffer as the text field.
  __ggsr__xml_text.data = reinterpret_cast<unsigned char*>(text);
  __ggsr__xml_text.allocated = len;
  __ggsr__xml_text.capacity = len;

  // NOTE(review): this clears the data *pointer* of __ggsr__xml_code, it does
  // not store a zero value in the field -- confirm that is what is intended.
  __ggsr__xml_code.data = nullptr;

  cobol_callback();
}
// Convenience overload: text is NUL-terminated, so its length is measured
// with strlen().
static inline void
xml_event( const char event_name[], char text[] ) {
  const size_t text_len = strlen(text);
  xml_event(event_name, text_len, text);
}
static inline void
xml_event( const char event_name[], size_t len, const xmlChar * value ) {
char *text = reinterpret_cast<char*>(const_cast<xmlChar*>(value));
xml_event(event_name, len, text);
}
static inline void
xml_event( const char event_name[], const xmlChar * value ) {
char *text = reinterpret_cast<char*>(const_cast<xmlChar*>(value));
xml_event(event_name, strlen(text), text);
}
static void attributeDecl(void * ctx,
const xmlChar * elem,
const xmlChar * fullname,
int type,
int def,
const xmlChar * defaultValue,
xmlEnumerationPtr tree)
{
fprintf(stderr, "%s:%d: elem=%s, name=%s, default=%s\n",
__func__, __LINE__, elem, fullname, defaultValue);
}
// SAX cdataBlock callback: trace the block, then report its len bytes as a
// CONTENT-CHARACTERS event.
static void
cdataBlock(void * ctx, const xmlChar * data, int len)
{
  SAYSO_DATA(len, data);
  xml_event("CONTENT-CHARACTERS", len, data);
}
// SAX characters callback: trace the text, then report its len bytes as a
// CONTENT-CHARACTERS event.
static void
characters(void * ctx, const xmlChar * data, int len)
{
  SAYSO_DATA(len, data);
  xml_event("CONTENT-CHARACTERS", len, data);
}
// SAX comment callback: trace the NUL-terminated comment text and report it
// as a COMMENT event.
static void
comment(void * ctx, const xmlChar * value)
{
  SAYSO_DATAZ(value);
  xml_event("COMMENT", value);
}
// SAX elementDecl callback (registration is under #if 0 in
// initialize_handlers()).  Only traces the element name; no event is raised.
static void
elementDecl(void * ctx,
            const xmlChar * name,
            int type,
            xmlElementContentPtr content)
{
  SAYSO_DATAZ(name);
}
// SAX endDocument callback: trace only; no event is raised.
static void
endDocument(void * ctx)
{
  SAYSO();
}
// SAX2 endElementNs callback: trace the local name and report it as an
// END-OF-ELEMENT event.  The prefix and URI are not used.
static void
endElementNs(void * ctx,
             const xmlChar * localname,
             const xmlChar * prefix,
             const xmlChar * URI)
{
  SAYSO_DATAZ(localname);
  xml_event("END-OF-ELEMENT", localname);
}
// SAX1 endElement callback (registration is under #if 0 in
// initialize_handlers()).  Only traces the element name.
static void
endElement(void * ctx, const xmlChar * name)
{
  SAYSO_DATAZ(name);
}
// SAX entityDecl callback (registration is under #if 0 in
// initialize_handlers()).  Only traces the entity name.
static void
entityDecl(void * ctx,
           const xmlChar * name,
           int type,
           const xmlChar * publicId,
           const xmlChar * systemId,
           xmlChar * content)
{
  SAYSO_DATAZ(name);
}
// SAX error callback: print the printf-style message to stderr, prefixed
// with "error: " and followed by a newline.
static void
error(void * ctx, const char * msg, ...)
{
  va_list args;
  va_start(args, msg);
  fputs("error: ", stderr);
  vfprintf(stderr, msg, args);
  fputc('\n', stderr);
  va_end(args);
}
// SAX externalSubset callback (registration is under #if 0 in
// initialize_handlers()).  Only traces the name.
static void
externalSubset(void * ctx,
               const xmlChar * name,
               const xmlChar * ExternalID,
               const xmlChar * SystemID)
{
  SAYSO_DATAZ(name);
}
// SAX fatalError callback: print the printf-style message to stderr,
// prefixed with "fatal: " and followed by a newline.
static void
fatalError(void * ctx, const char * msg, ...)
{
  va_list args;
  va_start(args, msg);
  fputs("fatal: ", stderr);
  vfprintf(stderr, msg, args);
  fputc('\n', stderr);
  va_end(args);
}
// SAX getEntity callback (registration is under #if 0 in
// initialize_handlers()).  Traces the entity name and supplies no entity.
// The explicit return matters: falling off the end of a non-void function
// is undefined behavior.
static xmlEntityPtr getEntity(void * ctx,
                              const xmlChar * name)
{
  SAYSO_DATAZ(name);
  return nullptr;
}
// SAX getParameterEntity callback (registration is under #if 0 in
// initialize_handlers()).  Traces the entity name and supplies no entity.
// The explicit return matters: falling off the end of a non-void function
// is undefined behavior.
static xmlEntityPtr getParameterEntity(void * ctx,
                                       const xmlChar * name)
{
  SAYSO_DATAZ(name);
  return nullptr;
}
// SAX hasExternalSubset callback (registration is under #if 0 in
// initialize_handlers()).  Traces the call and answers 0 ("no") as a
// placeholder; previously the function had no return statement, which is
// undefined behavior for a non-void function.
static int hasExternalSubset(void * ctx)
{
  SAYSO();
  return 0;
}
// SAX hasInternalSubset callback (registration is under #if 0 in
// initialize_handlers()).  Traces the call and answers 0 ("no") as a
// placeholder; previously the function had no return statement, which is
// undefined behavior for a non-void function.
static int hasInternalSubset(void * ctx)
{
  SAYSO();
  return 0;
}
// SAX ignorableWhitespace callback: trace the len bytes of whitespace;
// no event is raised.
static void
ignorableWhitespace(void * ctx, const xmlChar * ch, int len)
{
  SAYSO_DATA(len, ch);
}
// SAX internalSubset callback (registration is under #if 0 in
// initialize_handlers()).  Only traces the name.
static void
internalSubset(void * ctx,
               const xmlChar * name,
               const xmlChar * ExternalID,
               const xmlChar * SystemID)
{
  SAYSO_DATAZ(name);
}
// SAX isStandalone callback (registration is under #if 0 in
// initialize_handlers()).  Traces the call and answers 0 as a placeholder;
// previously the function had no return statement, which is undefined
// behavior for a non-void function.
static int isStandalone (void * ctx)
{
  SAYSO();
  return 0;
}
// SAX notationDecl callback (registration is under #if 0 in
// initialize_handlers()).  Only traces the notation name.
static void
notationDecl(void * ctx,
             const xmlChar * name,
             const xmlChar * publicId,
             const xmlChar * systemId)
{
  SAYSO_DATAZ(name);
}
static void processingInstruction(void * ctx,
const xmlChar * target,
const xmlChar * data)
{
SAYSO_DATAZ(target);
xml_event("PROCESSING-INSTRUCTION-TARGET", target);
SAYSO_DATAZ(data);
xml_event("PROCESSING-INSTRUCTION-DATA", data);
}
// SAX reference callback: trace the reference name; no event is raised.
static void
reference(void * ctx, const xmlChar * name)
{
  SAYSO_DATAZ(name);
}
// SAX resolveEntity callback (registration is under #if 0 in
// initialize_handlers()).  Traces the call and supplies no input.
// The explicit return matters: falling off the end of a non-void function
// is undefined behavior.
static xmlParserInputPtr resolveEntity( void * ctx,
                                        const xmlChar * publicId,
                                        const xmlChar * systemId)
{
  SAYSO();
  return nullptr;
}
// SAX setDocumentLocator callback (registration is under #if 0 in
// initialize_handlers()).  Trace only.
static void
setDocumentLocator(void * ctx, xmlSAXLocatorPtr loc)
{
  SAYSO();
}
/*
 * SAX startDocument callback, invoked once the XML declaration has been
 * parsed.  The declaration's contents can be fetched with
 * xmlCtxtGetVersion(), xmlCtxtGetDeclaredEncoding() and
 * xmlCtxtGetStandalone().  For now this only traces the call.
 */
static void
startDocument(void * ctx)
{
  SAYSO();
}
// SAX2 startElementNs callback: trace the local name and report it as a
// START-OF-ELEMENT event.  Namespace and attribute arguments are not used.
static void
startElementNs(void * ctx,
               const xmlChar * localname,
               const xmlChar * prefix,
               const xmlChar * URI,
               int nb_namespaces,
               const xmlChar ** namespaces,
               int nb_attributes,
               int nb_defaulted,
               const xmlChar ** attributes)
{
  SAYSO_DATAZ(localname);
  xml_event("START-OF-ELEMENT", localname);
}
// SAX1 startElement callback (registration is under #if 0 in
// initialize_handlers()).  Only traces the element name.
static void
startElement(void * ctx, const xmlChar * name, const xmlChar ** atts)
{
  SAYSO_DATAZ(name);
}
// SAX unparsedEntityDecl callback (registration is under #if 0 in
// initialize_handlers()).  Only traces the entity name.
static void
unparsedEntityDecl(void * ctx,
                   const xmlChar * name,
                   const xmlChar * publicId,
                   const xmlChar * systemId,
                   const xmlChar * notationName)
{
  SAYSO_DATAZ(name);
}
// SAX warning callback: print the printf-style message to stderr, prefixed
// with "warning: " and followed by a newline.
static void
warning(void * ctx, const char * msg, ... )
{
  va_list args;
  va_start(args, msg);
  fputs("warning: ", stderr);
  vfprintf(stderr, msg, args);
  fputc('\n', stderr);
  va_end(args);
}
/*
* xmlSAXHandler is a structure of function pointers that the SAX parser calls
* as it encounters XML elements in the input. Each pointer is a callback
* function, locally defined in this file. These we term "handlers".
*
* Each handler sets the XML registers per IBM, and then calls
* cobol_callback(), which is a function pointer supplied by the COBOL program
* to be the processing procedure for XML PARSE.
*
* There is no obvious way to abort parsing at the C level. See:
* http://veillard.com/XML/messages/0540.html
*
* > The simplest to implement this would not be to add a new SAX
* > callback but rather modify the xmlParserCtxtPtr passed to the
* > callbacks. The best seems to be:
* > - set ctxt->instate to XML_PARSER_EOF
* > - hack xmlCurrentChar() to return 0
* > if (ctxt->instate == XML_PARSER_EOF)
* > Doing both should led to a quick termination of parsing
* > (but endElement(s)/endDocument will certainly be called anyway).
*
* Another hack might be to set the input to all blanks in cobol_callback.
*/
// The callback table passed to xmlSAXUserParseMemory(); reset and filled in
// by initialize_handlers() at the start of every parse.
static xmlSAXHandler handlers;
/*
 * Prepare the file-scope SAX handler table for a parse and remember the
 * processing procedure that xml_event() will call.
 *
 *   callback  stored in cobol_callback
 */
static void
initialize_handlers( callback_t *callback ) {
  cobol_callback = callback;

  // Value-initialize the table so every callback starts out null, then mark
  // it with XML_SAX2_MAGIC (presumably what enables the SAX2 *Ns element
  // callbacks in libxml2 -- confirm against the libxml2 documentation).
  handlers = xmlSAXHandler {};
  handlers.initialized = XML_SAX2_MAGIC;

#if 0
  //// Should typically not be modified
  handlers.attributeDecl = attributeDecl;
  handlers.elementDecl = elementDecl;
  handlers.entityDecl = entityDecl;
  handlers.externalSubset = externalSubset;
  handlers.getEntity = getEntity;
  handlers.getParameterEntity = getParameterEntity;
  handlers.internalSubset = internalSubset;
  handlers.notationDecl = notationDecl;
  handlers.resolveEntity = resolveEntity;
  handlers.unparsedEntityDecl = unparsedEntityDecl;
  //// Not supposed to be changed by applications
  handlers.hasExternalSubset = hasExternalSubset;
  handlers.hasInternalSubset = hasInternalSubset;
  handlers.isStandalone = isStandalone;
  //// SAX 1 only
  handlers.startElement = startElement;
  handlers.endElement = endElement;
  //// Everything is available on the context, so this is useless in our case
  handlers.setDocumentLocator = setDocumentLocator;
#endif

  // Document and element structure
  handlers.startDocument = startDocument;
  handlers.endDocument = endDocument;
  handlers.startElementNs = startElementNs;
  handlers.endElementNs = endElementNs;

  // Content
  handlers.characters = characters;
  handlers.cdataBlock = cdataBlock;
  handlers.ignorableWhitespace = ignorableWhitespace;
  handlers.comment = comment;
  handlers.processingInstruction = processingInstruction;
  handlers.reference = reference;

  // Diagnostics
  handlers.warning = warning;
  handlers.error = error;
  handlers.fatalError = fatalError;
}
extern "C"
int
__gg__xml_parse( const cblc_field_t *input_field,
size_t input_offset,
size_t len,
cblc_field_t *encoding,
cblc_field_t *validating,
int returns_national,
void (*callback)(void) )
{
initialize_handlers(callback);
const char *input = PTRCAST(char, input_field->data + input_offset);
int erc = xmlSAXUserParseMemory(&handlers, nullptr, input, len);
if( erc ) {
xmlErrorPtr msg = xmlCtxtGetLastError(nullptr);
fprintf(stderr, "XML PARSE: error: line %d: %s (%d: %d.%d.%d)\n",
msg->line, msg->message, erc, msg->domain, msg->level, msg->code);
}
return erc;
}