diff --git a/README.Rmd b/README.Rmd index 5cfbccc..deb95d1 100644 --- a/README.Rmd +++ b/README.Rmd @@ -32,20 +32,6 @@ The following functions are implemented: - `tidy_html` : Clean up gnarly HTML/XML -### TODO - -Fix: - -```{text} -* checking compiled code ... WARNING -File ‘htmltidy/libs/htmltidy.so’: - Found ‘___stderrp’, possibly from ‘stderr’ (C) - Objects: ‘alloc.o’, ‘streamio.o’, ‘tidylib.o’ - Found ‘___stdoutp’, possibly from ‘stdout’ (C) - Objects: ‘sprtf.o’, ‘tidylib.o’ - Found ‘_exit’, possibly from ‘exit’ (C) - Objects: ‘alloc.o’, ‘sprtf.o’ -``` ### Installation diff --git a/README.md b/README.md index 33f33ea..bdff89e 100644 --- a/README.md +++ b/README.md @@ -17,21 +17,6 @@ The following functions are implemented: - `tidy_html` : Clean up gnarly HTML/XML -### TODO - -Fix: - -``` text -* checking compiled code ... WARNING -File ‘htmltidy/libs/htmltidy.so’: - Found ‘___stderrp’, possibly from ‘stderr’ (C) - Objects: ‘alloc.o’, ‘streamio.o’, ‘tidylib.o’ - Found ‘___stdoutp’, possibly from ‘stdout’ (C) - Objects: ‘sprtf.o’, ‘tidylib.o’ - Found ‘_exit’, possibly from ‘exit’ (C) - Objects: ‘alloc.o’, ‘sprtf.o’ -``` - ### Installation ``` r diff --git a/src/alloc.c b/src/alloc.cpp similarity index 94% rename from src/alloc.c rename to src/alloc.cpp index 8cf0856..493204a 100644 --- a/src/alloc.c +++ b/src/alloc.cpp @@ -1,3 +1,5 @@ +#include <Rcpp.h> + /* alloc.c -- Default memory allocation routines. 
(c) 1998-2006 (W3C) MIT, ERCIM, Keio University @@ -44,14 +46,7 @@ static void TIDY_CALL defaultPanic( TidyAllocator* ARG_UNUSED(allocator), ctmbst if ( g_panic ) g_panic( msg ); else - { - /* 2 signifies a serious error */ - fprintf( stderr, "Fatal error: %s\n", msg ); -#ifdef _DEBUG - assert(0); -#endif - exit(2); - } + Rcpp::stop("Fatal memory error"); } static void* TIDY_CALL defaultAlloc( TidyAllocator* allocator, size_t size ) diff --git a/src/lexer.c b/src/lexer.c index 48a500d..ac289ca 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -1000,9 +1000,6 @@ void TY_(AddCharToLexer)( Lexer *lexer, uint c ) err = TY_(EncodeCharToUTF8Bytes)( c, buf, NULL, &count ); if (err) { -#if 0 && defined(_DEBUG) - fprintf( stderr, "lexer UTF-8 encoding error for U+%x : ", c ); -#endif /* replacement character 0xFFFD encoded as UTF-8 */ buf[0] = (byte) 0xEF; buf[1] = (byte) 0xBF; diff --git a/src/sprtf.c b/src/sprtf.cpp similarity index 97% rename from src/sprtf.c rename to src/sprtf.cpp index cd56b6c..096809b 100644 --- a/src/sprtf.c +++ b/src/sprtf.cpp @@ -1,3 +1,5 @@ +#include <Rcpp.h> + /* * SPRTF - Log output utility * @@ -51,7 +53,7 @@ #pragma warning( disable:4996 ) #else #define strcmpi strcasecmp -#endif +#endif #ifndef MX_ONE_BUF #define MX_ONE_BUF 1024 @@ -156,8 +158,7 @@ int open_log_file( void ) outfile = fopen(logfile, mode); if( outfile == 0 ) { outfile = (FILE *)-1; - sprtf("ERROR: Failed to open log file [%s] ...\n", logfile); - exit(1); /* failed */ + Rcpp::stop("Failed to open log file"); return 0; /* failed */ } return 1; /* success */ @@ -299,20 +300,19 @@ static void oi( char * psin ) if( w != len ) { fclose(outfile); outfile = (FILE *)-1; - sprtf("WARNING: Failed write to log file [%s] ...\n", logfile); - exit(1); + Rcpp::stop("Failed write to log file"); } else if (addflush) { fflush( outfile ); } } - if( addstdout ) { - fwrite( ps, 1, len, stdout ); - } + // if( addstdout ) { + // fwrite( ps, 1, len, stdout ); + // } #ifdef ADD_LISTVIEW if (add2listview) { 
LVInsertItem(ps); - } + } #endif // ADD_LISTVIEW #ifdef ADD_SCREENOUT if (add2screen) { diff --git a/src/streamio.c b/src/streamio.c index 866d9d6..12ba6fd 100644 --- a/src/streamio.c +++ b/src/streamio.c @@ -49,7 +49,7 @@ static uint PopChar( StreamIn *in ); ** Static (duration) Globals ******************************/ -static StreamOut stderrStreamOut = +static StreamOut stderrStreamOut = { ASCII, FSM_ASCII, @@ -61,7 +61,7 @@ static StreamOut stderrStreamOut = { 0, TY_(filesink_putByte) } }; -static StreamOut stdoutStreamOut = +static StreamOut stdoutStreamOut = { ASCII, FSM_ASCII, @@ -75,20 +75,11 @@ static StreamOut stdoutStreamOut = StreamOut* TY_(StdErrOutput)(void) { - if ( stderrStreamOut.sink.sinkData == 0 ) - stderrStreamOut.sink.sinkData = stderr; +// if ( stderrStreamOut.sink.sinkData == 0 ) +// stderrStreamOut.sink.sinkData = stderr; return &stderrStreamOut; } -#if 0 -StreamOut* TY_(StdOutOutput)(void) -{ - if ( stdoutStreamOut.sink.sinkData == 0 ) - stdoutStreamOut.sink.sinkData = stdout; - return &stdoutStreamOut; -} -#endif - void TY_(ReleaseStreamOut)( TidyDocImpl *doc, StreamOut* out ) { if ( out && out != &stderrStreamOut && out != &stdoutStreamOut ) @@ -252,7 +243,7 @@ void TY_(AddCharToOriginalText)(StreamIn *in, tchar c) { int i, err, count = 0; tmbchar buf[10] = {0}; - + err = TY_(EncodeCharToUTF8Bytes)(c, buf, NULL, &count); if (err) @@ -263,7 +254,7 @@ void TY_(AddCharToOriginalText)(StreamIn *in, tchar c) buf[2] = (byte) 0xBD; count = 3; } - + for (i = 0; i < count; ++i) TY_(AddByteToOriginalText)(in, buf[i]); } @@ -320,7 +311,7 @@ uint TY_(ReadChar)( StreamIn *in ) in->tabs--; return ' '; } - + for (;;) { c = ReadCharFromStream(in); @@ -386,7 +377,7 @@ uint TY_(ReadChar)( StreamIn *in ) /* Form Feed is allowed in HTML */ if ( c == '\015' && !cfgBool(in->doc, TidyXmlTags) ) break; - + if ( c < 32 ) continue; /* discard control char */ @@ -465,32 +456,32 @@ uint TY_(ReadChar)( StreamIn *in ) Bool isVendorChar = ( in->encoding == WIN1252 || 
in->encoding == MACROMAN ); Bool isMacChar = ( in->encoding == MACROMAN ); - + /* set error position just before offending character */ if (in->doc->lexer) { in->doc->lexer->lines = in->curline; in->doc->lexer->columns = in->curcol; } - + if ( isMacChar ) c1 = TY_(DecodeMacRoman)( c ); else c1 = TY_(DecodeWin1252)( c ); if ( c1 ) replMode = REPLACED_CHAR; - + if ( c1 == 0 && isVendorChar ) TY_(ReportEncodingError)(in->doc, VENDOR_SPECIFIC_CHARS, c, replMode == DISCARDED_CHAR); else if ( ! isVendorChar ) TY_(ReportEncodingError)(in->doc, INVALID_SGML_CHARS, c, replMode == DISCARDED_CHAR); - + c = c1; } if ( c == 0 ) continue; /* illegal char is discarded */ - + in->curcol++; break; } @@ -533,7 +524,7 @@ void TY_(UngetChar)( uint c, StreamIn *in ) /* fprintf(stderr, "Attempt to UngetChar EOF\n"); */ return; } - + in->pushed = yes; if (in->bufpos + 1 >= in->bufsize) @@ -616,7 +607,7 @@ void TY_(WriteChar)( uint c, StreamOut* out ) else if (out->encoding == UTF8) { int count = 0; - + TY_(EncodeCharToUTF8Bytes)( c, NULL, &out->sink, &count ); if (count <= 0) { @@ -678,7 +669,7 @@ void TY_(WriteChar)( uint c, StreamOut* out ) { int i, numChars = 1; uint theChars[2]; - + if ( !TY_(IsValidUTF16FromUCS4)(c) ) { /* invalid UTF-16 value */ @@ -702,21 +693,21 @@ void TY_(WriteChar)( uint c, StreamOut* out ) /* just put the char out */ theChars[0] = c; } - + for (i = 0; i < numChars; i++) { c = theChars[i]; - + if (out->encoding == UTF16LE) { - uint ch = c & 0xFF; PutByte(ch, out); - ch = (c >> 8) & 0xFF; PutByte(ch, out); + uint ch = c & 0xFF; PutByte(ch, out); + ch = (c >> 8) & 0xFF; PutByte(ch, out); } - + else if (out->encoding == UTF16BE || out->encoding == UTF16) { - uint ch = (c >> 8) & 0xFF; PutByte(ch, out); - ch = c & 0xFF; PutByte(ch, out); + uint ch = (c >> 8) & 0xFF; PutByte(ch, out); + ch = c & 0xFF; PutByte(ch, out); } } } @@ -729,8 +720,8 @@ void TY_(WriteChar)( uint c, StreamOut* out ) PutByte(c, out); else { - uint ch = (c >> 8) & 0xFF; PutByte(ch, out); - ch 
= c & 0xFF; PutByte(ch, out); + uint ch = (c >> 8) & 0xFF; PutByte(ch, out); + ch = c & 0xFF; PutByte(ch, out); } } #endif @@ -761,7 +752,7 @@ uint TY_(DecodeWin1252)(uint c) { if (127 < c && c < 160) c = Win2Unicode[c - 128]; - + return c; } @@ -788,10 +779,10 @@ static void EncodeWin1252( uint c, StreamOut* out ) */ /* modified to only need chars 128-255/U+0080-U+00FF - Terry Teague 19 Aug 01 */ -static const uint Mac2Unicode[128] = +static const uint Mac2Unicode[128] = { /* x7F = DEL */ - + 0x00C4, 0x00C5, 0x00C7, 0x00C9, 0x00D1, 0x00D6, 0x00DC, 0x00E1, 0x00E0, 0x00E2, 0x00E4, 0x00E3, 0x00E5, 0x00E7, 0x00E9, 0x00E8, @@ -944,53 +935,53 @@ static void EncodeLatin0( uint c, StreamOut* out ) Unicode equivalent are mapped to '?'. Is this appropriate? */ -static const uint Symbol2Unicode[] = +static const uint Symbol2Unicode[] = { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F, - + 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 0x0018, 0x0019, 0x001A, 0x001B, 0x001C, 0x001D, 0x001E, 0x001F, - + 0x0020, 0x0021, 0x2200, 0x0023, 0x2203, 0x0025, 0x0026, 0x220D, 0x0028, 0x0029, 0x2217, 0x002B, 0x002C, 0x2212, 0x002E, 0x002F, - + 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F, - + 0x2245, 0x0391, 0x0392, 0x03A7, 0x0394, 0x0395, 0x03A6, 0x0393, 0x0397, 0x0399, 0x03D1, 0x039A, 0x039B, 0x039C, 0x039D, 0x039F, - + 0x03A0, 0x0398, 0x03A1, 0x03A3, 0x03A4, 0x03A5, 0x03C2, 0x03A9, 0x039E, 0x03A8, 0x0396, 0x005B, 0x2234, 0x005D, 0x22A5, 0x005F, - + 0x00AF, 0x03B1, 0x03B2, 0x03C7, 0x03B4, 0x03B5, 0x03C6, 0x03B3, 0x03B7, 0x03B9, 0x03D5, 0x03BA, 0x03BB, 0x03BC, 0x03BD, 0x03BF, - + 0x03C0, 0x03B8, 0x03C1, 0x03C3, 0x03C4, 0x03C5, 0x03D6, 0x03C9, 0x03BE, 0x03C8, 0x03B6, 0x007B, 0x007C, 0x007D, 0x223C, 0x003F, - + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, - + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - + 0x00A0, 0x03D2, 0x2032, 0x2264, 0x2044, 0x221E, 0x0192, 0x2663, 0x2666, 0x2665, 0x2660, 0x2194, 0x2190, 0x2191, 0x2192, 0x2193, - + 0x00B0, 0x00B1, 0x2033, 0x2265, 0x00D7, 0x221D, 0x2202, 0x00B7, 0x00F7, 0x2260, 0x2261, 0x2248, 0x2026, 0x003F, 0x003F, 0x21B5, - + 0x2135, 0x2111, 0x211C, 0x2118, 0x2297, 0x2295, 0x2205, 0x2229, 0x222A, 0x2283, 0x2287, 0x2284, 0x2282, 0x2286, 0x2208, 0x2209, - + 0x2220, 0x2207, 0x00AE, 0x00A9, 0x2122, 0x220F, 0x221A, 0x22C5, 0x00AC, 0x2227, 0x2228, 0x21D4, 0x21D0, 0x21D1, 0x21D2, 0x21D3, - + 0x25CA, 0x2329, 0x00AE, 0x00A9, 0x2122, 0x2211, 0x003F, 0x003F, 0x003F, 0x003F, 0x003F, 0x003F, 0x003F, 0x003F, 0x003F, 0x003F, - + 0x20AC, 0x232A, 0x222B, 0x2320, 0x003F, 0x2321, 0x003F, 0x003F, 0x003F, 0x003F, 0x003F, 0x003F, 0x003F, 0x003F, 0x003F, 0x003F }; @@ -1087,7 +1078,7 @@ static void PutByte( uint byteValue, StreamOut* out ) static void UngetRawBytesToStream( StreamIn *in, byte* buf, int *count ) { int i; - + for (i = 0; i < *count; i++) { /* should never get here; testing for 0xFF, a valid char, is not a good idea */ @@ -1141,7 +1132,7 @@ static uint ReadCharFromStream( StreamIn* in ) if ( TY_(IsEOF)(in) ) return EndOfStream; - + c = ReadByte( in ); if (c == EndOfStream) @@ -1238,7 +1229,7 @@ static uint ReadCharFromStream( StreamIn* in ) /* deal with UTF-8 encoded char */ int err, count = 0; - + /* first byte "c" is passed in separately */ err = TY_(DecodeUTF8BytesToChar)( &n, c, NULL, &in->source, &count ); if (!err && (n == (uint)EndOfStream) && (count == 1)) /* EOF */ @@ -1252,13 +1243,13 @@ static uint ReadCharFromStream( StreamIn* in ) TY_(ReportEncodingError)(in->doc, INVALID_UTF8, n, no); n = 0xFFFD; /* replacement char */ } - + return n; } - + #if SUPPORT_ASIAN_ENCODINGS /* - This section is suitable for any "multibyte" variable-width + This section is suitable for any 
"multibyte" variable-width character encoding in which a one-byte code is less than 128, and the first byte of a two-byte code is greater or equal to 128. Note that Big5 and ShiftJIS fit into this @@ -1298,7 +1289,7 @@ static uint ReadCharFromStream( StreamIn* in ) else n = c; - + return n; } diff --git a/src/tidylib.c b/src/tidylib.c index 4787336..e77f701 100755 --- a/src/tidylib.c +++ b/src/tidylib.c @@ -180,7 +180,7 @@ void tidyDocRelease( TidyDocImpl* doc ) TY_(FreeConfig)( doc ); TY_(FreeAttrTable)( doc ); TY_(FreeTags)( doc ); - /*\ + /*\ * Issue #186 - Now FreeNode depend on the doctype, so the lexer is needed * to determine which hash is to be used, so free it last. \*/ @@ -658,7 +658,7 @@ Bool TIDY_CALL tidySetReportFilter( TidyDoc tdoc, TidyReportFilter filt ) /* TidyReportFilter2 functions similar to TidyReportFilter, but provides the ** built-in English format string and va_list so that LibTidy users can use -** the format string as a lookup key for providing their own error +** the format string as a lookup key for providing their own error ** localizations. 
*/ Bool TIDY_CALL tidySetReportFilter2( TidyDoc tdoc, TidyReportFilter2 filt ) @@ -1081,35 +1081,17 @@ int tidyDocSaveStdout( TidyDocImpl* doc ) int status = 0; uint outenc = cfg( doc, TidyOutCharEncoding ); uint nl = cfg( doc, TidyNewline ); - StreamOut* out = TY_(FileOutput)( doc, stdout, outenc, nl ); +// StreamOut* out = TY_(FileOutput)( doc, stdout, outenc, nl ); -#if !defined(NO_SETMODE_SUPPORT) - -#if defined(_WIN32) || defined(OS2_OS) - oldstdoutmode = setmode( fileno(stdout), _O_BINARY ); - oldstderrmode = setmode( fileno(stderr), _O_BINARY ); -#endif +// if ( 0 == status ) +// status = tidyDocSaveStream( doc, out ); -#endif - - if ( 0 == status ) - status = tidyDocSaveStream( doc, out ); +// fflush(stdout); +// fflush(stderr); - fflush(stdout); - fflush(stderr); -#if !defined(NO_SETMODE_SUPPORT) -#if defined(_WIN32) || defined(OS2_OS) - if ( oldstdoutmode != -1 ) - oldstdoutmode = setmode( fileno(stdout), oldstdoutmode ); - if ( oldstderrmode != -1 ) - oldstderrmode = setmode( fileno(stderr), oldstderrmode ); -#endif - -#endif - - TidyDocFree( doc, out ); +// TidyDocFree( doc, out ); return status; } @@ -1227,7 +1209,7 @@ int TY_(DocParseStream)( TidyDocImpl* doc, StreamIn* in ) if (doc->givenDoctype) TidyDocFree(doc, doc->givenDoctype); - /*\ + /*\ * Issue #186 - Now FreeNode depend on the doctype, so the lexer is needed * to determine which hash is to be used, so free it last. \*/ @@ -1389,7 +1371,7 @@ static Bool nodeHasAlignAttr( Node *node ) * and error output is given regardless of the new option, and ensure that * cleanup takes place. This provides mostly consistent Tidy behavior even with * the introduction of this new option. Note that strings have changed, though, - * in order to maintain consistency with the `--strict-tags-attributes` + * in order to maintain consistency with the `--strict-tags-attributes` * messages. 
* * See also: http://www.whatwg.org/specs/web-apps/current-work/multipage/obsolete.html#obsolete @@ -1457,7 +1439,7 @@ void TY_(CheckHTML5)( TidyDocImpl* doc, Node* node ) } } else if ( nodeIsBASEFONT(node) ) { - /* basefont: CSS equivalent 'font-size', 'font-family' and 'color' + /* basefont: CSS equivalent 'font-size', 'font-family' and 'color' * on body or class on each subsequent element. * Difficult - If it is the first body element, then could consider * adding that to the as a whole, else could perhaps apply it @@ -1561,7 +1543,7 @@ void TY_(CheckHTML5)( TidyDocImpl* doc, Node* node ) if (node->content) TY_(CheckHTML5)( doc, node->content ); - + node = node->next; } } @@ -1656,7 +1638,7 @@ void TY_(CheckHTMLTagsAttribsVersions)( TidyDocImpl* doc, Node* node ) if (node->content) TY_(CheckHTMLTagsAttribsVersions)( doc, node->content ); - + node = node->next; } } @@ -1908,9 +1890,9 @@ int tidyDocCleanAndRepair( TidyDocImpl* doc ) it can ever be, so we can start detecting things that shouldn't be in this version of HTML */ - if (doc->lexer) + if (doc->lexer) { - /*\ + /*\ * Issue #429 #426 - These services can only be used * when there is a document loaded, ie a lexer created. * But really should not be calling a Clean and Repair