hrbrmstr
8 years ago
80 changed files with 48013 additions and 62 deletions
Binary file not shown.
@ -1,5 +1,5 @@ |
|||
# Generated by roxygen2: do not edit by hand |
|||
|
|||
export(tidy) |
|||
export(tidy_html) |
|||
importFrom(Rcpp,sourceCpp) |
|||
useDynLib(htmltidy) |
|||
|
@ -1,11 +1,11 @@ |
|||
# This file was generated by Rcpp::compileAttributes |
|||
# Generated by using Rcpp::compileAttributes() -> do not edit by hand |
|||
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 |
|||
|
|||
#' Tidy HTML/XML |
|||
#' |
|||
#' @param source length 1 character vector containing the HTML/XML source to process |
|||
#' @export |
|||
tidy <- function(source) { |
|||
.Call('htmltidy_tidy', PACKAGE = 'htmltidy', source) |
|||
tidy_html <- function(source) { |
|||
.Call('htmltidy_tidy_html', PACKAGE = 'htmltidy', source) |
|||
} |
|||
|
|||
|
@ -1,10 +1,10 @@ |
|||
% Generated by roxygen2: do not edit by hand |
|||
% Please edit documentation in R/RcppExports.R |
|||
\name{tidy} |
|||
\alias{tidy} |
|||
\name{tidy_html} |
|||
\alias{tidy_html} |
|||
\title{Tidy HTML/XML} |
|||
\usage{ |
|||
tidy(source) |
|||
tidy_html(source) |
|||
} |
|||
\arguments{ |
|||
\item{source}{length 1 character vector containing the HTML/XML source to process} |
Binary file not shown.
@ -1 +1,2 @@ |
|||
PKG_LIBS=-ltidy |
|||
PKG_CPPFLAGS = -I. |
|||
PKG_CXXFLAGS = -I. |
|||
|
@ -1,18 +1,18 @@ |
|||
// This file was generated by Rcpp::compileAttributes
|
|||
// Generated by using Rcpp::compileAttributes() -> do not edit by hand
|
|||
// Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
|
|||
|
|||
#include <Rcpp.h> |
|||
|
|||
using namespace Rcpp; |
|||
|
|||
// tidy
|
|||
std::string tidy(std::string source); |
|||
RcppExport SEXP htmltidy_tidy(SEXP sourceSEXP) { |
|||
// tidy_html
|
|||
std::string tidy_html(std::string source); |
|||
RcppExport SEXP htmltidy_tidy_html(SEXP sourceSEXP) { |
|||
BEGIN_RCPP |
|||
Rcpp::RObject __result; |
|||
Rcpp::RNGScope __rngScope; |
|||
Rcpp::RObject rcpp_result_gen; |
|||
Rcpp::RNGScope rcpp_rngScope_gen; |
|||
Rcpp::traits::input_parameter< std::string >::type source(sourceSEXP); |
|||
__result = Rcpp::wrap(tidy(source)); |
|||
return __result; |
|||
rcpp_result_gen = Rcpp::wrap(tidy_html(source)); |
|||
return rcpp_result_gen; |
|||
END_RCPP |
|||
} |
|||
|
File diff suppressed because it is too large
@ -0,0 +1,281 @@ |
|||
#ifndef __ACCESS_H__ |
|||
#define __ACCESS_H__ |
|||
|
|||
/* access.h -- carry out accessibility checks
|
|||
|
|||
Copyright University of Toronto |
|||
Portions (c) 1998-2006 (W3C) MIT, ERCIM, Keio University |
|||
See tidy.h for the copyright notice. |
|||
|
|||
*/ |
|||
|
|||
/*********************************************************************
|
|||
* AccessibilityChecks |
|||
* |
|||
* Carries out processes for all accessibility checks. Traverses |
|||
* through all the content within the tree and evaluates the tags for |
|||
* accessibility. |
|||
* |
|||
* To perform the following checks, 'AccessibilityChecks' must be |
|||
* called AFTER the tree structure has been formed. |
|||
* |
|||
* If, in the command prompt, there is no specification of which |
|||
* accessibility priorities to check, no accessibility checks will be |
|||
* performed. (i.e. '1' for priority 1, '2' for priorities 1 and 2, |
|||
* and '3' for priorities 1, 2 and 3.) |
|||
* |
|||
* Copyright University of Toronto |
|||
* Programmed by: Mike Lam and Chris Ridpath |
|||
* Modifications by : Terry Teague (TRT) |
|||
* |
|||
*********************************************************************/ |
|||
|
|||
|
|||
#include "forward.h" |
|||
#include "message.h" |
|||
|
|||
#if SUPPORT_ACCESSIBILITY_CHECKS |
|||
|
|||
/* The accessibility checks to perform depending on user's desire.
|
|||
|
|||
1. priority 1 |
|||
2. priority 1 & 2 |
|||
3. priority 1, 2, & 3 |
|||
*/ |
|||
|
|||
/* Determines if the client-side text link is found within the document
|
|||
typedef struct AreaLinks |
|||
{ |
|||
struct AreaLinks* next; |
|||
char* link; |
|||
Bool HasBeenFound; |
|||
} AreaLinks; |
|||
*/ |
|||
|
|||
/* Element count of the fixed-size text buffers declared with TEXTBUF_SIZE. */
enum
{
    TEXTBUF_SIZE = 128u
};
|||
|
|||
struct _TidyAccessImpl; |
|||
typedef struct _TidyAccessImpl TidyAccessImpl; |
|||
|
|||
struct _TidyAccessImpl |
|||
{ |
|||
/* gets set from Tidy variable AccessibilityCheckLevel */ |
|||
int PRIORITYCHK; |
|||
|
|||
/* Number of characters that are found within the concatenated text */ |
|||
int counter; |
|||
|
|||
/* list of characters in the text nodes found within a container element */ |
|||
tmbchar textNode[ TEXTBUF_SIZE ]; |
|||
|
|||
/* The list of characters found within one text node */ |
|||
tmbchar text[ TEXTBUF_SIZE ]; |
|||
|
|||
/* Number of frame elements found within a frameset */ |
|||
int numFrames; |
|||
|
|||
/* Number of 'longdesc' attributes found within a frameset */ |
|||
int HasCheckedLongDesc; |
|||
|
|||
int CheckedHeaders; |
|||
int ListElements; |
|||
int OtherListElements; |
|||
|
|||
/* For 'USEMAP' identifier */ |
|||
Bool HasUseMap; |
|||
Bool HasName; |
|||
Bool HasMap; |
|||
|
|||
/* For tracking nodes that are deleted from the original parse tree - TRT */ |
|||
/* Node *access_tree; */ |
|||
|
|||
Bool HasTH; |
|||
Bool HasValidFor; |
|||
Bool HasValidId; |
|||
Bool HasValidRowHeaders; |
|||
Bool HasValidColumnHeaders; |
|||
Bool HasInvalidRowHeader; |
|||
Bool HasInvalidColumnHeader; |
|||
int ForID; |
|||
|
|||
/* List containing map-links
|
|||
AreaLinks* links; |
|||
AreaLinks* start; |
|||
AreaLinks* current; |
|||
*/ |
|||
|
|||
}; |
|||
|
|||
|
|||
/*
|
|||
Determines which error/warning message should be displayed, |
|||
depending on the error code that was called. |
|||
|
|||
Offset accessibility error codes by FIRST_ACCESS_ERR to avoid conflict with |
|||
other error codes defined in message.h and used in localize.c. |
|||
|
|||
These accessErrorCodes are used throughout libtidy, and also |
|||
have associated localized strings to describe them. |
|||
|
|||
IMPORTANT: to maintain compatibility with TidyMessageFilter3, if you add |
|||
or remove keys from this enum, ALSO add/remove the corresponding key |
|||
in language.c:tidyErrorFilterKeysStruct[]! |
|||
*/ |
|||
typedef enum |
|||
{ |
|||
FIRST_ACCESS_ERR = CODES_TIDY_ERROR_LAST + 1, /* must be first */ |
|||
|
|||
/* [1.1.1.1] */ IMG_MISSING_ALT, |
|||
/* [1.1.1.2] */ IMG_ALT_SUSPICIOUS_FILENAME, |
|||
/* [1.1.1.3] */ IMG_ALT_SUSPICIOUS_FILE_SIZE, |
|||
/* [1.1.1.4] */ IMG_ALT_SUSPICIOUS_PLACEHOLDER, |
|||
/* [1.1.1.10] */ IMG_ALT_SUSPICIOUS_TOO_LONG, |
|||
/* [1.1.1.11] */ IMG_MISSING_ALT_BULLET, |
|||
/* [1.1.1.12] */ IMG_MISSING_ALT_H_RULE, |
|||
/* [1.1.2.1] */ IMG_MISSING_LONGDESC_DLINK, |
|||
/* [1.1.2.2] */ IMG_MISSING_DLINK, |
|||
/* [1.1.2.3] */ IMG_MISSING_LONGDESC, |
|||
/* [1.1.2.5] */ LONGDESC_NOT_REQUIRED, |
|||
/* [1.1.3.1] */ IMG_BUTTON_MISSING_ALT, |
|||
/* [1.1.4.1] */ APPLET_MISSING_ALT, |
|||
/* [1.1.5.1] */ OBJECT_MISSING_ALT, |
|||
/* [1.1.6.1] */ AUDIO_MISSING_TEXT_WAV, |
|||
/* [1.1.6.2] */ AUDIO_MISSING_TEXT_AU, |
|||
/* [1.1.6.3] */ AUDIO_MISSING_TEXT_AIFF, |
|||
/* [1.1.6.4] */ AUDIO_MISSING_TEXT_SND, |
|||
/* [1.1.6.5] */ AUDIO_MISSING_TEXT_RA, |
|||
/* [1.1.6.6] */ AUDIO_MISSING_TEXT_RM, |
|||
/* [1.1.8.1] */ FRAME_MISSING_LONGDESC, |
|||
/* [1.1.9.1] */ AREA_MISSING_ALT, |
|||
/* [1.1.10.1] */ SCRIPT_MISSING_NOSCRIPT, |
|||
/* [1.1.12.1] */ ASCII_REQUIRES_DESCRIPTION, |
|||
/* [1.2.1.1] */ IMG_MAP_SERVER_REQUIRES_TEXT_LINKS, |
|||
/* [1.4.1.1] */ MULTIMEDIA_REQUIRES_TEXT, |
|||
/* [1.5.1.1] */ IMG_MAP_CLIENT_MISSING_TEXT_LINKS, |
|||
/* [2.1.1.1] */ INFORMATION_NOT_CONVEYED_IMAGE, |
|||
/* [2.1.1.2] */ INFORMATION_NOT_CONVEYED_APPLET, |
|||
/* [2.1.1.3] */ INFORMATION_NOT_CONVEYED_OBJECT, |
|||
/* [2.1.1.4] */ INFORMATION_NOT_CONVEYED_SCRIPT, |
|||
/* [2.1.1.5] */ INFORMATION_NOT_CONVEYED_INPUT, |
|||
/* [2.2.1.1] */ COLOR_CONTRAST_TEXT, |
|||
/* [2.2.1.2] */ COLOR_CONTRAST_LINK, |
|||
/* [2.2.1.3] */ COLOR_CONTRAST_ACTIVE_LINK, |
|||
/* [2.2.1.4] */ COLOR_CONTRAST_VISITED_LINK, |
|||
/* [3.2.1.1] */ DOCTYPE_MISSING, |
|||
/* [3.3.1.1] */ STYLE_SHEET_CONTROL_PRESENTATION, |
|||
/* [3.5.1.1] */ HEADERS_IMPROPERLY_NESTED, |
|||
/* [3.5.2.1] */ POTENTIAL_HEADER_BOLD, |
|||
/* [3.5.2.2] */ POTENTIAL_HEADER_ITALICS, |
|||
/* [3.5.2.3] */ POTENTIAL_HEADER_UNDERLINE, |
|||
/* [3.5.3.1] */ HEADER_USED_FORMAT_TEXT, |
|||
/* [3.6.1.1] */ LIST_USAGE_INVALID_UL, |
|||
/* [3.6.1.2] */ LIST_USAGE_INVALID_OL, |
|||
/* [3.6.1.4] */ LIST_USAGE_INVALID_LI, |
|||
/* [4.1.1.1] */ INDICATE_CHANGES_IN_LANGUAGE, |
|||
/* [4.3.1.1] */ LANGUAGE_NOT_IDENTIFIED, |
|||
/* [4.3.1.1] */ LANGUAGE_INVALID, |
|||
/* [5.1.2.1] */ DATA_TABLE_MISSING_HEADERS, |
|||
/* [5.1.2.2] */ DATA_TABLE_MISSING_HEADERS_COLUMN, |
|||
/* [5.1.2.3] */ DATA_TABLE_MISSING_HEADERS_ROW, |
|||
/* [5.2.1.1] */ DATA_TABLE_REQUIRE_MARKUP_COLUMN_HEADERS, |
|||
/* [5.2.1.2] */ DATA_TABLE_REQUIRE_MARKUP_ROW_HEADERS, |
|||
/* [5.3.1.1] */ LAYOUT_TABLES_LINEARIZE_PROPERLY, |
|||
/* [5.4.1.1] */ LAYOUT_TABLE_INVALID_MARKUP, |
|||
/* [5.5.1.1] */ TABLE_MISSING_SUMMARY, |
|||
/* [5.5.1.2] */ TABLE_SUMMARY_INVALID_NULL, |
|||
/* [5.5.1.3] */ TABLE_SUMMARY_INVALID_SPACES, |
|||
/* [5.5.1.6] */ TABLE_SUMMARY_INVALID_PLACEHOLDER, |
|||
/* [5.5.2.1] */ TABLE_MISSING_CAPTION, |
|||
/* [5.6.1.1] */ TABLE_MAY_REQUIRE_HEADER_ABBR, |
|||
/* [5.6.1.2] */ TABLE_MAY_REQUIRE_HEADER_ABBR_NULL, |
|||
/* [5.6.1.3] */ TABLE_MAY_REQUIRE_HEADER_ABBR_SPACES, |
|||
/* [6.1.1.1] */ STYLESHEETS_REQUIRE_TESTING_LINK, |
|||
/* [6.1.1.2] */ STYLESHEETS_REQUIRE_TESTING_STYLE_ELEMENT, |
|||
/* [6.1.1.3] */ STYLESHEETS_REQUIRE_TESTING_STYLE_ATTR, |
|||
/* [6.2.1.1] */ FRAME_SRC_INVALID, |
|||
/* [6.2.2.1] */ TEXT_EQUIVALENTS_REQUIRE_UPDATING_APPLET, |
|||
/* [6.2.2.2] */ TEXT_EQUIVALENTS_REQUIRE_UPDATING_SCRIPT, |
|||
/* [6.2.2.3] */ TEXT_EQUIVALENTS_REQUIRE_UPDATING_OBJECT, |
|||
/* [6.3.1.1] */ PROGRAMMATIC_OBJECTS_REQUIRE_TESTING_SCRIPT, |
|||
/* [6.3.1.2] */ PROGRAMMATIC_OBJECTS_REQUIRE_TESTING_OBJECT, |
|||
/* [6.3.1.3] */ PROGRAMMATIC_OBJECTS_REQUIRE_TESTING_EMBED, |
|||
/* [6.3.1.4] */ PROGRAMMATIC_OBJECTS_REQUIRE_TESTING_APPLET, |
|||
/* [6.5.1.1] */ FRAME_MISSING_NOFRAMES, |
|||
/* [6.5.1.2] */ NOFRAMES_INVALID_NO_VALUE, |
|||
/* [6.5.1.3] */ NOFRAMES_INVALID_CONTENT, |
|||
/* [6.5.1.4] */ NOFRAMES_INVALID_LINK, |
|||
/* [7.1.1.1] */ REMOVE_FLICKER_SCRIPT, |
|||
/* [7.1.1.2] */ REMOVE_FLICKER_OBJECT, |
|||
/* [7.1.1.3] */ REMOVE_FLICKER_EMBED, |
|||
/* [7.1.1.4] */ REMOVE_FLICKER_APPLET, |
|||
/* [7.1.1.5] */ REMOVE_FLICKER_ANIMATED_GIF, |
|||
/* [7.2.1.1] */ REMOVE_BLINK_MARQUEE, |
|||
/* [7.4.1.1] */ REMOVE_AUTO_REFRESH, |
|||
/* [7.5.1.1] */ REMOVE_AUTO_REDIRECT, |
|||
/* [8.1.1.1] */ ENSURE_PROGRAMMATIC_OBJECTS_ACCESSIBLE_SCRIPT, |
|||
/* [8.1.1.2] */ ENSURE_PROGRAMMATIC_OBJECTS_ACCESSIBLE_OBJECT, |
|||
/* [8.1.1.3] */ ENSURE_PROGRAMMATIC_OBJECTS_ACCESSIBLE_APPLET, |
|||
/* [8.1.1.4] */ ENSURE_PROGRAMMATIC_OBJECTS_ACCESSIBLE_EMBED, |
|||
/* [9.1.1.1] */ IMAGE_MAP_SERVER_SIDE_REQUIRES_CONVERSION, |
|||
/* [9.3.1.1] */ SCRIPT_NOT_KEYBOARD_ACCESSIBLE_ON_MOUSE_DOWN, |
|||
/* [9.3.1.2] */ SCRIPT_NOT_KEYBOARD_ACCESSIBLE_ON_MOUSE_UP, |
|||
/* [9.3.1.3] */ SCRIPT_NOT_KEYBOARD_ACCESSIBLE_ON_CLICK, |
|||
/* [9.3.1.4] */ SCRIPT_NOT_KEYBOARD_ACCESSIBLE_ON_MOUSE_OVER, |
|||
/* [9.3.1.5] */ SCRIPT_NOT_KEYBOARD_ACCESSIBLE_ON_MOUSE_OUT, |
|||
/* [9.3.1.6] */ SCRIPT_NOT_KEYBOARD_ACCESSIBLE_ON_MOUSE_MOVE, |
|||
/* [10.1.1.1] */ NEW_WINDOWS_REQUIRE_WARNING_NEW, |
|||
/* [10.1.1.2] */ NEW_WINDOWS_REQUIRE_WARNING_BLANK, |
|||
/* [10.2.1.1] */ LABEL_NEEDS_REPOSITIONING_BEFORE_INPUT, |
|||
/* [10.2.1.2] */ LABEL_NEEDS_REPOSITIONING_AFTER_INPUT, |
|||
/* [10.4.1.1] */ FORM_CONTROL_REQUIRES_DEFAULT_TEXT, |
|||
/* [10.4.1.2] */ FORM_CONTROL_DEFAULT_TEXT_INVALID_NULL, |
|||
/* [10.4.1.3] */ FORM_CONTROL_DEFAULT_TEXT_INVALID_SPACES, |
|||
/* [11.2.1.1] */ REPLACE_DEPRECATED_HTML_APPLET, |
|||
/* [11.2.1.2] */ REPLACE_DEPRECATED_HTML_BASEFONT, |
|||
/* [11.2.1.3] */ REPLACE_DEPRECATED_HTML_CENTER, |
|||
/* [11.2.1.4] */ REPLACE_DEPRECATED_HTML_DIR, |
|||
/* [11.2.1.5] */ REPLACE_DEPRECATED_HTML_FONT, |
|||
/* [11.2.1.6] */ REPLACE_DEPRECATED_HTML_ISINDEX, |
|||
/* [11.2.1.7] */ REPLACE_DEPRECATED_HTML_MENU, |
|||
/* [11.2.1.8] */ REPLACE_DEPRECATED_HTML_S, |
|||
/* [11.2.1.9] */ REPLACE_DEPRECATED_HTML_STRIKE, |
|||
/* [11.2.1.10] */ REPLACE_DEPRECATED_HTML_U, |
|||
/* [12.1.1.1] */ FRAME_MISSING_TITLE, |
|||
/* [12.1.1.2] */ FRAME_TITLE_INVALID_NULL, |
|||
/* [12.1.1.3] */ FRAME_TITLE_INVALID_SPACES, |
|||
/* [12.4.1.1] */ ASSOCIATE_LABELS_EXPLICITLY, |
|||
/* [12.4.1.2] */ ASSOCIATE_LABELS_EXPLICITLY_FOR, |
|||
/* [12.4.1.3] */ ASSOCIATE_LABELS_EXPLICITLY_ID, |
|||
/* [13.1.1.1] */ LINK_TEXT_NOT_MEANINGFUL, |
|||
/* [13.1.1.2] */ LINK_TEXT_MISSING, |
|||
/* [13.1.1.3] */ LINK_TEXT_TOO_LONG, |
|||
/* [13.1.1.4] */ LINK_TEXT_NOT_MEANINGFUL_CLICK_HERE, |
|||
/* [13.1.1.5] */ LINK_TEXT_NOT_MEANINGFUL_MORE, |
|||
/* [13.1.1.6] */ LINK_TEXT_NOT_MEANINGFUL_FOLLOW_THIS, |
|||
/* [13.2.1.1] */ METADATA_MISSING, |
|||
/* [13.2.1.2] */ METADATA_MISSING_LINK, |
|||
/* [13.2.1.3] */ METADATA_MISSING_REDIRECT_AUTOREFRESH, |
|||
/* [13.10.1.1] */ SKIPOVER_ASCII_ART, |
|||
|
|||
LAST_ACCESS_ERR /* must be last */ |
|||
} accessErrorCodes; |
|||
|
|||
|
|||
/* Additional accessibility entry points; each one takes the TidyDocImpl
   it operates on. */
void TY_(AccessibilityHelloMessage)( TidyDocImpl* doc );
void TY_(DisplayHTMLTableAlgorithm)( TidyDocImpl* doc );
|||
|
|||
/************************************************************
|
|||
* AccessibilityChecks |
|||
* |
|||
* Traverses through the individual nodes of the tree |
|||
* and checks attributes and elements for accessibility. |
|||
* after the tree structure has been formed. |
|||
************************************************************/ |
|||
|
|||
void TY_(AccessibilityChecks)( TidyDocImpl* doc ); |
|||
|
|||
|
|||
#endif /* SUPPORT_ACCESSIBILITY_CHECKS */ |
|||
#endif /* __ACCESS_H__ */ |
@ -0,0 +1,118 @@ |
|||
/* alloc.c -- Default memory allocation routines.
|
|||
|
|||
(c) 1998-2006 (W3C) MIT, ERCIM, Keio University |
|||
See tidy.h for the copyright notice. |
|||
|
|||
*/ |
|||
|
|||
/* #define DEBUG_MEMORY very NOISY extra DEBUG of memory allocation, reallocation and free */ |
|||
|
|||
#include "tidy.h" |
|||
#include "forward.h" |
|||
#ifdef DEBUG_MEMORY |
|||
#include "sprtf.h" |
|||
#endif |
|||
|
|||
static TidyMalloc g_malloc = NULL; |
|||
static TidyRealloc g_realloc = NULL; |
|||
static TidyFree g_free = NULL; |
|||
static TidyPanic g_panic = NULL; |
|||
|
|||
/* Records fmalloc as the allocation hook (stored in g_malloc).
   Always returns yes. */
Bool TIDY_CALL tidySetMallocCall( TidyMalloc fmalloc )
{
    g_malloc = fmalloc;
    return yes;
}
|||
/* Records frealloc as the reallocation hook (stored in g_realloc).
   Always returns yes. */
Bool TIDY_CALL tidySetReallocCall( TidyRealloc frealloc )
{
    g_realloc = frealloc;
    return yes;
}
|||
/* Records ffree as the deallocation hook (stored in g_free).
   Always returns yes. */
Bool TIDY_CALL tidySetFreeCall( TidyFree ffree )
{
    g_free = ffree;
    return yes;
}
|||
/* Records fpanic as the fatal-error hook (stored in g_panic).
   Always returns yes. */
Bool TIDY_CALL tidySetPanicCall( TidyPanic fpanic )
{
    g_panic = fpanic;
    return yes;
}
|||
|
|||
/* Fatal-error handler of the default allocator.
   If a panic hook has been installed, msg is handed to it and this
   function returns once the hook does. Otherwise the message is written
   to stderr and the process exits with status 2 (after asserting in
   _DEBUG builds). The allocator argument is unused. */
static void TIDY_CALL defaultPanic( TidyAllocator* ARG_UNUSED(allocator), ctmbstr msg )
{
    if ( g_panic )
    {
        g_panic( msg );
        return;
    }

    /* 2 signifies a serious error */
    fprintf( stderr, "Fatal error: %s\n", msg );
#ifdef _DEBUG
    assert(0);
#endif
    exit(2);
}
|||
|
|||
/* Allocation routine of the default allocator.
   Uses the installed malloc hook when there is one, malloc() otherwise.
   allocator is only forwarded to defaultPanic; size is the number of bytes
   requested. Returns the new block. When the allocation fails,
   defaultPanic is invoked; NULL is returned only if an installed panic
   hook returns instead of terminating. */
static void* TIDY_CALL defaultAlloc( TidyAllocator* allocator, size_t size )
{
    /* malloc(0) is allowed to return NULL without being out of memory.
       Ask for one byte instead so that NULL always means a real failure
       and a zero-size request cannot trigger the fatal panic path. */
    const size_t request = ( size ? size : 1 );
    void *p = ( g_malloc ? g_malloc(request) : malloc(request) );

    if ( !p )
        defaultPanic( allocator,"Out of memory!");
#if !defined(NDEBUG) && defined(_MSC_VER) && defined(DEBUG_MEMORY)
    SPRTF("alloc MEM %p, size %d\n", p, (int)size );
    if (size == 0) {
        SPRTF("NOTE: An allocation of ZERO bytes!!!!!!\n");
    }
#endif
    return p;
}
|||
|
|||
/* Reallocation routine of the default allocator.
   A NULL mem is treated as a fresh allocation through defaultAlloc.
   Otherwise the installed realloc hook is used when there is one,
   realloc() if not. Returns the resized block. When resizing fails,
   defaultPanic is invoked; NULL is returned only if an installed panic
   hook returns instead of terminating. */
static void* TIDY_CALL defaultRealloc( TidyAllocator* allocator, void* mem, size_t newsize )
{
    size_t request;
    void *p;

    if ( mem == NULL )
        return defaultAlloc( allocator, newsize );

    /* realloc(mem, 0) is implementation-defined: some C libraries free the
       block and return NULL, which would be reported here as a fatal
       out-of-memory condition. Resize to one byte instead so that NULL
       always means a real failure. */
    request = ( newsize ? newsize : 1 );

    p = ( g_realloc ? g_realloc(mem, request) : realloc(mem, request) );
    if (!p)
        defaultPanic( allocator, "Out of memory!");
#if !defined(NDEBUG) && defined(_MSC_VER) && defined(DEBUG_MEMORY)
    SPRTF("realloc MEM %p, size %d\n", p, (int)newsize );
#endif
    return p;
}
|||
|
|||
/* Release routine of the default allocator.
   Does nothing for a NULL mem. Otherwise the block is handed to the
   installed free hook when there is one, to free() if not. The allocator
   argument is unused. */
static void TIDY_CALL defaultFree( TidyAllocator* ARG_UNUSED(allocator), void* mem )
{
    if ( !mem )
        return;

#if !defined(NDEBUG) && defined(_MSC_VER) && defined(DEBUG_MEMORY)
    SPRTF("free MEM %p\n", mem );
#endif
    if ( g_free )
    {
        g_free( mem );
    }
    else
    {
        free( mem );
    }
}
|||
|
|||
static const TidyAllocatorVtbl defaultVtbl = { |
|||
defaultAlloc, |
|||
defaultRealloc, |
|||
defaultFree, |
|||
defaultPanic |
|||
}; |
|||
|
|||
TidyAllocator TY_(g_default_allocator) = { |
|||
&defaultVtbl |
|||
}; |
|||
|
|||
/*
|
|||
* local variables: |
|||
* mode: c |
|||
* indent-tabs-mode: nil |
|||
* c-basic-offset: 4 |
|||
* eval: (c-set-offset 'substatement-open 0) |
|||
* end: |
|||
*/ |
@ -0,0 +1,204 @@ |
|||
/* attrask.c -- Interrogate attribute type
|
|||
|
|||
(c) 1998-2006 (W3C) MIT, ERCIM, Keio University |
|||
See tidy.h for the copyright notice. |
|||
|
|||
*/ |
|||
|
|||
#include "tidy-int.h" |
|||
#include "tidy.h" |
|||
#include "attrs.h" |
|||
|
|||
/* Public wrapper: converts the TidyAttr handle with tidyAttrToImpl and
   returns the result of attrIsHREF. */
Bool TIDY_CALL tidyAttrIsHREF( TidyAttr tattr )
{
    const Bool matches = attrIsHREF( tidyAttrToImpl(tattr) );
    return matches;
}
|||
/* Public wrapper: converts the TidyAttr handle with tidyAttrToImpl and
   returns the result of attrIsSRC. */
Bool TIDY_CALL tidyAttrIsSRC( TidyAttr tattr )
{
    const Bool matches = attrIsSRC( tidyAttrToImpl(tattr) );
    return matches;
}
|||
/* Public wrapper: converts the TidyAttr handle with tidyAttrToImpl and
   returns the result of attrIsID. */
Bool TIDY_CALL tidyAttrIsID( TidyAttr tattr )
{
    const Bool matches = attrIsID( tidyAttrToImpl(tattr) );
    return matches;
}
|||
/* Public wrapper: converts the TidyAttr handle with tidyAttrToImpl and
   returns the result of attrIsNAME. */
Bool TIDY_CALL tidyAttrIsNAME( TidyAttr tattr )
{
    const Bool matches = attrIsNAME( tidyAttrToImpl(tattr) );
    return matches;
}
|||
/* Public wrapper: converts the TidyAttr handle with tidyAttrToImpl and
   returns the result of attrIsSUMMARY. */
Bool TIDY_CALL tidyAttrIsSUMMARY( TidyAttr tattr )
{
    const Bool matches = attrIsSUMMARY( tidyAttrToImpl(tattr) );
    return matches;
}
|||
/* Public wrapper: converts the TidyAttr handle with tidyAttrToImpl and
   returns the result of attrIsALT. */
Bool TIDY_CALL tidyAttrIsALT( TidyAttr tattr )
{
    const Bool matches = attrIsALT( tidyAttrToImpl(tattr) );
    return matches;
}
|||
/* Public wrapper: converts the TidyAttr handle with tidyAttrToImpl and
   returns the result of attrIsLONGDESC. */
Bool TIDY_CALL tidyAttrIsLONGDESC( TidyAttr tattr )
{
    const Bool matches = attrIsLONGDESC( tidyAttrToImpl(tattr) );
    return matches;
}
|||
/* Public wrapper: converts the TidyAttr handle with tidyAttrToImpl and
   returns the result of attrIsUSEMAP. */
Bool TIDY_CALL tidyAttrIsUSEMAP( TidyAttr tattr )
{
    const Bool matches = attrIsUSEMAP( tidyAttrToImpl(tattr) );
    return matches;
}
|||
/* Public wrapper: converts the TidyAttr handle with tidyAttrToImpl and
   returns the result of attrIsISMAP. */
Bool TIDY_CALL tidyAttrIsISMAP( TidyAttr tattr )
{
    const Bool matches = attrIsISMAP( tidyAttrToImpl(tattr) );
    return matches;
}
|||
/* Public wrapper: converts the TidyAttr handle with tidyAttrToImpl and
   returns the result of attrIsLANGUAGE. */
Bool TIDY_CALL tidyAttrIsLANGUAGE( TidyAttr tattr )
{
    const Bool matches = attrIsLANGUAGE( tidyAttrToImpl(tattr) );
    return matches;
}
|||
/* Public wrapper: converts the TidyAttr handle with tidyAttrToImpl and
   returns the result of attrIsTYPE. */
Bool TIDY_CALL tidyAttrIsTYPE( TidyAttr tattr )
{
    const Bool matches = attrIsTYPE( tidyAttrToImpl(tattr) );
    return matches;
}
|||
/* Public wrapper: converts the TidyAttr handle with tidyAttrToImpl and
   returns the result of attrIsVALUE. */
Bool TIDY_CALL tidyAttrIsVALUE( TidyAttr tattr )
{
    const Bool matches = attrIsVALUE( tidyAttrToImpl(tattr) );
    return matches;
}
|||
/* Public wrapper: converts the TidyAttr handle with tidyAttrToImpl and
   returns the result of attrIsCONTENT. */
Bool TIDY_CALL tidyAttrIsCONTENT( TidyAttr tattr )
{
    const Bool matches = attrIsCONTENT( tidyAttrToImpl(tattr) );
    return matches;
}
|||
/* Public wrapper: converts the TidyAttr handle with tidyAttrToImpl and
   returns the result of attrIsTITLE. */
Bool TIDY_CALL tidyAttrIsTITLE( TidyAttr tattr )
{
    const Bool matches = attrIsTITLE( tidyAttrToImpl(tattr) );
    return matches;
}
|||
/* Public wrapper: converts the TidyAttr handle with tidyAttrToImpl and
   returns the result of attrIsXMLNS. */
Bool TIDY_CALL tidyAttrIsXMLNS( TidyAttr tattr )
{
    const Bool matches = attrIsXMLNS( tidyAttrToImpl(tattr) );
    return matches;
}
|||
/* Public wrapper: converts the TidyAttr handle with tidyAttrToImpl and
   returns the result of attrIsDATAFLD. */
Bool TIDY_CALL tidyAttrIsDATAFLD( TidyAttr tattr )
{
    const Bool matches = attrIsDATAFLD( tidyAttrToImpl(tattr) );
    return matches;
}
|||
/* Public wrapper: converts the TidyAttr handle with tidyAttrToImpl and
   returns the result of attrIsWIDTH. */
Bool TIDY_CALL tidyAttrIsWIDTH( TidyAttr tattr )
{
    const Bool matches = attrIsWIDTH( tidyAttrToImpl(tattr) );
    return matches;
}
|||
/* Public wrapper: converts the TidyAttr handle with tidyAttrToImpl and
   returns the result of attrIsHEIGHT. */
Bool TIDY_CALL tidyAttrIsHEIGHT( TidyAttr tattr )
{
    const Bool matches = attrIsHEIGHT( tidyAttrToImpl(tattr) );
    return matches;
}
|||
/* Public wrapper: converts the TidyAttr handle with tidyAttrToImpl and
   returns the result of attrIsFOR. */
Bool TIDY_CALL tidyAttrIsFOR( TidyAttr tattr )
{
    const Bool matches = attrIsFOR( tidyAttrToImpl(tattr) );
    return matches;
}
|||
/* Public wrapper: converts the TidyAttr handle with tidyAttrToImpl and
   returns the result of attrIsSELECTED. */
Bool TIDY_CALL tidyAttrIsSELECTED( TidyAttr tattr )
{
    const Bool matches = attrIsSELECTED( tidyAttrToImpl(tattr) );
    return matches;
}
|||
/* Public wrapper: converts the TidyAttr handle with tidyAttrToImpl and
   returns the result of attrIsCHECKED. */
Bool TIDY_CALL tidyAttrIsCHECKED( TidyAttr tattr )
{
    const Bool matches = attrIsCHECKED( tidyAttrToImpl(tattr) );
    return matches;
}
|||
/* Public wrapper: converts the TidyAttr handle with tidyAttrToImpl and
   returns the result of attrIsLANG. */
Bool TIDY_CALL tidyAttrIsLANG( TidyAttr tattr )
{
    const Bool matches = attrIsLANG( tidyAttrToImpl(tattr) );
    return matches;
}
|||
/* Public wrapper: converts the TidyAttr handle with tidyAttrToImpl and
   returns the result of attrIsTARGET. */
Bool TIDY_CALL tidyAttrIsTARGET( TidyAttr tattr )
{
    const Bool matches = attrIsTARGET( tidyAttrToImpl(tattr) );
    return matches;
}
|||
/* Public wrapper: converts the TidyAttr handle with tidyAttrToImpl and
   returns the result of attrIsHTTP_EQUIV. */
Bool TIDY_CALL tidyAttrIsHTTP_EQUIV( TidyAttr tattr )
{
    const Bool matches = attrIsHTTP_EQUIV( tidyAttrToImpl(tattr) );
    return matches;
}
|||
/* Public wrapper: converts the TidyAttr handle with tidyAttrToImpl and
   returns the result of attrIsREL. */
Bool TIDY_CALL tidyAttrIsREL( TidyAttr tattr )
{
    const Bool matches = attrIsREL( tidyAttrToImpl(tattr) );
    return matches;
}
|||
/* Public wrapper: converts the TidyAttr handle with tidyAttrToImpl and
   returns the result of the internal TY_(attrIsEvent) function. */
Bool TIDY_CALL tidyAttrIsEvent( TidyAttr tattr )
{
    const Bool matches = TY_(attrIsEvent)( tidyAttrToImpl(tattr) );
    return matches;
}
|||
/* Public wrapper: converts the TidyAttr handle with tidyAttrToImpl and
   returns the result of attrIsOnMOUSEMOVE. */
Bool TIDY_CALL tidyAttrIsOnMOUSEMOVE( TidyAttr tattr )
{
    const Bool matches = attrIsOnMOUSEMOVE( tidyAttrToImpl(tattr) );
    return matches;
}
|||
/* Public wrapper: converts the TidyAttr handle with tidyAttrToImpl and
   returns the result of attrIsOnMOUSEDOWN. */
Bool TIDY_CALL tidyAttrIsOnMOUSEDOWN( TidyAttr tattr )
{
    const Bool matches = attrIsOnMOUSEDOWN( tidyAttrToImpl(tattr) );
    return matches;
}
|||
/* Public wrapper: converts the TidyAttr handle with tidyAttrToImpl and
   returns the result of attrIsOnMOUSEUP. */
Bool TIDY_CALL tidyAttrIsOnMOUSEUP( TidyAttr tattr )
{
    const Bool matches = attrIsOnMOUSEUP( tidyAttrToImpl(tattr) );
    return matches;
}
|||
/* Public wrapper: converts the TidyAttr handle with tidyAttrToImpl and
   returns the result of attrIsOnCLICK. */
Bool TIDY_CALL tidyAttrIsOnCLICK( TidyAttr tattr )
{
    const Bool matches = attrIsOnCLICK( tidyAttrToImpl(tattr) );
    return matches;
}
|||
/* Public wrapper: converts the TidyAttr handle with tidyAttrToImpl and
   returns the result of attrIsOnMOUSEOVER. */
Bool TIDY_CALL tidyAttrIsOnMOUSEOVER( TidyAttr tattr )
{
    const Bool matches = attrIsOnMOUSEOVER( tidyAttrToImpl(tattr) );
    return matches;
}
|||
/* Public wrapper: converts the TidyAttr handle with tidyAttrToImpl and
   returns the result of attrIsOnMOUSEOUT. */
Bool TIDY_CALL tidyAttrIsOnMOUSEOUT( TidyAttr tattr )
{
    const Bool matches = attrIsOnMOUSEOUT( tidyAttrToImpl(tattr) );
    return matches;
}
|||
/* Public wrapper: converts the TidyAttr handle with tidyAttrToImpl and
   returns the result of attrIsOnKEYDOWN. */
Bool TIDY_CALL tidyAttrIsOnKEYDOWN( TidyAttr tattr )
{
    const Bool matches = attrIsOnKEYDOWN( tidyAttrToImpl(tattr) );
    return matches;
}
|||
/* Public wrapper: converts the TidyAttr handle with tidyAttrToImpl and
   returns the result of attrIsOnKEYUP. */
Bool TIDY_CALL tidyAttrIsOnKEYUP( TidyAttr tattr )
{
    const Bool matches = attrIsOnKEYUP( tidyAttrToImpl(tattr) );
    return matches;
}
|||
/* Public wrapper: converts the TidyAttr handle with tidyAttrToImpl and
   returns the result of attrIsOnKEYPRESS. */
Bool TIDY_CALL tidyAttrIsOnKEYPRESS( TidyAttr tattr )
{
    const Bool matches = attrIsOnKEYPRESS( tidyAttrToImpl(tattr) );
    return matches;
}
|||
/* Public wrapper: converts the TidyAttr handle with tidyAttrToImpl and
   returns the result of attrIsOnFOCUS. */
Bool TIDY_CALL tidyAttrIsOnFOCUS( TidyAttr tattr )
{
    const Bool matches = attrIsOnFOCUS( tidyAttrToImpl(tattr) );
    return matches;
}
|||
/* Public wrapper: converts the TidyAttr handle with tidyAttrToImpl and
   returns the result of attrIsOnBLUR. */
Bool TIDY_CALL tidyAttrIsOnBLUR( TidyAttr tattr )
{
    const Bool matches = attrIsOnBLUR( tidyAttrToImpl(tattr) );
    return matches;
}
|||
/* Public wrapper: converts the TidyAttr handle with tidyAttrToImpl and
   returns the result of attrIsBGCOLOR. */
Bool TIDY_CALL tidyAttrIsBGCOLOR( TidyAttr tattr )
{
    const Bool matches = attrIsBGCOLOR( tidyAttrToImpl(tattr) );
    return matches;
}
|||
/* Public wrapper: converts the TidyAttr handle with tidyAttrToImpl and
   returns the result of attrIsLINK. */
Bool TIDY_CALL tidyAttrIsLINK( TidyAttr tattr )
{
    const Bool matches = attrIsLINK( tidyAttrToImpl(tattr) );
    return matches;
}
|||
/* Public wrapper: converts the TidyAttr handle with tidyAttrToImpl and
   returns the result of attrIsALINK. */
Bool TIDY_CALL tidyAttrIsALINK( TidyAttr tattr )
{
    const Bool matches = attrIsALINK( tidyAttrToImpl(tattr) );
    return matches;
}
|||
/* Public wrapper: converts the TidyAttr handle with tidyAttrToImpl and
   returns the result of attrIsVLINK. */
Bool TIDY_CALL tidyAttrIsVLINK( TidyAttr tattr )
{
    const Bool matches = attrIsVLINK( tidyAttrToImpl(tattr) );
    return matches;
}
|||
/* Public wrapper: converts the TidyAttr handle with tidyAttrToImpl and
   returns the result of attrIsTEXT. */
Bool TIDY_CALL tidyAttrIsTEXT( TidyAttr tattr )
{
    const Bool matches = attrIsTEXT( tidyAttrToImpl(tattr) );
    return matches;
}
|||
/* Public wrapper: converts the TidyAttr handle with tidyAttrToImpl and
   returns the result of attrIsSTYLE. */
Bool TIDY_CALL tidyAttrIsSTYLE( TidyAttr tattr )
{
    const Bool matches = attrIsSTYLE( tidyAttrToImpl(tattr) );
    return matches;
}
|||
/* Public wrapper: converts the TidyAttr handle with tidyAttrToImpl and
   returns the result of attrIsABBR. */
Bool TIDY_CALL tidyAttrIsABBR( TidyAttr tattr )
{
    const Bool matches = attrIsABBR( tidyAttrToImpl(tattr) );
    return matches;
}
|||
/* Public wrapper: converts the TidyAttr handle with tidyAttrToImpl and
   returns the result of attrIsCOLSPAN. */
Bool TIDY_CALL tidyAttrIsCOLSPAN( TidyAttr tattr )
{
    const Bool matches = attrIsCOLSPAN( tidyAttrToImpl(tattr) );
    return matches;
}
|||
/* Public wrapper: converts the TidyAttr handle with tidyAttrToImpl and
   returns the result of attrIsROWSPAN. */
Bool TIDY_CALL tidyAttrIsROWSPAN( TidyAttr tattr )
{
    const Bool matches = attrIsROWSPAN( tidyAttrToImpl(tattr) );
    return matches;
}
|||
|
|||
/*
|
|||
* local variables: |
|||
* mode: c |
|||
* indent-tabs-mode: nil |
|||
* c-basic-offset: 4 |
|||
* eval: (c-set-offset 'substatement-open 0) |
|||
* end: |
|||
*/ |
File diff suppressed because it is too large
@ -0,0 +1,156 @@ |
|||
#ifndef __ATTRDICT_H__ |
|||
#define __ATTRDICT_H__ |
|||
|
|||
/* attrdict.h -- extended attribute information
|
|||
|
|||
(c) 1998-2006 (W3C) MIT, ERCIM, Keio University |
|||
See tidy.h for the copyright notice. |
|||
|
|||
*/ |
|||
|
|||
#include "tidy.h" |
|||
|
|||
typedef struct _AttrVersion |
|||
{ |
|||
TidyAttrId attribute; |
|||
uint versions; |
|||
} AttrVersion; |
|||
|
|||
/* Attribute tables, one AttrVersion array per element; the arrays are
   defined outside this header. */
extern const AttrVersion TY_(W3CAttrsFor_A)[];
extern const AttrVersion TY_(W3CAttrsFor_ABBR)[];
extern const AttrVersion TY_(W3CAttrsFor_ACRONYM)[];
extern const AttrVersion TY_(W3CAttrsFor_ADDRESS)[];
extern const AttrVersion TY_(W3CAttrsFor_APPLET)[];
extern const AttrVersion TY_(W3CAttrsFor_AREA)[];
extern const AttrVersion TY_(W3CAttrsFor_B)[];
extern const AttrVersion TY_(W3CAttrsFor_BASE)[];
extern const AttrVersion TY_(W3CAttrsFor_BASEFONT)[];
extern const AttrVersion TY_(W3CAttrsFor_BDO)[];
extern const AttrVersion TY_(W3CAttrsFor_BIG)[];
extern const AttrVersion TY_(W3CAttrsFor_BLOCKQUOTE)[];
extern const AttrVersion TY_(W3CAttrsFor_BODY)[];
extern const AttrVersion TY_(W3CAttrsFor_BR)[];
extern const AttrVersion TY_(W3CAttrsFor_BUTTON)[];
extern const AttrVersion TY_(W3CAttrsFor_CAPTION)[];
extern const AttrVersion TY_(W3CAttrsFor_CENTER)[];
extern const AttrVersion TY_(W3CAttrsFor_CITE)[];
extern const AttrVersion TY_(W3CAttrsFor_CODE)[];
extern const AttrVersion TY_(W3CAttrsFor_COL)[];
extern const AttrVersion TY_(W3CAttrsFor_COLGROUP)[];
extern const AttrVersion TY_(W3CAttrsFor_DD)[];
extern const AttrVersion TY_(W3CAttrsFor_DEL)[];
extern const AttrVersion TY_(W3CAttrsFor_DFN)[];
extern const AttrVersion TY_(W3CAttrsFor_DIR)[];
extern const AttrVersion TY_(W3CAttrsFor_DIV)[];
extern const AttrVersion TY_(W3CAttrsFor_DL)[];
extern const AttrVersion TY_(W3CAttrsFor_DT)[];
extern const AttrVersion TY_(W3CAttrsFor_EM)[];
extern const AttrVersion TY_(W3CAttrsFor_FIELDSET)[];
extern const AttrVersion TY_(W3CAttrsFor_FONT)[];
extern const AttrVersion TY_(W3CAttrsFor_FORM)[];
extern const AttrVersion TY_(W3CAttrsFor_FRAME)[];
extern const AttrVersion TY_(W3CAttrsFor_FRAMESET)[];
extern const AttrVersion TY_(W3CAttrsFor_H1)[];
extern const AttrVersion TY_(W3CAttrsFor_H2)[];
extern const AttrVersion TY_(W3CAttrsFor_H3)[];
extern const AttrVersion TY_(W3CAttrsFor_H4)[];
extern const AttrVersion TY_(W3CAttrsFor_H5)[];
extern const AttrVersion TY_(W3CAttrsFor_H6)[];
extern const AttrVersion TY_(W3CAttrsFor_HEAD)[];
extern const AttrVersion TY_(W3CAttrsFor_HR)[];
extern const AttrVersion TY_(W3CAttrsFor_HTML)[];
extern const AttrVersion TY_(W3CAttrsFor_I)[];
extern const AttrVersion TY_(W3CAttrsFor_IFRAME)[];
extern const AttrVersion TY_(W3CAttrsFor_IMG)[];
extern const AttrVersion TY_(W3CAttrsFor_INPUT)[];
extern const AttrVersion TY_(W3CAttrsFor_INS)[];
extern const AttrVersion TY_(W3CAttrsFor_ISINDEX)[];
extern const AttrVersion TY_(W3CAttrsFor_KBD)[];
extern const AttrVersion TY_(W3CAttrsFor_LABEL)[];
extern const AttrVersion TY_(W3CAttrsFor_LEGEND)[];
extern const AttrVersion TY_(W3CAttrsFor_LI)[];
extern const AttrVersion TY_(W3CAttrsFor_LINK)[];
extern const AttrVersion TY_(W3CAttrsFor_LISTING)[];
extern const AttrVersion TY_(W3CAttrsFor_MAP)[];
extern const AttrVersion TY_(W3CAttrsFor_MATHML)[];    /* [i_a]2 */
extern const AttrVersion TY_(W3CAttrsFor_MENU)[];
extern const AttrVersion TY_(W3CAttrsFor_META)[];
extern const AttrVersion TY_(W3CAttrsFor_NEXTID)[];
extern const AttrVersion TY_(W3CAttrsFor_NOFRAMES)[];
extern const AttrVersion TY_(W3CAttrsFor_NOSCRIPT)[];
extern const AttrVersion TY_(W3CAttrsFor_OBJECT)[];
extern const AttrVersion TY_(W3CAttrsFor_OL)[];
extern const AttrVersion TY_(W3CAttrsFor_OPTGROUP)[];
extern const AttrVersion TY_(W3CAttrsFor_OPTION)[];
extern const AttrVersion TY_(W3CAttrsFor_P)[];
extern const AttrVersion TY_(W3CAttrsFor_PARAM)[];
extern const AttrVersion TY_(W3CAttrsFor_PICTURE)[];   /* Issue #151 - html5 */
extern const AttrVersion TY_(W3CAttrsFor_PLAINTEXT)[];
extern const AttrVersion TY_(W3CAttrsFor_PRE)[];
extern const AttrVersion TY_(W3CAttrsFor_Q)[];
extern const AttrVersion TY_(W3CAttrsFor_RB)[];
extern const AttrVersion TY_(W3CAttrsFor_RBC)[];
extern const AttrVersion TY_(W3CAttrsFor_RP)[];
extern const AttrVersion TY_(W3CAttrsFor_RT)[];
extern const AttrVersion TY_(W3CAttrsFor_RTC)[];
extern const AttrVersion TY_(W3CAttrsFor_RUBY)[];
extern const AttrVersion TY_(W3CAttrsFor_S)[];
extern const AttrVersion TY_(W3CAttrsFor_SAMP)[];
extern const AttrVersion TY_(W3CAttrsFor_SCRIPT)[];
extern const AttrVersion TY_(W3CAttrsFor_SELECT)[];
extern const AttrVersion TY_(W3CAttrsFor_SMALL)[];
extern const AttrVersion TY_(W3CAttrsFor_SPAN)[];
extern const AttrVersion TY_(W3CAttrsFor_STRIKE)[];
extern const AttrVersion TY_(W3CAttrsFor_STRONG)[];
extern const AttrVersion TY_(W3CAttrsFor_STYLE)[];
extern const AttrVersion TY_(W3CAttrsFor_SUB)[];
extern const AttrVersion TY_(W3CAttrsFor_SUP)[];
extern const AttrVersion TY_(W3CAttrsFor_SVG)[];
extern const AttrVersion TY_(W3CAttrsFor_TABLE)[];
extern const AttrVersion TY_(W3CAttrsFor_TBODY)[];
extern const AttrVersion TY_(W3CAttrsFor_TD)[];
extern const AttrVersion TY_(W3CAttrsFor_TEXTAREA)[];
extern const AttrVersion TY_(W3CAttrsFor_TFOOT)[];
extern const AttrVersion TY_(W3CAttrsFor_TH)[];
extern const AttrVersion TY_(W3CAttrsFor_THEAD)[];
extern const AttrVersion TY_(W3CAttrsFor_TITLE)[];
extern const AttrVersion TY_(W3CAttrsFor_TR)[];
extern const AttrVersion TY_(W3CAttrsFor_TT)[];
extern const AttrVersion TY_(W3CAttrsFor_U)[];
extern const AttrVersion TY_(W3CAttrsFor_UL)[];
extern const AttrVersion TY_(W3CAttrsFor_VAR)[];
extern const AttrVersion TY_(W3CAttrsFor_XMP)[];
|||
|
|||
/* Second group of per-element attribute tables (these look like the
   HTML5-era elements - confirm); the arrays are defined outside this
   header. */
extern const AttrVersion TY_(W3CAttrsFor_TRACK)[];
extern const AttrVersion TY_(W3CAttrsFor_SUMMARY)[];
extern const AttrVersion TY_(W3CAttrsFor_FIGCAPTION)[];
extern const AttrVersion TY_(W3CAttrsFor_HGROUP)[];
extern const AttrVersion TY_(W3CAttrsFor_FIGURE)[];
extern const AttrVersion TY_(W3CAttrsFor_ARTICLE)[];
extern const AttrVersion TY_(W3CAttrsFor_ASIDE)[];
extern const AttrVersion TY_(W3CAttrsFor_BDI)[];
extern const AttrVersion TY_(W3CAttrsFor_NAV)[];
extern const AttrVersion TY_(W3CAttrsFor_SECTION)[];
extern const AttrVersion TY_(W3CAttrsFor_FOOTER)[];
extern const AttrVersion TY_(W3CAttrsFor_HEADER)[];
extern const AttrVersion TY_(W3CAttrsFor_DETAILS)[];
extern const AttrVersion TY_(W3CAttrsFor_DIALOG)[];
extern const AttrVersion TY_(W3CAttrsFor_COMMAND)[];
extern const AttrVersion TY_(W3CAttrsFor_MAIN)[];
extern const AttrVersion TY_(W3CAttrsFor_MARK)[];
extern const AttrVersion TY_(W3CAttrsFor_OUTPUT)[];
extern const AttrVersion TY_(W3CAttrsFor_MENUITEM)[];
extern const AttrVersion TY_(W3CAttrsFor_METER)[];
extern const AttrVersion TY_(W3CAttrsFor_PROGRESS)[];
extern const AttrVersion TY_(W3CAttrsFor_TEMPLATE)[];
extern const AttrVersion TY_(W3CAttrsFor_TIME)[];
extern const AttrVersion TY_(W3CAttrsFor_DATALIST)[];
extern const AttrVersion TY_(W3CAttrsFor_AUDIO)[];
extern const AttrVersion TY_(W3CAttrsFor_VIDEO)[];
extern const AttrVersion TY_(W3CAttrsFor_CANVAS)[];
extern const AttrVersion TY_(W3CAttrsFor_SOURCE)[];
extern const AttrVersion TY_(W3CAttrsFor_EMBED)[];
extern const AttrVersion TY_(W3CAttrsFor_KEYGEN)[];
extern const AttrVersion TY_(W3CAttrsFor_WBR)[];
|||
|
|||
#endif /* __ATTRDICT_H__ */ |
@ -0,0 +1,208 @@ |
|||
/* attrget.c -- Locate attribute value by type
|
|||
|
|||
(c) 1998-2006 (W3C) MIT, ERCIM, Keio University |
|||
See tidy.h for the copyright notice. |
|||
|
|||
*/ |
|||
|
|||
#include "tidy-int.h" |
|||
#include "tags.h" |
|||
#include "attrs.h" |
|||
#include "tidy.h" |
|||
|
|||
/* Public wrapper around TY_(AttrGetById): converts the node handle to its
   implementation type, looks up the attribute whose id is attId, and
   converts the result back into a public TidyAttr handle. */
TidyAttr TIDY_CALL tidyAttrGetById( TidyNode tnod, TidyAttrId attId )
{
    return tidyImplToAttr( TY_(AttrGetById)( tidyNodeToImpl(tnod), attId ) );
}
|||
/* Public wrapper: fetch the attribute with id TidyAttr_HREF from tnod. */
TidyAttr TIDY_CALL tidyAttrGetHREF( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetHREF(nimp) );
}
|||
/* Public wrapper: fetch the attribute with id TidyAttr_SRC from tnod. */
TidyAttr TIDY_CALL tidyAttrGetSRC( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetSRC(nimp) );
}
|||
/* Public wrapper: fetch the attribute with id TidyAttr_ID from tnod. */
TidyAttr TIDY_CALL tidyAttrGetID( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetID(nimp) );
}
|||
/* Public wrapper: fetch the attribute with id TidyAttr_NAME from tnod. */
TidyAttr TIDY_CALL tidyAttrGetNAME( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetNAME(nimp) );
}
|||
/* Public wrapper: fetch the attribute with id TidyAttr_SUMMARY from tnod. */
TidyAttr TIDY_CALL tidyAttrGetSUMMARY( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetSUMMARY(nimp) );
}
|||
/* Public wrapper: fetch the attribute with id TidyAttr_ALT from tnod. */
TidyAttr TIDY_CALL tidyAttrGetALT( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetALT(nimp) );
}
|||
/* Public wrapper: fetch the attribute with id TidyAttr_LONGDESC from tnod. */
TidyAttr TIDY_CALL tidyAttrGetLONGDESC( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetLONGDESC(nimp) );
}
|||
/* Public wrapper: fetch the attribute with id TidyAttr_USEMAP from tnod. */
TidyAttr TIDY_CALL tidyAttrGetUSEMAP( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetUSEMAP(nimp) );
}
|||
/* Public wrapper: fetch the attribute with id TidyAttr_ISMAP from tnod. */
TidyAttr TIDY_CALL tidyAttrGetISMAP( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetISMAP(nimp) );
}
|||
/* Public wrapper: fetch the attribute with id TidyAttr_LANGUAGE from tnod. */
TidyAttr TIDY_CALL tidyAttrGetLANGUAGE( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetLANGUAGE(nimp) );
}
|||
/* Public wrapper: fetch the attribute with id TidyAttr_TYPE from tnod. */
TidyAttr TIDY_CALL tidyAttrGetTYPE( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetTYPE(nimp) );
}
|||
/* Public wrapper: fetch the attribute with id TidyAttr_VALUE from tnod. */
TidyAttr TIDY_CALL tidyAttrGetVALUE( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetVALUE(nimp) );
}
|||
/* Public wrapper: fetch the attribute with id TidyAttr_CONTENT from tnod. */
TidyAttr TIDY_CALL tidyAttrGetCONTENT( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetCONTENT(nimp) );
}
|||
/* Public wrapper: fetch the attribute with id TidyAttr_TITLE from tnod. */
TidyAttr TIDY_CALL tidyAttrGetTITLE( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetTITLE(nimp) );
}
|||
/* Public wrapper: fetch the attribute with id TidyAttr_XMLNS from tnod. */
TidyAttr TIDY_CALL tidyAttrGetXMLNS( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetXMLNS(nimp) );
}
|||
/* Public wrapper: fetch the attribute with id TidyAttr_DATAFLD from tnod. */
TidyAttr TIDY_CALL tidyAttrGetDATAFLD( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetDATAFLD(nimp) );
}
|||
/* Public wrapper: fetch the attribute with id TidyAttr_WIDTH from tnod. */
TidyAttr TIDY_CALL tidyAttrGetWIDTH( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetWIDTH(nimp) );
}
|||
/* Public wrapper: fetch the attribute with id TidyAttr_HEIGHT from tnod. */
TidyAttr TIDY_CALL tidyAttrGetHEIGHT( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetHEIGHT(nimp) );
}
|||
/* Public wrapper: fetch the attribute with id TidyAttr_FOR from tnod. */
TidyAttr TIDY_CALL tidyAttrGetFOR( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetFOR(nimp) );
}
|||
/* Public wrapper: fetch the attribute with id TidyAttr_SELECTED from tnod. */
TidyAttr TIDY_CALL tidyAttrGetSELECTED( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetSELECTED(nimp) );
}
|||
/* Public wrapper: fetch the attribute with id TidyAttr_CHECKED from tnod. */
TidyAttr TIDY_CALL tidyAttrGetCHECKED( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetCHECKED(nimp) );
}
|||
/* Public wrapper: fetch the attribute with id TidyAttr_LANG from tnod. */
TidyAttr TIDY_CALL tidyAttrGetLANG( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetLANG(nimp) );
}
|||
/* Public wrapper: fetch the attribute with id TidyAttr_TARGET from tnod. */
TidyAttr TIDY_CALL tidyAttrGetTARGET( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetTARGET(nimp) );
}
|||
/* Public wrapper: fetch the attribute with id TidyAttr_HTTP_EQUIV from tnod. */
TidyAttr TIDY_CALL tidyAttrGetHTTP_EQUIV( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetHTTP_EQUIV(nimp) );
}
|||
/* Public wrapper: fetch the attribute with id TidyAttr_REL from tnod. */
TidyAttr TIDY_CALL tidyAttrGetREL( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetREL(nimp) );
}
|||
|
|||
/* Public wrapper: fetch the attribute with id TidyAttr_OnMOUSEMOVE from tnod. */
TidyAttr TIDY_CALL tidyAttrGetOnMOUSEMOVE( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetOnMOUSEMOVE(nimp) );
}
|||
/* Public wrapper: fetch the attribute with id TidyAttr_OnMOUSEDOWN from tnod. */
TidyAttr TIDY_CALL tidyAttrGetOnMOUSEDOWN( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetOnMOUSEDOWN(nimp) );
}
|||
/* Public wrapper: fetch the attribute with id TidyAttr_OnMOUSEUP from tnod. */
TidyAttr TIDY_CALL tidyAttrGetOnMOUSEUP( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetOnMOUSEUP(nimp) );
}
|||
/* Public wrapper: fetch the attribute with id TidyAttr_OnCLICK from tnod. */
TidyAttr TIDY_CALL tidyAttrGetOnCLICK( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetOnCLICK(nimp) );
}
|||
/* Public wrapper: fetch the attribute with id TidyAttr_OnMOUSEOVER from tnod. */
TidyAttr TIDY_CALL tidyAttrGetOnMOUSEOVER( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetOnMOUSEOVER(nimp) );
}
|||
/* Public wrapper: fetch the attribute with id TidyAttr_OnMOUSEOUT from tnod. */
TidyAttr TIDY_CALL tidyAttrGetOnMOUSEOUT( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetOnMOUSEOUT(nimp) );
}
|||
/* Public wrapper: fetch the attribute with id TidyAttr_OnKEYDOWN from tnod. */
TidyAttr TIDY_CALL tidyAttrGetOnKEYDOWN( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetOnKEYDOWN(nimp) );
}
|||
/* Public wrapper: fetch the attribute with id TidyAttr_OnKEYUP from tnod. */
TidyAttr TIDY_CALL tidyAttrGetOnKEYUP( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetOnKEYUP(nimp) );
}
|||
/* Public wrapper: fetch the attribute with id TidyAttr_OnKEYPRESS from tnod. */
TidyAttr TIDY_CALL tidyAttrGetOnKEYPRESS( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetOnKEYPRESS(nimp) );
}
|||
/* Public wrapper: fetch the attribute with id TidyAttr_OnFOCUS from tnod. */
TidyAttr TIDY_CALL tidyAttrGetOnFOCUS( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetOnFOCUS(nimp) );
}
|||
/* Public wrapper: fetch the attribute with id TidyAttr_OnBLUR from tnod. */
TidyAttr TIDY_CALL tidyAttrGetOnBLUR( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetOnBLUR(nimp) );
}
|||
/* Public wrapper: fetch the attribute with id TidyAttr_BGCOLOR from tnod. */
TidyAttr TIDY_CALL tidyAttrGetBGCOLOR( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetBGCOLOR(nimp) );
}
|||
/* Public wrapper: fetch the attribute with id TidyAttr_LINK from tnod. */
TidyAttr TIDY_CALL tidyAttrGetLINK( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetLINK(nimp) );
}
|||
/* Public wrapper: fetch the attribute with id TidyAttr_ALINK from tnod. */
TidyAttr TIDY_CALL tidyAttrGetALINK( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetALINK(nimp) );
}
|||
/* Public wrapper: fetch the attribute with id TidyAttr_VLINK from tnod. */
TidyAttr TIDY_CALL tidyAttrGetVLINK( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetVLINK(nimp) );
}
|||
|
|||
/* Public wrapper: fetch the attribute with id TidyAttr_TEXT from tnod. */
TidyAttr TIDY_CALL tidyAttrGetTEXT( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetTEXT(nimp) );
}
|||
/* Public wrapper: fetch the attribute with id TidyAttr_STYLE from tnod. */
TidyAttr TIDY_CALL tidyAttrGetSTYLE( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetSTYLE(nimp) );
}
|||
/* Public wrapper: fetch the attribute with id TidyAttr_ABBR from tnod. */
TidyAttr TIDY_CALL tidyAttrGetABBR( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetABBR(nimp) );
}
|||
/* Public wrapper: fetch the attribute with id TidyAttr_COLSPAN from tnod. */
TidyAttr TIDY_CALL tidyAttrGetCOLSPAN( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetCOLSPAN(nimp) );
}
|||
/* Public wrapper: fetch the attribute with id TidyAttr_ROWSPAN from tnod. */
TidyAttr TIDY_CALL tidyAttrGetROWSPAN( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetROWSPAN(nimp) );
}
|||
|
|||
/*
|
|||
* local variables: |
|||
* mode: c |
|||
* indent-tabs-mode: nil |
|||
* c-basic-offset: 4 |
|||
* eval: (c-set-offset 'substatement-open 0) |
|||
* end: |
|||
*/ |
File diff suppressed because it is too large
@ -0,0 +1,458 @@ |
|||
#ifndef __ATTRS_H__ |
|||
#define __ATTRS_H__ |
|||
|
|||
/* attrs.h -- recognize HTML attributes
|
|||
|
|||
(c) 1998-2007 (W3C) MIT, ERCIM, Keio University |
|||
See tidy.h for the copyright notice. |
|||
|
|||
*/ |
|||
|
|||
#include "forward.h" |
|||
|
|||
/* Function type shared by all attribute value checkers.  A checker receives
   the document, the node carrying the attribute, and the attribute value
   itself; it returns nothing.  Functions are declared with this type by
   writing e.g. `AttrCheck TY_(CheckUrl);`. */ |
|||
typedef void (AttrCheck)(TidyDocImpl* doc, Node *node, AttVal *attval); |
|||
|
|||
/* Definition of a single attribute: its id, its name, and an optional
   value checker. */
struct _Attribute |
|||
{ |
|||
/* enum id of the attribute */
TidyAttrId id; |
|||
/* attribute name string */
tmbstr name; |
|||
/* pointer to an AttrCheck function for this attribute's values
   (NOTE(review): presumably may be NULL when no check applies -- confirm) */
AttrCheck* attrchk; |
|||
|
|||
/* link to another _Attribute (presumably chains the declared-attribute
   list held in _TidyAttribImpl -- confirm in attrs.c) */
struct _Attribute* next; |
|||
}; |
|||
|
|||
|
|||
/*
|
|||
Anchor/Node linked list |
|||
*/ |
|||
|
|||
/* One entry of the anchor/node linked list: associates an anchor name with
   the node it belongs to. */
struct _Anchor |
|||
{ |
|||
/* next entry in the list */
struct _Anchor *next; |
|||
/* node associated with the anchor */
Node *node; |
|||
/* anchor name (NOTE(review): ownership of this string is not visible
   here -- confirm against TY_(FreeAnchors)) */
char *name; |
|||
}; |
|||
|
|||
typedef struct _Anchor Anchor; |
|||
|
|||
/* Attribute hash lookup defaults to enabled (1).  A build can override this
   by defining ATTRIBUTE_HASH_LOOKUP before this header is processed. */
#if !defined(ATTRIBUTE_HASH_LOOKUP) |
|||
#define ATTRIBUTE_HASH_LOOKUP 1 |
|||
#endif |
|||
|
|||
/* Types that exist only when attribute hash lookup is compiled in. */
#if ATTRIBUTE_HASH_LOOKUP |
|||
enum |
|||
{ |
|||
/* number of slots in the `hashtab` array of struct _TidyAttribImpl */
ATTRIBUTE_HASH_SIZE=178u |
|||
}; |
|||
|
|||
/* Hash table entry: points at a (const) attribute definition and links to
   another entry (presumably the next one in the same bucket -- confirm in
   attrs.c). */
struct _AttrHash |
|||
{ |
|||
Attribute const* attr; |
|||
struct _AttrHash* next; |
|||
}; |
|||
|
|||
typedef struct _AttrHash AttrHash; |
|||
#endif |
|||
|
|||
/* Size of the `anchor_hash` array in struct _TidyAttribImpl. */
enum |
|||
{ |
|||
ANCHOR_HASH_SIZE=1021u |
|||
}; |
|||
|
|||
/* Attribute-related lookup state: the anchor table, the list of declared
   attributes and, when ATTRIBUTE_HASH_LOOKUP is enabled, the attribute hash
   table. */
struct _TidyAttribImpl |
|||
{ |
|||
/* anchor/node lookup: ANCHOR_HASH_SIZE list heads of Anchor entries */ |
|||
Anchor* anchor_hash[ANCHOR_HASH_SIZE]; |
|||
|
|||
/* Declared literal attributes */ |
|||
Attribute* declared_attr_list; |
|||
|
|||
/* attribute hash table; present only when hash lookup is compiled in, so
   the struct layout depends on ATTRIBUTE_HASH_LOOKUP */
#if ATTRIBUTE_HASH_LOOKUP |
|||
AttrHash* hashtab[ATTRIBUTE_HASH_SIZE]; |
|||
#endif |
|||
}; |
|||
|
|||
typedef struct _TidyAttribImpl TidyAttribImpl; |
|||
|
|||
/* Namespace URI string for XHTML. */
#define XHTML_NAMESPACE "http://www.w3.org/1999/xhtml"
|
|||
|
|||
/* Attribute lookup / manipulation API.  All names are wrapped in TY_(), the
   library's internal-symbol naming macro. */

/* Declares a checker function using the AttrCheck function type, i.e.
   void (TidyDocImpl* doc, Node *node, AttVal *attval).
   NOTE(review): by its name this validates URL-valued attributes -- confirm
   in attrs.c. */
AttrCheck TY_(CheckUrl); |
|||
|
|||
/* public method for finding attribute definition by name */ |
|||
const Attribute* TY_(CheckAttribute)( TidyDocImpl* doc, Node *node, AttVal *attval ); |
|||
|
|||
const Attribute* TY_(FindAttribute)( TidyDocImpl* doc, AttVal *attval ); |
|||
|
|||
/* name-based access to a node's attributes */
AttVal* TY_(GetAttrByName)( Node *node, ctmbstr name ); |
|||
|
|||
void TY_(DropAttrByName)( TidyDocImpl* doc, Node *node, ctmbstr name ); |
|||
|
|||
AttVal* TY_(AddAttribute)( TidyDocImpl* doc, |
|||
Node *node, ctmbstr name, ctmbstr value ); |
|||
|
|||
AttVal* TY_(RepairAttrValue)(TidyDocImpl* doc, Node* node, ctmbstr name, ctmbstr value); |
|||
|
|||
/* predicates keyed on an attribute name */
Bool TY_(IsUrl)( TidyDocImpl* doc, ctmbstr attrname ); |
|||
|
|||
/* Bool IsBool( TidyDocImpl* doc, ctmbstr attrname ); */ |
|||
|
|||
Bool TY_(IsScript)( TidyDocImpl* doc, ctmbstr attrname ); |
|||
|
|||
/* may id or name serve as anchor? */ |
|||
Bool TY_(IsAnchorElement)( TidyDocImpl* doc, Node* node ); |
|||
|
|||
/*
|
|||
In CSS1, selectors can contain only the characters A-Z, 0-9, and |
|||
Unicode characters 161-255, plus dash (-); they cannot start with |
|||
a dash or a digit; they can also contain escaped characters and any |
|||
Unicode character as a numeric code (see next item). |
|||
|
|||
The backslash followed by at most four hexadecimal digits (0..9A..F) |
|||
stands for the Unicode character with that number. |
|||
|
|||
Any character except a hexadecimal digit can be escaped to remove its |
|||
special meaning, by putting a backslash in front. |
|||
|
|||
#508936 - CSS class naming for -clean option |
|||
*/ |
|||
/* Tests whether buf is a CSS1 selector (the rules are spelled out in the
   comment preceding this declaration). */
Bool TY_(IsCSS1Selector)( ctmbstr buf ); |
|||
|
|||
/* validity checks for an id string under HTML and XML rules respectively */
Bool TY_(IsValidHTMLID)(ctmbstr id); |
|||
Bool TY_(IsValidXMLID)(ctmbstr id); |
|||
|
|||
/* removes anchor for specific node */ |
|||
void TY_(RemoveAnchorByNode)( TidyDocImpl* doc, ctmbstr name, Node *node ); |
|||
|
|||
/* free all anchors */ |
|||
void TY_(FreeAnchors)( TidyDocImpl* doc ); |
|||
|
|||
|
|||
/* public methods for initializing/freeing attribute dictionary */ |
|||
void TY_(InitAttrs)( TidyDocImpl* doc ); |
|||
void TY_(FreeAttrTable)( TidyDocImpl* doc ); |
|||
|
|||
void TY_(AppendToClassAttr)( TidyDocImpl* doc, AttVal *classattr, ctmbstr classname ); |
|||
/* the same attribute name can't be used
   more than once in each element */ |
|||
void TY_(RepairDuplicateAttributes)( TidyDocImpl* doc, Node* node, Bool isXml ); |
|||
void TY_(SortAttributes)(Node* node, TidyAttrSortStrategy strat); |
|||
|
|||
Bool TY_(IsBoolAttribute)( AttVal* attval ); |
|||
Bool TY_(attrIsEvent)( AttVal* attval ); |
|||
|
|||
/* id-based attribute lookup; this is the function every attrGetXXX macro
   in this header expands to */
AttVal* TY_(AttrGetById)( Node* node, TidyAttrId id ); |
|||
|
|||
uint TY_(NodeAttributeVersions)( Node* node, TidyAttrId id ); |
|||
|
|||
Bool TY_(AttributeIsProprietary)(Node* node, AttVal* attval); |
|||
Bool TY_(AttributeIsMismatched)(Node* node, AttVal* attval, TidyDocImpl* doc); |
|||
|
|||
|
|||
/* 0 == TidyAttr_UNKNOWN */ |
|||
#define AttrId(av) ((av) && (av)->dict ? (av)->dict->id : TidyAttr_UNKNOWN) |
|||
#define AttrIsId(av, atid) ((av) && (av)->dict && ((av)->dict->id == atid)) |
|||
|
|||
#define AttrHasValue(attr) ((attr) && (attr)->value) |
|||
#define AttrValueIs(attr, val) (AttrHasValue(attr) && \ |
|||
TY_(tmbstrcasecmp)((attr)->value, val) == 0) |
|||
#define AttrContains(attr, val) (AttrHasValue(attr) && \ |
|||
TY_(tmbsubstr)((attr)->value, val) != NULL) |
|||
#define AttrVersions(attr) ((attr) && (attr)->dict ? (attr)->dict->versions : VERS_PROPRIETARY) |
|||
|
|||
#define AttrsHaveSameId(a, b) (a && b && a->dict && b->dict && a->dict->id && \ |
|||
b->dict->id && a->dict->id == b->dict->id) |
|||
|
|||
/* Attribute identity tests.  attrIsXXX(attval) is true when attval is
   non-NULL, has a dictionary entry, and that entry's id is TidyAttr_XXX
   (each macro simply forwards to AttrIsId). */
#define attrIsABBR(attval)                   AttrIsId( attval, TidyAttr_ABBR )
#define attrIsACCEPT(attval)                 AttrIsId( attval, TidyAttr_ACCEPT )
#define attrIsACCEPT_CHARSET(attval)         AttrIsId( attval, TidyAttr_ACCEPT_CHARSET )
#define attrIsACCESSKEY(attval)              AttrIsId( attval, TidyAttr_ACCESSKEY )
#define attrIsACTION(attval)                 AttrIsId( attval, TidyAttr_ACTION )
#define attrIsADD_DATE(attval)               AttrIsId( attval, TidyAttr_ADD_DATE )
#define attrIsALIGN(attval)                  AttrIsId( attval, TidyAttr_ALIGN )
#define attrIsALINK(attval)                  AttrIsId( attval, TidyAttr_ALINK )
#define attrIsALT(attval)                    AttrIsId( attval, TidyAttr_ALT )
#define attrIsARCHIVE(attval)                AttrIsId( attval, TidyAttr_ARCHIVE )
#define attrIsAXIS(attval)                   AttrIsId( attval, TidyAttr_AXIS )
#define attrIsBACKGROUND(attval)             AttrIsId( attval, TidyAttr_BACKGROUND )
#define attrIsBGCOLOR(attval)                AttrIsId( attval, TidyAttr_BGCOLOR )
#define attrIsBGPROPERTIES(attval)           AttrIsId( attval, TidyAttr_BGPROPERTIES )
#define attrIsBORDER(attval)                 AttrIsId( attval, TidyAttr_BORDER )
#define attrIsBORDERCOLOR(attval)            AttrIsId( attval, TidyAttr_BORDERCOLOR )
#define attrIsBOTTOMMARGIN(attval)           AttrIsId( attval, TidyAttr_BOTTOMMARGIN )
#define attrIsCELLPADDING(attval)            AttrIsId( attval, TidyAttr_CELLPADDING )
#define attrIsCELLSPACING(attval)            AttrIsId( attval, TidyAttr_CELLSPACING )
#define attrIsCHAR(attval)                   AttrIsId( attval, TidyAttr_CHAR )
#define attrIsCHAROFF(attval)                AttrIsId( attval, TidyAttr_CHAROFF )
#define attrIsCHARSET(attval)                AttrIsId( attval, TidyAttr_CHARSET )
#define attrIsCHECKED(attval)                AttrIsId( attval, TidyAttr_CHECKED )
#define attrIsCITE(attval)                   AttrIsId( attval, TidyAttr_CITE )
#define attrIsCLASS(attval)                  AttrIsId( attval, TidyAttr_CLASS )
#define attrIsCLASSID(attval)                AttrIsId( attval, TidyAttr_CLASSID )
#define attrIsCLEAR(attval)                  AttrIsId( attval, TidyAttr_CLEAR )
#define attrIsCODE(attval)                   AttrIsId( attval, TidyAttr_CODE )
#define attrIsCODEBASE(attval)               AttrIsId( attval, TidyAttr_CODEBASE )
#define attrIsCODETYPE(attval)               AttrIsId( attval, TidyAttr_CODETYPE )
#define attrIsCOLOR(attval)                  AttrIsId( attval, TidyAttr_COLOR )
#define attrIsCOLS(attval)                   AttrIsId( attval, TidyAttr_COLS )
#define attrIsCOLSPAN(attval)                AttrIsId( attval, TidyAttr_COLSPAN )
#define attrIsCOMPACT(attval)                AttrIsId( attval, TidyAttr_COMPACT )
#define attrIsCONTENT(attval)                AttrIsId( attval, TidyAttr_CONTENT )
#define attrIsCOORDS(attval)                 AttrIsId( attval, TidyAttr_COORDS )
#define attrIsDATA(attval)                   AttrIsId( attval, TidyAttr_DATA )
#define attrIsDATAFLD(attval)                AttrIsId( attval, TidyAttr_DATAFLD )
#define attrIsDATAFORMATAS(attval)           AttrIsId( attval, TidyAttr_DATAFORMATAS )
#define attrIsDATAPAGESIZE(attval)           AttrIsId( attval, TidyAttr_DATAPAGESIZE )
#define attrIsDATASRC(attval)                AttrIsId( attval, TidyAttr_DATASRC )
#define attrIsDATETIME(attval)               AttrIsId( attval, TidyAttr_DATETIME )
#define attrIsDECLARE(attval)                AttrIsId( attval, TidyAttr_DECLARE )
#define attrIsDEFER(attval)                  AttrIsId( attval, TidyAttr_DEFER )
#define attrIsDIR(attval)                    AttrIsId( attval, TidyAttr_DIR )
#define attrIsDISABLED(attval)               AttrIsId( attval, TidyAttr_DISABLED )
#define attrIsENCODING(attval)               AttrIsId( attval, TidyAttr_ENCODING )
#define attrIsENCTYPE(attval)                AttrIsId( attval, TidyAttr_ENCTYPE )
#define attrIsFACE(attval)                   AttrIsId( attval, TidyAttr_FACE )
#define attrIsFOR(attval)                    AttrIsId( attval, TidyAttr_FOR )
#define attrIsFRAME(attval)                  AttrIsId( attval, TidyAttr_FRAME )
#define attrIsFRAMEBORDER(attval)            AttrIsId( attval, TidyAttr_FRAMEBORDER )
#define attrIsFRAMESPACING(attval)           AttrIsId( attval, TidyAttr_FRAMESPACING )
#define attrIsGRIDX(attval)                  AttrIsId( attval, TidyAttr_GRIDX )
#define attrIsGRIDY(attval)                  AttrIsId( attval, TidyAttr_GRIDY )
#define attrIsHEADERS(attval)                AttrIsId( attval, TidyAttr_HEADERS )
#define attrIsHEIGHT(attval)                 AttrIsId( attval, TidyAttr_HEIGHT )
#define attrIsHREF(attval)                   AttrIsId( attval, TidyAttr_HREF )
#define attrIsHREFLANG(attval)               AttrIsId( attval, TidyAttr_HREFLANG )
#define attrIsHSPACE(attval)                 AttrIsId( attval, TidyAttr_HSPACE )
#define attrIsHTTP_EQUIV(attval)             AttrIsId( attval, TidyAttr_HTTP_EQUIV )
#define attrIsID(attval)                     AttrIsId( attval, TidyAttr_ID )
#define attrIsISMAP(attval)                  AttrIsId( attval, TidyAttr_ISMAP )
#define attrIsITEMID(attval)                 AttrIsId( attval, TidyAttr_ITEMID )
#define attrIsITEMPROP(attval)               AttrIsId( attval, TidyAttr_ITEMPROP )
#define attrIsITEMREF(attval)                AttrIsId( attval, TidyAttr_ITEMREF )
#define attrIsITEMSCOPE(attval)              AttrIsId( attval, TidyAttr_ITEMSCOPE )
#define attrIsITEMTYPE(attval)               AttrIsId( attval, TidyAttr_ITEMTYPE )
#define attrIsLABEL(attval)                  AttrIsId( attval, TidyAttr_LABEL )
#define attrIsLANG(attval)                   AttrIsId( attval, TidyAttr_LANG )
#define attrIsLANGUAGE(attval)               AttrIsId( attval, TidyAttr_LANGUAGE )
#define attrIsLAST_MODIFIED(attval)          AttrIsId( attval, TidyAttr_LAST_MODIFIED )
#define attrIsLAST_VISIT(attval)             AttrIsId( attval, TidyAttr_LAST_VISIT )
#define attrIsLEFTMARGIN(attval)             AttrIsId( attval, TidyAttr_LEFTMARGIN )
#define attrIsLINK(attval)                   AttrIsId( attval, TidyAttr_LINK )
#define attrIsLONGDESC(attval)               AttrIsId( attval, TidyAttr_LONGDESC )
#define attrIsLOWSRC(attval)                 AttrIsId( attval, TidyAttr_LOWSRC )
#define attrIsMARGINHEIGHT(attval)           AttrIsId( attval, TidyAttr_MARGINHEIGHT )
#define attrIsMARGINWIDTH(attval)            AttrIsId( attval, TidyAttr_MARGINWIDTH )
#define attrIsMAXLENGTH(attval)              AttrIsId( attval, TidyAttr_MAXLENGTH )
#define attrIsMEDIA(attval)                  AttrIsId( attval, TidyAttr_MEDIA )
#define attrIsMETHOD(attval)                 AttrIsId( attval, TidyAttr_METHOD )
#define attrIsMULTIPLE(attval)               AttrIsId( attval, TidyAttr_MULTIPLE )
#define attrIsNAME(attval)                   AttrIsId( attval, TidyAttr_NAME )
#define attrIsNOHREF(attval)                 AttrIsId( attval, TidyAttr_NOHREF )
#define attrIsNORESIZE(attval)               AttrIsId( attval, TidyAttr_NORESIZE )
#define attrIsNOSHADE(attval)                AttrIsId( attval, TidyAttr_NOSHADE )
#define attrIsNOWRAP(attval)                 AttrIsId( attval, TidyAttr_NOWRAP )
#define attrIsOBJECT(attval)                 AttrIsId( attval, TidyAttr_OBJECT )
#define attrIsOnAFTERUPDATE(attval)          AttrIsId( attval, TidyAttr_OnAFTERUPDATE )
#define attrIsOnBEFOREUNLOAD(attval)         AttrIsId( attval, TidyAttr_OnBEFOREUNLOAD )
#define attrIsOnBEFOREUPDATE(attval)         AttrIsId( attval, TidyAttr_OnBEFOREUPDATE )
#define attrIsOnBLUR(attval)                 AttrIsId( attval, TidyAttr_OnBLUR )
#define attrIsOnCHANGE(attval)               AttrIsId( attval, TidyAttr_OnCHANGE )
#define attrIsOnCLICK(attval)                AttrIsId( attval, TidyAttr_OnCLICK )
#define attrIsOnDATAAVAILABLE(attval)        AttrIsId( attval, TidyAttr_OnDATAAVAILABLE )
#define attrIsOnDATASETCHANGED(attval)       AttrIsId( attval, TidyAttr_OnDATASETCHANGED )
#define attrIsOnDATASETCOMPLETE(attval)      AttrIsId( attval, TidyAttr_OnDATASETCOMPLETE )
#define attrIsOnDBLCLICK(attval)             AttrIsId( attval, TidyAttr_OnDBLCLICK )
#define attrIsOnERRORUPDATE(attval)          AttrIsId( attval, TidyAttr_OnERRORUPDATE )
#define attrIsOnFOCUS(attval)                AttrIsId( attval, TidyAttr_OnFOCUS )
#define attrIsOnKEYDOWN(attval)              AttrIsId( attval, TidyAttr_OnKEYDOWN )
#define attrIsOnKEYPRESS(attval)             AttrIsId( attval, TidyAttr_OnKEYPRESS )
#define attrIsOnKEYUP(attval)                AttrIsId( attval, TidyAttr_OnKEYUP )
#define attrIsOnLOAD(attval)                 AttrIsId( attval, TidyAttr_OnLOAD )
#define attrIsOnMOUSEDOWN(attval)            AttrIsId( attval, TidyAttr_OnMOUSEDOWN )
#define attrIsOnMOUSEMOVE(attval)            AttrIsId( attval, TidyAttr_OnMOUSEMOVE )
#define attrIsOnMOUSEOUT(attval)             AttrIsId( attval, TidyAttr_OnMOUSEOUT )
#define attrIsOnMOUSEOVER(attval)            AttrIsId( attval, TidyAttr_OnMOUSEOVER )
#define attrIsOnMOUSEUP(attval)              AttrIsId( attval, TidyAttr_OnMOUSEUP )
#define attrIsOnRESET(attval)                AttrIsId( attval, TidyAttr_OnRESET )
#define attrIsOnROWENTER(attval)             AttrIsId( attval, TidyAttr_OnROWENTER )
#define attrIsOnROWEXIT(attval)              AttrIsId( attval, TidyAttr_OnROWEXIT )
#define attrIsOnSELECT(attval)               AttrIsId( attval, TidyAttr_OnSELECT )
#define attrIsOnSUBMIT(attval)               AttrIsId( attval, TidyAttr_OnSUBMIT )
#define attrIsOnUNLOAD(attval)               AttrIsId( attval, TidyAttr_OnUNLOAD )
#define attrIsPROFILE(attval)                AttrIsId( attval, TidyAttr_PROFILE )
#define attrIsPROMPT(attval)                 AttrIsId( attval, TidyAttr_PROMPT )
#define attrIsRBSPAN(attval)                 AttrIsId( attval, TidyAttr_RBSPAN )
#define attrIsREADONLY(attval)               AttrIsId( attval, TidyAttr_READONLY )
#define attrIsREL(attval)                    AttrIsId( attval, TidyAttr_REL )
#define attrIsREV(attval)                    AttrIsId( attval, TidyAttr_REV )
#define attrIsRIGHTMARGIN(attval)            AttrIsId( attval, TidyAttr_RIGHTMARGIN )
#define attrIsROLE(attval)                   AttrIsId( attval, TidyAttr_ROLE )
#define attrIsROWS(attval)                   AttrIsId( attval, TidyAttr_ROWS )
#define attrIsROWSPAN(attval)                AttrIsId( attval, TidyAttr_ROWSPAN )
#define attrIsRULES(attval)                  AttrIsId( attval, TidyAttr_RULES )
#define attrIsSCHEME(attval)                 AttrIsId( attval, TidyAttr_SCHEME )
#define attrIsSCOPE(attval)                  AttrIsId( attval, TidyAttr_SCOPE )
#define attrIsSCROLLING(attval)              AttrIsId( attval, TidyAttr_SCROLLING )
#define attrIsSELECTED(attval)               AttrIsId( attval, TidyAttr_SELECTED )
#define attrIsSHAPE(attval)                  AttrIsId( attval, TidyAttr_SHAPE )
#define attrIsSHOWGRID(attval)               AttrIsId( attval, TidyAttr_SHOWGRID )
#define attrIsSHOWGRIDX(attval)              AttrIsId( attval, TidyAttr_SHOWGRIDX )
#define attrIsSHOWGRIDY(attval)              AttrIsId( attval, TidyAttr_SHOWGRIDY )
#define attrIsSIZE(attval)                   AttrIsId( attval, TidyAttr_SIZE )
#define attrIsSPAN(attval)                   AttrIsId( attval, TidyAttr_SPAN )
#define attrIsSRC(attval)                    AttrIsId( attval, TidyAttr_SRC )
#define attrIsSTANDBY(attval)                AttrIsId( attval, TidyAttr_STANDBY )
#define attrIsSTART(attval)                  AttrIsId( attval, TidyAttr_START )
#define attrIsSTYLE(attval)                  AttrIsId( attval, TidyAttr_STYLE )
#define attrIsSUMMARY(attval)                AttrIsId( attval, TidyAttr_SUMMARY )
#define attrIsTABINDEX(attval)               AttrIsId( attval, TidyAttr_TABINDEX )
#define attrIsTARGET(attval)                 AttrIsId( attval, TidyAttr_TARGET )
#define attrIsTEXT(attval)                   AttrIsId( attval, TidyAttr_TEXT )
#define attrIsTITLE(attval)                  AttrIsId( attval, TidyAttr_TITLE )
#define attrIsTOPMARGIN(attval)              AttrIsId( attval, TidyAttr_TOPMARGIN )
#define attrIsTYPE(attval)                   AttrIsId( attval, TidyAttr_TYPE )
#define attrIsUSEMAP(attval)                 AttrIsId( attval, TidyAttr_USEMAP )
#define attrIsVALIGN(attval)                 AttrIsId( attval, TidyAttr_VALIGN )
#define attrIsVALUE(attval)                  AttrIsId( attval, TidyAttr_VALUE )
#define attrIsVALUETYPE(attval)              AttrIsId( attval, TidyAttr_VALUETYPE )
#define attrIsVERSION(attval)                AttrIsId( attval, TidyAttr_VERSION )
#define attrIsVLINK(attval)                  AttrIsId( attval, TidyAttr_VLINK )
#define attrIsVSPACE(attval)                 AttrIsId( attval, TidyAttr_VSPACE )
#define attrIsWIDTH(attval)                  AttrIsId( attval, TidyAttr_WIDTH )
#define attrIsWRAP(attval)                   AttrIsId( attval, TidyAttr_WRAP )
#define attrIsXMLNS(attval)                  AttrIsId( attval, TidyAttr_XMLNS )
#define attrIsXML_LANG(attval)               AttrIsId( attval, TidyAttr_XML_LANG )
#define attrIsXML_SPACE(attval)              AttrIsId( attval, TidyAttr_XML_SPACE )
#define attrIsARIA_ACTIVEDESCENDANT(attval)  AttrIsId( attval, TidyAttr_ARIA_ACTIVEDESCENDANT )
#define attrIsARIA_ATOMIC(attval)            AttrIsId( attval, TidyAttr_ARIA_ATOMIC )
#define attrIsARIA_AUTOCOMPLETE(attval)      AttrIsId( attval, TidyAttr_ARIA_AUTOCOMPLETE )
#define attrIsARIA_BUSY(attval)              AttrIsId( attval, TidyAttr_ARIA_BUSY )
#define attrIsARIA_CHECKED(attval)           AttrIsId( attval, TidyAttr_ARIA_CHECKED )
#define attrIsARIA_CONTROLS(attval)          AttrIsId( attval, TidyAttr_ARIA_CONTROLS )
#define attrIsARIA_DESCRIBEDBY(attval)       AttrIsId( attval, TidyAttr_ARIA_DESCRIBEDBY )
#define attrIsARIA_DISABLED(attval)          AttrIsId( attval, TidyAttr_ARIA_DISABLED )
#define attrIsARIA_DROPEFFECT(attval)        AttrIsId( attval, TidyAttr_ARIA_DROPEFFECT )
#define attrIsARIA_EXPANDED(attval)          AttrIsId( attval, TidyAttr_ARIA_EXPANDED )
#define attrIsARIA_FLOWTO(attval)            AttrIsId( attval, TidyAttr_ARIA_FLOWTO )
#define attrIsARIA_GRABBED(attval)           AttrIsId( attval, TidyAttr_ARIA_GRABBED )
#define attrIsARIA_HASPOPUP(attval)          AttrIsId( attval, TidyAttr_ARIA_HASPOPUP )
#define attrIsARIA_HIDDEN(attval)            AttrIsId( attval, TidyAttr_ARIA_HIDDEN )
#define attrIsARIA_INVALID(attval)           AttrIsId( attval, TidyAttr_ARIA_INVALID )
#define attrIsARIA_LABEL(attval)             AttrIsId( attval, TidyAttr_ARIA_LABEL )
#define attrIsARIA_LABELLEDBY(attval)        AttrIsId( attval, TidyAttr_ARIA_LABELLEDBY )
#define attrIsARIA_LEVEL(attval)             AttrIsId( attval, TidyAttr_ARIA_LEVEL )
#define attrIsARIA_LIVE(attval)              AttrIsId( attval, TidyAttr_ARIA_LIVE )
#define attrIsARIA_MULTILINE(attval)         AttrIsId( attval, TidyAttr_ARIA_MULTILINE )
#define attrIsARIA_MULTISELECTABLE(attval)   AttrIsId( attval, TidyAttr_ARIA_MULTISELECTABLE )
#define attrIsARIA_ORIENTATION(attval)       AttrIsId( attval, TidyAttr_ARIA_ORIENTATION )
#define attrIsARIA_OWNS(attval)              AttrIsId( attval, TidyAttr_ARIA_OWNS )
#define attrIsARIA_POSINSET(attval)          AttrIsId( attval, TidyAttr_ARIA_POSINSET )
#define attrIsARIA_PRESSED(attval)           AttrIsId( attval, TidyAttr_ARIA_PRESSED )
#define attrIsARIA_READONLY(attval)          AttrIsId( attval, TidyAttr_ARIA_READONLY )
#define attrIsARIA_RELEVANT(attval)          AttrIsId( attval, TidyAttr_ARIA_RELEVANT )
#define attrIsARIA_REQUIRED(attval)          AttrIsId( attval, TidyAttr_ARIA_REQUIRED )
#define attrIsARIA_SELECTED(attval)          AttrIsId( attval, TidyAttr_ARIA_SELECTED )
#define attrIsARIA_SETSIZE(attval)           AttrIsId( attval, TidyAttr_ARIA_SETSIZE )
#define attrIsARIA_SORT(attval)              AttrIsId( attval, TidyAttr_ARIA_SORT )
#define attrIsARIA_VALUEMAX(attval)          AttrIsId( attval, TidyAttr_ARIA_VALUEMAX )
#define attrIsARIA_VALUEMIN(attval)          AttrIsId( attval, TidyAttr_ARIA_VALUEMIN )
#define attrIsARIA_VALUENOW(attval)          AttrIsId( attval, TidyAttr_ARIA_VALUENOW )
#define attrIsARIA_VALUETEXT(attval)         AttrIsId( attval, TidyAttr_ARIA_VALUETEXT )
|||
|
|||
|
|||
|
|||
/* Attribute retrieval macros.
 *
 * attrGetXXX(node) expands to TY_(AttrGetById)(node, TidyAttr_XXX), i.e. an
 * id-based lookup of one specific attribute on the given Node*.
 *
 * NOTE(review): attrGetFONT and attrGetBASEFONT refer to TidyAttr_FONT and
 * TidyAttr_BASEFONT, which have no matching attrIsXXX macro in this header;
 * confirm those ids exist in TidyAttrId before using these two macros.
 */
#define attrGetHREF( node )        TY_(AttrGetById)( node, TidyAttr_HREF )
#define attrGetSRC( node )         TY_(AttrGetById)( node, TidyAttr_SRC )
#define attrGetID( node )          TY_(AttrGetById)( node, TidyAttr_ID )
#define attrGetNAME( node )        TY_(AttrGetById)( node, TidyAttr_NAME )
#define attrGetSUMMARY( node )     TY_(AttrGetById)( node, TidyAttr_SUMMARY )
#define attrGetALT( node )         TY_(AttrGetById)( node, TidyAttr_ALT )
#define attrGetLONGDESC( node )    TY_(AttrGetById)( node, TidyAttr_LONGDESC )
#define attrGetUSEMAP( node )      TY_(AttrGetById)( node, TidyAttr_USEMAP )
#define attrGetISMAP( node )       TY_(AttrGetById)( node, TidyAttr_ISMAP )
#define attrGetLANGUAGE( node )    TY_(AttrGetById)( node, TidyAttr_LANGUAGE )
#define attrGetTYPE( node )        TY_(AttrGetById)( node, TidyAttr_TYPE )
#define attrGetVALUE( node )       TY_(AttrGetById)( node, TidyAttr_VALUE )
#define attrGetCONTENT( node )     TY_(AttrGetById)( node, TidyAttr_CONTENT )
#define attrGetTITLE( node )       TY_(AttrGetById)( node, TidyAttr_TITLE )
#define attrGetXMLNS( node )       TY_(AttrGetById)( node, TidyAttr_XMLNS )
#define attrGetDATAFLD( node )     TY_(AttrGetById)( node, TidyAttr_DATAFLD )
#define attrGetWIDTH( node )       TY_(AttrGetById)( node, TidyAttr_WIDTH )
#define attrGetHEIGHT( node )      TY_(AttrGetById)( node, TidyAttr_HEIGHT )
#define attrGetFOR( node )         TY_(AttrGetById)( node, TidyAttr_FOR )
#define attrGetSELECTED( node )    TY_(AttrGetById)( node, TidyAttr_SELECTED )
#define attrGetCHECKED( node )     TY_(AttrGetById)( node, TidyAttr_CHECKED )
#define attrGetLANG( node )        TY_(AttrGetById)( node, TidyAttr_LANG )
#define attrGetTARGET( node )      TY_(AttrGetById)( node, TidyAttr_TARGET )
#define attrGetHTTP_EQUIV( node )  TY_(AttrGetById)( node, TidyAttr_HTTP_EQUIV )
#define attrGetREL( node )         TY_(AttrGetById)( node, TidyAttr_REL )

/* event handler attributes */
#define attrGetOnMOUSEMOVE( node ) TY_(AttrGetById)( node, TidyAttr_OnMOUSEMOVE )
#define attrGetOnMOUSEDOWN( node ) TY_(AttrGetById)( node, TidyAttr_OnMOUSEDOWN )
#define attrGetOnMOUSEUP( node )   TY_(AttrGetById)( node, TidyAttr_OnMOUSEUP )
#define attrGetOnCLICK( node )     TY_(AttrGetById)( node, TidyAttr_OnCLICK )
#define attrGetOnMOUSEOVER( node ) TY_(AttrGetById)( node, TidyAttr_OnMOUSEOVER )
#define attrGetOnMOUSEOUT( node )  TY_(AttrGetById)( node, TidyAttr_OnMOUSEOUT )
#define attrGetOnKEYDOWN( node )   TY_(AttrGetById)( node, TidyAttr_OnKEYDOWN )
#define attrGetOnKEYUP( node )     TY_(AttrGetById)( node, TidyAttr_OnKEYUP )
#define attrGetOnKEYPRESS( node )  TY_(AttrGetById)( node, TidyAttr_OnKEYPRESS )
#define attrGetOnFOCUS( node )     TY_(AttrGetById)( node, TidyAttr_OnFOCUS )
#define attrGetOnBLUR( node )      TY_(AttrGetById)( node, TidyAttr_OnBLUR )

#define attrGetBGCOLOR( node )     TY_(AttrGetById)( node, TidyAttr_BGCOLOR )

#define attrGetLINK( node )        TY_(AttrGetById)( node, TidyAttr_LINK )
#define attrGetALINK( node )       TY_(AttrGetById)( node, TidyAttr_ALINK )
#define attrGetVLINK( node )       TY_(AttrGetById)( node, TidyAttr_VLINK )

#define attrGetTEXT( node )        TY_(AttrGetById)( node, TidyAttr_TEXT )
#define attrGetSTYLE( node )       TY_(AttrGetById)( node, TidyAttr_STYLE )
#define attrGetABBR( node )        TY_(AttrGetById)( node, TidyAttr_ABBR )
#define attrGetCOLSPAN( node )     TY_(AttrGetById)( node, TidyAttr_COLSPAN )
#define attrGetFONT( node )        TY_(AttrGetById)( node, TidyAttr_FONT )
#define attrGetBASEFONT( node )    TY_(AttrGetById)( node, TidyAttr_BASEFONT )
#define attrGetROWSPAN( node )     TY_(AttrGetById)( node, TidyAttr_ROWSPAN )

#define attrGetROLE( node )        TY_(AttrGetById)( node, TidyAttr_ROLE )

/* ARIA attributes */
#define attrGetARIA_ACTIVEDESCENDANT( node )  TY_(AttrGetById)( node, TidyAttr_ARIA_ACTIVEDESCENDANT )
#define attrGetARIA_ATOMIC( node )            TY_(AttrGetById)( node, TidyAttr_ARIA_ATOMIC )
#define attrGetARIA_AUTOCOMPLETE( node )      TY_(AttrGetById)( node, TidyAttr_ARIA_AUTOCOMPLETE )
#define attrGetARIA_BUSY( node )              TY_(AttrGetById)( node, TidyAttr_ARIA_BUSY )
#define attrGetARIA_CHECKED( node )           TY_(AttrGetById)( node, TidyAttr_ARIA_CHECKED )
#define attrGetARIA_CONTROLS( node )          TY_(AttrGetById)( node, TidyAttr_ARIA_CONTROLS )
#define attrGetARIA_DESCRIBEDBY( node )       TY_(AttrGetById)( node, TidyAttr_ARIA_DESCRIBEDBY )
#define attrGetARIA_DISABLED( node )          TY_(AttrGetById)( node, TidyAttr_ARIA_DISABLED )
#define attrGetARIA_DROPEFFECT( node )        TY_(AttrGetById)( node, TidyAttr_ARIA_DROPEFFECT )
#define attrGetARIA_EXPANDED( node )          TY_(AttrGetById)( node, TidyAttr_ARIA_EXPANDED )
#define attrGetARIA_FLOWTO( node )            TY_(AttrGetById)( node, TidyAttr_ARIA_FLOWTO )
#define attrGetARIA_GRABBED( node )           TY_(AttrGetById)( node, TidyAttr_ARIA_GRABBED )
#define attrGetARIA_HASPOPUP( node )          TY_(AttrGetById)( node, TidyAttr_ARIA_HASPOPUP )
#define attrGetARIA_HIDDEN( node )            TY_(AttrGetById)( node, TidyAttr_ARIA_HIDDEN )
#define attrGetARIA_INVALID( node )           TY_(AttrGetById)( node, TidyAttr_ARIA_INVALID )
#define attrGetARIA_LABEL( node )             TY_(AttrGetById)( node, TidyAttr_ARIA_LABEL )
#define attrGetARIA_LABELLEDBY( node )        TY_(AttrGetById)( node, TidyAttr_ARIA_LABELLEDBY )
#define attrGetARIA_LEVEL( node )             TY_(AttrGetById)( node, TidyAttr_ARIA_LEVEL )
#define attrGetARIA_LIVE( node )              TY_(AttrGetById)( node, TidyAttr_ARIA_LIVE )
#define attrGetARIA_MULTILINE( node )         TY_(AttrGetById)( node, TidyAttr_ARIA_MULTILINE )
#define attrGetARIA_MULTISELECTABLE( node )   TY_(AttrGetById)( node, TidyAttr_ARIA_MULTISELECTABLE )
#define attrGetARIA_ORIENTATION( node )       TY_(AttrGetById)( node, TidyAttr_ARIA_ORIENTATION )
#define attrGetARIA_OWNS( node )              TY_(AttrGetById)( node, TidyAttr_ARIA_OWNS )
#define attrGetARIA_POSINSET( node )          TY_(AttrGetById)( node, TidyAttr_ARIA_POSINSET )
#define attrGetARIA_PRESSED( node )           TY_(AttrGetById)( node, TidyAttr_ARIA_PRESSED )
#define attrGetARIA_READONLY( node )          TY_(AttrGetById)( node, TidyAttr_ARIA_READONLY )
#define attrGetARIA_RELEVANT( node )          TY_(AttrGetById)( node, TidyAttr_ARIA_RELEVANT )
#define attrGetARIA_REQUIRED( node )          TY_(AttrGetById)( node, TidyAttr_ARIA_REQUIRED )
#define attrGetARIA_SELECTED( node )          TY_(AttrGetById)( node, TidyAttr_ARIA_SELECTED )
#define attrGetARIA_SETSIZE( node )           TY_(AttrGetById)( node, TidyAttr_ARIA_SETSIZE )
#define attrGetARIA_SORT( node )              TY_(AttrGetById)( node, TidyAttr_ARIA_SORT )
#define attrGetARIA_VALUEMAX( node )          TY_(AttrGetById)( node, TidyAttr_ARIA_VALUEMAX )
#define attrGetARIA_VALUEMIN( node )          TY_(AttrGetById)( node, TidyAttr_ARIA_VALUEMIN )
#define attrGetARIA_VALUENOW( node )          TY_(AttrGetById)( node, TidyAttr_ARIA_VALUENOW )
#define attrGetARIA_VALUETEXT( node )         TY_(AttrGetById)( node, TidyAttr_ARIA_VALUETEXT )
|||
|
|||
#endif /* __ATTRS_H__ */ |
@ -0,0 +1,226 @@ |
|||
/* buffio.c -- Treat buffer as an I/O stream.
|
|||
|
|||
(c) 1998-2007 (W3C) MIT, ERCIM, Keio University |
|||
See tidy.h for the copyright notice. |
|||
|
|||
Requires buffer to automatically grow as bytes are added. |
|||
Must keep track of current read and write points. |
|||
|
|||
*/ |
|||
|
|||
#include "tidy.h" |
|||
#include "tidybuffio.h" |
|||
#include "forward.h" |
|||
|
|||
/**************
|
|||
TIDY |
|||
**************/ |
|||
|
|||
/* TidyInputSource getByte callback: appData is the TidyBuffer to read from.
** Returns whatever tidyBufGetByte() returns for that buffer.
*/
static int TIDY_CALL insrc_getByte( void* appData )
{
    return tidyBufGetByte( (TidyBuffer*) appData );
}
|||
/* TidyInputSource eof callback: appData is the TidyBuffer being read.
** Forwards to tidyBufEndOfInput().
*/
static Bool TIDY_CALL insrc_eof( void* appData )
{
    return tidyBufEndOfInput( (TidyBuffer*) appData );
}
|||
/* TidyInputSource ungetByte callback: appData is the TidyBuffer being read.
** Forwards the byte to tidyBufUngetByte().
*/
static void TIDY_CALL insrc_ungetByte( void* appData, byte bv )
{
    tidyBufUngetByte( (TidyBuffer*) appData, bv );
}
|||
|
|||
/* Set up `inp` so that it reads its bytes from `buf`.
** The buffer pointer is stored as the source's opaque data and the three
** callbacks are the buffer-backed implementations defined in this file.
*/
void TIDY_CALL tidyInitInputBuffer( TidyInputSource* inp, TidyBuffer* buf )
{
    inp->sourceData = buf;
    inp->getByte    = insrc_getByte;
    inp->ungetByte  = insrc_ungetByte;
    inp->eof        = insrc_eof;
}
|||
|
|||
/* TidyOutputSink putByte callback: appData is the TidyBuffer to append to. */
static void TIDY_CALL outsink_putByte( void* appData, byte bv )
{
    tidyBufPutByte( (TidyBuffer*) appData, bv );
}
|||
|
|||
/* Set up `outp` so that every byte written to it is appended to `buf`. */
void TIDY_CALL tidyInitOutputBuffer( TidyOutputSink* outp, TidyBuffer* buf )
{
    outp->sinkData = buf;
    outp->putByte  = outsink_putByte;
}
|||
|
|||
|
|||
/* Initialise `buf` to the empty state.  Passing a NULL allocator makes
** tidyBufInitWithAllocator() select the default allocator.
*/
void TIDY_CALL tidyBufInit( TidyBuffer* buf )
{
    TidyAllocator* const noAllocator = NULL;

    assert( buf != NULL );
    tidyBufInitWithAllocator( buf, noAllocator );
}
|||
|
|||
/* Initialise `buf` and reserve room for `allocSize` bytes, using the
** allocator that tidyBufAllocWithAllocator() picks when given NULL.
*/
void TIDY_CALL tidyBufAlloc( TidyBuffer* buf, uint allocSize )
{
    TidyAllocator* const noAllocator = NULL;

    tidyBufAllocWithAllocator( buf, noAllocator, allocSize );
}
|||
|
|||
/* Reset every field of `buf` to zero and record which allocator it uses.
** A NULL `allocator` selects the library's default allocator.
** Note: any memory previously referenced by buf->bp is NOT released here.
*/
void TIDY_CALL tidyBufInitWithAllocator( TidyBuffer* buf,
                                         TidyAllocator *allocator )
{
    assert( buf != NULL );
    TidyClearMemory( buf, sizeof(TidyBuffer) );

    if ( allocator )
        buf->allocator = allocator;
    else
        buf->allocator = &TY_(g_default_allocator);
}
|||
|
|||
/* Initialise `buf` with the given allocator (NULL selects the default) and
** then reserve room for at least `allocSize` bytes.  The read position is
** left at the start of the buffer.
*/
void TIDY_CALL tidyBufAllocWithAllocator( TidyBuffer* buf,
                                          TidyAllocator *allocator,
                                          uint allocSize )
{
    const uint useDefaultChunk = 0;   /* 0 lets tidyBufCheckAlloc pick its own chunk size */

    tidyBufInitWithAllocator( buf, allocator );
    tidyBufCheckAlloc( buf, allocSize, useDefaultChunk );
    buf->next = 0;
}
|||
|
|||
/* Release the memory owned by `buf` and return it to the empty state,
** keeping the same allocator so the buffer can be reused.
**
** A buffer that was only zero-initialised (never passed through
** tidyBufInit() or tidyBufCheckAlloc()) still has a NULL allocator.  The
** other entry points in this file (tidyBufCheckAlloc, tidyBufAttach) fall
** back to the default allocator in that case; do the same here instead of
** handing a NULL allocator to TidyFree.
*/
void TIDY_CALL tidyBufFree( TidyBuffer* buf )
{
    assert( buf != NULL );

    if ( !buf->allocator )
        buf->allocator = &TY_(g_default_allocator);

    TidyFree( buf->allocator, buf->bp );
    tidyBufInitWithAllocator( buf, buf->allocator );
}
|||
|
|||
/* Empty the buffer without releasing its storage.
** The read position is always rewound.  If storage exists, all allocated
** bytes are zeroed and the content size is reset to 0.
*/
void TIDY_CALL tidyBufClear( TidyBuffer* buf )
{
    assert( buf != NULL );

    buf->next = 0;
    if ( !buf->bp )
        return;   /* nothing allocated: size is left untouched */

    TidyClearMemory( buf->bp, buf->allocated );
    buf->size = 0;
}
|||
|
|||
/* Many users do not call tidyBufInit() or tidyBufAlloc() or their allocator
|
|||
counterparts. So by default, set the default allocator. |
|||
*/ |
|||
static void setDefaultAllocator( TidyBuffer* buf ) |
|||
{ |
|||
buf->allocator = &TY_(g_default_allocator); |
|||
} |
|||
|
|||
/* Avoid thrashing memory by doubling buffer size
|
|||
** until larger than requested size. |
|||
buf->allocated is bigger than allocSize+1 so that a trailing null byte is |
|||
always available. |
|||
*/ |
|||
void TIDY_CALL tidyBufCheckAlloc( TidyBuffer* buf, uint allocSize, uint chunkSize ) |
|||
{ |
|||
assert( buf != NULL ); |
|||
|
|||
if ( !buf->allocator ) |
|||
setDefaultAllocator( buf ); |
|||
|
|||
if ( 0 == chunkSize ) |
|||
chunkSize = 256; |
|||
if ( allocSize+1 > buf->allocated ) |
|||
{ |
|||
byte* bp; |
|||
uint allocAmt = chunkSize; |
|||
if ( buf->allocated > 0 ) |
|||
allocAmt = buf->allocated; |
|||
while ( allocAmt < allocSize+1 ) |
|||
allocAmt *= 2; |
|||
|
|||
bp = (byte*)TidyRealloc( buf->allocator, buf->bp, allocAmt ); |
|||
if ( bp != NULL ) |
|||
{ |
|||
TidyClearMemory( bp + buf->allocated, allocAmt - buf->allocated ); |
|||
buf->bp = bp; |
|||
buf->allocated = allocAmt; |
|||
} |
|||
} |
|||
} |
|||
|
|||
/* Attach buffer to a chunk O' memory w/out allocation */ |
|||
void TIDY_CALL tidyBufAttach( TidyBuffer* buf, byte* bp, uint size ) |
|||
{ |
|||
assert( buf != NULL ); |
|||
buf->bp = bp; |
|||
buf->size = buf->allocated = size; |
|||
buf->next = 0; |
|||
if ( !buf->allocator ) |
|||
setDefaultAllocator( buf ); |
|||
} |
|||
|
|||
/* Clear pointer to memory w/out deallocation */ |
|||
void TIDY_CALL tidyBufDetach( TidyBuffer* buf ) |
|||
{ |
|||
tidyBufInitWithAllocator( buf, buf->allocator ); |
|||
} |
|||
|
|||
|
|||
/**************
|
|||
OUTPUT |
|||
**************/ |
|||
|
|||
/* Append `size` bytes starting at `vp` to the end of the buffer content.
** A NULL `vp` or a zero `size` is a no-op.
**
** tidyBufCheckAlloc() leaves the buffer unchanged when the allocator
** returns NULL, and `buf->size + size` can wrap for very large sizes.
** In either case the copy would run past the end of the storage, so the
** capacity is verified first and the append is dropped if the data (plus
** the spare trailing byte) does not fit.
*/
void TIDY_CALL tidyBufAppend( TidyBuffer* buf, void* vp, uint size )
{
    assert( buf != NULL );

    if ( vp == NULL || size == 0 )
        return;

    tidyBufCheckAlloc( buf, buf->size + size, 0 );

    if ( buf->bp == NULL
         || buf->allocated <= buf->size
         || buf->allocated - buf->size <= size )
        return;   /* growth failed: refuse to write out of bounds */

    memcpy( buf->bp + buf->size, vp, size );
    buf->size += size;
}
|||
|
|||
/* Append a single byte to the end of the buffer content.
**
** tidyBufCheckAlloc() leaves the buffer unchanged when the allocator
** returns NULL, so the capacity is verified before storing.  If there is
** no room for the byte plus the spare trailing byte, the byte is dropped
** rather than written out of bounds.
*/
void TIDY_CALL tidyBufPutByte( TidyBuffer* buf, byte bv )
{
    assert( buf != NULL );

    tidyBufCheckAlloc( buf, buf->size + 1, 0 );

    if ( buf->bp == NULL
         || buf->allocated <= buf->size
         || buf->allocated - buf->size <= 1 )
        return;   /* growth failed: refuse to write out of bounds */

    buf->bp[ buf->size++ ] = bv;
}
|||
|
|||
|
|||
/* Remove the last byte of the buffer content and return it.
** Returns EOF when the buffer holds no content.
*/
int TIDY_CALL tidyBufPopByte( TidyBuffer* buf )
{
    assert( buf != NULL );

    if ( buf->size > 0 )
        return buf->bp[ --buf->size ];

    return EOF;
}
|||
|
|||
/**************
|
|||
INPUT |
|||
**************/ |
|||
|
|||
/* Return the byte at the current read position and advance past it.
** Returns EOF once tidyBufEndOfInput() reports that all content was read.
*/
int TIDY_CALL tidyBufGetByte( TidyBuffer* buf )
{
    if ( tidyBufEndOfInput(buf) )
        return EOF;

    return buf->bp[ buf->next++ ];
}
|||
|
|||
/* True when the read position has reached (or passed) the end of the
** buffer content, i.e. there is nothing left for tidyBufGetByte().
*/
Bool TIDY_CALL tidyBufEndOfInput( TidyBuffer* buf )
{
    return ( buf->size <= buf->next );
}
|||
|
|||
/* Step the read position back by one byte.  Nothing is written: the
** buffer already holds the byte, and `bv` is only used to assert that the
** caller is un-getting the byte that was actually read.  A no-op when the
** read position is already at the start.
*/
void TIDY_CALL tidyBufUngetByte( TidyBuffer* buf, byte bv )
{
    if ( buf->next > 0 )
    {
        buf->next -= 1;
        assert( bv == buf->bp[ buf->next ] );
    }
}
|||
|
|||
/*
|
|||
* local variables: |
|||
* mode: c |
|||
* indent-tabs-mode: nil |
|||
* c-basic-offset: 4 |
|||
* eval: (c-set-offset 'substatement-open 0) |
|||
* end: |
|||
*/ |
@ -0,0 +1,6 @@ |
|||
#ifdef __GNUC__ |
|||
#warning "FIXME: Using compatibility tidy header (buffio.h) that will go away!" |
|||
#endif |
|||
|
|||
#include "tidybuffio.h" |
|||
|
File diff suppressed because it is too large
@ -0,0 +1,13 @@ |
|||
/* charsets.h -- character set information and mappings
|
|||
|
|||
(c) 1998-2006 (W3C) MIT, ERCIM, Keio University |
|||
See tidy.h for the copyright notice. |
|||
|
|||
*/ |
|||
|
|||
uint TY_(GetEncodingIdFromName)(ctmbstr name); |
|||
uint TY_(GetEncodingIdFromCodePage)(uint cp); |
|||
uint TY_(GetEncodingCodePageFromName)(ctmbstr name); |
|||
uint TY_(GetEncodingCodePageFromId)(uint id); |
|||
ctmbstr TY_(GetEncodingNameFromId)(uint id); |
|||
ctmbstr TY_(GetEncodingNameFromCodePage)(uint cp); |
File diff suppressed because it is too large
@ -0,0 +1,82 @@ |
|||
#ifndef __CLEAN_H__ |
|||
#define __CLEAN_H__ |
|||
|
|||
/* clean.h -- clean up misuse of presentation markup
|
|||
|
|||
(c) 1998-2006 (W3C) MIT, ERCIM, Keio University |
|||
See tidy.h for the copyright notice. |
|||
|
|||
*/ |
|||
|
|||
void TY_(FixNodeLinks)(Node *node); |
|||
|
|||
void TY_(FreeStyles)( TidyDocImpl* doc ); |
|||
|
|||
/* Add class="foo" to node
|
|||
*/ |
|||
void TY_(AddStyleAsClass)( TidyDocImpl* doc, Node *node, ctmbstr stylevalue ); |
|||
void TY_(AddStyleProperty)(TidyDocImpl* doc, Node *node, ctmbstr property ); |
|||
|
|||
void TY_(CleanDocument)( TidyDocImpl* doc ); |
|||
|
|||
/* simplifies <b><b> ... </b> ...</b> etc. */ |
|||
void TY_(NestedEmphasis)( TidyDocImpl* doc, Node* node ); |
|||
|
|||
/* replace i by em and b by strong */ |
|||
void TY_(EmFromI)( TidyDocImpl* doc, Node* node ); |
|||
|
|||
/*
|
|||
Some people use dir or ul without an li |
|||
to indent the content. The pattern to |
|||
look for is a list with a single implicit |
|||
li. This is recursively replaced by an |
|||
implicit blockquote. |
|||
*/ |
|||
void TY_(List2BQ)( TidyDocImpl* doc, Node* node ); |
|||
|
|||
/*
|
|||
Replace implicit blockquote by div with an indent |
|||
taking care to reduce nested blockquotes to a single |
|||
div with the indent set to match the nesting depth |
|||
*/ |
|||
void TY_(BQ2Div)( TidyDocImpl* doc, Node* node ); |
|||
|
|||
|
|||
void TY_(DropSections)( TidyDocImpl* doc, Node* node ); |
|||
|
|||
|
|||
/*
|
|||
This is a major clean up to strip out all the extra stuff you get |
|||
when you save as web page from Word 2000. It doesn't yet know what |
|||
to do with VML tags, but these will appear as errors unless you |
|||
declare them as new tags, such as o:p which needs to be declared |
|||
as inline. |
|||
*/ |
|||
void TY_(CleanWord2000)( TidyDocImpl* doc, Node *node); |
|||
|
|||
Bool TY_(IsWord2000)( TidyDocImpl* doc ); |
|||
|
|||
/* where appropriate move object elements from head to body */ |
|||
void TY_(BumpObject)( TidyDocImpl* doc, Node *html ); |
|||
|
|||
/* This is disabled due to http://tidy.sf.net/bug/681116 */ |
|||
#if 0 |
|||
void TY_(FixBrakes)( TidyDocImpl* pDoc, Node *pParent ); |
|||
#endif |
|||
|
|||
void TY_(VerifyHTTPEquiv)( TidyDocImpl* pDoc, Node *pParent ); |
|||
|
|||
void TY_(DropComments)(TidyDocImpl* doc, Node* node); |
|||
void TY_(DropFontElements)(TidyDocImpl* doc, Node* node, Node **pnode); |
|||
void TY_(WbrToSpace)(TidyDocImpl* doc, Node* node); |
|||
void TY_(DowngradeTypography)(TidyDocImpl* doc, Node* node); |
|||
void TY_(ReplacePreformattedSpaces)(TidyDocImpl* doc, Node* node); |
|||
void TY_(NormalizeSpaces)(Lexer *lexer, Node *node); |
|||
void TY_(ConvertCDATANodes)(TidyDocImpl* doc, Node* node); |
|||
|
|||
void TY_(FixAnchors)(TidyDocImpl* doc, Node *node, Bool wantName, Bool wantId); |
|||
void TY_(FixXhtmlNamespace)(TidyDocImpl* doc, Bool wantXmlns); |
|||
void TY_(FixLanguageInformation)(TidyDocImpl* doc, Node* node, Bool wantXmlLang, Bool wantLang); |
|||
|
|||
|
|||
#endif /* __CLEAN_H__ */ |
File diff suppressed because it is too large
@ -0,0 +1,146 @@ |
|||
#ifndef __CONFIG_H__ |
|||
#define __CONFIG_H__ |
|||
|
|||
/* config.h -- read config file and manage config properties
|
|||
|
|||
(c) 1998-2006 (W3C) MIT, ERCIM, Keio University |
|||
See tidy.h for the copyright notice. |
|||
|
|||
config files associate a property name with a value. |
|||
|
|||
// comments can start at the beginning of a line
|
|||
# comments can start at the beginning of a line |
|||
name: short values fit onto one line |
|||
name: a really long value that |
|||
continues on the next line |
|||
|
|||
property names are case insensitive and should be less than |
|||
60 characters in length and must start at the beginning of |
|||
the line, as whitespace at the start of a line signifies a |
|||
line continuation. |
|||
|
|||
*/ |
|||
|
|||
#include "forward.h" |
|||
#include "tidy.h" |
|||
#include "streamio.h" |
|||
|
|||
struct _tidy_option; |
|||
typedef struct _tidy_option TidyOptionImpl; |
|||
|
|||
typedef Bool (ParseProperty)( TidyDocImpl* doc, const TidyOptionImpl* opt ); |
|||
|
|||
struct _tidy_option |
|||
{ |
|||
TidyOptionId id; |
|||
TidyConfigCategory category; /* put 'em in groups */ |
|||
ctmbstr name; /* property name */ |
|||
TidyOptionType type; /* string, int or bool */ |
|||
ulong dflt; /* default for TidyInteger and TidyBoolean */ |
|||
ParseProperty* parser; /* parsing method, read-only if NULL */ |
|||
const ctmbstr* pickList; /* pick list */ |
|||
ctmbstr pdflt; /* default for TidyString */ |
|||
}; |
|||
|
|||
typedef union |
|||
{ |
|||
ulong v; /* Value for TidyInteger and TidyBoolean */ |
|||
char *p; /* Value for TidyString */ |
|||
} TidyOptionValue; |
|||
|
|||
typedef struct _tidy_config |
|||
{ |
|||
TidyOptionValue value[ N_TIDY_OPTIONS + 1 ]; /* current config values */ |
|||
TidyOptionValue snapshot[ N_TIDY_OPTIONS + 1 ]; /* Snapshot of values to be restored later */ |
|||
|
|||
/* track what tags user has defined to eliminate unnecessary searches */ |
|||
uint defined_tags; |
|||
|
|||
uint c; /* current char in input stream */ |
|||
StreamIn* cfgIn; /* current input source */ |
|||
|
|||
} TidyConfigImpl; |
|||
|
|||
|
|||
/* Used to build a table of documentation cross-references. */ |
|||
typedef struct { |
|||
TidyOptionId opt; /**< Identifier. */ |
|||
TidyOptionId const *links; /**< Cross references. Last element must be 'TidyUnknownOption'. */ |
|||
} TidyOptionDoc; |
|||
|
|||
|
|||
const TidyOptionImpl* TY_(lookupOption)( ctmbstr optnam ); |
|||
const TidyOptionImpl* TY_(getOption)( TidyOptionId optId ); |
|||
|
|||
TidyIterator TY_(getOptionList)( TidyDocImpl* doc ); |
|||
const TidyOptionImpl* TY_(getNextOption)( TidyDocImpl* doc, TidyIterator* iter ); |
|||
|
|||
TidyIterator TY_(getOptionPickList)( const TidyOptionImpl* option ); |
|||
ctmbstr TY_(getNextOptionPick)( const TidyOptionImpl* option, TidyIterator* iter ); |
|||
|
|||
const TidyOptionDoc* TY_(OptGetDocDesc)( TidyOptionId optId ); |
|||
|
|||
void TY_(InitConfig)( TidyDocImpl* doc ); |
|||
void TY_(FreeConfig)( TidyDocImpl* doc ); |
|||
|
|||
/* Bool SetOptionValue( TidyDocImpl* doc, TidyOptionId optId, ctmbstr val ); */ |
|||
Bool TY_(SetOptionInt)( TidyDocImpl* doc, TidyOptionId optId, ulong val ); |
|||
Bool TY_(SetOptionBool)( TidyDocImpl* doc, TidyOptionId optId, Bool val ); |
|||
|
|||
Bool TY_(ResetOptionToDefault)( TidyDocImpl* doc, TidyOptionId optId ); |
|||
void TY_(ResetConfigToDefault)( TidyDocImpl* doc ); |
|||
void TY_(TakeConfigSnapshot)( TidyDocImpl* doc ); |
|||
void TY_(ResetConfigToSnapshot)( TidyDocImpl* doc ); |
|||
|
|||
void TY_(CopyConfig)( TidyDocImpl* docTo, TidyDocImpl* docFrom ); |
|||
|
|||
int TY_(ParseConfigFile)( TidyDocImpl* doc, ctmbstr cfgfil ); |
|||
int TY_(ParseConfigFileEnc)( TidyDocImpl* doc, |
|||
ctmbstr cfgfil, ctmbstr charenc ); |
|||
|
|||
int TY_(SaveConfigFile)( TidyDocImpl* doc, ctmbstr cfgfil ); |
|||
int TY_(SaveConfigSink)( TidyDocImpl* doc, TidyOutputSink* sink ); |
|||
|
|||
/* returns false if unknown option, missing parameter, or
|
|||
option doesn't use parameter |
|||
*/ |
|||
Bool TY_(ParseConfigOption)( TidyDocImpl* doc, ctmbstr optnam, ctmbstr optVal ); |
|||
Bool TY_(ParseConfigValue)( TidyDocImpl* doc, TidyOptionId optId, ctmbstr optVal ); |
|||
|
|||
/* ensure that char encodings are self consistent */ |
|||
Bool TY_(AdjustCharEncoding)( TidyDocImpl* doc, int encoding ); |
|||
|
|||
Bool TY_(ConfigDiffThanDefault)( TidyDocImpl* doc ); |
|||
Bool TY_(ConfigDiffThanSnapshot)( TidyDocImpl* doc ); |
|||
|
|||
int TY_(CharEncodingId)( TidyDocImpl* doc, ctmbstr charenc ); |
|||
ctmbstr TY_(CharEncodingName)( int encoding ); |
|||
ctmbstr TY_(CharEncodingOptName)( int encoding ); |
|||
|
|||
/* void SetEmacsFilename( TidyDocImpl* doc, ctmbstr filename ); */ |
|||
|
|||
|
|||
#ifdef _DEBUG |
|||
|
|||
/* Debug lookup functions will be type-safe and assert option type match */ |
|||
ulong TY_(_cfgGet)( TidyDocImpl* doc, TidyOptionId optId ); |
|||
Bool TY_(_cfgGetBool)( TidyDocImpl* doc, TidyOptionId optId ); |
|||
TidyTriState TY_(_cfgGetAutoBool)( TidyDocImpl* doc, TidyOptionId optId ); |
|||
ctmbstr TY_(_cfgGetString)( TidyDocImpl* doc, TidyOptionId optId ); |
|||
|
|||
#define cfg(doc, id) TY_(_cfgGet)( (doc), (id) ) |
|||
#define cfgBool(doc, id) TY_(_cfgGetBool)( (doc), (id) ) |
|||
#define cfgAutoBool(doc, id) TY_(_cfgGetAutoBool)( (doc), (id) ) |
|||
#define cfgStr(doc, id) TY_(_cfgGetString)( (doc), (id) ) |
|||
|
|||
#else |
|||
|
|||
/* Release build macros for speed */ |
|||
#define cfg(doc, id) ((doc)->config.value[ (id) ].v) |
|||
#define cfgBool(doc, id) ((Bool) cfg(doc, id)) |
|||
#define cfgAutoBool(doc, id) ((TidyTriState) cfg(doc, id)) |
|||
#define cfgStr(doc, id) ((ctmbstr) (doc)->config.value[ (id) ].p) |
|||
|
|||
#endif /* _DEBUG */ |
|||
|
|||
#endif /* __CONFIG_H__ */ |
@ -0,0 +1,424 @@ |
|||
/* entities.c -- recognize HTML ISO entities
|
|||
|
|||
(c) 1998-2008 (W3C) MIT, ERCIM, Keio University |
|||
See tidy.h for the copyright notice. |
|||
|
|||
Entity handling can be static because there are no config or |
|||
document-specific values. Lookup table is 100% defined at |
|||
compile time. |
|||
|
|||
*/ |
|||
|
|||
#include <stdio.h> |
|||
#include "entities.h" |
|||
#include "tidy-int.h" |
|||
#include "tmbstr.h" |
|||
|
|||
struct _entity; |
|||
typedef struct _entity entity; |
|||
|
|||
struct _entity |
|||
{ |
|||
ctmbstr name; |
|||
uint versions; |
|||
uint code; |
|||
}; |
|||
|
|||
|
|||
static const entity entities[] = |
|||
{ |
|||
/*
|
|||
** Markup pre-defined character entities |
|||
*/ |
|||
{ "quot", VERS_ALL|VERS_XML, 34 }, |
|||
{ "amp", VERS_ALL|VERS_XML, 38 }, |
|||
{ "apos", VERS_FROM40|VERS_XML, 39 }, |
|||
{ "lt", VERS_ALL|VERS_XML, 60 }, |
|||
{ "gt", VERS_ALL|VERS_XML, 62 }, |
|||
|
|||
/*
|
|||
** Latin-1 character entities |
|||
*/ |
|||
{ "nbsp", VERS_ALL, 160 }, |
|||
{ "iexcl", VERS_ALL, 161 }, |
|||
{ "cent", VERS_ALL, 162 }, |
|||
{ "pound", VERS_ALL, 163 }, |
|||
{ "curren", VERS_ALL, 164 }, |
|||
{ "yen", VERS_ALL, 165 }, |
|||
{ "brvbar", VERS_ALL, 166 }, |
|||
{ "sect", VERS_ALL, 167 }, |
|||
{ "uml", VERS_ALL, 168 }, |
|||
{ "copy", VERS_ALL, 169 }, |
|||
{ "ordf", VERS_ALL, 170 }, |
|||
{ "laquo", VERS_ALL, 171 }, |
|||
{ "not", VERS_ALL, 172 }, |
|||
{ "shy", VERS_ALL, 173 }, |
|||
{ "reg", VERS_ALL, 174 }, |
|||
{ "macr", VERS_ALL, 175 }, |
|||
{ "deg", VERS_ALL, 176 }, |
|||
{ "plusmn", VERS_ALL, 177 }, |
|||
{ "sup2", VERS_ALL, 178 }, |
|||
{ "sup3", VERS_ALL, 179 }, |
|||
{ "acute", VERS_ALL, 180 }, |
|||
{ "micro", VERS_ALL, 181 }, |
|||
{ "para", VERS_ALL, 182 }, |
|||
{ "middot", VERS_ALL, 183 }, |
|||
{ "cedil", VERS_ALL, 184 }, |
|||
{ "sup1", VERS_ALL, 185 }, |
|||
{ "ordm", VERS_ALL, 186 }, |
|||
{ "raquo", VERS_ALL, 187 }, |
|||
{ "frac14", VERS_ALL, 188 }, |
|||
{ "frac12", VERS_ALL, 189 }, |
|||
{ "frac34", VERS_ALL, 190 }, |
|||
{ "iquest", VERS_ALL, 191 }, |
|||
{ "Agrave", VERS_ALL, 192 }, |
|||
{ "Aacute", VERS_ALL, 193 }, |
|||
{ "Acirc", VERS_ALL, 194 }, |
|||
{ "Atilde", VERS_ALL, 195 }, |
|||
{ "Auml", VERS_ALL, 196 }, |
|||
{ "Aring", VERS_ALL, 197 }, |
|||
{ "AElig", VERS_ALL, 198 }, |
|||
{ "Ccedil", VERS_ALL, 199 }, |
|||
{ "Egrave", VERS_ALL, 200 }, |
|||
{ "Eacute", VERS_ALL, 201 }, |
|||
{ "Ecirc", VERS_ALL, 202 }, |
|||
{ "Euml", VERS_ALL, 203 }, |
|||
{ "Igrave", VERS_ALL, 204 }, |
|||
{ "Iacute", VERS_ALL, 205 }, |
|||
{ "Icirc", VERS_ALL, 206 }, |
|||
{ "Iuml", VERS_ALL, 207 }, |
|||
{ "ETH", VERS_ALL, 208 }, |
|||
{ "Ntilde", VERS_ALL, 209 }, |
|||
{ "Ograve", VERS_ALL, 210 }, |
|||
{ "Oacute", VERS_ALL, 211 }, |
|||
{ "Ocirc", VERS_ALL, 212 }, |
|||
{ "Otilde", VERS_ALL, 213 }, |
|||
{ "Ouml", VERS_ALL, 214 }, |
|||
{ "times", VERS_ALL, 215 }, |
|||
{ "Oslash", VERS_ALL, 216 }, |
|||
{ "Ugrave", VERS_ALL, 217 }, |
|||
{ "Uacute", VERS_ALL, 218 }, |
|||
{ "Ucirc", VERS_ALL, 219 }, |
|||
{ "Uuml", VERS_ALL, 220 }, |
|||
{ "Yacute", VERS_ALL, 221 }, |
|||
{ "THORN", VERS_ALL, 222 }, |
|||
{ "szlig", VERS_ALL, 223 }, |
|||
{ "agrave", VERS_ALL, 224 }, |
|||
{ "aacute", VERS_ALL, 225 }, |
|||
{ "acirc", VERS_ALL, 226 }, |
|||
{ "atilde", VERS_ALL, 227 }, |
|||
{ "auml", VERS_ALL, 228 }, |
|||
{ "aring", VERS_ALL, 229 }, |
|||
{ "aelig", VERS_ALL, 230 }, |
|||
{ "ccedil", VERS_ALL, 231 }, |
|||
{ "egrave", VERS_ALL, 232 }, |
|||
{ "eacute", VERS_ALL, 233 }, |
|||
{ "ecirc", VERS_ALL, 234 }, |
|||
{ "euml", VERS_ALL, 235 }, |
|||
{ "igrave", VERS_ALL, 236 }, |
|||
{ "iacute", VERS_ALL, 237 }, |
|||
{ "icirc", VERS_ALL, 238 }, |
|||
{ "iuml", VERS_ALL, 239 }, |
|||
{ "eth", VERS_ALL, 240 }, |
|||
{ "ntilde", VERS_ALL, 241 }, |
|||
{ "ograve", VERS_ALL, 242 }, |
|||
{ "oacute", VERS_ALL, 243 }, |
|||
{ "ocirc", VERS_ALL, 244 }, |
|||
{ "otilde", VERS_ALL, 245 }, |
|||
{ "ouml", VERS_ALL, 246 }, |
|||
{ "divide", VERS_ALL, 247 }, |
|||
{ "oslash", VERS_ALL, 248 }, |
|||
{ "ugrave", VERS_ALL, 249 }, |
|||
{ "uacute", VERS_ALL, 250 }, |
|||
{ "ucirc", VERS_ALL, 251 }, |
|||
{ "uuml", VERS_ALL, 252 }, |
|||
{ "yacute", VERS_ALL, 253 }, |
|||
{ "thorn", VERS_ALL, 254 }, |
|||
{ "yuml", VERS_ALL, 255 }, |
|||
|
|||
/*
|
|||
** Extended Entities defined in HTML 4: Symbols |
|||
*/ |
|||
{ "fnof", VERS_FROM40, 402 }, |
|||
{ "Alpha", VERS_FROM40, 913 }, |
|||
{ "Beta", VERS_FROM40, 914 }, |
|||
{ "Gamma", VERS_FROM40, 915 }, |
|||
{ "Delta", VERS_FROM40, 916 }, |
|||
{ "Epsilon", VERS_FROM40, 917 }, |
|||
{ "Zeta", VERS_FROM40, 918 }, |
|||
{ "Eta", VERS_FROM40, 919 }, |
|||
{ "Theta", VERS_FROM40, 920 }, |
|||
{ "Iota", VERS_FROM40, 921 }, |
|||
{ "Kappa", VERS_FROM40, 922 }, |
|||
{ "Lambda", VERS_FROM40, 923 }, |
|||
{ "Mu", VERS_FROM40, 924 }, |
|||
{ "Nu", VERS_FROM40, 925 }, |
|||
{ "Xi", VERS_FROM40, 926 }, |
|||
{ "Omicron", VERS_FROM40, 927 }, |
|||
{ "Pi", VERS_FROM40, 928 }, |
|||
{ "Rho", VERS_FROM40, 929 }, |
|||
{ "Sigma", VERS_FROM40, 931 }, |
|||
{ "Tau", VERS_FROM40, 932 }, |
|||
{ "Upsilon", VERS_FROM40, 933 }, |
|||
{ "Phi", VERS_FROM40, 934 }, |
|||
{ "Chi", VERS_FROM40, 935 }, |
|||
{ "Psi", VERS_FROM40, 936 }, |
|||
{ "Omega", VERS_FROM40, 937 }, |
|||
{ "alpha", VERS_FROM40, 945 }, |
|||
{ "beta", VERS_FROM40, 946 }, |
|||
{ "gamma", VERS_FROM40, 947 }, |
|||
{ "delta", VERS_FROM40, 948 }, |
|||
{ "epsilon", VERS_FROM40, 949 }, |
|||
{ "zeta", VERS_FROM40, 950 }, |
|||
{ "eta", VERS_FROM40, 951 }, |
|||
{ "theta", VERS_FROM40, 952 }, |
|||
{ "iota", VERS_FROM40, 953 }, |
|||
{ "kappa", VERS_FROM40, 954 }, |
|||
{ "lambda", VERS_FROM40, 955 }, |
|||
{ "mu", VERS_FROM40, 956 }, |
|||
{ "nu", VERS_FROM40, 957 }, |
|||
{ "xi", VERS_FROM40, 958 }, |
|||
{ "omicron", VERS_FROM40, 959 }, |
|||
{ "pi", VERS_FROM40, 960 }, |
|||
{ "rho", VERS_FROM40, 961 }, |
|||
{ "sigmaf", VERS_FROM40, 962 }, |
|||
{ "sigma", VERS_FROM40, 963 }, |
|||
{ "tau", VERS_FROM40, 964 }, |
|||
{ "upsilon", VERS_FROM40, 965 }, |
|||
{ "phi", VERS_FROM40, 966 }, |
|||
{ "chi", VERS_FROM40, 967 }, |
|||
{ "psi", VERS_FROM40, 968 }, |
|||
{ "omega", VERS_FROM40, 969 }, |
|||
{ "thetasym", VERS_FROM40, 977 }, |
|||
{ "upsih", VERS_FROM40, 978 }, |
|||
{ "piv", VERS_FROM40, 982 }, |
|||
{ "bull", VERS_FROM40, 8226 }, |
|||
{ "hellip", VERS_FROM40, 8230 }, |
|||
{ "prime", VERS_FROM40, 8242 }, |
|||
{ "Prime", VERS_FROM40, 8243 }, |
|||
{ "oline", VERS_FROM40, 8254 }, |
|||
{ "frasl", VERS_FROM40, 8260 }, |
|||
{ "weierp", VERS_FROM40, 8472 }, |
|||
{ "image", VERS_FROM40, 8465 }, |
|||
{ "real", VERS_FROM40, 8476 }, |
|||
{ "trade", VERS_FROM40, 8482 }, |
|||
{ "alefsym", VERS_FROM40, 8501 }, |
|||
{ "larr", VERS_FROM40, 8592 }, |
|||
{ "uarr", VERS_FROM40, 8593 }, |
|||
{ "rarr", VERS_FROM40, 8594 }, |
|||
{ "darr", VERS_FROM40, 8595 }, |
|||
{ "harr", VERS_FROM40, 8596 }, |
|||
{ "crarr", VERS_FROM40, 8629 }, |
|||
{ "lArr", VERS_FROM40, 8656 }, |
|||
{ "uArr", VERS_FROM40, 8657 }, |
|||
{ "rArr", VERS_FROM40, 8658 }, |
|||
{ "dArr", VERS_FROM40, 8659 }, |
|||
{ "hArr", VERS_FROM40, 8660 }, |
|||
{ "forall", VERS_FROM40, 8704 }, |
|||
{ "part", VERS_FROM40, 8706 }, |
|||
{ "exist", VERS_FROM40, 8707 }, |
|||
{ "empty", VERS_FROM40, 8709 }, |
|||
{ "nabla", VERS_FROM40, 8711 }, |
|||
{ "isin", VERS_FROM40, 8712 }, |
|||
{ "notin", VERS_FROM40, 8713 }, |
|||
{ "ni", VERS_FROM40, 8715 }, |
|||
{ "prod", VERS_FROM40, 8719 }, |
|||
{ "sum", VERS_FROM40, 8721 }, |
|||
{ "minus", VERS_FROM40, 8722 }, |
|||
{ "lowast", VERS_FROM40, 8727 }, |
|||
{ "radic", VERS_FROM40, 8730 }, |
|||
{ "prop", VERS_FROM40, 8733 }, |
|||
{ "infin", VERS_FROM40, 8734 }, |
|||
{ "ang", VERS_FROM40, 8736 }, |
|||
{ "and", VERS_FROM40, 8743 }, |
|||
{ "or", VERS_FROM40, 8744 }, |
|||
{ "cap", VERS_FROM40, 8745 }, |
|||
{ "cup", VERS_FROM40, 8746 }, |
|||
{ "int", VERS_FROM40, 8747 }, |
|||
{ "there4", VERS_FROM40, 8756 }, |
|||
{ "sim", VERS_FROM40, 8764 }, |
|||
{ "cong", VERS_FROM40, 8773 }, |
|||
{ "asymp", VERS_FROM40, 8776 }, |
|||
{ "ne", VERS_FROM40, 8800 }, |
|||
{ "equiv", VERS_FROM40, 8801 }, |
|||
{ "le", VERS_FROM40, 8804 }, |
|||
{ "ge", VERS_FROM40, 8805 }, |
|||
{ "sub", VERS_FROM40, 8834 }, |
|||
{ "sup", VERS_FROM40, 8835 }, |
|||
{ "nsub", VERS_FROM40, 8836 }, |
|||
{ "sube", VERS_FROM40, 8838 }, |
|||
{ "supe", VERS_FROM40, 8839 }, |
|||
{ "oplus", VERS_FROM40, 8853 }, |
|||
{ "otimes", VERS_FROM40, 8855 }, |
|||
{ "perp", VERS_FROM40, 8869 }, |
|||
{ "sdot", VERS_FROM40, 8901 }, |
|||
{ "lceil", VERS_FROM40, 8968 }, |
|||
{ "rceil", VERS_FROM40, 8969 }, |
|||
{ "lfloor", VERS_FROM40, 8970 }, |
|||
{ "rfloor", VERS_FROM40, 8971 }, |
|||
{ "lang", VERS_FROM40, 10216 }, |
|||
{ "rang", VERS_FROM40, 10217 }, |
|||
{ "loz", VERS_FROM40, 9674 }, |
|||
{ "spades", VERS_FROM40, 9824 }, |
|||
{ "clubs", VERS_FROM40, 9827 }, |
|||
{ "hearts", VERS_FROM40, 9829 }, |
|||
{ "diams", VERS_FROM40, 9830 }, |
|||
|
|||
/*
|
|||
** Extended Entities defined in HTML 4: Special (less Markup at top) |
|||
*/ |
|||
{ "OElig", VERS_FROM40, 338 }, |
|||
{ "oelig", VERS_FROM40, 339 }, |
|||
{ "Scaron", VERS_FROM40, 352 }, |
|||
{ "scaron", VERS_FROM40, 353 }, |
|||
{ "Yuml", VERS_FROM40, 376 }, |
|||
{ "circ", VERS_FROM40, 710 }, |
|||
{ "tilde", VERS_FROM40, 732 }, |
|||
{ "ensp", VERS_FROM40, 8194 }, |
|||
{ "emsp", VERS_FROM40, 8195 }, |
|||
{ "thinsp", VERS_FROM40, 8201 }, |
|||
{ "zwnj", VERS_FROM40, 8204 }, |
|||
{ "zwj", VERS_FROM40, 8205 }, |
|||
{ "lrm", VERS_FROM40, 8206 }, |
|||
{ "rlm", VERS_FROM40, 8207 }, |
|||
{ "ndash", VERS_FROM40, 8211 }, |
|||
{ "mdash", VERS_FROM40, 8212 }, |
|||
{ "lsquo", VERS_FROM40, 8216 }, |
|||
{ "rsquo", VERS_FROM40, 8217 }, |
|||
{ "sbquo", VERS_FROM40, 8218 }, |
|||
{ "ldquo", VERS_FROM40, 8220 }, |
|||
{ "rdquo", VERS_FROM40, 8221 }, |
|||
{ "bdquo", VERS_FROM40, 8222 }, |
|||
{ "dagger", VERS_FROM40, 8224 }, |
|||
{ "Dagger", VERS_FROM40, 8225 }, |
|||
{ "permil", VERS_FROM40, 8240 }, |
|||
{ "lsaquo", VERS_FROM40, 8249 }, |
|||
{ "rsaquo", VERS_FROM40, 8250 }, |
|||
{ "euro", VERS_FROM40, 8364 }, |
|||
{ NULL, VERS_UNKNOWN, 0 } |
|||
}; |
|||
|
|||
|
|||
/* Pure static implementation. Trades off lookup speed
|
|||
** for faster setup time (well, none actually). |
|||
** Optimization of comparing 1st character buys enough |
|||
** speed that hash doesn't improve things without > 500 |
|||
** items in list. |
|||
*/ |
|||
static const entity* entitiesLookup( ctmbstr s ) |
|||
{ |
|||
tmbchar ch = (tmbchar)( s ? *s : 0 ); |
|||
const entity *np; |
|||
for ( np = entities; ch && np && np->name; ++np ) |
|||
if ( ch == *np->name && TY_(tmbstrcmp)(s, np->name) == 0 ) |
|||
return np; |
|||
return NULL; |
|||
} |
|||
|
|||
#if 0 |
|||
/* entity starting with "&" returns zero on error */ |
|||
uint EntityCode( ctmbstr name, uint versions ) |
|||
{ |
|||
const entity* np; |
|||
assert( name && name[0] == '&' ); |
|||
|
|||
/* numeric entity: name = "&#" followed by number */ |
|||
if ( name[1] == '#' ) |
|||
{ |
|||
uint c = 0; /* zero on missing/bad number */ |
|||
Bool isXml = ( (versions & VERS_XML) == VERS_XML ); |
|||
|
|||
/* 'x' prefix denotes hexadecimal number format */ |
|||
if ( name[2] == 'x' || (!isXml && name[2] == 'X') ) |
|||
sscanf( name+3, "%x", &c ); |
|||
else |
|||
sscanf( name+2, "%u", &c ); |
|||
|
|||
return (uint) c; |
|||
} |
|||
|
|||
/* Named entity: name ="&" followed by a name */ |
|||
if ( NULL != (np = entitiesLookup(name+1)) ) |
|||
{ |
|||
/* Only recognize entity name if version supports it. */ |
|||
if ( np->versions & versions ) |
|||
return np->code; |
|||
} |
|||
|
|||
return 0; /* zero signifies unknown entity name */ |
|||
} |
|||
#endif |
|||
|
|||
/* Resolve an entity reference.
**
** name      entity text; must be non-NULL and start with '&'
** isXml     when true, only a lower-case 'x' introduces a hexadecimal
**           numeric reference; otherwise 'X' is accepted as well
** code      out: character code, or 0 when the entity is not recognised
** versions  out: VERS_ALL for a parsed numeric reference, the table's
**           version mask for a named entity, and VERS_XML (isXml) or
**           VERS_PROPRIETARY (otherwise) when not recognised
**
** Returns yes when the entity was recognised, no otherwise.
*/
Bool TY_(EntityInfo)( ctmbstr name, Bool isXml, uint* code, uint* versions )
{
    assert( name && name[0] == '&' );
    assert( code != NULL );
    assert( versions != NULL );

    if ( name[1] == '#' )
    {
        /* numeric entity: name = "&#" followed by number */
        uint value = 0;
        int nparsed;

        /* 'x' prefix denotes hexadecimal number format */
        if ( name[2] == 'x' || (!isXml && name[2] == 'X') )
            nparsed = sscanf( name+3, "%x", &value );
        else
            nparsed = sscanf( name+2, "%u", &value );

        /* Issue #373 - Null Char in XML result doc - sf905 2009:
        ** only report success when a number was actually parsed.
        */
        if ( nparsed == 1 )
        {
            *code = value;
            *versions = VERS_ALL;
            return yes;
        }
    }
    else
    {
        /* Named entity: name = "&" followed by a name */
        const entity* hit = entitiesLookup( name+1 );
        if ( hit != NULL )
        {
            *code = hit->code;
            *versions = hit->versions;
            return yes;
        }
    }

    /* bad number or unknown name */
    *code = 0;
    *versions = ( isXml ? VERS_XML : VERS_PROPRIETARY );
    return no;
}
|||
|
|||
|
|||
/* Return the entity name for character code `ch`, or NULL.
** Only the FIRST table row with a matching code is considered: its name
** is returned if its version mask intersects `versions`, otherwise NULL
** is returned without looking at later rows.
*/
ctmbstr TY_(EntityName)( uint ch, uint versions )
{
    const entity *row;

    for ( row = entities; row->name != NULL; ++row )
    {
        if ( row->code != ch )
            continue;

        /* Found code. Stop search. */
        return ( row->versions & versions ) ? row->name : NULL;
    }
    return NULL;
}
|||
|
|||
/*
|
|||
* local variables: |
|||
* mode: c |
|||
* indent-tabs-mode: nil |
|||
* c-basic-offset: 4 |
|||
* eval: (c-set-offset 'substatement-open 0) |
|||
* end: |
|||
*/ |
@ -0,0 +1,18 @@ |
|||
#ifndef __ENTITIES_H__ |
|||
#define __ENTITIES_H__ |
|||
|
|||
/* entities.h -- recognize character entities
|
|||
|
|||
(c) 1998-2006 (W3C) MIT, ERCIM, Keio University |
|||
See tidy.h for the copyright notice. |
|||
|
|||
*/ |
|||
|
|||
#include "forward.h" |
|||
|
|||
/* entity starting with "&" returns zero on error */ |
|||
/* uint EntityCode( ctmbstr name, uint versions ); */ |
|||
ctmbstr TY_(EntityName)( uint charCode, uint versions ); |
|||
Bool TY_(EntityInfo)( ctmbstr name, Bool isXml, uint* code, uint* versions ); |
|||
|
|||
#endif /* __ENTITIES_H__ */ |
@ -0,0 +1,116 @@ |
|||
/* fileio.c -- does standard I/O
|
|||
|
|||
(c) 1998-2007 (W3C) MIT, ERCIM, Keio University |
|||
See tidy.h for the copyright notice. |
|||
|
|||
Default implementations of Tidy input sources |
|||
and output sinks based on standard C FILE*. |
|||
|
|||
*/ |
|||
|
|||
#include <stdio.h> |
|||
|
|||
#include "forward.h" |
|||
#include "fileio.h" |
|||
#include "tidy.h" |
|||
#if !defined(NDEBUG) && defined(_MSC_VER) |
|||
#include "sprtf.h" |
|||
#endif |
|||
|
|||
typedef struct _fp_input_source |
|||
{ |
|||
FILE* fp; |
|||
TidyBuffer unget; |
|||
} FileSource; |
|||
|
|||
/* getByte callback: hand back the most recently un-got byte if there is
** one, otherwise read the next byte from the file.
*/
static int TIDY_CALL filesrc_getByte( void* sourceData )
{
    FileSource* src = (FileSource*) sourceData;

    if ( src->unget.size > 0 )
        return tidyBufPopByte( &src->unget );

    return fgetc( src->fp );
}
|||
|
|||
/* eof callback: at end of input only when no pushed-back bytes remain AND
** the stream's end-of-file indicator is set.  feof() is not consulted
** while pushed-back bytes are pending.
*/
static Bool TIDY_CALL filesrc_eof( void* sourceData )
{
    FileSource* src = (FileSource*) sourceData;
    Bool atEnd = ( src->unget.size == 0 && feof( src->fp ) != 0 );

    return atEnd;
}
|||
|
|||
/* ungetByte callback: stash the byte in the push-back buffer so the next
** filesrc_getByte() call returns it.
*/
static void TIDY_CALL filesrc_ungetByte( void* sourceData, byte bv )
{
    FileSource* src = (FileSource*) sourceData;

    tidyBufPutByte( &src->unget, bv );
}
|||
|
|||
#if SUPPORT_POSIX_MAPPED_FILES |
|||
#define initFileSource initStdIOFileSource |
|||
#define freeFileSource freeStdIOFileSource |
|||
#endif |
|||
/* Allocate a FileSource for fp and wire it into inp.
**
** allocator - used for the FileSource itself and recorded in its
**             push-back buffer
** inp       - input source whose callbacks and sourceData are filled in
** fp        - stream to read from
**
** Returns 0 on success, -1 if the allocation fails (inp is left untouched
** in that case).
*/
int TY_(initFileSource)( TidyAllocator *allocator, TidyInputSource* inp, FILE* fp )
{
    FileSource* src = (FileSource*) TidyAlloc( allocator, sizeof(FileSource) );

    if ( src == NULL )
        return -1;

    /* start from an all-zero state, then fill in what is known */
    TidyClearMemory( src, sizeof(FileSource) );
    src->fp = fp;
    src->unget.allocator = allocator;

    inp->sourceData = src;
    inp->getByte    = filesrc_getByte;
    inp->ungetByte  = filesrc_ungetByte;
    inp->eof        = filesrc_eof;

    return 0;
}
|||
|
|||
/* Release the FileSource attached to inp.
**
** inp     - input source previously set up by initFileSource
** closeIt - when true, the underlying FILE* is closed as well
**
** A NULL sourceData is tolerated and treated as "nothing to free".
*/
void TY_(freeFileSource)( TidyInputSource* inp, Bool closeIt )
{
    FileSource* fin = (FileSource*) inp->sourceData;

    /* The original tested fin for NULL only in the fclose() condition and
    ** then dereferenced it unconditionally; bail out before any access.
    */
    if ( !fin )
        return;

    if ( closeIt && fin->fp )
        fclose( fin->fp );

    /* release the push-back storage first, then the FileSource itself */
    tidyBufFree( &fin->unget );
    TidyFree( fin->unget.allocator, fin );
}
|||
|
|||
/* Output-sink callback: write byte bv to the FILE* passed as sinkData. */
void TIDY_CALL TY_(filesink_putByte)( void* sinkData, byte bv )
{
    FILE* out = (FILE*) sinkData;

    fputc( bv, out );

#if !defined(NDEBUG) && defined(_MSC_VER)
    /* MSVC debug builds only: echo the byte through SPRTF unless the sink
    ** is file descriptor 2.  0x0d is not echoed, to avoid a duplicate
    ** newline - SPRTF will translate an 0x0d to CRLF, and do the same
    ** with the following 0x0a.
    */
    if ( _fileno(out) != 2 && bv != 0x0d )
        SPRTF("%c",bv);
#endif
}
|||
|
|||
/* Point outp at fp: bytes written to the sink go through
** filesink_putByte with fp as its sinkData.
*/
void TY_(initFileSink)( TidyOutputSink* outp, FILE* fp )
{
    outp->sinkData = fp;
    outp->putByte  = TY_(filesink_putByte);
}
|||
|
|||
/*
|
|||
* local variables: |
|||
* mode: c |
|||
* indent-tabs-mode: nil |
|||
* c-basic-offset: 4 |
|||
* eval: (c-set-offset 'substatement-open 0) |
|||
* end: |
|||
*/ |
@ -0,0 +1,42 @@ |
|||
#ifndef __FILEIO_H__ |
|||
#define __FILEIO_H__ |
|||
|
|||
/** @file fileio.h - does standard C I/O
|
|||
|
|||
Implementation of a FILE* based TidyInputSource and |
|||
TidyOutputSink. |
|||
|
|||
(c) 1998-2007 (W3C) MIT, ERCIM, Keio University |
|||
See tidy.h for the copyright notice. |
|||
|
|||
*/ |
|||
|
|||
#include "tidybuffio.h" |
|||
#ifdef __cplusplus |
|||
extern "C" { |
|||
#endif |
|||
|
|||
/** Allocate and initialize file input source */ |
|||
int TY_(initFileSource)( TidyAllocator *allocator, TidyInputSource* source, FILE* fp ); |
|||
|
|||
/** Free file input source */ |
|||
void TY_(freeFileSource)( TidyInputSource* source, Bool closeIt ); |
|||
|
|||
#if SUPPORT_POSIX_MAPPED_FILES |
|||
/** Allocate and initialize file input source using Standard C I/O */ |
|||
int TY_(initStdIOFileSource)( TidyAllocator *allocator, TidyInputSource* source, FILE* fp ); |
|||
|
|||
/** Free file input source using Standard C I/O */ |
|||
void TY_(freeStdIOFileSource)( TidyInputSource* source, Bool closeIt ); |
|||
#endif |
|||
|
|||
/** Initialize file output sink */ |
|||
void TY_(initFileSink)( TidyOutputSink* sink, FILE* fp ); |
|||
|
|||
/* Needed for internal declarations */ |
|||
void TIDY_CALL TY_(filesink_putByte)( void* sinkData, byte bv ); |
|||
|
|||
#ifdef __cplusplus |
|||
} |
|||
#endif |
|||
#endif /* __FILEIO_H__ */ |
@ -0,0 +1,63 @@ |
|||
#ifndef __FORWARD_H__ |
|||
#define __FORWARD_H__ |
|||
|
|||
/* forward.h -- Forward declarations for major Tidy structures
|
|||
|
|||
(c) 1998-2007 (W3C) MIT, ERCIM, Keio University |
|||
See tidy.h for the copyright notice. |
|||
|
|||
Avoids many include file circular dependencies. |
|||
|
|||
Try to keep this file down to the minimum to avoid |
|||
cross-talk between modules. |
|||
|
|||
Header files include this file. C files include tidy-int.h. |
|||
|
|||
*/ |
|||
|
|||
#include "tidyplatform.h" |
|||
#include "tidy.h" |
|||
|
|||
/* Internal symbols are prefixed to avoid clashes with other libraries */ |
|||
#define TYDYAPPEND(str1,str2) str1##str2 |
|||
#define TY_(str) TYDYAPPEND(prvTidy,str) |
|||
|
|||
struct _StreamIn; |
|||
typedef struct _StreamIn StreamIn; |
|||
|
|||
struct _StreamOut; |
|||
typedef struct _StreamOut StreamOut; |
|||
|
|||
struct _TidyDocImpl; |
|||
typedef struct _TidyDocImpl TidyDocImpl; |
|||
|
|||
|
|||
struct _Dict; |
|||
typedef struct _Dict Dict; |
|||
|
|||
struct _Attribute; |
|||
typedef struct _Attribute Attribute; |
|||
|
|||
struct _AttVal; |
|||
typedef struct _AttVal AttVal; |
|||
|
|||
struct _Node; |
|||
typedef struct _Node Node; |
|||
|
|||
struct _IStack; |
|||
typedef struct _IStack IStack; |
|||
|
|||
struct _Lexer; |
|||
typedef struct _Lexer Lexer; |
|||
|
|||
extern TidyAllocator TY_(g_default_allocator); |
|||
|
|||
/** Wrappers for easy memory allocation using an allocator */ |
|||
#define TidyAlloc(allocator, size) ((allocator)->vtbl->alloc((allocator), (size))) |
|||
#define TidyRealloc(allocator, block, size) ((allocator)->vtbl->realloc((allocator), (block), (size))) |
|||
#define TidyFree(allocator, block) ((allocator)->vtbl->free((allocator), (block))) |
|||
#define TidyPanic(allocator, msg) ((allocator)->vtbl->panic((allocator), (msg))) |
|||
#define TidyClearMemory(block, size) memset((block), 0, (size)) |
|||
|
|||
|
|||
#endif /* __FORWARD_H__ */ |
@ -0,0 +1,174 @@ |
|||
/*
|
|||
gdoc.c -- clean up html exported by Google Docs (the notes below were inherited from clean.c, which cleans up misuse of presentation markup) |
|||
|
|||
(c) 1998-2008 (W3C) MIT, ERCIM, Keio University |
|||
See tidy.h for the copyright notice. |
|||
|
|||
Filters from other formats such as Microsoft Word |
|||
often make excessive use of presentation markup such |
|||
as font tags, B, I, and the align attribute. By applying |
|||
a set of production rules, it is straight forward to |
|||
transform this to use CSS. |
|||
|
|||
Some rules replace some of the children of an element by |
|||
style properties on the element, e.g. |
|||
|
|||
<p><b>...</b></p> -> <p style="font-weight: bold">...</p> |
|||
|
|||
Such rules are applied to the element's content and then |
|||
to the element itself until none of the rules more apply. |
|||
Having applied all the rules to an element, it will have |
|||
a style attribute with one or more properties. |
|||
|
|||
Other rules strip the element they apply to, replacing |
|||
it by style properties on the contents, e.g. |
|||
|
|||
<dir><li><p>...</li></dir> -> <p style="margin-left 1em">... |
|||
|
|||
These rules are applied to an element before processing |
|||
its content and replace the current element by the first |
|||
element in the exposed content. |
|||
|
|||
After applying both sets of rules, you can replace the |
|||
style attribute by a class value and style rule in the |
|||
document head. To support this, an association of styles |
|||
and class names is built. |
|||
|
|||
A naive approach is to rely on string matching to test |
|||
when two property lists are the same. A better approach |
|||
would be to first sort the properties before matching. |
|||
|
|||
*/ |
|||
|
|||
#include <stdio.h> |
|||
#include <stdlib.h> |
|||
#include <string.h> |
|||
|
|||
#include "tidy-int.h" |
|||
#include "gdoc.h" |
|||
#include "lexer.h" |
|||
#include "parser.h" |
|||
#include "tags.h" |
|||
#include "attrs.h" |
|||
#include "message.h" |
|||
#include "tmbstr.h" |
|||
#include "utf8.h" |
|||
|
|||
/*
|
|||
Extricate "element", replace it by its content and delete it. |
|||
*/ |
|||
static void DiscardContainer( TidyDocImpl* doc, Node *element, Node **pnode) |
|||
{ |
|||
if (element->content) |
|||
{ |
|||
Node *node, *parent = element->parent; |
|||
|
|||
element->last->next = element->next; |
|||
|
|||
if (element->next) |
|||
{ |
|||
element->next->prev = element->last; |
|||
} |
|||
else |
|||
parent->last = element->last; |
|||
|
|||
if (element->prev) |
|||
{ |
|||
element->content->prev = element->prev; |
|||
element->prev->next = element->content; |
|||
} |
|||
else |
|||
parent->content = element->content; |
|||
|
|||
for (node = element->content; node; node = node->next) |
|||
node->parent = parent; |
|||
|
|||
*pnode = element->content; |
|||
|
|||
element->next = element->content = NULL; |
|||
TY_(FreeNode)(doc, element); |
|||
} |
|||
else |
|||
{ |
|||
*pnode = TY_(DiscardElement)(doc, element); |
|||
} |
|||
} |
|||
|
|||
/*
  Apply the Google Docs clean-up rules to every element child of "node":

    - <style> elements are discarded
    - <p> elements without content are discarded
    - <span> elements are replaced by their content
    - <a> elements without content are discarded; if they carry a "name"
      attribute its value is set as "id" on the parent element
    - any other element has its "class" attribute dropped and is then
      cleaned recursively

  "node" itself is never removed.
*/
static void CleanNode( TidyDocImpl* doc, Node *node )
{
    Node *child, *next;

    for (child = node->content; child != NULL; child = next)
    {
        /* remember the sibling now: child may be discarded below */
        next = child->next;

        if (!TY_(nodeIsElement)(child))
            continue;

        if (nodeIsSTYLE(child))
        {
            /* This must be part of the else-if chain.  Previously the
            ** chain started at the <p> test, so a discarded <style> node
            ** was inspected again and could reach the final else branch.
            */
            TY_(DiscardElement)(doc, child);
        }
        else if (nodeIsP(child) && !child->content)
        {
            TY_(DiscardElement)(doc, child);
        }
        else if (nodeIsSPAN(child))
        {
            /* updates next so the exposed content is visited at this level */
            DiscardContainer( doc, child, &next);
        }
        else if (nodeIsA(child) && !child->content)
        {
            AttVal *id = TY_(GetAttrByName)( child, "name" );

            if (id)
                TY_(RepairAttrValue)( doc, child->parent, "id", id->value );

            TY_(DiscardElement)(doc, child);
        }
        else
        {
            if (child->attributes)
                TY_(DropAttrByName)( doc, child, "class" );

            CleanNode(doc, child);
        }
    }
}
|||
|
|||
/* insert meta element to force browser to recognize doc as UTF8 */ |
|||
static void SetUTF8( TidyDocImpl* doc ) |
|||
{ |
|||
Node *head = TY_(FindHEAD)( doc ); |
|||
|
|||
if (head) |
|||
{ |
|||
Node *node = TY_(InferredTag)(doc, TidyTag_META); |
|||
TY_(AddAttribute)( doc, node, "http-equiv", "Content-Type" ); |
|||
TY_(AddAttribute)( doc, node, "content", "text/html; charset=UTF-8" ); |
|||
TY_(InsertNodeAtStart)( head, node ); |
|||
} |
|||
} |
|||
|
|||
/* clean html exported by Google Docs
|
|||
|
|||
- strip the script element, as the style sheet is a mess |
|||
- strip class attributes |
|||
- strip span elements, leaving their content in place |
|||
- replace <a name=...></a> by id on parent element |
|||
- strip empty <p> elements |
|||
*/ |
|||
void TY_(CleanGoogleDocument)( TidyDocImpl* doc ) |
|||
{ |
|||
/* placeholder. CleanTree()/CleanNode() will not
|
|||
** zap root element |
|||
*/ |
|||
CleanNode( doc, &doc->root ); |
|||
SetUTF8( doc ); |
|||
} |
|||
|
|||
/*
|
|||
* local variables: |
|||
* mode: c |
|||
* indent-tabs-mode: nil |
|||
* c-basic-offset: 4 |
|||
* eval: (c-set-offset 'substatement-open 0) |
|||
* end: |
|||
*/ |
@ -0,0 +1,19 @@ |
|||
#ifndef __GDOC_H__ |
|||
#define __GDOC_H__ |
|||
|
|||
/* gdoc.h -- clean up html exported by Google Docs
|
|||
|
|||
(c) 2012 (W3C) MIT, ERCIM, Keio University |
|||
See tidy.h for the copyright notice. |
|||
|
|||
- strip the style element, as the style sheet is a mess |
|||
- strip class attributes |
|||
- strip span elements, leaving their content in place |
|||
- replace <a name=...></a> by id on parent element |
|||
- strip empty <p> elements |
|||
|
|||
*/ |
|||
|
|||
void TY_(CleanGoogleDocument)( TidyDocImpl* doc ); |
|||
|
|||
#endif /* __GDOC_H__ */ |
@ -0,0 +1,104 @@ |
|||
/* iconvtc.c -- Interface to iconv transcoding routines
|
|||
|
|||
(c) 1998-2008 (W3C) MIT, ERCIM, Keio University |
|||
See tidy.h for the copyright notice. |
|||
|
|||
*/ |
|||
|
|||
#include "tidy.h" |
|||
#include "forward.h" |
|||
#include "streamio.h" |
|||
|
|||
#ifdef TIDY_ICONV_SUPPORT |
|||
|
|||
#include <iconv.h> |
|||
|
|||
/* maximum number of bytes for a single character */ |
|||
#define TC_INBUFSIZE 16 |
|||
|
|||
/* maximum number of characters per byte sequence */ |
|||
#define TC_OUTBUFSIZE 16 |
|||
|
|||
Bool IconvInitInputTranscoder(void) |
|||
{ |
|||
return no; |
|||
} |
|||
|
|||
/* Stub: there is nothing to tear down. */
void IconvUninitInputTranscoder(void)
{
    /* intentionally empty */
}
|||
|
|||
/* Decode one character from the input stream through iconv.
**
** firstByte - first byte of the character, already read by the caller
** in        - stream whose iconvptr holds the conversion descriptor and
**             whose source supplies any further bytes
** bytesRead - receives the number of input bytes consumed for this call
**
** Bytes are fed to iconv() one more at a time until it converts a full
** character.  The converted output is read as UTF-32LE octets and the
** resulting code point is returned.  EndOfStream is returned when the
** source runs dry in mid-sequence or when TC_INBUFSIZE bytes did not
** produce a character.
*/
int IconvGetChar(byte firstByte, StreamIn * in, uint * bytesRead)
{
    iconv_t cd;
    TidyInputSource * source;
    char inbuf[TC_INBUFSIZE] = { 0 };
    char outbuf[TC_OUTBUFSIZE] = { 0 };
    size_t inbufsize = 0;

    assert( in != NULL );
    assert( &in->source != NULL );
    assert( bytesRead != NULL );
    assert( in->iconvptr != 0 );

    cd = (iconv_t)in->iconvptr;
    source = &in->source;

    inbuf[inbufsize++] = (char)firstByte;

    while(inbufsize < TC_INBUFSIZE)
    {
        /* iconv() advances these, so restart from the buffer heads on
        ** every attempt
        */
        char * outbufptr = (char*)outbuf;
        char * inbufptr  = (char*)inbuf;
        size_t readNow = inbufsize;
        size_t writeNow = TC_OUTBUFSIZE;
        size_t result = 0;
        int iconv_errno = 0;
        int nextByte = EndOfStream;

        result = iconv(cd, (const char**)&inbufptr, &readNow, (char**)&outbufptr, &writeNow);
        iconv_errno = errno;

        if (result != (size_t)(-1))
        {
            uint c;

            /* create codepoint from UTF-32LE octets.  The most
            ** significant octet belongs at bit 24; the previous shift by
            ** 32 is undefined for a 32-bit int.  Unsigned arithmetic keeps
            ** the shifts well defined for every octet value.
            */
            c  = (uint)(unsigned char)outbuf[0];
            c |= (uint)(unsigned char)outbuf[1] << 8;
            c |= (uint)(unsigned char)outbuf[2] << 16;
            c |= (uint)(unsigned char)outbuf[3] << 24;

            /* set number of read bytes */
            *bytesRead = (uint)inbufsize;

            return (int)c;
        }

        assert( iconv_errno != EILSEQ ); /* broken multibyte sequence */
        assert( iconv_errno != E2BIG );  /* not enough memory */
        assert( iconv_errno == EINVAL ); /* incomplete sequence */

        /* we need more bytes */
        nextByte = source->getByte(source->sourceData);

        if (nextByte == EndOfStream)
        {
            /* todo: error message for broken stream? */

            *bytesRead = (uint)inbufsize;
            return EndOfStream;
        }

        inbuf[inbufsize++] = (char)nextByte;
    }

    /* No full character found after reading TC_INBUFSIZE bytes, */
    /* give up to read this stream, it's obviously unreadable.   */

    /* todo: error message for broken stream? */

    /* report the consumed bytes here too, so the out-parameter is never
    ** left unset
    */
    *bytesRead = (uint)inbufsize;
    return EndOfStream;
}
|||
|
|||
#endif /* TIDY_ICONV_SUPPORT */ |
@ -0,0 +1,14 @@ |
|||
#ifndef __ICONVTC_H__ |
|||
#define __ICONVTC_H__ |
|||
#ifdef TIDY_ICONV_SUPPORT |
|||
|
|||
/* iconvtc.h -- Interface to iconv transcoding routines
|
|||
|
|||
(c) 1998-2003 (W3C) MIT, ERCIM, Keio University |
|||
See tidy.h for the copyright notice. |
|||
|
|||
*/ |
|||
|
|||
|
|||
#endif /* TIDY_ICONV_SUPPORT */ |
|||
#endif /* __ICONVTC_H__ */ |
@ -0,0 +1,380 @@ |
|||
/* istack.c -- inline stack for compatibility with Mosaic
|
|||
|
|||
(c) 1998-2006 (W3C) MIT, ERCIM, Keio University |
|||
See tidy.h for the copyright notice. |
|||
|
|||
*/ |
|||
|
|||
#include "tidy-int.h" |
|||
#include "lexer.h" |
|||
#include "attrs.h" |
|||
#include "streamio.h" |
|||
#include "tmbstr.h" |
|||
#if !defined(NDEBUG) && defined(_MSC_VER) |
|||
#include "sprtf.h" |
|||
#endif |
|||
|
|||
/* duplicate attributes */ |
|||
AttVal *TY_(DupAttrs)( TidyDocImpl* doc, AttVal *attrs) |
|||
{ |
|||
AttVal *newattrs; |
|||
|
|||
if (attrs == NULL) |
|||
return attrs; |
|||
|
|||
newattrs = TY_(NewAttribute)(doc); |
|||
*newattrs = *attrs; |
|||
newattrs->next = TY_(DupAttrs)( doc, attrs->next ); |
|||
newattrs->attribute = TY_(tmbstrdup)(doc->allocator, attrs->attribute); |
|||
newattrs->value = TY_(tmbstrdup)(doc->allocator, attrs->value); |
|||
newattrs->dict = TY_(FindAttribute)(doc, newattrs); |
|||
newattrs->asp = attrs->asp ? TY_(CloneNode)(doc, attrs->asp) : NULL; |
|||
newattrs->php = attrs->php ? TY_(CloneNode)(doc, attrs->php) : NULL; |
|||
return newattrs; |
|||
} |
|||
|
|||
/* Decide whether node may go on the inline stack.
**
** A node qualifies only if it has a tag whose content model includes
** CM_INLINE but not CM_OBJECT, and it is neither INS nor DEL.
*/
static Bool IsNodePushable( Node *node )
{
    Bool pushable = yes;

    if (node->tag == NULL)
        pushable = no;
    else if (!(node->tag->model & CM_INLINE))
        pushable = no;
    else if (node->tag->model & CM_OBJECT)
        pushable = no;
    /* Issue #92: OLD problem of ins and del which are marked as both
    ** inline and block, thus should NOT ever be 'inserted'
    */
    else if (nodeIsINS(node) || nodeIsDEL(node))
        pushable = no;

    return pushable;
}
|||
|
|||
/*
|
|||
push a copy of an inline node onto stack |
|||
but don't push if implicit or OBJECT or APPLET |
|||
(implicit tags are ones generated from the istack) |
|||
|
|||
One issue arises with pushing inlines when |
|||
the tag is already pushed. For instance: |
|||
|
|||
<p><em>text |
|||
<p><em>more text |
|||
|
|||
Shouldn't be mapped to |
|||
|
|||
<p><em>text</em></p> |
|||
<p><em><em>more text</em></em> |
|||
*/ |
|||
void TY_(PushInline)( TidyDocImpl* doc, Node *node ) |
|||
{ |
|||
Lexer* lexer = doc->lexer; |
|||
IStack *istack; |
|||
|
|||
if (node->implicit) |
|||
return; |
|||
|
|||
if ( !IsNodePushable(node) ) |
|||
return; |
|||
|
|||
if ( !nodeIsFONT(node) && TY_(IsPushed)(doc, node) ) |
|||
return; |
|||
|
|||
/* make sure there is enough space for the stack */ |
|||
if (lexer->istacksize + 1 > lexer->istacklength) |
|||
{ |
|||
if (lexer->istacklength == 0) |
|||
lexer->istacklength = 6; /* this is perhaps excessive */ |
|||
|
|||
lexer->istacklength = lexer->istacklength * 2; |
|||
lexer->istack = (IStack *)TidyDocRealloc(doc, lexer->istack, |
|||
sizeof(IStack)*(lexer->istacklength)); |
|||
} |
|||
|
|||
istack = &(lexer->istack[lexer->istacksize]); |
|||
istack->tag = node->tag; |
|||
|
|||
istack->element = TY_(tmbstrdup)(doc->allocator, node->element); |
|||
istack->attributes = TY_(DupAttrs)( doc, node->attributes ); |
|||
++(lexer->istacksize); |
|||
} |
|||
|
|||
/* Remove the top entry of the inline stack, freeing its attribute list
** and its element name.  The caller must ensure the stack is not empty.
*/
static void PopIStack( TidyDocImpl* doc )
{
    Lexer* lexer = doc->lexer;
    IStack *top;
    AttVal *attr, *following;

    lexer->istacksize -= 1;
    top = lexer->istack + lexer->istacksize;

    for (attr = top->attributes; attr != NULL; attr = following)
    {
        /* unlink before freeing so the list head never dangles */
        following = attr->next;
        top->attributes = following;
        TY_(FreeAttribute)( doc, attr );
    }

    TidyDocFree(doc, top->element);
    top->element = NULL;  /* remove the freed element */
}
|||
|
|||
/* Pop entries off the inline stack until one whose tag id equals tid has
** been popped, or the stack is empty.
*/
static void PopIStackUntil( TidyDocImpl* doc, TidyTagId tid )
{
    Lexer* lexer = doc->lexer;

    while (lexer->istacksize > 0)
    {
        PopIStack( doc );

        /* istacksize now indexes the slot that was just popped; its tag
        ** field is still readable
        */
        if ( lexer->istack[lexer->istacksize].tag->id == tid )
            return;
    }
}
|||
|
|||
/* pop inline stack */ |
|||
void TY_(PopInline)( TidyDocImpl* doc, Node *node ) |
|||
{ |
|||
Lexer* lexer = doc->lexer; |
|||
|
|||
if (node) |
|||
{ |
|||
if ( !IsNodePushable(node) ) |
|||
return; |
|||
|
|||
/* if node is </a> then pop until we find an <a> */ |
|||
if ( nodeIsA(node) ) |
|||
{ |
|||
PopIStackUntil( doc, TidyTag_A ); |
|||
return; |
|||
} |
|||
} |
|||
|
|||
if (lexer->istacksize > 0) |
|||
{ |
|||
PopIStack( doc ); |
|||
|
|||
/* #427822 - fix by Randy Waki 7 Aug 00 */ |
|||
if (lexer->insert >= lexer->istack + lexer->istacksize) |
|||
lexer->insert = NULL; |
|||
} |
|||
} |
|||
|
|||
/* Report whether any entry of the inline stack has the same tag as node.
** The stack is searched from the top down.
*/
Bool TY_(IsPushed)( TidyDocImpl* doc, Node *node )
{
    Lexer* lexer = doc->lexer;
    int idx = lexer->istacksize - 1;

    while (idx >= 0)
    {
        if (lexer->istack[idx].tag == node->tag)
            return yes;
        --idx;
    }

    return no;
}
|||
|
|||
/*
|
|||
Test whether the last element on the stack has the same type than "node". |
|||
*/ |
|||
Bool TY_(IsPushedLast)( TidyDocImpl* doc, Node *element, Node *node ) |
|||
{ |
|||
Lexer* lexer = doc->lexer; |
|||
|
|||
if ( element && !IsNodePushable(element) ) |
|||
return no; |
|||
|
|||
if (lexer->istacksize > 0) { |
|||
if (lexer->istack[lexer->istacksize - 1].tag == node->tag) { |
|||
return yes; |
|||
} |
|||
} |
|||
|
|||
return no; |
|||
} |
|||
|
|||
/*
|
|||
This has the effect of inserting "missing" inline |
|||
elements around the contents of blocklevel elements |
|||
such as P, TD, TH, DIV, PRE etc. This procedure is |
|||
called at the start of ParseBlock. when the inline |
|||
stack is not empty, as will be the case in: |
|||
|
|||
<i><h1>italic heading</h1></i> |
|||
|
|||
which is then treated as equivalent to |
|||
|
|||
<h1><i>italic heading</i></h1> |
|||
|
|||
This is implemented by setting the lexer into a mode |
|||
where it gets tokens from the inline stack rather than |
|||
from the input stream. |
|||
*/ |
|||
int TY_(InlineDup)( TidyDocImpl* doc, Node* node ) |
|||
{ |
|||
Lexer* lexer = doc->lexer; |
|||
int n; |
|||
|
|||
if ((n = lexer->istacksize - lexer->istackbase) > 0) |
|||
{ |
|||
lexer->insert = &(lexer->istack[lexer->istackbase]); |
|||
lexer->inode = node; |
|||
} |
|||
|
|||
return n; |
|||
} |
|||
|
|||
/*
|
|||
defer duplicates when entering a table or other |
|||
element where the inlines shouldn't be duplicated |
|||
*/ |
|||
void TY_(DeferDup)( TidyDocImpl* doc ) |
|||
{ |
|||
doc->lexer->insert = NULL; |
|||
doc->lexer->inode = NULL; |
|||
} |
|||
|
|||
/* Produce the next token while the lexer is replaying the inline stack.
**
** If no insert position is pending, the deferred node (lexer->inode) is
** handed back and cleared.  Otherwise a new implicit start tag is built
** from the stack entry at lexer->insert, and the insert position moves on
** to the next entry, or to NULL once the top of the stack is passed.
*/
Node *TY_(InsertedToken)( TidyDocImpl* doc )
{
    Lexer* lexer = doc->lexer;
    Node *token;
    IStack *entry;
    uint following;

    /* this will only be NULL if inode != NULL */
    if (lexer->insert == NULL)
    {
        token = lexer->inode;
        lexer->inode = NULL;
        return token;
    }

    /*
      If this is the "latest" node then update
      the position, otherwise use current values
    */
    if (lexer->inode == NULL)
    {
        lexer->lines = doc->docIn->curline;
        lexer->columns = doc->docIn->curcol;
    }

    entry = lexer->insert;

    token = TY_(NewNode)(doc->allocator, lexer);
    token->type = StartTag;
    token->implicit = yes;
    token->start = lexer->txtstart;
    /* #431734 [JTidy bug #226261 (was 126261)] - fix by Gary Peskin 20 Dec 00 */
    token->end = lexer->txtend; /* was : lexer->txtstart; */

/* #if 0 && defined(_DEBUG) */
#if !defined(NDEBUG) && defined(_MSC_VER)
    if ( lexer->istacksize == 0 )
    {
        SPRTF( "WARNING: ZERO sized istack!\n" );
    }
#endif

    token->element = TY_(tmbstrdup)(doc->allocator, entry->element);
    token->tag = entry->tag;
    token->attributes = TY_(DupAttrs)( doc, entry->attributes );

    /* advance lexer to next item on the stack,
    ** and recover state if we have reached the end
    */
    following = (uint)(lexer->insert - &(lexer->istack[0])) + 1;
    lexer->insert = (following < lexer->istacksize)
                        ? &(lexer->istack[following])
                        : NULL;

    return token;
}
|||
|
|||
|
|||
/*
|
|||
We have two CM_INLINE elements pushed ... the first is closing, |
|||
but, like the browser, the second should be retained ... |
|||
Like <b>bold <i>bold and italics</b> italics only</i> |
|||
This function switches the tag positions on the stack, |
|||
returning 'yes' if both were found in the expected order. |
|||
*/ |
|||
Bool TY_(SwitchInline)( TidyDocImpl* doc, Node* element, Node* node ) |
|||
{ |
|||
Lexer* lexer = doc->lexer; |
|||
if ( lexer |
|||
&& element && element->tag |
|||
&& node && node->tag |
|||
&& TY_(IsPushed)( doc, element ) |
|||
&& TY_(IsPushed)( doc, node ) |
|||
&& ((lexer->istacksize - lexer->istackbase) >= 2) ) |
|||
{ |
|||
/* we have a chance of succeeding ... */ |
|||
int i; |
|||
for (i = (lexer->istacksize - lexer->istackbase - 1); i >= 0; --i) |
|||
{ |
|||
if (lexer->istack[i].tag == element->tag) { |
|||
/* found the element tag - phew */ |
|||
IStack *istack1 = &lexer->istack[i]; |
|||
IStack *istack2 = NULL; |
|||
--i; /* back one more, and continue */ |
|||
for ( ; i >= 0; --i) |
|||
{ |
|||
if (lexer->istack[i].tag == node->tag) |
|||
{ |
|||
/* found the element tag - phew */ |
|||
istack2 = &lexer->istack[i]; |
|||
break; |
|||
} |
|||
} |
|||
if ( istack2 ) |
|||
{ |
|||
/* perform the swap */ |
|||
IStack tmp_istack = *istack2; |
|||
*istack2 = *istack1; |
|||
*istack1 = tmp_istack; |
|||
return yes; |
|||
} |
|||
} |
|||
} |
|||
} |
|||
return no; |
|||
} |
|||
|
|||
/*
|
|||
We want to push a specific a specific element on the stack, |
|||
but it may not be the last element, which InlineDup() |
|||
would handle. Return yes, if found and inserted. |
|||
*/ |
|||
Bool TY_(InlineDup1)( TidyDocImpl* doc, Node* node, Node* element ) |
|||
{ |
|||
Lexer* lexer = doc->lexer; |
|||
int n, i; |
|||
if ( element |
|||
&& (element->tag != NULL) |
|||
&& ((n = lexer->istacksize - lexer->istackbase) > 0) ) |
|||
{ |
|||
for ( i = n - 1; i >=0; --i ) { |
|||
if (lexer->istack[i].tag == element->tag) { |
|||
/* found our element tag - insert it */ |
|||
lexer->insert = &(lexer->istack[i]); |
|||
lexer->inode = node; |
|||
return yes; |
|||
} |
|||
} |
|||
} |
|||
return no; |
|||
} |
|||
|
|||
/*
|
|||
* local variables: |
|||
* mode: c |
|||
* indent-tabs-mode: nil |
|||
* c-basic-offset: 4 |
|||
* eval: (c-set-offset 'substatement-open 0) |
|||
* end: |
|||
*/ |
@ -0,0 +1,959 @@ |
|||
/*
|
|||
* language.c |
|||
* Localization support for HTML Tidy. |
|||
* |
|||
* (c) 2015 HTACG |
|||
* See tidy.h and access.h for the copyright notice. |
|||
* |
|||
* Created by Jim Derry on 11/28/15. |
|||
*/ |
|||
|
|||
#include "language.h" |
|||
#include "language_en.h" |
|||
#if SUPPORT_LOCALIZATIONS |
|||
#include "language_en_gb.h" |
|||
#include "language_es.h" |
|||
#include "language_es_mx.h" |
|||
#include "language_zh_cn.h" |
|||
#include "language_fr.h" |
|||
#endif |
|||
#include "tmbstr.h" |
|||
#include "locale.h" |
|||
|
|||
|
|||
/**
|
|||
* This structure type provides universal access to all of Tidy's strings. |
|||
*/ |
|||
typedef struct { |
|||
languageDefinition *currentLanguage; |
|||
languageDefinition *fallbackLanguage; |
|||
languageDefinition *languages[]; |
|||
} tidyLanguagesType; |
|||
|
|||
|
|||
/**
|
|||
* This single structure contains all localizations. Note that we preset |
|||
* `.currentLanguage` to language_en, which is Tidy's default language. |
|||
*/ |
|||
static tidyLanguagesType tidyLanguages = { |
|||
&language_en, /* current language */ |
|||
&language_en, /* first fallback language */ |
|||
{ |
|||
/* Required localization! */ |
|||
&language_en, |
|||
#if SUPPORT_LOCALIZATIONS |
|||
/* These additional languages are installed. */ |
|||
&language_en_gb, |
|||
&language_es, |
|||
&language_es_mx, |
|||
&language_zh_cn, |
|||
&language_fr, |
|||
#endif |
|||
NULL /* This array MUST be null terminated. */ |
|||
} |
|||
}; |
|||
|
|||
|
|||
/**
|
|||
* This structure maps old-fashioned Windows strings |
|||
* to proper POSIX names (modern Windows already uses |
|||
* POSIX names). |
|||
*/ |
|||
static const tidyLocaleMapItem localeMappings[] = { |
|||
{ "america", "en_us" }, |
|||
{ "american english", "en_us" }, |
|||
{ "american-english", "en_us" }, |
|||
{ "american", "en_us" }, |
|||
{ "aus", "en_au" }, |
|||
{ "australia", "en_au" }, |
|||
{ "australian", "en_au" }, |
|||
{ "austria", "de_at" }, |
|||
{ "aut", "de_at" }, |
|||
{ "bel", "nl_be" }, |
|||
{ "belgian", "nl_be" }, |
|||
{ "belgium", "nl_be" }, |
|||
{ "bra", "pt-br" }, |
|||
{ "brazil", "pt-br" }, |
|||
{ "britain", "en_gb" }, |
|||
{ "can", "en_ca" }, |
|||
{ "canada", "en_ca" }, |
|||
{ "canadian", "en_ca" }, |
|||
{ "che", "de_ch" }, |
|||
{ "china", "zh_cn" }, |
|||
{ "chinese-simplified", "zh" }, |
|||
{ "chinese-traditional", "zh_tw" }, |
|||
{ "chinese", "zh" }, |
|||
{ "chn", "zh_cn" }, |
|||
{ "chs", "zh" }, |
|||
{ "cht", "zh_tw" }, |
|||
{ "csy", "cs" }, |
|||
{ "cze", "cs_cz" }, |
|||
{ "czech", "cs_cz" }, |
|||
{ "dan", "da" }, |
|||
{ "danish", "da" }, |
|||
{ "dea", "de_at" }, |
|||
{ "denmark", "da_dk" }, |
|||
{ "des", "de_ch" }, |
|||
{ "deu", "de" }, |
|||
{ "dnk", "da_dk" }, |
|||
{ "dutch-belgian", "nl_be" }, |
|||
{ "dutch", "nl" }, |
|||
{ "ell", "el" }, |
|||
{ "ena", "en_au" }, |
|||
{ "enc", "en_ca" }, |
|||
{ "eng", "eb_gb" }, |
|||
{ "england", "en_gb" }, |
|||
{ "english-american", "en_us" }, |
|||
{ "english-aus", "en_au" }, |
|||
{ "english-can", "en_ca" }, |
|||
{ "english-nz", "en_nz" }, |
|||
{ "english-uk", "eb_gb" }, |
|||
{ "english-us", "en_us" }, |
|||
{ "english-usa", "en_us" }, |
|||
{ "english", "en" }, |
|||
{ "enu", "en_us" }, |
|||
{ "enz", "en_nz" }, |
|||
{ "esm", "es-mx" }, |
|||
{ "esn", "es" }, |
|||
{ "esp", "es" }, |
|||
{ "fin", "fi" }, |
|||
{ "finland", "fi_fi" }, |
|||
{ "finnish", "fi" }, |
|||
{ "fra", "fr" }, |
|||
{ "france", "fr_fr" }, |
|||
{ "frb", "fr_be" }, |
|||
{ "frc", "fr_ca" }, |
|||
{ "french-belgian", "fr_be" }, |
|||
{ "french-canadian", "fr_ca" }, |
|||
{ "french-swiss", "fr_ch" }, |
|||
{ "french", "fr" }, |
|||
{ "frs", "fr_ch" }, |
|||
{ "gbr", "en_gb" }, |
|||
{ "german-austrian", "de_at" }, |
|||
{ "german-swiss", "de_ch" }, |
|||
{ "german", "de" }, |
|||
{ "germany", "de_de" }, |
|||
{ "grc", "el_gr" }, |
|||
{ "great britain", "en_gb" }, |
|||
{ "greece", "el_gr" }, |
|||
{ "greek", "el" }, |
|||
{ "hkg", "zh_hk" }, |
|||
{ "holland", "nl_nl" }, |
|||
{ "hong kong", "zh_hk" }, |
|||
{ "hong-kong", "zh_hk" }, |
|||
{ "hun", "hu" }, |
|||
{ "hungarian", "hu" }, |
|||
{ "hungary", "hu_hu" }, |
|||
{ "iceland", "is_is" }, |
|||
{ "icelandic", "is" }, |
|||
{ "ireland", "en_ie" }, |
|||
{ "irl", "en_ie" }, |
|||
{ "isl", "is" }, |
|||
{ "ita", "it" }, |
|||
{ "ita", "it_it" }, |
|||
{ "italian-swiss", "it_ch" }, |
|||
{ "italian", "it" }, |
|||
{ "italy", "it_it" }, |
|||
{ "its", "it_ch" }, |
|||
{ "japan", "ja_jp" }, |
|||
{ "japanese", "ja" }, |
|||
{ "jpn", "ja" }, |
|||
{ "kor", "ko" }, |
|||
{ "korea", "ko_kr" }, |
|||
{ "korean", "ko" }, |
|||
{ "mex", "es-mx" }, |
|||
{ "mexico", "es-mx" }, |
|||
{ "netherlands", "nl_nl" }, |
|||
{ "new zealand", "en_nz" }, |
|||
{ "new-zealand", "en_nz" }, |
|||
{ "nlb", "nl_be" }, |
|||
{ "nld", "nl" }, |
|||
{ "non", "nn" }, |
|||
{ "nor", "nb" }, |
|||
{ "norway", "no" }, |
|||
{ "norwegian-bokmal", "nb" }, |
|||
{ "norwegian-nynorsk", "nn" }, |
|||
{ "norwegian", "no" }, |
|||
{ "nz", "en_nz" }, |
|||
{ "nzl", "en_nz" }, |
|||
{ "plk", "pl" }, |
|||
{ "pol", "pl-pl" }, |
|||
{ "poland", "pl-pl" }, |
|||
{ "polish", "pl" }, |
|||
{ "portugal", "pt-pt" }, |
|||
{ "portuguese-brazil", "pt-br" }, |
|||
{ "portuguese", "pt" }, |
|||
{ "pr china", "zh_cn" }, |
|||
{ "pr-china", "zh_cn" }, |
|||
{ "prt", "pt-pt" }, |
|||
{ "ptb", "pt-br" }, |
|||
{ "ptg", "pt" }, |
|||
{ "rus", "ru" }, |
|||
{ "russia", "ru-ru" }, |
|||
{ "russian", "ru" }, |
|||
{ "sgp", "zh_sg" }, |
|||
{ "singapore", "zh_sg" }, |
|||
{ "sky", "sk" }, |
|||
{ "slovak", "sk" }, |
|||
{ "spain", "es-es" }, |
|||
{ "spanish-mexican", "es-mx" }, |
|||
{ "spanish-modern", "es" }, |
|||
{ "spanish", "es" }, |
|||
{ "sve", "sv" }, |
|||
{ "svk", "sk-sk" }, |
|||
{ "swe", "sv-se" }, |
|||
{ "sweden", "sv-se" }, |
|||
{ "swedish", "sv" }, |
|||
{ "swiss", "de_ch" }, |
|||
{ "switzerland", "de_ch" }, |
|||
{ "taiwan", "zh_tw" }, |
|||
{ "trk", "tr" }, |
|||
{ "tur", "tr-tr" }, |
|||
{ "turkey", "tr-tr" }, |
|||
{ "turkish", "tr" }, |
|||
{ "twn", "zh_tw" }, |
|||
{ "uk", "en_gb" }, |
|||
{ "united kingdom", "en_gb" }, |
|||
{ "united states", "en_us" }, |
|||
{ "united-kingdom", "en_gb" }, |
|||
{ "united-states", "en_us" }, |
|||
{ "us", "en_us" }, |
|||
{ "usa", "en_us" }, |
|||
|
|||
/* MUST be last. */ |
|||
{ NULL, NULL } |
|||
}; |
|||
|
|||
|
|||
/**
|
|||
* LibTidy users may want to use `TidyReportFilter3` to enable their own |
|||
* localization lookup features. Because Tidy's errors codes are enums the |
|||
* specific values can change over time. This table will ensure that LibTidy |
|||
* users always have a static value available for use. |
|||
*/ |
|||
static const tidyErrorFilterKeyItem tidyErrorFilterKeysStruct[] = { |
|||
/* This blocks of codes comes from `tidyErrorCodes` enum. */ |
|||
{ "CODES_TIDY_ERROR_FIRST", CODES_TIDY_ERROR_FIRST }, |
|||
{ "MISSING_SEMICOLON", MISSING_SEMICOLON }, |
|||
{ "MISSING_SEMICOLON_NCR", MISSING_SEMICOLON_NCR }, |
|||
{ "UNKNOWN_ENTITY", UNKNOWN_ENTITY }, |
|||
{ "UNESCAPED_AMPERSAND", UNESCAPED_AMPERSAND }, |
|||
{ "APOS_UNDEFINED", APOS_UNDEFINED }, |
|||
{ "MISSING_ENDTAG_FOR", MISSING_ENDTAG_FOR }, |
|||
{ "MISSING_ENDTAG_BEFORE", MISSING_ENDTAG_BEFORE }, |
|||
{ "DISCARDING_UNEXPECTED", DISCARDING_UNEXPECTED }, |
|||
{ "NESTED_EMPHASIS", NESTED_EMPHASIS }, |
|||
{ "NON_MATCHING_ENDTAG", NON_MATCHING_ENDTAG }, |
|||
{ "TAG_NOT_ALLOWED_IN", TAG_NOT_ALLOWED_IN }, |
|||
{ "MISSING_STARTTAG", MISSING_STARTTAG }, |
|||
{ "UNEXPECTED_ENDTAG", UNEXPECTED_ENDTAG }, |
|||
{ "USING_BR_INPLACE_OF", USING_BR_INPLACE_OF }, |
|||
{ "INSERTING_TAG", INSERTING_TAG }, |
|||
{ "SUSPECTED_MISSING_QUOTE", SUSPECTED_MISSING_QUOTE }, |
|||
{ "MISSING_TITLE_ELEMENT", MISSING_TITLE_ELEMENT }, |
|||
{ "DUPLICATE_FRAMESET", DUPLICATE_FRAMESET }, |
|||
{ "CANT_BE_NESTED", CANT_BE_NESTED }, |
|||
{ "OBSOLETE_ELEMENT", OBSOLETE_ELEMENT }, |
|||
{ "PROPRIETARY_ELEMENT", PROPRIETARY_ELEMENT }, |
|||
{ "ELEMENT_VERS_MISMATCH_ERROR", ELEMENT_VERS_MISMATCH_ERROR }, |
|||
{ "ELEMENT_VERS_MISMATCH_WARN", ELEMENT_VERS_MISMATCH_WARN }, |
|||
{ "UNKNOWN_ELEMENT", UNKNOWN_ELEMENT }, |
|||
{ "TRIM_EMPTY_ELEMENT", TRIM_EMPTY_ELEMENT }, |
|||
{ "COERCE_TO_ENDTAG", COERCE_TO_ENDTAG }, |
|||
{ "ILLEGAL_NESTING", ILLEGAL_NESTING }, |
|||
{ "NOFRAMES_CONTENT", NOFRAMES_CONTENT }, |
|||
{ "CONTENT_AFTER_BODY", CONTENT_AFTER_BODY }, |
|||
{ "INCONSISTENT_VERSION", INCONSISTENT_VERSION }, |
|||
{ "MALFORMED_COMMENT", MALFORMED_COMMENT }, |
|||
{ "BAD_COMMENT_CHARS", BAD_COMMENT_CHARS }, |
|||
{ "BAD_XML_COMMENT", BAD_XML_COMMENT }, |
|||
{ "BAD_CDATA_CONTENT", BAD_CDATA_CONTENT }, |
|||
{ "INCONSISTENT_NAMESPACE", INCONSISTENT_NAMESPACE }, |
|||
{ "DOCTYPE_AFTER_TAGS", DOCTYPE_AFTER_TAGS }, |
|||
{ "MALFORMED_DOCTYPE", MALFORMED_DOCTYPE }, |
|||
{ "UNEXPECTED_END_OF_FILE", UNEXPECTED_END_OF_FILE }, |
|||
{ "DTYPE_NOT_UPPER_CASE", DTYPE_NOT_UPPER_CASE }, |
|||
{ "TOO_MANY_ELEMENTS", TOO_MANY_ELEMENTS }, |
|||
{ "UNESCAPED_ELEMENT", UNESCAPED_ELEMENT }, |
|||
{ "NESTED_QUOTATION", NESTED_QUOTATION }, |
|||
{ "ELEMENT_NOT_EMPTY", ELEMENT_NOT_EMPTY }, |
|||
{ "ENCODING_IO_CONFLICT", ENCODING_IO_CONFLICT }, |
|||
{ "MIXED_CONTENT_IN_BLOCK", MIXED_CONTENT_IN_BLOCK }, |
|||
{ "MISSING_DOCTYPE", MISSING_DOCTYPE }, |
|||
{ "SPACE_PRECEDING_XMLDECL", SPACE_PRECEDING_XMLDECL }, |
|||
{ "TOO_MANY_ELEMENTS_IN", TOO_MANY_ELEMENTS_IN }, |
|||
{ "UNEXPECTED_ENDTAG_IN", UNEXPECTED_ENDTAG_IN }, |
|||
{ "REPLACING_ELEMENT", REPLACING_ELEMENT }, |
|||
{ "REPLACING_UNEX_ELEMENT", REPLACING_UNEX_ELEMENT }, |
|||
{ "COERCE_TO_ENDTAG_WARN", COERCE_TO_ENDTAG_WARN }, |
|||
{ "UNKNOWN_ATTRIBUTE", UNKNOWN_ATTRIBUTE }, |
|||
{ "INSERTING_ATTRIBUTE", INSERTING_ATTRIBUTE }, |
|||
{ "INSERTING_AUTO_ATTRIBUTE", INSERTING_AUTO_ATTRIBUTE }, |
|||
{ "MISSING_ATTR_VALUE", MISSING_ATTR_VALUE }, |
|||
{ "BAD_ATTRIBUTE_VALUE", BAD_ATTRIBUTE_VALUE }, |
|||
{ "UNEXPECTED_GT", UNEXPECTED_GT }, |
|||
{ "PROPRIETARY_ATTRIBUTE", PROPRIETARY_ATTRIBUTE }, |
|||
{ "MISMATCHED_ATTRIBUTE_ERROR", MISMATCHED_ATTRIBUTE_ERROR }, |
|||
{ "MISMATCHED_ATTRIBUTE_WARN", MISMATCHED_ATTRIBUTE_WARN }, |
|||
{ "PROPRIETARY_ATTR_VALUE", PROPRIETARY_ATTR_VALUE }, |
|||
{ "REPEATED_ATTRIBUTE", REPEATED_ATTRIBUTE }, |
|||
{ "MISSING_IMAGEMAP", MISSING_IMAGEMAP }, |
|||
{ "XML_ATTRIBUTE_VALUE", XML_ATTRIBUTE_VALUE }, |
|||
{ "UNEXPECTED_QUOTEMARK", UNEXPECTED_QUOTEMARK }, |
|||
{ "MISSING_QUOTEMARK", MISSING_QUOTEMARK }, |
|||
{ "ID_NAME_MISMATCH", ID_NAME_MISMATCH }, |
|||
{ "BACKSLASH_IN_URI", BACKSLASH_IN_URI }, |
|||
{ "FIXED_BACKSLASH", FIXED_BACKSLASH }, |
|||
{ "ILLEGAL_URI_REFERENCE", ILLEGAL_URI_REFERENCE }, |
|||
{ "ESCAPED_ILLEGAL_URI", ESCAPED_ILLEGAL_URI }, |
|||
{ "NEWLINE_IN_URI", NEWLINE_IN_URI }, |
|||
{ "ANCHOR_NOT_UNIQUE", ANCHOR_NOT_UNIQUE }, |
|||
{ "JOINING_ATTRIBUTE", JOINING_ATTRIBUTE }, |
|||
{ "UNEXPECTED_EQUALSIGN", UNEXPECTED_EQUALSIGN }, |
|||
{ "ATTR_VALUE_NOT_LCASE", ATTR_VALUE_NOT_LCASE }, |
|||
{ "XML_ID_SYNTAX", XML_ID_SYNTAX }, |
|||
{ "INVALID_ATTRIBUTE", INVALID_ATTRIBUTE }, |
|||
{ "BAD_ATTRIBUTE_VALUE_REPLACED", BAD_ATTRIBUTE_VALUE_REPLACED }, |
|||
{ "INVALID_XML_ID", INVALID_XML_ID }, |
|||
{ "UNEXPECTED_END_OF_FILE_ATTR", UNEXPECTED_END_OF_FILE_ATTR }, |
|||
{ "MISSING_ATTRIBUTE", MISSING_ATTRIBUTE }, |
|||
{ "WHITE_IN_URI", WHITE_IN_URI }, |
|||
{ "REMOVED_HTML5", REMOVED_HTML5 }, |
|||
{ "BAD_SUMMARY_HTML5", BAD_SUMMARY_HTML5 }, |
|||
{ "PREVIOUS_LOCATION", PREVIOUS_LOCATION }, |
|||
{ "VENDOR_SPECIFIC_CHARS", VENDOR_SPECIFIC_CHARS }, |
|||
{ "INVALID_SGML_CHARS", INVALID_SGML_CHARS }, |
|||
{ "INVALID_UTF8", INVALID_UTF8 }, |
|||
{ "INVALID_UTF16", INVALID_UTF16 }, |
|||
{ "ENCODING_MISMATCH", ENCODING_MISMATCH }, |
|||
{ "INVALID_URI", INVALID_URI }, |
|||
{ "INVALID_NCR", INVALID_NCR }, |
|||
{ "CODES_TIDY_ERROR_LAST", CODES_TIDY_ERROR_LAST }, |
|||
#if SUPPORT_ACCESSIBILITY_CHECKS |
|||
/* This blocks of codes comes from `accessErrorCodes` enum. */ |
|||
{ "FIRST_ACCESS_ERR", FIRST_ACCESS_ERR }, |
|||
{ "IMG_MISSING_ALT", IMG_MISSING_ALT }, |
|||
{ "IMG_ALT_SUSPICIOUS_FILENAME", IMG_ALT_SUSPICIOUS_FILENAME }, |
|||
{ "IMG_ALT_SUSPICIOUS_FILE_SIZE", IMG_ALT_SUSPICIOUS_FILE_SIZE }, |
|||
{ "IMG_ALT_SUSPICIOUS_PLACEHOLDER", IMG_ALT_SUSPICIOUS_PLACEHOLDER }, |
|||
{ "IMG_ALT_SUSPICIOUS_TOO_LONG", IMG_ALT_SUSPICIOUS_TOO_LONG }, |
|||
{ "IMG_MISSING_ALT_BULLET", IMG_MISSING_ALT_BULLET }, |
|||
{ "IMG_MISSING_ALT_H_RULE", IMG_MISSING_ALT_H_RULE }, |
|||
{ "IMG_MISSING_LONGDESC_DLINK", IMG_MISSING_LONGDESC_DLINK }, |
|||
{ "IMG_MISSING_DLINK", IMG_MISSING_DLINK }, |
|||
{ "IMG_MISSING_LONGDESC", IMG_MISSING_LONGDESC }, |
|||
{ "LONGDESC_NOT_REQUIRED", LONGDESC_NOT_REQUIRED }, |
|||
{ "IMG_BUTTON_MISSING_ALT", IMG_BUTTON_MISSING_ALT }, |
|||
{ "APPLET_MISSING_ALT", APPLET_MISSING_ALT }, |
|||
{ "OBJECT_MISSING_ALT", OBJECT_MISSING_ALT }, |
|||
{ "AUDIO_MISSING_TEXT_WAV", AUDIO_MISSING_TEXT_WAV }, |
|||
{ "AUDIO_MISSING_TEXT_AU", AUDIO_MISSING_TEXT_AU }, |
|||
{ "AUDIO_MISSING_TEXT_AIFF", AUDIO_MISSING_TEXT_AIFF }, |
|||
{ "AUDIO_MISSING_TEXT_SND", AUDIO_MISSING_TEXT_SND }, |
|||
{ "AUDIO_MISSING_TEXT_RA", AUDIO_MISSING_TEXT_RA }, |
|||
{ "AUDIO_MISSING_TEXT_RM", AUDIO_MISSING_TEXT_RM }, |
|||
{ "FRAME_MISSING_LONGDESC", FRAME_MISSING_LONGDESC }, |
|||
{ "AREA_MISSING_ALT", AREA_MISSING_ALT }, |
|||
{ "SCRIPT_MISSING_NOSCRIPT", SCRIPT_MISSING_NOSCRIPT }, |
|||
{ "ASCII_REQUIRES_DESCRIPTION", ASCII_REQUIRES_DESCRIPTION }, |
|||
{ "IMG_MAP_SERVER_REQUIRES_TEXT_LINKS", IMG_MAP_SERVER_REQUIRES_TEXT_LINKS }, |
|||
{ "MULTIMEDIA_REQUIRES_TEXT", MULTIMEDIA_REQUIRES_TEXT }, |
|||
{ "IMG_MAP_CLIENT_MISSING_TEXT_LINKS", IMG_MAP_CLIENT_MISSING_TEXT_LINKS }, |
|||
{ "INFORMATION_NOT_CONVEYED_IMAGE", INFORMATION_NOT_CONVEYED_IMAGE }, |
|||
{ "INFORMATION_NOT_CONVEYED_APPLET", INFORMATION_NOT_CONVEYED_APPLET }, |
|||
{ "INFORMATION_NOT_CONVEYED_OBJECT", INFORMATION_NOT_CONVEYED_OBJECT }, |
|||
{ "INFORMATION_NOT_CONVEYED_SCRIPT", INFORMATION_NOT_CONVEYED_SCRIPT }, |
|||
{ "INFORMATION_NOT_CONVEYED_INPUT", INFORMATION_NOT_CONVEYED_INPUT }, |
|||
{ "COLOR_CONTRAST_TEXT", COLOR_CONTRAST_TEXT }, |
|||
{ "COLOR_CONTRAST_LINK", COLOR_CONTRAST_LINK }, |
|||
{ "COLOR_CONTRAST_ACTIVE_LINK", COLOR_CONTRAST_ACTIVE_LINK }, |
|||
{ "COLOR_CONTRAST_VISITED_LINK", COLOR_CONTRAST_VISITED_LINK }, |
|||
{ "DOCTYPE_MISSING", DOCTYPE_MISSING }, |
|||
{ "STYLE_SHEET_CONTROL_PRESENTATION", STYLE_SHEET_CONTROL_PRESENTATION }, |
|||
{ "HEADERS_IMPROPERLY_NESTED", HEADERS_IMPROPERLY_NESTED }, |
|||
{ "POTENTIAL_HEADER_BOLD", POTENTIAL_HEADER_BOLD }, |
|||
{ "POTENTIAL_HEADER_ITALICS", POTENTIAL_HEADER_ITALICS }, |
|||
{ "POTENTIAL_HEADER_UNDERLINE", POTENTIAL_HEADER_UNDERLINE }, |
|||
{ "HEADER_USED_FORMAT_TEXT", HEADER_USED_FORMAT_TEXT }, |
|||
{ "LIST_USAGE_INVALID_UL", LIST_USAGE_INVALID_UL }, |
|||
{ "LIST_USAGE_INVALID_OL", LIST_USAGE_INVALID_OL }, |
|||
{ "LIST_USAGE_INVALID_LI", LIST_USAGE_INVALID_LI }, |
|||
{ "INDICATE_CHANGES_IN_LANGUAGE", INDICATE_CHANGES_IN_LANGUAGE }, |
|||
{ "LANGUAGE_NOT_IDENTIFIED", LANGUAGE_NOT_IDENTIFIED }, |
|||
{ "LANGUAGE_INVALID", LANGUAGE_INVALID }, |
|||
{ "DATA_TABLE_MISSING_HEADERS", DATA_TABLE_MISSING_HEADERS }, |
|||
{ "DATA_TABLE_MISSING_HEADERS_COLUMN", DATA_TABLE_MISSING_HEADERS_COLUMN }, |
|||
{ "DATA_TABLE_MISSING_HEADERS_ROW", DATA_TABLE_MISSING_HEADERS_ROW }, |
|||
{ "DATA_TABLE_REQUIRE_MARKUP_COLUMN_HEADERS", DATA_TABLE_REQUIRE_MARKUP_COLUMN_HEADERS }, |
|||
{ "DATA_TABLE_REQUIRE_MARKUP_ROW_HEADERS", DATA_TABLE_REQUIRE_MARKUP_ROW_HEADERS }, |
|||
{ "LAYOUT_TABLES_LINEARIZE_PROPERLY", LAYOUT_TABLES_LINEARIZE_PROPERLY }, |
|||
{ "LAYOUT_TABLE_INVALID_MARKUP", LAYOUT_TABLE_INVALID_MARKUP }, |
|||
{ "TABLE_MISSING_SUMMARY", TABLE_MISSING_SUMMARY }, |
|||
{ "TABLE_SUMMARY_INVALID_NULL", TABLE_SUMMARY_INVALID_NULL }, |
|||
{ "TABLE_SUMMARY_INVALID_SPACES", TABLE_SUMMARY_INVALID_SPACES }, |
|||
{ "TABLE_SUMMARY_INVALID_PLACEHOLDER", TABLE_SUMMARY_INVALID_PLACEHOLDER }, |
|||
{ "TABLE_MISSING_CAPTION", TABLE_MISSING_CAPTION }, |
|||
{ "TABLE_MAY_REQUIRE_HEADER_ABBR", TABLE_MAY_REQUIRE_HEADER_ABBR }, |
|||
{ "TABLE_MAY_REQUIRE_HEADER_ABBR_NULL", TABLE_MAY_REQUIRE_HEADER_ABBR_NULL }, |
|||
{ "TABLE_MAY_REQUIRE_HEADER_ABBR_SPACES", TABLE_MAY_REQUIRE_HEADER_ABBR_SPACES }, |
|||
{ "STYLESHEETS_REQUIRE_TESTING_LINK", STYLESHEETS_REQUIRE_TESTING_LINK }, |
|||
{ "STYLESHEETS_REQUIRE_TESTING_STYLE_ELEMENT", STYLESHEETS_REQUIRE_TESTING_STYLE_ELEMENT }, |
|||
{ "STYLESHEETS_REQUIRE_TESTING_STYLE_ATTR", STYLESHEETS_REQUIRE_TESTING_STYLE_ATTR }, |
|||
{ "FRAME_SRC_INVALID", FRAME_SRC_INVALID }, |
|||
{ "TEXT_EQUIVALENTS_REQUIRE_UPDATING_APPLET", TEXT_EQUIVALENTS_REQUIRE_UPDATING_APPLET }, |
|||
{ "TEXT_EQUIVALENTS_REQUIRE_UPDATING_SCRIPT", TEXT_EQUIVALENTS_REQUIRE_UPDATING_SCRIPT }, |
|||
{ "TEXT_EQUIVALENTS_REQUIRE_UPDATING_OBJECT", TEXT_EQUIVALENTS_REQUIRE_UPDATING_OBJECT }, |
|||
{ "PROGRAMMATIC_OBJECTS_REQUIRE_TESTING_SCRIPT", PROGRAMMATIC_OBJECTS_REQUIRE_TESTING_SCRIPT }, |
|||
{ "PROGRAMMATIC_OBJECTS_REQUIRE_TESTING_OBJECT", PROGRAMMATIC_OBJECTS_REQUIRE_TESTING_OBJECT }, |
|||
{ "PROGRAMMATIC_OBJECTS_REQUIRE_TESTING_EMBED", PROGRAMMATIC_OBJECTS_REQUIRE_TESTING_EMBED }, |
|||
{ "PROGRAMMATIC_OBJECTS_REQUIRE_TESTING_APPLET", PROGRAMMATIC_OBJECTS_REQUIRE_TESTING_APPLET }, |
|||
{ "FRAME_MISSING_NOFRAMES", FRAME_MISSING_NOFRAMES }, |
|||
{ "NOFRAMES_INVALID_NO_VALUE", NOFRAMES_INVALID_NO_VALUE }, |
|||
{ "NOFRAMES_INVALID_CONTENT", NOFRAMES_INVALID_CONTENT }, |
|||
{ "NOFRAMES_INVALID_LINK", NOFRAMES_INVALID_LINK }, |
|||
{ "REMOVE_FLICKER_SCRIPT", REMOVE_FLICKER_SCRIPT }, |
|||
{ "REMOVE_FLICKER_OBJECT", REMOVE_FLICKER_OBJECT }, |
|||
{ "REMOVE_FLICKER_EMBED", REMOVE_FLICKER_EMBED }, |
|||
{ "REMOVE_FLICKER_APPLET", REMOVE_FLICKER_APPLET }, |
|||
{ "REMOVE_FLICKER_ANIMATED_GIF", REMOVE_FLICKER_ANIMATED_GIF }, |
|||
{ "REMOVE_BLINK_MARQUEE", REMOVE_BLINK_MARQUEE }, |
|||
{ "REMOVE_AUTO_REFRESH", REMOVE_AUTO_REFRESH }, |
|||
{ "REMOVE_AUTO_REDIRECT", REMOVE_AUTO_REDIRECT }, |
|||
{ "ENSURE_PROGRAMMATIC_OBJECTS_ACCESSIBLE_SCRIPT", ENSURE_PROGRAMMATIC_OBJECTS_ACCESSIBLE_SCRIPT }, |
|||
{ "ENSURE_PROGRAMMATIC_OBJECTS_ACCESSIBLE_OBJECT", ENSURE_PROGRAMMATIC_OBJECTS_ACCESSIBLE_OBJECT }, |
|||
{ "ENSURE_PROGRAMMATIC_OBJECTS_ACCESSIBLE_APPLET", ENSURE_PROGRAMMATIC_OBJECTS_ACCESSIBLE_APPLET }, |
|||
{ "ENSURE_PROGRAMMATIC_OBJECTS_ACCESSIBLE_EMBED", ENSURE_PROGRAMMATIC_OBJECTS_ACCESSIBLE_EMBED }, |
|||
{ "IMAGE_MAP_SERVER_SIDE_REQUIRES_CONVERSION", IMAGE_MAP_SERVER_SIDE_REQUIRES_CONVERSION }, |
|||
{ "SCRIPT_NOT_KEYBOARD_ACCESSIBLE_ON_MOUSE_DOWN", SCRIPT_NOT_KEYBOARD_ACCESSIBLE_ON_MOUSE_DOWN }, |
|||
{ "SCRIPT_NOT_KEYBOARD_ACCESSIBLE_ON_MOUSE_UP", SCRIPT_NOT_KEYBOARD_ACCESSIBLE_ON_MOUSE_UP }, |
|||
{ "SCRIPT_NOT_KEYBOARD_ACCESSIBLE_ON_CLICK", SCRIPT_NOT_KEYBOARD_ACCESSIBLE_ON_CLICK }, |
|||
{ "SCRIPT_NOT_KEYBOARD_ACCESSIBLE_ON_MOUSE_OVER", SCRIPT_NOT_KEYBOARD_ACCESSIBLE_ON_MOUSE_OVER }, |
|||
{ "SCRIPT_NOT_KEYBOARD_ACCESSIBLE_ON_MOUSE_OUT", SCRIPT_NOT_KEYBOARD_ACCESSIBLE_ON_MOUSE_OUT }, |
|||
{ "SCRIPT_NOT_KEYBOARD_ACCESSIBLE_ON_MOUSE_MOVE", SCRIPT_NOT_KEYBOARD_ACCESSIBLE_ON_MOUSE_MOVE }, |
|||
{ "NEW_WINDOWS_REQUIRE_WARNING_NEW", NEW_WINDOWS_REQUIRE_WARNING_NEW }, |
|||
{ "NEW_WINDOWS_REQUIRE_WARNING_BLANK", NEW_WINDOWS_REQUIRE_WARNING_BLANK }, |
|||
{ "LABEL_NEEDS_REPOSITIONING_BEFORE_INPUT", LABEL_NEEDS_REPOSITIONING_BEFORE_INPUT }, |
|||
{ "LABEL_NEEDS_REPOSITIONING_AFTER_INPUT", LABEL_NEEDS_REPOSITIONING_AFTER_INPUT }, |
|||
{ "FORM_CONTROL_REQUIRES_DEFAULT_TEXT", FORM_CONTROL_REQUIRES_DEFAULT_TEXT }, |
|||
{ "FORM_CONTROL_DEFAULT_TEXT_INVALID_NULL", FORM_CONTROL_DEFAULT_TEXT_INVALID_NULL }, |
|||
{ "FORM_CONTROL_DEFAULT_TEXT_INVALID_SPACES", FORM_CONTROL_DEFAULT_TEXT_INVALID_SPACES }, |
|||
{ "REPLACE_DEPRECATED_HTML_APPLET", REPLACE_DEPRECATED_HTML_APPLET }, |
|||
{ "REPLACE_DEPRECATED_HTML_BASEFONT", REPLACE_DEPRECATED_HTML_BASEFONT }, |
|||
{ "REPLACE_DEPRECATED_HTML_CENTER", REPLACE_DEPRECATED_HTML_CENTER }, |
|||
{ "REPLACE_DEPRECATED_HTML_DIR", REPLACE_DEPRECATED_HTML_DIR }, |
|||
{ "REPLACE_DEPRECATED_HTML_FONT", REPLACE_DEPRECATED_HTML_FONT }, |
|||
{ "REPLACE_DEPRECATED_HTML_ISINDEX", REPLACE_DEPRECATED_HTML_ISINDEX }, |
|||
{ "REPLACE_DEPRECATED_HTML_MENU", REPLACE_DEPRECATED_HTML_MENU }, |
|||
{ "REPLACE_DEPRECATED_HTML_S", REPLACE_DEPRECATED_HTML_S }, |
|||
{ "REPLACE_DEPRECATED_HTML_STRIKE", REPLACE_DEPRECATED_HTML_STRIKE }, |
|||
{ "REPLACE_DEPRECATED_HTML_U", REPLACE_DEPRECATED_HTML_U }, |
|||
{ "FRAME_MISSING_TITLE", FRAME_MISSING_TITLE }, |
|||
{ "FRAME_TITLE_INVALID_NULL", FRAME_TITLE_INVALID_NULL }, |
|||
{ "FRAME_TITLE_INVALID_SPACES", FRAME_TITLE_INVALID_SPACES }, |
|||
{ "ASSOCIATE_LABELS_EXPLICITLY", ASSOCIATE_LABELS_EXPLICITLY }, |
|||
{ "ASSOCIATE_LABELS_EXPLICITLY_FOR", ASSOCIATE_LABELS_EXPLICITLY_FOR }, |
|||
{ "ASSOCIATE_LABELS_EXPLICITLY_ID", ASSOCIATE_LABELS_EXPLICITLY_ID }, |
|||
{ "LINK_TEXT_NOT_MEANINGFUL", LINK_TEXT_NOT_MEANINGFUL }, |
|||
{ "LINK_TEXT_MISSING", LINK_TEXT_MISSING }, |
|||
{ "LINK_TEXT_TOO_LONG", LINK_TEXT_TOO_LONG }, |
|||
{ "LINK_TEXT_NOT_MEANINGFUL_CLICK_HERE", LINK_TEXT_NOT_MEANINGFUL_CLICK_HERE }, |
|||
{ "LINK_TEXT_NOT_MEANINGFUL_MORE", LINK_TEXT_NOT_MEANINGFUL_MORE }, |
|||
{ "LINK_TEXT_NOT_MEANINGFUL_FOLLOW_THIS", LINK_TEXT_NOT_MEANINGFUL_FOLLOW_THIS }, |
|||
{ "METADATA_MISSING", METADATA_MISSING }, |
|||
{ "METADATA_MISSING_LINK", METADATA_MISSING_LINK }, |
|||
{ "METADATA_MISSING_REDIRECT_AUTOREFRESH", METADATA_MISSING_REDIRECT_AUTOREFRESH }, |
|||
{ "SKIPOVER_ASCII_ART", SKIPOVER_ASCII_ART }, |
|||
{ "LAST_ACCESS_ERR", LAST_ACCESS_ERR }, |
|||
#endif |
|||
/* This blocks of codes comes from `tidyMessagesMisc` enum. */ |
|||
{ "STRING_UNKNOWN_OPTION", STRING_UNKNOWN_OPTION }, |
|||
{ "STRING_MISSING_MALFORMED", STRING_MISSING_MALFORMED }, |
|||
{ "STRING_DOCTYPE_GIVEN", STRING_DOCTYPE_GIVEN }, |
|||
{ "STRING_HTML_PROPRIETARY", STRING_HTML_PROPRIETARY }, |
|||
{ "STRING_CONTENT_LOOKS", STRING_CONTENT_LOOKS }, |
|||
{ "STRING_NO_SYSID", STRING_NO_SYSID }, |
|||
{ NULL, 0 }, |
|||
}; |
|||
|
|||
|
|||
/**
|
|||
* Given an error code, return the string associated with it. |
|||
*/ |
|||
ctmbstr tidyErrorCodeAsString(uint code) |
|||
{ |
|||
uint i = 0; |
|||
while (tidyErrorFilterKeysStruct[i].key) { |
|||
if ( tidyErrorFilterKeysStruct[i].value == code ) |
|||
return tidyErrorFilterKeysStruct[i].key; |
|||
i++; |
|||
} |
|||
return "UNDEFINED"; |
|||
} |
|||
|
|||
|
|||
/**
|
|||
* The real string lookup function. |
|||
*/ |
|||
ctmbstr TY_(tidyLocalizedString)( uint messageType, languageDefinition *definition, uint plural ) |
|||
{ |
|||
int i; |
|||
languageDictionary *dictionary = &definition->messages; |
|||
uint pluralForm = definition->whichPluralForm(plural); |
|||
|
|||
for (i = 0; (*dictionary)[i].value; ++i) |
|||
{ |
|||
if ( (*dictionary)[i].key == messageType && (*dictionary)[i].pluralForm == pluralForm ) |
|||
{ |
|||
return (*dictionary)[i].value; |
|||
} |
|||
} |
|||
return NULL; |
|||
} |
|||
|
|||
|
|||
/**
|
|||
* Provides a string given `messageType` in the current |
|||
* localization, returning the correct plural form given |
|||
* `quantity`. |
|||
* |
|||
* This isn't currently highly optimized; rewriting some |
|||
* of infrastructure to use hash lookups is a preferred |
|||
* future optimization. |
|||
*/ |
|||
ctmbstr tidyLocalizedStringN( uint messageType, uint quantity ) |
|||
{ |
|||
ctmbstr result; |
|||
|
|||
result = TY_(tidyLocalizedString)( messageType, tidyLanguages.currentLanguage, quantity); |
|||
|
|||
if (!result && tidyLanguages.fallbackLanguage ) |
|||
{ |
|||
result = TY_(tidyLocalizedString)( messageType, tidyLanguages.fallbackLanguage, quantity); |
|||
} |
|||
|
|||
if (!result) |
|||
{ |
|||
/* Fallback to en which is built in. */ |
|||
result = TY_(tidyLocalizedString)( messageType, &language_en, quantity); |
|||
} |
|||
|
|||
if (!result) |
|||
{ |
|||
/* Last resort: Fallback to en singular which is built in. */ |
|||
result = TY_(tidyLocalizedString)( messageType, &language_en, 1); |
|||
} |
|||
|
|||
return result; |
|||
} |
|||
|
|||
|
|||
/**
|
|||
* Provides a string given `messageType` in the current |
|||
* localization, in the non-plural form. |
|||
* |
|||
* This isn't currently highly optimized; rewriting some |
|||
* of infrastructure to use hash lookups is a preferred |
|||
* future optimization. |
|||
*/ |
|||
ctmbstr tidyLocalizedString( uint messageType ) |
|||
{ |
|||
return tidyLocalizedStringN( messageType, 1 ); |
|||
} |
|||
|
|||
|
|||
/**
|
|||
** Determines the current locale without affecting the C locale. |
|||
** Tidy has always used the default C locale, and at this point |
|||
** in its development we're not going to tamper with that. |
|||
** @param result The buffer to use to return the string. |
|||
** Returns NULL on failure. |
|||
** @return The same buffer for convenience. |
|||
*/ |
|||
tmbstr tidySystemLocale(tmbstr result) |
|||
{ |
|||
ctmbstr temp; |
|||
|
|||
/* This should set the OS locale. */ |
|||
setlocale( LC_ALL, "" ); |
|||
|
|||
/* This should read the current locale. */ |
|||
temp = setlocale( LC_ALL, NULL); |
|||
|
|||
/* Make a new copy of the string, because temp
|
|||
always points to the current locale. */ |
|||
if (( result = malloc( strlen( temp ) + 1 ) )) |
|||
strcpy(result, temp); |
|||
|
|||
/* This should restore the C locale. */ |
|||
setlocale( LC_ALL, "C" ); |
|||
|
|||
return result; |
|||
} |
|||
|
|||
|
|||
/**
|
|||
* Retrieves the POSIX name for a string. Result is a static char so please |
|||
* don't try to free it. If the name looks like a cc_ll identifier, we will |
|||
* return it if there's no other match. |
|||
*/ |
|||
tmbstr tidyNormalizedLocaleName( ctmbstr locale ) |
|||
{ |
|||
uint i; |
|||
uint len; |
|||
static char result[6] = "xx_yy"; |
|||
tmbstr search = strdup(locale); |
|||
search = TY_(tmbstrtolower)(search); |
|||
|
|||
/* See if our string matches a Windows name. */ |
|||
for (i = 0; localeMappings[i].winName; ++i) |
|||
{ |
|||
if ( strcmp( localeMappings[i].winName, search ) == 0 ) |
|||
{ |
|||
free(search); |
|||
search = strdup(localeMappings[i].POSIXName); |
|||
break; |
|||
} |
|||
} |
|||
|
|||
/* We're going to be stupid about this and trust the user, and
|
|||
return just the first two characters if they exist and the |
|||
4th and 5th if they exist. The worst that can happen is a |
|||
junk language that doesn't exist and won't be set. */ |
|||
|
|||
len = strlen( search ); |
|||
len = ( len <= 5 ? len : 5 ); |
|||
|
|||
for ( i = 0; i < len; i++ ) |
|||
{ |
|||
if ( i == 2 ) |
|||
{ |
|||
/* Either terminate the string or ensure there's an underscore */ |
|||
if (len == 5) { |
|||
result[i] = '_'; |
|||
} |
|||
else { |
|||
result[i] = '\0'; |
|||
break; /* no need to copy after null */ |
|||
} |
|||
} |
|||
else |
|||
{ |
|||
result[i] = tolower( search[i] ); |
|||
} |
|||
} |
|||
|
|||
free( search ); |
|||
return result; |
|||
} |
|||
|
|||
|
|||
/**
|
|||
* Returns the languageDefinition if the languageCode is installed in Tidy, |
|||
* otherwise return NULL |
|||
*/ |
|||
languageDefinition *TY_(tidyTestLanguage)( ctmbstr languageCode ) |
|||
{ |
|||
uint i; |
|||
languageDefinition *testLang; |
|||
languageDictionary *testDict; |
|||
ctmbstr testCode; |
|||
|
|||
for (i = 0; tidyLanguages.languages[i]; ++i) |
|||
{ |
|||
testLang = tidyLanguages.languages[i]; |
|||
testDict = &testLang->messages; |
|||
testCode = (*testDict)[0].value; |
|||
|
|||
if ( strcmp(testCode, languageCode) == 0 ) |
|||
return testLang; |
|||
} |
|||
|
|||
return NULL; |
|||
} |
|||
|
|||
|
|||
/**
|
|||
* Tells Tidy to use a different language for output. |
|||
* @param languageCode A Windows or POSIX language code, and must match |
|||
* a TIDY_LANGUAGE for an installed language. |
|||
* @result Indicates that a setting was applied, but not necessarily the |
|||
* specific request, i.e., true indicates a language and/or region |
|||
* was applied. If es_mx is requested but not installed, and es is |
|||
* installed, then es will be selected and this function will return |
|||
* true. However the opposite is not true; if es is requested but |
|||
* not present, Tidy will not try to select from the es_XX variants. |
|||
*/ |
|||
Bool tidySetLanguage( ctmbstr languageCode ) |
|||
{ |
|||
languageDefinition *dict1 = NULL; |
|||
languageDefinition *dict2 = NULL; |
|||
tmbstr wantCode = NULL; |
|||
char lang[3] = ""; |
|||
|
|||
if ( !languageCode || !(wantCode = tidyNormalizedLocaleName( languageCode )) ) |
|||
{ |
|||
return no; |
|||
} |
|||
|
|||
/* We want to use the specified language as the currentLanguage, and set
|
|||
fallback language as necessary. We have either a two or five digit code, |
|||
either or both of which might be installed. Let's test both of them: |
|||
*/ |
|||
|
|||
dict1 = TY_(tidyTestLanguage( wantCode )); /* WANTED language */ |
|||
|
|||
if ( strlen( wantCode ) > 2 ) |
|||
{ |
|||
strncpy(lang, wantCode, 2); |
|||
lang[2] = '\0'; |
|||
dict2 = TY_(tidyTestLanguage( lang ) ); /* BACKUP language? */ |
|||
} |
|||
|
|||
if ( dict1 && dict2 ) |
|||
{ |
|||
tidyLanguages.currentLanguage = dict1; |
|||
tidyLanguages.fallbackLanguage = dict2; |
|||
} |
|||
if ( dict1 && !dict2 ) |
|||
{ |
|||
tidyLanguages.currentLanguage = dict1; |
|||
tidyLanguages.fallbackLanguage = NULL; |
|||
} |
|||
if ( !dict1 && dict2 ) |
|||
{ |
|||
tidyLanguages.currentLanguage = dict2; |
|||
tidyLanguages.fallbackLanguage = NULL; |
|||
} |
|||
if ( !dict1 && !dict2 ) |
|||
{ |
|||
/* No change. */ |
|||
} |
|||
|
|||
return dict1 || dict2; |
|||
} |
|||
|
|||
|
|||
/**
|
|||
* Gets the current language used by Tidy. |
|||
*/ |
|||
ctmbstr tidyGetLanguage() |
|||
{ |
|||
languageDefinition *langDef = tidyLanguages.currentLanguage; |
|||
languageDictionary *langDict = &langDef->messages; |
|||
return (*langDict)[0].value; |
|||
} |
|||
|
|||
|
|||
/**
|
|||
* Provides a string given `messageType` in the default |
|||
* localization (which is `en`), for single plural form. |
|||
*/ |
|||
ctmbstr tidyDefaultString( uint messageType ) |
|||
{ |
|||
return TY_(tidyLocalizedString)( messageType, &language_en, 1); |
|||
} |
|||
|
|||
|
|||
/**
|
|||
* Determines the true size of the `language_en` array indicating the |
|||
* number of items in the array, _not_ the highest index. |
|||
*/ |
|||
const uint TY_(tidyStringKeyListSize)() |
|||
{ |
|||
static uint array_size = 0; |
|||
|
|||
if ( array_size == 0 ) |
|||
{ |
|||
while ( language_en.messages[array_size].value != NULL ) { |
|||
array_size++; |
|||
} |
|||
} |
|||
|
|||
return array_size; |
|||
} |
|||
|
|||
|
|||
/*
|
|||
* Initializes the TidyIterator to point to the first item |
|||
* in Tidy's list of localization string keys. Note that |
|||
* these are provided for documentation generation purposes |
|||
* and probably aren't useful for LibTidy implementors. |
|||
*/ |
|||
TidyIterator getStringKeyList() |
|||
{ |
|||
return (TidyIterator)(size_t)1; |
|||
} |
|||
|
|||
/*
|
|||
* Provides the next key value in Tidy's list of localized |
|||
* strings. Note that these are provided for documentation |
|||
* generation purposes and probably aren't useful to |
|||
* libtidy implementors. |
|||
*/ |
|||
uint getNextStringKey( TidyIterator* iter ) |
|||
{ |
|||
uint item = 0; |
|||
size_t itemIndex; |
|||
assert( iter != NULL ); |
|||
|
|||
itemIndex = (size_t)*iter; |
|||
|
|||
if ( itemIndex > 0 && itemIndex <= TY_(tidyStringKeyListSize)() ) |
|||
{ |
|||
item = language_en.messages[ itemIndex - 1 ].key; |
|||
itemIndex++; |
|||
} |
|||
|
|||
*iter = (TidyIterator)( itemIndex <= TY_(tidyStringKeyListSize)() ? itemIndex : (size_t)0 ); |
|||
return item; |
|||
} |
|||
|
|||
|
|||
/**
|
|||
* Determines the true size of the `localeMappings` array indicating the |
|||
* number of items in the array, _not_ the highest index. |
|||
*/ |
|||
const uint TY_(tidyLanguageListSize)() |
|||
{ |
|||
static uint array_size = 0; |
|||
|
|||
if ( array_size == 0 ) |
|||
{ |
|||
while ( localeMappings[array_size].winName ) { |
|||
array_size++; |
|||
} |
|||
} |
|||
|
|||
return array_size; |
|||
} |
|||
|
|||
/**
|
|||
* Initializes the TidyIterator to point to the first item |
|||
* in Tidy's structure of Windows<->POSIX local mapping. |
|||
* Items can be retrieved with getNextWindowsLanguage(); |
|||
*/ |
|||
TidyIterator getWindowsLanguageList() |
|||
{ |
|||
return (TidyIterator)(size_t)1; |
|||
} |
|||
|
|||
/**
|
|||
* Returns the next record of type `localeMapItem` in |
|||
* Tidy's structure of Windows<->POSIX local mapping. |
|||
*/ |
|||
const tidyLocaleMapItem *getNextWindowsLanguage( TidyIterator *iter ) |
|||
{ |
|||
const tidyLocaleMapItem *item = NULL; |
|||
size_t itemIndex; |
|||
assert( iter != NULL ); |
|||
|
|||
itemIndex = (size_t)*iter; |
|||
|
|||
if ( itemIndex > 0 && itemIndex <= TY_(tidyLanguageListSize)() ) |
|||
{ |
|||
item = &localeMappings[ itemIndex -1 ]; |
|||
itemIndex++; |
|||
} |
|||
|
|||
*iter = (TidyIterator)( itemIndex <= TY_(tidyLanguageListSize)() ? itemIndex : (size_t)0 ); |
|||
return item; |
|||
} |
|||
|
|||
|
|||
/**
|
|||
* Determines the number of languages installed in Tidy. |
|||
*/ |
|||
const uint TY_(tidyInstalledLanguageListSize)() |
|||
{ |
|||
static uint array_size = 0; |
|||
|
|||
if ( array_size == 0 ) |
|||
{ |
|||
while ( tidyLanguages.languages[array_size] ) { |
|||
array_size++; |
|||
} |
|||
} |
|||
|
|||
return array_size; |
|||
} |
|||
|
|||
/**
|
|||
* Initializes the TidyIterator to point to the first item |
|||
* in Tidy's list of installed language codes. |
|||
* Items can be retrieved with getNextInstalledLanguage(); |
|||
*/ |
|||
TidyIterator getInstalledLanguageList() |
|||
{ |
|||
return (TidyIterator)(size_t)1; |
|||
} |
|||
|
|||
/**
|
|||
* Returns the next installed language. |
|||
*/ |
|||
ctmbstr getNextInstalledLanguage( TidyIterator* iter ) |
|||
{ |
|||
ctmbstr item = NULL; |
|||
size_t itemIndex; |
|||
assert( iter != NULL ); |
|||
|
|||
itemIndex = (size_t)*iter; |
|||
|
|||
if ( itemIndex > 0 && itemIndex <= TY_(tidyInstalledLanguageListSize)() ) |
|||
{ |
|||
item = tidyLanguages.languages[itemIndex - 1]->messages[0].value; |
|||
itemIndex++; |
|||
} |
|||
|
|||
*iter = (TidyIterator)( itemIndex <= TY_(tidyInstalledLanguageListSize)() ? itemIndex : (size_t)0 ); |
|||
return item; |
|||
} |
|||
|
|||
|
|||
/**
|
|||
* Determines the number of error codes used by Tidy. |
|||
*/ |
|||
const uint TY_(tidyErrorCodeListSize)() |
|||
{ |
|||
static uint array_size = 0; |
|||
|
|||
if ( array_size == 0 ) |
|||
{ |
|||
while ( tidyErrorFilterKeysStruct[array_size].key ) { |
|||
array_size++; |
|||
} |
|||
} |
|||
|
|||
return array_size; |
|||
} |
|||
|
|||
/**
|
|||
* Initializes the TidyIterator to point to the first item |
|||
* in Tidy's list of error codes that can be return with |
|||
* `TidyReportFilter3`. |
|||
* Items can be retrieved with getNextErrorCode(); |
|||
*/ |
|||
TidyIterator getErrorCodeList() |
|||
{ |
|||
return (TidyIterator)(size_t)1; |
|||
} |
|||
|
|||
/**
|
|||
* Returns the next error code. |
|||
*/ |
|||
const tidyErrorFilterKeyItem *getNextErrorCode( TidyIterator* iter ) |
|||
{ |
|||
const tidyErrorFilterKeyItem *item = NULL; |
|||
size_t itemIndex; |
|||
assert( iter != NULL ); |
|||
|
|||
itemIndex = (size_t)*iter; |
|||
|
|||
if ( itemIndex > 0 && itemIndex <= TY_(tidyErrorCodeListSize)() ) |
|||
{ |
|||
item = &tidyErrorFilterKeysStruct[itemIndex - 1]; |
|||
itemIndex++; |
|||
} |
|||
|
|||
*iter = (TidyIterator)( itemIndex <= TY_(tidyErrorCodeListSize)() ? itemIndex : (size_t)0 ); |
|||
return item; |
|||
} |
@ -0,0 +1,332 @@ |
|||
#ifndef language_h |
|||
#define language_h |
|||
/*
|
|||
* language.h |
|||
* Localization support for HTML Tidy. |
|||
* This header provides the public (within libtidy) interface |
|||
* to basic localization support. To add your own localization |
|||
* create a new `language_xx.h` file and add it to the struct |
|||
* in `language.c`. |
|||
* |
|||
* (c) 2015 HTACG |
|||
* See tidy.h and access.h for the copyright notice. |
|||
* |
|||
* Created by Jim Derry on 11/28/15. |
|||
*/ |
|||
|
|||
#include "tidyplatform.h" |
|||
|
|||
|
|||
/** @name Exposed Data Structures */ |
|||
/** @{ */ |
|||
|
|||
/**
|
|||
* Describes a record for a localization string. |
|||
* - key must correspond with one of Tidy's enums (see `tidyMessageTypes` |
|||
* below) |
|||
* - pluralForm corresponds to gettext plural forms case (not singularity). |
|||
* Most entries should be case 0, representing the single case.: |
|||
* https://www.gnu.org/software/gettext/manual/html_node/Plural-forms.html
|
|||
*/ |
|||
typedef struct languageDictionaryEntry { |
|||
uint key; |
|||
uint pluralForm; |
|||
ctmbstr value; |
|||
} languageDictionaryEntry; |
|||
|
|||
|
|||
/**
|
|||
* For now we'll just use an array to hold all of the dictionary |
|||
* entries. In the future we can convert this to a hash structure |
|||
* which will make looking up strings faster. |
|||
*/ |
|||
typedef languageDictionaryEntry const languageDictionary[600]; |
|||
|
|||
|
|||
/**
|
|||
* Finally, a complete language definition. The item `pluralForm` |
|||
* is a function pointer that will provide the correct plural |
|||
* form given the value `n`. The actual function is present in |
|||
* each language header and is language dependent. |
|||
*/ |
|||
typedef struct languageDefinition { |
|||
uint (*whichPluralForm)(uint n); |
|||
languageDictionary messages; |
|||
} languageDefinition; |
|||
|
|||
|
|||
/**
|
|||
* The function getNextWindowsLanguage() returns pointers to this type; |
|||
* it gives LibTidy implementors the ability to determine how Windows |
|||
* locale names are mapped to POSIX language codes. |
|||
*/ |
|||
typedef struct tidyLocaleMapItem { |
|||
ctmbstr winName; |
|||
ctmbstr POSIXName; |
|||
} tidyLocaleMapItem; |
|||
|
|||
|
|||
/**
|
|||
* The function getNextErrorCode() returns pointers to this type; it gives |
|||
* LibTidy implementors the ability to know what errors can be returned |
|||
* via `TidyReportFilter3`. |
|||
* Provides the mapping for LibTidy users to map between an opaque key |
|||
* and an error message value. See `tidyErrorFilterKeys[]` in `language.c`. |
|||
* The `key` string is guaranteed by the API (unless deleted entirely). The |
|||
* `value` is suitable for use in looking up Tidy's strings, but its value |
|||
* is not guaranteed between releases. |
|||
*/ |
|||
typedef struct tidyErrorFilterKeyItem { |
|||
ctmbstr key; |
|||
int value; |
|||
} tidyErrorFilterKeyItem; |
|||
|
|||
|
|||
/**
 *  Defines all of the possible dictionary keys.
 *
 *  The starting value is arbitrary but must prevent overlaps
 *  with other enums that are used for retrieving strings. The
 *  comprehensive list of enums for which we provide strings
 *  is as follows:
 *    - `tidyMessageTypes` in this file,      start == 4096.
 *    - `tidyErrorCodes` from `message.h`,    start == 200.
 *    - `accessErrorCodes` from `access.h`,   start == CODES_TIDY_ERROR_LAST+1.
 *    - `tidyMessagesMisc` from `message.h`,  start == 2048.
 *    - `TidyOptionId` from `tidyEnum.h`,     start == 0 (important!).
 *    - `TidyReportLevelKeys` from `tidyEnum.h`, start == 600.
 *    - ...
 *
 *  You should never count on the value of a label being
 *  constant. Accordingly feel free to arrange new enum
 *  values in the most appropriate grouping below.
 */
typedef enum
{
    /* This MUST be present and first. */
    TIDY_MESSAGE_TYPE_FIRST = 4096,

    /* Specify the code for this language. */
    TIDY_LANGUAGE,

    /* Localization test strings. */
    TEST_PRESENT_IN_BASE,
    TEST_PRESENT_IN_REGION,

    /* Strings for the console application. */
    TC_CAT_DIAGNOSTICS,
    TC_CAT_ENCODING,
    TC_CAT_MARKUP,
    TC_CAT_MISC,
    TC_CAT_PRETTYPRINT,
    TC_LABEL_COL,
    TC_LABEL_FILE,
    TC_LABEL_LANG,
    TC_LABEL_LEVL,
    TC_LABEL_OPT,
    TC_MAIN_ERROR_LOAD_CONFIG,
    TC_OPT_ACCESS,
    TC_OPT_ASCII,
    TC_OPT_ASHTML,
    TC_OPT_ASXML,
    TC_OPT_BARE,
    TC_OPT_BIG5,
    TC_OPT_CLEAN,
    TC_OPT_CONFIG,
    TC_OPT_ERRORS,
    TC_OPT_FILE,
    TC_OPT_GDOC,
    TC_OPT_HELP,
    TC_OPT_HELPCFG,
    TC_OPT_HELPOPT,
    TC_OPT_IBM858,
    TC_OPT_INDENT,
    TC_OPT_ISO2022,
    TC_OPT_LANGUAGE,
    TC_OPT_LATIN0,
    TC_OPT_LATIN1,
    TC_OPT_MAC,
    TC_OPT_MODIFY,
    TC_OPT_NUMERIC,
    TC_OPT_OMIT,
    TC_OPT_OUTPUT,
    TC_OPT_QUIET,
    TC_OPT_RAW,
    TC_OPT_SHIFTJIS,
    TC_OPT_SHOWCFG,
    TC_OPT_UPPER,
    TC_OPT_UTF16,
    TC_OPT_UTF16BE,
    TC_OPT_UTF16LE,
    TC_OPT_UTF8,
    TC_OPT_VERSION,
    TC_OPT_WIN1252,
    TC_OPT_WRAP,
    TC_OPT_XML,
    TC_OPT_XMLCFG,
    TC_OPT_XMLSTRG,
    TC_OPT_XMLERRS,
    TC_OPT_XMLOPTS,
    TC_OPT_XMLHELP,
    TC_STRING_CONF_HEADER,
    TC_STRING_CONF_NAME,
    TC_STRING_CONF_TYPE,
    TC_STRING_CONF_VALUE,
    TC_STRING_CONF_NOTE,
    TC_STRING_OPT_NOT_DOCUMENTED,
    TC_STRING_OUT_OF_MEMORY,
    TC_STRING_FATAL_ERROR,
    TC_STRING_FILE_MANIP,
    TC_STRING_LANG_MUST_SPECIFY,
    TC_STRING_LANG_NOT_FOUND,
    TC_STRING_MUST_SPECIFY,
    TC_STRING_PROCESS_DIRECTIVES,
    TC_STRING_CHAR_ENCODING,
    TC_STRING_MISC,
    TC_STRING_XML,
    TC_STRING_UNKNOWN_OPTION,
    TC_STRING_UNKNOWN_OPTION_B,
    TC_STRING_VERS_A,
    TC_STRING_VERS_B,
    TC_TXT_HELP_1,
    TC_TXT_HELP_2A,
    TC_TXT_HELP_2B,
    TC_TXT_HELP_3,
    TC_TXT_HELP_CONFIG,
    TC_TXT_HELP_CONFIG_NAME,
    TC_TXT_HELP_CONFIG_TYPE,
    TC_TXT_HELP_CONFIG_ALLW,
    TC_TXT_HELP_LANG_1,
    TC_TXT_HELP_LANG_2,
    TC_TXT_HELP_LANG_3,

    /* This MUST be present and last. */
    TIDY_MESSAGE_TYPE_LAST
} tidyMessageTypes;
|||
|
|||
|
|||
/**
|
|||
* LibTidy users may want to use `TidyReportFilter3` to enable their own |
|||
* localization lookup features. Because Tidy's errors codes are enums the |
|||
* specific values can change over time. This function returns a string |
|||
* representing the enum value name that can be used as a lookup key |
|||
* independent of changing string values (TidyReportFiler2 is vulnerable |
|||
* to changing strings). `TidyReportFilter3` will return this general |
|||
* string as the error message indicator. |
|||
*/ |
|||
ctmbstr tidyErrorCodeAsString(uint code); |
|||
|
|||
|
|||
/** @} */ |
|||
/** @name Localization Related Functions */ |
|||
/** @{ */ |
|||
|
|||
|
|||
/**
|
|||
** Determines the current locale without affecting the C locale. |
|||
** Tidy has always used the default C locale, and at this point |
|||
** in its development we're not going to tamper with that. |
|||
** @param result The buffer to use to return the string. |
|||
** Returns NULL on failure. |
|||
** @return The same buffer for convenience. |
|||
*/ |
|||
tmbstr tidySystemLocale(tmbstr result); |
|||
|
|||
/**
|
|||
* Tells Tidy to use a different language for output. |
|||
* @param languageCode A Windows or POSIX language code, and must match |
|||
* a TIDY_LANGUAGE for an installed language. |
|||
* @result Indicates that a setting was applied, but not necessarily the |
|||
* specific request, i.e., true indicates a language and/or region |
|||
* was applied. If es_mx is requested but not installed, and es is |
|||
* installed, then es will be selected and this function will return |
|||
* true. However the opposite is not true; if es is requested but |
|||
* not present, Tidy will not try to select from the es_XX variants. |
|||
*/ |
|||
Bool tidySetLanguage( ctmbstr languageCode ); |
|||
|
|||
/**
|
|||
* Gets the current language used by Tidy. |
|||
*/ |
|||
ctmbstr tidyGetLanguage(); |
|||
|
|||
/**
|
|||
* Provides a string given `messageType` in the current |
|||
* localization for `quantity`. |
|||
*/ |
|||
ctmbstr tidyLocalizedStringN( uint messageType, uint quantity ); |
|||
|
|||
/**
|
|||
* Provides a string given `messageType` in the current |
|||
* localization for the single case. |
|||
*/ |
|||
ctmbstr tidyLocalizedString( uint messageType ); |
|||
|
|||
|
|||
/** @} */ |
|||
/** @name Documentation Generation */ |
|||
/** @{ */ |
|||
|
|||
/**
|
|||
* Provides a string given `messageType` in the default |
|||
* localization (which is `en`). |
|||
*/ |
|||
ctmbstr tidyDefaultString( uint messageType ); |
|||
|
|||
/*
|
|||
* Initializes the TidyIterator to point to the first item |
|||
* in Tidy's list of localization string keys. Note that |
|||
* these are provided for documentation generation purposes |
|||
* and probably aren't useful for LibTidy implementors. |
|||
*/ |
|||
TidyIterator getStringKeyList(); |
|||
|
|||
/*
|
|||
* Provides the next key value in Tidy's list of localized |
|||
* strings. Note that these are provided for documentation |
|||
* generation purposes and probably aren't useful to |
|||
* libtidy implementors. |
|||
*/ |
|||
uint getNextStringKey( TidyIterator* iter ); |
|||
|
|||
/**
|
|||
* Initializes the TidyIterator to point to the first item |
|||
* in Tidy's structure of Windows<->POSIX local mapping. |
|||
* Items can be retrieved with getNextWindowsLanguage(); |
|||
*/ |
|||
TidyIterator getWindowsLanguageList(); |
|||
|
|||
/**
|
|||
* Returns the next record of type `localeMapItem` in |
|||
* Tidy's structure of Windows<->POSIX local mapping. |
|||
*/ |
|||
const tidyLocaleMapItem *getNextWindowsLanguage( TidyIterator* iter ); |
|||
|
|||
/**
|
|||
* Initializes the TidyIterator to point to the first item |
|||
* in Tidy's list of installed language codes. |
|||
* Items can be retrieved with getNextInstalledLanguage(); |
|||
*/ |
|||
TidyIterator getInstalledLanguageList(); |
|||
|
|||
/**
|
|||
* Returns the next installed language. |
|||
*/ |
|||
ctmbstr getNextInstalledLanguage( TidyIterator* iter ); |
|||
|
|||
|
|||
/**
|
|||
* Initializes the TidyIterator to point to the first item |
|||
* in Tidy's list of error codes that can be return with |
|||
* `TidyReportFilter3`. |
|||
* Items can be retrieved with getNextErrorCode(); |
|||
*/ |
|||
TidyIterator getErrorCodeList(); |
|||
|
|||
/**
|
|||
* Returns the next error code. |
|||
*/ |
|||
const tidyErrorFilterKeyItem *getNextErrorCode( TidyIterator* iter ); |
|||
|
|||
|
|||
/** @} */ |
|||
|
|||
#endif /* language_h */ |
File diff suppressed because it is too large
@ -0,0 +1,132 @@ |
|||
#ifndef language_en_gb_h |
|||
#define language_en_gb_h |
|||
/*
|
|||
* language_en_gb.h |
|||
* Localization support for HTML Tidy. |
|||
* |
|||
* |
|||
* This file is a localization file for HTML Tidy. It will have been machine |
|||
* generated or created and/or edited by hand. Both are valid options, but |
|||
* please help keep our localization efforts simple to maintain by maintaining |
|||
* the structure of this file, and changing the check box below if you make |
|||
* changes (so others know the file origin): |
|||
* |
|||
* [X] THIS FILE IS MACHINE GENERATED. It is a localization file for the |
|||
* language (and maybe region) "en_gb". The source of |
|||
* these strings is a gettext PO file in Tidy's source, probably called |
|||
* "language_en_gb.po". |
|||
* |
|||
* [ ] THIS FILE WAS HAND MODIFIED. Translators, please feel free to edit this file |
|||
* directly (and check this box). If you prefer to edit PO files then use |
|||
* `poconvert.rb msgunfmt language_en_gb.h` (our own |
|||
* conversion tool) to generate a fresh PO from this file first! |
|||
* |
|||
* (c) 2015 HTACG |
|||
* See tidy.h and access.h for the copyright notice. |
|||
* |
|||
* Template Created by Jim Derry on 01/14/2016. |
|||
* |
|||
* Originating PO file metadata: |
|||
* PO_LAST_TRANSLATOR=jderry |
|||
* PO_REVISION_DATE=2016-03-24 10:59:55 |
|||
*/ |
|||
|
|||
#ifdef _MSC_VER |
|||
#pragma execution_character_set("utf-8") |
|||
#endif |
|||
|
|||
#include "language.h" |
|||
#include "access.h" |
|||
#include "message.h" |
|||
|
|||
|
|||
/**
|
|||
* This language-specific function returns the correct pluralForm |
|||
* to use given n items, and is used as a member of each language |
|||
* definition. |
|||
*/ |
|||
static uint whichPluralForm_en_gb(uint n) { |
|||
/* Plural-Forms: nplurals=2; */ |
|||
return n != 1; |
|||
} |
|||
|
|||
|
|||
/**
|
|||
* This structure specifies all of the strings needed by Tidy for a |
|||
* single language. Static definition in a header file makes it |
|||
* easy to include and exclude languages without tinkering with |
|||
* the build system. |
|||
*/ |
|||
static languageDefinition language_en_gb = { whichPluralForm_en_gb, { |
|||
/***************************************
|
|||
** This MUST be present and first. |
|||
** Specify the code for this language. |
|||
***************************************/ |
|||
{/* Specify the ll or ll_cc language code here. */ |
|||
TIDY_LANGUAGE, 0, "en_gb" |
|||
}, |
|||
{ TEXT_USING_FONT, 0, |
|||
"You are recommended to use CSS to specify the font and\n" |
|||
"properties such as its size and colour. This will reduce\n" |
|||
"the size of HTML files and make them easier to maintain\n" |
|||
"compared with using <FONT> elements.\n\n" |
|||
}, |
|||
{ TEXT_USING_BODY, 0, "You are recommended to use CSS to specify page and link colours\n" }, |
|||
{ TEXT_GENERAL_INFO_PLEA, 0, |
|||
"\n" |
|||
"Would you like to see Tidy in proper, British English? Please consider \n" |
|||
"helping us to localise HTML Tidy. For details please see \n" |
|||
"https://github.com/htacg/tidy-html5/blob/master/README/LOCALIZE.md\n" |
|||
}, |
|||
|
|||
#if SUPPORT_ACCESSIBILITY_CHECKS |
|||
{ INFORMATION_NOT_CONVEYED_IMAGE, 0, "[2.1.1.1]: ensure information not conveyed through colour alone (image)." }, |
|||
{ INFORMATION_NOT_CONVEYED_APPLET, 0, "[2.1.1.2]: ensure information not conveyed through colour alone (applet)." }, |
|||
{ INFORMATION_NOT_CONVEYED_OBJECT, 0, "[2.1.1.3]: ensure information not conveyed through colour alone (object)." }, |
|||
{ INFORMATION_NOT_CONVEYED_SCRIPT, 0, "[2.1.1.4]: ensure information not conveyed through colour alone (script)." }, |
|||
{ INFORMATION_NOT_CONVEYED_INPUT, 0, "[2.1.1.5]: ensure information not conveyed through colour alone (input)." }, |
|||
{ COLOR_CONTRAST_TEXT, 0, "[2.2.1.1]: poor colour contrast (text)." }, |
|||
{ COLOR_CONTRAST_LINK, 0, "[2.2.1.2]: poor colour contrast (link)." }, |
|||
{ COLOR_CONTRAST_ACTIVE_LINK, 0, "[2.2.1.3]: poor colour contrast (active link)." }, |
|||
{ COLOR_CONTRAST_VISITED_LINK, 0, "[2.2.1.4]: poor colour contrast (visited link)." }, |
|||
#endif /* SUPPORT_ACCESSIBILITY_CHECKS */ |
|||
|
|||
{ TidyMergeDivs, 0, |
|||
"This option can be used to modify the behaviour of <code>clean</code> when " |
|||
"set to <code>yes</code>." |
|||
"<br/>" |
|||
"This option specifies if Tidy should merge nested <code><div></code> " |
|||
"such as <code><div><div>...</div></div></code>. " |
|||
"<br/>" |
|||
"If set to <code>auto</code> the attributes of the inner " |
|||
"<code><div></code> are moved to the outer one. Nested " |
|||
"<code><div></code> with <code>id</code> attributes are <em>not</em> " |
|||
"merged. " |
|||
"<br/>" |
|||
"If set to <code>yes</code> the attributes of the inner " |
|||
"<code><div></code> are discarded with the exception of " |
|||
"<code>class</code> and <code>style</code>. " |
|||
}, |
|||
{ TidyMergeSpans, 0, |
|||
"This option can be used to modify the behaviour of <code>clean</code> when " |
|||
"set to <code>yes</code>." |
|||
"<br/>" |
|||
"This option specifies if Tidy should merge nested <code><span></code> " |
|||
"such as <code><span><span>...</span></span></code>. " |
|||
"<br/>" |
|||
"The algorithm is identical to the one used by <code>merge-divs</code>. " |
|||
}, |
|||
{ TidyReplaceColor, 0, |
|||
"This option specifies if Tidy should replace numeric values in colour " |
|||
"attributes with HTML/XHTML colour names where defined, e.g. replace " |
|||
"<code>#ffffff</code> with <code>white</code>. " |
|||
}, |
|||
|
|||
{/* This MUST be present and last. */ |
|||
TIDY_MESSAGE_TYPE_LAST, 0, NULL |
|||
} |
|||
|
|||
}}; |
|||
|
|||
|
|||
#endif /* language_en_gb_h */ |
@ -0,0 +1,138 @@ |
|||
#ifndef language_es_h |
|||
#define language_es_h |
|||
/*
|
|||
* language_es.h |
|||
* Localization support for HTML Tidy. |
|||
* |
|||
* |
|||
* This file is a localization file for HTML Tidy. It will have been machine |
|||
* generated or created and/or edited by hand. Both are valid options, but |
|||
* please help keep our localization efforts simple to maintain by maintaining |
|||
* the structure of this file, and changing the check box below if you make |
|||
* changes (so others know the file origin): |
|||
* |
|||
* [X] THIS FILE IS MACHINE GENERATED. It is a localization file for the |
|||
* language (and maybe region) "es". The source of |
|||
* these strings is a gettext PO file in Tidy's source, probably called |
|||
* "language_es.po". |
|||
* |
|||
* [ ] THIS FILE WAS HAND MODIFIED. Translators, please feel free to edit this file |
|||
* directly (and check this box). If you prefer to edit PO files then use |
|||
* `poconvert.rb msgunfmt language_es.h` (our own |
|||
* conversion tool) to generate a fresh PO from this file first! |
|||
* |
|||
* (c) 2015 HTACG |
|||
* See tidy.h and access.h for the copyright notice. |
|||
* |
|||
* Template Created by Jim Derry on 01/14/2016. |
|||
* |
|||
* Originating PO file metadata: |
|||
* PO_LAST_TRANSLATOR=jderry |
|||
* PO_REVISION_DATE=2016-03-24 10:59:55 |
|||
*/ |
|||
|
|||
#ifdef _MSC_VER |
|||
#pragma execution_character_set("utf-8") |
|||
#endif |
|||
|
|||
#include "language.h" |
|||
#include "access.h" |
|||
#include "message.h" |
|||
|
|||
|
|||
/**
|
|||
* This language-specific function returns the correct pluralForm |
|||
* to use given n items, and is used as a member of each language |
|||
* definition. |
|||
*/ |
|||
static uint whichPluralForm_es(uint n) { |
|||
/* Plural-Forms: nplurals=2; */ |
|||
return n != 1; |
|||
} |
|||
|
|||
|
|||
/**
|
|||
* This structure specifies all of the strings needed by Tidy for a |
|||
* single language. Static definition in a header file makes it |
|||
* easy to include and exclude languages without tinkering with |
|||
* the build system. |
|||
*/ |
|||
static languageDefinition language_es = { whichPluralForm_es, { |
|||
/***************************************
|
|||
** This MUST be present and first. |
|||
** Specify the code for this language. |
|||
***************************************/ |
|||
{/* Specify the ll or ll_cc language code here. */ |
|||
TIDY_LANGUAGE, 0, "es" |
|||
}, |
|||
{ TEXT_GENERAL_INFO_PLEA, 0, |
|||
"\n" |
|||
"¿Le gustaría ver Tidy en un español correcto? Por favor considere \n" |
|||
"ayudarnos a localizar HTML Tidy. Para más detalles consulte \n" |
|||
"https://github.com/htacg/tidy-html5/blob/master/README/LOCALIZE.md \n" |
|||
}, |
|||
{ TidyMakeClean, 0, |
|||
"Esta opción especifica si Tidy debe realizar la limpieza de algún legado etiquetas de " |
|||
"presentación (actualmente <code><i></code>, <code><b></code>, <code><center></" |
|||
"code> cuando encerrados dentro de las etiquetas apropiadas en línea y <code><font></" |
|||
"code>). Si se establece en <code>yes</code>, entonces etiquetas existentes serán reemplazados " |
|||
"con CSS <code><style></code> y estructural markup según corresponda. " |
|||
}, |
|||
|
|||
#if SUPPORT_ASIAN_ENCODINGS |
|||
{ TidyNCR, 0, "Esta opción especifica si Tidy debe permitir referencias de caracteres numéricos. " }, |
|||
#endif /* SUPPORT_ASIAN_ENCODINGS */ |
|||
|
|||
{ TC_TXT_HELP_LANG_1, 0, |
|||
"\n" |
|||
"La opción --language (o --lang) indica el lenguaje Tidy debe \n" |
|||
"utilizar para comunicar su salida. Tenga en cuenta que esto no es \n" |
|||
"un servicio de traducción de documentos, y sólo afecta a los mensajes \n" |
|||
"que Tidy comunica a usted. \n" |
|||
"\n" |
|||
"Cuando se utiliza la línea de comandos el argumento --language debe \n" |
|||
"utilizarse antes de cualquier argumento que dan lugar a la producción, \n" |
|||
"de lo contrario Tidy producirá la salida antes de que se conozca el \n" |
|||
"idioma a utilizar. \n" |
|||
"\n" |
|||
"Además de los códigos de idioma estándar POSIX, Tidy es capaz de \n" |
|||
"entender códigos de idioma legados de Windows. Tenga en cuenta que \n" |
|||
"este lista indica los códigos Tidy entiende, y no indica que \n" |
|||
"actualmente el idioma está instalado. \n" |
|||
"\n" |
|||
"La columna más a la derecha indica cómo Tidy comprenderá el \n" |
|||
"legado nombre de Windows.\n" |
|||
"\n" |
|||
"Tidy está utilizando la configuración regional %s. \n" |
|||
"\n" |
|||
}, |
|||
{ TC_TXT_HELP_LANG_2, 0, |
|||
"\n" |
|||
"Los siguientes idiomas están instalados actualmente en Tidy. Tenga \n" |
|||
"en cuenta que no hay garantía de que están completos; sólo quiere decir \n" |
|||
"que un desarrollador u otro comenzaron a añadir el idioma indicado. \n" |
|||
"\n" |
|||
"Localizaciones incompletas por defecto se usan \"en\" cuando sea \n" |
|||
"necesario. ¡Favor de informar los desarrolladores de estes casos! \n" |
|||
"\n" |
|||
}, |
|||
{ TC_TXT_HELP_LANG_3, 0, |
|||
"\n" |
|||
"Si Tidy es capaz de determinar la configuración regional entonces \n" |
|||
"Tidy utilizará el lenguaje de forma automática de la configuración \n" |
|||
"regional. Por ejemplo los sistemas de tipo Unix utilizan los variables \n" |
|||
"$LANG y/o $LC_ALL. Consulte a su documentación del sistema para \n" |
|||
"obtener más información.\n" |
|||
"\n" |
|||
"Tidy está utilizando la configuración regional %s. \n" |
|||
"\n" |
|||
}, |
|||
|
|||
{/* This MUST be present and last. */ |
|||
TIDY_MESSAGE_TYPE_LAST, 0, NULL |
|||
} |
|||
|
|||
}}; |
|||
|
|||
|
|||
#endif /* language_es_h */ |
@ -0,0 +1,82 @@ |
|||
#ifndef language_es_mx_h |
|||
#define language_es_mx_h |
|||
/*
|
|||
* language_es_mx.h |
|||
* Localization support for HTML Tidy. |
|||
* |
|||
* |
|||
* This file is a localization file for HTML Tidy. It will have been machine |
|||
* generated or created and/or edited by hand. Both are valid options, but |
|||
* please help keep our localization efforts simple to maintain by maintaining |
|||
* the structure of this file, and changing the check box below if you make |
|||
* changes (so others know the file origin): |
|||
* |
|||
* [X] THIS FILE IS MACHINE GENERATED. It is a localization file for the |
|||
* language (and maybe region) "es_mx". The source of |
|||
* these strings is a gettext PO file in Tidy's source, probably called |
|||
* "language_es_mx.po". |
|||
* |
|||
* [ ] THIS FILE WAS HAND MODIFIED. Translators, please feel free to edit this file |
|||
* directly (and check this box). If you prefer to edit PO files then use |
|||
* `poconvert.rb msgunfmt language_es_mx.h` (our own |
|||
* conversion tool) to generate a fresh PO from this file first! |
|||
* |
|||
* (c) 2015 HTACG |
|||
* See tidy.h and access.h for the copyright notice. |
|||
* |
|||
* Template Created by Jim Derry on 01/14/2016. |
|||
* |
|||
* Originating PO file metadata: |
|||
* PO_LAST_TRANSLATOR=jderry |
|||
* PO_REVISION_DATE=2016-03-24 10:59:55 |
|||
*/ |
|||
|
|||
#ifdef _MSC_VER |
|||
#pragma execution_character_set("utf-8") |
|||
#endif |
|||
|
|||
#include "language.h" |
|||
#include "access.h" |
|||
#include "message.h" |
|||
|
|||
|
|||
/**
|
|||
* This language-specific function returns the correct pluralForm |
|||
* to use given n items, and is used as a member of each language |
|||
* definition. |
|||
*/ |
|||
static uint whichPluralForm_es_mx(uint n) { |
|||
/* Plural-Forms: nplurals=2; */ |
|||
return n != 1; |
|||
} |
|||
|
|||
|
|||
/**
|
|||
* This structure specifies all of the strings needed by Tidy for a |
|||
* single language. Static definition in a header file makes it |
|||
* easy to include and exclude languages without tinkering with |
|||
* the build system. |
|||
*/ |
|||
static languageDefinition language_es_mx = { whichPluralForm_es_mx, { |
|||
/***************************************
|
|||
** This MUST be present and first. |
|||
** Specify the code for this language. |
|||
***************************************/ |
|||
{/* Specify the ll or ll_cc language code here. */ |
|||
TIDY_LANGUAGE, 0, "es_mx" |
|||
}, |
|||
{ TEXT_GENERAL_INFO_PLEA, 0, |
|||
"\n" |
|||
"¿Le gustaría ver Tidy en adecuada, español mexicano? Por favor considere \n" |
|||
"ayudarnos a localizar HTML Tidy. Para más detalles consulte \n" |
|||
"https://github.com/htacg/tidy-html5/blob/master/README/LOCALIZE.md \n" |
|||
}, |
|||
|
|||
{/* This MUST be present and last. */ |
|||
TIDY_MESSAGE_TYPE_LAST, 0, NULL |
|||
} |
|||
|
|||
}}; |
|||
|
|||
|
|||
#endif /* language_es_mx_h */ |
@ -0,0 +1,573 @@ |
|||
#ifndef language_fr_h |
|||
#define language_fr_h |
|||
/*
|
|||
* language_fr.h |
|||
* Localization support for HTML Tidy. |
|||
* |
|||
* |
|||
* This file is a localization file for HTML Tidy. It will have been machine |
|||
* generated or created and/or edited by hand. Both are valid options, but |
|||
* please help keep our localization efforts simple to maintain by maintaining |
|||
* the structure of this file, and changing the check box below if you make |
|||
* changes (so others know the file origin): |
|||
* |
|||
* [X] THIS FILE IS MACHINE GENERATED. It is a localization file for the |
|||
* language (and maybe region) "fr". The source of |
|||
* these strings is a gettext PO file in Tidy's source, probably called |
|||
* "language_fr.po". |
|||
* |
|||
* [ ] THIS FILE WAS HAND MODIFIED. Translators, please feel free to edit this file |
|||
* directly (and check this box). If you prefer to edit PO files then use |
|||
* `poconvert.rb msgunfmt language_fr.h` (our own |
|||
* conversion tool) to generate a fresh PO from this file first! |
|||
* |
|||
* (c) 2015 HTACG |
|||
* See tidy.h and access.h for the copyright notice. |
|||
* |
|||
* Template Created by Jim Derry on 01/14/2016. |
|||
* |
|||
* Originating PO file metadata: |
|||
* PO_LAST_TRANSLATOR= |
|||
* PO_REVISION_DATE= |
|||
*/ |
|||
|
|||
#ifdef _MSC_VER |
|||
#pragma execution_character_set("utf-8") |
|||
#endif |
|||
|
|||
#include "language.h" |
|||
#include "access.h" |
|||
#include "message.h" |
|||
|
|||
|
|||
/**
|
|||
* This language-specific function returns the correct pluralForm |
|||
* to use given n items, and is used as a member of each language |
|||
* definition. |
|||
*/ |
|||
static uint whichPluralForm_fr(uint n) { |
|||
/* Plural-Forms: nplurals=2; */ |
|||
return (n > 1); |
|||
} |
|||
|
|||
|
|||
/**
|
|||
* This structure specifies all of the strings needed by Tidy for a |
|||
* single language. Static definition in a header file makes it |
|||
* easy to include and exclude languages without tinkering with |
|||
* the build system. |
|||
*/ |
|||
static languageDefinition language_fr = { whichPluralForm_fr, { |
|||
/***************************************
|
|||
** This MUST be present and first. |
|||
** Specify the code for this language. |
|||
***************************************/ |
|||
{/* Specify the ll or ll_cc language code here. */ |
|||
TIDY_LANGUAGE, 0, "fr" |
|||
}, |
|||
{ ACCESS_URL, 0, "http://www.w3.org/WAI/GL" }, |
|||
{ ATRC_ACCESS_URL, 0, "http://www.html-tidy.org/Accessibility/" }, |
|||
{ FILE_CANT_OPEN, 0, "Impossible d'ouvrir « %s »\n" }, |
|||
{ LINE_COLUMN_STRING, 0, "Ligne: %d Col: %d - " }, |
|||
{ STRING_CONTENT_LOOKS, 0, "Le contenu du document ressemble à %s" }, |
|||
{ STRING_DISCARDING, 0, "dépose" }, |
|||
{ STRING_DOCTYPE_GIVEN, 0, "DOCTYPE donné est «%s»" }, |
|||
{ STRING_ERROR_COUNT, 0, "%u %s, %u %s trouvées!" }, |
|||
{ STRING_ERROR_COUNT_ERROR, 0, "erreur" }, |
|||
{ STRING_ERROR_COUNT_ERROR, 1, "erreurs" }, |
|||
{ STRING_ERROR_COUNT_WARNING, 0, "alarme" }, |
|||
{ STRING_ERROR_COUNT_WARNING, 1, "alarmes" }, |
|||
{ STRING_HELLO_ACCESS, 0, "Contrôles d'accessibilité: version 0.1" }, |
|||
{ STRING_HTML_PROPRIETARY, 0, "HTML Proprietary" }, |
|||
{ STRING_MISSING_MALFORMED, 0, "argument manquant ou incorrect pour l'option: %s" }, |
|||
{ STRING_NO_ERRORS, 0, "Aucun avertissement ou les erreurs ne trouvées." }, |
|||
{ STRING_NO_SYSID, 0, "Aucun identificateur de système dans le doctype émis" }, |
|||
{ STRING_NOT_ALL_SHOWN, 0, "Pas tous les avertissements/erreurs ont été présentés." }, |
|||
{ STRING_PLAIN_TEXT, 0, "le texte brut" }, |
|||
{ STRING_REPLACING, 0, "remplaçant" }, |
|||
{ STRING_SPECIFIED, 0, "spécifié" }, |
|||
{ STRING_UNKNOWN_FILE, 0, "%s: Impossible d'ouvrir le fichier \"%s\"\n" }, |
|||
{ STRING_UNKNOWN_OPTION, 0, "option inconnue: %s" }, |
|||
{ STRING_UNRECZD_OPTION, 0, "option non reconnue -%c utiliser -help pour lister les options\n" }, |
|||
{ STRING_XML_DECLARATION, 0, "déclaration XML" }, |
|||
{ TEXT_HTML_T_ALGORITHM, 0, |
|||
"\n" |
|||
" - D'abord, cherchez à gauche de la position de la cellule de trouver \n" |
|||
" des cellules d'en-tête de ligne.\n" |
|||
" - Puis rechercher vers le haut pour trouver les cellules d'en-tête \n" |
|||
" de colonne.\n" |
|||
" - La recherche dans une direction donnée arrête lorsque le bord \n" |
|||
" de la table est atteinte ou lorsque la cellule de données est \n" |
|||
" trouvé après une cellule d'en-tête.\n" |
|||
" - Têtes de ligne sont insérés dans la liste dans l'ordre où ils \n" |
|||
" apparaissent dans la table.\n" |
|||
" - Pour les tables de gauche à droite, en-têtes sont insérés de \n" |
|||
" gauche à droite.\n" |
|||
" - Têtes de colonnes sont insérés après-têtes de ligne, dans\n" |
|||
" l'ordre où ils apparaissent dans le tableau, de haut en bas.\n" |
|||
" - Si une cellule d'en-tête a les têtes ensemble d'attributs, puis \n" |
|||
" les en-têtes référencée par cet attribut sont insérés dans la \n" |
|||
" liste et le recherche arrête pour la direction du courant.\n" |
|||
" TD cellules qui fixent l'attribut de l'axe sont également \n" |
|||
" traités comme des cellules d'en-tête.\n" |
|||
"\n" |
|||
}, |
|||
{ TEXT_WINDOWS_CHARS, 0, |
|||
"Personnages codes pour les polices Microsoft Windows dans la gamme\n" |
|||
"128-159 ne pas être reconnus sur d'autres plateformes. Vous êtes\n" |
|||
"au lieu recommandé d'utiliser les entités nommées, par exemple ™ \n" |
|||
"plutôt code que Windows de caractères 153 (0x2122 en Unicode). Notez que\n" |
|||
"à partir de Février 1998 quelques navigateurs supportent les nouvelles \n" |
|||
"entités.\n" |
|||
"\n" |
|||
}, |
|||
{ TEXT_VENDOR_CHARS, 0, |
|||
"Il est peu probable que fournisseur spécifique, encodages qui dépendent du système\n" |
|||
"travailler assez largement sur le World Wide Web; vous devriez éviter d'utiliser le " |
|||
"%s codage de caractères de $, à la place il est recommandé \n" |
|||
"de utiliser entités nommées, par exemple ™.\n" |
|||
}, |
|||
{ TEXT_SGML_CHARS, 0, |
|||
"Les codes de caractères 128 à 159 (U + 0080 à U + 009F) ne sont pas autorisés \n" |
|||
"en HTML; même si elles l'étaient, ils seraient probablement les \n" |
|||
"caractères non imprimables de contrôle.\n" |
|||
"Tidy supposé que vous vouliez faire référence à un personnage avec la même valeur " |
|||
"d'octet\n" |
|||
"l'encodage %s et remplacé cette référence avec l'équivalent Unicode.\n" |
|||
"\n" |
|||
}, |
|||
{ TEXT_INVALID_UTF8, 0, |
|||
"Les codes de caractères UTF-8 doivent être dans la gamme: U + 0000 à U + 10FFFF.\n" |
|||
"La définition de l'UTF-8 à l'annexe D de la norme ISO / CEI 10646-1: 2000 a " |
|||
"également\n" |
|||
"permet l'utilisation de séquences de cinq et six octets pour coder\n" |
|||
"des personnages qui sont en dehors de la gamme de l'ensemble de caractères Unicode;\n" |
|||
"ces séquences de cinq et six octets sont illégales pour l'utilisation de\n" |
|||
"UTF-8 comme une transformation de caractères Unicode. ISO / IEC 10646\n" |
|||
"ne permet pas la cartographie des substituts non appariés, ni U + FFFE et U + FFFF\n" |
|||
"(mais il ne permet d'autres non-caractères). Pour plus d'informations s'il vous " |
|||
"plaît se référer à\n" |
|||
"http://www.unicode.org/ et http://www.cl.cam.ac.uk/~mgk25/unicode.html\n" |
|||
"\n" |
|||
}, |
|||
{ TEXT_INVALID_UTF16, 0, |
|||
"Codes de caractères pour UTF-16 doit être dans la gamme: U + 0000 à U + 10FFFF.\n" |
|||
"La définition de UTF-16 dans l'annexe C de l'ISO/CEI 10646-1: 2000 n'autorise pas " |
|||
"le\n" |
|||
"mappage des substituts non appariés. Pour plus d'informations, veuillez vous " |
|||
"référer\n" |
|||
"à http://www.unicode.org/ et http://www.cl.cam.ac.uk/~mgk25/unicode.html\n" |
|||
"\n" |
|||
}, |
|||
{ TEXT_INVALID_URI, 0, |
|||
"URI doit être correctement protégés, ils ne doivent pas contenir unescaped\n" |
|||
"caractères ci-dessous U + 0021, y compris le caractère d'espace et non\n" |
|||
"ci-dessus U + 007E. Tidy échappe à l'URI pour vous comme recommandé par\n" |
|||
"HTML 4.01 section B.2.1 et XML 1.0 section 4.2.2. Certains agents utilisateurs\n" |
|||
"utiliser un autre algorithme pour échapper à ces URI et un serveur-verso\n" |
|||
"scripts dépendent de cela. Si vous voulez compter sur cela, vous devez\n" |
|||
"échapper à l'URI sur votre propre. Pour plus d'informations s'il vous plaît se " |
|||
"référer à\n" |
|||
"http://www.w3.org/International/O-URL-and-ident.html\n" |
|||
"\n" |
|||
}, |
|||
{ TEXT_BAD_FORM, 0, |
|||
"Vous devrez peut-être déplacer un ou deux de la<form>et</form>\n" |
|||
"tags. Éléments HTML doivent être correctement imbriquées et les éléments\n" |
|||
"de formulaire ne font pas exception. Par exemple, vous ne devez pas placer la\n" |
|||
"<form>dans une cellule et la </form>dans un autre. Si le <form>est placé\n" |
|||
"devant une table, le </form>ne peut pas être placé à l'intérieur de la table !\n" |
|||
"Notez qu'une forme ne peut pas être imbriquée dans un autre !\n" |
|||
"\n" |
|||
}, |
|||
{ TEXT_BAD_MAIN, 0, |
|||
"Qu'un seul <main> élément est autorisé dans un document.\n" |
|||
"Les <main>éléments ont été jetées, qui peut invalider le document\n" |
|||
"\n" |
|||
}, |
|||
{ TEXT_M_SUMMARY, 0, |
|||
"L'attribut summary table devrait servir à décrire la structure\n" |
|||
"de la table. Il est très utile pour les personnes utilisant des\n" |
|||
"navigateurs non visuels. Les attributs de portée et en-têtes\n" |
|||
"pour les cellules d'un tableau servent utiles pour spécifier les\n" |
|||
"en-têtes s'appliquent à chaque cellule du tableau, permettant\n" |
|||
"aux navigateurs non visuels fournir un contexte pour chaque cellule.\n" |
|||
"\n" |
|||
}, |
|||
{ TEXT_M_IMAGE_ALT, 0, |
|||
"L'attribut alt devrait servir à donner une brève description d'une\n" |
|||
"image ; Il faudrait aussi des descriptions plus longues avec l'attribut\n" |
|||
"longdesc qui prend une URL liée à la description. Ces mesures sont\n" |
|||
"nécessaires pour les personnes utilisant des navigateurs textuels.\n" |
|||
"\n" |
|||
}, |
|||
{ TEXT_M_IMAGE_MAP, 0, |
|||
"Utilisation côté client images interactives préférence cartes-images\n" |
|||
"côté serveur comme celui-ci est inaccessibles aux personnes utilisant\n" |
|||
"des navigateurs non graphiques. En outre, les cartes côté client sont\n" |
|||
"plus faciles à mettre en place et fournir une rétroaction immédiate\n" |
|||
"aux utilisateurs.\n" |
|||
"\n" |
|||
}, |
|||
{ TEXT_M_LINK_ALT, 0, |
|||
"Liens hypertextes définie à l'aide d'une hyperimage côté client, vous\n" |
|||
"devez utiliser l'attribut alt pour fournir une description textuelle de la\n" |
|||
"liaison pour les personnes utilisant des navigateurs textuels.\n" |
|||
"\n" |
|||
}, |
|||
{ TEXT_USING_FRAMES, 0, |
|||
"Pages conçues à l'aide de cadres pose des problèmes pour\n" |
|||
"les personnes qui sont aveugles ou utilisez un navigateur qui\n" |
|||
"ne supporte pas les frames. Une page de base de cadres doit\n" |
|||
"toujours inclure une disposition alternative à l'intérieur d'un\n" |
|||
"élément NOFRAMES.\n" |
|||
"\n" |
|||
}, |
|||
{ TEXT_ACCESS_ADVICE1, 0, |
|||
"Pour plus d'informations sur la façon de rendre vos pages\n" |
|||
"accessibles, voir http://www.w3.org/WAI/GL" |
|||
}, |
|||
{ TEXT_ACCESS_ADVICE2, 0, "et http://www.html-tidy.org/Accessibility/" }, |
|||
{ TEXT_USING_LAYER, 0, |
|||
"Les Cascading Style Sheets (CSS) mécanisme de positionnement\n" |
|||
"Il est recommandé de préférence à la propriétaire <LAYER>\n" |
|||
"élément grâce à l'appui du fournisseur limitée pour la LAYER.\n" |
|||
"\n" |
|||
}, |
|||
{ TEXT_USING_SPACER, 0, |
|||
"Il est recommandé d'utiliser les CSS pour contrôler blanc\n" |
|||
"espace (par exemple pour retrait, les marges et interlignes).\n" |
|||
"Le <SPACER> élément propriétaire a le soutien des fournisseurs limité.\n" |
|||
"\n" |
|||
}, |
|||
{ TEXT_USING_FONT, 0, |
|||
"Il est recommandé d'utiliser les CSS pour spécifier la police et\n" |
|||
"propriétés telles que sa taille et sa couleur. Cela permettra de réduire\n" |
|||
"la taille des fichiers HTML et de les rendre plus faciles à entretenir\n" |
|||
"rapport à l'utilisation <FONT> éléments.\n" |
|||
"\n" |
|||
}, |
|||
{ TEXT_USING_NOBR, 0, |
|||
"Il est recommandé d'utiliser les CSS pour contrôler les sauts de ligne.\n" |
|||
"Utilisez \"white-space: nowrap\" pour inhiber emballage en place\n" |
|||
"d'insertion <NOBR> ... </ NOBR> dans le balisage.\n" |
|||
"\n" |
|||
}, |
|||
{ TEXT_USING_BODY, 0, |
|||
"Il est recommandé d'utiliser les CSS pour spécifier la page et de liaison des " |
|||
"couleurs\n" |
|||
}, |
|||
{ TEXT_NEEDS_INTERVENTION, 0, |
|||
"Ce document contient des erreurs qui doivent être résolus avant\n" |
|||
"utilisant HTML Tidy pour générer une version rangé.\n" |
|||
"\n" |
|||
}, |
|||
{ TEXT_GENERAL_INFO, 0, |
|||
"A propos de HTML Tidy: https://github.com/htacg/tidy-html5\n" |
|||
"Les rapports de bugs et commentaires: https://github.com/htacg/tidy-html5/issues\n" |
|||
"Liste officielle de diffusion: https://lists.w3.org/Archives/Public/public-htacg/\n" |
|||
"Spécification HTML dernière: http://dev.w3.org/html5/spec-author-view/\n" |
|||
"Validez vos documents HTML: http://validator.w3.org/nu/\n" |
|||
"Hall de votre entreprise à rejoindre le W3C: http://www.w3.org/Consortium\n" |
|||
"\n" |
|||
}, |
|||
{ TEXT_GENERAL_INFO_PLEA, 0, |
|||
"\n" |
|||
"Parlez-vous une langue autre que l'anglais ou une autre variante de\n" |
|||
"Anglais? Considérez-nous aidant à localiser HTML Tidy. Pour plus de détails s'il " |
|||
"vous plaît voir\n" |
|||
"https://github.com/htacg/tidy-html5/blob/master/README/LOCALIZE.md\n" |
|||
}, |
|||
{ TidyInfoString, 0, "Info:" }, |
|||
{ TidyWarningString, 0, "Attention:" }, |
|||
{ TidyConfigString, 0, "Config:" }, |
|||
{ TidyAccessString, 0, "Accès:" }, |
|||
{ TidyErrorString, 0, "Erreur:" }, |
|||
{ TidyBadDocumentString, 0, "Document:" }, |
|||
{ TidyFatalString, 0, "Panique:" }, |
|||
{ ENCODING_MISMATCH, 0, "codage d'entrée spécifiée (%s) ne correspond pas réelle encodage d'entrée (%s)" }, |
|||
{ VENDOR_SPECIFIC_CHARS, 0, "%s de code de caractère invalide l'%s" }, |
|||
{ INVALID_SGML_CHARS, 0, "%s de code de caractère invalide l'%s" }, |
|||
{ INVALID_UTF8, 0, "%s invalides octets UTF-8 de (char. codes %s)" }, |
|||
{ INVALID_UTF16, 0, "paire de substitution non valide UTF-16 (code de caract. %s) %s" }, |
|||
{ INVALID_NCR, 0, "Référence de caractère numérique non valide de %s %s" }, |
|||
{ MISSING_SEMICOLON, 0, "entité « %s » ne s'arrête pas à «; »" }, |
|||
{ MISSING_SEMICOLON_NCR, 0, "Référence de caractère numérique « %s » n'est pas se terminer par «; »" }, |
|||
{ UNESCAPED_AMPERSAND, 0, "sans séquence d'échappement & qui devrait être écrit comme &" }, |
|||
{ UNKNOWN_ENTITY, 0, "sans séquence d'échappement & ou entité inconnue « %s »" }, |
|||
{ APOS_UNDEFINED, 0, "nommée l'entité ' seulement défini en XML/XHTML" }, |
|||
{ INSERTING_ATTRIBUTE, 0, "%s insérer l'attribut « %s »" }, |
|||
{ INSERTING_AUTO_ATTRIBUTE, 0, "%s insérer l'attribut « %s », à l'aide de la valeur « %s »" }, |
|||
{ MISSING_ATTR_VALUE, 0, "L'attribut %s a une valeur non valide \"%s\"" }, |
|||
{ UNKNOWN_ATTRIBUTE, 0, "L'attribut %s a une valeur non valide \"%s\"" }, |
|||
{ PROPRIETARY_ATTRIBUTE, 0, "L'attribut %s a une valeur non valide \"%s\"" }, |
|||
{ JOINING_ATTRIBUTE, 0, "%s rejoignant les valeurs d'attribut répétée « %s »" }, |
|||
{ XML_ATTRIBUTE_VALUE, 0, "L'attribut %s a une valeur non valide \"%s\"" }, |
|||
{ XML_ID_SYNTAX, 0, "ID de %s « %s » utilise la syntaxe XML ID" }, |
|||
{ ATTR_VALUE_NOT_LCASE, 0, "valeur d'attribut de %s « %s » doit être en minuscules pour XHTML" }, |
|||
{ PROPRIETARY_ATTR_VALUE, 0, "valeur d'attribut propriétaire de %s « %s »" }, |
|||
{ ANCHOR_NOT_UNIQUE, 0, "%s anchor \"%s\" déjà défini" }, |
|||
{ BAD_ATTRIBUTE_VALUE, 0, "L'attribut %s \"%s\" a une valeur non valide \"%s\"" }, |
|||
{ BAD_ATTRIBUTE_VALUE_REPLACED, 0, "%s attribut « %s » a une valeur non valide « %s » et a été remplacé" }, |
|||
{ INVALID_ATTRIBUTE, 0, "nom d'attribut de %s « %s » (valeur = « %s ») n'est pas valide" }, |
|||
{ REPEATED_ATTRIBUTE, 0, "%s laissant tomber la valeur « %s » pour l'attribut répétée « %s »" }, |
|||
{ INVALID_XML_ID, 0, "%s ne peut pas copier le nom attribut id" }, |
|||
{ UNEXPECTED_GT, 0, "manquant '>' pour tag: %s" }, |
|||
{ UNEXPECTED_QUOTEMARK, 0, "%s inattendue ou double quote mark" }, |
|||
{ MISSING_QUOTEMARK, 0, "%s attribut manquant apostrophe droite" }, |
|||
{ UNEXPECTED_END_OF_FILE_ATTR, 0, "%s fin de fichier lors de l'analyse d'attributs" }, |
|||
{ ID_NAME_MISMATCH, 0, "%s id et le nom valeur d'attribut mismatch" }, |
|||
{ BACKSLASH_IN_URI, 0, "référence URI %s contient des anti-slash. Faute de frappe ?" }, |
|||
{ FIXED_BACKSLASH, 0, "%s conversion de barre oblique inverse d'URI de slash" }, |
|||
{ ILLEGAL_URI_REFERENCE, 0, "%s mal échappé référence URI" }, |
|||
{ ESCAPED_ILLEGAL_URI, 0, "%s échapper malformé référence URI" }, |
|||
{ NEWLINE_IN_URI, 0, "rejeter la nouvelle ligne de %s en référence URI" }, |
|||
{ WHITE_IN_URI, 0, "jeter le espaces de %s en référence URI" }, |
|||
{ UNEXPECTED_EQUALSIGN, 0, "%s unexpected '=', nom d'attribut attendu" }, |
|||
{ MISSING_IMAGEMAP, 0, "%s doivent utiliser côté client image map" }, |
|||
{ MISSING_ATTRIBUTE, 0, "%s manque attribut \"%s\"" }, |
|||
{ NESTED_EMPHASIS, 0, "accent imbriquée %s" }, |
|||
{ NESTED_QUOTATION, 0, "imbriqué \"q\" éléments, typo possible" }, |
|||
{ OBSOLETE_ELEMENT, 0, "remplaçant élément obsolète %s avec %s" }, |
|||
{ COERCE_TO_ENDTAG_WARN, 0, "<%s> est probablement destinée en tant que </%s>" }, |
|||
{ REMOVED_HTML5, 0, "L'élément de %s retiré HTML5" }, |
|||
{ BAD_SUMMARY_HTML5, 0, "L'attribut summary sur l'élément du %s est obsolète dans HTML5" }, |
|||
{ TRIM_EMPTY_ELEMENT, 0, "rognage vide %s" }, |
|||
{ REPLACING_ELEMENT, 0, "remplaçant %s avec %s" }, |
|||
{ COERCE_TO_ENDTAG, 0, "<%s> est probablement destinée en tant que </%s>" }, |
|||
{ REPLACING_UNEX_ELEMENT, 0, "remplacement inattendu %s avec %s" }, |
|||
{ MISSING_ENDTAG_FOR, 0, "manquant </%s>" }, |
|||
{ MISSING_ENDTAG_BEFORE, 0, "manquante </%s> avant %s" }, |
|||
{ DISCARDING_UNEXPECTED, 0, "rejet inattendu %s" }, |
|||
{ NON_MATCHING_ENDTAG, 0, "remplacement inattendu %s avec </%s>" }, |
|||
{ TAG_NOT_ALLOWED_IN, 0, "%s n'est pas autorisé dans <%s> éléments" }, |
|||
{ MISSING_STARTTAG, 0, "manquant <%s>" }, |
|||
{ UNEXPECTED_ENDTAG, 0, "rejet inattendu </%s>" }, |
|||
{ TOO_MANY_ELEMENTS, 0, "trop de %s éléments" }, |
|||
{ USING_BR_INPLACE_OF, 0, "utilisant <br> à la place de %s" }, |
|||
{ INSERTING_TAG, 0, "insertion implicite <%s>" }, |
|||
{ CANT_BE_NESTED, 0, "%s ne peut pas être imbriquée" }, |
|||
{ PROPRIETARY_ELEMENT, 0, "%s n'est pas approuvé par le W3C" }, |
|||
{ ILLEGAL_NESTING, 0, "%s ne doivent pas être imbriqués" }, |
|||
{ NOFRAMES_CONTENT, 0, "%s non à l'intérieur 'noframes'" }, |
|||
{ UNEXPECTED_END_OF_FILE, 0, "fin inattendue du fichier %s" }, |
|||
{ ELEMENT_NOT_EMPTY, 0, "%s élément non vide ou pas fermée" }, |
|||
{ UNEXPECTED_ENDTAG_IN, 0, "inattendus </%s> dans <%s>" }, |
|||
{ TOO_MANY_ELEMENTS_IN, 0, "trop de %s éléments dans <%s>" }, |
|||
{ UNESCAPED_ELEMENT, 0, "unescaped %s dans le contenu pre" }, |
|||
{ DOCTYPE_AFTER_TAGS, 0, "<! DOCTYPE> est pas autorisé après éléments" }, |
|||
{ MISSING_TITLE_ELEMENT, 0, "insertion manquante élément 'title'" }, |
|||
{ INCONSISTENT_VERSION, 0, "DOCTYPE HTML ne correspond pas à un contenu" }, |
|||
{ MISSING_DOCTYPE, 0, "manquante <!DOCTYPE> déclaration" }, |
|||
{ CONTENT_AFTER_BODY, 0, "contenu se produit après la fin du body" }, |
|||
{ MALFORMED_COMMENT, 0, "tirets adjacents dans un commentaire" }, |
|||
{ BAD_COMMENT_CHARS, 0, "attendre -- ou >" }, |
|||
{ BAD_CDATA_CONTENT, 0, "'<' + '/' + lettre non permis ici" }, |
|||
{ INCONSISTENT_NAMESPACE, 0, "le namespace HTML ne correspond pas au contenu" }, |
|||
{ SPACE_PRECEDING_XMLDECL, 0, "supprimant l'espace blanc précédent Déclaration XML" }, |
|||
{ MALFORMED_DOCTYPE, 0, "en rejetant malformé <!DOCTYPE>" }, |
|||
{ BAD_XML_COMMENT, 0, "commentaires XML ne peut pas contenir --" }, |
|||
{ DTYPE_NOT_UPPER_CASE, 0, "SYSTEM, PUBLIC, W3C, DTD, EN doit être en majuscules" }, |
|||
{ ENCODING_IO_CONFLICT, 0, "encodage de sortie ne fonctionne pas avec la sortie standard" }, |
|||
{ SUSPECTED_MISSING_QUOTE, 0, "manquant guillemet pour la valeur d'attribut" }, |
|||
{ DUPLICATE_FRAMESET, 0, "élément répété FRAMESET" }, |
|||
{ UNKNOWN_ELEMENT, 0, "%s n'est pas reconnue !" }, |
|||
{ PREVIOUS_LOCATION, 0, "<%s> précédemment mentionnés" }, |
|||
{ TidyXmlDecl, 0, |
|||
"Cette option spécifie si Tidy devrait ajouter la déclaration XML lors de la sortie " |
|||
"XML ou XHTML. <br/> Notez que si l'entrée comprend déjà un <code> & lt;?xml ... &>" |
|||
"</code> déclaration alors cette option sera ignorée. <br/> Si l'encodage pour la " |
|||
"sortie est différente de <var>ascii</var>, l'un des l'encodage <var>utf*</var> ou " |
|||
"<var>raw</var>, la déclaration est toujours ajouté que requis par la norme XML." |
|||
}, |
|||
{ TidyXmlSpace, 0, |
|||
"Cette option spécifie si tidy doit ajouter <code>xml:espace=\"préserver \"</code> " |
|||
"pour des éléments tels que <code><pré></code>, <code><style></code> et " |
|||
"<code><script></code> lors de la génération de XML. <br />Il est nécessaire si " |
|||
"l'espace blanc dans ces éléments doit être analysée de manière appropriée sans avoir " |
|||
"accès à la DTD." |
|||
}, |
|||
{ TidyAltText, 0, |
|||
"Cette option spécifie la valeur par défaut <code>alt=</code> utilise le texte Tidy " |
|||
"pour <code><img></code> attributs lorsque le <code>alt=</code> attribut est " |
|||
"absent. <br/> Utiliser avec précaution, car il est de votre responsabilité de rendre " |
|||
"vos documents accessibles aux personnes qui ne peuvent pas voir les images." |
|||
}, |
|||
{ TidyXmlPIs, 0, |
|||
"Cette option spécifie si Tidy doit modifier l'analyse syntaxique des instructions de " |
|||
"traitement pour exiger <code>?></code> comme terminateur plutôt que <code>></" |
|||
"code>. <br/> Cette option est automatiquement activée si l'entrée est en XML." |
|||
}, |
|||
{ TidyMakeBare, 0, |
|||
"Cette option spécifie si Tidy doit dépouiller Microsoft HTML spécifique à partir de " |
|||
"Word 2000 documents, et des espaces de sortie plutôt que des espaces insécables où " |
|||
"ils existent dans l'entrée." |
|||
}, |
|||
{ TidyCSSPrefix, 0, |
|||
"Cette option spécifie le préfixe que Tidy utilise des règles de styles. <br/> Par " |
|||
"défaut, <var>c</var> sera utilisé." |
|||
}, |
|||
{ TidyMakeClean, 0, |
|||
"Cette option spécifie si Tidy doit effectuer le nettoyage de certains anciens tags " |
|||
"de présentation (actuellement de <code>& lt; i></code>, <code><b></code>, " |
|||
"<code><centre></code> lorsque placé entre les balises inline appropriées, et " |
|||
"<code>< font></code>). Si <var>yes</var> puis balises existantes seront " |
|||
"remplacées par CSS le <code><style></code> balises et le balisage structurel " |
|||
"selon le cas." |
|||
}, |
|||
{ TidyGDocClean, 0, |
|||
"Cette option spécifie si Tidy doit permettre un comportement spécifique pour le " |
|||
"nettoyage HTML exporté à partir de Google Docs." |
|||
}, |
|||
{ TidyDoctype, 0, |
|||
"Cette option spécifie la déclaration DOCTYPE générée par Tidy. <br/> Si <var>omit</" |
|||
"var> la sortie ne contiendra une déclaration DOCTYPE. Notez que ce cela implique " |
|||
"aussi <code>numeric-entities</code> est réglé sur <var>yes</var>. <br/> Si " |
|||
"<code>html5</code> le DOCTYPE est réglé sur <code><! DOCTYPE html></code>. <br/" |
|||
"> Si <var>auto</var> (par défaut) Tidy utilisera une supposition basée sur le " |
|||
"contenu du document. <br/> Si elle est définie <var>strict</var>, Tidy établira le " |
|||
"DOCTYPE du HTML4 ou XHTML 1 DTD stricte. <br/> Si <var>loose</var>, le DOCTYPE est " |
|||
"réglé sur le HTML4 ou XHTML1 loose (transitional) DTD. <br/> Alternativement, vous " |
|||
"pouvez fournir une chaîne pour l'identificateur public formel (FPI).<br/> Par " |
|||
"exemple: <br/> <code>doctype: \"- // ACME // DTD HTML. 3,14159 //EN\"</code> <br/> " |
|||
"Si vous spécifiez le FPI pour un document XHTML, Tidy établira l'identifiant du " |
|||
"système à une chaîne vide. Pour un document HTML, Tidy ajoute un identificateur de " |
|||
"système que si l'on était déjà présent dans le but de préserver le mode de certains " |
|||
"navigateurs de traitement. Tidy quitte le DOCTYPE pour les documents XML génériques " |
|||
"inchangés. <br/> Cette option ne permet pas une validation du document de conformité." |
|||
}, |
|||
{ TidyDropEmptyElems, 0, "Cette option spécifie si Tidy doit jeter des éléments vides." }, |
|||
{ TidyDropEmptyParas, 0, "Cette option spécifie si Tidy doit jeter des paragraphes vides." }, |
|||
{ TidyFixUri, 0, |
|||
"Cette option spécifie si Tidy doit vérifier les valeurs d'attributs qui portent URI " |
|||
"pour des caractères illégaux et si ce sont trouvés, leur échapper en HTML 4 " |
|||
"recommande." |
|||
}, |
|||
{ TidyPPrintTabs, 0, |
|||
"Cette option spécifie si tidy doit Indenter avec tabulation au lieu des espaces, en " |
|||
"supposant <code>indent</code> est <var>yes</var>. <br/>Définir sur <var>yes</var> " |
|||
"pour indenter avec des tabulations au lieu de la valeur par défaut des espaces. <br /" |
|||
">Utilisez l'option <code>indent-spaces</code> pour contrôler le nombre d'onglets " |
|||
"Sortie par niveau de tiret. Notez que lorsque <code>indent-with-tabs</code> est " |
|||
"activée. La valeur par défaut de <code>indent-spaces</code> est réinitialisé à " |
|||
"<var>1</var>. <br/>Remarque <code>tab-size</code> contrôle la conversion des espaces " |
|||
"d'entrée. Définissez-le à zéro pour conserver onglets de saisie." |
|||
}, |
|||
{ TidySkipNested, 0, |
|||
"Cette option spécifie que Tidy doit ignorer les balises imbriquées lors de l'analyse " |
|||
"des données de script et de style." |
|||
}, |
|||
{ TC_CAT_DIAGNOSTICS, 0, "diagnostics" }, |
|||
{ TC_CAT_ENCODING, 0, "encoding" }, |
|||
{ TC_CAT_MARKUP, 0, "markup" }, |
|||
{ TC_CAT_MISC, 0, "misc" }, |
|||
{ TC_CAT_PRETTYPRINT, 0, "imprimer" }, |
|||
{ TC_LABEL_COL, 0, "colonne" }, |
|||
{ TC_LABEL_FILE, 0, "fichier" }, |
|||
{ TC_LABEL_LANG, 0, "lang" }, |
|||
{ TC_LABEL_LEVL, 0, "niveau" }, |
|||
{ TC_LABEL_OPT, 0, "option" }, |
|||
{ TC_MAIN_ERROR_LOAD_CONFIG, 0, "Chargement du fichier de configuration \"%s\" a échoué, err =%d" }, |
|||
{ TC_OPT_ACCESS, 0, |
|||
"faire des vérifications d'accessibilité supplémentaires (<niveau> = 0, 1, 2, 3). 0 " |
|||
"est supposé si <niveau> est manquant." |
|||
}, |
|||
{ TC_OPT_ASCII, 0, "utiliser ISO-8859-1 pour l'entrée, US-ASCII pour la sortie" }, |
|||
{ TC_OPT_UPPER, 0, "balises de force en majuscules" }, |
|||
{ TC_TXT_HELP_3, 0,
    /* Closing section of the command-line help text. Command names, option
       names and file names inside the examples are literal and must not be
       translated: the sample invocation has to read "tidy ...", and the
       quoted "--option <value>" form needs matching double quotes. */
    "\n"
    "Options de configuration Tidy\n"
    "==========================\n"
    "Utilisez les options de configuration de Tidy comme arguments de ligne de commande "
    "sous la forme de \"--option <value>\", par exemple, \"--indent-with-tabs yes\"\n"
    "\n"
    "Pour une liste de toutes les options de configuration, utiliser \"-help-config\"\n"
    " ou consultez à la man page (si votre OS en a un).\n"
    "\n"
    "Si votre environnement a un ensemble de variables à un point de Tidy \n"
    "$HTML_TIDY fichier de configuration puis Tidy va tenter de l'utiliser.\n"
    "\n"
    "Sur certaines plateformes Tidy tentera également d'utiliser une configuration "
    "spécifiée dans /etc/tidy.conf ou ~/.tidy.conf.\n"
    "\n"
    "Autre\n"
    "=====\n"
    "Entrée/sortie par défaut utiliser stdin/stdout respectivement.\n"
    "\n"
    "Options de simple lettre en dehors de -f peuvent être combinés\n"
    "comme dans: tidy -f errs.txt -imu foo.html\n"
    "\n"
    "renseignements\n"
    "===========\n"
    "Pour plus d'informations à propos de HTML Tidy, voir\n"
    " http://www.html-tidy.org/\n"
    "\n"
    "Pour plus d'informations sur le langage HTML, consultez les rubriques suivantes:\n"
    "\n"
    " HTML: Edition pour les auteurs Web (de la dernière spécification de HTML)\n"
    " http://dev.w3.org/html5/spec-author-view\n"
    "\n"
},
|||
{ TC_TXT_HELP_CONFIG, 0,
    /* Introductory help text for Tidy's configuration settings. The sample
       option names and values ("wrap", "indent", "no") are literal
       configuration syntax, so they stay in their original form instead of
       being translated; only the surrounding prose is French. */
    "\n"
    "HTML Tidy paramètres de configuration\n"
    "\n"
    "Dans un fichier, utilisez le formulaire:\n"
    "\n"
    "wrap: 72\n"
    "indent: no\n"
    "\n"
    "Quand il est spécifié sur la ligne de commande, utilisez le formulaire:\n"
    "\n"
    "--wrap 72 --indent no\n"
    "\n"
},
|||
{ TC_TXT_HELP_CONFIG_NAME, 0, "Nom" }, |
|||
{ TC_TXT_HELP_CONFIG_TYPE, 0, "Type" }, |
|||
{ TC_TXT_HELP_CONFIG_ALLW, 0, "Les valeurs autorisées" }, |
|||
{ TC_TXT_HELP_LANG_1, 0, |
|||
"\n" |
|||
"L'option --language (ou --lang) indique la langue Tidy\n" |
|||
"doit utiliser pour communiquer sa sortie. S'il vous plaît noter que ce ne sont pas " |
|||
"un service de traduction de documents, et affecte uniquement les messages qui Tidy " |
|||
"communique à vous.\n" |
|||
"\n" |
|||
"Lorsqu'il est utilisé à partir de la ligne de commande de l'argument doit \n" |
|||
"--language être utilisé avant des arguments qui résultent de la production, sinon " |
|||
"Tidy\n" |
|||
"va produire une sortie avant qu'il connaît la langue à utiliser.\n" |
|||
"\n" |
|||
"En plus des codes de langue standard POSIX, Tidy est capable de\n" |
|||
"héritées compréhension codes de langue de Windows. S'il vous plaît noter que \n" |
|||
"cette liste indique les codes Tidy comprend, et ne signifie pas que\n" |
|||
"la langue est actuellement installé.\n" |
|||
"\n" |
|||
"La colonne de droite indique comment Tidy comprendra le\n" |
|||
"héritage nom Windows.\n" |
|||
"\n" |
|||
}, |
|||
{ TC_TXT_HELP_LANG_2, 0,
    /* Second part of the language help text. "en" is a language code (the
       fallback localization), not the English word "and", so it must not
       be translated. */
    "\n"
    "Notez qu'il n'y a aucune garantie qu'ils sont complets; seulement ça\n"
    "un développeur ou d'une autre ont commencé à ajouter la langue indiquée.\n"
    "Les localisations incomplètes utiliseront par défaut \"en\" si nécessaire.\n"
    "S'il vous plaît signaler les cas de chaînes incorrectes à l'équipe Tidy.\n"
    "\n"
},
|||
{ TC_TXT_HELP_LANG_3, 0, |
|||
"\n" |
|||
"Si Tidy est capable de déterminer votre localisation puis Tidy utilisera le\n" |
|||
"langue locale automatiquement. Par exemple les systèmes Unix-like utilisent un $LANG " |
|||
"et/ou $LC_ALL variable d'environnement. Consultez votre exploitation documentation " |
|||
"du système pour plus d'informations.\n" |
|||
"\n" |
|||
}, |
|||
|
|||
{/* This MUST be present and last. */ |
|||
TIDY_MESSAGE_TYPE_LAST, 0, NULL |
|||
} |
|||
|
|||
}}; |
|||
|
|||
|
|||
#endif /* language_fr_h */ |
@ -0,0 +1,81 @@ |
|||
#ifndef language_zh_cn_h |
|||
#define language_zh_cn_h |
|||
/*
|
|||
* language_zh_cn.h |
|||
* Localization support for HTML Tidy. |
|||
* |
|||
* |
|||
* This file is a localization file for HTML Tidy. It will have been machine |
|||
* generated or created and/or edited by hand. Both are valid options, but |
|||
* please help keep our localization efforts simple to maintain by maintaining |
|||
* the structure of this file, and changing the check box below if you make |
|||
* changes (so others know the file origin): |
|||
* |
|||
* [X] THIS FILE IS MACHINE GENERATED. It is a localization file for the |
|||
* language (and maybe region) "zh_cn". The source of |
|||
* these strings is a gettext PO file in Tidy's source, probably called |
|||
* "language_zh_cn.po". |
|||
* |
|||
* [ ] THIS FILE WAS HAND MODIFIED. Translators, please feel free to edit this file |
|||
* directly (and check this box). If you prefer to edit PO files then use |
|||
* `poconvert.rb msgunfmt language_zh_cn.h` (our own |
|||
* conversion tool) to generate a fresh PO from this file first! |
|||
* |
|||
* (c) 2015 HTACG |
|||
* See tidy.h and access.h for the copyright notice. |
|||
* |
|||
* Template Created by Jim Derry on 01/14/2016. |
|||
* |
|||
* Originating PO file metadata: |
|||
* PO_LAST_TRANSLATOR=jderry |
|||
* PO_REVISION_DATE=2016-03-24 10:59:55 |
|||
*/ |
|||
|
|||
#ifdef _MSC_VER |
|||
#pragma execution_character_set("utf-8") |
|||
#endif |
|||
|
|||
#include "language.h" |
|||
#include "access.h" |
|||
#include "message.h" |
|||
|
|||
|
|||
/**
|
|||
* This language-specific function returns the correct pluralForm |
|||
* to use given n items, and is used as a member of each language |
|||
* definition. |
|||
*/ |
|||
static uint whichPluralForm_zh_cn(uint n) { |
|||
/* Plural-Forms: nplurals=1; */ |
|||
return 0; |
|||
} |
|||
|
|||
|
|||
/**
|
|||
* This structure specifies all of the strings needed by Tidy for a |
|||
* single language. Static definition in a header file makes it |
|||
* easy to include and exclude languages without tinkering with |
|||
* the build system. |
|||
*/ |
|||
static languageDefinition language_zh_cn = { whichPluralForm_zh_cn, { |
|||
/***************************************
|
|||
** This MUST be present and first. |
|||
** Specify the code for this language. |
|||
***************************************/ |
|||
{/* Specify the ll or ll_cc language code here. */ |
|||
TIDY_LANGUAGE, 0, "zh_cn" |
|||
}, |
|||
{ FILE_CANT_OPEN, 0, "无法打开”%s”\n" }, |
|||
{ LINE_COLUMN_STRING, 0, "行 %d 列 %d - " }, |
|||
{ STRING_CONTENT_LOOKS, 0, "文档内容看起来像 %s" }, |
|||
{ TC_STRING_VERS_A, 0, "HTML Tidy 用于 %s 版本 %s" }, |
|||
{ TC_STRING_VERS_B, 0, "HTML Tidy 版本 %s" }, |
|||
|
|||
{/* This MUST be present and last. */ |
|||
TIDY_MESSAGE_TYPE_LAST, 0, NULL |
|||
} |
|||
|
|||
}}; |
|||
|
|||
|
|||
#endif /* language_zh_cn_h */ |
File diff suppressed because it is too large
@ -0,0 +1,620 @@ |
|||
#ifndef __LEXER_H__ |
|||
#define __LEXER_H__ |
|||
|
|||
/* lexer.h -- Lexer for html parser
|
|||
|
|||
(c) 1998-2008 (W3C) MIT, ERCIM, Keio University |
|||
See tidy.h for the copyright notice. |
|||
|
|||
Given an input source, it returns a sequence of tokens. |
|||
|
|||
GetToken(source) gets the next token |
|||
UngetToken(source) provides one level undo |
|||
|
|||
The tags include an attribute list: |
|||
|
|||
- linked list of attribute/value nodes |
|||
- each node has 2 NULL-terminated strings. |
|||
- entities are replaced in attribute values |
|||
|
|||
white space is compacted if not in preformatted mode |
|||
If not in preformatted mode then leading white space |
|||
is discarded and subsequent white space sequences |
|||
compacted to single space characters. |
|||
|
|||
If XmlTags is no then Tag names are folded to upper |
|||
case and attribute names to lower case. |
|||
|
|||
Not yet done: |
|||
- Doctype subset and marked sections |
|||
*/ |
|||
|
|||
#ifdef __cplusplus |
|||
extern "C" { |
|||
#endif |
|||
|
|||
#include "forward.h" |
|||
|
|||
/* lexer character types
|
|||
*/ |
|||
#define digit 1u |
|||
#define letter 2u |
|||
#define namechar 4u |
|||
#define white 8u |
|||
#define newline 16u |
|||
#define lowercase 32u |
|||
#define uppercase 64u |
|||
#define digithex 128u |
|||
|
|||
|
|||
/* node->type is one of these values
|
|||
*/ |
|||
typedef enum
{
    RootNode    = 0,
    DocTypeTag  = 1,
    CommentTag  = 2,
    ProcInsTag  = 3,
    TextNode    = 4,
    StartTag    = 5,
    EndTag      = 6,
    StartEndTag = 7,
    CDATATag    = 8,
    SectionTag  = 9,
    AspTag      = 10,
    JsteTag     = 11,
    PhpTag      = 12,
    XmlDecl     = 13
} NodeType;
|||
|
|||
|
|||
|
|||
/* lexer GetToken states
|
|||
*/ |
|||
typedef enum
{
    LEX_CONTENT   = 0,
    LEX_GT        = 1,
    LEX_ENDTAG    = 2,
    LEX_STARTTAG  = 3,
    LEX_COMMENT   = 4,
    LEX_DOCTYPE   = 5,
    LEX_PROCINSTR = 6,
    LEX_CDATA     = 7,
    LEX_SECTION   = 8,
    LEX_ASP       = 9,
    LEX_JSTE      = 10,
    LEX_PHP       = 11,
    LEX_XMLDECL   = 12
} LexerState;
|||
|
|||
/* ParseDocTypeDecl state constants */ |
|||
typedef enum
{
    DT_INTERMEDIATE = 0,
    DT_DOCTYPENAME  = 1,
    DT_PUBLICSYSTEM = 2,
    DT_QUOTEDSTRING = 3,
    DT_INTSUBSET    = 4
} ParseDocTypeDeclState;
|||
|
|||
/* content model shortcut encoding
|
|||
|
|||
Descriptions are tentative. |
|||
*/ |
|||
#define CM_UNKNOWN 0 |
|||
/* Elements with no content. Map to HTML specification. */ |
|||
#define CM_EMPTY (1 << 0) |
|||
/* Elements that appear outside of "BODY". */ |
|||
#define CM_HTML (1 << 1) |
|||
/* Elements that can appear within HEAD. */ |
|||
#define CM_HEAD (1 << 2) |
|||
/* HTML "block" elements. */ |
|||
#define CM_BLOCK (1 << 3) |
|||
/* HTML "inline" elements. */ |
|||
#define CM_INLINE (1 << 4) |
|||
/* Elements that mark list item ("LI"). */ |
|||
#define CM_LIST (1 << 5) |
|||
/* Elements that mark definition list item ("DD", "DT"). */ |
|||
#define CM_DEFLIST (1 << 6) |
|||
/* Elements that can appear inside TABLE. */ |
|||
#define CM_TABLE (1 << 7) |
|||
/* Used for "THEAD", "TFOOT" or "TBODY". */ |
|||
#define CM_ROWGRP (1 << 8) |
|||
/* Used for "TD", "TH" */ |
|||
#define CM_ROW (1 << 9) |
|||
/* Elements whose content must be protected against white space movement.
|
|||
 Includes some elements that can be found in forms. */ |
|||
#define CM_FIELD (1 << 10) |
|||
/* Used to avoid propagating inline emphasis inside some elements
|
|||
such as OBJECT or APPLET. */ |
|||
#define CM_OBJECT (1 << 11) |
|||
/* Elements that allow "PARAM". */ |
|||
#define CM_PARAM (1 << 12) |
|||
/* "FRAME", "FRAMESET", "NOFRAMES". Used in ParseFrameSet. */ |
|||
#define CM_FRAMES (1 << 13) |
|||
/* Heading elements (h1, h2, ...). */ |
|||
#define CM_HEADING (1 << 14) |
|||
/* Elements with an optional end tag. */ |
|||
#define CM_OPT (1 << 15) |
|||
/* Elements that use "align" attribute for vertical position. */ |
|||
#define CM_IMG (1 << 16) |
|||
/* Elements with inline and block model. Used to avoid calling InlineDup. */ |
|||
#define CM_MIXED (1 << 17) |
|||
/* Elements whose content needs to be indented only if containing one
|
|||
CM_BLOCK element. */ |
|||
#define CM_NO_INDENT (1 << 18) |
|||
/* Elements that are obsolete (such as "dir", "menu"). */ |
|||
#define CM_OBSOLETE (1 << 19) |
|||
/* User defined elements. Used to determine how attributes without value
|
|||
should be printed. */ |
|||
#define CM_NEW (1 << 20) |
|||
/* Elements that cannot be omitted. */ |
|||
#define CM_OMITST (1 << 21) |
|||
|
|||
/* If the document uses just HTML 2.0 tags and attributes describe
|
|||
** it as HTML 2.0. Similarly for HTML 3.2 and the 3 flavors of HTML 4.0. |
|||
** If there are proprietary tags and attributes then describe it as |
|||
** HTML Proprietary. If it includes the xml-lang or xmlns attributes |
|||
** but is otherwise HTML 2.0, 3.2 or 4.0 then describe it as one of the |
|||
** flavors of Voyager (strict, loose or frameset). |
|||
*/ |
|||
|
|||
/* unknown */ |
|||
#define xxxx 0u |
|||
|
|||
/* W3C defined HTML/XHTML family document types */ |
|||
#define HT20 1u |
|||
#define HT32 2u |
|||
#define H40S 4u |
|||
#define H40T 8u |
|||
#define H40F 16u |
|||
#define H41S 32u |
|||
#define H41T 64u |
|||
#define H41F 128u |
|||
#define X10S 256u |
|||
#define X10T 512u |
|||
#define X10F 1024u |
|||
#define XH11 2048u |
|||
#define XB10 4096u |
|||
|
|||
/* proprietary stuff */ |
|||
#define VERS_SUN 8192u |
|||
#define VERS_NETSCAPE 16384u |
|||
#define VERS_MICROSOFT 32768u |
|||
|
|||
/* special flag */ |
|||
#define VERS_XML 65536u |
|||
|
|||
/* HTML5 */ |
|||
#define HT50 131072u |
|||
#define XH50 262144u |
|||
|
|||
/* compatibility symbols */ |
|||
#define VERS_UNKNOWN (xxxx) |
|||
#define VERS_HTML20 (HT20) |
|||
#define VERS_HTML32 (HT32) |
|||
#define VERS_HTML40_STRICT (H40S|H41S|X10S) |
|||
#define VERS_HTML40_LOOSE (H40T|H41T|X10T) |
|||
#define VERS_FRAMESET (H40F|H41F|X10F) |
|||
#define VERS_XHTML11 (XH11) |
|||
#define VERS_BASIC (XB10) |
|||
/* HTML5 */ |
|||
#define VERS_HTML5 (HT50|XH50) |
|||
|
|||
/* meta symbols */ |
|||
#define VERS_HTML40 (VERS_HTML40_STRICT|VERS_HTML40_LOOSE|VERS_FRAMESET) |
|||
#define VERS_IFRAME (VERS_HTML40_LOOSE|VERS_FRAMESET) |
|||
#define VERS_LOOSE (VERS_HTML20|VERS_HTML32|VERS_IFRAME) |
|||
#define VERS_EVENTS (VERS_HTML40|VERS_XHTML11) |
|||
#define VERS_FROM32 (VERS_HTML32|VERS_HTML40) |
|||
#define VERS_FROM40 (VERS_HTML40|VERS_XHTML11|VERS_BASIC) |
|||
#define VERS_XHTML (X10S|X10T|X10F|XH11|XB10|XH50) |
|||
|
|||
/* strict */ |
|||
#define VERS_STRICT (VERS_HTML5|VERS_HTML40_STRICT) |
|||
|
|||
/* all W3C defined document types */ |
|||
#define VERS_ALL (VERS_HTML20|VERS_HTML32|VERS_FROM40|XH50|HT50) |
|||
|
|||
/* all proprietary types */ |
|||
#define VERS_PROPRIETARY (VERS_NETSCAPE|VERS_MICROSOFT|VERS_SUN) |
|||
|
|||
/* Linked list of class names and styles
|
|||
*/ |
|||
struct _Style; |
|||
typedef struct _Style TagStyle; |
|||
|
|||
struct _Style |
|||
{ |
|||
tmbstr tag; |
|||
tmbstr tag_class; |
|||
tmbstr properties; |
|||
TagStyle *next; |
|||
}; |
|||
|
|||
|
|||
/* Linked list of style properties
|
|||
*/ |
|||
struct _StyleProp; |
|||
typedef struct _StyleProp StyleProp; |
|||
|
|||
struct _StyleProp |
|||
{ |
|||
tmbstr name; |
|||
tmbstr value; |
|||
StyleProp *next; |
|||
}; |
|||
|
|||
|
|||
|
|||
|
|||
/* Attribute/Value linked list node
|
|||
*/ |
|||
|
|||
struct _AttVal |
|||
{ |
|||
AttVal* next; |
|||
const Attribute* dict; |
|||
Node* asp; |
|||
Node* php; |
|||
int delim; |
|||
tmbstr attribute; |
|||
tmbstr value; |
|||
}; |
|||
|
|||
|
|||
|
|||
/*
|
|||
Mosaic handles inlines via a separate stack from other elements |
|||
We duplicate this to recover from inline markup errors such as: |
|||
|
|||
<i>italic text |
|||
<p>more italic text</b> normal text |
|||
|
|||
which for compatibility with Mosaic is mapped to: |
|||
|
|||
<i>italic text</i> |
|||
<p><i>more italic text</i> normal text |
|||
|
|||
 Note that any inline end tag pops the effect of the current |
|||
 inline start tag, so that </b> pops <i> in the above example. |
|||
*/ |
|||
struct _IStack |
|||
{ |
|||
IStack* next; |
|||
const Dict* tag; /* tag's dictionary definition */ |
|||
tmbstr element; /* name (NULL for text nodes) */ |
|||
AttVal* attributes; |
|||
}; |
|||
|
|||
|
|||
/* HTML/XHTML/XML Element, Comment, PI, DOCTYPE, XML Decl,
|
|||
** etc. etc. |
|||
*/ |
|||
|
|||
struct _Node |
|||
{ |
|||
Node* parent; /* tree structure */ |
|||
Node* prev; |
|||
Node* next; |
|||
Node* content; |
|||
Node* last; |
|||
|
|||
AttVal* attributes; |
|||
const Dict* was; /* old tag when it was changed */ |
|||
const Dict* tag; /* tag's dictionary definition */ |
|||
|
|||
tmbstr element; /* name (NULL for text nodes) */ |
|||
|
|||
uint start; /* start of span onto text array */ |
|||
uint end; /* end of span onto text array */ |
|||
NodeType type; /* TextNode, StartTag, EndTag etc. */ |
|||
|
|||
uint line; /* current line of document */ |
|||
uint column; /* current column of document */ |
|||
|
|||
Bool closed; /* true if closed by explicit end tag */ |
|||
Bool implicit; /* true if inferred */ |
|||
Bool linebreak; /* true if followed by a line break */ |
|||
|
|||
#ifdef TIDY_STORE_ORIGINAL_TEXT |
|||
tmbstr otext; |
|||
#endif |
|||
}; |
|||
|
|||
|
|||
/*
|
|||
The following are private to the lexer |
|||
Use NewLexer() to create a lexer, and |
|||
FreeLexer() to free it. |
|||
*/ |
|||
|
|||
struct _Lexer |
|||
{ |
|||
#if 0 /* Move to TidyDocImpl */
|
|||
StreamIn* in; /* document content input */ |
|||
StreamOut* errout; /* error output stream */ |
|||
|
|||
uint badAccess; /* for accessibility errors */ |
|||
uint badLayout; /* for bad style errors */ |
|||
uint badChars; /* for bad character encodings */ |
|||
uint badForm; /* for mismatched/mispositioned form tags */ |
|||
uint warnings; /* count of warnings in this document */ |
|||
uint errors; /* count of errors */ |
|||
#endif |
|||
|
|||
uint lines; /* lines seen */ |
|||
uint columns; /* at start of current token */ |
|||
Bool waswhite; /* used to collapse contiguous white space */ |
|||
Bool pushed; /* true after token has been pushed back */ |
|||
Bool insertspace; /* when space is moved after end tag */ |
|||
Bool excludeBlocks; /* Netscape compatibility */ |
|||
Bool exiled; /* true if moved out of table */ |
|||
Bool isvoyager; /* true if xmlns attribute on html element */ |
|||
uint versions; /* bit vector of HTML versions */ |
|||
uint doctype; /* version as given by doctype (if any) */ |
|||
uint versionEmitted; /* version of doctype emitted */ |
|||
Bool bad_doctype; /* e.g. if html or PUBLIC is missing */ |
|||
uint txtstart; /* start of current node */ |
|||
uint txtend; /* end of current node */ |
|||
LexerState state; /* state of lexer's finite state machine */ |
|||
|
|||
Node* token; /* last token returned by GetToken() */ |
|||
Node* itoken; /* last duplicate inline returned by GetToken() */ |
|||
Node* root; /* remember root node of the document */ |
|||
Node* parent; /* remember parent node for CDATA elements */ |
|||
|
|||
Bool seenEndBody; /* true if a </body> tag has been encountered */ |
|||
Bool seenEndHtml; /* true if a </html> tag has been encountered */ |
|||
|
|||
/*
|
|||
Lexer character buffer |
|||
|
|||
Parse tree nodes span onto this buffer |
|||
which contains the concatenated text |
|||
contents of all of the elements. |
|||
|
|||
lexsize must be reset for each file. |
|||
*/ |
|||
tmbstr lexbuf; /* MB character buffer */ |
|||
uint lexlength; /* allocated */ |
|||
uint lexsize; /* used */ |
|||
|
|||
/* Inline stack for compatibility with Mosaic */ |
|||
Node* inode; /* for deferring text node */ |
|||
IStack* insert; /* for inferring inline tags */ |
|||
IStack* istack; |
|||
uint istacklength; /* allocated */ |
|||
uint istacksize; /* used */ |
|||
uint istackbase; /* start of frame */ |
|||
|
|||
TagStyle *styles; /* used for cleaning up presentation markup */ |
|||
|
|||
TidyAllocator* allocator; /* allocator */ |
|||
|
|||
#if 0 |
|||
TidyDocImpl* doc; /* Pointer back to doc for error reporting */ |
|||
#endif |
|||
}; |
|||
|
|||
|
|||
/* Lexer Functions
|
|||
*/ |
|||
|
|||
/* choose what version to use for new doctype */ |
|||
int TY_(HTMLVersion)( TidyDocImpl* doc ); |
|||
|
|||
/* everything is allowed in proprietary version of HTML */ |
|||
/* this is handled here rather than in the tag/attr dicts */ |
|||
|
|||
void TY_(ConstrainVersion)( TidyDocImpl* doc, uint vers ); |
|||
|
|||
Bool TY_(IsWhite)(uint c); |
|||
Bool TY_(IsDigit)(uint c); |
|||
Bool TY_(IsLetter)(uint c); |
|||
Bool TY_(IsHTMLSpace)(uint c); |
|||
Bool TY_(IsNewline)(uint c); |
|||
Bool TY_(IsNamechar)(uint c); |
|||
Bool TY_(IsXMLLetter)(uint c); |
|||
Bool TY_(IsXMLNamechar)(uint c); |
|||
|
|||
/* Bool IsLower(uint c); */ |
|||
Bool TY_(IsUpper)(uint c); |
|||
uint TY_(ToLower)(uint c); |
|||
uint TY_(ToUpper)(uint c); |
|||
|
|||
Lexer* TY_(NewLexer)( TidyDocImpl* doc ); |
|||
void TY_(FreeLexer)( TidyDocImpl* doc ); |
|||
|
|||
/* store character c as UTF-8 encoded byte stream */ |
|||
void TY_(AddCharToLexer)( Lexer *lexer, uint c ); |
|||
|
|||
/*
|
|||
Used for elements and text nodes |
|||
element name is NULL for text nodes |
|||
start and end are offsets into lexbuf |
|||
which contains the textual content of |
|||
all elements in the parse tree. |
|||
|
|||
parent and content allow traversal |
|||
of the parse tree in any direction. |
|||
attributes are represented as a linked |
|||
list of AttVal nodes which hold the |
|||
strings for attribute/value pairs. |
|||
*/ |
|||
Node* TY_(NewNode)( TidyAllocator* allocator, Lexer* lexer ); |
|||
|
|||
|
|||
/* used to clone heading nodes when split by an <HR> */ |
|||
Node* TY_(CloneNode)( TidyDocImpl* doc, Node *element ); |
|||
|
|||
/* free node's attributes */ |
|||
void TY_(FreeAttrs)( TidyDocImpl* doc, Node *node ); |
|||
|
|||
/* doesn't repair attribute list linkage */ |
|||
void TY_(FreeAttribute)( TidyDocImpl* doc, AttVal *av ); |
|||
|
|||
/* detach attribute from node */ |
|||
void TY_(DetachAttribute)( Node *node, AttVal *attr ); |
|||
|
|||
/* detach attribute from node then free it
|
|||
*/ |
|||
void TY_(RemoveAttribute)( TidyDocImpl* doc, Node *node, AttVal *attr ); |
|||
|
|||
/*
|
|||
Free document nodes by iterating through peers and recursing |
|||
through children. Set next to NULL before calling FreeNode() |
|||
to avoid freeing peer nodes. Doesn't patch up prev/next links. |
|||
*/ |
|||
void TY_(FreeNode)( TidyDocImpl* doc, Node *node ); |
|||
|
|||
Node* TY_(TextToken)( Lexer *lexer ); |
|||
|
|||
/* used for creating preformatted text from Word2000 */ |
|||
Node* TY_(NewLineNode)( Lexer *lexer ); |
|||
|
|||
/* used for adding a &nbsp; for Word2000 */ |
|||
Node* TY_(NewLiteralTextNode)(Lexer *lexer, ctmbstr txt ); |
|||
|
|||
void TY_(AddStringLiteral)( Lexer* lexer, ctmbstr str ); |
|||
/* void AddStringLiteralLen( Lexer* lexer, ctmbstr str, int len ); */ |
|||
|
|||
/* find element */ |
|||
Node* TY_(FindDocType)( TidyDocImpl* doc ); |
|||
Node* TY_(FindHTML)( TidyDocImpl* doc ); |
|||
Node* TY_(FindHEAD)( TidyDocImpl* doc ); |
|||
Node* TY_(FindTITLE)(TidyDocImpl* doc); |
|||
Node* TY_(FindBody)( TidyDocImpl* doc ); |
|||
Node* TY_(FindXmlDecl)(TidyDocImpl* doc); |
|||
|
|||
/* Returns containing block element, if any */ |
|||
Node* TY_(FindContainer)( Node* node ); |
|||
|
|||
/* add meta element for Tidy */ |
|||
Bool TY_(AddGenerator)( TidyDocImpl* doc ); |
|||
|
|||
uint TY_(ApparentVersion)( TidyDocImpl* doc ); |
|||
|
|||
ctmbstr TY_(HTMLVersionNameFromCode)( uint vers, Bool isXhtml ); |
|||
|
|||
Bool TY_(WarnMissingSIInEmittedDocType)( TidyDocImpl* doc ); |
|||
|
|||
Bool TY_(SetXHTMLDocType)( TidyDocImpl* doc ); |
|||
|
|||
|
|||
/* fixup doctype if missing */ |
|||
Bool TY_(FixDocType)( TidyDocImpl* doc ); |
|||
|
|||
/* ensure XML document starts with <?xml version="1.0"?> */ |
|||
/* add encoding attribute if not using ASCII or UTF-8 output */ |
|||
Bool TY_(FixXmlDecl)( TidyDocImpl* doc ); |
|||
|
|||
Node* TY_(InferredTag)(TidyDocImpl* doc, TidyTagId id); |
|||
|
|||
void TY_(UngetToken)( TidyDocImpl* doc ); |
|||
|
|||
|
|||
/*
|
|||
modes for GetToken() |
|||
|
|||
MixedContent -- for elements which don't accept PCDATA |
|||
Preformatted -- white space preserved as is |
|||
IgnoreMarkup -- for CDATA elements such as script, style |
|||
*/ |
|||
typedef enum
{
    IgnoreWhitespace = 0,
    MixedContent     = 1,
    Preformatted     = 2,
    IgnoreMarkup     = 3,
    OtherNamespace   = 4,
    CdataContent     = 5
} GetTokenMode;
|||
|
|||
Node* TY_(GetToken)( TidyDocImpl* doc, GetTokenMode mode ); |
|||
|
|||
void TY_(InitMap)(void); |
|||
|
|||
|
|||
/* create a new attribute */ |
|||
AttVal* TY_(NewAttribute)( TidyDocImpl* doc ); |
|||
|
|||
/* create a new attribute with given name and value */ |
|||
AttVal* TY_(NewAttributeEx)( TidyDocImpl* doc, ctmbstr name, ctmbstr value, |
|||
int delim ); |
|||
|
|||
/* insert attribute at the end of attribute list of a node */ |
|||
void TY_(InsertAttributeAtEnd)( Node *node, AttVal *av ); |
|||
|
|||
/* insert attribute at the start of attribute list of a node */ |
|||
void TY_(InsertAttributeAtStart)( Node *node, AttVal *av ); |
|||
|
|||
/*************************************
|
|||
In-line Stack functions |
|||
*************************************/ |
|||
|
|||
|
|||
/* duplicate attributes */ |
|||
AttVal* TY_(DupAttrs)( TidyDocImpl* doc, AttVal* attrs ); |
|||
|
|||
/*
|
|||
push a copy of an inline node onto stack |
|||
but don't push if implicit or OBJECT or APPLET |
|||
(implicit tags are ones generated from the istack) |
|||
|
|||
One issue arises with pushing inlines when |
|||
the tag is already pushed. For instance: |
|||
|
|||
<p><em>text |
|||
<p><em>more text |
|||
|
|||
Shouldn't be mapped to |
|||
|
|||
<p><em>text</em></p> |
|||
<p><em><em>more text</em></em> |
|||
*/ |
|||
void TY_(PushInline)( TidyDocImpl* doc, Node* node ); |
|||
|
|||
/* pop inline stack */ |
|||
void TY_(PopInline)( TidyDocImpl* doc, Node* node ); |
|||
|
|||
Bool TY_(IsPushed)( TidyDocImpl* doc, Node* node ); |
|||
Bool TY_(IsPushedLast)( TidyDocImpl* doc, Node *element, Node *node ); |
|||
|
|||
/*
|
|||
This has the effect of inserting "missing" inline |
|||
elements around the contents of blocklevel elements |
|||
such as P, TD, TH, DIV, PRE etc. This procedure is |
|||
 called at the start of ParseBlock when the inline |
|||
stack is not empty, as will be the case in: |
|||
|
|||
<i><h1>italic heading</h1></i> |
|||
|
|||
which is then treated as equivalent to |
|||
|
|||
<h1><i>italic heading</i></h1> |
|||
|
|||
This is implemented by setting the lexer into a mode |
|||
where it gets tokens from the inline stack rather than |
|||
from the input stream. |
|||
*/ |
|||
int TY_(InlineDup)( TidyDocImpl* doc, Node *node ); |
|||
|
|||
/*
|
|||
defer duplicates when entering a table or other |
|||
element where the inlines shouldn't be duplicated |
|||
*/ |
|||
void TY_(DeferDup)( TidyDocImpl* doc ); |
|||
Node* TY_(InsertedToken)( TidyDocImpl* doc ); |
|||
|
|||
/* stack manipulation for inline elements */ |
|||
Bool TY_(SwitchInline)( TidyDocImpl* doc, Node* element, Node* node ); |
|||
Bool TY_(InlineDup1)( TidyDocImpl* doc, Node* node, Node* element ); |
|||
|
|||
#ifdef __cplusplus |
|||
} |
|||
#endif |
|||
|
|||
|
|||
#endif /* __LEXER_H__ */ |
@ -0,0 +1,343 @@ |
|||
/* Interface to mmap style I/O
|
|||
|
|||
(c) 2006-2008 (W3C) MIT, ERCIM, Keio University |
|||
See tidy.h for the copyright notice. |
|||
|
|||
Originally contributed by Cory Nelson and Nuno Lopes |
|||
|
|||
*/ |
|||
|
|||
/* keep these here to keep file non-empty */ |
|||
#include "forward.h" |
|||
#include "mappedio.h" |
|||
|
|||
#if SUPPORT_POSIX_MAPPED_FILES |
|||
|
|||
#include "fileio.h" |
|||
|
|||
#include <sys/types.h> |
|||
#include <sys/stat.h> |
|||
#include <unistd.h> |
|||
#include <stdio.h> |
|||
|
|||
#include <sys/mman.h> |
|||
|
|||
|
|||
typedef struct |
|||
{ |
|||
TidyAllocator *allocator; |
|||
const byte *base; |
|||
size_t pos, size; |
|||
} MappedFileSource; |
|||
|
|||
/* Input callback: return the next byte of the mapped file and advance the
 * cursor.
 *
 * sourceData is the MappedFileSource installed by initFileSource().
 * Returns the byte value, or EOF once every mapped byte has been consumed.
 */
static int TIDY_CALL mapped_getByte( void* sourceData )
{
    MappedFileSource* fin = (MappedFileSource*) sourceData;

    /* Never index past the mapping: a reader that asks for one more byte
     * at end of file must get an end marker, not memory beyond the region.
     * NOTE(review): the Win32 reader in this file returns EndOfStream here;
     * confirm the stream layer treats EOF the same way. */
    if ( fin->pos >= fin->size )
        return EOF;

    return fin->base[fin->pos++];
}
|||
|
|||
/* Input callback: true once the read cursor has reached the end of the mapping. */
static Bool TIDY_CALL mapped_eof( void* sourceData )
{
    const MappedFileSource* src = (const MappedFileSource*) sourceData;
    return !( src->pos < src->size );
}
|||
|
|||
/* Input callback: push the most recently read byte back by moving the
 * cursor one position toward the start of the mapping. The byte value is
 * ignored because the mapping is read-only and still holds it.
 */
static void TIDY_CALL mapped_ungetByte( void* sourceData, byte ARG_UNUSED(bv) )
{
    MappedFileSource* fin = (MappedFileSource*) sourceData;

    /* pos is an unsigned size_t: decrementing it at 0 would wrap to a huge
     * offset, so an unget at the very start is ignored. */
    if ( fin->pos > 0 )
        fin->pos--;
}
|||
|
|||
/* Set up inp to read fp through a read-only memory mapping.
 *
 * If the file cannot be stat'ed, is empty, or cannot be mapped, the work is
 * delegated to the standard I/O file source and its result is returned.
 * On the mapped path fp is closed here and 0 is returned.
 * Returns -1 when the bookkeeping record cannot be allocated.
 */
int TY_(initFileSource)( TidyAllocator *allocator, TidyInputSource* inp, FILE* fp )
{
    MappedFileSource* src;
    struct stat info;
    int fd;
    int mapped = 0;

    src = (MappedFileSource*) TidyAlloc( allocator, sizeof(MappedFileSource) );
    if ( !src )
        return -1;

    fd = fileno(fp);

    /* Attempt the mapping only for files that stat cleanly and are non-empty. */
    if ( fstat(fd, &info) != -1 && info.st_size != 0 )
    {
        src->size = info.st_size;
        src->base = mmap( 0, src->size, PROT_READ, MAP_SHARED, fd, 0 );
        mapped = ( src->base != MAP_FAILED );
    }

    if ( !mapped )
    {
        TidyFree( allocator, src );
        /* Fallback on standard I/O */
        return TY_(initStdIOFileSource)( allocator, inp, fp );
    }

    src->pos = 0;
    src->allocator = allocator;
    fclose(fp);

    inp->getByte    = mapped_getByte;
    inp->eof        = mapped_eof;
    inp->ungetByte  = mapped_ungetByte;
    inp->sourceData = src;

    return 0;
}
|||
|
|||
/* Release a source created by initFileSource().
 *
 * A source that does not use the mapped getByte callback is handed to the
 * standard I/O cleanup together with closeIt. A mapped source is unmapped
 * and its record freed; closeIt is not consulted on that path.
 */
void TY_(freeFileSource)( TidyInputSource* inp, Bool closeIt )
{
    MappedFileSource* src;

    if ( inp->getByte != mapped_getByte )
    {
        TY_(freeStdIOFileSource)( inp, closeIt );
        return;
    }

    src = (MappedFileSource*) inp->sourceData;
    munmap( (void*)src->base, src->size );
    TidyFree( src->allocator, src );
}
|||
|
|||
#endif |
|||
|
|||
|
|||
#if defined(_WIN32) |
|||
#if defined(_MSC_VER) && (_MSC_VER < 1300) /* less than msvc++ 7.0 */ |
|||
#pragma warning(disable:4115) /* named type definition in parentheses in windows headers */ |
|||
#endif |
|||
#include <windows.h> |
|||
#include <errno.h> |
|||
|
|||
#include "streamio.h" |
|||
#include "tidy-int.h" |
|||
#include "message.h" |
|||
|
|||
typedef struct _fp_input_mapped_source |
|||
{ |
|||
TidyAllocator *allocator; |
|||
LONGLONG size, pos; |
|||
HANDLE file, map; |
|||
byte *view, *iter, *end; |
|||
unsigned int gran; |
|||
} MappedFileSource; |
|||
|
|||
/* Map the window of the file that starts at data->pos, replacing any view
 * that is currently mapped. The window is data->gran bytes long, or shorter
 * when fewer bytes remain in the file.
 *
 * Returns 0 on success with iter/end reset to the new window, -1 if the
 * view could not be mapped (data->view is then NULL).
 */
static int mapped_openView( MappedFileSource *data )
{
    const LONGLONG remaining = data->size - data->pos;
    DWORD span;

    if ( remaining > data->gran )
        span = data->gran;
    else
        span = (DWORD) remaining;

    if ( data->view )
    {
        UnmapViewOfFile( data->view );
        data->view = NULL;
    }

    /* The 64-bit offset is passed as separate high and low DWORDs. */
    data->view = MapViewOfFile( data->map, FILE_MAP_READ,
                                (DWORD)( data->pos >> 32 ),
                                (DWORD)data->pos, span );
    if ( data->view == NULL )
        return -1;

    data->iter = data->view;
    data->end  = data->view + span;

    return 0;
}
|||
|
|||
/* Input callback: return the next byte, sliding the view forward by one
 * granule when the current one is used up. Returns EndOfStream when the
 * file is exhausted or the next view cannot be mapped.
 */
static int TIDY_CALL mapped_getByte( void *sourceData )
{
    MappedFileSource *src = (MappedFileSource*) sourceData;

    /* Fast path: bytes are still available in the current view. */
    if ( src->view && src->iter < src->end )
        return *( src->iter++ );

    /* Current view is missing or consumed: advance to the next window. */
    src->pos += src->gran;

    if ( src->pos >= src->size )
        return EndOfStream;

    if ( mapped_openView(src) != 0 )
        return EndOfStream;

    return *( src->iter++ );
}
|||
|
|||
/* Input callback: true once the view offset has moved to or past the file size. */
static Bool TIDY_CALL mapped_eof( void *sourceData )
{
    const MappedFileSource *src = (const MappedFileSource*) sourceData;
    return !( src->pos < src->size );
}
|||
|
|||
/* Input callback: push the most recently read byte back. The byte value is
 * ignored; only the read position moves.
 *
 * Inside a view this steps iter back by one. When iter already sits on the
 * first byte of the view, the previous window is mapped again and iter is
 * placed on its last byte. An unget at the very start of the file is a
 * caller error (asserts, then returns without moving).
 */
static void TIDY_CALL mapped_ungetByte( void *sourceData, byte ARG_UNUSED(bt) )
{
    MappedFileSource *data = (MappedFileSource*) sourceData;

    /* Strictly greater: with ">=" iter would be moved below the start of
     * the view when it is already on the first byte. */
    if ( data->view && data->iter > data->view )
    {
        --data->iter;
        return;
    }

    /* No earlier window exists when the view starts inside the first granule. */
    if ( data->pos < data->gran )
    {
        assert(0);
        return;
    }

    data->pos -= data->gran;

    /* mapped_openView() leaves iter on the first byte of the window; the
     * byte being pushed back is the last one of the previous window. */
    if ( mapped_openView( data ) == 0 )
        data->iter = data->end - 1;
}
|||
|
|||
/* Prepare inp to read the file behind handle fp through a file mapping.
 *
 * The mapped callbacks are installed on inp first, so they remain set even
 * if a later step fails. Determines the file size (by a compiler-specific
 * route), creates the mapping, records the system allocation granularity as
 * the view step and maps the first view.
 *
 * Returns 0 on success with inp->sourceData set; -1 on any failure, with
 * the partially built record released.
 */
static int initMappedFileSource( TidyAllocator *allocator, TidyInputSource* inp, HANDLE fp )
{
    MappedFileSource* src = NULL;
    SYSTEM_INFO sysinfo;

    inp->getByte   = mapped_getByte;
    inp->eof       = mapped_eof;
    inp->ungetByte = mapped_ungetByte;

    src = (MappedFileSource*) TidyAlloc( allocator, sizeof(MappedFileSource) );
    if ( src == NULL )
        return -1;

#if defined(__MINGW32__)
    {
        DWORD lowVal, highVal;
        lowVal = GetFileSize(fp, &highVal);
        if ((lowVal == INVALID_FILE_SIZE) && (GetLastError() != NO_ERROR))
        {
            TidyFree(allocator, src);
            return -1;
        }
        /* assemble the 64-bit size from its two 32-bit halves */
        src->size = highVal;
        src->size = (src->size << 32);
        src->size += lowVal;
    }
#else /* NOT a MinGW build */
#if defined(_MSC_VER) && (_MSC_VER < 1300) /* less than msvc++ 7.0 */
    {
        /* NOTE(review): GetLastError() is consulted here even when
         * GetFileSize() did not report a failure -- confirm that a stale
         * error code cannot cause a false failure. */
        LARGE_INTEGER* pli = (LARGE_INTEGER *)&src->size;
        (DWORD)pli->LowPart = GetFileSize( fp, (DWORD *)&pli->HighPart );
        if ( GetLastError() != NO_ERROR || src->size <= 0 )
        {
            TidyFree(allocator, src);
            return -1;
        }
    }
#else
    if ( !GetFileSizeEx( fp, (LARGE_INTEGER*)&src->size )
         || src->size <= 0 )
    {
        TidyFree(allocator, src);
        return -1;
    }
#endif
#endif /* MinGW y/n */

    src->map = CreateFileMapping( fp, NULL, PAGE_READONLY, 0, 0, NULL );
    if ( src->map == NULL )
    {
        TidyFree(allocator, src);
        return -1;
    }

    GetSystemInfo( &sysinfo );
    src->gran = sysinfo.dwAllocationGranularity;

    src->allocator = allocator;
    src->pos  = 0;
    src->view = NULL;
    src->iter = NULL;
    src->end  = NULL;

    if ( mapped_openView( src ) != 0 )
    {
        CloseHandle( src->map );
        TidyFree( allocator, src );
        return -1;
    }

    src->file = fp;
    inp->sourceData = src;

    return 0;
}
|||
|
|||
/* Release everything initMappedFileSource() attached to inp.
 *
 * The view and the mapping handle belong to the source record and are
 * always released with it. The file handle is closed only when closeIt is
 * set. A source with no record attached is ignored.
 */
static void freeMappedFileSource( TidyInputSource* inp, Bool closeIt )
{
    MappedFileSource* fin = (MappedFileSource*) inp->sourceData;

    /* Nothing was attached: there is no record to release. */
    if ( !fin )
        return;

    /* These would leak once the record is freed, whatever closeIt says. */
    if ( fin->view )
        UnmapViewOfFile( fin->view );
    CloseHandle( fin->map );

    if ( closeIt && fin->file != INVALID_HANDLE_VALUE )
        CloseHandle( fin->file );

    TidyFree( fin->allocator, fin );
}
|||
|
|||
/* Create an input stream for doc that reads file handle fp through a file
 * mapping, using the given character encoding. Returns NULL, after freeing
 * the stream, if the mapped source cannot be initialised.
 */
StreamIn* MappedFileInput ( TidyDocImpl* doc, HANDLE fp, int encoding )
{
    StreamIn *stream = TY_(initStreamIn)( doc, encoding );

    if ( initMappedFileSource( doc->allocator, &stream->source, fp ) == 0 )
    {
        stream->iotype = FileIO;
        return stream;
    }

    TY_(freeStreamIn)( stream );
    return NULL;
}
|||
|
|||
|
|||
/* Open filnam for reading and parse it into doc through a mapped-file stream.
 *
 * Returns the status of TY_(DocParseStream) on success, -ENOENT (after
 * reporting a file error) when the file cannot be opened, and -ENOMEM when
 * the mapped input stream cannot be created.
 */
int TY_(DocParseFileWithMappedFile)( TidyDocImpl* doc, ctmbstr filnam ) {
    int status = -ENOENT;
    StreamIn* in;
    HANDLE fin = CreateFileA( filnam, GENERIC_READ, FILE_SHARE_READ, NULL,
                              OPEN_EXISTING, 0, NULL );

#if PRESERVE_FILE_TIMES
    LONGLONG actime, modtime;
    TidyClearMemory( &doc->filetimes, sizeof(doc->filetimes) );

    if ( fin != INVALID_HANDLE_VALUE && cfgBool(doc,TidyKeepFileTimes) &&
         GetFileTime(fin, NULL, (FILETIME*)&actime, (FILETIME*)&modtime) )
    {
#define TY_I64(str) TYDYAPPEND(str,LL)
#if _MSC_VER < 1300  && !defined(__GNUC__) /* less than msvc++ 7.0 */
# undef TY_I64
# define TY_I64(str) TYDYAPPEND(str,i64)
#endif
        /* Subtract the fixed offset and divide by 10000000 to turn the
         * values filled in by GetFileTime() into time_t values. */
        doc->filetimes.actime =
            (time_t)( ( actime  - TY_I64(116444736000000000)) / 10000000 );

        doc->filetimes.modtime =
            (time_t)( ( modtime - TY_I64(116444736000000000)) / 10000000 );
    }
#endif

    /* The file could not be opened: report it and return -ENOENT. */
    if ( fin == INVALID_HANDLE_VALUE )
    {
        TY_(FileError)( doc, filnam, TidyError );
        return status;
    }

    in = MappedFileInput( doc, fin, cfg( doc, TidyInCharEncoding ) );
    if ( !in )
    {
        CloseHandle( fin );
        return -ENOMEM;
    }

    status = TY_(DocParseStream)( doc, in );
    freeMappedFileSource( &in->source, yes );
    TY_(freeStreamIn)( in );

    return status;
}
|||
|
|||
#endif |
|||
|
|||
|
|||
/*
|
|||
* local variables: |
|||
* mode: c |
|||
* indent-tabs-mode: nil |
|||
* c-basic-offset: 4 |
|||
* eval: (c-set-offset 'substatement-open 0) |
|||
* end: |
|||
*/ |
@ -0,0 +1,15 @@ |
|||
#ifndef __TIDY_MAPPED_IO_H__ |
|||
#define __TIDY_MAPPED_IO_H__ |
|||
|
|||
/* Interface to mmap style I/O
|
|||
|
|||
(c) 2006 (W3C) MIT, ERCIM, Keio University |
|||
See tidy.h for the copyright notice. |
|||
|
|||
*/ |
|||
|
|||
#if defined(_WIN32) |
|||
int TY_(DocParseFileWithMappedFile)( TidyDocImpl* doc, ctmbstr filnam ); |
|||
#endif |
|||
|
|||
#endif /* __TIDY_MAPPED_IO_H__ */ |
File diff suppressed because it is too large
@ -0,0 +1,282 @@ |
|||
#ifndef __MESSAGE_H__ |
|||
#define __MESSAGE_H__ |
|||
|
|||
/* message.h -- general message writing routines
|
|||
|
|||
(c) 1998-2007 (W3C) MIT, ERCIM, Keio University |
|||
See tidy.h for the copyright notice. |
|||
|
|||
*/ |
|||
|
|||
#include "forward.h" |
|||
#include "tidy.h" /* For TidyReportLevel */ |
|||
#include "language.h" |
|||
|
|||
/* General message writing routines.
|
|||
** Each message is a single warning, error, etc. |
|||
** |
|||
** These routines keep track of counts and, |
|||
** if the caller has set a filter, it will be |
|||
** called. The new preferred way of handling |
|||
** Tidy diagnostics output is either a) define |
|||
** a new output sink or b) install a message |
|||
** filter routine. |
|||
** |
|||
** Keep track of ShowWarnings, ShowErrors, etc. |
|||
*/ |
|||
|
|||
ctmbstr TY_(ReleaseDate)(void); |
|||
|
|||
void TY_(ReportUnknownOption)( TidyDocImpl* doc, ctmbstr option ); |
|||
void TY_(ReportBadArgument)( TidyDocImpl* doc, ctmbstr option ); |
|||
void TY_(NeedsAuthorIntervention)( TidyDocImpl* doc ); |
|||
|
|||
void TY_(ReportMarkupVersion)( TidyDocImpl* doc ); |
|||
void TY_(ReportNumWarnings)( TidyDocImpl* doc ); |
|||
|
|||
void TY_(GeneralInfo)( TidyDocImpl* doc ); |
|||
/* void TY_(UnknownOption)( TidyDocImpl* doc, char c ); */ |
|||
/* void TY_(UnknownFile)( TidyDocImpl* doc, ctmbstr program, ctmbstr file ); */ |
|||
void TY_(FileError)( TidyDocImpl* doc, ctmbstr file, TidyReportLevel level ); |
|||
|
|||
void TY_(ErrorSummary)( TidyDocImpl* doc ); |
|||
|
|||
void TY_(ReportEncodingWarning)(TidyDocImpl* doc, uint code, uint encoding); |
|||
void TY_(ReportEncodingError)(TidyDocImpl* doc, uint code, uint c, Bool discarded); |
|||
void TY_(ReportEntityError)( TidyDocImpl* doc, uint code, ctmbstr entity, int c ); |
|||
void TY_(ReportAttrError)( TidyDocImpl* doc, Node* node, AttVal* av, uint code ); |
|||
void TY_(ReportMissingAttr)( TidyDocImpl* doc, Node* node, ctmbstr name ); |
|||
|
|||
#if SUPPORT_ACCESSIBILITY_CHECKS |
|||
|
|||
void TY_(ReportAccessWarning)( TidyDocImpl* doc, Node* node, uint code ); |
|||
void TY_(ReportAccessError)( TidyDocImpl* doc, Node* node, uint code ); |
|||
|
|||
#endif |
|||
|
|||
void TY_(ReportNotice)(TidyDocImpl* doc, Node *element, Node *node, uint code); |
|||
void TY_(ReportWarning)(TidyDocImpl* doc, Node *element, Node *node, uint code); |
|||
void TY_(ReportError)(TidyDocImpl* doc, Node* element, Node* node, uint code); |
|||
void TY_(ReportFatal)(TidyDocImpl* doc, Node* element, Node* node, uint code); |
|||
|
|||
|
|||
/**
|
|||
* These tidyErrorCodes are used throughout libtidy, and also |
|||
* have associated localized strings to describe them. |
|||
* |
|||
* IMPORTANT: to maintain compatibility with TidyMessageFilter3, if you add |
|||
* or remove keys from this enum, ALSO add/remove the corresponding key |
|||
* in language.c:tidyErrorFilterKeysStruct[]! |
|||
*/ |
|||
/*
 * Diagnostic codes.  Only the first enumerator carries an explicit value;
 * every other code is numbered implicitly from its position, so the order
 * below must not change.
 */
typedef enum {
    /* This MUST be present and first. */
    CODES_TIDY_ERROR_FIRST = 200,

    /* ---- entities / numeric character references ---- */
    MISSING_SEMICOLON,
    MISSING_SEMICOLON_NCR,
    UNKNOWN_ENTITY,
    UNESCAPED_AMPERSAND,
    APOS_UNDEFINED,

    /* ---- element messages ---- */
    MISSING_ENDTAG_FOR,
    MISSING_ENDTAG_BEFORE,
    DISCARDING_UNEXPECTED,
    NESTED_EMPHASIS,
    NON_MATCHING_ENDTAG,
    TAG_NOT_ALLOWED_IN,
    MISSING_STARTTAG,
    UNEXPECTED_ENDTAG,
    USING_BR_INPLACE_OF,
    INSERTING_TAG,
    SUSPECTED_MISSING_QUOTE,
    MISSING_TITLE_ELEMENT,
    DUPLICATE_FRAMESET,
    CANT_BE_NESTED,
    OBSOLETE_ELEMENT,
    PROPRIETARY_ELEMENT,
    ELEMENT_VERS_MISMATCH_ERROR,
    ELEMENT_VERS_MISMATCH_WARN,
    UNKNOWN_ELEMENT,
    TRIM_EMPTY_ELEMENT,
    COERCE_TO_ENDTAG,
    ILLEGAL_NESTING,
    NOFRAMES_CONTENT,
    CONTENT_AFTER_BODY,
    INCONSISTENT_VERSION,
    MALFORMED_COMMENT,
    BAD_COMMENT_CHARS,
    BAD_XML_COMMENT,
    BAD_CDATA_CONTENT,
    INCONSISTENT_NAMESPACE,
    DOCTYPE_AFTER_TAGS,
    MALFORMED_DOCTYPE,
    UNEXPECTED_END_OF_FILE,
    DTYPE_NOT_UPPER_CASE,
    TOO_MANY_ELEMENTS,
    UNESCAPED_ELEMENT,
    NESTED_QUOTATION,
    ELEMENT_NOT_EMPTY,
    ENCODING_IO_CONFLICT,
    MIXED_CONTENT_IN_BLOCK,
    MISSING_DOCTYPE,
    SPACE_PRECEDING_XMLDECL,
    TOO_MANY_ELEMENTS_IN,
    UNEXPECTED_ENDTAG_IN,
    REPLACING_ELEMENT,
    REPLACING_UNEX_ELEMENT,
    COERCE_TO_ENDTAG_WARN,

    /* ---- attribute messages ---- */
    UNKNOWN_ATTRIBUTE,
    INSERTING_ATTRIBUTE,
    INSERTING_AUTO_ATTRIBUTE,
    MISSING_ATTR_VALUE,
    BAD_ATTRIBUTE_VALUE,
    UNEXPECTED_GT,
    PROPRIETARY_ATTRIBUTE,
    MISMATCHED_ATTRIBUTE_ERROR,
    MISMATCHED_ATTRIBUTE_WARN,
    PROPRIETARY_ATTR_VALUE,
    REPEATED_ATTRIBUTE,
    MISSING_IMAGEMAP,
    XML_ATTRIBUTE_VALUE,
    UNEXPECTED_QUOTEMARK,
    MISSING_QUOTEMARK,
    ID_NAME_MISMATCH,

    BACKSLASH_IN_URI,
    FIXED_BACKSLASH,
    ILLEGAL_URI_REFERENCE,
    ESCAPED_ILLEGAL_URI,

    NEWLINE_IN_URI,
    ANCHOR_NOT_UNIQUE,

    JOINING_ATTRIBUTE,
    UNEXPECTED_EQUALSIGN,
    ATTR_VALUE_NOT_LCASE,
    XML_ID_SYNTAX,

    INVALID_ATTRIBUTE,

    BAD_ATTRIBUTE_VALUE_REPLACED,

    INVALID_XML_ID,
    UNEXPECTED_END_OF_FILE_ATTR,
    MISSING_ATTRIBUTE,
    WHITE_IN_URI,

    REMOVED_HTML5,      /* this element removed from HTML5 */
    BAD_SUMMARY_HTML5,  /* use of summary attr removed from HTML5 */

    PREVIOUS_LOCATION,  /* last */

    /* ---- character encoding errors ---- */
    VENDOR_SPECIFIC_CHARS,
    INVALID_SGML_CHARS,
    INVALID_UTF8,
    INVALID_UTF16,
    ENCODING_MISMATCH,
    INVALID_URI,
    INVALID_NCR,

    /* This MUST be present and last. */
    CODES_TIDY_ERROR_LAST
} tidyErrorCodes;
|||
|
|||
/**
|
|||
* These tidyMessagesMisc are used throughout libtidy, and also |
|||
* have associated localized strings to describe them. |
|||
*/ |
|||
/*
 * Keys for miscellaneous strings.  Numbering starts at 2048 and continues
 * implicitly, so the order of the enumerators must be preserved.
 */
typedef enum {
    ACCESS_URL = 2048,          /* Used to point to Web Accessibility Guidelines. */
    ATRC_ACCESS_URL,            /* Points to Tidy's accessibility page. */
    FILE_CANT_OPEN,             /* For retrieving a string when a file can't be opened. */
    LINE_COLUMN_STRING,         /* For retrieving localized `line %d column %d` text. */
    STRING_CONTENT_LOOKS,       /* `Document content looks like %s`. */
    STRING_DISCARDING,          /* For `discarding`. */
    STRING_DOCTYPE_GIVEN,       /* `Doctype given is \"%s\"`. */
    STRING_ERROR_COUNT,         /* `%u %s, %u %s were found!`. */
    STRING_ERROR_COUNT_ERROR,   /* `error` and `errors`. */
    STRING_ERROR_COUNT_WARNING, /* `warning` and `warnings`. */
    STRING_HELLO_ACCESS,        /* Accessibility hello message. */
    STRING_HTML_PROPRIETARY,    /* `HTML Proprietary`. */
    STRING_MISSING_MALFORMED,   /* For `missing or malformed argument for option: %s`. */
    STRING_NO_ERRORS,           /* `No warnings or errors were found.\n\n`. */
    STRING_NO_SYSID,            /* `No system identifier in emitted doctype`. */
    STRING_NOT_ALL_SHOWN,       /* ` Not all warnings/errors were shown.\n\n`. */
    STRING_PLAIN_TEXT,          /* For retrieving a string `plain text`. */
    STRING_REPLACING,           /* For `replacing`. */
    STRING_SPECIFIED,           /* For `specified`. */
    STRING_UNKNOWN_FILE,        /* `%s: can't open file \"%s\"\n`. */
    STRING_UNKNOWN_OPTION,      /* For retrieving a string `unknown option: %s`. */
    STRING_UNRECZD_OPTION,      /* `unrecognized option -%c use -help to list options\n`. */
    STRING_XML_DECLARATION,     /* For retrieving a string `XML declaration`. */
    TEXT_ACCESS_ADVICE1,        /* Explanatory text. */
    TEXT_ACCESS_ADVICE2,        /* Explanatory text. */
    TEXT_BAD_FORM,              /* Explanatory text. */
    TEXT_BAD_MAIN,              /* Explanatory text. */
    TEXT_GENERAL_INFO,          /* Explanatory text. */
    TEXT_GENERAL_INFO_PLEA,     /* Explanatory text. */
    TEXT_HTML_T_ALGORITHM,      /* Paragraph for describing the HTML table algorithm. */
    TEXT_INVALID_URI,           /* Explanatory text. */
    TEXT_INVALID_UTF16,         /* Explanatory text. */
    TEXT_INVALID_UTF8,          /* Explanatory text. */
    TEXT_M_IMAGE_ALT,           /* Explanatory text. */
    TEXT_M_IMAGE_MAP,           /* Explanatory text. */
    TEXT_M_LINK_ALT,            /* Explanatory text. */
    TEXT_M_SUMMARY,             /* Explanatory text. */
    TEXT_NEEDS_INTERVENTION,    /* Explanatory text. */
    TEXT_SGML_CHARS,            /* Explanatory text. */
    TEXT_USING_BODY,            /* Explanatory text. */
    TEXT_USING_FONT,            /* Explanatory text. */
    TEXT_USING_FRAMES,          /* Explanatory text. */
    TEXT_USING_LAYER,           /* Explanatory text. */
    TEXT_USING_NOBR,            /* Explanatory text. */
    TEXT_USING_SPACER,          /* Explanatory text. */
    TEXT_VENDOR_CHARS,          /* Explanatory text. */
    TEXT_WINDOWS_CHARS          /* Explanatory text. */
} tidyMessagesMisc;
|||
|
|||
/* accessibility flaws */ |
|||
|
|||
/* Accessibility flaw flags; every value is a distinct single bit. */
#define BA_MISSING_IMAGE_ALT       1
#define BA_MISSING_LINK_ALT        2
#define BA_MISSING_SUMMARY         4
#define BA_MISSING_IMAGE_MAP       8
#define BA_USING_FRAMES            16
#define BA_USING_NOFRAMES          32
#define BA_INVALID_LINK_NOFRAMES   64  /* WAI [6.5.1.4] */
/* Unsigned literal on purpose: shifting a signed 1 into bit 31 overflows a
** 32-bit int, which C leaves undefined.  The bit pattern is unchanged. */
#define BA_WAI                     (1u << 31)
|||
|
|||
/* presentation flaws */ |
|||
|
|||
#define USING_SPACER 1 |
|||
#define USING_LAYER 2 |
|||
#define USING_NOBR 4 |
|||
#define USING_FONT 8 |
|||
#define USING_BODY 16 |
|||
|
|||
/* badchar bit field */ |
|||
|
|||
#define BC_VENDOR_SPECIFIC_CHARS 1 |
|||
#define BC_INVALID_SGML_CHARS 2 |
|||
#define BC_INVALID_UTF8 4 |
|||
#define BC_INVALID_UTF16 8 |
|||
#define BC_ENCODING_MISMATCH 16 /* fatal error */ |
|||
#define BC_INVALID_URI 32 |
|||
#define BC_INVALID_NCR 64 |
|||
|
|||
/* Lexer and I/O Macros */ |
|||
|
|||
#define REPLACED_CHAR 0 |
|||
#define DISCARDED_CHAR 1 |
|||
|
|||
|
|||
#endif /* __MESSAGE_H__ */ |
File diff suppressed because it is too large
@ -0,0 +1,70 @@ |
|||
#ifndef __PARSER_H__ |
|||
#define __PARSER_H__ |
|||
|
|||
/* parser.h -- HTML Parser
|
|||
|
|||
(c) 1998-2007 (W3C) MIT, ERCIM, Keio University |
|||
See tidy.h for the copyright notice. |
|||
|
|||
*/ |
|||
|
|||
#include "forward.h" |
|||
|
|||
Bool TY_(CheckNodeIntegrity)(Node *node); |
|||
|
|||
Bool TY_(TextNodeEndWithSpace)( Lexer *lexer, Node *node ); |
|||
|
|||
/*
|
|||
used to determine how attributes |
|||
without values should be printed |
|||
this was introduced to deal with |
|||
user defined tags e.g. Cold Fusion |
|||
*/ |
|||
Bool TY_(IsNewNode)(Node *node); |
|||
|
|||
void TY_(CoerceNode)(TidyDocImpl* doc, Node *node, TidyTagId tid, Bool obsolete, Bool expected); |
|||
|
|||
/* extract a node and its children from a markup tree */ |
|||
Node *TY_(RemoveNode)(Node *node); |
|||
|
|||
/* remove node from markup tree and discard it */ |
|||
Node *TY_(DiscardElement)( TidyDocImpl* doc, Node *element); |
|||
|
|||
/* insert node into markup tree as the first element
|
|||
of content of element */ |
|||
void TY_(InsertNodeAtStart)(Node *element, Node *node); |
|||
|
|||
/* insert node into markup tree as the last element
|
|||
of content of "element" */ |
|||
void TY_(InsertNodeAtEnd)(Node *element, Node *node); |
|||
|
|||
/* insert node into markup tree before element */ |
|||
void TY_(InsertNodeBeforeElement)(Node *element, Node *node); |
|||
|
|||
/* insert node into markup tree after element */ |
|||
void TY_(InsertNodeAfterElement)(Node *element, Node *node); |
|||
|
|||
Node *TY_(TrimEmptyElement)( TidyDocImpl* doc, Node *element ); |
|||
Node* TY_(DropEmptyElements)(TidyDocImpl* doc, Node* node); |
|||
|
|||
|
|||
/* assumes node is a text node */ |
|||
Bool TY_(IsBlank)(Lexer *lexer, Node *node); |
|||
|
|||
Bool TY_(IsJavaScript)(Node *node); |
|||
|
|||
/*
|
|||
HTML is the top level element |
|||
*/ |
|||
void TY_(ParseDocument)( TidyDocImpl* doc ); |
|||
|
|||
|
|||
|
|||
/*
|
|||
XML documents |
|||
*/ |
|||
Bool TY_(XMLPreserveWhiteSpace)( TidyDocImpl* doc, Node *element ); |
|||
|
|||
void TY_(ParseXMLDocument)( TidyDocImpl* doc ); |
|||
|
|||
#endif /* __PARSER_H__ */ |
@ -0,0 +1,6 @@ |
|||
#ifdef __GNUC__ |
|||
#warning "FIXME: Using compatibility tidy header (platform.h) that will go away!" |
|||
#endif |
|||
|
|||
#include "tidyplatform.h" |
|||
|
File diff suppressed because it is too large
@ -0,0 +1,94 @@ |
|||
#ifndef __PPRINT_H__ |
|||
#define __PPRINT_H__ |
|||
|
|||
/* pprint.h -- pretty print parse tree
|
|||
|
|||
(c) 1998-2007 (W3C) MIT, ERCIM, Keio University |
|||
See tidy.h for the copyright notice. |
|||
|
|||
*/ |
|||
|
|||
#include "forward.h" |
|||
|
|||
/*
|
|||
Block-level and unknown elements are printed on |
|||
new lines and their contents indented 2 spaces |
|||
|
|||
Inline elements are printed inline. |
|||
|
|||
Inline content is wrapped on spaces (except in |
|||
attribute values or preformatted text, after |
|||
start tags and before end tags) |
|||
*/ |
|||
|
|||
#define NORMAL 0u |
|||
#define PREFORMATTED 1u |
|||
#define COMMENT 2u |
|||
#define ATTRIBVALUE 4u |
|||
#define NOWRAP 8u |
|||
#define CDATA 16u |
|||
|
|||
|
|||
/* The pretty printer keeps at most two lines of text in the
|
|||
** buffer before flushing output. We need to capture the |
|||
** indent state (indent level) at the _beginning_ of _each_ |
|||
** line, not the end of just the second line. |
|||
** |
|||
** We must also keep track "In Attribute" and "In String" |
|||
** states at the _end_ of each line, |
|||
*/ |
|||
|
|||
/* Indent/attribute state remembered for one buffered output line. */
struct _TidyIndent
{
    int spaces;           /* indent for the line (presumably a column count - confirm) */
    int attrValStart;     /* "In Attribute" marker; exact encoding not visible here */
    int attrStringStart;  /* "In String" marker; exact encoding not visible here */
};
typedef struct _TidyIndent TidyIndent;
|||
|
|||
typedef struct _TidyPrintImpl |
|||
{ |
|||
TidyAllocator *allocator; /* Allocator */ |
|||
|
|||
uint *linebuf; |
|||
uint lbufsize; |
|||
uint linelen; |
|||
uint wraphere; |
|||
uint line; |
|||
|
|||
uint ixInd; |
|||
TidyIndent indent[2]; /* Two lines worth of indent state */ |
|||
} TidyPrintImpl; |
|||
|
|||
|
|||
#if 0 && SUPPORT_ASIAN_ENCODINGS
|
|||
/* #431953 - start RJ Wraplen adjusted for smooth international ride */ |
|||
uint CWrapLen( TidyDocImpl* doc, uint ind ); |
|||
#endif |
|||
|
|||
void TY_(InitPrintBuf)( TidyDocImpl* doc ); |
|||
void TY_(FreePrintBuf)( TidyDocImpl* doc ); |
|||
|
|||
void TY_(PFlushLine)( TidyDocImpl* doc, uint indent ); |
|||
|
|||
|
|||
/* print just the content of the body element.
|
|||
** useful when you want to reuse material from |
|||
** other documents. |
|||
** |
|||
** -- Sebastiano Vigna <vigna@dsi.unimi.it> |
|||
*/ |
|||
|
|||
void TY_(PrintBody)( TidyDocImpl* doc ); /* you can print an entire document */ |
|||
/* node as body using PPrintTree() */ |
|||
|
|||
void TY_(PPrintTree)( TidyDocImpl* doc, uint mode, uint indent, Node *node ); |
|||
|
|||
void TY_(PPrintXMLTree)( TidyDocImpl* doc, uint mode, uint indent, Node *node ); |
|||
|
|||
/*\
|
|||
* 20150515 - support using tabs instead of spaces |
|||
\*/ |
|||
void TY_(PPrintTabs)(void); |
|||
void TY_(PPrintSpaces)(void); |
|||
|
|||
#endif /* __PPRINT_H__ */ |
@ -0,0 +1,446 @@ |
|||
/*
|
|||
* SPRTF - Log output utility |
|||
* |
|||
* Author: Geoff R. McLane <reports _at_ geoffair _dot_ info> |
|||
* License: GPL v2 (or later at your choice) |
|||
* |
|||
* Revision 1.0.1 2012/11/06 13:01:25 geoff |
|||
* Revision 1.0.0 2012/10/17 00:00:00 geoff |
|||
* |
|||
* This program is free software; you can redistribute it and/or |
|||
* modify it under the terms of the GNU General Public License as |
|||
* published by the Free Software Foundation; either version 2 of the |
|||
* License, or (at your option) any later version. |
|||
* |
|||
* This program is distributed in the hope that it will be useful, but |
|||
* WITHOUT ANY WARRANTY; without even the implied warranty of |
|||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|||
* General Public License for more details. |
|||
* |
|||
* You should have received a copy of the GNU General Public License |
|||
* along with this program; if not, write to the Free Software |
|||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, US |
|||
* |
|||
*/ |
|||
|
|||
#ifdef _MSC_VER |
|||
#pragma warning( disable : 4995 ) |
|||
#endif |
|||
// Module: sprtf.cxx
|
|||
// Debug log file output
|
|||
#include <stdio.h> // fopen()... |
|||
#include <string.h> // strcpy |
|||
#include <stdarg.h> // va_start, va_end, ... |
|||
#ifdef _MSC_VER |
|||
#include <WinSock2.h> |
|||
#include <sys/timeb.h> |
|||
#if (defined(UNICODE) || defined(_UNICODE)) |
|||
#include <Strsafe.h> |
|||
#endif |
|||
#else /* !_MSC_VER */ |
|||
#include <sys/time.h> // gettimeoday(), struct timeval,... |
|||
#endif /* _MSC_VER y/n */ |
|||
#include <time.h> |
|||
#include <stdlib.h> // for exit() in unix |
|||
#include "sprtf.h" |
|||
|
|||
#ifdef _MSC_VER |
|||
#ifndef _CRT_SECURE_NO_DEPRECATE |
|||
#define _CRT_SECURE_NO_DEPRECATE |
|||
#endif // #ifndef _CRT_SECURE_NO_DEPRECATE
|
|||
#pragma warning( disable:4996 ) |
|||
#else |
|||
#define strcmpi strcasecmp |
|||
#endif |
|||
|
|||
#ifndef MX_ONE_BUF |
|||
#define MX_ONE_BUF 1024 |
|||
#endif |
|||
#ifndef MX_BUFFERS |
|||
#define MX_BUFFERS 1024 |
|||
#endif |
|||
|
|||
static char _s_strbufs[MX_ONE_BUF * MX_BUFFERS]; |
|||
static int iNextBuf = 0; |
|||
|
|||
char *GetNxtBuf() |
|||
{ |
|||
iNextBuf++; |
|||
if(iNextBuf >= MX_BUFFERS) |
|||
iNextBuf = 0; |
|||
return &_s_strbufs[MX_ONE_BUF * iNextBuf]; |
|||
} |
|||
|
|||
#define MXIO 512 |
|||
#ifdef _MSC_VER // use local log
|
|||
static char def_log[] = "tempex.txt"; |
|||
#else |
|||
static char def_log[] = "ex.log"; |
|||
#endif |
|||
static char logfile[264] = "\0"; |
|||
static FILE * outfile = NULL; |
|||
static int addsystime = 0; |
|||
static int addsysdate = 0; |
|||
static int addstdout = 1; |
|||
static int addflush = 1; |
|||
static int add2screen = 0; |
|||
static int add2listview = 0; |
|||
static int append_to_log = 0; |
|||
|
|||
#ifndef VFP |
|||
#define VFP(a) ( a && ( a != (FILE *)-1 ) ) |
|||
#endif |
|||
|
|||
int add_list_out( int val ) |
|||
{ |
|||
int i = add2listview; |
|||
add2listview = val; |
|||
return i; |
|||
} |
|||
|
|||
int add_std_out( int val ) |
|||
{ |
|||
int i = addstdout; |
|||
addstdout = val; |
|||
return i; |
|||
} |
|||
|
|||
int add_screen_out( int val ) |
|||
{ |
|||
int i = add2screen; |
|||
add2screen = val; |
|||
return i; |
|||
} |
|||
|
|||
|
|||
int add_sys_time( int val ) |
|||
{ |
|||
int i = addsystime; |
|||
addsystime = val; |
|||
return i; |
|||
} |
|||
|
|||
int add_sys_date( int val ) |
|||
{ |
|||
int i = addsysdate; |
|||
addsysdate = val; |
|||
return i; |
|||
} |
|||
|
|||
|
|||
int add_append_log( int val ) |
|||
{ |
|||
int i = append_to_log; |
|||
append_to_log = val; |
|||
return i; |
|||
} |
|||
|
|||
|
|||
#ifdef _MSC_VER |
|||
static const char *mode = "wb"; // in window sprtf looks after the line endings
|
|||
#else |
|||
static const char *mode = "w"; |
|||
#endif |
|||
|
|||
int open_log_file( void ) |
|||
{ |
|||
if (logfile[0] == 0) |
|||
strcpy(logfile,def_log); |
|||
if (append_to_log) { |
|||
#ifdef _MSC_VER |
|||
mode = "ab"; // in window sprtf looks after the line endings
|
|||
#else |
|||
mode = "a"; |
|||
#endif |
|||
} |
|||
outfile = fopen(logfile, mode); |
|||
if( outfile == 0 ) { |
|||
outfile = (FILE *)-1; |
|||
sprtf("ERROR: Failed to open log file [%s] ...\n", logfile); |
|||
exit(1); /* failed */ |
|||
return 0; /* failed */ |
|||
} |
|||
return 1; /* success */ |
|||
} |
|||
|
|||
void close_log_file( void ) |
|||
{ |
|||
if( VFP(outfile) ) { |
|||
fclose(outfile); |
|||
} |
|||
outfile = NULL; |
|||
} |
|||
|
|||
char * get_log_file( void ) |
|||
{ |
|||
if (logfile[0] == 0) |
|||
strcpy(logfile,def_log); |
|||
if (outfile == (FILE *)-1) // disable the log file
|
|||
return (char *)"none"; |
|||
return logfile; |
|||
} |
|||
|
|||
void set_log_file( char * nf, int open ) |
|||
{ |
|||
if (logfile[0] == 0) |
|||
strcpy(logfile,def_log); |
|||
if ( nf && *nf && strcmpi(nf,logfile) ) { |
|||
close_log_file(); // remove any previous
|
|||
strcpy(logfile,nf); // set new name
|
|||
if (strcmp(logfile,"none") == 0) { // if equal 'none'
|
|||
outfile = (FILE *)-1; // disable the log file
|
|||
} else if (open) { |
|||
open_log_file(); // and open it ... anything previous written is 'lost'
|
|||
} else |
|||
outfile = 0; // else set 0 to open on first write
|
|||
} |
|||
} |
|||
|
|||
#ifdef _MSC_VER |
|||
int gettimeofday(struct timeval *tp, void *tzp) |
|||
{ |
|||
#ifdef WIN32 |
|||
struct _timeb timebuffer; |
|||
_ftime(&timebuffer); |
|||
tp->tv_sec = (long)timebuffer.time; |
|||
tp->tv_usec = timebuffer.millitm * 1000; |
|||
#else |
|||
tp->tv_sec = time(NULL); |
|||
tp->tv_usec = 0; |
|||
#endif |
|||
return 0; |
|||
} |
|||
|
|||
#endif // _MSC_VER
|
|||
|
|||
/*
 * Append the local date of *ptv, formatted "YYYY/MM/DD", to the string
 * already held in ps.  strftime() is given a limit of 128 bytes, so the
 * caller must have that much room after the current end of ps.  Nothing is
 * appended when localtime() fails.
 */
void add_date_stg( char *ps, struct timeval *ptv )
{
    time_t secs = (ptv->tv_sec & 0xffffffff);
    struct tm *local = localtime(&secs);

    if (!local)
        return;
    strftime(ps + strlen(ps), 128, "%Y/%m/%d", local);
}
|||
|
|||
/*
 * Append the local time of day of *ptv, formatted "HH:MM:SS", to the string
 * already held in ps.  strftime() is given a limit of 128 bytes, so the
 * caller must have that much room after the current end of ps.  Nothing is
 * appended when localtime() fails.
 */
void add_time_stg( char *ps, struct timeval *ptv )
{
    time_t secs = (ptv->tv_sec & 0xffffffff);
    struct tm *local = localtime(&secs);

    if (!local)
        return;
    strftime(ps + strlen(ps), 128, "%H:%M:%S", local);
}
|||
|
|||
/*
 * Return today's local date as "YYYY/MM/DD" in a scratch buffer obtained
 * from GetNxtBuf().
 */
char *get_date_stg()
{
    struct timeval now;
    char *text = GetNxtBuf();

    gettimeofday( (struct timeval *)&now, (struct timezone *)0 );
    *text = 0;                  /* start from an empty string */
    add_date_stg( text, &now );
    return text;
}
|||
|
|||
/*
 * Return the current local time as "HH:MM:SS" in a scratch buffer obtained
 * from GetNxtBuf().
 */
char *get_time_stg()
{
    struct timeval now;
    char *text = GetNxtBuf();

    gettimeofday( (struct timeval *)&now, (struct timezone *)0 );
    *text = 0;                  /* start from an empty string */
    add_time_stg( text, &now );
    return text;
}
|||
|
|||
/*
 * Return the current local date and time as "YYYY/MM/DD HH:MM:SS" in a
 * scratch buffer obtained from GetNxtBuf().  Both parts come from the same
 * clock reading.
 */
char *get_date_time_stg()
{
    struct timeval now;
    char *text = GetNxtBuf();

    gettimeofday( (struct timeval *)&now, (struct timezone *)0 );
    *text = 0;                  /* start from an empty string */
    add_date_stg( text, &now );
    strcat(text, " ");
    add_time_stg( text, &now );
    return text;
}
|||
|
|||
static void oi( char * psin ) |
|||
{ |
|||
int len, w; |
|||
char * ps = psin; |
|||
if (!ps) |
|||
return; |
|||
|
|||
len = (int)strlen(ps); |
|||
if (len) { |
|||
|
|||
if( outfile == 0 ) { |
|||
open_log_file(); |
|||
} |
|||
if( VFP(outfile) ) { |
|||
char *tb; |
|||
if (addsysdate) { |
|||
tb = GetNxtBuf(); |
|||
len = sprintf( tb, "%s - %s", get_date_time_stg(), ps ); |
|||
ps = tb; |
|||
} else if( addsystime ) { |
|||
tb = GetNxtBuf(); |
|||
len = sprintf( tb, "%s - %s", get_time_stg(), ps ); |
|||
ps = tb; |
|||
} |
|||
|
|||
w = (int)fwrite( ps, 1, len, outfile ); |
|||
if( w != len ) { |
|||
fclose(outfile); |
|||
outfile = (FILE *)-1; |
|||
sprtf("WARNING: Failed write to log file [%s] ...\n", logfile); |
|||
exit(1); |
|||
} else if (addflush) { |
|||
fflush( outfile ); |
|||
} |
|||
} |
|||
|
|||
if( addstdout ) { |
|||
fwrite( ps, 1, len, stdout ); |
|||
} |
|||
#ifdef ADD_LISTVIEW |
|||
if (add2listview) { |
|||
LVInsertItem(ps); |
|||
} |
|||
#endif // ADD_LISTVIEW
|
|||
#ifdef ADD_SCREENOUT |
|||
if (add2screen) { |
|||
Add_String(ps); // add string to screen list
|
|||
} |
|||
#endif // #ifdef ADD_SCREENOUT
|
|||
} |
|||
} |
|||
|
|||
#ifdef _MSC_VER |
|||
// service to ensure line endings in windows only
|
|||
static void prt( char * ps ) |
|||
{ |
|||
static char _s_buf[1024]; |
|||
char * pb = _s_buf; |
|||
size_t i, j, k; |
|||
char c, d; |
|||
i = strlen(ps); |
|||
k = 0; |
|||
d = 0; |
|||
if(i) { |
|||
k = 0; |
|||
d = 0; |
|||
for( j = 0; j < i; j++ ) { |
|||
c = ps[j]; |
|||
if( c == 0x0d ) { |
|||
if( (j+1) < i ) { |
|||
if( ps[j+1] != 0x0a ) { |
|||
pb[k++] = c; |
|||
c = 0x0a; |
|||
} |
|||
} else { |
|||
pb[k++] = c; |
|||
c = 0x0a; |
|||
} |
|||
} else if( c == 0x0a ) { |
|||
if( d != 0x0d ) { |
|||
pb[k++] = 0x0d; |
|||
} |
|||
} |
|||
pb[k++] = c; |
|||
d = c; |
|||
if( k >= MXIO ) { |
|||
pb[k] = 0; |
|||
oi(pb); |
|||
k = 0; |
|||
} |
|||
} // for length of string
|
|||
if( k ) { |
|||
//if( ( gbCheckCrLf ) &&
|
|||
// ( d != 0x0a ) ) {
|
|||
// add Cr/Lf pair
|
|||
//pb[k++] = 0x0d;
|
|||
//pb[k++] = 0x0a;
|
|||
//pb[k] = 0;
|
|||
//}
|
|||
pb[k] = 0; |
|||
oi( pb ); |
|||
} |
|||
} |
|||
} |
|||
#endif // #ifdef _MSC_VER
|
|||
|
|||
/*
 * Send an already formatted string straight to the output stage (with
 * CR/LF fix-up on MSVC builds) and return its length in bytes.
 */
int direct_out_it( char *cp )
{
#ifdef _MSC_VER
    prt(cp);
#else
    oi(cp);
#endif
    {
        const size_t bytes = strlen(cp);
        return (int)bytes;
    }
}
|||
|
|||
// STDAPI StringCchVPrintf( OUT LPTSTR pszDest,
|
|||
// IN size_t cchDest, IN LPCTSTR pszFormat, IN va_list argList );
|
|||
/*
 * printf-style logging entry point.  The message is formatted into a static
 * buffer, with vsnprintf() limited to M_MAX_SPRTF bytes, and handed to the
 * output stage.  Returns whatever vsnprintf() returned.
 */
int MCDECL sprtf( const char *pf, ... )
{
    static char _s_sprtfbuf[M_MAX_SPRTF+4];
    va_list args;
    int count;

    va_start(args, pf);
    count = vsnprintf( _s_sprtfbuf, M_MAX_SPRTF, pf, args );
    va_end(args);

#ifdef _MSC_VER
    prt(_s_sprtfbuf);   // ensure CR/LF
#else
    oi(_s_sprtfbuf);
#endif
    return count;
}
|||
|
|||
#ifdef UNICODE |
|||
// WIDE VARIETY
|
|||
/*
 * Convert a wide string to the ANSI code page and pass it to prt().
 *
 * WideCharToMultiByte() is called with an explicit character count, so it
 * does not write a terminating NUL.  The result is therefore terminated
 * here, with one byte of the buffer held back for that; otherwise prt()
 * would run on into bytes left over from an earlier, longer message.
 * When the conversion fails (returns 0, for instance because the result
 * does not fit) nothing is printed.
 */
static void wprt( PTSTR ps )
{
    static char _s_woibuf[1024];
    char * cp = _s_woibuf;
    int len = (int)lstrlen(ps);
    if (len) {
        int ret = WideCharToMultiByte( CP_ACP,  // code page
            0,                              // performance and mapping flags
            ps,                             // wide-character string
            len,                            // number of chars in string
            cp,                             // buffer for new string
            (int)sizeof(_s_woibuf) - 1,     // size of buffer, less room for NUL
            NULL,                           // default for unmappable chars
            NULL );                         // set when default char used
        if (ret > 0) {
            cp[ret] = 0;
            prt(cp);
        }
    }
}
|||
|
|||
/*
 * Wide-character counterpart of sprtf(): formats into a static buffer of
 * 1024 wide characters via StringCchVPrintf() and forwards the result to
 * wprt().  Always returns 1.
 */
int MCDECL wsprtf( PTSTR pf, ... )
{
    static WCHAR _s_sprtfwbuf[1024];
    va_list args;

    _s_sprtfwbuf[0] = 0;    /* start from an empty string */
    va_start(args, pf);
    StringCchVPrintf(_s_sprtfwbuf, 1024, pf, args);
    va_end(args);

    wprt(_s_sprtfwbuf);
    return 1;
}
|||
|
|||
#endif // #ifdef UNICODE
|
|||
|
|||
// eof - sprtf.cxx
|
@ -0,0 +1,77 @@ |
|||
/*
|
|||
* SPRTF - Log output utility |
|||
* |
|||
* Author: Geoff R. McLane <reports _at_ geoffair _dot_ info> |
|||
* License: GPL v2 (or later at your choice) |
|||
* |
|||
* Revision 1.0.1 2012/11/06 13:01:25 geoff |
|||
* Revision 1.0.0 2012/10/17 00:00:00 geoff |
|||
* |
|||
* This program is free software; you can redistribute it and/or |
|||
* modify it under the terms of the GNU General Public License as |
|||
* published by the Free Software Foundation; either version 2 of the |
|||
* License, or (at your option) any later version. |
|||
* |
|||
* This program is distributed in the hope that it will be useful, but |
|||
* WITHOUT ANY WARRANTY; without even the implied warranty of |
|||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|||
* General Public License for more details. |
|||
* |
|||
* You should have received a copy of the GNU General Public License |
|||
* along with this program; if not, write to the Free Software |
|||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, US |
|||
* |
|||
*/ |
|||
|
|||
// Module: sprtf.hxx
|
|||
// Debug log file output
|
|||
#ifndef _SPRTF_HXX_ |
|||
#define _SPRTF_HXX_ |
|||
#include "tidyplatform.h" |
|||
|
|||
#ifdef __cplusplus |
|||
extern "C" { |
|||
#endif |
|||
#ifdef _MSC_VER |
|||
#define MCDECL _cdecl |
|||
#else |
|||
#define MCDECL |
|||
#endif |
|||
|
|||
TIDY_EXPORT int add_std_out( int val ); |
|||
TIDY_EXPORT int add_sys_time( int val ); |
|||
TIDY_EXPORT int add_sys_date( int val ); |
|||
|
|||
TIDY_EXPORT int add_screen_out( int val ); |
|||
TIDY_EXPORT int add_list_out( int val ); |
|||
TIDY_EXPORT int add_append_log( int val ); |
|||
|
|||
TIDY_EXPORT int open_log_file( void ); |
|||
TIDY_EXPORT void close_log_file( void ); |
|||
TIDY_EXPORT void set_log_file( char * nf, int open ); |
|||
TIDY_EXPORT char * get_log_file( void ); |
|||
|
|||
TIDY_EXPORT int MCDECL sprtf( const char *pf, ... ); |
|||
#define M_MAX_SPRTF 2048 |
|||
TIDY_EXPORT int direct_out_it( char *cp ); |
|||
|
|||
TIDY_EXPORT char *GetNxtBuf(); |
|||
|
|||
/* Pointer to the terminating NUL of string a.  The argument is parenthesised
   so that an expression such as a conditional expands correctly. */
#define EndBuf(a)   ( (a) + strlen(a) )
|||
|
|||
TIDY_EXPORT char *get_date_stg(); |
|||
TIDY_EXPORT char *get_time_stg(); |
|||
TIDY_EXPORT char *get_date_time_stg(); |
|||
#ifdef _MSC_VER |
|||
TIDY_EXPORT int gettimeofday(struct timeval *tp, void *tzp); |
|||
#endif |
|||
|
|||
#ifndef SPRTF |
|||
#define SPRTF sprtf |
|||
#endif |
|||
|
|||
#ifdef __cplusplus |
|||
} |
|||
#endif |
|||
#endif // #ifndef _SPRTF_HXX_
|
|||
// eof - sprtf.hxx
|
File diff suppressed because it is too large
@ -0,0 +1,210 @@ |
|||
#ifndef __STREAMIO_H__ |
|||
#define __STREAMIO_H__ |
|||
|
|||
/* streamio.h -- handles character stream I/O
|
|||
|
|||
(c) 1998-2007 (W3C) MIT, ERCIM, Keio University |
|||
See tidy.h for the copyright notice. |
|||
|
|||
Wrapper around Tidy input source and output sink |
|||
that calls appropriate interfaces, and applies |
|||
necessary char encoding transformations: to/from |
|||
ISO-10646 and/or UTF-8. |
|||
|
|||
*/ |
|||
|
|||
#include "forward.h" |
|||
#include "tidybuffio.h" |
|||
#include "fileio.h" |
|||
|
|||
#ifdef __cplusplus |
|||
extern "C" |
|||
{ |
|||
#endif |
|||
/* Kind of endpoint a stream is attached to (stored in the stream's iotype). */
typedef enum
{
    FileIO   = 0,   /* presumably a stdio FILE* - confirm in streamio.c */
    BufferIO = 1,   /* presumably a TidyBuffer - confirm in streamio.c */
    UserIO   = 2    /* presumably a caller-supplied source/sink - confirm */
} IOType;
|||
|
|||
/* states for ISO 2022
|
|||
|
|||
A document in ISO-2022 based encoding uses some ESC sequences called |
|||
"designator" to switch character sets. The designators defined and |
|||
used in ISO-2022-JP are: |
|||
|
|||
"ESC" + "(" + ? for ISO646 variants |
|||
|
|||
"ESC" + "$" + ? and |
|||
"ESC" + "$" + "(" + ? for multibyte character sets |
|||
*/ |
|||
/* States of the ISO 2022 escape-sequence state machine.
** NOTE(review): the per-state notes are inferred from the names and the
** designator description above; confirm against streamio.c. */
typedef enum
{
    FSM_ASCII    = 0,
    FSM_ESC      = 1,   /* presumably: ESC seen */
    FSM_ESCD     = 2,   /* presumably: ESC '$' seen */
    FSM_ESCDP    = 3,   /* presumably: ESC '$' '(' seen */
    FSM_ESCP     = 4,   /* presumably: ESC '(' seen */
    FSM_NONASCII = 5
} ISO2022State;
|||
|
|||
/************************
|
|||
** Source |
|||
************************/ |
|||
|
|||
/* Size constants for the input stream. */
enum
{
    CHARBUF_SIZE = 5,   /* presumably initial size of the pushback buffer - confirm */
    LASTPOS_SIZE = 64   /* number of entries in the lastcols[] array */
};
|||
|
|||
/* non-raw input is cleaned up*/ |
|||
struct _StreamIn |
|||
{ |
|||
ISO2022State state; /* FSM for ISO2022 */ |
|||
Bool pushed; |
|||
TidyAllocator *allocator; |
|||
tchar* charbuf; |
|||
uint bufpos; |
|||
uint bufsize; |
|||
int tabs; |
|||
int lastcols[LASTPOS_SIZE]; |
|||
unsigned short curlastpos; /* current last position in lastcols */ |
|||
unsigned short firstlastpos; /* first valid last position in lastcols */ |
|||
int curcol; |
|||
int curline; |
|||
int encoding; |
|||
IOType iotype; |
|||
|
|||
TidyInputSource source; |
|||
|
|||
#ifdef TIDY_WIN32_MLANG_SUPPORT |
|||
void* mlang; |
|||
#endif |
|||
|
|||
#ifdef TIDY_STORE_ORIGINAL_TEXT |
|||
tmbstr otextbuf; |
|||
size_t otextsize; |
|||
uint otextlen; |
|||
#endif |
|||
|
|||
/* Pointer back to document for error reporting */ |
|||
TidyDocImpl* doc; |
|||
}; |
|||
|
|||
StreamIn* TY_(initStreamIn)( TidyDocImpl* doc, int encoding ); |
|||
void TY_(freeStreamIn)(StreamIn* in); |
|||
|
|||
StreamIn* TY_(FileInput)( TidyDocImpl* doc, FILE* fp, int encoding ); |
|||
StreamIn* TY_(BufferInput)( TidyDocImpl* doc, TidyBuffer* content, int encoding ); |
|||
StreamIn* TY_(UserInput)( TidyDocImpl* doc, TidyInputSource* source, int encoding ); |
|||
|
|||
int TY_(ReadBOMEncoding)(StreamIn *in); |
|||
uint TY_(ReadChar)( StreamIn* in ); |
|||
void TY_(UngetChar)( uint c, StreamIn* in ); |
|||
Bool TY_(IsEOF)( StreamIn* in ); |
|||
|
|||
|
|||
/************************
|
|||
** Sink |
|||
************************/ |
|||
|
|||
struct _StreamOut |
|||
{ |
|||
int encoding; |
|||
ISO2022State state; /* for ISO 2022 */ |
|||
uint nl; |
|||
|
|||
#ifdef TIDY_WIN32_MLANG_SUPPORT |
|||
void* mlang; |
|||
#endif |
|||
|
|||
IOType iotype; |
|||
TidyOutputSink sink; |
|||
}; |
|||
|
|||
StreamOut* TY_(FileOutput)( TidyDocImpl *doc, FILE* fp, int encoding, uint newln ); |
|||
StreamOut* TY_(BufferOutput)( TidyDocImpl *doc, TidyBuffer* buf, int encoding, uint newln ); |
|||
StreamOut* TY_(UserOutput)( TidyDocImpl *doc, TidyOutputSink* sink, int encoding, uint newln ); |
|||
|
|||
StreamOut* TY_(StdErrOutput)(void); |
|||
/* StreamOut* StdOutOutput(void); */ |
|||
void TY_(ReleaseStreamOut)( TidyDocImpl *doc, StreamOut* out ); |
|||
|
|||
void TY_(WriteChar)( uint c, StreamOut* out ); |
|||
void TY_(outBOM)( StreamOut *out ); |
|||
|
|||
ctmbstr TY_(GetEncodingNameFromTidyId)(uint id); |
|||
ctmbstr TY_(GetEncodingOptNameFromTidyId)(uint id); |
|||
int TY_(GetCharEncodingFromOptName)(ctmbstr charenc); |
|||
|
|||
/************************
|
|||
** Misc |
|||
************************/ |
|||
|
|||
/* character encodings
|
|||
*/ |
|||
#define RAW 0 |
|||
#define ASCII 1 |
|||
#define LATIN0 2 |
|||
#define LATIN1 3 |
|||
#define UTF8 4 |
|||
#define ISO2022 5 |
|||
#define MACROMAN 6 |
|||
#define WIN1252 7 |
|||
#define IBM858 8 |
|||
|
|||
#if SUPPORT_UTF16_ENCODINGS |
|||
#define UTF16LE 9 |
|||
#define UTF16BE 10 |
|||
#define UTF16 11 |
|||
#endif |
|||
|
|||
/* Note that Big5 and SHIFTJIS are not converted to ISO 10646 codepoints
|
|||
** (i.e., to Unicode) before being recoded into UTF-8. This may be |
|||
** confusing: usually UTF-8 implies ISO10646 codepoints. |
|||
*/ |
|||
#if SUPPORT_ASIAN_ENCODINGS |
|||
#if SUPPORT_UTF16_ENCODINGS |
|||
#define BIG5 12 |
|||
#define SHIFTJIS 13 |
|||
#else |
|||
#define BIG5 9 |
|||
#define SHIFTJIS 10 |
|||
#endif |
|||
#endif |
|||
|
|||
#ifdef TIDY_WIN32_MLANG_SUPPORT |
|||
/* hack: windows code page numbers start at 37 */ |
|||
#define WIN32MLANG 36 |
|||
#endif |
|||
|
|||
|
|||
/* Function for conversion from Windows-1252 to Unicode */ |
|||
uint TY_(DecodeWin1252)(uint c); |
|||
|
|||
/* Function to convert from MacRoman to Unicode */ |
|||
uint TY_(DecodeMacRoman)(uint c); |
|||
|
|||
#ifdef __cplusplus |
|||
} |
|||
#endif |
|||
|
|||
|
|||
/* Use numeric constants as opposed to escape chars (\r, \n)
|
|||
** to avoid conflict Mac compilers that may re-define these. |
|||
*/ |
|||
#define CR 0xD |
|||
#define LF 0xA |
|||
|
|||
#if defined(MAC_OS_CLASSIC) |
|||
#define DEFAULT_NL_CONFIG TidyCR |
|||
#elif defined(_WIN32) || defined(OS2_OS) |
|||
#define DEFAULT_NL_CONFIG TidyCRLF |
|||
#else |
|||
#define DEFAULT_NL_CONFIG TidyLF |
|||
#endif |
|||
|
|||
|
|||
#endif /* __STREAMIO_H__ */ |
@ -0,0 +1,285 @@ |
|||
/* tagask.c -- Interrogate node type
|
|||
|
|||
(c) 1998-2006 (W3C) MIT, ERCIM, Keio University |
|||
See tidy.h for the copyright notice. |
|||
|
|||
*/ |
|||
|
|||
#include "tidy-int.h" |
|||
#include "tags.h" |
|||
#include "tidy.h" |
|||
|
|||
/* Exported wrapper: converts the public handle and forwards to the internal nodeIsText check. */
Bool TIDY_CALL tidyNodeIsText( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return TY_(nodeIsText)( impl );
}
|||
/* Not exported yet: declared here, just ahead of its definition. */
Bool tidyNodeCMIsBlock( TidyNode tnod );

/* Converts the public handle and forwards to the internal nodeCMIsBlock check. */
Bool tidyNodeCMIsBlock( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return TY_(nodeCMIsBlock)( impl );
}
|||
/* Not exported yet: declared here, just ahead of its definition. */
Bool tidyNodeCMIsInline( TidyNode tnod );

/* Converts the public handle and forwards to the internal nodeCMIsInline check. */
Bool tidyNodeCMIsInline( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return TY_(nodeCMIsInline)( impl );
}
|||
/* Not exported yet: declared here, just ahead of its definition. */
Bool tidyNodeCMIsEmpty( TidyNode tnod );

/* Converts the public handle and forwards to the internal nodeCMIsEmpty check. */
Bool tidyNodeCMIsEmpty( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return TY_(nodeCMIsEmpty)( impl );
}
|||
/* Exported wrapper: forwards to the internal nodeIsHeader check (H1, H2, ..., H6). */
Bool TIDY_CALL tidyNodeIsHeader( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return TY_(nodeIsHeader)( impl );
}
|||
|
|||
/* Exported check: the node's tag id is TidyTag_HTML (via nodeIsHTML). */
Bool TIDY_CALL tidyNodeIsHTML( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsHTML( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_HEAD (via nodeIsHEAD). */
Bool TIDY_CALL tidyNodeIsHEAD( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsHEAD( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_TITLE (via nodeIsTITLE). */
Bool TIDY_CALL tidyNodeIsTITLE( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsTITLE( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_BASE (via nodeIsBASE). */
Bool TIDY_CALL tidyNodeIsBASE( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsBASE( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_META (via nodeIsMETA). */
Bool TIDY_CALL tidyNodeIsMETA( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsMETA( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_BODY (via nodeIsBODY). */
Bool TIDY_CALL tidyNodeIsBODY( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsBODY( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_FRAMESET (via nodeIsFRAMESET). */
Bool TIDY_CALL tidyNodeIsFRAMESET( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsFRAMESET( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_FRAME (via nodeIsFRAME). */
Bool TIDY_CALL tidyNodeIsFRAME( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsFRAME( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_IFRAME (via nodeIsIFRAME). */
Bool TIDY_CALL tidyNodeIsIFRAME( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsIFRAME( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_NOFRAMES (via nodeIsNOFRAMES). */
Bool TIDY_CALL tidyNodeIsNOFRAMES( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsNOFRAMES( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_HR (via nodeIsHR). */
Bool TIDY_CALL tidyNodeIsHR( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsHR( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_H1 (via nodeIsH1). */
Bool TIDY_CALL tidyNodeIsH1( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsH1( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_H2 (via nodeIsH2). */
Bool TIDY_CALL tidyNodeIsH2( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsH2( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_PRE (via nodeIsPRE). */
Bool TIDY_CALL tidyNodeIsPRE( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsPRE( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_LISTING (via nodeIsLISTING). */
Bool TIDY_CALL tidyNodeIsLISTING( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsLISTING( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_P (via nodeIsP). */
Bool TIDY_CALL tidyNodeIsP( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsP( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_UL (via nodeIsUL). */
Bool TIDY_CALL tidyNodeIsUL( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsUL( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_OL (via nodeIsOL). */
Bool TIDY_CALL tidyNodeIsOL( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsOL( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_DL (via nodeIsDL). */
Bool TIDY_CALL tidyNodeIsDL( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsDL( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_DIR (via nodeIsDIR). */
Bool TIDY_CALL tidyNodeIsDIR( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsDIR( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_LI (via nodeIsLI). */
Bool TIDY_CALL tidyNodeIsLI( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsLI( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_DT (via nodeIsDT). */
Bool TIDY_CALL tidyNodeIsDT( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsDT( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_DD (via nodeIsDD). */
Bool TIDY_CALL tidyNodeIsDD( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsDD( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_TABLE (via nodeIsTABLE). */
Bool TIDY_CALL tidyNodeIsTABLE( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsTABLE( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_CAPTION (via nodeIsCAPTION). */
Bool TIDY_CALL tidyNodeIsCAPTION( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsCAPTION( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_TD (via nodeIsTD). */
Bool TIDY_CALL tidyNodeIsTD( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsTD( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_TH (via nodeIsTH). */
Bool TIDY_CALL tidyNodeIsTH( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsTH( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_TR (via nodeIsTR). */
Bool TIDY_CALL tidyNodeIsTR( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsTR( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_COL (via nodeIsCOL). */
Bool TIDY_CALL tidyNodeIsCOL( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsCOL( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_COLGROUP (via nodeIsCOLGROUP). */
Bool TIDY_CALL tidyNodeIsCOLGROUP( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsCOLGROUP( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_BR (via nodeIsBR). */
Bool TIDY_CALL tidyNodeIsBR( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsBR( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_A (via nodeIsA). */
Bool TIDY_CALL tidyNodeIsA( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsA( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_LINK (via nodeIsLINK). */
Bool TIDY_CALL tidyNodeIsLINK( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsLINK( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_B (via nodeIsB). */
Bool TIDY_CALL tidyNodeIsB( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsB( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_I (via nodeIsI). */
Bool TIDY_CALL tidyNodeIsI( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsI( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_STRONG (via nodeIsSTRONG). */
Bool TIDY_CALL tidyNodeIsSTRONG( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsSTRONG( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_EM (via nodeIsEM). */
Bool TIDY_CALL tidyNodeIsEM( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsEM( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_BIG (via nodeIsBIG). */
Bool TIDY_CALL tidyNodeIsBIG( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsBIG( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_SMALL (via nodeIsSMALL). */
Bool TIDY_CALL tidyNodeIsSMALL( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsSMALL( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_PARAM (via nodeIsPARAM). */
Bool TIDY_CALL tidyNodeIsPARAM( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsPARAM( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_OPTION (via nodeIsOPTION). */
Bool TIDY_CALL tidyNodeIsOPTION( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsOPTION( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_OPTGROUP (via nodeIsOPTGROUP). */
Bool TIDY_CALL tidyNodeIsOPTGROUP( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsOPTGROUP( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_IMG (via nodeIsIMG). */
Bool TIDY_CALL tidyNodeIsIMG( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsIMG( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_MAP (via nodeIsMAP). */
Bool TIDY_CALL tidyNodeIsMAP( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsMAP( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_AREA (via nodeIsAREA). */
Bool TIDY_CALL tidyNodeIsAREA( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsAREA( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_NOBR (via nodeIsNOBR). */
Bool TIDY_CALL tidyNodeIsNOBR( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsNOBR( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_WBR (via nodeIsWBR). */
Bool TIDY_CALL tidyNodeIsWBR( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsWBR( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_FONT (via nodeIsFONT). */
Bool TIDY_CALL tidyNodeIsFONT( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsFONT( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_LAYER (via nodeIsLAYER). */
Bool TIDY_CALL tidyNodeIsLAYER( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsLAYER( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_SPACER (via nodeIsSPACER). */
Bool TIDY_CALL tidyNodeIsSPACER( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsSPACER( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_CENTER (via nodeIsCENTER). */
Bool TIDY_CALL tidyNodeIsCENTER( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsCENTER( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_STYLE (via nodeIsSTYLE). */
Bool TIDY_CALL tidyNodeIsSTYLE( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsSTYLE( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_SCRIPT (via nodeIsSCRIPT). */
Bool TIDY_CALL tidyNodeIsSCRIPT( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsSCRIPT( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_NOSCRIPT (via nodeIsNOSCRIPT). */
Bool TIDY_CALL tidyNodeIsNOSCRIPT( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsNOSCRIPT( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_FORM (via nodeIsFORM). */
Bool TIDY_CALL tidyNodeIsFORM( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsFORM( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_TEXTAREA (via nodeIsTEXTAREA). */
Bool TIDY_CALL tidyNodeIsTEXTAREA( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsTEXTAREA( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_BLOCKQUOTE (via nodeIsBLOCKQUOTE). */
Bool TIDY_CALL tidyNodeIsBLOCKQUOTE( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsBLOCKQUOTE( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_APPLET (via nodeIsAPPLET). */
Bool TIDY_CALL tidyNodeIsAPPLET( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsAPPLET( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_OBJECT (via nodeIsOBJECT). */
Bool TIDY_CALL tidyNodeIsOBJECT( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsOBJECT( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_DIV (via nodeIsDIV). */
Bool TIDY_CALL tidyNodeIsDIV( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsDIV( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_SPAN (via nodeIsSPAN). */
Bool TIDY_CALL tidyNodeIsSPAN( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsSPAN( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_INPUT (via nodeIsINPUT). */
Bool TIDY_CALL tidyNodeIsINPUT( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsINPUT( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_Q (via nodeIsQ). */
Bool TIDY_CALL tidyNodeIsQ( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsQ( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_LABEL (via nodeIsLABEL). */
Bool TIDY_CALL tidyNodeIsLABEL( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsLABEL( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_H3 (via nodeIsH3). */
Bool TIDY_CALL tidyNodeIsH3( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsH3( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_H4 (via nodeIsH4). */
Bool TIDY_CALL tidyNodeIsH4( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsH4( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_H5 (via nodeIsH5). */
Bool TIDY_CALL tidyNodeIsH5( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsH5( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_H6 (via nodeIsH6). */
Bool TIDY_CALL tidyNodeIsH6( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsH6( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_ADDRESS (via nodeIsADDRESS). */
Bool TIDY_CALL tidyNodeIsADDRESS( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsADDRESS( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_XMP (via nodeIsXMP). */
Bool TIDY_CALL tidyNodeIsXMP( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsXMP( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_SELECT (via nodeIsSELECT). */
Bool TIDY_CALL tidyNodeIsSELECT( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsSELECT( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_BLINK (via nodeIsBLINK). */
Bool TIDY_CALL tidyNodeIsBLINK( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsBLINK( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_MARQUEE (via nodeIsMARQUEE). */
Bool TIDY_CALL tidyNodeIsMARQUEE( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsMARQUEE( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_EMBED (via nodeIsEMBED). */
Bool TIDY_CALL tidyNodeIsEMBED( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsEMBED( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_BASEFONT (via nodeIsBASEFONT). */
Bool TIDY_CALL tidyNodeIsBASEFONT( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsBASEFONT( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_ISINDEX (via nodeIsISINDEX). */
Bool TIDY_CALL tidyNodeIsISINDEX( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsISINDEX( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_S (via nodeIsS). */
Bool TIDY_CALL tidyNodeIsS( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsS( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_STRIKE (via nodeIsSTRIKE). */
Bool TIDY_CALL tidyNodeIsSTRIKE( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsSTRIKE( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_U (via nodeIsU). */
Bool TIDY_CALL tidyNodeIsU( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsU( impl );
}
|||
/* Exported check: the node's tag id is TidyTag_MENU (via nodeIsMENU). */
Bool TIDY_CALL tidyNodeIsMENU( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsMENU( impl );
}
|||
|
|||
/* HTML5 */ |
|||
/* Exported check: the node's tag id is TidyTag_DATALIST (via nodeIsDATALIST). */
Bool TIDY_CALL tidyNodeIsDATALIST( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );

    return nodeIsDATALIST( impl );
}
|||
|
|||
|
|||
/*
|
|||
* local variables: |
|||
* mode: c |
|||
* indent-tabs-mode: nil |
|||
* c-basic-offset: 4 |
|||
* eval: (c-set-offset 'substatement-open 0) |
|||
* end: |
|||
*/ |
File diff suppressed because it is too large
@ -0,0 +1,247 @@ |
|||
#ifndef __TAGS_H__ |
|||
#define __TAGS_H__ |
|||
|
|||
/* tags.h -- recognize HTML tags
|
|||
|
|||
(c) 1998-2006 (W3C) MIT, ERCIM, Keio University |
|||
See tidy.h for the copyright notice. |
|||
|
|||
The HTML tags are stored as 8 bit ASCII strings. |
|||
Use lookupw() to find a tag given a wide char string. |
|||
|
|||
*/ |
|||
|
|||
#include "forward.h" |
|||
#include "attrdict.h" |
|||
|
|||
typedef void (Parser)( TidyDocImpl* doc, Node *node, GetTokenMode mode ); |
|||
typedef void (CheckAttribs)( TidyDocImpl* doc, Node *node ); |
|||
|
|||
/*
|
|||
Tag dictionary node |
|||
*/ |
|||
|
|||
/* Kinds of tag that the user can define.
** tagtype_null is 0; every other kind is a distinct single bit.
*/
typedef enum
{
    tagtype_null   = 0,
    tagtype_empty  = 1 << 0,
    tagtype_inline = 1 << 1,
    tagtype_block  = 1 << 2,
    tagtype_pre    = 1 << 3
} UserTagType;
|||
|
|||
struct _Dict |
|||
{ |
|||
TidyTagId id; |
|||
tmbstr name; |
|||
uint versions; |
|||
AttrVersion const * attrvers; |
|||
uint model; |
|||
Parser* parser; |
|||
CheckAttribs* chkattrs; |
|||
Dict* next; |
|||
}; |
|||
|
|||
#if !defined(ELEMENT_HASH_LOOKUP) |
|||
#define ELEMENT_HASH_LOOKUP 1 |
|||
#endif |
|||
|
|||
#if ELEMENT_HASH_LOOKUP |
|||
enum |
|||
{ |
|||
ELEMENT_HASH_SIZE=178u |
|||
}; |
|||
|
|||
struct _DictHash |
|||
{ |
|||
Dict const* tag; |
|||
struct _DictHash* next; |
|||
}; |
|||
|
|||
typedef struct _DictHash DictHash; |
|||
#endif |
|||
|
|||
struct _TidyTagImpl |
|||
{ |
|||
Dict* xml_tags; /* placeholder for all xml tags */ |
|||
Dict* declared_tag_list; /* User declared tags */ |
|||
#if ELEMENT_HASH_LOOKUP |
|||
DictHash* hashtab[ELEMENT_HASH_SIZE]; |
|||
#endif |
|||
}; |
|||
|
|||
typedef struct _TidyTagImpl TidyTagImpl; |
|||
|
|||
/* interface for finding tag by name */ |
|||
const Dict* TY_(LookupTagDef)( TidyTagId tid ); |
|||
Bool TY_(FindTag)( TidyDocImpl* doc, Node *node ); |
|||
Parser* TY_(FindParser)( TidyDocImpl* doc, Node *node ); |
|||
void TY_(DefineTag)( TidyDocImpl* doc, UserTagType tagType, ctmbstr name ); |
|||
void TY_(FreeDeclaredTags)( TidyDocImpl* doc, UserTagType tagType ); /* tagtype_null to free all */ |
|||
|
|||
TidyIterator TY_(GetDeclaredTagList)( TidyDocImpl* doc ); |
|||
ctmbstr TY_(GetNextDeclaredTag)( TidyDocImpl* doc, UserTagType tagType, |
|||
TidyIterator* iter ); |
|||
|
|||
void TY_(InitTags)( TidyDocImpl* doc ); |
|||
void TY_(FreeTags)( TidyDocImpl* doc ); |
|||
void TY_(AdjustTags)( TidyDocImpl *doc ); /* if NOT HTML5 DOCTYPE, fall back to HTML4 legacy mode */ |
|||
void TY_(ResetTags)( TidyDocImpl *doc ); /* set table to HTML5 mode */ |
|||
Bool TY_(IsHTML5Mode)( TidyDocImpl *doc ); |
|||
|
|||
/* Parser methods for tags */ |
|||
|
|||
Parser TY_(ParseHTML); |
|||
Parser TY_(ParseHead); |
|||
Parser TY_(ParseTitle); |
|||
Parser TY_(ParseScript); |
|||
Parser TY_(ParseFrameSet); |
|||
Parser TY_(ParseNoFrames); |
|||
Parser TY_(ParseBody); |
|||
Parser TY_(ParsePre); |
|||
Parser TY_(ParseList); |
|||
Parser TY_(ParseDefList); |
|||
Parser TY_(ParseBlock); |
|||
Parser TY_(ParseInline); |
|||
Parser TY_(ParseEmpty); |
|||
Parser TY_(ParseTableTag); |
|||
Parser TY_(ParseColGroup); |
|||
Parser TY_(ParseRowGroup); |
|||
Parser TY_(ParseRow); |
|||
Parser TY_(ParseSelect); |
|||
Parser TY_(ParseOptGroup); |
|||
Parser TY_(ParseText); |
|||
Parser TY_(ParseDatalist); |
|||
Parser TY_(ParseNamespace); |
|||
|
|||
CheckAttribs TY_(CheckAttributes); |
|||
|
|||
/* 0 == TidyTag_UNKNOWN */ |
|||
#define TagId(node) ((node) && (node)->tag ? (node)->tag->id : TidyTag_UNKNOWN) |
|||
#define TagIsId(node, tid) ((node) && (node)->tag && (node)->tag->id == tid) |
|||
|
|||
Bool TY_(nodeIsText)( Node* node ); |
|||
Bool TY_(nodeIsElement)( Node* node ); |
|||
|
|||
Bool TY_(nodeHasText)( TidyDocImpl* doc, Node* node ); |
|||
|
|||
#if 0 |
|||
/* Compare & result to operand. If equal, then all bits
|
|||
** requested are set. |
|||
*/ |
|||
Bool nodeMatchCM( Node* node, uint contentModel ); |
|||
#endif |
|||
|
|||
/* True if any of the bits requested are set.
|
|||
*/ |
|||
Bool TY_(nodeHasCM)( Node* node, uint contentModel ); |
|||
|
|||
Bool TY_(nodeCMIsBlock)( Node* node ); |
|||
Bool TY_(nodeCMIsInline)( Node* node ); |
|||
Bool TY_(nodeCMIsEmpty)( Node* node ); |
|||
|
|||
|
|||
Bool TY_(nodeIsHeader)( Node* node ); /* H1, H2, ..., H6 */ |
|||
uint TY_(nodeHeaderLevel)( Node* node ); /* 1, 2, ..., 6 */ |
|||
|
|||
/* Per-tag predicates: each expands to TagIsId() against one TidyTag_* id. */
#define nodeIsHTML( node )          TagIsId( node, TidyTag_HTML )
#define nodeIsHEAD( node )          TagIsId( node, TidyTag_HEAD )
#define nodeIsTITLE( node )         TagIsId( node, TidyTag_TITLE )
#define nodeIsBASE( node )          TagIsId( node, TidyTag_BASE )
#define nodeIsMETA( node )          TagIsId( node, TidyTag_META )
#define nodeIsBODY( node )          TagIsId( node, TidyTag_BODY )
#define nodeIsFRAMESET( node )      TagIsId( node, TidyTag_FRAMESET )
#define nodeIsFRAME( node )         TagIsId( node, TidyTag_FRAME )
#define nodeIsIFRAME( node )        TagIsId( node, TidyTag_IFRAME )
#define nodeIsNOFRAMES( node )      TagIsId( node, TidyTag_NOFRAMES )
#define nodeIsHR( node )            TagIsId( node, TidyTag_HR )
#define nodeIsH1( node )            TagIsId( node, TidyTag_H1 )
#define nodeIsH2( node )            TagIsId( node, TidyTag_H2 )
#define nodeIsPRE( node )           TagIsId( node, TidyTag_PRE )
#define nodeIsLISTING( node )       TagIsId( node, TidyTag_LISTING )
#define nodeIsP( node )             TagIsId( node, TidyTag_P )
#define nodeIsUL( node )            TagIsId( node, TidyTag_UL )
#define nodeIsOL( node )            TagIsId( node, TidyTag_OL )
#define nodeIsDL( node )            TagIsId( node, TidyTag_DL )
#define nodeIsDIR( node )           TagIsId( node, TidyTag_DIR )
#define nodeIsLI( node )            TagIsId( node, TidyTag_LI )
#define nodeIsDT( node )            TagIsId( node, TidyTag_DT )
#define nodeIsDD( node )            TagIsId( node, TidyTag_DD )
#define nodeIsTABLE( node )         TagIsId( node, TidyTag_TABLE )
#define nodeIsCAPTION( node )       TagIsId( node, TidyTag_CAPTION )
#define nodeIsTD( node )            TagIsId( node, TidyTag_TD )
#define nodeIsTH( node )            TagIsId( node, TidyTag_TH )
#define nodeIsTR( node )            TagIsId( node, TidyTag_TR )
#define nodeIsCOL( node )           TagIsId( node, TidyTag_COL )
#define nodeIsCOLGROUP( node )      TagIsId( node, TidyTag_COLGROUP )
#define nodeIsBR( node )            TagIsId( node, TidyTag_BR )
#define nodeIsA( node )             TagIsId( node, TidyTag_A )
#define nodeIsLINK( node )          TagIsId( node, TidyTag_LINK )
#define nodeIsB( node )             TagIsId( node, TidyTag_B )
#define nodeIsI( node )             TagIsId( node, TidyTag_I )
#define nodeIsSTRONG( node )        TagIsId( node, TidyTag_STRONG )
#define nodeIsEM( node )            TagIsId( node, TidyTag_EM )
#define nodeIsBIG( node )           TagIsId( node, TidyTag_BIG )
#define nodeIsSMALL( node )         TagIsId( node, TidyTag_SMALL )
#define nodeIsPARAM( node )         TagIsId( node, TidyTag_PARAM )
#define nodeIsOPTION( node )        TagIsId( node, TidyTag_OPTION )
#define nodeIsOPTGROUP( node )      TagIsId( node, TidyTag_OPTGROUP )
#define nodeIsIMG( node )           TagIsId( node, TidyTag_IMG )
#define nodeIsMAP( node )           TagIsId( node, TidyTag_MAP )
#define nodeIsAREA( node )          TagIsId( node, TidyTag_AREA )
#define nodeIsNOBR( node )          TagIsId( node, TidyTag_NOBR )
#define nodeIsWBR( node )           TagIsId( node, TidyTag_WBR )
#define nodeIsFONT( node )          TagIsId( node, TidyTag_FONT )
#define nodeIsLAYER( node )         TagIsId( node, TidyTag_LAYER )
#define nodeIsSPACER( node )        TagIsId( node, TidyTag_SPACER )
#define nodeIsCENTER( node )        TagIsId( node, TidyTag_CENTER )
#define nodeIsSTYLE( node )         TagIsId( node, TidyTag_STYLE )
#define nodeIsSCRIPT( node )        TagIsId( node, TidyTag_SCRIPT )
#define nodeIsNOSCRIPT( node )      TagIsId( node, TidyTag_NOSCRIPT )
#define nodeIsFORM( node )          TagIsId( node, TidyTag_FORM )
#define nodeIsTEXTAREA( node )      TagIsId( node, TidyTag_TEXTAREA )
#define nodeIsBLOCKQUOTE( node )    TagIsId( node, TidyTag_BLOCKQUOTE )
#define nodeIsAPPLET( node )        TagIsId( node, TidyTag_APPLET )
#define nodeIsOBJECT( node )        TagIsId( node, TidyTag_OBJECT )
#define nodeIsDIV( node )           TagIsId( node, TidyTag_DIV )
#define nodeIsSPAN( node )          TagIsId( node, TidyTag_SPAN )
#define nodeIsINPUT( node )         TagIsId( node, TidyTag_INPUT )
#define nodeIsQ( node )             TagIsId( node, TidyTag_Q )
#define nodeIsLABEL( node )         TagIsId( node, TidyTag_LABEL )
#define nodeIsH3( node )            TagIsId( node, TidyTag_H3 )
#define nodeIsH4( node )            TagIsId( node, TidyTag_H4 )
#define nodeIsH5( node )            TagIsId( node, TidyTag_H5 )
#define nodeIsH6( node )            TagIsId( node, TidyTag_H6 )
#define nodeIsADDRESS( node )       TagIsId( node, TidyTag_ADDRESS )
#define nodeIsXMP( node )           TagIsId( node, TidyTag_XMP )
#define nodeIsSELECT( node )        TagIsId( node, TidyTag_SELECT )
#define nodeIsBLINK( node )         TagIsId( node, TidyTag_BLINK )
#define nodeIsMARQUEE( node )       TagIsId( node, TidyTag_MARQUEE )
#define nodeIsEMBED( node )         TagIsId( node, TidyTag_EMBED )
#define nodeIsBASEFONT( node )      TagIsId( node, TidyTag_BASEFONT )
#define nodeIsISINDEX( node )       TagIsId( node, TidyTag_ISINDEX )
#define nodeIsS( node )             TagIsId( node, TidyTag_S )
#define nodeIsSTRIKE( node )        TagIsId( node, TidyTag_STRIKE )
#define nodeIsSUB( node )           TagIsId( node, TidyTag_SUB )
#define nodeIsSUP( node )           TagIsId( node, TidyTag_SUP )
#define nodeIsU( node )             TagIsId( node, TidyTag_U )
#define nodeIsMENU( node )          TagIsId( node, TidyTag_MENU )
#define nodeIsMAIN( node )          TagIsId( node, TidyTag_MAIN )
#define nodeIsBUTTON( node )        TagIsId( node, TidyTag_BUTTON )
#define nodeIsCANVAS( node )        TagIsId( node, TidyTag_CANVAS )
#define nodeIsPROGRESS( node )      TagIsId( node, TidyTag_PROGRESS )

#define nodeIsINS( node )           TagIsId( node, TidyTag_INS )
#define nodeIsDEL( node )           TagIsId( node, TidyTag_DEL )
|||
|
|||
/* HTML5 predicates */
#define nodeIsDATALIST( node )      TagIsId( node, TidyTag_DATALIST )
#define nodeIsMATHML( node )        TagIsId( node, TidyTag_MATHML )     /* #130 MathML attr and entity fix! */
|||
|
|||
/* Predicates for tags that are NOT in HTML 5.
** NOTE(review): nodesIsFRAME (extra 's') tests the same TidyTag_FRAME id as
** nodeIsFRAME; the spelling is kept as-is so any existing user still builds.
*/
#define nodeIsACRONYM( node )       TagIsId( node, TidyTag_ACRONYM )
#define nodesIsFRAME( node )        TagIsId( node, TidyTag_FRAME )
#define nodeIsTT( node )            TagIsId( node, TidyTag_TT )
|||
|
|||
#endif /* __TAGS_H__ */ |
@ -0,0 +1,159 @@ |
|||
#ifndef __TIDY_INT_H__ |
|||
#define __TIDY_INT_H__ |
|||
|
|||
/* tidy-int.h -- internal library declarations
|
|||
|
|||
(c) 1998-2007 (W3C) MIT, ERCIM, Keio University |
|||
See tidy.h for the copyright notice. |
|||
|
|||
*/ |
|||
|
|||
#include "tidy.h" |
|||
#include "config.h" |
|||
#include "lexer.h" |
|||
#include "tags.h" |
|||
#include "attrs.h" |
|||
#include "pprint.h" |
|||
#include "access.h" |
|||
|
|||
/* Fallback MAX/MIN, defined only when the platform has not supplied them.
** Each argument is evaluated twice, so avoid arguments with side effects.
*/
#if !defined(MAX)
#define MAX(a,b) (((a) > (b))?(a):(b))
#endif

#if !defined(MIN)
#define MIN(a,b) (((a) < (b))?(a):(b))
#endif
|||
|
|||
/* Issue #166 - repeated <main> element.
** badForm used to be a plain on/off uint flag; it is now a bit field so
** that errors other than badly placed <form> tags can be recorded too.
** More bits could be added later.
*/
#define flg_BadForm     0x00000001
#define flg_BadMain     0x00000002
|||
|
|||
struct _TidyDocImpl |
|||
{ |
|||
/* The Document Tree (and backing store buffer) */ |
|||
Node root; /* This MUST remain the first declared
|
|||
variable in this structure */ |
|||
Lexer* lexer; |
|||
|
|||
/* Config + Markup Declarations */ |
|||
TidyConfigImpl config; |
|||
TidyTagImpl tags; |
|||
TidyAttribImpl attribs; |
|||
|
|||
#if SUPPORT_ACCESSIBILITY_CHECKS |
|||
/* Accessibility Checks state */ |
|||
TidyAccessImpl access; |
|||
#endif |
|||
|
|||
/* The Pretty Print buffer */ |
|||
TidyPrintImpl pprint; |
|||
|
|||
/* I/O */ |
|||
StreamIn* docIn; |
|||
StreamOut* docOut; |
|||
StreamOut* errout; |
|||
TidyReportFilter mssgFilt; |
|||
TidyReportFilter2 mssgFilt2; |
|||
TidyReportFilter3 mssgFilt3; |
|||
TidyOptCallback pOptCallback; |
|||
TidyPPProgress progressCallback; |
|||
|
|||
/* Parse + Repair Results */ |
|||
uint optionErrors; |
|||
uint errors; |
|||
uint warnings; |
|||
uint accessErrors; |
|||
uint infoMessages; |
|||
uint docErrors; |
|||
int parseStatus; |
|||
|
|||
uint badAccess; /* for accessibility errors */ |
|||
uint badLayout; /* for bad style errors */ |
|||
uint badChars; /* for bad char encodings */ |
|||
uint badForm; /* bit field, for badly placed form tags, or other format errors */ |
|||
|
|||
Bool HTML5Mode; /* current mode is html5 */ |
|||
|
|||
/* Memory allocator */ |
|||
TidyAllocator* allocator; |
|||
|
|||
/* Miscellaneous */ |
|||
void* appData; |
|||
uint nClassId; |
|||
Bool inputHadBOM; |
|||
|
|||
#ifdef TIDY_STORE_ORIGINAL_TEXT |
|||
Bool storeText; |
|||
#endif |
|||
|
|||
#if PRESERVE_FILE_TIMES |
|||
struct utimbuf filetimes; |
|||
#endif |
|||
tmbstr givenDoctype; |
|||
}; |
|||
|
|||
|
|||
/* Twizzle internal/external types.
** By default the conversions between public handles and internal pointers
** are plain casts; defining NEVER switches to function declarations instead.
*/
#ifndef NEVER

#define tidyDocToImpl( tdoc )       ((TidyDocImpl*)(tdoc))
#define tidyImplToDoc( doc )        ((TidyDoc)(doc))

#define tidyNodeToImpl( tnod )      ((Node*)(tnod))
#define tidyImplToNode( node )      ((TidyNode)(node))

#define tidyAttrToImpl( tattr )     ((AttVal*)(tattr))
#define tidyImplToAttr( attval )    ((TidyAttr)(attval))

#define tidyOptionToImpl( topt )    ((const TidyOptionImpl*)(topt))
#define tidyImplToOption( option )  ((TidyOption)(option))

#else

TidyDocImpl* tidyDocToImpl( TidyDoc tdoc );
TidyDoc      tidyImplToDoc( TidyDocImpl* impl );

Node*        tidyNodeToImpl( TidyNode tnod );
TidyNode     tidyImplToNode( Node* node );

AttVal*      tidyAttrToImpl( TidyAttr tattr );
TidyAttr     tidyImplToAttr( AttVal* attval );

const TidyOptionImpl* tidyOptionToImpl( TidyOption topt );
TidyOption   tidyImplToOption( const TidyOptionImpl* option );

#endif
|||
|
|||
/** Convenience wrappers that route memory requests through the
 *  document's own allocator (doc->allocator).
 */
#define TidyDocAlloc(doc, size)             TidyAlloc((doc)->allocator, size)
#define TidyDocRealloc(doc, block, size)    TidyRealloc((doc)->allocator, block, size)
#define TidyDocFree(doc, block)             TidyFree((doc)->allocator, block)
#define TidyDocPanic(doc, msg)              TidyPanic((doc)->allocator, msg)
|||
|
|||
int TY_(DocParseStream)( TidyDocImpl* impl, StreamIn* in ); |
|||
|
|||
/*
|
|||
[i_a] generic node tree traversal code; used in several spots. |
|||
|
|||
Define your own callback, which returns one of the NodeTraversalSignal values |
|||
to instruct the tree traversal routine TraverseNodeTree() what to do. |
|||
|
|||
Pass custom data to/from the callback using the 'propagate' reference. |
|||
*/ |
|||
/* Value a traversal callback returns to steer TraverseNodeTree(). */
typedef enum
{
    ContinueTraversal = 0,      /* visit siblings and children */
    SkipChildren,               /* visit siblings of this node; ignore its children */
    SkipSiblings,               /* ignore subsequent siblings of this node and their children.
                                   NOTE(review): the original note trailed off after "traverse";
                                   confirm against TraverseNodeTree() what is still traversed */
    SkipChildrenAndSiblings,    /* NOTE(review): the original note repeated SkipChildren's text;
                                   the name indicates that both this node's children and its
                                   subsequent siblings are skipped - confirm */
    VisitParent,                /* REVERSE traversal: visit the parent of the current node */
    ExitTraversal               /* terminate traversal on the spot */
} NodeTraversalSignal;
|||
|
|||
typedef NodeTraversalSignal NodeTraversalCallBack(TidyDocImpl* doc, Node* node, void *propagate); |
|||
|
|||
NodeTraversalSignal TY_(TraverseNodeTree)(TidyDocImpl* doc, Node* node, NodeTraversalCallBack *cb, void *propagate); |
|||
|
|||
#endif /* __TIDY_INT_H__ */ |
File diff suppressed because it is too large
@ -0,0 +1,112 @@ |
|||
#ifndef __TIDY_BUFFIO_H__ |
|||
#define __TIDY_BUFFIO_H__ |
|||
|
|||
/** @file tidybuffio.h - Treat buffer as an I/O stream.
|
|||
|
|||
(c) 1998-2007 (W3C) MIT, ERCIM, Keio University |
|||
See tidy.h for the copyright notice. |
|||
|
|||
Requires buffer to automatically grow as bytes are added. |
|||
Must keep track of current read and write points. |
|||
|
|||
*/ |
|||
|
|||
#include "tidyplatform.h" |
|||
#include "tidy.h" |
|||
|
|||
#ifdef __cplusplus |
|||
extern "C" { |
|||
#endif |
|||
|
|||
/** TidyBuffer - A chunk of memory */ |
|||
TIDY_STRUCT |
|||
struct _TidyBuffer |
|||
{ |
|||
TidyAllocator* allocator; /**< Memory allocator */ |
|||
byte* bp; /**< Pointer to bytes */ |
|||
uint size; /**< # bytes currently in use */ |
|||
uint allocated; /**< # bytes allocated */ |
|||
uint next; /**< Offset of current input position */ |
|||
}; |
|||
|
|||
/** Initialize data structure using the default allocator */ |
|||
TIDY_EXPORT void TIDY_CALL tidyBufInit( TidyBuffer* buf ); |
|||
|
|||
/** Initialize data structure using the given custom allocator */ |
|||
TIDY_EXPORT void TIDY_CALL tidyBufInitWithAllocator( TidyBuffer* buf, TidyAllocator* allocator ); |
|||
|
|||
/** Free current buffer, allocate given amount, reset input pointer,
|
|||
use the default allocator */ |
|||
TIDY_EXPORT void TIDY_CALL tidyBufAlloc( TidyBuffer* buf, uint allocSize ); |
|||
|
|||
/** Free current buffer, allocate given amount, reset input pointer,
|
|||
use the given custom allocator */ |
|||
TIDY_EXPORT void TIDY_CALL tidyBufAllocWithAllocator( TidyBuffer* buf, |
|||
TidyAllocator* allocator, |
|||
uint allocSize ); |
|||
|
|||
/** Expand buffer to given size.
|
|||
** Chunk size is minimum growth. Pass 0 for default of 256 bytes. |
|||
*/ |
|||
TIDY_EXPORT void TIDY_CALL tidyBufCheckAlloc( TidyBuffer* buf, |
|||
uint allocSize, uint chunkSize ); |
|||
|
|||
/** Free current contents and zero out */ |
|||
TIDY_EXPORT void TIDY_CALL tidyBufFree( TidyBuffer* buf ); |
|||
|
|||
/** Set buffer bytes to 0 */ |
|||
TIDY_EXPORT void TIDY_CALL tidyBufClear( TidyBuffer* buf ); |
|||
|
|||
/** Attach to existing buffer */ |
|||
TIDY_EXPORT void TIDY_CALL tidyBufAttach( TidyBuffer* buf, byte* bp, uint size ); |
|||
|
|||
/** Detach from buffer. Caller must free. */ |
|||
TIDY_EXPORT void TIDY_CALL tidyBufDetach( TidyBuffer* buf ); |
|||
|
|||
|
|||
/** Append bytes to buffer. Expand if necessary. */ |
|||
TIDY_EXPORT void TIDY_CALL tidyBufAppend( TidyBuffer* buf, void* vp, uint size ); |
|||
|
|||
/** Append one byte to buffer. Expand if necessary. */ |
|||
TIDY_EXPORT void TIDY_CALL tidyBufPutByte( TidyBuffer* buf, byte bv ); |
|||
|
|||
/** Get byte from end of buffer */ |
|||
TIDY_EXPORT int TIDY_CALL tidyBufPopByte( TidyBuffer* buf ); |
|||
|
|||
|
|||
/** Get byte from front of buffer. Increment input offset. */ |
|||
TIDY_EXPORT int TIDY_CALL tidyBufGetByte( TidyBuffer* buf ); |
|||
|
|||
/** At end of buffer? */ |
|||
TIDY_EXPORT Bool TIDY_CALL tidyBufEndOfInput( TidyBuffer* buf ); |
|||
|
|||
/** Put a byte back into the buffer. Decrement input offset. */ |
|||
TIDY_EXPORT void TIDY_CALL tidyBufUngetByte( TidyBuffer* buf, byte bv ); |
|||
|
|||
|
|||
/**************
|
|||
TIDY |
|||
**************/ |
|||
|
|||
/* Forward declarations
|
|||
*/ |
|||
|
|||
/** Initialize a buffer input source */ |
|||
TIDY_EXPORT void TIDY_CALL tidyInitInputBuffer( TidyInputSource* inp, TidyBuffer* buf ); |
|||
|
|||
/** Initialize a buffer output sink */ |
|||
TIDY_EXPORT void TIDY_CALL tidyInitOutputBuffer( TidyOutputSink* outp, TidyBuffer* buf ); |
|||
|
|||
#ifdef __cplusplus |
|||
} |
|||
#endif |
|||
#endif /* __TIDY_BUFFIO_H__ */ |
|||
|
|||
/*
|
|||
* local variables: |
|||
* mode: c |
|||
* indent-tabs-mode: nil |
|||
* c-basic-offset: 4 |
|||
* eval: (c-set-offset 'substatement-open 0) |
|||
* end: |
|||
*/ |
@ -0,0 +1,858 @@ |
|||
#ifndef __TIDYENUM_H__ |
|||
#define __TIDYENUM_H__ |
|||
|
|||
/* @file tidyenum.h -- Split public enums into separate header
|
|||
|
|||
Simplifies enum re-use in various wrappers. e.g. SWIG |
|||
generated wrappers and COM IDL files. |
|||
|
|||
Copyright (c) 1998-2008 World Wide Web Consortium |
|||
(Massachusetts Institute of Technology, European Research |
|||
Consortium for Informatics and Mathematics, Keio University). |
|||
All Rights Reserved. |
|||
|
|||
Contributing Author(s): |
|||
|
|||
Dave Raggett <dsr@w3.org> |
|||
|
|||
The contributing author(s) would like to thank all those who |
|||
helped with testing, bug fixes and suggestions for improvements. |
|||
This wouldn't have been possible without your help. |
|||
|
|||
COPYRIGHT NOTICE: |
|||
|
|||
This software and documentation is provided "as is," and |
|||
the copyright holders and contributing author(s) make no |
|||
representations or warranties, express or implied, including |
|||
but not limited to, warranties of merchantability or fitness |
|||
for any particular purpose or that the use of the software or |
|||
documentation will not infringe any third party patents, |
|||
copyrights, trademarks or other rights. |
|||
|
|||
The copyright holders and contributing author(s) will not be held |
|||
liable for any direct, indirect, special or consequential damages |
|||
arising out of any use of the software or documentation, even if |
|||
advised of the possibility of such damage. |
|||
|
|||
Permission is hereby granted to use, copy, modify, and distribute |
|||
this source code, or portions hereof, documentation and executables, |
|||
for any purpose, without fee, subject to the following restrictions: |
|||
|
|||
1. The origin of this source code must not be misrepresented. |
|||
2. Altered versions must be plainly marked as such and must |
|||
not be misrepresented as being the original source. |
|||
3. This Copyright notice may not be removed or altered from any |
|||
source or altered source distribution. |
|||
|
|||
The copyright holders and contributing author(s) specifically |
|||
permit, without fee, and encourage the use of this source code |
|||
as a component for supporting the Hypertext Markup Language in |
|||
commercial products. If you use this source code in a product, |
|||
acknowledgment is not required but would be appreciated. |
|||
|
|||
|
|||
Created 2001-05-20 by Charles Reitzel |
|||
Updated 2002-07-01 by Charles Reitzel - 1st Implementation |
|||
|
|||
*/ |
|||
|
|||
#ifdef __cplusplus |
|||
extern "C" { |
|||
#endif |
|||
|
|||
/* Enumerate configuration options
|
|||
*/ |
|||
|
|||
/** Categories of Tidy configuration options.
*/
typedef enum
{
  TidyMarkup,          /**< Markup options: (X)HTML version, etc */
  TidyDiagnostics,     /**< Diagnostics */
  TidyPrettyPrint,     /**< Output layout */
  TidyEncoding,        /**< Character encodings */
  TidyMiscellaneous    /**< File handling, message format, etc. */
} TidyConfigCategory;
|||
|
|||
|
|||
/** Option IDs used to get/set option values.

    These TidyOptionId are used throughout libtidy, and also
    have associated localized strings to describe them.

    Note this enum MUST start at zero due to historical design-time
    decisions that make assumptions about this starting value.

    Each conditional section below (SUPPORT_ASIAN_ENCODINGS,
    SUPPORT_UTF16_ENCODINGS) contributes the same number of enumerators in
    both its #if and #else branch, so the numeric value of every option is
    the same whichever way those switches are set.
*/
typedef enum
{
  TidyUnknownOption,   /**< Unknown option! */
  TidyIndentSpaces,    /**< Indentation n spaces/tabs */
  TidyWrapLen,         /**< Wrap margin */
  TidyTabSize,         /**< Expand tabs to n spaces */

  TidyCharEncoding,    /**< In/out character encoding */
  TidyInCharEncoding,  /**< Input character encoding (if different) */
  TidyOutCharEncoding, /**< Output character encoding (if different) */
  TidyNewline,         /**< Output line ending (default to platform) */

  TidyDoctypeMode,     /**< See doctype property */
  TidyDoctype,         /**< User specified doctype */

  TidyDuplicateAttrs,  /**< Keep first or last duplicate attribute */
  TidyAltText,         /**< Default text for alt attribute */

  /* obsolete */
  TidySlideStyle,      /**< Style sheet for slides: not used for anything yet */

  TidyErrFile,         /**< File name to write errors to */
  TidyOutFile,         /**< File name to write markup to */
  TidyWriteBack,       /**< If true then output tidied markup */
  TidyShowMarkup,      /**< If false, normal output is suppressed */
  TidyShowInfo,        /**< If true, info-level messages are shown */
  TidyShowWarnings,    /**< However errors are always shown */
  TidyQuiet,           /**< No 'Parsing X', guessed DTD or summary */
  TidyIndentContent,   /**< Indent content of appropriate tags */
                       /**< "auto" does text/block level content indentation */
  TidyCoerceEndTags,   /**< Coerce end tags from start tags where probably intended */
  TidyOmitOptionalTags,/**< Suppress optional start tags and end tags */
  TidyHideEndTags,     /**< Legacy name for TidyOmitOptionalTags */
  TidyXmlTags,         /**< Treat input as XML */
  TidyXmlOut,          /**< Create output as XML */
  TidyXhtmlOut,        /**< Output extensible HTML */
  TidyHtmlOut,         /**< Output plain HTML, even for XHTML input.
                            Yes means set explicitly. */
  TidyXmlDecl,         /**< Add <?xml?> for XML docs */
  TidyUpperCaseTags,   /**< Output tags in upper not lower case */
  TidyUpperCaseAttrs,  /**< Output attributes in upper not lower case */
  TidyMakeBare,        /**< Make bare HTML: remove Microsoft cruft */
  TidyMakeClean,       /**< Replace presentational clutter by style rules */
  TidyGDocClean,       /**< Clean up HTML exported from Google Docs */
  TidyLogicalEmphasis, /**< Replace i by em and b by strong */
  TidyDropPropAttrs,   /**< Discard proprietary attributes */
  TidyDropFontTags,    /**< Discard presentation tags */
  TidyDropEmptyElems,  /**< Discard empty elements */
  TidyDropEmptyParas,  /**< Discard empty p elements */
  TidyFixComments,     /**< Fix comments with adjacent hyphens */
  TidyBreakBeforeBR,   /**< Output newline before <br> or not? */

  /* obsolete */
  TidyBurstSlides,     /**< Create slides on each h2 element */

  TidyNumEntities,     /**< Use numeric entities */
  TidyQuoteMarks,      /**< Output " marks as &quot; */
  TidyQuoteNbsp,       /**< Output non-breaking space as entity */
  TidyQuoteAmpersand,  /**< Output naked ampersand as &amp; */
  TidyWrapAttVals,     /**< Wrap within attribute values */
  TidyWrapScriptlets,  /**< Wrap within JavaScript string literals */
  TidyWrapSection,     /**< Wrap within <![ ... ]> section tags */
  TidyWrapAsp,         /**< Wrap within ASP pseudo elements */
  TidyWrapJste,        /**< Wrap within JSTE pseudo elements */
  TidyWrapPhp,         /**< Wrap within PHP pseudo elements */
  TidyFixBackslash,    /**< Fix URLs by replacing \ with / */
  TidyIndentAttributes,/**< Newline+indent before each attribute */
  TidyXmlPIs,          /**< If set to yes PIs must end with ?> */
  TidyXmlSpace,        /**< If set to yes adds xml:space attr as needed */
  TidyEncloseBodyText, /**< If yes text at body is wrapped in P's */
  TidyEncloseBlockText,/**< If yes text in blocks is wrapped in P's */
  TidyKeepFileTimes,   /**< If yes last modified time is preserved */
  TidyWord2000,        /**< Draconian cleaning for Word2000 */
  TidyMark,            /**< Add meta element indicating tidied doc */
  TidyEmacs,           /**< If true format error output for GNU Emacs */
  TidyEmacsFile,       /**< Name of current Emacs file */
  TidyLiteralAttribs,  /**< If true attributes may use newlines */
  TidyBodyOnly,        /**< Output BODY content only */
  TidyFixUri,          /**< Applies URI encoding if necessary */
  TidyLowerLiterals,   /**< Folds known attribute values to lower case */
  TidyHideComments,    /**< Hides all (real) comments in output */
  TidyIndentCdata,     /**< Indent <![CDATA[ ... ]]> section */
  TidyForceOutput,     /**< Output document even if errors were found */
  TidyShowErrors,      /**< Number of errors to put out */
  TidyAsciiChars,      /**< Convert quotes and dashes to nearest ASCII char */
  TidyJoinClasses,     /**< Join multiple class attributes */
  TidyJoinStyles,      /**< Join multiple style attributes */
  TidyEscapeCdata,     /**< Replace <![CDATA[]]> sections with escaped text */

#if SUPPORT_ASIAN_ENCODINGS
  TidyLanguage,        /**< Language property: not used for anything yet */
  TidyNCR,             /**< Allow numeric character references */
#else
  /* Placeholders keep the following enumerator values unchanged. */
  TidyLanguageNotUsed,
  TidyNCRNotUsed,
#endif
#if SUPPORT_UTF16_ENCODINGS
  TidyOutputBOM,       /**< Output a Byte Order Mark (BOM) for UTF-16 encodings */
                       /**< auto: if input stream has BOM, we output a BOM */
#else
  TidyOutputBOMNotUsed,
#endif

  TidyReplaceColor,    /**< Replace hex color attribute values with names */
  TidyCSSPrefix,       /**< CSS class naming for -clean option */

  TidyInlineTags,      /**< Declared inline tags */
  TidyBlockTags,       /**< Declared block tags */
  TidyEmptyTags,       /**< Declared empty tags */
  TidyPreTags,         /**< Declared pre tags */

  TidyAccessibilityCheckLevel, /**< Accessibility check level
                                    0 (old style), or 1, 2, 3 */

  TidyVertSpace,       /**< degree to which markup is spread out vertically */
#if SUPPORT_ASIAN_ENCODINGS
  TidyPunctWrap,       /**< consider punctuation and breaking spaces for wrapping */
#else
  TidyPunctWrapNotUsed,
#endif
  TidyMergeEmphasis,   /**< Merge nested B and I elements */
  TidyMergeDivs,       /**< Merge multiple DIVs */
  TidyDecorateInferredUL, /**< Mark inferred UL elements with no indent CSS */
  TidyPreserveEntities,   /**< Preserve entities */
  TidySortAttributes,     /**< Sort attributes */
  TidyMergeSpans,         /**< Merge multiple SPANs */
  TidyAnchorAsName,       /**< Define anchors as name attributes */
  TidyPPrintTabs,         /**< Indent using tabs instead of spaces */
  TidySkipNested,         /**< Skip nested tags in script and style CDATA */
  TidyStrictTagsAttr,     /**< Ensure tags and attributes match output HTML version */
  TidyEscapeScripts,      /**< Escape items that look like closing tags in script tags */
  N_TIDY_OPTIONS          /**< Must be last */
} TidyOptionId;
|||
|
|||
|
|||
/** Option data types.
*/
typedef enum
{
  TidyString,          /**< String */
  TidyInteger,         /**< Integer or enumeration */
  TidyBoolean          /**< Boolean flag */
} TidyOptionType;
|||
|
|||
|
|||
/** AutoBool values used by ParseBool, ParseTriState, ParseIndent, ParseBOM.
*/
typedef enum
{
  TidyNoState,     /**< maps to 'no' */
  TidyYesState,    /**< maps to 'yes' */
  TidyAutoState    /**< Automatic */
} TidyTriState;
|||
|
|||
/** TidyNewline option values to control output line endings.
*/
typedef enum
{
  TidyLF,         /**< Use Unix style: LF */
  TidyCRLF,       /**< Use DOS/Windows style: CR+LF */
  TidyCR          /**< Use Macintosh style: CR */
} TidyLineEnding;
|||
|
|||
|
|||
/** Mode controlling treatment of doctype.
*/
typedef enum
{
  TidyDoctypeHtml5,   /**< <!DOCTYPE html> */
  TidyDoctypeOmit,    /**< Omit DOCTYPE altogether */
  TidyDoctypeAuto,    /**< Keep DOCTYPE in input.  Set version to content */
  TidyDoctypeStrict,  /**< Convert document to HTML 4 strict content model */
  TidyDoctypeLoose,   /**< Convert document to HTML 4 transitional
                           content model */
  TidyDoctypeUser     /**< Set DOCTYPE FPI explicitly */
} TidyDoctypeModes;
|||
|
|||
/** Mode controlling treatment of duplicate attributes.
*/
typedef enum
{
  TidyKeepFirst,   /* presumably: keep the first of the duplicates - confirm against the option handler */
  TidyKeepLast     /* presumably: keep the last of the duplicates - confirm against the option handler */
} TidyDupAttrModes;
|||
|
|||
/** Mode controlling treatment of sorting attributes.
*/
typedef enum
{
  TidySortAttrNone,    /* value 0 */
  TidySortAttrAlpha    /* value 1 */
} TidyAttrSortStrategy;
|||
|
|||
|
|||
/* I/O and Message handling interface
**
** By default, Tidy will define, create and use
** instances of input and output handlers for
** standard C buffered I/O (i.e. FILE* stdin,
** FILE* stdout and FILE* stderr for content
** input, content output and diagnostic output,
** respectively).  A FILE* cfgFile input handler
** will be used for config files.  Command line
** options will just be set directly.
*/
|||
|
|||
/** Message severity level
 *  These TidyReportLevel are used throughout libtidy, but don't
 *  have associated localized strings to describe them because
 *  TidyReportLevel is externally-facing, and changing the enum
 *  starting int can break existing API's for poorly-written
 *  applications using libtidy. See enum `TidyReportLevelKeys`.
 */
typedef enum
{
  TidyInfo,             /**< Information about markup usage */
  TidyWarning,          /**< Warning message */
  TidyConfig,           /**< Configuration error */
  TidyAccess,           /**< Accessibility message */
  TidyError,            /**< Error message - output suppressed */
  TidyBadDocument,      /**< I/O or file system error */
  TidyFatal             /**< Crash! */
} TidyReportLevel;
|||
|
|||
/** Message severity level - string lookup keys
 *  These TidyReportLevelKeys are used throughout libtidy, and
 *  have associated localized strings to describe them. They
 *  correspond to enum `TidyReportLevel`.
 *
 *  Numbering starts at 600 and follows the same order as
 *  `TidyReportLevel`.
 */
typedef enum
{
  TidyInfoString = 600,
  TidyWarningString,
  TidyConfigString,
  TidyAccessString,
  TidyErrorString,
  TidyBadDocumentString,
  TidyFatalString
} TidyReportLevelKeys;
|||
|
|||
|
|||
/* Document tree traversal functions
|
|||
*/ |
|||
|
|||
/** Node types.
*/
typedef enum
{
  TidyNode_Root,        /**< Root */
  TidyNode_DocType,     /**< DOCTYPE */
  TidyNode_Comment,     /**< Comment */
  TidyNode_ProcIns,     /**< Processing Instruction */
  TidyNode_Text,        /**< Text */
  TidyNode_Start,       /**< Start Tag */
  TidyNode_End,         /**< End Tag */
  TidyNode_StartEnd,    /**< Start/End (empty) Tag */
  TidyNode_CDATA,       /**< Unparsed Text */
  TidyNode_Section,     /**< XML Section */
  TidyNode_Asp,         /**< ASP Source */
  TidyNode_Jste,        /**< JSTE Source */
  TidyNode_Php,         /**< PHP Source */
  TidyNode_XmlDecl      /**< XML Declaration */
} TidyNodeType;
|||
|
|||
|
|||
/** Known HTML element types.
*/
typedef enum
{
  TidyTag_UNKNOWN,  /**< Unknown tag! */
  TidyTag_A,        /**< A */
  TidyTag_ABBR,     /**< ABBR */
  TidyTag_ACRONYM,  /**< ACRONYM */
  TidyTag_ADDRESS,  /**< ADDRESS */
  TidyTag_ALIGN,    /**< ALIGN */
  TidyTag_APPLET,   /**< APPLET */
  TidyTag_AREA,     /**< AREA */
  TidyTag_B,        /**< B */
  TidyTag_BASE,     /**< BASE */
  TidyTag_BASEFONT, /**< BASEFONT */
  TidyTag_BDO,      /**< BDO */
  TidyTag_BGSOUND,  /**< BGSOUND */
  TidyTag_BIG,      /**< BIG */
  TidyTag_BLINK,    /**< BLINK */
  TidyTag_BLOCKQUOTE,   /**< BLOCKQUOTE */
  TidyTag_BODY,     /**< BODY */
  TidyTag_BR,       /**< BR */
  TidyTag_BUTTON,   /**< BUTTON */
  TidyTag_CAPTION,  /**< CAPTION */
  TidyTag_CENTER,   /**< CENTER */
  TidyTag_CITE,     /**< CITE */
  TidyTag_CODE,     /**< CODE */
  TidyTag_COL,      /**< COL */
  TidyTag_COLGROUP, /**< COLGROUP */
  TidyTag_COMMENT,  /**< COMMENT */
  TidyTag_DD,       /**< DD */
  TidyTag_DEL,      /**< DEL */
  TidyTag_DFN,      /**< DFN */
  TidyTag_DIR,      /**< DIR */
  TidyTag_DIV,      /**< DIV */
  TidyTag_DL,       /**< DL */
  TidyTag_DT,       /**< DT */
  TidyTag_EM,       /**< EM */
  TidyTag_EMBED,    /**< EMBED */
  TidyTag_FIELDSET, /**< FIELDSET */
  TidyTag_FONT,     /**< FONT */
  TidyTag_FORM,     /**< FORM */
  TidyTag_FRAME,    /**< FRAME */
  TidyTag_FRAMESET, /**< FRAMESET */
  TidyTag_H1,       /**< H1 */
  TidyTag_H2,       /**< H2 */
  TidyTag_H3,       /**< H3 */
  TidyTag_H4,       /**< H4 */
  TidyTag_H5,       /**< H5 */
  TidyTag_H6,       /**< H6 */
  TidyTag_HEAD,     /**< HEAD */
  TidyTag_HR,       /**< HR */
  TidyTag_HTML,     /**< HTML */
  TidyTag_I,        /**< I */
  TidyTag_IFRAME,   /**< IFRAME */
  TidyTag_ILAYER,   /**< ILAYER */
  TidyTag_IMG,      /**< IMG */
  TidyTag_INPUT,    /**< INPUT */
  TidyTag_INS,      /**< INS */
  TidyTag_ISINDEX,  /**< ISINDEX */
  TidyTag_KBD,      /**< KBD */
  TidyTag_KEYGEN,   /**< KEYGEN */
  TidyTag_LABEL,    /**< LABEL */
  TidyTag_LAYER,    /**< LAYER */
  TidyTag_LEGEND,   /**< LEGEND */
  TidyTag_LI,       /**< LI */
  TidyTag_LINK,     /**< LINK */
  TidyTag_LISTING,  /**< LISTING */
  TidyTag_MAP,      /**< MAP */
  TidyTag_MATHML,   /**< MATH  (HTML5) [i_a]2 MathML embedded in [X]HTML */
  TidyTag_MARQUEE,  /**< MARQUEE */
  TidyTag_MENU,     /**< MENU */
  TidyTag_META,     /**< META */
  TidyTag_MULTICOL, /**< MULTICOL */
  TidyTag_NOBR,     /**< NOBR */
  TidyTag_NOEMBED,  /**< NOEMBED */
  TidyTag_NOFRAMES, /**< NOFRAMES */
  TidyTag_NOLAYER,  /**< NOLAYER */
  TidyTag_NOSAVE,   /**< NOSAVE */
  TidyTag_NOSCRIPT, /**< NOSCRIPT */
  TidyTag_OBJECT,   /**< OBJECT */
  TidyTag_OL,       /**< OL */
  TidyTag_OPTGROUP, /**< OPTGROUP */
  TidyTag_OPTION,   /**< OPTION */
  TidyTag_P,        /**< P */
  TidyTag_PARAM,    /**< PARAM */
  TidyTag_PICTURE,  /**< PICTURE (HTML5) */
  TidyTag_PLAINTEXT,/**< PLAINTEXT */
  TidyTag_PRE,      /**< PRE */
  TidyTag_Q,        /**< Q */
  TidyTag_RB,       /**< RB */
  TidyTag_RBC,      /**< RBC */
  TidyTag_RP,       /**< RP */
  TidyTag_RT,       /**< RT */
  TidyTag_RTC,      /**< RTC */
  TidyTag_RUBY,     /**< RUBY */
  TidyTag_S,        /**< S */
  TidyTag_SAMP,     /**< SAMP */
  TidyTag_SCRIPT,   /**< SCRIPT */
  TidyTag_SELECT,   /**< SELECT */
  TidyTag_SERVER,   /**< SERVER */
  TidyTag_SERVLET,  /**< SERVLET */
  TidyTag_SMALL,    /**< SMALL */
  TidyTag_SPACER,   /**< SPACER */
  TidyTag_SPAN,     /**< SPAN */
  TidyTag_STRIKE,   /**< STRIKE */
  TidyTag_STRONG,   /**< STRONG */
  TidyTag_STYLE,    /**< STYLE */
  TidyTag_SUB,      /**< SUB */
  TidyTag_SUP,      /**< SUP */
  TidyTag_SVG,      /**< SVG  (HTML5) */
  TidyTag_TABLE,    /**< TABLE */
  TidyTag_TBODY,    /**< TBODY */
  TidyTag_TD,       /**< TD */
  TidyTag_TEXTAREA, /**< TEXTAREA */
  TidyTag_TFOOT,    /**< TFOOT */
  TidyTag_TH,       /**< TH */
  TidyTag_THEAD,    /**< THEAD */
  TidyTag_TITLE,    /**< TITLE */
  TidyTag_TR,       /**< TR */
  TidyTag_TT,       /**< TT */
  TidyTag_U,        /**< U */
  TidyTag_UL,       /**< UL */
  TidyTag_VAR,      /**< VAR */
  TidyTag_WBR,      /**< WBR */
  TidyTag_XMP,      /**< XMP */
  TidyTag_NEXTID,   /**< NEXTID */

  TidyTag_ARTICLE,
  TidyTag_ASIDE,
  TidyTag_AUDIO,
  TidyTag_BDI,
  TidyTag_CANVAS,
  TidyTag_COMMAND,
  TidyTag_DATALIST,
  TidyTag_DETAILS,
  TidyTag_DIALOG,
  TidyTag_FIGCAPTION,
  TidyTag_FIGURE,
  TidyTag_FOOTER,
  TidyTag_HEADER,
  TidyTag_HGROUP,
  TidyTag_MAIN,
  TidyTag_MARK,
  TidyTag_MENUITEM,
  TidyTag_METER,
  TidyTag_NAV,
  TidyTag_OUTPUT,
  TidyTag_PROGRESS,
  TidyTag_SECTION,
  TidyTag_SOURCE,
  TidyTag_SUMMARY,
  TidyTag_TEMPLATE,
  TidyTag_TIME,
  TidyTag_TRACK,
  TidyTag_VIDEO,

  N_TIDY_TAGS       /**< Must be last */
} TidyTagId;
|||
|
|||
/* Attribute interrogation
|
|||
*/ |
|||
|
|||
/** Known HTML attributes.
*/
typedef enum
{
  TidyAttr_UNKNOWN,           /**< UNKNOWN= */
  TidyAttr_ABBR,              /**< ABBR= */
  TidyAttr_ACCEPT,            /**< ACCEPT= */
  TidyAttr_ACCEPT_CHARSET,    /**< ACCEPT_CHARSET= */
  TidyAttr_ACCESSKEY,         /**< ACCESSKEY= */
  TidyAttr_ACTION,            /**< ACTION= */
  TidyAttr_ADD_DATE,          /**< ADD_DATE= */
  TidyAttr_ALIGN,             /**< ALIGN= */
  TidyAttr_ALINK,             /**< ALINK= */
  TidyAttr_ALLOWFULLSCREEN,   /**< ALLOWFULLSCREEN= */
  TidyAttr_ALT,               /**< ALT= */
  TidyAttr_ARCHIVE,           /**< ARCHIVE= */
  TidyAttr_AXIS,              /**< AXIS= */
  TidyAttr_BACKGROUND,        /**< BACKGROUND= */
  TidyAttr_BGCOLOR,           /**< BGCOLOR= */
  TidyAttr_BGPROPERTIES,      /**< BGPROPERTIES= */
  TidyAttr_BORDER,            /**< BORDER= */
  TidyAttr_BORDERCOLOR,       /**< BORDERCOLOR= */
  TidyAttr_BOTTOMMARGIN,      /**< BOTTOMMARGIN= */
  TidyAttr_CELLPADDING,       /**< CELLPADDING= */
  TidyAttr_CELLSPACING,       /**< CELLSPACING= */
  TidyAttr_CHAR,              /**< CHAR= */
  TidyAttr_CHAROFF,           /**< CHAROFF= */
  TidyAttr_CHARSET,           /**< CHARSET= */
  TidyAttr_CHECKED,           /**< CHECKED= */
  TidyAttr_CITE,              /**< CITE= */
  TidyAttr_CLASS,             /**< CLASS= */
  TidyAttr_CLASSID,           /**< CLASSID= */
  TidyAttr_CLEAR,             /**< CLEAR= */
  TidyAttr_CODE,              /**< CODE= */
  TidyAttr_CODEBASE,          /**< CODEBASE= */
  TidyAttr_CODETYPE,          /**< CODETYPE= */
  TidyAttr_COLOR,             /**< COLOR= */
  TidyAttr_COLS,              /**< COLS= */
  TidyAttr_COLSPAN,           /**< COLSPAN= */
  TidyAttr_COMPACT,           /**< COMPACT= */
  TidyAttr_CONTENT,           /**< CONTENT= */
  TidyAttr_COORDS,            /**< COORDS= */
  TidyAttr_DATA,              /**< DATA= */
  TidyAttr_DATAFLD,           /**< DATAFLD= */
  TidyAttr_DATAFORMATAS,      /**< DATAFORMATAS= */
  TidyAttr_DATAPAGESIZE,      /**< DATAPAGESIZE= */
  TidyAttr_DATASRC,           /**< DATASRC= */
  TidyAttr_DATETIME,          /**< DATETIME= */
  TidyAttr_DECLARE,           /**< DECLARE= */
  TidyAttr_DEFER,             /**< DEFER= */
  TidyAttr_DIR,               /**< DIR= */
  TidyAttr_DISABLED,          /**< DISABLED= */
  TidyAttr_ENCODING,          /**< ENCODING= */
  TidyAttr_ENCTYPE,           /**< ENCTYPE= */
  TidyAttr_FACE,              /**< FACE= */
  TidyAttr_FOR,               /**< FOR= */
  TidyAttr_FRAME,             /**< FRAME= */
  TidyAttr_FRAMEBORDER,       /**< FRAMEBORDER= */
  TidyAttr_FRAMESPACING,      /**< FRAMESPACING= */
  TidyAttr_GRIDX,             /**< GRIDX= */
  TidyAttr_GRIDY,             /**< GRIDY= */
  TidyAttr_HEADERS,           /**< HEADERS= */
  TidyAttr_HEIGHT,            /**< HEIGHT= */
  TidyAttr_HREF,              /**< HREF= */
  TidyAttr_HREFLANG,          /**< HREFLANG= */
  TidyAttr_HSPACE,            /**< HSPACE= */
  TidyAttr_HTTP_EQUIV,        /**< HTTP_EQUIV= */
  TidyAttr_ID,                /**< ID= */
  TidyAttr_ISMAP,             /**< ISMAP= */
  TidyAttr_ITEMID,            /**< ITEMID= */
  TidyAttr_ITEMPROP,          /**< ITEMPROP= */
  TidyAttr_ITEMREF,           /**< ITEMREF= */
  TidyAttr_ITEMSCOPE,         /**< ITEMSCOPE= */
  TidyAttr_ITEMTYPE,          /**< ITEMTYPE= */
  TidyAttr_LABEL,             /**< LABEL= */
  TidyAttr_LANG,              /**< LANG= */
  TidyAttr_LANGUAGE,          /**< LANGUAGE= */
  TidyAttr_LAST_MODIFIED,     /**< LAST_MODIFIED= */
  TidyAttr_LAST_VISIT,        /**< LAST_VISIT= */
  TidyAttr_LEFTMARGIN,        /**< LEFTMARGIN= */
  TidyAttr_LINK,              /**< LINK= */
  TidyAttr_LONGDESC,          /**< LONGDESC= */
  TidyAttr_LOWSRC,            /**< LOWSRC= */
  TidyAttr_MARGINHEIGHT,      /**< MARGINHEIGHT= */
  TidyAttr_MARGINWIDTH,       /**< MARGINWIDTH= */
  TidyAttr_MAXLENGTH,         /**< MAXLENGTH= */
  TidyAttr_MEDIA,             /**< MEDIA= */
  TidyAttr_METHOD,            /**< METHOD= */
  TidyAttr_MULTIPLE,          /**< MULTIPLE= */
  TidyAttr_NAME,              /**< NAME= */
  TidyAttr_NOHREF,            /**< NOHREF= */
  TidyAttr_NORESIZE,          /**< NORESIZE= */
  TidyAttr_NOSHADE,           /**< NOSHADE= */
  TidyAttr_NOWRAP,            /**< NOWRAP= */
  TidyAttr_OBJECT,            /**< OBJECT= */
  TidyAttr_OnAFTERUPDATE,     /**< OnAFTERUPDATE= */
  TidyAttr_OnBEFOREUNLOAD,    /**< OnBEFOREUNLOAD= */
  TidyAttr_OnBEFOREUPDATE,    /**< OnBEFOREUPDATE= */
  TidyAttr_OnBLUR,            /**< OnBLUR= */
  TidyAttr_OnCHANGE,          /**< OnCHANGE= */
  TidyAttr_OnCLICK,           /**< OnCLICK= */
  TidyAttr_OnDATAAVAILABLE,   /**< OnDATAAVAILABLE= */
  TidyAttr_OnDATASETCHANGED,  /**< OnDATASETCHANGED= */
  TidyAttr_OnDATASETCOMPLETE, /**< OnDATASETCOMPLETE= */
  TidyAttr_OnDBLCLICK,        /**< OnDBLCLICK= */
  TidyAttr_OnERRORUPDATE,     /**< OnERRORUPDATE= */
  TidyAttr_OnFOCUS,           /**< OnFOCUS= */
  TidyAttr_OnKEYDOWN,         /**< OnKEYDOWN= */
  TidyAttr_OnKEYPRESS,        /**< OnKEYPRESS= */
  TidyAttr_OnKEYUP,           /**< OnKEYUP= */
  TidyAttr_OnLOAD,            /**< OnLOAD= */
  TidyAttr_OnMOUSEDOWN,       /**< OnMOUSEDOWN= */
  TidyAttr_OnMOUSEMOVE,       /**< OnMOUSEMOVE= */
  TidyAttr_OnMOUSEOUT,        /**< OnMOUSEOUT= */
  TidyAttr_OnMOUSEOVER,       /**< OnMOUSEOVER= */
  TidyAttr_OnMOUSEUP,         /**< OnMOUSEUP= */
  TidyAttr_OnRESET,           /**< OnRESET= */
  TidyAttr_OnROWENTER,        /**< OnROWENTER= */
  TidyAttr_OnROWEXIT,         /**< OnROWEXIT= */
  TidyAttr_OnSELECT,          /**< OnSELECT= */
  TidyAttr_OnSUBMIT,          /**< OnSUBMIT= */
  TidyAttr_OnUNLOAD,          /**< OnUNLOAD= */
  TidyAttr_PROFILE,           /**< PROFILE= */
  TidyAttr_PROMPT,            /**< PROMPT= */
  TidyAttr_RBSPAN,            /**< RBSPAN= */
  TidyAttr_READONLY,          /**< READONLY= */
  TidyAttr_REL,               /**< REL= */
  TidyAttr_REV,               /**< REV= */
  TidyAttr_RIGHTMARGIN,       /**< RIGHTMARGIN= */
  TidyAttr_ROLE,              /**< ROLE= */
  TidyAttr_ROWS,              /**< ROWS= */
  TidyAttr_ROWSPAN,           /**< ROWSPAN= */
  TidyAttr_RULES,             /**< RULES= */
  TidyAttr_SCHEME,            /**< SCHEME= */
  TidyAttr_SCOPE,             /**< SCOPE= */
  TidyAttr_SCROLLING,         /**< SCROLLING= */
  TidyAttr_SELECTED,          /**< SELECTED= */
  TidyAttr_SHAPE,             /**< SHAPE= */
  TidyAttr_SHOWGRID,          /**< SHOWGRID= */
  TidyAttr_SHOWGRIDX,         /**< SHOWGRIDX= */
  TidyAttr_SHOWGRIDY,         /**< SHOWGRIDY= */
  TidyAttr_SIZE,              /**< SIZE= */
  TidyAttr_SPAN,              /**< SPAN= */
  TidyAttr_SRC,               /**< SRC= */
  TidyAttr_SRCSET,            /**< SRCSET= (HTML5) */
  TidyAttr_STANDBY,           /**< STANDBY= */
  TidyAttr_START,             /**< START= */
  TidyAttr_STYLE,             /**< STYLE= */
  TidyAttr_SUMMARY,           /**< SUMMARY= */
  TidyAttr_TABINDEX,          /**< TABINDEX= */
  TidyAttr_TARGET,            /**< TARGET= */
  TidyAttr_TEXT,              /**< TEXT= */
  TidyAttr_TITLE,             /**< TITLE= */
  TidyAttr_TOPMARGIN,         /**< TOPMARGIN= */
  TidyAttr_TRANSLATE,         /**< TRANSLATE= */
  TidyAttr_TYPE,              /**< TYPE= */
  TidyAttr_USEMAP,            /**< USEMAP= */
  TidyAttr_VALIGN,            /**< VALIGN= */
  TidyAttr_VALUE,             /**< VALUE= */
  TidyAttr_VALUETYPE,         /**< VALUETYPE= */
  TidyAttr_VERSION,           /**< VERSION= */
  TidyAttr_VLINK,             /**< VLINK= */
  TidyAttr_VSPACE,            /**< VSPACE= */
  TidyAttr_WIDTH,             /**< WIDTH= */
  TidyAttr_WRAP,              /**< WRAP= */
  TidyAttr_XML_LANG,          /**< XML_LANG= */
  TidyAttr_XML_SPACE,         /**< XML_SPACE= */
  TidyAttr_XMLNS,             /**< XMLNS= */

  TidyAttr_EVENT,             /**< EVENT= */
  TidyAttr_METHODS,           /**< METHODS= */
  TidyAttr_N,                 /**< N= */
  TidyAttr_SDAFORM,           /**< SDAFORM= */
  TidyAttr_SDAPREF,           /**< SDAPREF= */
  TidyAttr_SDASUFF,           /**< SDASUFF= */
  TidyAttr_URN,               /**< URN= */

  TidyAttr_ASYNC,
  TidyAttr_AUTOCOMPLETE,
  TidyAttr_AUTOFOCUS,
  TidyAttr_AUTOPLAY,
  TidyAttr_CHALLENGE,
  TidyAttr_CONTENTEDITABLE,
  TidyAttr_CONTEXTMENU,
  TidyAttr_CONTROLS,
  TidyAttr_CROSSORIGIN,       /**< CROSSORIGIN= */
  TidyAttr_DEFAULT,
  TidyAttr_DIRNAME,
  TidyAttr_DRAGGABLE,
  TidyAttr_DROPZONE,
  TidyAttr_FORM,
  TidyAttr_FORMACTION,
  TidyAttr_FORMENCTYPE,
  TidyAttr_FORMMETHOD,
  TidyAttr_FORMNOVALIDATE,
  TidyAttr_FORMTARGET,
  TidyAttr_HIDDEN,
  TidyAttr_HIGH,
  TidyAttr_ICON,
  TidyAttr_KEYTYPE,
  TidyAttr_KIND,
  TidyAttr_LIST,
  TidyAttr_LOOP,
  TidyAttr_LOW,
  TidyAttr_MANIFEST,
  TidyAttr_MAX,
  TidyAttr_MEDIAGROUP,
  TidyAttr_MIN,
  TidyAttr_NOVALIDATE,
  TidyAttr_OPEN,
  TidyAttr_OPTIMUM,
  TidyAttr_OnABORT,
  TidyAttr_OnAFTERPRINT,
  TidyAttr_OnBEFOREPRINT,
  TidyAttr_OnCANPLAY,
  TidyAttr_OnCANPLAYTHROUGH,
  TidyAttr_OnCONTEXTMENU,
  TidyAttr_OnCUECHANGE,
  TidyAttr_OnDRAG,
  TidyAttr_OnDRAGEND,
  TidyAttr_OnDRAGENTER,
  TidyAttr_OnDRAGLEAVE,
  TidyAttr_OnDRAGOVER,
  TidyAttr_OnDRAGSTART,
  TidyAttr_OnDROP,
  TidyAttr_OnDURATIONCHANGE,
  TidyAttr_OnEMPTIED,
  TidyAttr_OnENDED,
  TidyAttr_OnERROR,
  TidyAttr_OnHASHCHANGE,
  TidyAttr_OnINPUT,
  TidyAttr_OnINVALID,
  TidyAttr_OnLOADEDDATA,
  TidyAttr_OnLOADEDMETADATA,
  TidyAttr_OnLOADSTART,
  TidyAttr_OnMESSAGE,
  TidyAttr_OnMOUSEWHEEL,
  TidyAttr_OnOFFLINE,
  TidyAttr_OnONLINE,
  TidyAttr_OnPAGEHIDE,
  TidyAttr_OnPAGESHOW,
  TidyAttr_OnPAUSE,
  TidyAttr_OnPLAY,
  TidyAttr_OnPLAYING,
  TidyAttr_OnPOPSTATE,
  TidyAttr_OnPROGRESS,
  TidyAttr_OnRATECHANGE,
  TidyAttr_OnREADYSTATECHANGE,
  TidyAttr_OnREDO,
  TidyAttr_OnRESIZE,
  TidyAttr_OnSCROLL,
  TidyAttr_OnSEEKED,
  TidyAttr_OnSEEKING,
  TidyAttr_OnSHOW,
  TidyAttr_OnSTALLED,
  TidyAttr_OnSTORAGE,
  TidyAttr_OnSUSPEND,
  TidyAttr_OnTIMEUPDATE,
  TidyAttr_OnUNDO,
  TidyAttr_OnVOLUMECHANGE,
  TidyAttr_OnWAITING,
  TidyAttr_PATTERN,
  TidyAttr_PLACEHOLDER,
  TidyAttr_POSTER,
  TidyAttr_PRELOAD,
  TidyAttr_PUBDATE,
  TidyAttr_RADIOGROUP,
  TidyAttr_REQUIRED,
  TidyAttr_REVERSED,
  TidyAttr_SANDBOX,
  TidyAttr_SCOPED,
  TidyAttr_SEAMLESS,
  TidyAttr_SIZES,
  TidyAttr_SPELLCHECK,
  TidyAttr_SRCDOC,
  TidyAttr_SRCLANG,
  TidyAttr_STEP,
  TidyAttr_ARIA_ACTIVEDESCENDANT,
  TidyAttr_ARIA_ATOMIC,
  TidyAttr_ARIA_AUTOCOMPLETE,
  TidyAttr_ARIA_BUSY,
  TidyAttr_ARIA_CHECKED,
  TidyAttr_ARIA_CONTROLS,
  TidyAttr_ARIA_DESCRIBEDBY,
  TidyAttr_ARIA_DISABLED,
  TidyAttr_ARIA_DROPEFFECT,
  TidyAttr_ARIA_EXPANDED,
  TidyAttr_ARIA_FLOWTO,
  TidyAttr_ARIA_GRABBED,
  TidyAttr_ARIA_HASPOPUP,
  TidyAttr_ARIA_HIDDEN,
  TidyAttr_ARIA_INVALID,
  TidyAttr_ARIA_LABEL,
  TidyAttr_ARIA_LABELLEDBY,
  TidyAttr_ARIA_LEVEL,
  TidyAttr_ARIA_LIVE,
  TidyAttr_ARIA_MULTILINE,
  TidyAttr_ARIA_MULTISELECTABLE,
  TidyAttr_ARIA_ORIENTATION,
  TidyAttr_ARIA_OWNS,
  TidyAttr_ARIA_POSINSET,
  TidyAttr_ARIA_PRESSED,
  TidyAttr_ARIA_READONLY,
  TidyAttr_ARIA_RELEVANT,
  TidyAttr_ARIA_REQUIRED,
  TidyAttr_ARIA_SELECTED,
  TidyAttr_ARIA_SETSIZE,
  TidyAttr_ARIA_SORT,
  TidyAttr_ARIA_VALUEMAX,
  TidyAttr_ARIA_VALUEMIN,
  TidyAttr_ARIA_VALUENOW,
  TidyAttr_ARIA_VALUETEXT,

  /* SVG attributes (SVG 1.1) */
  TidyAttr_X,                     /**< X= */
  TidyAttr_Y,                     /**< Y= */
  TidyAttr_VIEWBOX,               /**< VIEWBOX= */
  TidyAttr_PRESERVEASPECTRATIO,   /**< PRESERVEASPECTRATIO= */
  TidyAttr_ZOOMANDPAN,            /**< ZOOMANDPAN= */
  TidyAttr_BASEPROFILE,           /**< BASEPROFILE= */
  TidyAttr_CONTENTSCRIPTTYPE,     /**< CONTENTSCRIPTTYPE= */
  TidyAttr_CONTENTSTYLETYPE,      /**< CONTENTSTYLETYPE= */
  /* MathML <math> attributes */
  TidyAttr_DISPLAY,               /**< DISPLAY= (html5) */

  /* RDFa global attributes */
  TidyAttr_ABOUT,                 /**< ABOUT= */
  TidyAttr_DATATYPE,              /**< DATATYPE= */
  TidyAttr_INLIST,                /**< INLIST= */
  TidyAttr_PREFIX,                /**< PREFIX= */
  TidyAttr_PROPERTY,              /**< PROPERTY= */
  TidyAttr_RESOURCE,              /**< RESOURCE= */
  TidyAttr_TYPEOF,                /**< TYPEOF= */
  TidyAttr_VOCAB,                 /**< VOCAB= */

  TidyAttr_INTEGRITY,             /**< INTEGRITY= */

  N_TIDY_ATTRIBS                  /**< Must be last */
} TidyAttrId;
|||
|
|||
#ifdef __cplusplus |
|||
} /* extern "C" */ |
|||
#endif |
|||
#endif /* __TIDYENUM_H__ */ |
|||
|
File diff suppressed because it is too large
@ -0,0 +1,635 @@ |
|||
#ifndef __TIDY_PLATFORM_H__ |
|||
#define __TIDY_PLATFORM_H__ |
|||
|
|||
/* tidyplatform.h -- Platform specifics
|
|||
|
|||
(c) 1998-2008 (W3C) MIT, ERCIM, Keio University |
|||
See tidy.h for the copyright notice. |
|||
|
|||
*/ |
|||
|
|||
#ifdef __cplusplus |
|||
extern "C" { |
|||
#endif |
|||
|
|||
/*
|
|||
Uncomment and edit one of the following #defines if you |
|||
want to specify the config file at compile-time. |
|||
*/ |
|||
|
|||
/* #define TIDY_CONFIG_FILE "/etc/tidy_config.txt" */ /* original */ |
|||
/* #define TIDY_CONFIG_FILE "/etc/tidyrc" */ |
|||
/* #define TIDY_CONFIG_FILE "/etc/tidy.conf" */ |
|||
|
|||
/*
|
|||
Uncomment the following #define if you are on a system |
|||
supporting the HOME environment variable. |
|||
It enables tidy to find config files named ~/.tidyrc if |
|||
the HTML_TIDY environment variable is not set. |
|||
*/ |
|||
/* #define TIDY_USER_CONFIG_FILE "~/.tidyrc" */ |
|||
|
|||
/*
|
|||
Uncomment the following #define if your |
|||
system supports the call getpwnam(). |
|||
E.g. Unix and Linux. |
|||
|
|||
It enables tidy to find files named |
|||
~your/foo for use in the HTML_TIDY environment |
|||
variable or CONFIG_FILE or USER_CONFIGFILE or |
|||
on the command line: -config ~joebob/tidy.cfg |
|||
|
|||
Contributed by Todd Lewis. |
|||
*/ |
|||
|
|||
/* #define SUPPORT_GETPWNAM */ |
|||
|
|||
|
|||
/* Enable/disable support for Big5 and Shift_JIS character encodings.
   Defaults to 1 unless the macro is already defined when this point is
   reached (for example on the compiler command line). */
#ifndef SUPPORT_ASIAN_ENCODINGS
#define SUPPORT_ASIAN_ENCODINGS 1
#endif
|||
|
|||
/* Enable/disable support for UTF-16 character encodings.
   Defaults to 1 unless the macro is already defined when this point is
   reached (for example on the compiler command line). */
#ifndef SUPPORT_UTF16_ENCODINGS
#define SUPPORT_UTF16_ENCODINGS 1
#endif
|||
|
|||
/* Enable/disable support for additional accessibility checks.
   Defaults to 1 unless the macro is already defined when this point is
   reached (for example on the compiler command line). */
#ifndef SUPPORT_ACCESSIBILITY_CHECKS
#define SUPPORT_ACCESSIBILITY_CHECKS 1
#endif
|||
|
|||
/* Enable/disable support for additional languages.
   Defaults to 1 unless the macro is already defined when this point is
   reached (for example on the compiler command line). */
#ifndef SUPPORT_LOCALIZATIONS
#define SUPPORT_LOCALIZATIONS 1
#endif
|||
|
|||
|
|||
/* Convenience defines for Mac platforms.
   Defines MAC_OS_CLASSIC or MAC_OS_X (at most one of them) from the
   compiler's predefined macros, and supplies a default PLATFORM_NAME. */

#if defined(macintosh)
/* Mac OS 6.x/7.x/8.x/9.x, with or without CarbonLib - MPW or Metrowerks 68K/PPC compilers */
#  define MAC_OS_CLASSIC
#  ifndef PLATFORM_NAME
#    define PLATFORM_NAME "Mac OS"
#  endif

/* needed for access() */
#  if !defined(_POSIX) && !defined(NO_ACCESS_SUPPORT)
#    define NO_ACCESS_SUPPORT
#  endif

/* getpwnam() support is switched off for this platform */
#  ifdef SUPPORT_GETPWNAM
#    undef SUPPORT_GETPWNAM
#  endif

#elif defined(__APPLE__) && defined(__MACH__)
/* Mac OS X (client) 10.x (or server 1.x/10.x) - gcc or Metrowerks MachO compilers */
#  define MAC_OS_X
#  ifndef PLATFORM_NAME
#    define PLATFORM_NAME "Mac OS X"
#  endif
#endif
|||
|
|||
#if defined(MAC_OS_CLASSIC) || defined(MAC_OS_X)
/* Any OS on Mac platform */
#  define MAC_OS
#  define FILENAMES_CASE_SENSITIVE 0
/* NOTE(review): this maps strcasecmp onto the case-SENSITIVE strcmp for both
   classic Mac OS and Mac OS X. Presumably a fallback for toolchains without
   strcasecmp - confirm it is still wanted on Mac OS X. */
#  define strcasecmp strcmp
#endif
|||
|
|||
/* Convenience defines for BSD like platforms */ |
|||
|
|||
#if defined(__FreeBSD__) |
|||
#define BSD_BASED_OS |
|||
#ifndef PLATFORM_NAME |
|||
#define PLATFORM_NAME "FreeBSD" |
|||
#endif |
|||
|
|||
#elif defined(__NetBSD__) |
|||
#define BSD_BASED_OS |
|||
#ifndef PLATFORM_NAME |
|||
#define PLATFORM_NAME "NetBSD" |
|||
#endif |
|||
|
|||
#elif defined(__OpenBSD__) |
|||
#define BSD_BASED_OS |
|||
#ifndef PLATFORM_NAME |
|||
#define PLATFORM_NAME "OpenBSD" |
|||
#endif |
|||
|
|||
#elif defined(__DragonFly__) |
|||
#define BSD_BASED_OS |
|||
#ifndef PLATFORM_NAME |
|||
#define PLATFORM_NAME "DragonFly" |
|||
#endif |
|||
|
|||
#elif defined(__MINT__) |
|||
#define BSD_BASED_OS |
|||
#ifndef PLATFORM_NAME |
|||
#define PLATFORM_NAME "FreeMiNT" |
|||
#endif |
|||
|
|||
#elif defined(__bsdi__) |
|||
#define BSD_BASED_OS |
|||
#ifndef PLATFORM_NAME |
|||
#define PLATFORM_NAME "BSD/OS" |
|||
#endif |
|||
|
|||
#endif |
|||
|
|||
/* Convenience defines for Windows platforms */ |
|||
|
|||
#if defined(WINDOWS) || defined(_WIN32) |
|||
|
|||
#define WINDOWS_OS |
|||
#ifndef PLATFORM_NAME |
|||
#define PLATFORM_NAME "Windows" |
|||
#endif |
|||
|
|||
#if defined(__MWERKS__) || defined(__MSL__) |
|||
/* not available with Metrowerks Standard Library */ |
|||
|
|||
#ifdef SUPPORT_GETPWNAM |
|||
#undef SUPPORT_GETPWNAM |
|||
#endif |
|||
|
|||
/* needed for setmode() */ |
|||
#if !defined(NO_SETMODE_SUPPORT) |
|||
#define NO_SETMODE_SUPPORT |
|||
#endif |
|||
|
|||
#define strcasecmp _stricmp |
|||
|
|||
#endif |
|||
|
|||
#if defined(__BORLANDC__) |
|||
#define strcasecmp stricmp |
|||
#endif |
|||
|
|||
#define FILENAMES_CASE_SENSITIVE 0 |
|||
#define SUPPORT_POSIX_MAPPED_FILES 0 |
|||
|
|||
#endif |
|||
|
|||
/* Convenience defines for Linux platforms */ |
|||
|
|||
#if defined(linux) && defined(__alpha__) |
|||
/* Linux on Alpha - gcc compiler */ |
|||
#define LINUX_OS |
|||
#ifndef PLATFORM_NAME |
|||
#define PLATFORM_NAME "Linux/Alpha" |
|||
#endif |
|||
|
|||
#elif defined(linux) && defined(__sparc__) |
|||
/* Linux on Sparc - gcc compiler */ |
|||
#define LINUX_OS |
|||
#ifndef PLATFORM_NAME |
|||
#define PLATFORM_NAME "Linux/Sparc" |
|||
#endif |
|||
|
|||
#elif defined(linux) && (defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__)) |
|||
/* Linux on x86 - gcc compiler */ |
|||
#define LINUX_OS |
|||
#ifndef PLATFORM_NAME |
|||
#define PLATFORM_NAME "Linux/x86" |
|||
#endif |
|||
|
|||
#elif defined(linux) && defined(__powerpc__) |
|||
/* Linux on PPC - gcc compiler */ |
|||
#define LINUX_OS |
|||
|
|||
#if defined(__linux__) && defined(__powerpc__) |
|||
|
|||
/* #if #system(linux) */ |
|||
/* MkLinux on PPC - gcc (egcs) compiler */ |
|||
/* #define MAC_OS_MKLINUX */ |
|||
#ifndef PLATFORM_NAME |
|||
#define PLATFORM_NAME "MkLinux" |
|||
#endif |
|||
|
|||
#else |
|||
|
|||
#ifndef PLATFORM_NAME |
|||
#define PLATFORM_NAME "Linux/PPC" |
|||
#endif |
|||
|
|||
#endif |
|||
|
|||
#elif defined(linux) || defined(__linux__) |
|||
/* generic Linux */ |
|||
#define LINUX_OS |
|||
#ifndef PLATFORM_NAME |
|||
#define PLATFORM_NAME "Linux" |
|||
#endif |
|||
|
|||
#endif |
|||
|
|||
/* Convenience defines for Solaris platforms */ |
|||
|
|||
#if defined(sun) |
|||
#define SOLARIS_OS |
|||
#ifndef PLATFORM_NAME |
|||
#define PLATFORM_NAME "Solaris" |
|||
#endif |
|||
#endif |
|||
|
|||
/* Convenience defines for HPUX + gcc platforms */ |
|||
|
|||
#if defined(__hpux) |
|||
#define HPUX_OS |
|||
#ifndef PLATFORM_NAME |
|||
#define PLATFORM_NAME "HPUX" |
|||
#endif |
|||
#endif |
|||
|
|||
/* Convenience defines for RISCOS + gcc platforms */ |
|||
|
|||
#if defined(__riscos__) |
|||
#define RISC_OS |
|||
#ifndef PLATFORM_NAME |
|||
#define PLATFORM_NAME "RISC OS" |
|||
#endif |
|||
#endif |
|||
|
|||
/* Convenience defines for OS/2 + icc/gcc platforms */ |
|||
|
|||
#if defined(__OS2__) || defined(__EMX__) |
|||
#define OS2_OS |
|||
#ifndef PLATFORM_NAME |
|||
#define PLATFORM_NAME "OS/2" |
|||
#endif |
|||
#define FILENAMES_CASE_SENSITIVE 0 |
|||
#define strcasecmp stricmp |
|||
#endif |
|||
|
|||
/* Convenience defines for IRIX */ |
|||
|
|||
#if defined(__sgi) |
|||
#define IRIX_OS |
|||
#ifndef PLATFORM_NAME |
|||
#define PLATFORM_NAME "SGI IRIX" |
|||
#endif |
|||
#endif |
|||
|
|||
/* Convenience defines for AIX */ |
|||
|
|||
#if defined(_AIX) |
|||
#define AIX_OS |
|||
#ifndef PLATFORM_NAME |
|||
#define PLATFORM_NAME "IBM AIX" |
|||
#endif |
|||
#endif |
|||
|
|||
|
|||
/* Convenience defines for BeOS platforms */ |
|||
|
|||
#if defined(__BEOS__) |
|||
#define BE_OS |
|||
#ifndef PLATFORM_NAME |
|||
#define PLATFORM_NAME "BeOS" |
|||
#endif |
|||
#endif |
|||
|
|||
/* Convenience defines for Cygwin platforms */ |
|||
|
|||
#if defined(__CYGWIN__) |
|||
#define CYGWIN_OS |
|||
#ifndef PLATFORM_NAME |
|||
#define PLATFORM_NAME "Cygwin" |
|||
#endif |
|||
#define FILENAMES_CASE_SENSITIVE 0 |
|||
#endif |
|||
|
|||
/* Convenience defines for OpenVMS */ |
|||
|
|||
#if defined(__VMS) |
|||
#define OPENVMS_OS |
|||
#ifndef PLATFORM_NAME |
|||
#define PLATFORM_NAME "OpenVMS" |
|||
#endif |
|||
#define FILENAMES_CASE_SENSITIVE 0 |
|||
#endif |
|||
|
|||
/* Convenience defines for DEC Alpha OSF + gcc platforms */ |
|||
|
|||
#if defined(__osf__) |
|||
#define OSF_OS |
|||
#ifndef PLATFORM_NAME |
|||
#define PLATFORM_NAME "DEC Alpha OSF" |
|||
#endif |
|||
#endif |
|||
|
|||
/* Convenience defines for ARM platforms */ |
|||
|
|||
#if defined(__arm) |
|||
#define ARM_OS |
|||
|
|||
#if defined(forARM) && defined(__NEWTON_H) |
|||
|
|||
/* Using Newton C++ Tools ARMCpp compiler */ |
|||
#define NEWTON_OS |
|||
#ifndef PLATFORM_NAME |
|||
#define PLATFORM_NAME "Newton" |
|||
#endif |
|||
|
|||
#else |
|||
|
|||
#ifndef PLATFORM_NAME |
|||
#define PLATFORM_NAME "ARM" |
|||
#endif |
|||
|
|||
#endif |
|||
|
|||
#endif |
|||
|
|||
#include <ctype.h> |
|||
#include <stdio.h> |
|||
#include <setjmp.h> /* for longjmp on error exit */ |
|||
#include <stdlib.h> |
|||
#include <stdarg.h> /* may need <varargs.h> for Unix V */ |
|||
#include <string.h> |
|||
#include <assert.h> |
|||
|
|||
#ifdef NEEDS_MALLOC_H |
|||
#include <malloc.h> |
|||
#endif |
|||
|
|||
#ifdef SUPPORT_GETPWNAM |
|||
#include <pwd.h> |
|||
#endif |
|||
|
|||
#ifdef NEEDS_UNISTD_H |
|||
#include <unistd.h> /* needed for unlink on some Unix systems */ |
|||
#endif |
|||
|
|||
/* By default, use case-sensitive filename comparison.
|
|||
*/ |
|||
#ifndef FILENAMES_CASE_SENSITIVE |
|||
#define FILENAMES_CASE_SENSITIVE 1 |
|||
#endif |
|||
|
|||
|
|||
/*
|
|||
Tidy preserves the last modified time for the files it |
|||
cleans up. |
|||
*/ |
|||
|
|||
/*
|
|||
If your platform doesn't support <utime.h> and the |
|||
utime() function, or <sys/futime> and the futime() |
|||
function then set PRESERVE_FILE_TIMES to 0. |
|||
|
|||
If your platform doesn't support <sys/utime.h> and the |
|||
futime() function, then set HAS_FUTIME to 0. |
|||
|
|||
If your platform supports <utime.h> and the |
|||
utime() function requires the file to be |
|||
closed first, then set UTIME_NEEDS_CLOSED_FILE to 1. |
|||
*/ |
|||
|
|||
/* Keep old PRESERVEFILETIMES define for compatibility */ |
|||
#ifdef PRESERVEFILETIMES |
|||
#undef PRESERVE_FILE_TIMES |
|||
#define PRESERVE_FILE_TIMES PRESERVEFILETIMES |
|||
#endif |
|||
|
|||
#ifndef PRESERVE_FILE_TIMES |
|||
#if defined(RISC_OS) || defined(OPENVMS_OS) || defined(OSF_OS) |
|||
#define PRESERVE_FILE_TIMES 0 |
|||
#else |
|||
#define PRESERVE_FILE_TIMES 1 |
|||
#endif |
|||
#endif |
|||
|
|||
#if PRESERVE_FILE_TIMES |
|||
|
|||
#ifndef HAS_FUTIME |
|||
#if defined(CYGWIN_OS) || defined(BE_OS) || defined(OS2_OS) || defined(HPUX_OS) || defined(SOLARIS_OS) || defined(LINUX_OS) || defined(BSD_BASED_OS) || defined(MAC_OS) || defined(__MSL__) || defined(IRIX_OS) || defined(AIX_OS) || defined(__BORLANDC__) || defined(__GLIBC__) |
|||
#define HAS_FUTIME 0 |
|||
#else |
|||
#define HAS_FUTIME 1 |
|||
#endif |
|||
#endif |
|||
|
|||
#ifndef UTIME_NEEDS_CLOSED_FILE |
|||
#if defined(SOLARIS_OS) || defined(BSD_BASED_OS) || defined(MAC_OS) || defined(__MSL__) || defined(LINUX_OS) |
|||
#define UTIME_NEEDS_CLOSED_FILE 1 |
|||
#else |
|||
#define UTIME_NEEDS_CLOSED_FILE 0 |
|||
#endif |
|||
#endif |
|||
|
|||
#if defined(MAC_OS_X) || (!defined(MAC_OS_CLASSIC) && !defined(__MSL__)) |
|||
#include <sys/types.h> |
|||
#include <sys/stat.h> |
|||
#else |
|||
#include <stat.h> |
|||
#endif |
|||
|
|||
#if HAS_FUTIME |
|||
#include <sys/utime.h> |
|||
#else |
|||
#include <utime.h> |
|||
#endif /* HAS_FUTIME */ |
|||
|
|||
/*
|
|||
MS Windows needs _ prefix for Unix file functions. |
|||
Not required by Metrowerks Standard Library (MSL). |
|||
|
|||
Tidy uses following for preserving the last modified time. |
|||
|
|||
WINDOWS automatically set by Win16 compilers. |
|||
_WIN32 automatically set by Win32 compilers. |
|||
*/ |
|||
#if defined(_WIN32) && !defined(__MSL__) && !defined(__BORLANDC__) |
|||
|
|||
#define futime _futime |
|||
#define fstat _fstat |
|||
#define utimbuf _utimbuf /* Windows seems to want utimbuf */ |
|||
#define stat _stat |
|||
#define utime _utime |
|||
#define vsnprintf _vsnprintf |
|||
#endif /* _WIN32 */ |
|||
|
|||
#endif /* PRESERVE_FILE_TIMES */ |
|||
|
|||
/*
|
|||
MS Windows needs _ prefix for Unix file functions. |
|||
Not required by Metrowerks Standard Library (MSL). |
|||
|
|||
WINDOWS automatically set by Win16 compilers. |
|||
_WIN32 automatically set by Win32 compilers. |
|||
*/ |
|||
#if defined(_WIN32) && !defined(__MSL__) && !defined(__BORLANDC__) |
|||
|
|||
#if !(defined(__WATCOMC__) || defined(__MINGW32__)) |
|||
#define fileno _fileno |
|||
#define setmode _setmode |
|||
#endif |
|||
|
|||
#define access _access |
|||
#define strcasecmp _stricmp |
|||
|
|||
#ifndef va_copy |
|||
#define va_copy(dest, src) (dest = src) |
|||
#endif |
|||
|
|||
#if _MSC_VER > 1000 |
|||
#pragma warning( disable : 4189 ) /* local variable is initialized but not referenced */ |
|||
#pragma warning( disable : 4100 ) /* unreferenced formal parameter */ |
|||
#pragma warning( disable : 4706 ) /* assignment within conditional expression */ |
|||
#endif |
|||
|
|||
#if _MSC_VER > 1300 |
|||
#pragma warning( disable : 4996 ) /* disable deprecation warning */ |
|||
#endif |
|||
|
|||
#endif /* _WIN32 */ |
|||
|
|||
#if defined(_WIN32) |
|||
|
|||
#if (defined(_USRDLL) || defined(_WINDLL) || defined(BUILD_SHARED_LIB)) && !defined(TIDY_EXPORT) && !defined(TIDY_STATIC) |
|||
#ifdef BUILDING_SHARED_LIB |
|||
#define TIDY_EXPORT __declspec( dllexport ) |
|||
#else |
|||
#define TIDY_EXPORT __declspec( dllimport ) |
|||
#endif |
|||
#else |
|||
#define TIDY_EXPORT extern |
|||
#endif |
|||
|
|||
#ifndef TIDY_CALL |
|||
#ifdef _WIN64 |
|||
# define TIDY_CALL __fastcall |
|||
#else |
|||
# define TIDY_CALL __stdcall |
|||
#endif |
|||
#endif |
|||
|
|||
#endif /* _WIN32 */ |
|||
|
|||
/* hack for gnu sys/types.h file which defines uint and ulong */ |
|||
|
|||
#if defined(BE_OS) || defined(SOLARIS_OS) || defined(BSD_BASED_OS) || defined(OSF_OS) || defined(IRIX_OS) || defined(AIX_OS) |
|||
#include <sys/types.h> |
|||
#endif |
|||
#if !defined(HPUX_OS) && !defined(CYGWIN_OS) && !defined(MAC_OS_X) && !defined(BE_OS) && !defined(SOLARIS_OS) && !defined(BSD_BASED_OS) && !defined(OSF_OS) && !defined(IRIX_OS) && !defined(AIX_OS) && !defined(LINUX_OS) |
|||
# undef uint |
|||
typedef unsigned int uint; |
|||
#endif |
|||
#if defined(HPUX_OS) || defined(CYGWIN_OS) || defined(MAC_OS) || defined(BSD_BASED_OS) || defined(_WIN32) |
|||
# undef ulong |
|||
typedef unsigned long ulong; |
|||
#endif |
|||
|
|||
/*
|
|||
With GCC 4, __attribute__ ((visibility("default"))) can be used when compiling tidylib |
|||
with "-fvisibility=hidden". See http://gcc.gnu.org/wiki/Visibility and build/gmake/Makefile.
|
|||
*/ |
|||
/*
|
|||
#if defined(__GNUC__) && __GNUC__ >= 4 |
|||
#define TIDY_EXPORT __attribute__ ((visibility("default"))) |
|||
#endif |
|||
*/ |
|||
|
|||
#ifndef TIDY_EXPORT /* Define it away for most builds */ |
|||
#define TIDY_EXPORT |
|||
#endif |
|||
|
|||
#ifndef TIDY_STRUCT |
|||
#define TIDY_STRUCT |
|||
#endif |
|||
|
|||
typedef unsigned char byte; |
|||
|
|||
typedef uint tchar; /* single, full character */ |
|||
typedef char tmbchar; /* single, possibly partial character */ |
|||
#ifndef TMBSTR_DEFINED |
|||
typedef tmbchar* tmbstr; /* pointer to buffer of possibly partial chars */ |
|||
typedef const tmbchar* ctmbstr; /* Ditto, but const */ |
|||
#define NULLSTR (tmbstr)"" |
|||
#define TMBSTR_DEFINED |
|||
#endif |
|||
|
|||
#ifndef TIDY_CALL |
|||
#define TIDY_CALL |
|||
#endif |
|||
|
|||
#if defined(__GNUC__) || defined(__INTEL_COMPILER) |
|||
# define ARG_UNUSED(x) x __attribute__((unused)) |
|||
#else |
|||
# define ARG_UNUSED(x) x |
|||
#endif |
|||
|
|||
/* HAS_VSNPRINTF triggers the use of "vsnprintf", which is safe with respect to
|
|||
buffer overflow. Therefore, we make it the default unless HAS_VSNPRINTF |
|||
has been defined. */ |
|||
#ifndef HAS_VSNPRINTF |
|||
# define HAS_VSNPRINTF 1 |
|||
#endif |
|||
|
|||
#ifndef SUPPORT_POSIX_MAPPED_FILES |
|||
# define SUPPORT_POSIX_MAPPED_FILES 1 |
|||
#endif |
|||
|
|||
/*
|
|||
bool is a reserved word in some but |
|||
not all C++ compilers depending on age |
|||
work around is to avoid bool altogether |
|||
by introducing a new enum called Bool |
|||
*/ |
|||
/* We could use the C99 definition where supported
|
|||
typedef _Bool Bool; |
|||
#define no (_Bool)0 |
|||
#define yes (_Bool)1 |
|||
*/ |
|||
/* Two-valued truth type used in place of bool: no is 0, yes is 1. */
typedef enum
{
    no  = 0,
    yes = 1
} Bool;
|||
|
|||
/* for NULL pointers
|
|||
#define null ((const void*)0) |
|||
extern void* null; |
|||
*/ |
|||
|
|||
#if defined(DMALLOC) |
|||
#include "dmalloc.h" |
|||
#endif |
|||
|
|||
/* Opaque data structure.
|
|||
* Cast to implementation type struct within lib. |
|||
* This will reduce inter-dependencies/conflicts w/ application code. |
|||
*/ |
|||
#if 1 |
|||
#define opaque_type( typenam )\ |
|||
struct _##typenam { int _opaque; };\ |
|||
typedef struct _##typenam const * typenam |
|||
#else |
|||
#define opaque_type(typenam) typedef const void* typenam |
|||
#endif |
|||
|
|||
/* Opaque data structure used to pass back
|
|||
** and forth to keep current position in a |
|||
** list or other collection. |
|||
*/ |
|||
opaque_type( TidyIterator ); |
|||
|
|||
#ifdef __cplusplus |
|||
} /* extern "C" */ |
|||
#endif |
|||
|
|||
#endif /* __TIDY_PLATFORM_H__ */ |
|||
|
|||
|
|||
/*
|
|||
* local variables: |
|||
* mode: c |
|||
* indent-tabs-mode: nil |
|||
* c-basic-offset: 4 |
|||
* eval: (c-set-offset 'substatement-open 0) |
|||
* end: |
|||
*/ |
@ -0,0 +1,295 @@ |
|||
/* tmbstr.c -- Tidy string utility functions
|
|||
|
|||
(c) 1998-2006 (W3C) MIT, ERCIM, Keio University |
|||
See tidy.h for the copyright notice. |
|||
|
|||
*/ |
|||
|
|||
#include "forward.h" |
|||
#include "tmbstr.h" |
|||
#include "lexer.h" |
|||
|
|||
/* like strdup but using an allocator */ |
|||
tmbstr TY_(tmbstrdup)( TidyAllocator *allocator, ctmbstr str ) |
|||
{ |
|||
tmbstr s = NULL; |
|||
if ( str ) |
|||
{ |
|||
uint len = TY_(tmbstrlen)( str ); |
|||
tmbstr cp = s = (tmbstr) TidyAlloc( allocator, 1+len ); |
|||
while ( 0 != (*cp++ = *str++) ) |
|||
/**/; |
|||
} |
|||
return s; |
|||
} |
|||
|
|||
/* like strndup but using an allocator */ |
|||
tmbstr TY_(tmbstrndup)( TidyAllocator *allocator, ctmbstr str, uint len ) |
|||
{ |
|||
tmbstr s = NULL; |
|||
if ( str && len > 0 ) |
|||
{ |
|||
tmbstr cp = s = (tmbstr) TidyAlloc( allocator, 1+len ); |
|||
while ( len-- > 0 && (*cp++ = *str++) ) |
|||
/**/; |
|||
*cp = 0; |
|||
} |
|||
return s; |
|||
} |
|||
|
|||
/* exactly same as strncpy */ |
|||
uint TY_(tmbstrncpy)( tmbstr s1, ctmbstr s2, uint size ) |
|||
{ |
|||
if ( s1 != NULL && s2 != NULL ) |
|||
{ |
|||
tmbstr cp = s1; |
|||
while ( *s2 && --size ) /* Predecrement: reserve byte */ |
|||
*cp++ = *s2++; /* for NULL terminator. */ |
|||
*cp = 0; |
|||
} |
|||
return size; |
|||
} |
|||
|
|||
/* Allows expressions like: cp += tmbstrcpy( cp, "joebob" );
|
|||
*/ |
|||
uint TY_(tmbstrcpy)( tmbstr s1, ctmbstr s2 ) |
|||
{ |
|||
uint ncpy = 0; |
|||
while (0 != (*s1++ = *s2++) ) |
|||
++ncpy; |
|||
return ncpy; |
|||
} |
|||
|
|||
/* Allows expressions like: cp += tmbstrcat( cp, "joebob" );
|
|||
*/ |
|||
uint TY_(tmbstrcat)( tmbstr s1, ctmbstr s2 ) |
|||
{ |
|||
uint ncpy = 0; |
|||
while ( *s1 ) |
|||
++s1; |
|||
|
|||
while (0 != (*s1++ = *s2++) ) |
|||
++ncpy; |
|||
return ncpy; |
|||
} |
|||
|
|||
/* exactly same as strcmp */ |
|||
int TY_(tmbstrcmp)( ctmbstr s1, ctmbstr s2 ) |
|||
{ |
|||
int c; |
|||
while ((c = *s1) == *s2) |
|||
{ |
|||
if (c == '\0') |
|||
return 0; |
|||
|
|||
++s1; |
|||
++s2; |
|||
} |
|||
|
|||
return (*s1 > *s2 ? 1 : -1); |
|||
} |
|||
|
|||
/* returns byte count, not char count */ |
|||
uint TY_(tmbstrlen)( ctmbstr str ) |
|||
{ |
|||
uint len = 0; |
|||
if ( str ) |
|||
{ |
|||
while ( *str++ ) |
|||
++len; |
|||
} |
|||
return len; |
|||
} |
|||
|
|||
/*
|
|||
MS C 4.2 doesn't include strcasecmp. |
|||
Note that tolower and toupper won't |
|||
work on chars > 127. |
|||
|
|||
Neither does ToLower()! |
|||
*/ |
|||
int TY_(tmbstrcasecmp)( ctmbstr s1, ctmbstr s2 ) |
|||
{ |
|||
uint c; |
|||
|
|||
while (c = (uint)(*s1), TY_(ToLower)(c) == TY_(ToLower)((uint)(*s2))) |
|||
{ |
|||
if (c == '\0') |
|||
return 0; |
|||
|
|||
++s1; |
|||
++s2; |
|||
} |
|||
|
|||
return (*s1 > *s2 ? 1 : -1); |
|||
} |
|||
|
|||
/* Case-sensitive comparison of at most n bytes, in the manner of strncmp.
**
** Returns 0 when the first n bytes match, or when both strings end
** (with a match so far) before n bytes were examined; otherwise 1 or -1
** according to the first differing characters.
**
** n is tested before each dereference, so no byte at or beyond offset n
** is ever read.  This matters to callers such as tmbsubstrn, which pass
** windows into buffers that need not be NUL-terminated.
*/
int TY_(tmbstrncmp)( ctmbstr s1, ctmbstr s2, uint n )
{
    for ( ; n > 0; --n, ++s1, ++s2 )
    {
        if ( (byte)*s1 != (byte)*s2 )
            return ( *s1 > *s2 ? 1 : -1 );

        if ( *s1 == '\0' )
            return 0;
    }

    return 0;
}
|||
|
|||
/* Case-insensitive comparison of at most n bytes, in the manner of
** strncasecmp.  Characters are folded with TY_(ToLower).
**
** Returns 0 when the first n folded bytes match, or when both strings
** end (with a match so far) before n bytes were examined; otherwise
** 1 or -1 according to the first pair of folded characters that differ.
**
** n is tested before each dereference, so no byte at or beyond offset n
** is ever read.
*/
int TY_(tmbstrncasecmp)( ctmbstr s1, ctmbstr s2, uint n )
{
    for ( ; n > 0; --n, ++s1, ++s2 )
    {
        uint c1 = TY_(ToLower)( (uint)(*s1) );
        uint c2 = TY_(ToLower)( (uint)(*s2) );

        if ( c1 != c2 )
            return ( c1 > c2 ? 1 : -1 );

        if ( *s1 == '\0' )
            return 0;
    }

    return 0;
}
|||
|
|||
#if 0
/* Return the offset of the first occurrence of cc within the first
** maxlen bytes of s1, or -1 if it does not occur there.  The scan does
** not stop at a NUL byte.  (Currently compiled out.)
*/
int TY_(tmbstrnchr)( ctmbstr s1, uint maxlen, tmbchar cc )
{
    uint pos;

    for ( pos = 0; pos < maxlen; ++pos )
    {
        if ( s1[pos] == cc )
            return (int) pos;
    }

    return -1;
}
#endif
|||
|
|||
/* Case-sensitive search for the NUL-terminated string s2 within the
** first len1 bytes of s1.  Returns a pointer to the first match inside
** s1, or NULL when there is none.
*/
ctmbstr TY_(tmbsubstrn)( ctmbstr s1, uint len1, ctmbstr s2 )
{
    uint len2 = TY_(tmbstrlen)(s2);
    int last = len1 - len2;     /* last start offset that leaves room for s2 */
    int ix = 0;

    while ( ix <= last )
    {
        if ( TY_(tmbstrncmp)(s1+ix, s2, len2) == 0 )
            return (ctmbstr) s1+ix;
        ++ix;
    }

    return NULL;
}
|||
|
|||
#if 0
/* Case-insensitive search for the NUL-terminated string s2 within the
** first len1 bytes of s1.  Returns a pointer to the first match inside
** s1, or NULL when there is none.  (Currently compiled out.)
*/
ctmbstr TY_(tmbsubstrncase)( ctmbstr s1, uint len1, ctmbstr s2 )
{
    uint len2 = TY_(tmbstrlen)(s2);
    int last = len1 - len2;     /* last start offset that leaves room for s2 */
    int ix = 0;

    while ( ix <= last )
    {
        if ( TY_(tmbstrncasecmp)(s1+ix, s2, len2) == 0 )
            return (ctmbstr) s1+ix;
        ++ix;
    }

    return NULL;
}
#endif
|||
|
|||
/* Search for s2 within s1; both are NUL-terminated.  Candidates are
** compared with tmbstrncasecmp, so the search is case-insensitive.
** Returns a pointer to the first match inside s1, or NULL when there
** is none.
*/
ctmbstr TY_(tmbsubstr)( ctmbstr s1, ctmbstr s2 )
{
    uint len1 = TY_(tmbstrlen)(s1);
    uint len2 = TY_(tmbstrlen)(s2);
    int last = len1 - len2;     /* last start offset that leaves room for s2 */
    int ix = 0;

    while ( ix <= last )
    {
        if ( TY_(tmbstrncasecmp)(s1+ix, s2, len2) == 0 )
            return (ctmbstr) s1+ix;
        ++ix;
    }

    return NULL;
}
|||
|
|||
/* Transform ASCII chars in string to lower case */ |
|||
tmbstr TY_(tmbstrtolower)( tmbstr s ) |
|||
{ |
|||
tmbstr cp; |
|||
for ( cp=s; *cp; ++cp ) |
|||
*cp = (tmbchar) TY_(ToLower)( *cp ); |
|||
return s; |
|||
} |
|||
|
|||
/* Transform ASCII chars in string to upper case */ |
|||
tmbstr TY_(tmbstrtoupper)(tmbstr s) |
|||
{ |
|||
tmbstr cp; |
|||
|
|||
for (cp = s; *cp; ++cp) |
|||
*cp = (tmbchar)TY_(ToUpper)(*cp); |
|||
|
|||
return s; |
|||
} |
|||
|
|||
#if 0
/* Report whether two file names are the same string, comparing with or
** without regard to case as selected by FILENAMES_CASE_SENSITIVE.
** (Currently compiled out.)
*/
Bool TY_(tmbsamefile)( ctmbstr filename1, ctmbstr filename2 )
{
    int diff;

#if FILENAMES_CASE_SENSITIVE
    diff = TY_(tmbstrcmp)( filename1, filename2 );
#else
    diff = TY_(tmbstrcasecmp)( filename1, filename2 );
#endif

    return ( diff == 0 );
}
#endif
|||
|
|||
/* Format into buffer using a va_list, in the manner of vsnprintf.
**
** buffer  destination of at least count bytes
** count   capacity of buffer in bytes, terminator included
** format  printf-style format string
** args    arguments consumed by format
**
** When HAS_VSNPRINTF is set the output is bounded by count and the
** buffer is always NUL-terminated; a count of 0 writes nothing and
** returns 0.  Otherwise vsprintf is used and count is ignored.
** Returns whatever the underlying formatting function returns.
*/
int TY_(tmbvsnprintf)(tmbstr buffer, size_t count, ctmbstr format, va_list args)
{
    int retval;

#if HAS_VSNPRINTF
    /* A zero-sized buffer has no room even for the terminator, and the
    ** "count - 1" arithmetic below would wrap around to a huge value.
    */
    if ( count == 0 )
        return 0;

    retval = vsnprintf(buffer, count - 1, format, args);
    /* Terminate unconditionally: vsnprintf may be mapped to a variant
    ** (e.g. _vsnprintf) that presumably leaves truncated output
    ** unterminated.
    */
    buffer[count - 1] = 0;
#else
    retval = vsprintf(buffer, format, args);
#endif /* HAS_VSNPRINTF */
    return retval;
}
|||
|
|||
/* Variadic front end to tmbvsnprintf: formats the trailing arguments
** into buffer (capacity count bytes) and returns that function's result.
*/
int TY_(tmbsnprintf)(tmbstr buffer, size_t count, ctmbstr format, ...)
{
    va_list ap;
    int written;

    va_start( ap, format );
    written = TY_(tmbvsnprintf)( buffer, count, format, ap );
    va_end( ap );

    return written;
}
|||
|
|||
/*
|
|||
* local variables: |
|||
* mode: c |
|||
* indent-tabs-mode: nil |
|||
* c-basic-offset: 4 |
|||
* eval: (c-set-offset 'substatement-open 0) |
|||
* end: |
|||
*/ |
@ -0,0 +1,86 @@ |
|||
#ifndef __TMBSTR_H__ |
|||
#define __TMBSTR_H__ |
|||
|
|||
/* tmbstr.h - Tidy string utility functions
|
|||
|
|||
(c) 1998-2006 (W3C) MIT, ERCIM, Keio University |
|||
See tidy.h for the copyright notice. |
|||
|
|||
*/ |
|||
|
|||
#include "tidyplatform.h" |
|||
|
|||
#ifdef __cplusplus |
|||
extern "C" |
|||
{ |
|||
#endif |
|||
|
|||
/* like strdup but using an allocator */ |
|||
tmbstr TY_(tmbstrdup)( TidyAllocator *allocator, ctmbstr str ); |
|||
|
|||
/* like strndup but using an allocator */ |
|||
tmbstr TY_(tmbstrndup)( TidyAllocator *allocator, ctmbstr str, uint len); |
|||
|
|||
/* like strncpy, but always NUL-terminates; returns the unused part of size */ |
|||
uint TY_(tmbstrncpy)( tmbstr s1, ctmbstr s2, uint size ); |
|||
|
|||
uint TY_(tmbstrcpy)( tmbstr s1, ctmbstr s2 ); |
|||
|
|||
uint TY_(tmbstrcat)( tmbstr s1, ctmbstr s2 ); |
|||
|
|||
/* exactly same as strcmp */ |
|||
int TY_(tmbstrcmp)( ctmbstr s1, ctmbstr s2 ); |
|||
|
|||
/* returns byte count, not char count */ |
|||
uint TY_(tmbstrlen)( ctmbstr str ); |
|||
|
|||
/*
|
|||
MS C 4.2 doesn't include strcasecmp. |
|||
Note that tolower and toupper won't |
|||
work on chars > 127. |
|||
|
|||
Neither do Lexer.ToLower() or Lexer.ToUpper()! |
|||
|
|||
We get away with this because, except for XML tags, |
|||
we are always comparing to ascii element and |
|||
attribute names defined by HTML specs. |
|||
*/ |
|||
int TY_(tmbstrcasecmp)( ctmbstr s1, ctmbstr s2 ); |
|||
|
|||
int TY_(tmbstrncmp)( ctmbstr s1, ctmbstr s2, uint n ); |
|||
|
|||
int TY_(tmbstrncasecmp)( ctmbstr s1, ctmbstr s2, uint n ); |
|||
|
|||
/* return offset of cc from beginning of s1,
|
|||
** -1 if not found. |
|||
*/ |
|||
/* int TY_(tmbstrnchr)( ctmbstr s1, uint len1, tmbchar cc ); */ |
|||
|
|||
ctmbstr TY_(tmbsubstrn)( ctmbstr s1, uint len1, ctmbstr s2 ); |
|||
/* ctmbstr TY_(tmbsubstrncase)( ctmbstr s1, uint len1, ctmbstr s2 ); */ |
|||
ctmbstr TY_(tmbsubstr)( ctmbstr s1, ctmbstr s2 ); |
|||
|
|||
/* Transform ASCII chars in string to lower case */ |
|||
tmbstr TY_(tmbstrtolower)( tmbstr s ); |
|||
|
|||
/* Transform ASCII chars in string to upper case */ |
|||
tmbstr TY_(tmbstrtoupper)( tmbstr s ); |
|||
|
|||
/* Bool TY_(tmbsamefile)( ctmbstr filename1, ctmbstr filename2 ); */ |
|||
|
|||
int TY_(tmbvsnprintf)(tmbstr buffer, size_t count, ctmbstr format, va_list args) |
|||
#ifdef __GNUC__ |
|||
__attribute__((format(printf, 3, 0))) |
|||
#endif |
|||
; |
|||
int TY_(tmbsnprintf)(tmbstr buffer, size_t count, ctmbstr format, ...) |
|||
#ifdef __GNUC__ |
|||
__attribute__((format(printf, 3, 4))) |
|||
#endif |
|||
; |
|||
|
|||
#ifdef __cplusplus |
|||
} /* extern "C" */ |
|||
#endif |
|||
|
|||
#endif /* __TMBSTR_H__ */ |
@ -0,0 +1,533 @@ |
|||
/* utf8.c -- convert characters to/from UTF-8
|
|||
|
|||
(c) 1998-2007 (W3C) MIT, ERCIM, Keio University |
|||
See tidy.h for the copyright notice. |
|||
|
|||
Uses public interfaces to abstract input source and output |
|||
sink, which may be user supplied or either FILE* or memory |
|||
based Tidy implementations. Encoding support is uniform |
|||
regardless of I/O mechanism. |
|||
|
|||
Note, UTF-8 encoding, by itself, does not affect the actual |
|||
"codepoints" of the underlying character encoding. In the |
|||
cases of ASCII, Latin1, Unicode (16-bit, BMP), these all |
|||
refer to ISO-10646 "codepoints". For anything else, they |
|||
refer to some other "codepoint" set. |
|||
|
|||
Put another way, UTF-8 is a variable length method to |
|||
represent any non-negative integer value. The glyph |
|||
that an integer value represents is unchanged and defined |
|||
externally (e.g. by ISO-10646, Big5, Win1252, MacRoman, |
|||
Latin2-9, and so on). |
|||
|
|||
Put still another way, UTF-8 is more of a _transfer_ encoding |
|||
than a _character_ encoding, per se. |
|||
*/ |
|||
|
|||
#include "tidy.h" |
|||
#include "forward.h" |
|||
#include "utf8.h" |
|||
|
|||
/*
|
|||
UTF-8 encoding/decoding functions |
|||
Return # of bytes in UTF-8 sequence; result < 0 if illegal sequence |
|||
|
|||
Also see below for UTF-16 encoding/decoding functions |
|||
|
|||
References : |
|||
|
|||
1) UCS Transformation Format 8 (UTF-8): |
|||
ISO/IEC 10646-1:1996 Amendment 2 or ISO/IEC 10646-1:2000 Annex D |
|||
<http://anubis.dkuug.dk/JTC1/SC2/WG2/docs/n1335>
|
|||
<http://www.cl.cam.ac.uk/~mgk25/ucs/ISO-10646-UTF-8.html>
|
|||
|
|||
Table 4 - Mapping from UCS-4 to UTF-8 |
|||
|
|||
2) Unicode standards: |
|||
<http://www.unicode.org/unicode/standard/standard.html>
|
|||
|
|||
3) Legal UTF-8 byte sequences: |
|||
<http://www.unicode.org/unicode/uni2errata/UTF-8_Corrigendum.html>
|
|||
|
|||
Code point 1st byte 2nd byte 3rd byte 4th byte |
|||
---------- -------- -------- -------- -------- |
|||
U+0000..U+007F 00..7F |
|||
U+0080..U+07FF C2..DF 80..BF |
|||
U+0800..U+0FFF E0 A0..BF 80..BF |
|||
U+1000..U+FFFF E1..EF 80..BF 80..BF |
|||
U+10000..U+3FFFF F0 90..BF 80..BF 80..BF |
|||
U+40000..U+FFFFF F1..F3 80..BF 80..BF 80..BF |
|||
U+100000..U+10FFFF F4 80..8F 80..BF 80..BF |
|||
|
|||
The definition of UTF-8 in Annex D of ISO/IEC 10646-1:2000 also |
|||
allows for the use of five- and six-byte sequences to encode |
|||
characters that are outside the range of the Unicode character |
|||
set; those five- and six-byte sequences are illegal for the use |
|||
of UTF-8 as a transformation of Unicode characters. ISO/IEC 10646 |
|||
does not allow mapping of unpaired surrogates, nor U+FFFE and U+FFFF |
|||
(but it does allow other noncharacters). |
|||
|
|||
4) RFC 2279: UTF-8, a transformation format of ISO 10646: |
|||
<http://www.ietf.org/rfc/rfc2279.txt>
|
|||
|
|||
5) UTF-8 and Unicode FAQ: |
|||
<http://www.cl.cam.ac.uk/~mgk25/unicode.html>
|
|||
|
|||
6) Markus Kuhn's UTF-8 decoder stress test file: |
|||
<http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt>
|
|||
|
|||
7) UTF-8 Demo: |
|||
<http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-demo.txt>
|
|||
|
|||
8) UTF-8 Sampler: |
|||
<http://www.columbia.edu/kermit/utf8.html>
|
|||
|
|||
9) Transformation Format for 16 Planes of Group 00 (UTF-16): |
|||
ISO/IEC 10646-1:1996 Amendment 1 or ISO/IEC 10646-1:2000 Annex C |
|||
<http://anubis.dkuug.dk/JTC1/SC2/WG2/docs/n2005/n2005.pdf>
|
|||
<http://www.cl.cam.ac.uk/~mgk25/ucs/ISO-10646-UTF-16.html>
|
|||
|
|||
10) RFC 2781: UTF-16, an encoding of ISO 10646: |
|||
<http://www.ietf.org/rfc/rfc2781.txt>
|
|||
|
|||
11) UTF-16 invalid surrogate pairs: |
|||
<http://www.unicode.org/unicode/faq/utf_bom.html#16>
|
|||
|
|||
UTF-16 UTF-8 UCS-4 |
|||
D83F DFF* F0 9F BF B* 0001FFF* |
|||
D87F DFF* F0 AF BF B* 0002FFF* |
|||
D8BF DFF* F0 BF BF B* 0003FFF* |
|||
D8FF DFF* F1 8F BF B* 0004FFF* |
|||
D93F DFF* F1 9F BF B* 0005FFF* |
|||
D97F DFF* F1 AF BF B* 0006FFF* |
|||
... |
|||
DBBF DFF* F3 BF BF B* 000FFFF* |
|||
DBFF DFF* F4 8F BF B* 0010FFF* |
|||
|
|||
* = E or F |
|||
|
|||
1010 A |
|||
1011 B |
|||
1100 C |
|||
1101 D |
|||
1110 E |
|||
1111 F |
|||
|
|||
*/ |
|||
|
|||
#define kNumUTF8Sequences 7 |
|||
#define kMaxUTF8Bytes 4 |
|||
|
|||
#define kUTF8ByteSwapNotAChar 0xFFFE |
|||
#define kUTF8NotAChar 0xFFFF |
|||
|
|||
#define kMaxUTF8FromUCS4 0x10FFFF |
|||
|
|||
#define kUTF16SurrogatesBegin 0x10000 |
|||
#define kMaxUTF16FromUCS4 0x10FFFF |
|||
|
|||
/* UTF-16 surrogate pair areas */ |
|||
#define kUTF16LowSurrogateBegin 0xD800 |
|||
#define kUTF16LowSurrogateEnd 0xDBFF |
|||
#define kUTF16HighSurrogateBegin 0xDC00 |
|||
#define kUTF16HighSurrogateEnd 0xDFFF |
|||
|
|||
|
|||
/* offsets into validUTF8 table below */ |
|||
static const int offsetUTF8Sequences[kMaxUTF8Bytes + 1] = |
|||
{ |
|||
0, /* 1 byte */ |
|||
1, /* 2 bytes */ |
|||
2, /* 3 bytes */ |
|||
4, /* 4 bytes */ |
|||
kNumUTF8Sequences /* must be last */ |
|||
}; |
|||
|
|||
static const struct validUTF8Sequence |
|||
{ |
|||
uint lowChar; |
|||
uint highChar; |
|||
int numBytes; |
|||
byte validBytes[8]; |
|||
} validUTF8[kNumUTF8Sequences] = |
|||
{ |
|||
/* low high #bytes byte 1 byte 2 byte 3 byte 4 */ |
|||
{0x0000, 0x007F, 1, {0x00, 0x7F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, |
|||
{0x0080, 0x07FF, 2, {0xC2, 0xDF, 0x80, 0xBF, 0x00, 0x00, 0x00, 0x00}}, |
|||
{0x0800, 0x0FFF, 3, {0xE0, 0xE0, 0xA0, 0xBF, 0x80, 0xBF, 0x00, 0x00}}, |
|||
{0x1000, 0xFFFF, 3, {0xE1, 0xEF, 0x80, 0xBF, 0x80, 0xBF, 0x00, 0x00}}, |
|||
{0x10000, 0x3FFFF, 4, {0xF0, 0xF0, 0x90, 0xBF, 0x80, 0xBF, 0x80, 0xBF}}, |
|||
{0x40000, 0xFFFFF, 4, {0xF1, 0xF3, 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF}}, |
|||
{0x100000, 0x10FFFF, 4, {0xF4, 0xF4, 0x80, 0x8F, 0x80, 0xBF, 0x80, 0xBF}} |
|||
}; |
|||
|
|||
int TY_(DecodeUTF8BytesToChar)( uint* c, uint firstByte, ctmbstr successorBytes, |
|||
TidyInputSource* inp, int* count ) |
|||
{ |
|||
byte tempbuf[10]; |
|||
byte *buf = &tempbuf[0]; |
|||
uint ch = 0, n = 0; |
|||
int i, bytes = 0; |
|||
Bool hasError = no; |
|||
|
|||
if ( successorBytes ) |
|||
buf = (byte*) successorBytes; |
|||
|
|||
/* special check if we have been passed an EOF char */ |
|||
if ( firstByte == EndOfStream ) |
|||
{ |
|||
/* at present */ |
|||
*c = firstByte; |
|||
*count = 1; |
|||
return 0; |
|||
} |
|||
|
|||
ch = firstByte; /* first byte is passed in separately */ |
|||
|
|||
if (ch <= 0x7F) /* 0XXX XXXX one byte */ |
|||
{ |
|||
n = ch; |
|||
bytes = 1; |
|||
} |
|||
else if ((ch & 0xE0) == 0xC0) /* 110X XXXX two bytes */ |
|||
{ |
|||
n = ch & 31; |
|||
bytes = 2; |
|||
} |
|||
else if ((ch & 0xF0) == 0xE0) /* 1110 XXXX three bytes */ |
|||
{ |
|||
n = ch & 15; |
|||
bytes = 3; |
|||
} |
|||
else if ((ch & 0xF8) == 0xF0) /* 1111 0XXX four bytes */ |
|||
{ |
|||
n = ch & 7; |
|||
bytes = 4; |
|||
} |
|||
else if ((ch & 0xFC) == 0xF8) /* 1111 10XX five bytes */ |
|||
{ |
|||
n = ch & 3; |
|||
bytes = 5; |
|||
hasError = yes; |
|||
} |
|||
else if ((ch & 0xFE) == 0xFC) /* 1111 110X six bytes */ |
|||
{ |
|||
n = ch & 1; |
|||
bytes = 6; |
|||
hasError = yes; |
|||
} |
|||
else |
|||
{ |
|||
/* not a valid first byte of a UTF-8 sequence */ |
|||
n = ch; |
|||
bytes = 1; |
|||
hasError = yes; |
|||
} |
|||
|
|||
/* successor bytes should have the form 10XX XXXX */ |
|||
|
|||
/* If caller supplied buffer, use it. Else see if caller
|
|||
** supplied an input source, use that. |
|||
*/ |
|||
if ( successorBytes ) |
|||
{ |
|||
for ( i=0; i < bytes-1; ++i ) |
|||
{ |
|||
if ( !buf[i] || (buf[i] & 0xC0) != 0x80 ) |
|||
{ |
|||
hasError = yes; |
|||
bytes = i+1; |
|||
break; |
|||
} |
|||
n = (n << 6) | (buf[i] & 0x3F); |
|||
} |
|||
} |
|||
else if ( inp ) |
|||
{ |
|||
for ( i=0; i < bytes-1 && !inp->eof(inp->sourceData); ++i ) |
|||
{ |
|||
int b = inp->getByte( inp->sourceData ); |
|||
buf[i] = (tmbchar) b; |
|||
|
|||
/* End of data or illegal successor byte value */ |
|||
if ( b == EOF || (buf[i] & 0xC0) != 0x80 ) |
|||
{ |
|||
hasError = yes; |
|||
bytes = i+1; |
|||
if ( b != EOF ) |
|||
inp->ungetByte( inp->sourceData, buf[i] ); |
|||
break; |
|||
} |
|||
n = (n << 6) | (buf[i] & 0x3F); |
|||
} |
|||
} |
|||
else if ( bytes > 1 ) |
|||
{ |
|||
hasError = yes; |
|||
bytes = 1; |
|||
} |
|||
|
|||
if (!hasError && ((n == kUTF8ByteSwapNotAChar) || (n == kUTF8NotAChar))) |
|||
hasError = yes; |
|||
|
|||
if (!hasError && (n > kMaxUTF8FromUCS4)) |
|||
hasError = yes; |
|||
|
|||
#if 0 /* Breaks Big5 D8 - DF */
|
|||
if (!hasError && (n >= kUTF16LowSurrogateBegin) && (n <= kUTF16HighSurrogateEnd)) |
|||
/* unpaired surrogates not allowed */ |
|||
hasError = yes; |
|||
#endif |
|||
|
|||
if (!hasError) |
|||
{ |
|||
int lo, hi; |
|||
|
|||
lo = offsetUTF8Sequences[bytes - 1]; |
|||
hi = offsetUTF8Sequences[bytes] - 1; |
|||
|
|||
/* check for overlong sequences */ |
|||
if ((n < validUTF8[lo].lowChar) || (n > validUTF8[hi].highChar)) |
|||
hasError = yes; |
|||
else |
|||
{ |
|||
hasError = yes; /* assume error until proven otherwise */ |
|||
|
|||
for (i = lo; i <= hi; i++) |
|||
{ |
|||
int tempCount; |
|||
byte theByte; |
|||
|
|||
for (tempCount = 0; tempCount < bytes; tempCount++) |
|||
{ |
|||
if (!tempCount) |
|||
theByte = (tmbchar) firstByte; |
|||
else |
|||
theByte = buf[tempCount - 1]; |
|||
|
|||
if ( theByte >= validUTF8[i].validBytes[(tempCount * 2)] && |
|||
theByte <= validUTF8[i].validBytes[(tempCount * 2) + 1] ) |
|||
hasError = no; |
|||
if (hasError) |
|||
break; |
|||
} |
|||
} |
|||
} |
|||
} |
|||
|
|||
#if 1 && defined(_DEBUG) |
|||
if ( hasError ) |
|||
{ |
|||
/* debug */ |
|||
fprintf( stderr, "UTF-8 decoding error of %d bytes : ", bytes ); |
|||
fprintf( stderr, "0x%02x ", firstByte ); |
|||
for (i = 1; i < bytes; i++) |
|||
fprintf( stderr, "0x%02x ", buf[i - 1] ); |
|||
fprintf( stderr, " = U+%04ulx\n", n ); |
|||
} |
|||
#endif |
|||
|
|||
*count = bytes; |
|||
*c = n; |
|||
if ( hasError ) |
|||
return -1; |
|||
return 0; |
|||
} |
|||
|
|||
int TY_(EncodeCharToUTF8Bytes)( uint c, tmbstr encodebuf, |
|||
TidyOutputSink* outp, int* count ) |
|||
{ |
|||
byte tempbuf[10] = {0}; |
|||
byte* buf = &tempbuf[0]; |
|||
int bytes = 0; |
|||
Bool hasError = no; |
|||
|
|||
if ( encodebuf ) |
|||
buf = (byte*) encodebuf; |
|||
|
|||
if (c <= 0x7F) /* 0XXX XXXX one byte */ |
|||
{ |
|||
buf[0] = (tmbchar) c; |
|||
bytes = 1; |
|||
} |
|||
else if (c <= 0x7FF) /* 110X XXXX two bytes */ |
|||
{ |
|||
buf[0] = (tmbchar) ( 0xC0 | (c >> 6) ); |
|||
buf[1] = (tmbchar) ( 0x80 | (c & 0x3F) ); |
|||
bytes = 2; |
|||
} |
|||
else if (c <= 0xFFFF) /* 1110 XXXX three bytes */ |
|||
{ |
|||
buf[0] = (tmbchar) (0xE0 | (c >> 12)); |
|||
buf[1] = (tmbchar) (0x80 | ((c >> 6) & 0x3F)); |
|||
buf[2] = (tmbchar) (0x80 | (c & 0x3F)); |
|||
bytes = 3; |
|||
if ( c == kUTF8ByteSwapNotAChar || c == kUTF8NotAChar ) |
|||
hasError = yes; |
|||
#if 0 /* Breaks Big5 D8 - DF */
|
|||
else if ( c >= kUTF16LowSurrogateBegin && c <= kUTF16HighSurrogateEnd ) |
|||
/* unpaired surrogates not allowed */ |
|||
hasError = yes; |
|||
#endif |
|||
} |
|||
else if (c <= 0x1FFFFF) /* 1111 0XXX four bytes */ |
|||
{ |
|||
buf[0] = (tmbchar) (0xF0 | (c >> 18)); |
|||
buf[1] = (tmbchar) (0x80 | ((c >> 12) & 0x3F)); |
|||
buf[2] = (tmbchar) (0x80 | ((c >> 6) & 0x3F)); |
|||
buf[3] = (tmbchar) (0x80 | (c & 0x3F)); |
|||
bytes = 4; |
|||
if (c > kMaxUTF8FromUCS4) |
|||
hasError = yes; |
|||
} |
|||
else if (c <= 0x3FFFFFF) /* 1111 10XX five bytes */ |
|||
{ |
|||
buf[0] = (tmbchar) (0xF8 | (c >> 24)); |
|||
buf[1] = (tmbchar) (0x80 | (c >> 18)); |
|||
buf[2] = (tmbchar) (0x80 | ((c >> 12) & 0x3F)); |
|||
buf[3] = (tmbchar) (0x80 | ((c >> 6) & 0x3F)); |
|||
buf[4] = (tmbchar) (0x80 | (c & 0x3F)); |
|||
bytes = 5; |
|||
hasError = yes; |
|||
} |
|||
else if (c <= 0x7FFFFFFF) /* 1111 110X six bytes */ |
|||
{ |
|||
buf[0] = (tmbchar) (0xFC | (c >> 30)); |
|||
buf[1] = (tmbchar) (0x80 | ((c >> 24) & 0x3F)); |
|||
buf[2] = (tmbchar) (0x80 | ((c >> 18) & 0x3F)); |
|||
buf[3] = (tmbchar) (0x80 | ((c >> 12) & 0x3F)); |
|||
buf[4] = (tmbchar) (0x80 | ((c >> 6) & 0x3F)); |
|||
buf[5] = (tmbchar) (0x80 | (c & 0x3F)); |
|||
bytes = 6; |
|||
hasError = yes; |
|||
} |
|||
else |
|||
hasError = yes; |
|||
|
|||
/* don't output invalid UTF-8 byte sequence to a stream */ |
|||
if ( !hasError && outp != NULL ) |
|||
{ |
|||
int ix; |
|||
for ( ix=0; ix < bytes; ++ix ) |
|||
outp->putByte( outp->sinkData, buf[ix] ); |
|||
} |
|||
|
|||
#if 1 && defined(_DEBUG) |
|||
if ( hasError ) |
|||
{ |
|||
int i; |
|||
fprintf( stderr, "UTF-8 encoding error for U+%x : ", c ); |
|||
for (i = 0; i < bytes; i++) |
|||
fprintf( stderr, "0x%02x ", buf[i] ); |
|||
fprintf( stderr, "\n" ); |
|||
} |
|||
#endif |
|||
|
|||
*count = bytes; |
|||
if (hasError) |
|||
return -1; |
|||
return 0; |
|||
} |
|||
|
|||
|
|||
/* return one less than the number of bytes used by the UTF-8 byte sequence */ |
|||
/* str points to the UTF-8 byte sequence */ |
|||
/* the Unicode char is returned in *ch */ |
|||
uint TY_(GetUTF8)( ctmbstr str, uint *ch ) |
|||
{ |
|||
uint n; |
|||
int bytes; |
|||
|
|||
int err; |
|||
|
|||
bytes = 0; |
|||
|
|||
/* first byte "str[0]" is passed in separately from the */ |
|||
/* rest of the UTF-8 byte sequence starting at "str[1]" */ |
|||
err = TY_(DecodeUTF8BytesToChar)( &n, str[0], str+1, NULL, &bytes ); |
|||
if (err) |
|||
{ |
|||
#if 1 && defined(_DEBUG) |
|||
fprintf(stderr, "pprint UTF-8 decoding error for U+%x : ", n); |
|||
#endif |
|||
n = 0xFFFD; /* replacement char */ |
|||
} |
|||
|
|||
*ch = n; |
|||
return bytes - 1; |
|||
} |
|||
|
|||
/* store char c as UTF-8 encoded byte stream */ |
|||
tmbstr TY_(PutUTF8)( tmbstr buf, uint c ) |
|||
{ |
|||
int err, count = 0; |
|||
|
|||
err = TY_(EncodeCharToUTF8Bytes)( c, buf, NULL, &count ); |
|||
if (err) |
|||
{ |
|||
#if 1 && defined(_DEBUG) |
|||
fprintf(stderr, "pprint UTF-8 encoding error for U+%x : ", c); |
|||
#endif |
|||
/* replacement char 0xFFFD encoded as UTF-8 */ |
|||
buf[0] = (byte) 0xEF; |
|||
buf[1] = (byte) 0xBF; |
|||
buf[2] = (byte) 0xBD; |
|||
count = 3; |
|||
} |
|||
|
|||
buf += count; |
|||
return buf; |
|||
} |
|||
|
|||
/* True when ucs4 does not exceed kMaxUTF16FromUCS4. */
Bool TY_(IsValidUTF16FromUCS4)( tchar ucs4 )
{
    return ( kMaxUTF16FromUCS4 >= ucs4 );
}
|||
|
|||
/* True when ch lies in [kUTF16HighSurrogateBegin, kUTF16HighSurrogateEnd]. */
Bool TY_(IsHighSurrogate)( tchar ch )
{
    return !( ch < kUTF16HighSurrogateBegin || ch > kUTF16HighSurrogateEnd );
}
|||
/* True when ch lies in [kUTF16LowSurrogateBegin, kUTF16LowSurrogateEnd]. */
Bool TY_(IsLowSurrogate)( tchar ch )
{
    return !( ch < kUTF16LowSurrogateBegin || ch > kUTF16LowSurrogateEnd );
}
|||
|
|||
/* Combine a surrogate pair into a single value:
** (low - kUTF16LowSurrogateBegin) * 0x400
**   + (high - kUTF16HighSurrogateBegin) + 0x10000.
** Asserts that high and low satisfy IsHighSurrogate / IsLowSurrogate.
*/
tchar TY_(CombineSurrogatePair)( tchar high, tchar low )
{
    tchar scaledLow;

    assert( TY_(IsHighSurrogate)(high) && TY_(IsLowSurrogate)(low) );

    scaledLow = (low - kUTF16LowSurrogateBegin) * 0x400;
    return ( scaledLow + high - kUTF16HighSurrogateBegin + 0x10000 );
}
|||
|
|||
/* Split utf16 into a surrogate pair.
**
** Succeeds only if utf16 passes IsValidCombinedChar and both output
** pointers are non-NULL; otherwise nothing is written.
** On success *low receives the quotient of (utf16 - kUTF16SurrogatesBegin)
** by 0x400 plus kUTF16LowSurrogateBegin, and *high the remainder plus
** kUTF16HighSurrogateBegin.
** Returns the success status.
*/
Bool TY_(SplitSurrogatePair)( tchar utf16, tchar* low, tchar* high )
{
    tchar offset;
    Bool ok = ( TY_(IsValidCombinedChar)( utf16 ) && high && low );

    if ( !ok )
        return ok;

    offset = utf16 - kUTF16SurrogatesBegin;
    *low  = offset / 0x400 + kUTF16LowSurrogateBegin;
    *high = offset % 0x400 + kUTF16HighSurrogateBegin;
    return ok;
}
|||
|
|||
/* True when ch is at least kUTF16SurrogatesBegin and its low 16 bits
** are neither 0xFFFE nor 0xFFFF.
*/
Bool TY_(IsValidCombinedChar)( tchar ch )
{
    return !( ch < kUTF16SurrogatesBegin ||
              (ch & 0x0000FFFE) == 0x0000FFFE ||
              (ch & 0x0000FFFF) == 0x0000FFFF );
}
|||
|
|||
/* True when ch is at least kUTF16SurrogatesBegin. */
Bool TY_(IsCombinedChar)( tchar ch )
{
    return !( ch < kUTF16SurrogatesBegin );
}
|||
|
|||
/*
|
|||
* local variables: |
|||
* mode: c |
|||
* indent-tabs-mode: nil |
|||
* c-basic-offset: 4 |
|||
* eval: (c-set-offset 'substatement-open 0) |
|||
* end: |
|||
*/ |
@ -0,0 +1,46 @@ |
|||
#ifndef __UTF8_H__
#define __UTF8_H__

/* utf8.h -- convert characters to/from UTF-8

  (c) 1998-2006 (W3C) MIT, ERCIM, Keio University
  See tidy.h for the copyright notice.

*/

#include "tidyplatform.h"
#include "tidybuffio.h"

/* UTF-8 encoding/decoding support
** Does not convert character "codepoints", i.e. to/from 10646.
*/

/* Decode one UTF-8 sequence. firstByte is the lead byte; the remaining
** bytes come from successorBytes if it is non-NULL, otherwise from inp.
** *c receives the value, *count the byte count.
** Returns 0 on success, -1 on error.
*/
int TY_(DecodeUTF8BytesToChar)( uint* c, uint firstByte, ctmbstr successorBytes,
                                TidyInputSource* inp, int* count );

/* Encode c as UTF-8 into encodebuf (if non-NULL) and, when no error is
** flagged, to outp (if non-NULL). *count receives the byte count.
** Returns 0 on success, -1 on error.
*/
int TY_(EncodeCharToUTF8Bytes)( uint c, tmbstr encodebuf,
                                TidyOutputSink* outp, int* count );


/* Decode the sequence at str into *ch (0xFFFD on error); returns one
** less than the number of bytes in the sequence.
*/
uint  TY_(GetUTF8)( ctmbstr str, uint *ch );

/* Store c at buf as UTF-8 (U+FFFD on error); returns a pointer just
** past the bytes written.
*/
tmbstr TY_(PutUTF8)( tmbstr buf, uint c );

#define UNICODE_BOM_BE   0xFEFF   /* big-endian (default) UNICODE BOM */
#define UNICODE_BOM      UNICODE_BOM_BE
#define UNICODE_BOM_LE   0xFFFE   /* little-endian UNICODE BOM */
#define UNICODE_BOM_UTF8 0xEFBBBF /* UTF-8 UNICODE BOM */


Bool    TY_(IsValidUTF16FromUCS4)( tchar ucs4 );
Bool    TY_(IsHighSurrogate)( tchar ch );
Bool    TY_(IsLowSurrogate)( tchar ch );

Bool    TY_(IsCombinedChar)( tchar ch );
Bool    TY_(IsValidCombinedChar)( tchar ch );

tchar   TY_(CombineSurrogatePair)( tchar high, tchar low );

/* Parameter names follow the definition in utf8.c: the first output
** pointer is "low", the second is "high".
*/
Bool    TY_(SplitSurrogatePair)( tchar utf16, tchar* low, tchar* high );



#endif /* __UTF8_H__ */
@ -0,0 +1,23 @@ |
|||
/* version information
|
|||
|
|||
(c) 2007-2015 (W3C) MIT, ERCIM, Keio University |
|||
See tidy.h for the copyright notice. |
|||
|
|||
*/ |
|||
|
|||
#ifdef RELEASE_DATE |
|||
static const char TY_(release_date)[] = RELEASE_DATE; |
|||
#else |
|||
static const char TY_(release_date)[] = "2015/01/22"; |
|||
#endif |
|||
#ifdef LIBTIDY_VERSION |
|||
#ifdef RC_NUMBER |
|||
static const char TY_(library_version)[] = LIBTIDY_VERSION "." RC_NUMBER; |
|||
#else |
|||
static const char TY_(library_version)[] = LIBTIDY_VERSION; |
|||
#endif |
|||
#else |
|||
static const char TY_(library_version)[] = "5.0.0"; |
|||
#endif |
|||
|
|||
/* eof */ |
@ -0,0 +1,794 @@ |
|||
/* win32tc.c -- Interface to Win32 transcoding routines
|
|||
|
|||
(c) 1998-2008 (W3C) MIT, ERCIM, Keio University |
|||
See tidy.h for the copyright notice. |
|||
|
|||
*/ |
|||
|
|||
/* keep these here to keep file non-empty */ |
|||
#include "tidy.h" |
|||
#include "forward.h" |
|||
#include "streamio.h" |
|||
#include "tmbstr.h" |
|||
#include "utf8.h" |
|||
|
|||
#ifdef TIDY_WIN32_MLANG_SUPPORT |
|||
|
|||
#define VC_EXTRALEAN |
|||
#define CINTERFACE |
|||
#define COBJMACROS |
|||
|
|||
#include <windows.h> |
|||
#include <mlang.h> |
|||
|
|||
#undef COBJMACROS |
|||
#undef CINTERFACE |
|||
#undef VC_EXTRALEAN |
|||
|
|||
/* maximum number of bytes for a single character */ |
|||
#define TC_INBUFSIZE 16 |
|||
|
|||
/* maximum number of characters per byte sequence */ |
|||
#define TC_OUTBUFSIZE 16 |
|||
|
|||
/* Create a CMLangConvertCharset COM object and store its
** IMLangConvertCharset interface pointer in p. Expands to the
** CoCreateInstance() call, so its value is that call's HRESULT.
** No trailing semicolon: the macro can be used as an expression.
*/
#define CreateMLangObject(p) \
  CoCreateInstance( \
        &CLSID_CMLangConvertCharset, \
                      NULL, \
                      CLSCTX_ALL, \
        &IID_IMLangConvertCharset, \
        (VOID **)&(p))
|||
|
|||
|
|||
/* Character Set to Microsoft Windows Codepage Identifier map, */ |
|||
/* from <rotor/sscli/clr/src/classlibnative/nls/encodingdata.cpp>. */ |
|||
|
|||
/* note: the 'safe' field indicates whether this encoding can be */ |
|||
/* read/written character-by-character; this does not apply to */ |
|||
/* various stateful encodings such as ISO-2022 or UTF-7, these */ |
|||
/* must be read/written as a complete stream. It is possible that */ |
|||
/* some 'unsafe' encodings are marked as 'save'. */ |
|||
|
|||
/* todo: cleanup; Tidy should use only a single mapping table to */ |
|||
/* circumvent unsupported aliases in other transcoding libraries, */ |
|||
/* enable reverse lookup of encoding names and ease maintenance. */ |
|||
|
|||
static struct _nameWinCPMap |
|||
{ |
|||
tmbstr name; |
|||
uint wincp; |
|||
Bool safe; |
|||
} const NameWinCPMap[] = { |
|||
{ "cp037", 37, yes }, |
|||
{ "csibm037", 37, yes }, |
|||
{ "ebcdic-cp-ca", 37, yes }, |
|||
{ "ebcdic-cp-nl", 37, yes }, |
|||
{ "ebcdic-cp-us", 37, yes }, |
|||
{ "ebcdic-cp-wt", 37, yes }, |
|||
{ "ibm037", 37, yes }, |
|||
{ "cp437", 437, yes }, |
|||
{ "cspc8codepage437", 437, yes }, |
|||
{ "ibm437", 437, yes }, |
|||
{ "cp500", 500, yes }, |
|||
{ "csibm500", 500, yes }, |
|||
{ "ebcdic-cp-be", 500, yes }, |
|||
{ "ebcdic-cp-ch", 500, yes }, |
|||
{ "ibm500", 500, yes }, |
|||
{ "asmo-708", 708, yes }, |
|||
{ "dos-720", 720, yes }, |
|||
{ "ibm737", 737, yes }, |
|||
{ "ibm775", 775, yes }, |
|||
{ "cp850", 850, yes }, |
|||
{ "ibm850", 850, yes }, |
|||
{ "cp852", 852, yes }, |
|||
{ "ibm852", 852, yes }, |
|||
{ "cp855", 855, yes }, |
|||
{ "ibm855", 855, yes }, |
|||
{ "cp857", 857, yes }, |
|||
{ "ibm857", 857, yes }, |
|||
{ "ccsid00858", 858, yes }, |
|||
{ "cp00858", 858, yes }, |
|||
{ "cp858", 858, yes }, |
|||
{ "ibm00858", 858, yes }, |
|||
{ "pc-multilingual-850+euro", 858, yes }, |
|||
{ "cp860", 860, yes }, |
|||
{ "ibm860", 860, yes }, |
|||
{ "cp861", 861, yes }, |
|||
{ "ibm861", 861, yes }, |
|||
{ "cp862", 862, yes }, |
|||
{ "dos-862", 862, yes }, |
|||
{ "ibm862", 862, yes }, |
|||
{ "cp863", 863, yes }, |
|||
{ "ibm863", 863, yes }, |
|||
{ "cp864", 864, yes }, |
|||
{ "ibm864", 864, yes }, |
|||
{ "cp865", 865, yes }, |
|||
{ "ibm865", 865, yes }, |
|||
{ "cp866", 866, yes }, |
|||
{ "ibm866", 866, yes }, |
|||
{ "cp869", 869, yes }, |
|||
{ "ibm869", 869, yes }, |
|||
{ "cp870", 870, yes }, |
|||
{ "csibm870", 870, yes }, |
|||
{ "ebcdic-cp-roece", 870, yes }, |
|||
{ "ebcdic-cp-yu", 870, yes }, |
|||
{ "ibm870", 870, yes }, |
|||
{ "dos-874", 874, yes }, |
|||
{ "iso-8859-11", 874, yes }, |
|||
{ "tis-620", 874, yes }, |
|||
{ "windows-874", 874, yes }, |
|||
{ "cp875", 875, yes }, |
|||
{ "csshiftjis", 932, yes }, |
|||
{ "cswindows31j", 932, yes }, |
|||
{ "ms_kanji", 932, yes }, |
|||
{ "shift-jis", 932, yes }, |
|||
{ "shift_jis", 932, yes }, |
|||
{ "sjis", 932, yes }, |
|||
{ "x-ms-cp932", 932, yes }, |
|||
{ "x-sjis", 932, yes }, |
|||
{ "chinese", 936, yes }, |
|||
{ "cn-gb", 936, yes }, |
|||
{ "csgb2312", 936, yes }, |
|||
{ "csgb231280", 936, yes }, |
|||
{ "csiso58gb231280", 936, yes }, |
|||
{ "gb2312", 936, yes }, |
|||
{ "gb2312-80", 936, yes }, |
|||
{ "gb231280", 936, yes }, |
|||
{ "gb_2312-80", 936, yes }, |
|||
{ "gbk", 936, yes }, |
|||
{ "iso-ir-58", 936, yes }, |
|||
{ "csksc56011987", 949, yes }, |
|||
{ "iso-ir-149", 949, yes }, |
|||
{ "korean", 949, yes }, |
|||
{ "ks-c-5601", 949, yes }, |
|||
{ "ks-c5601", 949, yes }, |
|||
{ "ks_c_5601", 949, yes }, |
|||
{ "ks_c_5601-1987", 949, yes }, |
|||
{ "ks_c_5601-1989", 949, yes }, |
|||
{ "ks_c_5601_1987", 949, yes }, |
|||
{ "ksc5601", 949, yes }, |
|||
{ "ksc_5601", 949, yes }, |
|||
{ "big5", 950, yes }, |
|||
{ "big5-hkscs", 950, yes }, |
|||
{ "cn-big5", 950, yes }, |
|||
{ "csbig5", 950, yes }, |
|||
{ "x-x-big5", 950, yes }, |
|||
{ "cp1026", 1026, yes }, |
|||
{ "csibm1026", 1026, yes }, |
|||
{ "ibm1026", 1026, yes }, |
|||
{ "ibm01047", 1047, yes }, |
|||
{ "ccsid01140", 1140, yes }, |
|||
{ "cp01140", 1140, yes }, |
|||
{ "ebcdic-us-37+euro", 1140, yes }, |
|||
{ "ibm01140", 1140, yes }, |
|||
{ "ccsid01141", 1141, yes }, |
|||
{ "cp01141", 1141, yes }, |
|||
{ "ebcdic-de-273+euro", 1141, yes }, |
|||
{ "ibm01141", 1141, yes }, |
|||
{ "ccsid01142", 1142, yes }, |
|||
{ "cp01142", 1142, yes }, |
|||
{ "ebcdic-dk-277+euro", 1142, yes }, |
|||
{ "ebcdic-no-277+euro", 1142, yes }, |
|||
{ "ibm01142", 1142, yes }, |
|||
{ "ccsid01143", 1143, yes }, |
|||
{ "cp01143", 1143, yes }, |
|||
{ "ebcdic-fi-278+euro", 1143, yes }, |
|||
{ "ebcdic-se-278+euro", 1143, yes }, |
|||
{ "ibm01143", 1143, yes }, |
|||
{ "ccsid01144", 1144, yes }, |
|||
{ "cp01144", 1144, yes }, |
|||
{ "ebcdic-it-280+euro", 1144, yes }, |
|||
{ "ibm01144", 1144, yes }, |
|||
{ "ccsid01145", 1145, yes }, |
|||
{ "cp01145", 1145, yes }, |
|||
{ "ebcdic-es-284+euro", 1145, yes }, |
|||
{ "ibm01145", 1145, yes }, |
|||
{ "ccsid01146", 1146, yes }, |
|||
{ "cp01146", 1146, yes }, |
|||
{ "ebcdic-gb-285+euro", 1146, yes }, |
|||
{ "ibm01146", 1146, yes }, |
|||
{ "ccsid01147", 1147, yes }, |
|||
{ "cp01147", 1147, yes }, |
|||
{ "ebcdic-fr-297+euro", 1147, yes }, |
|||
{ "ibm01147", 1147, yes }, |
|||
{ "ccsid01148", 1148, yes }, |
|||
{ "cp01148", 1148, yes }, |
|||
{ "ebcdic-international-500+euro", 1148, yes }, |
|||
{ "ibm01148", 1148, yes }, |
|||
{ "ccsid01149", 1149, yes }, |
|||
{ "cp01149", 1149, yes }, |
|||
{ "ebcdic-is-871+euro", 1149, yes }, |
|||
{ "ibm01149", 1149, yes }, |
|||
{ "iso-10646-ucs-2", 1200, yes }, |
|||
{ "ucs-2", 1200, yes }, |
|||
{ "unicode", 1200, yes }, |
|||
{ "utf-16", 1200, yes }, |
|||
{ "utf-16le", 1200, yes }, |
|||
{ "unicodefffe", 1201, yes }, |
|||
{ "utf-16be", 1201, yes }, |
|||
{ "windows-1250", 1250, yes }, |
|||
{ "x-cp1250", 1250, yes }, |
|||
{ "windows-1251", 1251, yes }, |
|||
{ "x-cp1251", 1251, yes }, |
|||
{ "windows-1252", 1252, yes }, |
|||
{ "x-ansi", 1252, yes }, |
|||
{ "windows-1253", 1253, yes }, |
|||
{ "windows-1254", 1254, yes }, |
|||
{ "windows-1255", 1255, yes }, |
|||
{ "cp1256", 1256, yes }, |
|||
{ "windows-1256", 1256, yes }, |
|||
{ "windows-1257", 1257, yes }, |
|||
{ "windows-1258", 1258, yes }, |
|||
{ "johab", 1361, yes }, |
|||
{ "macintosh", 10000, yes }, |
|||
{ "x-mac-japanese", 10001, yes }, |
|||
{ "x-mac-chinesetrad", 10002, yes }, |
|||
{ "x-mac-korean", 10003, yes }, |
|||
{ "x-mac-arabic", 10004, yes }, |
|||
{ "x-mac-hebrew", 10005, yes }, |
|||
{ "x-mac-greek", 10006, yes }, |
|||
{ "x-mac-cyrillic", 10007, yes }, |
|||
{ "x-mac-chinesesimp", 10008, yes }, |
|||
{ "x-mac-romanian", 10010, yes }, |
|||
{ "x-mac-ukrainian", 10017, yes }, |
|||
{ "x-mac-thai", 10021, yes }, |
|||
{ "x-mac-ce", 10029, yes }, |
|||
{ "x-mac-icelandic", 10079, yes }, |
|||
{ "x-mac-turkish", 10081, yes }, |
|||
{ "x-mac-croatian", 10082, yes }, |
|||
{ "x-chinese-cns", 20000, yes }, |
|||
{ "x-cp20001", 20001, yes }, |
|||
{ "x-chinese-eten", 20002, yes }, |
|||
{ "x-cp20003", 20003, yes }, |
|||
{ "x-cp20004", 20004, yes }, |
|||
{ "x-cp20005", 20005, yes }, |
|||
{ "irv", 20105, yes }, |
|||
{ "x-ia5", 20105, yes }, |
|||
{ "din_66003", 20106, yes }, |
|||
{ "german", 20106, yes }, |
|||
{ "x-ia5-german", 20106, yes }, |
|||
{ "sen_850200_b", 20107, yes }, |
|||
{ "swedish", 20107, yes }, |
|||
{ "x-ia5-swedish", 20107, yes }, |
|||
{ "norwegian", 20108, yes }, |
|||
{ "ns_4551-1", 20108, yes }, |
|||
{ "x-ia5-norwegian", 20108, yes }, |
|||
{ "ansi_x3.4-1968", 20127, yes }, |
|||
{ "ansi_x3.4-1986", 20127, yes }, |
|||
{ "ascii", 20127, yes }, |
|||
{ "cp367", 20127, yes }, |
|||
{ "csascii", 20127, yes }, |
|||
{ "ibm367", 20127, yes }, |
|||
{ "iso-ir-6", 20127, yes }, |
|||
{ "iso646-us", 20127, yes }, |
|||
{ "iso_646.irv:1991", 20127, yes }, |
|||
{ "us", 20127, yes }, |
|||
{ "us-ascii", 20127, yes }, |
|||
{ "x-cp20261", 20261, yes }, |
|||
{ "x-cp20269", 20269, yes }, |
|||
{ "cp273", 20273, yes }, |
|||
{ "csibm273", 20273, yes }, |
|||
{ "ibm273", 20273, yes }, |
|||
{ "csibm277", 20277, yes }, |
|||
{ "ebcdic-cp-dk", 20277, yes }, |
|||
{ "ebcdic-cp-no", 20277, yes }, |
|||
{ "ibm277", 20277, yes }, |
|||
{ "cp278", 20278, yes }, |
|||
{ "csibm278", 20278, yes }, |
|||
{ "ebcdic-cp-fi", 20278, yes }, |
|||
{ "ebcdic-cp-se", 20278, yes }, |
|||
{ "ibm278", 20278, yes }, |
|||
{ "cp280", 20280, yes }, |
|||
{ "csibm280", 20280, yes }, |
|||
{ "ebcdic-cp-it", 20280, yes }, |
|||
{ "ibm280", 20280, yes }, |
|||
{ "cp284", 20284, yes }, |
|||
{ "csibm284", 20284, yes }, |
|||
{ "ebcdic-cp-es", 20284, yes }, |
|||
{ "ibm284", 20284, yes }, |
|||
{ "cp285", 20285, yes }, |
|||
{ "csibm285", 20285, yes }, |
|||
{ "ebcdic-cp-gb", 20285, yes }, |
|||
{ "ibm285", 20285, yes }, |
|||
{ "cp290", 20290, yes }, |
|||
{ "csibm290", 20290, yes }, |
|||
{ "ebcdic-jp-kana", 20290, yes }, |
|||
{ "ibm290", 20290, yes }, |
|||
{ "cp297", 20297, yes }, |
|||
{ "csibm297", 20297, yes }, |
|||
{ "ebcdic-cp-fr", 20297, yes }, |
|||
{ "ibm297", 20297, yes }, |
|||
{ "cp420", 20420, yes }, |
|||
{ "csibm420", 20420, yes }, |
|||
{ "ebcdic-cp-ar1", 20420, yes }, |
|||
{ "ibm420", 20420, yes }, |
|||
{ "cp423", 20423, yes }, |
|||
{ "csibm423", 20423, yes }, |
|||
{ "ebcdic-cp-gr", 20423, yes }, |
|||
{ "ibm423", 20423, yes }, |
|||
{ "cp424", 20424, yes }, |
|||
{ "csibm424", 20424, yes }, |
|||
{ "ebcdic-cp-he", 20424, yes }, |
|||
{ "ibm424", 20424, yes }, |
|||
{ "x-ebcdic-koreanextended", 20833, yes }, |
|||
{ "csibmthai", 20838, yes }, |
|||
{ "ibm-thai", 20838, yes }, |
|||
{ "cskoi8r", 20866, yes }, |
|||
{ "koi", 20866, yes }, |
|||
{ "koi8", 20866, yes }, |
|||
{ "koi8-r", 20866, yes }, |
|||
{ "koi8r", 20866, yes }, |
|||
{ "cp871", 20871, yes }, |
|||
{ "csibm871", 20871, yes }, |
|||
{ "ebcdic-cp-is", 20871, yes }, |
|||
{ "ibm871", 20871, yes }, |
|||
{ "cp880", 20880, yes }, |
|||
{ "csibm880", 20880, yes }, |
|||
{ "ebcdic-cyrillic", 20880, yes }, |
|||
{ "ibm880", 20880, yes }, |
|||
{ "cp905", 20905, yes }, |
|||
{ "csibm905", 20905, yes }, |
|||
{ "ebcdic-cp-tr", 20905, yes }, |
|||
{ "ibm905", 20905, yes }, |
|||
{ "ccsid00924", 20924, yes }, |
|||
{ "cp00924", 20924, yes }, |
|||
{ "ebcdic-latin9--euro", 20924, yes }, |
|||
{ "ibm00924", 20924, yes }, |
|||
{ "x-cp20936", 20936, yes }, |
|||
{ "x-cp20949", 20949, yes }, |
|||
{ "cp1025", 21025, yes }, |
|||
{ "x-cp21027", 21027, yes }, |
|||
{ "koi8-ru", 21866, yes }, |
|||
{ "koi8-u", 21866, yes }, |
|||
{ "cp819", 28591, yes }, |
|||
{ "csisolatin1", 28591, yes }, |
|||
{ "ibm819", 28591, yes }, |
|||
{ "iso-8859-1", 28591, yes }, |
|||
{ "iso-ir-100", 28591, yes }, |
|||
{ "iso8859-1", 28591, yes }, |
|||
{ "iso_8859-1", 28591, yes }, |
|||
{ "iso_8859-1:1987", 28591, yes }, |
|||
{ "l1", 28591, yes }, |
|||
{ "latin1", 28591, yes }, |
|||
{ "csisolatin2", 28592, yes }, |
|||
{ "iso-8859-2", 28592, yes }, |
|||
{ "iso-ir-101", 28592, yes }, |
|||
{ "iso8859-2", 28592, yes }, |
|||
{ "iso_8859-2", 28592, yes }, |
|||
{ "iso_8859-2:1987", 28592, yes }, |
|||
{ "l2", 28592, yes }, |
|||
{ "latin2", 28592, yes }, |
|||
{ "csisolatin3", 28593, yes }, |
|||
{ "iso-8859-3", 28593, yes }, |
|||
{ "iso-ir-109", 28593, yes }, |
|||
{ "iso_8859-3", 28593, yes }, |
|||
{ "iso_8859-3:1988", 28593, yes }, |
|||
{ "l3", 28593, yes }, |
|||
{ "latin3", 28593, yes }, |
|||
{ "csisolatin4", 28594, yes }, |
|||
{ "iso-8859-4", 28594, yes }, |
|||
{ "iso-ir-110", 28594, yes }, |
|||
{ "iso_8859-4", 28594, yes }, |
|||
{ "iso_8859-4:1988", 28594, yes }, |
|||
{ "l4", 28594, yes }, |
|||
{ "latin4", 28594, yes }, |
|||
{ "csisolatincyrillic", 28595, yes }, |
|||
{ "cyrillic", 28595, yes }, |
|||
{ "iso-8859-5", 28595, yes }, |
|||
{ "iso-ir-144", 28595, yes }, |
|||
{ "iso_8859-5", 28595, yes }, |
|||
{ "iso_8859-5:1988", 28595, yes }, |
|||
{ "arabic", 28596, yes }, |
|||
{ "csisolatinarabic", 28596, yes }, |
|||
{ "ecma-114", 28596, yes }, |
|||
{ "iso-8859-6", 28596, yes }, |
|||
{ "iso-ir-127", 28596, yes }, |
|||
{ "iso_8859-6", 28596, yes }, |
|||
{ "iso_8859-6:1987", 28596, yes }, |
|||
{ "csisolatingreek", 28597, yes }, |
|||
{ "ecma-118", 28597, yes }, |
|||
{ "elot_928", 28597, yes }, |
|||
{ "greek", 28597, yes }, |
|||
{ "greek8", 28597, yes }, |
|||
{ "iso-8859-7", 28597, yes }, |
|||
{ "iso-ir-126", 28597, yes }, |
|||
{ "iso_8859-7", 28597, yes }, |
|||
{ "iso_8859-7:1987", 28597, yes }, |
|||
{ "csisolatinhebrew", 28598, yes }, |
|||
{ "hebrew", 28598, yes }, |
|||
{ "iso-8859-8", 28598, yes }, |
|||
{ "iso-ir-138", 28598, yes }, |
|||
{ "iso_8859-8", 28598, yes }, |
|||
{ "iso_8859-8:1988", 28598, yes }, |
|||
{ "logical", 28598, yes }, |
|||
{ "visual", 28598, yes }, |
|||
{ "csisolatin5", 28599, yes }, |
|||
{ "iso-8859-9", 28599, yes }, |
|||
{ "iso-ir-148", 28599, yes }, |
|||
{ "iso_8859-9", 28599, yes }, |
|||
{ "iso_8859-9:1989", 28599, yes }, |
|||
{ "l5", 28599, yes }, |
|||
{ "latin5", 28599, yes }, |
|||
{ "iso-8859-13", 28603, yes }, |
|||
{ "csisolatin9", 28605, yes }, |
|||
{ "iso-8859-15", 28605, yes }, |
|||
{ "iso_8859-15", 28605, yes }, |
|||
{ "l9", 28605, yes }, |
|||
{ "latin9", 28605, yes }, |
|||
{ "x-europa", 29001, yes }, |
|||
{ "iso-8859-8-i", 38598, yes }, |
|||
{ "iso-2022-jp", 50220, no }, |
|||
{ "csiso2022jp", 50221, no }, |
|||
{ "csiso2022kr", 50225, no }, |
|||
{ "iso-2022-kr", 50225, no }, |
|||
{ "iso-2022-kr-7", 50225, no }, |
|||
{ "iso-2022-kr-7bit", 50225, no }, |
|||
{ "cp50227", 50227, no }, |
|||
{ "x-cp50227", 50227, no }, |
|||
{ "cp930", 50930, yes }, |
|||
{ "x-ebcdic-japaneseanduscanada", 50931, yes }, |
|||
{ "cp933", 50933, yes }, |
|||
{ "cp935", 50935, yes }, |
|||
{ "cp937", 50937, yes }, |
|||
{ "cp939", 50939, yes }, |
|||
{ "cseucpkdfmtjapanese", 51932, yes }, |
|||
{ "euc-jp", 51932, yes }, |
|||
{ "extended_unix_code_packed_format_for_japanese", 51932, yes }, |
|||
{ "iso-2022-jpeuc", 51932, yes }, |
|||
{ "x-euc", 51932, yes }, |
|||
{ "x-euc-jp", 51932, yes }, |
|||
{ "euc-cn", 51936, yes }, |
|||
{ "x-euc-cn", 51936, yes }, |
|||
{ "cseuckr", 51949, yes }, |
|||
{ "euc-kr", 51949, yes }, |
|||
{ "iso-2022-kr-8", 51949, yes }, |
|||
{ "iso-2022-kr-8bit", 51949, yes }, |
|||
{ "hz-gb-2312", 52936, no }, |
|||
{ "gb18030", 54936, yes }, |
|||
{ "x-iscii-de", 57002, yes }, |
|||
{ "x-iscii-be", 57003, yes }, |
|||
{ "x-iscii-ta", 57004, yes }, |
|||
{ "x-iscii-te", 57005, yes }, |
|||
{ "x-iscii-as", 57006, yes }, |
|||
{ "x-iscii-or", 57007, yes }, |
|||
{ "x-iscii-ka", 57008, yes }, |
|||
{ "x-iscii-ma", 57009, yes }, |
|||
{ "x-iscii-gu", 57010, yes }, |
|||
{ "x-iscii-pa", 57011, yes }, |
|||
{ "csunicode11utf7", 65000, no }, |
|||
{ "unicode-1-1-utf-7", 65000, no }, |
|||
{ "unicode-2-0-utf-7", 65000, no }, |
|||
{ "utf-7", 65000, no }, |
|||
{ "x-unicode-1-1-utf-7", 65000, no }, |
|||
{ "x-unicode-2-0-utf-7", 65000, no }, |
|||
{ "unicode-1-1-utf-8", 65001, yes }, |
|||
{ "unicode-2-0-utf-8", 65001, yes }, |
|||
{ "utf-8", 65001, yes }, |
|||
{ "x-unicode-1-1-utf-8", 65001, yes }, |
|||
{ "x-unicode-2-0-utf-8", 65001, yes }, |
|||
|
|||
/* final entry */ |
|||
{ NULL, 0, no } |
|||
}; |
|||
|
|||
/* Look up the Windows code page for an encoding name.
**
** The name is copied and lower-cased, then compared against each
** NameWinCPMap entry in order. Returns 0 if the name is not in the
** table, if the entry is not marked 'safe', or if an MLang converter
** from that code page to 1200 cannot be created and initialised;
** otherwise returns the entry's code page.
*/
uint TY_(Win32MLangGetCPFromName)(TidyAllocator *allocator, ctmbstr encoding)
{
    IMLangConvertCharset * conv = NULL;
    HRESULT hr;
    tmbstr lowered;
    uint idx = 0;
    uint codepage;

    /* ensure name is in lower case */
    lowered = TY_(tmbstrdup)(allocator,encoding);
    lowered = TY_(tmbstrtolower)(lowered);

    /* stop at the first matching entry or at the NULL terminator */
    while ( NameWinCPMap[idx].name &&
            TY_(tmbstrcmp)(NameWinCPMap[idx].name, lowered) != 0 )
        ++idx;

    TidyFree(allocator, lowered);

    if ( !NameWinCPMap[idx].name )
        return 0;

    /* currently no support for unsafe encodings */
    if ( !NameWinCPMap[idx].safe )
        return 0;

    codepage = NameWinCPMap[idx].wincp;

    /* hack for config.c */
    CoInitialize(NULL);
    hr = CreateMLangObject(conv);

    if ( hr == S_OK && conv )
    {
        /* probe: can MLang convert from this code page to 1200? */
        hr = IMLangConvertCharset_Initialize(conv, codepage, 1200, 0);

        if ( hr != S_OK )
            codepage = 0;

        IMLangConvertCharset_Release(conv);
    }
    else
    {
        codepage = 0;
    }

    CoUninitialize();

    return codepage;
}
|||
|
|||
/* Attach an MLang converter (code page wincp -> 1200) to the input
** stream. On success the converter is stored in in->mlang and yes is
** returned; on any failure in->mlang is left untouched and no is
** returned.
**
** NOTE(review): CoInitialize() is called before any check and is not
** undone on the failure paths here; presumably the caller balances it
** through Win32MLangUninitInputTranscoder(), which calls
** CoUninitialize() unconditionally -- verify against the caller.
*/
Bool TY_(Win32MLangInitInputTranscoder)(StreamIn * in, uint wincp)
{
    IMLangConvertCharset * p = NULL;
    HRESULT hr;

    assert( in != NULL );

    CoInitialize(NULL);

    if (wincp == 0)
    {
        /* no codepage found for this encoding */
        return no;
    }

    hr = CreateMLangObject(p);

    if (hr != S_OK || !p)
    {
        /* MLang not supported */
        return no;
    }

    hr = IMLangConvertCharset_Initialize(p, wincp, 1200, 0);

    if (hr != S_OK)
    {
        /* encoding not supported, insufficient memory, etc. */
        /* the object is not handed to the stream, so release it here */
        IMLangConvertCharset_Release(p);
        return no;
    }

    in->mlang = p;

    return yes;
}
|||
|
|||
/* Release the MLang converter stored in in->mlang (if any), clear the
** pointer, and call CoUninitialize().
*/
void TY_(Win32MLangUninitInputTranscoder)(StreamIn * in)
{
    IMLangConvertCharset * conv;

    assert( in != NULL );

    conv = (IMLangConvertCharset *)in->mlang;
    if ( conv != NULL )
    {
        IMLangConvertCharset_Release(conv);
        in->mlang = NULL;
    }

    CoUninitialize();
}
|||
|
|||
#if 0 |
|||
/* Attach an MLang converter (1200 -> code page of 'encoding') to the
** output stream. On success the converter is stored in out->mlang and
** yes is returned; otherwise no is returned.
*/
Bool Win32MLangInitOutputTranscoder(TidyAllocator *allocator, StreamOut * out, tmbstr encoding)
{
    IMLangConvertCharset * p = NULL;
    HRESULT hr;
    uint wincp;

    assert( out != NULL );

    CoInitialize(NULL);

    wincp = TY_(Win32MLangGetCPFromName)(allocator, encoding);
    if (wincp == 0)
    {
        /* no codepage found for this encoding */
        return no;
    }

    hr = CreateMLangObject(p);

    if (hr != S_OK || !p)
    {
        /* MLang not supported */
        return no;
    }

    /* keep the result: the check below previously tested the stale */
    /* HRESULT of the object creation and could never fail          */
    hr = IMLangConvertCharset_Initialize(p, 1200, wincp, MLCONVCHARF_NOBESTFITCHARS);

    if (hr != S_OK)
    {
        /* encoding not supported, insufficient memory, etc. */
        /* the object is not handed to the stream, so release it here */
        IMLangConvertCharset_Release(p);
        return no;
    }

    out->mlang = p;

    return yes;
}
|||
|
|||
/* Release the MLang converter stored in out->mlang (if any), clear the
** pointer, and call CoUninitialize().
*/
void Win32MLangUninitOutputTranscoder(StreamOut * out)
{
    IMLangConvertCharset * conv;

    assert( out != NULL );

    conv = (IMLangConvertCharset *)out->mlang;
    if ( conv != NULL )
    {
        IMLangConvertCharset_Release(conv);
        out->mlang = NULL;
    }

    CoUninitialize();
}
|||
#endif |
|||
|
|||
/* Read one character from the stream through the MLang converter.
**
** firstByte is the byte already read; further bytes are pulled from
** in->source one at a time until the converter yields one or two
** UTF-16 units. Two units are merged with CombineSurrogatePair().
** *bytesRead receives the converter's consumed-byte count on every
** return path except the final give-up.
** Returns the character, or EndOfStream if the source runs dry or no
** character was produced within TC_INBUFSIZE bytes.
*/
int TY_(Win32MLangGetChar)(byte firstByte, StreamIn * in, uint * bytesRead)
{
    IMLangConvertCharset * conv;
    TidyInputSource * src;
    CHAR pending[TC_INBUFSIZE] = { 0 };
    WCHAR decoded[TC_OUTBUFSIZE] = { 0 };
    HRESULT hr = S_OK;
    size_t pendingLen;

    assert( in != NULL );
    assert( &in->source != NULL );
    assert( bytesRead != NULL );
    assert( in->mlang != NULL );

    conv = (IMLangConvertCharset *)in->mlang;
    src = &in->source;

    pending[0] = (CHAR)firstByte;

    for ( pendingLen = 1; pendingLen < TC_INBUFSIZE; ++pendingLen )
    {
        UINT produced = TC_OUTBUFSIZE;
        UINT consumed = pendingLen;
        int next;

        hr = IMLangConvertCharset_DoConversionToUnicode(conv, pending, &consumed, decoded, &produced);

        assert( hr == S_OK );
        assert( produced <= 2 );

        switch ( produced )
        {
        case 2:
            {
                /* U+10000-U+10FFFF are returned as a pair of surrogates */
                tchar first = (tchar)decoded[0];
                tchar second = (tchar)decoded[1];
                assert( TY_(IsHighSurrogate)(second) && TY_(IsLowSurrogate)(first) );
                *bytesRead = consumed;
                return (int)TY_(CombineSurrogatePair)(second, first);
            }

        case 1:
            /* we found the character */
            *bytesRead = consumed;
            return (int)decoded[0];

        default:
            /* we need more bytes */
            break;
        }

        next = src->getByte(src->sourceData);

        if ( next == EndOfStream )
        {
            /* todo: error message for broken stream? */
            *bytesRead = consumed;
            return EndOfStream;
        }

        pending[pendingLen] = (CHAR)next;
    }

    /* No full character found after reading TC_INBUFSIZE bytes, */
    /* give up to read this stream, it's obviously unreadable.   */

    /* todo: error message for broken stream? */
    return EndOfStream;
}
|||
|
|||
/* Ask the output stream's MLang converter whether c can be converted.
**
** c is turned into one UTF-16 unit, or two via SplitSurrogatePair()
** when it is above 0xFFFF, and handed to DoConversionFromUnicode()
** with a NULL destination buffer and NULL destination size.
** Returns yes if that call returns S_OK, otherwise no.
*/
Bool Win32MLangIsConvertible(tchar c, StreamOut * out)
{
    IMLangConvertCharset * p;
    HRESULT hr;
    WCHAR inbuf[2] = { 0 };
    UINT inbufsize = 0;

    assert( c != 0 );
    assert( c <= 0x10FFFF );
    assert( out != NULL );
    assert( out->mlang != NULL );

    if (c > 0xFFFF)
    {
        tchar high = 0;
        tchar low = 0;

        TY_(SplitSurrogatePair)(c, &low, &high);

        inbuf[inbufsize++] = (WCHAR)low;
        inbuf[inbufsize++] = (WCHAR)high;
    }
    else
        inbuf[inbufsize++] = (WCHAR)c;

    p = (IMLangConvertCharset *)out->mlang;
    hr = IMLangConvertCharset_DoConversionFromUnicode(p, inbuf, &inbufsize, NULL, NULL);

    return hr == S_OK ? yes : no;
}
|||
|
|||
/* Converts the code point `c` with the MLang converter attached to `out`
 * and writes the resulting bytes to the stream's output sink.
 *
 * c            - code point to write; asserted non-zero and <= 0x10FFFF.
 * out          - output stream; asserted non-NULL with a non-NULL `mlang`
 *                member (used as an IMLangConvertCharset pointer).
 * bytesWritten - asserted non-NULL; receives the number of bytes handed to
 *                the sink, or 0 when the conversion did not succeed.
 */
void Win32MLangPutChar(tchar c, StreamOut * out, uint * bytesWritten)
{
    IMLangConvertCharset * p;
    TidyOutputSink * sink;
    CHAR outbuf[TC_OUTBUFSIZE] = { 0 };
    UINT outbufsize = TC_OUTBUFSIZE;
    HRESULT hr = S_OK;
    WCHAR inbuf[2] = { 0 };
    UINT inbufsize = 0;
    uint i;

    assert( c != 0 );
    assert( c <= 0x10FFFF );
    assert( bytesWritten != NULL );
    assert( out != NULL );
    /* `out->sink` is an embedded member, so its address cannot be NULL once
       `out` is valid; no separate check is needed for it. */
    assert( out->mlang != NULL );

    p = (IMLangConvertCharset *)out->mlang;
    sink = &out->sink;

    if (c > 0xFFFF)
    {
        tchar high = 0;
        tchar low = 0;

        /* Values above 0xFFFF do not fit one WCHAR: hand them over as the
           two code units produced by SplitSurrogatePair. */
        TY_(SplitSurrogatePair)(c, &low, &high);

        inbuf[inbufsize++] = (WCHAR)low;
        inbuf[inbufsize++] = (WCHAR)high;
    }
    else
        inbuf[inbufsize++] = (WCHAR)c;

    hr = IMLangConvertCharset_DoConversionFromUnicode(p, inbuf, &inbufsize, outbuf, &outbufsize);

    assert( hr == S_OK );
    assert( outbufsize > 0 );
    assert( inbufsize == 1 || inbufsize == 2 );

    /* The asserts above vanish under NDEBUG. Without this guard a failed
       conversion would still push `outbufsize` bytes of the (zeroed) buffer
       to the sink, and a bogus size could read past `outbuf`. */
    if (hr != S_OK || outbufsize > TC_OUTBUFSIZE)
    {
        *bytesWritten = 0;
        return;
    }

    for (i = 0; i < outbufsize; ++i)
        sink->putByte(sink->sinkData, (byte)(outbuf[i]));

    *bytesWritten = outbufsize;

    return;
}
|||
|
|||
#endif /* TIDY_WIN32_MLANG_SUPPORT */ |
|||
|
|||
/*
|
|||
* local variables: |
|||
* mode: c |
|||
* indent-tabs-mode: nil |
|||
* c-basic-offset: 4 |
|||
* eval: (c-set-offset 'substatement-open 0) |
|||
* end: |
|||
*/ |
@ -0,0 +1,18 @@ |
|||
#ifndef __WIN32TC_H__ |
|||
#define __WIN32TC_H__ |
|||
#ifdef TIDY_WIN32_MLANG_SUPPORT |
|||
|
|||
/* win32tc.h -- Interface to Win32 transcoding routines
|
|||
|
|||
(c) 1998-2006 (W3C) MIT, ERCIM, Keio University |
|||
See tidy.h for the copyright notice. |
|||
|
|||
*/ |
|||
|
|||
/* Input-side MLang transcoding entry points. The notes below are inferred from names and signatures only -- TODO confirm against the implementation. */
/* Presumably maps the encoding name `encoding` to a Windows code page number; `allocator` is the allocator it may use. */
uint TY_(Win32MLangGetCPFromName)(TidyAllocator *allocator,ctmbstr encoding); |
|||
/* Presumably attaches a converter for code page `wincp` to the input stream `in`; the Bool result looks like a success flag -- verify. */
Bool TY_(Win32MLangInitInputTranscoder)(StreamIn * in, uint wincp); |
|||
/* Presumably releases whatever the init call above attached to `in` -- verify. */
void TY_(Win32MLangUninitInputTranscoder)(StreamIn * in); |
|||
/* Presumably decodes one character starting at `firstByte`, pulling further bytes from `in` as needed and reporting the count through `bytesRead` -- verify. */
int TY_(Win32MLangGetChar)(byte firstByte, StreamIn * in, uint * bytesRead); |
|||
|
|||
#endif /* TIDY_WIN32_MLANG_SUPPORT */ |
|||
#endif /* __WIN32TC_H__ */ |
@ -1,7 +1,7 @@ |
|||
context("basic functionality") |
|||
test_that("we can do something", { |
|||
test_that("tidying works", { |
|||
|
|||
expect_gte(nchar(tidy("<b><p><a href='http://google.com'>google ></a></p></b>")), |
|||
256) |
|||
expect_gte(nchar(tidy_html("<b><p><a href='http://google.com'>google ></a></p></b>")), |
|||
249) |
|||
|
|||
}) |
|||
|
Loading…
Reference in new issue