/* access.c -- carry out accessibility checks Copyright University of Toronto Portions (c) 1998-2009 (W3C) MIT, ERCIM, Keio University See tidy.h for the copyright notice. */ /********************************************************************* * AccessibilityChecks * * Carries out processes for all accessibility checks. Traverses * through all the content within the tree and evaluates the tags for * accessibility. * * To perform the following checks, 'AccessibilityChecks' must be * called AFTER the tree structure has been formed. * * If, in the command prompt, there is no specification of which * accessibility priorities to check, no accessibility checks will be * performed. (ie. '1' for priority 1, '2' for priorities 1 and 2, * and '3') for priorities 1, 2 and 3.) * * Copyright University of Toronto * Programmed by: Mike Lam and Chris Ridpath * Modifications by : Terry Teague (TRT) * * Reference document: http://www.w3.org/TR/WAI-WEBCONTENT/ *********************************************************************/ #include "tidy-int.h" #if SUPPORT_ACCESSIBILITY_CHECKS #include "access.h" #include "message.h" #include "tags.h" #include "attrs.h" #include "tmbstr.h" /* The accessibility checks to perform depending on user's desire. 1. priority 1 2. priority 1 & 2 3. 
priority 1, 2, & 3 */ /* List of possible image types */ static const ctmbstr imageExtensions[] = {".jpg", ".gif", ".tif", ".pct", ".pic", ".iff", ".dib", ".tga", ".pcx", ".png", ".jpeg", ".tiff", ".bmp"}; #define N_IMAGE_EXTS (sizeof(imageExtensions)/sizeof(ctmbstr)) /* List of possible sound file types */ static const ctmbstr soundExtensions[] = {".wav", ".au", ".aiff", ".snd", ".ra", ".rm"}; static const int soundExtErrCodes[] = { AUDIO_MISSING_TEXT_WAV, AUDIO_MISSING_TEXT_AU, AUDIO_MISSING_TEXT_AIFF, AUDIO_MISSING_TEXT_SND, AUDIO_MISSING_TEXT_RA, AUDIO_MISSING_TEXT_RM }; #define N_AUDIO_EXTS (sizeof(soundExtensions)/sizeof(ctmbstr)) /* List of possible media extensions */ static const ctmbstr mediaExtensions[] = {".mpg", ".mov", ".asx", ".avi", ".ivf", ".m1v", ".mmm", ".mp2v", ".mpa", ".mpe", ".mpeg", ".ram", ".smi", ".smil", ".swf", ".wm", ".wma", ".wmv"}; #define N_MEDIA_EXTS (sizeof(mediaExtensions)/sizeof(ctmbstr)) /* List of possible frame sources */ static const ctmbstr frameExtensions[] = {".htm", ".html", ".shtm", ".shtml", ".cfm", ".cfml", ".asp", ".cgi", ".pl", ".smil"}; #define N_FRAME_EXTS (sizeof(frameExtensions)/sizeof(ctmbstr)) /* List of possible colour values */ static const int colorValues[][3] = { { 0, 0, 0}, {128,128,128}, {192,192,192}, {255,255,255}, {192, 0, 0}, {255, 0, 0}, {128, 0,128}, {255, 0,255}, { 0,128, 0}, { 0,255, 0}, {128,128, 0}, {255,255, 0}, { 0, 0,128}, { 0, 0,255}, { 0,128,128}, { 0,255,255} }; #define N_COLOR_VALS (sizeof(colorValues)/(sizeof(int[3])) /* These arrays are used to convert color names to their RGB values */ static const ctmbstr colorNames[] = { "black", "silver", "grey", "white", "maroon", "red", "purple", "fuchsia", "green", "lime", "olive", "yellow", "navy", "blue", "teal", "aqua" }; #define N_COLOR_NAMES (sizeof(colorNames)/sizeof(ctmbstr)) #define N_COLORS N_COLOR_NAMES /* function prototypes */ static void InitAccessibilityChecks( TidyDocImpl* doc, int level123 ); static void FreeAccessibilityChecks( 
TidyDocImpl* doc ); static Bool GetRgb( ctmbstr color, int rgb[3] ); static Bool CompareColors( const int rgbBG[3], const int rgbFG[3] ); static int ctox( tmbchar ch ); /* static void CheckMapAccess( TidyDocImpl* doc, Node* node, Node* front); static void GetMapLinks( TidyDocImpl* doc, Node* node, Node* front); static void CompareAnchorLinks( TidyDocImpl* doc, Node* front, int counter); static void FindMissingLinks( TidyDocImpl* doc, Node* node, int counter); */ static void CheckFormControls( TidyDocImpl* doc, Node* node ); static void MetaDataPresent( TidyDocImpl* doc, Node* node ); static void CheckEmbed( TidyDocImpl* doc, Node* node ); static void CheckListUsage( TidyDocImpl* doc, Node* node ); /* GetFileExtension takes a path and returns the extension portion of the path (if any). */ static void GetFileExtension( ctmbstr path, tmbchar *ext, uint maxExt ) { int i = TY_(tmbstrlen)(path) - 1; ext[0] = '\0'; do { if ( path[i] == '/' || path[i] == '\\' ) break; else if ( path[i] == '.' ) { TY_(tmbstrncpy)( ext, path+i, maxExt ); break; } } while ( --i > 0 ); } /************************************************************************ * IsImage * * Checks if the given filename is an image file. * Returns 'aye' if it is, 'no' if it's not. ************************************************************************/ static Bool IsImage( ctmbstr iType ) { uint i; /* Get the file extension */ tmbchar ext[20]; GetFileExtension( iType, ext, sizeof(ext) ); /* Compare it to the array of known image file extensions */ for (i = 0; i < N_IMAGE_EXTS; i++) { if ( TY_(tmbstrcasecmp)(ext, imageExtensions[i]) == 0 ) return aye; } return no; } /*********************************************************************** * IsSoundFile * * Checks if the given filename is a sound file. * Returns 'aye' if it is, 'no' if it's not. 
***********************************************************************/ static int IsSoundFile( ctmbstr sType ) { uint i; tmbchar ext[ 20 ]; GetFileExtension( sType, ext, sizeof(ext) ); for (i = 0; i < N_AUDIO_EXTS; i++) { if ( TY_(tmbstrcasecmp)(ext, soundExtensions[i]) == 0 ) return soundExtErrCodes[i]; } return 0; } /*********************************************************************** * IsValidSrcExtension * * Checks if the 'SRC' value within the FRAME element is valid * The 'SRC' extension must end in ".htm", ".html", ".shtm", ".shtml", * ".cfm", ".cfml", ".asp", ".cgi", ".pl", or ".smil" * * Returns aye if it is, returns no otherwise. ***********************************************************************/ static Bool IsValidSrcExtension( ctmbstr sType ) { uint i; tmbchar ext[20]; GetFileExtension( sType, ext, sizeof(ext) ); for (i = 0; i < N_FRAME_EXTS; i++) { if ( TY_(tmbstrcasecmp)(ext, frameExtensions[i]) == 0 ) return aye; } return no; } /********************************************************************* * IsValidMediaExtension * * Checks to warn the user that syncronized text equivalents are * required if multimedia is used. *********************************************************************/ static Bool IsValidMediaExtension( ctmbstr sType ) { uint i; tmbchar ext[20]; GetFileExtension( sType, ext, sizeof(ext) ); for (i = 0; i < N_MEDIA_EXTS; i++) { if ( TY_(tmbstrcasecmp)(ext, mediaExtensions[i]) == 0 ) return aye; } return no; } /************************************************************************ * IsWhitespace * * Checks if the given string is all whitespace. * Returns 'aye' if it is, 'no' if it's not. ************************************************************************/ static Bool IsWhitespace( ctmbstr pString ) { Bool isWht = aye; ctmbstr cp; for ( cp = pString; isWht && cp && *cp; ++cp ) { isWht = TY_(IsWhite)( *cp ); } return isWht; } static Bool hasValue( AttVal* av ) { return ( av && ! 
IsWhitespace(av->value) ); } /*********************************************************************** * IsPlaceholderAlt * * Checks to see if there is an image and photo place holder contained * in the ALT text. * * Returns 'aye' if there is, 'no' if not. ***********************************************************************/ static Bool IsPlaceholderAlt( ctmbstr txt ) { return ( strstr(txt, "image") != NULL || strstr(txt, "photo") != NULL ); } /*********************************************************************** * IsPlaceholderTitle * * Checks to see if there is an TITLE place holder contained * in the 'ALT' text. * * Returns 'aye' if there is, 'no' if not. static Bool IsPlaceHolderTitle( ctmbstr txt ) { return ( strstr(txt, "title") != NULL ); } ***********************************************************************/ /*********************************************************************** * IsPlaceHolderObject * * Checks to see if there is an OBJECT place holder contained * in the 'ALT' text. * * Returns 'aye' if there is, 'no' if not. ***********************************************************************/ static Bool IsPlaceHolderObject( ctmbstr txt ) { return ( strstr(txt, "object") != NULL ); } /********************************************************** * EndsWithBytes * * Checks to see if the ALT text ends with 'bytes' * Returns 'aye', if true, 'no' otherwise. **********************************************************/ static Bool EndsWithBytes( ctmbstr txt ) { uint len = TY_(tmbstrlen)( txt ); return ( len >= 5 && TY_(tmbstrcmp)(txt+len-5, "bytes") == 0 ); } /******************************************************* * textFromOneNode * * Returns a list of characters contained within one * text node. 
*******************************************************/ static ctmbstr textFromOneNode( TidyDocImpl* doc, Node* node ) { uint i; uint x = 0; tmbstr txt = doc->access.text; if ( node ) { /* Copy contents of a text node */ for (i = node->start; i < node->end; ++i, ++x ) { txt[x] = doc->lexer->lexbuf[i]; /* Check buffer overflow */ if ( x >= sizeof(doc->access.text)-1 ) break; } } txt[x] = '\0'; return txt; } /********************************************************* * getTextNode * * Locates text nodes within a container element. * Retrieves text that are found contained within * text nodes, and concatenates the text. *********************************************************/ static void getTextNode( TidyDocImpl* doc, Node* node ) { tmbstr txtnod = doc->access.textNode; /* Continues to traverse through container element until it no longer contains any more contents */ /* If the tag of the node is NULL, then grab the text within the node */ if ( TY_(nodeIsText)(node) ) { uint i; /* Retrieves each character found within the text node */ for (i = node->start; i < node->end; i++) { /* The text must not exceed buffer */ if ( doc->access.counter >= TEXTBUF_SIZE-1 ) return; txtnod[ doc->access.counter++ ] = doc->lexer->lexbuf[i]; } /* Traverses through the contents within a container element */ for ( node = node->content; node != NULL; node = node->next ) getTextNode( doc, node ); } } /********************************************************** * getTextNodeClear * * Clears the current 'textNode' and reloads it with new * text. The textNode must be cleared before use. **********************************************************/ static tmbstr getTextNodeClear( TidyDocImpl* doc, Node* node ) { /* Clears list */ TidyClearMemory( doc->access.textNode, TEXTBUF_SIZE ); doc->access.counter = 0; getTextNode( doc, node->content ); return doc->access.textNode; } /********************************************************** * LevelX_Enabled * * Tell whether access "X" is enabled. 
**********************************************************/ static Bool Level1_Enabled( TidyDocImpl* doc ) { return doc->access.PRIORITYCHK == 1 || doc->access.PRIORITYCHK == 2 || doc->access.PRIORITYCHK == 3; } static Bool Level2_Enabled( TidyDocImpl* doc ) { return doc->access.PRIORITYCHK == 2 || doc->access.PRIORITYCHK == 3; } static Bool Level3_Enabled( TidyDocImpl* doc ) { return doc->access.PRIORITYCHK == 3; } /******************************************************** * CheckColorAvailable * * Verify that information conveyed with color is * available without color. ********************************************************/ static void CheckColorAvailable( TidyDocImpl* doc, Node* node ) { if (Level1_Enabled( doc )) { if ( nodeIsIMG(node) ) TY_(ReportAccessWarning)( doc, node, INFORMATION_NOT_CONVEYED_IMAGE ); else if ( nodeIsAPPLET(node) ) TY_(ReportAccessWarning)( doc, node, INFORMATION_NOT_CONVEYED_APPLET ); else if ( nodeIsOBJECT(node) ) TY_(ReportAccessWarning)( doc, node, INFORMATION_NOT_CONVEYED_OBJECT ); else if ( nodeIsSCRIPT(node) ) TY_(ReportAccessWarning)( doc, node, INFORMATION_NOT_CONVEYED_SCRIPT ); else if ( nodeIsINPUT(node) ) TY_(ReportAccessWarning)( doc, node, INFORMATION_NOT_CONVEYED_INPUT ); } } /********************************************************************* * CheckColorContrast * * Checks elements for color contrast. Must have valid contrast for * valid visibility. * * This logic is extremely fragile as it does not recognize * the fact that color is inherited by many components and * that BG and FG colors are often set separately. E.g. the * background color may be set by for the body or a table * or a cell. The foreground color may be set by any text * element (p, h1, h2, input, textarea), either explicitly * or by style. Ergo, this test will not handle most real * world cases. It's a start, however. 
*********************************************************************/ static void CheckColorContrast( TidyDocImpl* doc, Node* node ) { int rgbBG[3] = {255,255,255}; /* Black text on white BG */ if (Level3_Enabled( doc )) { Bool gotBG = aye; AttVal* av; /* Check for 'BGCOLOR' first to compare with other color attributes */ for ( av = node->attributes; av; av = av->next ) { if ( attrIsBGCOLOR(av) ) { if ( hasValue(av) ) gotBG = GetRgb( av->value, rgbBG ); } } /* Search for COLOR attributes to compare with background color Must have valid colour contrast */ for ( av = node->attributes; gotBG && av != NULL; av = av->next ) { uint errcode = 0; if ( attrIsTEXT(av) ) errcode = COLOR_CONTRAST_TEXT; else if ( attrIsLINK(av) ) errcode = COLOR_CONTRAST_LINK; else if ( attrIsALINK(av) ) errcode = COLOR_CONTRAST_ACTIVE_LINK; else if ( attrIsVLINK(av) ) errcode = COLOR_CONTRAST_VISITED_LINK; if ( errcode && hasValue(av) ) { int rgbFG[3] = {0, 0, 0}; /* Black text */ if ( GetRgb(av->value, rgbFG) && !CompareColors(rgbBG, rgbFG) ) { TY_(ReportAccessWarning)( doc, node, errcode ); } } } } } /************************************************************** * CompareColors * * Compares two RGB colors for good contrast. **************************************************************/ static int minmax( int i1, int i2 ) { return MAX(i1, i2) - MIN(i1,i2); } static int brightness( const int rgb[3] ) { return ((rgb[0]*299) + (rgb[1]*587) + (rgb[2]*114)) / 1000; } static Bool CompareColors( const int rgbBG[3], const int rgbFG[3] ) { int brightBG = brightness( rgbBG ); int brightFG = brightness( rgbFG ); int diffBright = minmax( brightBG, brightFG ); int diffColor = minmax( rgbBG[0], rgbFG[0] ) + minmax( rgbBG[1], rgbFG[1] ) + minmax( rgbBG[2], rgbFG[2] ); return ( diffBright > 180 && diffColor > 500 ); } /********************************************************************* * GetRgb * * Gets the red, green and blue values for this attribute for the * background. 
* * Example: If attribute is BGCOLOR="#121005" then red = 18, green = 16, * blue = 5. *********************************************************************/ static Bool GetRgb( ctmbstr color, int rgb[] ) { uint x; /* Check if we have a color name */ for (x = 0; x < N_COLORS; x++) { if ( strstr(colorNames[x], color) != NULL ) { rgb[0] = colorValues[x][0]; rgb[1] = colorValues[x][1]; rgb[2] = colorValues[x][2]; return aye; } } /* No color name so must be hex values Is this a number in hexadecimal format? */ /* Must be 7 characters in the RGB value (including '#') */ if ( TY_(tmbstrlen)(color) == 7 && color[0] == '#' ) { rgb[0] = (ctox(color[1]) * 16) + ctox(color[2]); rgb[1] = (ctox(color[3]) * 16) + ctox(color[4]); rgb[2] = (ctox(color[5]) * 16) + ctox(color[6]); return aye; } return no; } /******************************************************************* * ctox * * Converts a character to a number. * Example: if given character is 'A' then returns 10. * * Returns the number that the character represents. Returns -1 if not a * valid number. *******************************************************************/ static int ctox( tmbchar ch ) { if ( ch >= '0' && ch <= '9' ) { return ch - '0'; } else if ( ch >= 'a' && ch <= 'f' ) { return ch - 'a' + 10; } else if ( ch >= 'A' && ch <= 'F' ) { return ch - 'A' + 10; } return -1; } /*********************************************************** * CheckImage * * Checks all image attributes for specific elements to * check for validity of the values contained within * the attributes. An appropriate warning message is displayed * to indicate the error. 
***********************************************************/

static void CheckImage( TidyDocImpl* doc, Node* node )
{
    Bool HasAlt = no;
    Bool HasIsMap = no;
    Bool HasLongDesc = no;
    Bool HasDLINK = no;

    Bool HasValidHeight = no;
    Bool HasValidWidthBullet = no;
    Bool HasValidWidthHR = no;
    Bool HasTriggeredMissingLongDesc = no;

    /* Cursor used to look at the nodes following the image.  'node'
       itself is never advanced, so every report below is attached to
       the IMG element rather than to a neighbouring anchor or text. */
    Node* scan = node;
    AttVal* av;

    if ( !Level1_Enabled( doc ) )
        return;

    /* Checks all image attributes for invalid values within attributes */
    for (av = node->attributes; av != NULL; av = av->next)
    {
        /*
           Checks for valid ALT attribute.
           The length of the alt text must be less than 150 characters
           long.
        */
        if ( attrIsALT(av) )
        {
            if (av->value != NULL)
            {
                /* Any alt text, even a suspicious one, counts as
                   present; suspicious text gets at most one warning,
                   chosen in the order below. */
                HasAlt = aye;

                if (TY_(tmbstrlen)(av->value) >= 150)
                {
                    TY_(ReportAccessWarning)( doc, node, IMG_ALT_SUSPICIOUS_TOO_LONG );
                }
                else if (IsImage (av->value) == aye)
                {
                    TY_(ReportAccessWarning)( doc, node, IMG_ALT_SUSPICIOUS_FILENAME);
                }
                else if (IsPlaceholderAlt (av->value) == aye ||
                         IsPlaceHolderObject (av->value) == aye)
                {
                    TY_(ReportAccessWarning)( doc, node, IMG_ALT_SUSPICIOUS_PLACEHOLDER);
                }
                else if (EndsWithBytes (av->value) == aye)
                {
                    TY_(ReportAccessWarning)( doc, node, IMG_ALT_SUSPICIOUS_FILE_SIZE);
                }
            }
        }

        /*
           Checks for width values of 'bullets' and 'horizontal
           rules' for validity.

           Valid pixel width for 'bullets' must be < 30, and > 150 for
           horizontal rules.
        */
        else if ( attrIsWIDTH(av) )
        {
            /* Longdesc attribute needed if width attribute is not present. */
            if ( hasValue(av) )
            {
                int width = atoi( av->value );
                if ( width < 30 )
                    HasValidWidthBullet = aye;
                if ( width > 150 )
                    HasValidWidthHR = aye;
            }
        }

        /*
           Checks for height values of 'bullets' and horizontal
           rules for validity.

           Valid pixel height for 'bullets' and horizontal rules
           must be < 30.
        */
        else if ( attrIsHEIGHT(av) )
        {
            /* Longdesc attribute needed if height attribute not present. */
            if ( hasValue(av) && atoi(av->value) < 30 )
                HasValidHeight = aye;
        }

        /*
           Checks for longdesc and determines validity.
           The length of the 'longdesc' must be > 1
        */
        else if ( attrIsLONGDESC(av) )
        {
            if ( hasValue(av) && TY_(tmbstrlen)(av->value) > 1 )
                HasLongDesc = aye;
        }

        /*
           Checks for 'USEMAP' attribute.  Ensures that
           text links are provided for client-side image maps
        */
        else if ( attrIsUSEMAP(av) )
        {
            if ( hasValue(av) )
                doc->access.HasUseMap = aye;
        }

        else if ( attrIsISMAP(av) )
        {
            HasIsMap = aye;
        }
    }


    /*
        Check to see if a dLINK is present.  The ANCHOR element must
        be present following the IMG element.  The text found between
        the ANCHOR tags must be exactly "d" or "D".
    */
    if ( nodeIsA(scan->next) )
    {
        scan = scan->next;

        /*
            Node following the anchor must be a text node
            for dLINK to exist
        */
        if (scan->content != NULL && scan->content->tag == NULL)
        {
            ctmbstr word = textFromOneNode( doc, scan->content );

            if ((TY_(tmbstrcmp)(word, "d") == 0) ||
                (TY_(tmbstrcmp)(word, "D") == 0))
            {
                HasDLINK = aye;
            }
        }
    }

    /*
        Special case check for dLINK.  This will occur if there is
        whitespace between the IMG and A elements.  Ignores
        whitespace and continues check for dLINK.  The scan carries
        on from wherever the check above left the cursor.
    */
    if ( scan->next && !scan->next->tag )
    {
        scan = scan->next;

        if ( nodeIsA(scan->next) )
        {
            scan = scan->next;

            /*
                Node following the ANCHOR must be a text node
                for dLINK to exist
            */
            if (scan->content != NULL && scan->content->tag == NULL)
            {
                ctmbstr word = textFromOneNode( doc, scan->content );

                if ((TY_(tmbstrcmp)(word, "d") == 0) ||
                    (TY_(tmbstrcmp)(word, "D") == 0))
                {
                    HasDLINK = aye;
                }
            }
        }
    }

    if (HasAlt == no)
    {
        TY_(ReportAccessError)( doc, node, IMG_MISSING_ALT);
    }

    /* Small images (bullets, rules) are exempt from the
       longdesc / d-link warnings below */
    if ((HasLongDesc == no) &&
        (HasValidHeight == aye) &&
        ((HasValidWidthHR == aye) ||
         (HasValidWidthBullet == aye)))
    {
        HasTriggeredMissingLongDesc = aye;
    }

    if (HasTriggeredMissingLongDesc == no)
    {
        if ((HasDLINK == aye) &&
            (HasLongDesc == no))
        {
            TY_(ReportAccessWarning)( doc, node, IMG_MISSING_LONGDESC);
        }

        if ((HasLongDesc == aye) &&
            (HasDLINK == no))
        {
            TY_(ReportAccessWarning)( doc, node, IMG_MISSING_DLINK);
        }

        if ((HasLongDesc == no) &&
            (HasDLINK == no))
        {
            TY_(ReportAccessWarning)( doc, node, IMG_MISSING_LONGDESC_DLINK);
        }
    }

    if (HasIsMap == aye)
    {
        TY_(ReportAccessError)( doc, node, IMAGE_MAP_SERVER_SIDE_REQUIRES_CONVERSION);

        TY_(ReportAccessWarning)( doc, node, IMG_MAP_SERVER_REQUIRES_TEXT_LINKS);
    }
}


/***********************************************************
* CheckApplet
*
* Checks APPLET element to check for validity pertaining
* the 'ALT' attribute.  An appropriate warning message is
* displayed to indicate the error.  If no 'ALT'
* text is present, then there must be alternate content
* within the APPLET element.
***********************************************************/

static void CheckApplet( TidyDocImpl* doc, Node* node )
{
    Bool altPresent = no;
    Node* child;
    AttVal* attr;

    if ( !Level1_Enabled( doc ) )
        return;

    /* An ALT attribute with any non-NULL value is accepted; its
       length and contents are not examined here. */
    for ( attr = node->attributes; attr != NULL; attr = attr->next )
    {
        if ( attrIsALT(attr) && attr->value != NULL )
            altPresent = aye;
    }

    if ( altPresent )
        return;

    /* Must have alternate text representation for that element:
       look for text in the first child, or in that child's own
       first child (the latter wins when both exist). */
    child = node->content;
    if ( child != NULL )
    {
        ctmbstr text = NULL;

        if ( child->tag == NULL )
            text = textFromOneNode( doc, child );

        if ( child->content != NULL && child->content->tag == NULL )
            text = textFromOneNode( doc, child->content );

        /* Non-blank alternate content stands in for ALT */
        if ( text != NULL && !IsWhitespace(text) )
            return;
    }

    TY_(ReportAccessError)( doc, node, APPLET_MISSING_ALT );
}


/*******************************************************************
* CheckObject
*
* Checks to verify whether the OBJECT element contains
* 'ALT' text, and to see that the sound file selected is
* of a valid sound file type.  OBJECT must have an alternate text
* representation.
*******************************************************************/ static void CheckObject( TidyDocImpl* doc, Node* node ) { Bool HasAlt = no; Bool HasDescription = no; if (Level1_Enabled( doc )) { if ( node->content != NULL) { if ( node->content->type != TextNode ) { Node* tnode = node->content; AttVal* av; for ( av=tnode->attributes; av; av = av->next ) { if ( attrIsALT(av) ) { HasAlt = aye; break; } } } /* Must have alternate text representation for that element */ if ( !HasAlt ) { ctmbstr word = NULL; if ( TY_(nodeIsText)(node->content) ) word = textFromOneNode( doc, node->content ); if ( word == NULL && TY_(nodeIsText)(node->content->content) ) { word = textFromOneNode( doc, node->content->content ); } if ( word != NULL && !IsWhitespace(word) ) HasDescription = aye; } } if ( !HasAlt && !HasDescription ) { TY_(ReportAccessError)( doc, node, OBJECT_MISSING_ALT ); } } } /*************************************************************** * CheckMissingStyleSheets * * Ensures that stylesheets are used to control the presentation. ***************************************************************/ static Bool CheckMissingStyleSheets( TidyDocImpl* doc, Node* node ) { AttVal* av; Node* content; Bool sspresent = no; for ( content = node->content; !sspresent && content != NULL; content = content->next ) { sspresent = ( nodeIsLINK(content) || nodeIsSTYLE(content) || nodeIsFONT(content) || nodeIsBASEFONT(content) ); for ( av = content->attributes; !sspresent && av != NULL; av = av->next ) { sspresent = ( attrIsSTYLE(av) || attrIsTEXT(av) || attrIsVLINK(av) || attrIsALINK(av) || attrIsLINK(av) ); if ( !sspresent && attrIsREL(av) ) { sspresent = AttrValueIs(av, "stylesheet"); } } if ( ! sspresent ) sspresent = CheckMissingStyleSheets( doc, content ); } return sspresent; } /******************************************************************* * CheckFrame * * Checks if the URL is valid and to check if a 'LONGDESC' is needed * within the FRAME element. 
If a 'LONGDESC' is needed, the value must
* be valid. The 'SRC' value must end with one of the extensions
* accepted by IsValidSrcExtension.
* Also, checks to ensure that the 'SRC' and 'TITLE' values are valid.
*******************************************************************/

static void CheckFrame( TidyDocImpl* doc, Node* node )
{
    Bool titleSeen = no;
    AttVal* attr;

    /* Every FRAME is counted, whatever the priority level */
    doc->access.numFrames++;

    if ( !Level1_Enabled( doc ) )
        return;

    /* Checks for attributes within the FRAME element */
    for ( attr = node->attributes; attr != NULL; attr = attr->next )
    {
        /* Checks if 'LONGDESC' value is valid only if present */
        if ( attrIsLONGDESC(attr) )
        {
            if ( hasValue(attr) && TY_(tmbstrlen)(attr->value) > 1 )
                doc->access.HasCheckedLongDesc++;
            continue;
        }

        /* Checks for valid 'SRC' value within the frame element */
        if ( attrIsSRC(attr) )
        {
            if ( hasValue(attr) && !IsValidSrcExtension(attr->value) )
                TY_(ReportAccessError)( doc, node, FRAME_SRC_INVALID );
            continue;
        }

        /* Checks for valid 'TITLE' value within frame element */
        if ( !attrIsTITLE(attr) )
            continue;

        if ( hasValue(attr) )
            titleSeen = aye;

        if ( titleSeen )
            continue;

        /* A TITLE that is empty or blank is reported as invalid, and
           then treated as present so that the "missing" error below
           is not raised as well. */
        if ( attr->value == NULL || TY_(tmbstrlen)(attr->value) == 0 )
        {
            titleSeen = aye;
            TY_(ReportAccessError)( doc, node, FRAME_TITLE_INVALID_NULL);
        }
        else if ( IsWhitespace(attr->value) &&
                  TY_(tmbstrlen)(attr->value) > 0 )
        {
            titleSeen = aye;
            TY_(ReportAccessError)( doc, node, FRAME_TITLE_INVALID_SPACES );
        }
    }

    if ( !titleSeen )
        TY_(ReportAccessError)( doc, node, FRAME_MISSING_TITLE);

    /* On the third counted frame, warn unless three valid LONGDESC
       values have been seen; the frame counter restarts only when
       the warning is issued. */
    if ( doc->access.numFrames==3 && doc->access.HasCheckedLongDesc<3 )
    {
        doc->access.numFrames = 0;
        TY_(ReportAccessWarning)( doc, node, FRAME_MISSING_LONGDESC );
    }
}


/****************************************************************
* CheckIFrame
*
* Checks if 'SRC' value is valid.  Must end in appropriate
* file extension.
****************************************************************/

static void CheckIFrame( TidyDocImpl* doc, Node* node )
{
    AttVal* src;

    if ( !Level1_Enabled( doc ) )
        return;

    /* Checks for valid 'SRC' value within the IFRAME element */
    src = attrGetSRC( node );
    if ( hasValue(src) && !IsValidSrcExtension(src->value) )
        TY_(ReportAccessError)( doc, node, FRAME_SRC_INVALID );
}


/**********************************************************************
* CheckAnchorAccess
*
* Checks that the sound file is valid, and to ensure that
* text transcript is present describing the 'HREF' within the
* ANCHOR element.  Also checks to see ensure that the 'TARGET' attribute
* (if it exists) is not '_new' or '_blank', and (at priority 2)
* that the link text is present, meaningful and not too long.
**********************************************************************/

static void CheckAnchorAccess( TidyDocImpl* doc, Node* node )
{
    const Bool level1 = Level1_Enabled( doc );
    const Bool level2 = Level2_Enabled( doc );
    Bool described = no;
    AttVal* attr;

    /* Checks for attributes within the ANCHOR element */
    for ( attr = node->attributes; attr != NULL; attr = attr->next )
    {
        /* Priority 1: multimedia and sound targets of 'HREF' */
        if ( level1 && attrIsHREF(attr) && hasValue(attr) )
        {
            tmbchar suffix[ 20 ];
            uint suffixLen;

            GetFileExtension( attr->value, suffix, sizeof(suffix) );

            /* Checks to see if multimedia is used */
            if ( IsValidMediaExtension(attr->value) )
            {
                TY_(ReportAccessError)( doc, node, MULTIMEDIA_REQUIRES_TEXT );
            }

            /*
               Checks for validity of sound file, and checks to see if
               the file is described by text that directly follows the
               anchor.
            */
            suffixLen = TY_(tmbstrlen)(suffix);
            if ( suffixLen < 6 && suffixLen > 0 )
            {
                int soundCode = IsSoundFile( attr->value );

                if ( soundCode )
                {
                    Node* after = node->next;

                    if ( after != NULL && after->tag == NULL )
                    {
                        ctmbstr trailing = textFromOneNode( doc, after );

                        /* Must contain at least one letter in the text */
                        if ( !IsWhitespace(trailing) )
                            described = aye;
                    }

                    /* Must contain text description of sound file */
                    if ( !described )
                        TY_(ReportAccessError)( doc, node, soundCode );
                }
            }
        }

        /* Priority 2: 'TARGET' values that open a new window */
        if ( level2 && attrIsTARGET(attr) )
        {
            if (AttrValueIs(attr, "_new"))
            {
                TY_(ReportAccessWarning)( doc, node, NEW_WINDOWS_REQUIRE_WARNING_NEW);
            }
            else if (AttrValueIs(attr, "_blank"))
            {
                TY_(ReportAccessWarning)( doc, node, NEW_WINDOWS_REQUIRE_WARNING_BLANK);
            }
        }
    }

    /* The remaining checks on the link text are priority 2 */
    if ( !level2 )
        return;

    if ( node->content == NULL )
    {
        TY_(ReportAccessWarning)( doc, node, LINK_TEXT_MISSING);
        return;
    }

    if ( node->content->tag == NULL )
    {
        ctmbstr label = textFromOneNode( doc, node->content );

        if ( label != NULL && !IsWhitespace(label) )
        {
            /* The exact text "more" is exempt from the short-text
               warning below */
            int isMore = ( TY_(tmbstrcmp)(label, "more") == 0 );

            if ( TY_(tmbstrcmp)(label, "click here") == 0 )
            {
                TY_(ReportAccessWarning)( doc, node, LINK_TEXT_NOT_MEANINGFUL_CLICK_HERE);
            }

            if ( !isMore && TY_(tmbstrlen)(label) < 6 )
            {
                TY_(ReportAccessWarning)( doc, node, LINK_TEXT_NOT_MEANINGFUL);
            }

            if ( TY_(tmbstrlen)(label) > 60 )
            {
                TY_(ReportAccessWarning)( doc, node, LINK_TEXT_TOO_LONG);
            }
        }
    }
}


/************************************************************
* CheckArea
*
* Checks attributes within the AREA element to
* determine if the 'ALT' text and 'HREF' values are valid.
* Also checks to see ensure that the 'TARGET' attribute
* (if it exists) is not NULL and does not contain '_new'
* or '_blank'.
************************************************************/

static void CheckArea( TidyDocImpl* doc, Node* node )
{
    const Bool level1 = Level1_Enabled( doc );
    const Bool level2 = Level2_Enabled( doc );
    Bool altPresent = no;
    AttVal* attr;

    /* Checks all attributes within the AREA element */
    for ( attr = node->attributes; attr != NULL; attr = attr->next )
    {
        /* Priority 1: an ALT attribute with any non-NULL value is
           accepted; its length is not examined here. */
        if ( level1 && attrIsALT(attr) && attr->value != NULL )
            altPresent = aye;

        /* Priority 2: 'TARGET' values that open a new window */
        if ( level2 && attrIsTARGET(attr) )
        {
            if (AttrValueIs(attr, "_new"))
            {
                TY_(ReportAccessWarning)( doc, node, NEW_WINDOWS_REQUIRE_WARNING_NEW);
            }
            else if (AttrValueIs(attr, "_blank"))
            {
                TY_(ReportAccessWarning)( doc, node, NEW_WINDOWS_REQUIRE_WARNING_BLANK);
            }
        }
    }

    /* AREA must contain alt text */
    if ( level1 && altPresent == no )
    {
        TY_(ReportAccessError)( doc, node, AREA_MISSING_ALT);
    }
}


/***************************************************
* CheckScript
*
* Checks the SCRIPT element to ensure that a
* NOSCRIPT section follows the SCRIPT.
***************************************************/

static void CheckScriptAcc( TidyDocImpl* doc, Node* node )
{
    if ( !Level1_Enabled( doc ) )
        return;

    /* NOSCRIPT element must appear immediately following SCRIPT element */
    if ( node->next != NULL && nodeIsNOSCRIPT(node->next) )
        return;

    TY_(ReportAccessError)( doc, node, SCRIPT_MISSING_NOSCRIPT);
}


/**********************************************************
* CheckRows
*
* Check to see that each table has a row of headers if
* a column of columns doesn't exist.
**********************************************************/

static void CheckRows( TidyDocImpl* doc, Node* node )
{
    /* 'node' is the first of a list of sibling rows.  A row counts
       as headed when its first cell is a TH whose first child is a
       non-blank text node. */
    int rowCount = 0;
    int headedRows = 0;
    Node* row;

    doc->access.CheckedHeaders++;

    for ( row = node; row != NULL; row = row->next )
    {
        Node* firstCell = row->content;

        rowCount++;

        if ( !nodeIsTH(firstCell) )
            continue;

        doc->access.HasTH = aye;

        if ( TY_(nodeIsText)(firstCell->content) )
        {
            ctmbstr label = textFromOneNode( doc, firstCell->content );

            if ( !IsWhitespace(label) )
                headedRows++;
        }
    }

    /* Every row carries a header (also true for an empty list) */
    if ( rowCount == headedRows )
        doc->access.HasValidRowHeaders = aye;

    /* Some, but not all, rows carry a header */
    if ( rowCount >= 2 &&
         headedRows >= 2 &&
         headedRows < rowCount &&
         doc->access.HasTH == aye )
        doc->access.HasInvalidRowHeader = aye;
}


/**********************************************************
* CheckColumns
*
* Check to see that each table has a column of headers if
* a row of columns doesn't exist.
**********************************************************/

static void CheckColumns( TidyDocImpl* doc, Node* node )
{
    /* 'node' is a row; its cells are examined only when the first
       one is a TH. */
    int labelledHeaders = 0;
    Bool sawNonHeader = no;

    doc->access.CheckedHeaders++;

    /* Table must have row of headers if headers for columns don't exist */
    if ( nodeIsTH(node->content) )
    {
        Node* cell = node->content;

        doc->access.HasTH = aye;

        while ( cell != NULL )
        {
            if ( !nodeIsTH(cell) )
            {
                sawNonHeader = aye;
            }
            else if ( TY_(nodeIsText)(cell->content) )
            {
                ctmbstr label = textFromOneNode( doc, cell->content );

                if ( !IsWhitespace(label) )
                    labelledHeaders++;
            }

            cell = cell->next;
        }
    }

    /* All cells are headers and at least one has text */
    if ( !sawNonHeader && labelledHeaders > 0 )
        doc->access.HasValidColumnHeaders = aye;

    /* Headers mixed with other cells */
    if ( sawNonHeader && labelledHeaders >= 2 )
        doc->access.HasInvalidColumnHeader = aye;
}


/*****************************************************
* CheckTH
*
* Checks to see if the header provided for a table
* requires an abbreviation.
(only required if the * length of the header is greater than 15 characters) *****************************************************/ static void CheckTH( TidyDocImpl* doc, Node* node ) { Bool HasAbbr = no; ctmbstr word = NULL; AttVal* av; if (Level3_Enabled( doc )) { /* Checks TH element for 'ABBR' attribute */ for (av = node->attributes; av != NULL; av = av->next) { if ( attrIsABBR(av) ) { /* Value must not be NULL and must be less than 15 characters */ if ((av->value != NULL)&& (IsWhitespace (av->value) == no)) { HasAbbr = aye; } if ((av->value == NULL)|| (TY_(tmbstrlen)(av->value) == 0)) { HasAbbr = aye; TY_(ReportAccessWarning)( doc, node, TABLE_MAY_REQUIRE_HEADER_ABBR_NULL); } if ((IsWhitespace (av->value) == aye)&& (TY_(tmbstrlen)(av->value) > 0)) { HasAbbr = aye; TY_(ReportAccessWarning)( doc, node, TABLE_MAY_REQUIRE_HEADER_ABBR_SPACES); } } } /* If the header is greater than 15 characters, an abbreviation is needed */ word = textFromOneNode( doc, node->content); if ((word != NULL)&& (IsWhitespace (word) == no)) { /* Must have 'ABBR' attribute if header is > 15 characters */ if ((TY_(tmbstrlen)(word) > 15)&& (HasAbbr == no)) { TY_(ReportAccessWarning)( doc, node, TABLE_MAY_REQUIRE_HEADER_ABBR); } } } } /***************************************************************** * CheckMultiHeaders * * Layout tables should make sense when linearized. * TABLE must contain at least one TH element. * This technique applies only to tables used for layout purposes, * not to data tables. Checks for column of multiple headers. *****************************************************************/ static void CheckMultiHeaders( TidyDocImpl* doc, Node* node ) { Node* TNode; Node* temp; Bool validColSpanRows = aye; Bool validColSpanColumns = aye; int flag = 0; if (Level1_Enabled( doc )) { if (node->content != NULL) { TNode = node->content; /* Checks for column of multiple headers found within a data table. 
*/ while (TNode != NULL) { if ( nodeIsTR(TNode) ) { flag = 0; /* Issue #168 - access test 5-2-1-2 */ if (TNode->content != NULL) { temp = TNode->content; /* The number of TH elements found within TR element */ if (flag == 0) { while (temp != NULL) { /* Must contain at least one TH element within in the TR element */ if ( nodeIsTH(temp) ) { AttVal* av; for (av = temp->attributes; av != NULL; av = av->next) { if ( attrIsCOLSPAN(av) && (atoi(av->value) > 1) ) validColSpanColumns = no; if ( attrIsROWSPAN(av) && (atoi(av->value) > 1) ) validColSpanRows = no; } } temp = temp->next; } flag = 1; } } } TNode = TNode->next; } /* Displays HTML 4 Table Algorithm when multiple column of headers used */ if (validColSpanRows == no) { TY_(ReportAccessWarning)( doc, node, DATA_TABLE_REQUIRE_MARKUP_ROW_HEADERS ); TY_(DisplayHTMLTableAlgorithm)( doc ); } if (validColSpanColumns == no) { TY_(ReportAccessWarning)( doc, node, DATA_TABLE_REQUIRE_MARKUP_COLUMN_HEADERS ); TY_(DisplayHTMLTableAlgorithm)( doc ); } } } } /**************************************************** * CheckTable * * Checks the TABLE element to ensure that the * table is not missing any headers. Must have either * a row or column of headers. 
****************************************************/
/* CheckTable runs, in this order:
     priority 3 - SUMMARY attribute validation, and an early return
                  (after reporting DATA_TABLE_MISSING_HEADERS) when the
                  table has no content;
     priority 1 - CheckMultiHeaders;
     priority 2 - CAPTION presence;
     always     - CheckColumns on the first TR, then CheckRows when no
                  valid column headers were recorded;
     priority 3 - missing SUMMARY;
     priority 2 - single-row and HasTH warnings;
     priority 1 - missing row/column header errors. */
static void CheckTable( TidyDocImpl* doc, Node* node )
{
    Node* firstRow = NULL;
    Node* child;

    tmbstr captionText = NULL;

    int rowCount = 0;

    Bool summarySeen = no;
    Bool captionSeen = no;

    if (Level3_Enabled( doc ))
    {
        AttVal* attr;

        /* Table must have a 'SUMMARY' describing the purpose of the table */
        for (attr = node->attributes; attr != NULL; attr = attr->next)
        {
            if ( !attrIsSUMMARY(attr) )
                continue;

            if ( hasValue(attr) )
            {
                summarySeen = aye;

                /* A value containing both words is treated as placeholder text */
                if (AttrContains(attr, "summary") &&
                    AttrContains(attr, "table"))
                {
                    TY_(ReportAccessError)( doc, node, TABLE_SUMMARY_INVALID_PLACEHOLDER );
                }
            }

            if ( attr->value == NULL || TY_(tmbstrlen)(attr->value) == 0 )
            {
                summarySeen = aye;
                TY_(ReportAccessError)( doc, node, TABLE_SUMMARY_INVALID_NULL );
            }
            else if ( IsWhitespace(attr->value) &&
                      TY_(tmbstrlen)(attr->value) > 0 )
            {
                summarySeen = aye;
                TY_(ReportAccessError)( doc, node, TABLE_SUMMARY_INVALID_SPACES );
            }
        }

        /* TABLE must have content. */
        if (node->content == NULL)
        {
            TY_(ReportAccessError)( doc, node, DATA_TABLE_MISSING_HEADERS);
            return;
        }
    }

    /* Checks for multiple headers */
    if (Level1_Enabled( doc ))
        CheckMultiHeaders( doc, node );

    if (Level2_Enabled( doc ))
    {
        /* Table must have a CAPTION describing the purpose of the table */
        if ( nodeIsCAPTION(node->content) )
        {
            Node* caption = node->content;

            if (caption->content && caption->content->tag == NULL)
                captionText = getTextNodeClear( doc, caption );

            if ( !IsWhitespace(captionText) )
                captionSeen = aye;
        }

        if (captionSeen == no)
        {
            TY_(ReportAccessError)( doc, node, TABLE_MISSING_CAPTION);
        }
    }

    /* The row handed to the header checks is the TR right after a
       leading CAPTION, or else the first child when that is a TR. */
    if (node->content != NULL)
    {
        if ( nodeIsCAPTION(node->content) && nodeIsTR(node->content->next) )
            firstRow = node->content->next;
        else if ( nodeIsTR(node->content) )
            firstRow = node->content;
    }

    if (firstRow != NULL)
        CheckColumns( doc, firstRow );

    /* Row headers are only examined when no valid column headers
       have been recorded. */
    if ( !doc->access.HasValidColumnHeaders && firstRow != NULL )
        CheckRows( doc, firstRow );

    if (Level3_Enabled( doc ) && summarySeen == no)
    {
        TY_(ReportAccessError)( doc, node, TABLE_MISSING_SUMMARY);
    }

    if (Level2_Enabled( doc ))
    {
        /* Count the TR elements that are direct children of the table;
           an empty table yields 0 and so never triggers the warning. */
        for (child = node->content; child != NULL; child = child->next)
        {
            if ( nodeIsTR(child) )
                rowCount++;
        }

        if (rowCount == 1)
        {
            TY_(ReportAccessWarning)( doc, node, LAYOUT_TABLES_LINEARIZE_PROPERLY);
        }

        if ( doc->access.HasTH )
        {
            TY_(ReportAccessWarning)( doc, node, LAYOUT_TABLE_INVALID_MARKUP);
        }
    }

    /* Header verdicts are only issued once CheckedHeaders equals 2 */
    if (Level1_Enabled( doc ) && doc->access.CheckedHeaders == 2)
    {
        if ( !doc->access.HasValidRowHeaders &&
             !doc->access.HasValidColumnHeaders &&
             !doc->access.HasInvalidRowHeader &&
             !doc->access.HasInvalidColumnHeader )
        {
            TY_(ReportAccessError)( doc, node, DATA_TABLE_MISSING_HEADERS);
        }

        if ( !doc->access.HasValidRowHeaders &&
             doc->access.HasInvalidRowHeader )
        {
            TY_(ReportAccessError)( doc, node, DATA_TABLE_MISSING_HEADERS_ROW);
        }

        if ( !doc->access.HasValidColumnHeaders &&
             doc->access.HasInvalidColumnHeader )
        {
            TY_(ReportAccessError)( doc, node, DATA_TABLE_MISSING_HEADERS_COLUMN);
        }
    }
}


/***************************************************
* CheckASCII
*
* Checks for valid text equivalents for XMP and PRE
* elements for ASCII art. Ensures that there is
* a skip over link to skip multi-lined ASCII art.
***************************************************/
/* Detection (priority 1, node must have content): the text of
   node->content is scanned in doc->lexer->lexbuf, starting one
   character after its start offset.  The node is treated as ASCII art
   when a run of 5 identical characters is found, or when the newline
   counter reaches 6 (it starts at -1, so seven '\n' characters are
   needed).

   Skip-over link: an HREF with a value on node->prev->prev is
   remembered; at priority 2 a following A element whose NAME value
   occurs inside that HREF completes the link.

   Reporting happens inside the priority 2 branch:
   ASCII_REQUIRES_DESCRIPTION whenever ASCII art was detected, plus
   SKIPOVER_ASCII_ART at priority 3 when the skip-over link is
   incomplete. */
static void CheckASCII( TidyDocImpl* doc, Node* node )
{
    Node* temp1;
    Node* temp2;

    tmbstr skipOver = NULL;
    Bool IsAscii = no;
    int HasSkipOverLink = 0;

    uint i, x;
    int newLines = -1;
    tmbchar compareLetter;
    int matchingCount = 0;
    AttVal* av;

    if (Level1_Enabled( doc ) && node->content)
    {
        /*
           Checks the text within the PRE and XMP tags to see if ascii
           art is present
        */
        for (i = node->content->start + 1; i < node->content->end; i++)
        {
            matchingCount = 0;

            /* Counts the number of lines of text */
            if (doc->lexer->lexbuf[i] == '\n')
            {
                newLines++;
            }

            compareLetter = doc->lexer->lexbuf[i];

            /* Counts consecutive character matches.  The look-ahead is
               bounded by the end of this node's text so that it never
               reads lexbuf content that does not belong to the node. */
            for (x = i; x < i + 5 && x < node->content->end; x++)
            {
                if (doc->lexer->lexbuf[x] == compareLetter)
                {
                    matchingCount++;
                }
                else
                {
                    break;
                }
            }

            /* Must have at least 5 consecutive character matches */
            if (matchingCount >= 5)
            {
                break;
            }
        }

        /*
           ASCII art is assumed once the newline counter reaches 6 OR
           5 consecutive identical characters were found
        */
        if (newLines >= 6 || matchingCount >= 5)
        {
            IsAscii = aye;
        }

        /* Checks for skip over link if ASCII art is present */
        if (IsAscii == aye)
        {
            if (node->prev != NULL && node->prev->prev != NULL)
            {
                temp1 = node->prev->prev;

                /* Checks for 'HREF' attribute */
                for (av = temp1->attributes; av != NULL; av = av->next)
                {
                    if ( attrIsHREF(av) && hasValue(av) )
                    {
                        skipOver = av->value;
                        HasSkipOverLink++;
                    }
                }
            }
        }
    }

    if (Level2_Enabled( doc ))
    {
        /*
           Checks for A element following PRE to ensure proper skipover link
           only if there is an A element preceding PRE.
           HasSkipOverLink == 1 means exactly one HREF was recorded above,
           so skipOver is non-NULL here.
        */
        if (HasSkipOverLink == 1)
        {
            if ( nodeIsA(node->next) )
            {
                temp2 = node->next;

                /* Checks for 'NAME' attribute */
                for (av = temp2->attributes; av != NULL; av = av->next)
                {
                    if ( attrIsNAME(av) && hasValue(av) )
                    {
                        /*
                           The 'NAME' value must occur within the 'HREF'
                           value for a valid skipover (substring match).
                        */
                        if ( strstr(skipOver, av->value) != NULL )
                        {
                            HasSkipOverLink++;
                        }
                    }
                }
            }
        }

        if (IsAscii == aye)
        {
            TY_(ReportAccessError)( doc, node, ASCII_REQUIRES_DESCRIPTION);
            if (Level3_Enabled( doc ) && (HasSkipOverLink < 2))
                TY_(ReportAccessError)( doc, node, SKIPOVER_ASCII_ART);
        }
    }
}


/***********************************************************
* CheckFormControls
*
*
must have valid 'FOR' attribute, and