You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
2693 lines
70 KiB
2693 lines
70 KiB
6 years ago
|
/*
|
||
|
clean.c -- clean up misuse of presentation markup
|
||
|
|
||
|
(c) 1998-2008 (W3C) MIT, ERCIM, Keio University
|
||
|
See tidy.h for the copyright notice.
|
||
|
|
||
|
Filters from other formats such as Microsoft Word
|
||
|
often make excessive use of presentation markup such
|
||
|
as font tags, B, I, and the align attribute. By applying
|
||
|
a set of production rules, it is straightforward to
|
||
|
transform this to use CSS.
|
||
|
|
||
|
Some rules replace some of the children of an element by
|
||
|
style properties on the element, e.g.
|
||
|
|
||
|
<p><b>...</b></p> -> <p style="font-weight: bold">...</p>
|
||
|
|
||
|
Such rules are applied to the element's content and then
|
||
|
to the element itself until no more rules apply.
|
||
|
Having applied all the rules to an element, it will have
|
||
|
a style attribute with one or more properties.
|
||
|
|
||
|
Other rules strip the element they apply to, replacing
|
||
|
it by style properties on the contents, e.g.
|
||
|
|
||
|
<dir><li><p>...</li></dir> -> <p style="margin-left 1em">...
|
||
|
|
||
|
These rules are applied to an element before processing
|
||
|
its content and replace the current element by the first
|
||
|
element in the exposed content.
|
||
|
|
||
|
After applying both sets of rules, you can replace the
|
||
|
style attribute by a class value and style rule in the
|
||
|
document head. To support this, an association of styles
|
||
|
and class names is built.
|
||
|
|
||
|
A naive approach is to rely on string matching to test
|
||
|
when two property lists are the same. A better approach
|
||
|
would be to first sort the properties before matching.
|
||
|
|
||
|
*/
|
||
|
|
||
|
#include <stdio.h>
|
||
|
#include <stdlib.h>
|
||
|
#include <string.h>
|
||
|
|
||
|
#include "tidy-int.h"
|
||
|
#include "clean.h"
|
||
|
#include "lexer.h"
|
||
|
#include "parser.h"
|
||
|
#include "attrs.h"
|
||
|
#include "message.h"
|
||
|
#include "tmbstr.h"
|
||
|
#include "utf8.h"
|
||
|
|
||
|
static Node* CleanNode( TidyDocImpl* doc, Node *node );
|
||
|
|
||
|
/*
  Turn node into an element of kind tid: a copy of the new tag's
  name replaces the old element name and the tag definition
  pointer is switched to the definition looked up for tid.
*/
static void RenameElem( TidyDocImpl* doc, Node* node, TidyTagId tid )
{
    const Dict* newTag = TY_(LookupTagDef)( tid );
    tmbstr newName = TY_(tmbstrdup)( doc->allocator, newTag->name );

    TidyDocFree( doc, node->element );
    node->element = newName;
    node->tag = newTag;
}
|
||
|
|
||
|
/*
  Release a whole StyleProp list: for every entry the name string,
  the value string and the entry itself are freed.
*/
static void FreeStyleProps(TidyDocImpl* doc, StyleProp *props)
{
    StyleProp *entry = props;

    while (entry != NULL)
    {
        /* remember the successor before the entry goes away */
        StyleProp *following = entry->next;

        TidyDocFree(doc, entry->name);
        TidyDocFree(doc, entry->value);
        TidyDocFree(doc, entry);

        entry = following;
    }
}
|
||
|
|
||
|
/*
  Insert name/value into the property list props, which is kept
  ordered by name (tmbstrcmp order), and return the list head.

  If an entry with the same name is already present the list is
  returned untouched: the existing value wins and the new one is
  ignored. Otherwise a new entry holding copies of name and value
  is linked in before the first entry that sorts after it, or at
  the end of the list.
*/
static StyleProp *InsertProperty( TidyDocImpl* doc, StyleProp* props, ctmbstr name, ctmbstr value )
{
    /* link always addresses the pointer that leads to the entry
       under inspection, so the head needs no special casing */
    StyleProp **link = &props;
    StyleProp *fresh;

    while (*link != NULL)
    {
        int order = TY_(tmbstrcmp)((*link)->name, name);

        if (order == 0)
            return props;   /* already defined, ignore new value */

        if (order > 0)
            break;          /* insert before this entry */

        link = &(*link)->next;
    }

    fresh = (StyleProp *)TidyDocAlloc(doc, sizeof(StyleProp));
    fresh->name = TY_(tmbstrdup)(doc->allocator, name);
    fresh->value = TY_(tmbstrdup)(doc->allocator, value);
    fresh->next = *link;
    *link = fresh;

    return props;
}
|
||
|
|
||
|
/*
|
||
|
Create sorted linked list of properties from style string
|
||
|
It temporarily places nulls in place of ':' and ';' to
|
||
|
delimit the strings for the property name and value.
|
||
|
Some systems don't allow you to NULL literal strings,
|
||
|
so to avoid this, a copy is made first.
|
||
|
*/
|
||
|
static StyleProp* CreateProps( TidyDocImpl* doc, StyleProp* prop, ctmbstr style )
|
||
|
{
|
||
|
tmbstr name, value = NULL, name_end, value_end, line;
|
||
|
Bool more;
|
||
|
|
||
|
line = TY_(tmbstrdup)(doc->allocator, style);
|
||
|
name = line;
|
||
|
|
||
|
while (*name)
|
||
|
{
|
||
|
while (*name == ' ')
|
||
|
++name;
|
||
|
|
||
|
name_end = name;
|
||
|
|
||
|
while (*name_end)
|
||
|
{
|
||
|
if (*name_end == ':')
|
||
|
{
|
||
|
value = name_end + 1;
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
++name_end;
|
||
|
}
|
||
|
|
||
|
if (*name_end != ':')
|
||
|
break;
|
||
|
|
||
|
while ( value && *value == ' ')
|
||
|
++value;
|
||
|
|
||
|
value_end = value;
|
||
|
more = no;
|
||
|
|
||
|
while (*value_end)
|
||
|
{
|
||
|
if (*value_end == ';')
|
||
|
{
|
||
|
more = yes;
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
++value_end;
|
||
|
}
|
||
|
|
||
|
*name_end = '\0';
|
||
|
*value_end = '\0';
|
||
|
|
||
|
prop = InsertProperty(doc, prop, name, value);
|
||
|
*name_end = ':';
|
||
|
|
||
|
if (more)
|
||
|
{
|
||
|
*value_end = ';';
|
||
|
name = value_end + 1;
|
||
|
continue;
|
||
|
}
|
||
|
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
TidyDocFree(doc, line); /* free temporary copy */
|
||
|
return prop;
|
||
|
}
|
||
|
|
||
|
/*
  Serialise a property list back into a newly allocated style
  string of the form "name: value; name: value". An entry without
  a value contributes just its name. An empty list yields an empty
  string. The caller is handed the allocated string.
*/
static tmbstr CreatePropString(TidyDocImpl* doc, StyleProp *props)
{
    StyleProp *item;
    tmbstr result, out;
    ctmbstr src;
    uint size = 0;

    /* reserve name + 2 per entry, plus value + 2 where a value
       exists; this covers the ": " and "; " separators */
    for (item = props; item != NULL; item = item->next)
    {
        size += TY_(tmbstrlen)(item->name) + 2;
        if (item->value)
            size += TY_(tmbstrlen)(item->value) + 2;
    }

    result = (tmbstr) TidyDocAlloc(doc, size + 1);
    result[0] = '\0';
    out = result;

    for (item = props; item != NULL; item = item->next)
    {
        for (src = item->name; *src != '\0'; ++src)
            *out++ = *src;

        if (item->value)
        {
            *out++ = ':';
            *out++ = ' ';

            for (src = item->value; *src != '\0'; ++src)
                *out++ = *src;
        }

        /* separator only between entries, not after the last one */
        if (item->next != NULL)
        {
            *out++ = ';';
            *out++ = ' ';
        }
    }

    *out = '\0';
    return result;
}
|
||
|
|
||
|
/*
|
||
|
create string with merged properties
|
||
|
static tmbstr AddProperty( ctmbstr style, ctmbstr property )
|
||
|
{
|
||
|
tmbstr line;
|
||
|
StyleProp *prop;
|
||
|
|
||
|
prop = CreateProps(doc, NULL, style);
|
||
|
prop = CreateProps(doc, prop, property);
|
||
|
line = CreatePropString(doc, prop);
|
||
|
FreeStyleProps(doc, prop);
|
||
|
return line;
|
||
|
}
|
||
|
*/
|
||
|
|
||
|
/*
  Release the dictionary of tag/class/properties entries that hangs
  off the document's lexer (lexer->styles). Does nothing when the
  document has no lexer.

  The list head is reset to NULL afterwards so that the lexer does
  not keep a pointer to freed memory; this also makes a repeated
  call harmless.
*/
void TY_(FreeStyles)( TidyDocImpl* doc )
{
    Lexer* lexer = doc->lexer;
    if ( lexer )
    {
        TagStyle *style, *next;
        for ( style = lexer->styles; style; style = next )
        {
            /* fetch the successor before this entry is freed */
            next = style->next;
            TidyDocFree( doc, style->tag );
            TidyDocFree( doc, style->tag_class );
            TidyDocFree( doc, style->properties );
            TidyDocFree( doc, style );
        }

        /* the list is gone: do not leave a dangling head behind */
        lexer->styles = NULL;
    }
}
|
||
|
|
||
|
/*
  Produce a new class name: the configured TidyCSSPrefix (or "c"
  when that option is unset or empty) followed by the document's
  class counter, which is incremented first. The name is returned
  as a newly allocated copy.
*/
static tmbstr GensymClass( TidyDocImpl* doc )
{
    tmbchar name[512]; /* CSSPrefix is limited to 256 characters */
    ctmbstr prefix = cfgStr(doc, TidyCSSPrefix);

    if ( !prefix || prefix[0] == '\0' )
        prefix = "c";

    ++doc->nClassId;
    TY_(tmbsnprintf)(name, sizeof(name), "%s%u", prefix, doc->nClassId );

    return TY_(tmbstrdup)(doc->allocator, name);
}
|
||
|
|
||
|
/*
  Return the class name associated with the pair (tag, properties)
  in the lexer's style dictionary.

  Entries are matched by exact string comparison of both the tag
  name and the property string. When no entry matches, a new one
  is created with a generated class name and pushed onto the front
  of the dictionary. The returned string belongs to the dictionary
  entry.
*/
static ctmbstr FindStyle( TidyDocImpl* doc, ctmbstr tag, ctmbstr properties )
{
    Lexer* lexer = doc->lexer;
    TagStyle* entry;

    for (entry = lexer->styles; entry != NULL; entry = entry->next)
    {
        if (TY_(tmbstrcmp)(entry->tag, tag) != 0)
            continue;

        if (TY_(tmbstrcmp)(entry->properties, properties) == 0)
            return entry->tag_class;
    }

    /* not known yet: record it under a new class name */
    entry = (TagStyle *)TidyDocAlloc( doc, sizeof(TagStyle) );
    entry->tag = TY_(tmbstrdup)(doc->allocator, tag);
    entry->tag_class = GensymClass( doc );
    entry->properties = TY_(tmbstrdup)( doc->allocator, properties );

    entry->next = lexer->styles;
    lexer->styles = entry;

    return entry->tag_class;
}
|
||
|
|
||
|
/*
|
||
|
Add class="foo" to node
|
||
|
*/
|
||
|
static void AddClass( TidyDocImpl* doc, Node* node, ctmbstr classname )
|
||
|
{
|
||
|
AttVal *classattr = TY_(AttrGetById)(node, TidyAttr_CLASS);;
|
||
|
|
||
|
/*
|
||
|
if there already is a class attribute
|
||
|
then append class name after a space.
|
||
|
*/
|
||
|
if (classattr)
|
||
|
TY_(AppendToClassAttr)( doc, classattr, classname );
|
||
|
else /* create new class attribute */
|
||
|
TY_(AddAttribute)( doc, node, "class", classname );
|
||
|
}
|
||
|
|
||
|
/*
  Look up (or create) the class that stands for stylevalue on
  elements named like node, and add that class to node.
*/
void TY_(AddStyleAsClass)( TidyDocImpl* doc, Node *node, ctmbstr stylevalue )
{
    AddClass( doc, node, FindStyle( doc, node->element, stylevalue ) );
}
|
||
|
|
||
|
/*
|
||
|
Find style attribute in node, and replace it
|
||
|
by corresponding class attribute. Search for
|
||
|
class in style dictionary otherwise gensym
|
||
|
new class and add to dictionary.
|
||
|
|
||
|
Assumes that node doesn't have a class attribute
|
||
|
*/
|
||
|
static void Style2Rule( TidyDocImpl* doc, Node *node)
|
||
|
{
|
||
|
AttVal *styleattr, *classattr;
|
||
|
ctmbstr classname;
|
||
|
|
||
|
styleattr = TY_(AttrGetById)(node, TidyAttr_STYLE);
|
||
|
|
||
|
if (styleattr)
|
||
|
{
|
||
|
/* fix for http://tidy.sf.net/bug/850215 */
|
||
|
if (!styleattr->value)
|
||
|
{
|
||
|
TY_(RemoveAttribute)(doc, node, styleattr);
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
classname = FindStyle( doc, node->element, styleattr->value );
|
||
|
classattr = TY_(AttrGetById)(node, TidyAttr_CLASS);
|
||
|
|
||
|
/*
|
||
|
if there already is a class attribute
|
||
|
then append class name after an underscore
|
||
|
*/
|
||
|
if (classattr)
|
||
|
{
|
||
|
TY_(AppendToClassAttr)( doc, classattr, classname );
|
||
|
TY_(RemoveAttribute)( doc, node, styleattr );
|
||
|
}
|
||
|
else /* reuse style attribute for class attribute */
|
||
|
{
|
||
|
TidyDocFree(doc, styleattr->attribute);
|
||
|
TidyDocFree(doc, styleattr->value);
|
||
|
styleattr->attribute = TY_(tmbstrdup)(doc->allocator, "class");
|
||
|
styleattr->value = TY_(tmbstrdup)(doc->allocator, classname);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/*
  Append the rule "selector { color: color }" followed by a newline
  to the lexer buffer. Nothing is emitted when either selector or
  color is NULL.
*/
static void AddColorRule( Lexer* lexer, ctmbstr selector, ctmbstr color )
{
    if ( !selector || !color )
        return;

    TY_(AddStringLiteral)(lexer, selector);
    TY_(AddStringLiteral)(lexer, " { color: ");
    TY_(AddStringLiteral)(lexer, color);
    TY_(AddStringLiteral)(lexer, " }\n");
}
|
||
|
|
||
|
/*
|
||
|
move presentation attribs from body to style element
|
||
|
|
||
|
background="foo" -> body { background-image: url(foo) }
|
||
|
bgcolor="foo" -> body { background-color: foo }
|
||
|
text="foo" -> body { color: foo }
|
||
|
link="foo" -> :link { color: foo }
|
||
|
vlink="foo" -> :visited { color: foo }
|
||
|
alink="foo" -> :active { color: foo }
|
||
|
*/
|
||
|
static void CleanBodyAttrs( TidyDocImpl* doc, Node* body )
|
||
|
{
|
||
|
Lexer* lexer = doc->lexer;
|
||
|
tmbstr bgurl = NULL;
|
||
|
tmbstr bgcolor = NULL;
|
||
|
tmbstr color = NULL;
|
||
|
AttVal* attr;
|
||
|
|
||
|
if (NULL != (attr = TY_(AttrGetById)(body, TidyAttr_BACKGROUND)))
|
||
|
{
|
||
|
bgurl = attr->value;
|
||
|
attr->value = NULL;
|
||
|
TY_(RemoveAttribute)( doc, body, attr );
|
||
|
}
|
||
|
|
||
|
if (NULL != (attr = TY_(AttrGetById)(body, TidyAttr_BGCOLOR)))
|
||
|
{
|
||
|
bgcolor = attr->value;
|
||
|
attr->value = NULL;
|
||
|
TY_(RemoveAttribute)( doc, body, attr );
|
||
|
}
|
||
|
|
||
|
if (NULL != (attr = TY_(AttrGetById)(body, TidyAttr_TEXT)))
|
||
|
{
|
||
|
color = attr->value;
|
||
|
attr->value = NULL;
|
||
|
TY_(RemoveAttribute)( doc, body, attr );
|
||
|
}
|
||
|
|
||
|
if ( bgurl || bgcolor || color )
|
||
|
{
|
||
|
TY_(AddStringLiteral)(lexer, " body {\n");
|
||
|
if (bgurl)
|
||
|
{
|
||
|
TY_(AddStringLiteral)(lexer, " background-image: url(");
|
||
|
TY_(AddStringLiteral)(lexer, bgurl);
|
||
|
TY_(AddStringLiteral)(lexer, ");\n");
|
||
|
TidyDocFree(doc, bgurl);
|
||
|
}
|
||
|
if (bgcolor)
|
||
|
{
|
||
|
TY_(AddStringLiteral)(lexer, " background-color: ");
|
||
|
TY_(AddStringLiteral)(lexer, bgcolor);
|
||
|
TY_(AddStringLiteral)(lexer, ";\n");
|
||
|
TidyDocFree(doc, bgcolor);
|
||
|
}
|
||
|
if (color)
|
||
|
{
|
||
|
TY_(AddStringLiteral)(lexer, " color: ");
|
||
|
TY_(AddStringLiteral)(lexer, color);
|
||
|
TY_(AddStringLiteral)(lexer, ";\n");
|
||
|
TidyDocFree(doc, color);
|
||
|
}
|
||
|
|
||
|
TY_(AddStringLiteral)(lexer, " }\n");
|
||
|
}
|
||
|
|
||
|
if (NULL != (attr = TY_(AttrGetById)(body, TidyAttr_LINK)))
|
||
|
{
|
||
|
AddColorRule(lexer, " :link", attr->value);
|
||
|
TY_(RemoveAttribute)( doc, body, attr );
|
||
|
}
|
||
|
|
||
|
if (NULL != (attr = TY_(AttrGetById)(body, TidyAttr_VLINK)))
|
||
|
{
|
||
|
AddColorRule(lexer, " :visited", attr->value);
|
||
|
TY_(RemoveAttribute)( doc, body, attr );
|
||
|
}
|
||
|
|
||
|
if (NULL != (attr = TY_(AttrGetById)(body, TidyAttr_ALINK)))
|
||
|
{
|
||
|
AddColorRule(lexer, " :active", attr->value);
|
||
|
TY_(RemoveAttribute)( doc, body, attr );
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/*
  Report whether the document's body is free of the presentational
  attributes background, bgcolor, text, link, vlink and alink.

  Returns yes when there is no body or none of them is present.
  Otherwise USING_BODY is recorded in doc->badLayout and no is
  returned.
*/
static Bool NiceBody( TidyDocImpl* doc )
{
    static const TidyAttrId presentational[] =
    {
        TidyAttr_BACKGROUND, TidyAttr_BGCOLOR, TidyAttr_TEXT,
        TidyAttr_LINK, TidyAttr_VLINK, TidyAttr_ALINK
    };
    Node* body = TY_(FindBody)(doc);
    uint i;

    if (body == NULL)
        return yes;

    for (i = 0; i < sizeof(presentational)/sizeof(presentational[0]); ++i)
    {
        if (TY_(AttrGetById)(body, presentational[i]) != NULL)
        {
            doc->badLayout |= USING_BODY;
            return no;
        }
    }

    return yes;
}
|
||
|
|
||
|
/* create style element using rules from dictionary */
|
||
|
static void CreateStyleElement( TidyDocImpl* doc )
|
||
|
{
|
||
|
Lexer* lexer = doc->lexer;
|
||
|
Node *node, *head, *body;
|
||
|
TagStyle *style;
|
||
|
AttVal *av;
|
||
|
|
||
|
if ( lexer->styles == NULL && NiceBody(doc) )
|
||
|
return;
|
||
|
|
||
|
node = TY_(NewNode)( doc->allocator, lexer );
|
||
|
node->type = StartTag;
|
||
|
node->implicit = yes;
|
||
|
node->element = TY_(tmbstrdup)(doc->allocator, "style");
|
||
|
TY_(FindTag)( doc, node );
|
||
|
|
||
|
/* insert type attribute */
|
||
|
av = TY_(NewAttributeEx)( doc, "type", "text/css", '"' );
|
||
|
TY_(InsertAttributeAtStart)( node, av );
|
||
|
|
||
|
body = TY_(FindBody)( doc );
|
||
|
lexer->txtstart = lexer->lexsize;
|
||
|
if ( body )
|
||
|
CleanBodyAttrs( doc, body );
|
||
|
|
||
|
for (style = lexer->styles; style; style = style->next)
|
||
|
{
|
||
|
TY_(AddCharToLexer)(lexer, ' ');
|
||
|
TY_(AddStringLiteral)(lexer, style->tag);
|
||
|
TY_(AddCharToLexer)(lexer, '.');
|
||
|
TY_(AddStringLiteral)(lexer, style->tag_class);
|
||
|
TY_(AddCharToLexer)(lexer, ' ');
|
||
|
TY_(AddCharToLexer)(lexer, '{');
|
||
|
TY_(AddStringLiteral)(lexer, style->properties);
|
||
|
TY_(AddCharToLexer)(lexer, '}');
|
||
|
TY_(AddCharToLexer)(lexer, '\n');
|
||
|
}
|
||
|
|
||
|
lexer->txtend = lexer->lexsize;
|
||
|
|
||
|
TY_(InsertNodeAtEnd)( node, TY_(TextToken)(lexer) );
|
||
|
|
||
|
/*
|
||
|
now insert style element into document head
|
||
|
|
||
|
doc is root node. search its children for html node
|
||
|
the head node should be first child of html node
|
||
|
*/
|
||
|
if ( NULL != (head = TY_(FindHEAD)( doc )) )
|
||
|
TY_(InsertNodeAtEnd)( head, node );
|
||
|
}
|
||
|
|
||
|
|
||
|
/* ensure bidirectional links are consistent */
|
||
|
void TY_(FixNodeLinks)(Node *node)
|
||
|
{
|
||
|
Node *child;
|
||
|
|
||
|
if (node->prev)
|
||
|
node->prev->next = node;
|
||
|
else
|
||
|
node->parent->content = node;
|
||
|
|
||
|
if (node->next)
|
||
|
node->next->prev = node;
|
||
|
else
|
||
|
node->parent->last = node;
|
||
|
|
||
|
for (child = node->content; child; child = child->next)
|
||
|
child->parent = node;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
used to strip child of node when
|
||
|
the node has one and only one child
|
||
|
*/
|
||
|
static void StripOnlyChild(TidyDocImpl* doc, Node *node)
|
||
|
{
|
||
|
Node *child;
|
||
|
|
||
|
child = node->content;
|
||
|
node->content = child->content;
|
||
|
node->last = child->last;
|
||
|
child->content = NULL;
|
||
|
TY_(FreeNode)(doc, child);
|
||
|
|
||
|
for (child = node->content; child; child = child->next)
|
||
|
child->parent = node;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
used to strip font start and end tags.
|
||
|
Extricate "element", replace it by its content and delete it.
|
||
|
*/
|
||
|
static void DiscardContainer( TidyDocImpl* doc, Node *element, Node **pnode)
|
||
|
{
|
||
|
if (element->content)
|
||
|
{
|
||
|
Node *node, *parent = element->parent;
|
||
|
|
||
|
element->last->next = element->next;
|
||
|
|
||
|
if (element->next)
|
||
|
{
|
||
|
element->next->prev = element->last;
|
||
|
}
|
||
|
else
|
||
|
parent->last = element->last;
|
||
|
|
||
|
if (element->prev)
|
||
|
{
|
||
|
element->content->prev = element->prev;
|
||
|
element->prev->next = element->content;
|
||
|
}
|
||
|
else
|
||
|
parent->content = element->content;
|
||
|
|
||
|
for (node = element->content; node; node = node->next)
|
||
|
node->parent = parent;
|
||
|
|
||
|
*pnode = element->content;
|
||
|
|
||
|
element->next = element->content = NULL;
|
||
|
TY_(FreeNode)(doc, element);
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
*pnode = TY_(DiscardElement)(doc, element);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
Create new string that consists of the
|
||
|
combined style properties in s1 and s2
|
||
|
|
||
|
To merge property lists, we build a linked
|
||
|
list of property/values and insert properties
|
||
|
into the list in order, merging values for
|
||
|
the same property name.
|
||
|
*/
|
||
|
static tmbstr MergeProperties( TidyDocImpl* doc, ctmbstr s1, ctmbstr s2 )
|
||
|
{
|
||
|
tmbstr s;
|
||
|
StyleProp *prop;
|
||
|
|
||
|
prop = CreateProps(doc, NULL, s1);
|
||
|
prop = CreateProps(doc, prop, s2);
|
||
|
s = CreatePropString(doc, prop);
|
||
|
FreeStyleProps(doc, prop);
|
||
|
return s;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
Add style property to element, creating style
|
||
|
attribute as needed and adding ; delimiter
|
||
|
*/
|
||
|
void TY_(AddStyleProperty)(TidyDocImpl* doc, Node *node, ctmbstr property )
|
||
|
{
|
||
|
AttVal *av = TY_(AttrGetById)(node, TidyAttr_STYLE);
|
||
|
|
||
|
/* if style attribute already exists then insert property */
|
||
|
|
||
|
if ( av )
|
||
|
{
|
||
|
if (av->value != NULL)
|
||
|
{
|
||
|
tmbstr s = MergeProperties( doc, av->value, property );
|
||
|
TidyDocFree( doc, av->value );
|
||
|
av->value = s;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
av->value = TY_(tmbstrdup)( doc->allocator, property );
|
||
|
}
|
||
|
}
|
||
|
else /* else create new style attribute */
|
||
|
{
|
||
|
av = TY_(NewAttributeEx)( doc, "style", property, '"' );
|
||
|
TY_(InsertAttributeAtStart)( node, av );
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/*
  Bring the class names of child over to node.

  - child has no class value: node is left untouched
  - both have a class value: node's value becomes
    "<node classes> <child classes>"
  - node has a class attribute without a value: the child's class
    string is copied into that attribute (rather than adding a
    second class attribute to node)
  - node has no class attribute: a new one holding the child's
    class string is inserted at the start of node's attributes
*/
static void MergeClasses(TidyDocImpl* doc, Node *node, Node *child)
{
    AttVal *av;
    AttVal *nodeClass = NULL;   /* node's class attribute, if any */
    tmbstr s1 = NULL, s2 = NULL, names;

    for (av = child->attributes; av; av = av->next)
    {
        if (attrIsCLASS(av))
        {
            s2 = av->value;
            break;
        }
    }

    for (av = node->attributes; av; av = av->next)
    {
        if (attrIsCLASS(av))
        {
            nodeClass = av;
            s1 = av->value;
            break;
        }
    }

    if (s2 == NULL)
        return;     /* nothing to copy from the child */

    if (s1) /* merge class names from both */
    {
        uint l1, l2;
        l1 = TY_(tmbstrlen)(s1);
        l2 = TY_(tmbstrlen)(s2);

        /* room for s1, a separating space, s2 and the terminator */
        names = (tmbstr) TidyDocAlloc(doc, l1 + l2 + 2);
        TY_(tmbstrcpy)(names, s1);
        names[l1] = ' ';
        TY_(tmbstrcpy)(names+l1+1, s2);

        TidyDocFree(doc, nodeClass->value);
        nodeClass->value = names;
    }
    else if (nodeClass) /* empty class attribute: fill it in */
    {
        nodeClass->value = TY_(tmbstrdup)(doc->allocator, s2);
    }
    else /* copy class names from child */
    {
        av = TY_(NewAttributeEx)( doc, "class", s2, '"' );
        TY_(InsertAttributeAtStart)( node, av );
    }
}
|
||
|
|
||
|
/*
  Bring the class and style attributes of child over to node.

  Classes are handled first by MergeClasses: the child may have a
  class attribute used for attaching styles, and that class name
  needs to be copied to node's class.

  For the style attribute:
  - child has no style value: node's style is left untouched
  - both have a style value: the two are combined with
    MergeProperties (node's value is passed first)
  - node has a style attribute without a value: the child's style
    string is copied into that attribute, as AddStyleProperty does,
    rather than adding a second style attribute to node
  - node has no style attribute: a new one holding the child's
    style string is inserted at the start of node's attributes
*/
static void MergeStyles(TidyDocImpl* doc, Node *node, Node *child)
{
    AttVal *av;
    AttVal *nodeStyle = NULL;   /* node's style attribute, if any */
    tmbstr s1 = NULL, s2 = NULL, style;

    MergeClasses(doc, node, child);

    for (av = child->attributes; av; av = av->next)
    {
        if (attrIsSTYLE(av))
        {
            s2 = av->value;
            break;
        }
    }

    for (av = node->attributes; av; av = av->next)
    {
        if (attrIsSTYLE(av))
        {
            nodeStyle = av;
            s1 = av->value;
            break;
        }
    }

    if (s2 == NULL)
        return;     /* nothing to copy from the child */

    if (s1) /* merge styles from both */
    {
        style = MergeProperties(doc, s1, s2);
        TidyDocFree(doc, nodeStyle->value);
        nodeStyle->value = style;
    }
    else if (nodeStyle) /* empty style attribute: fill it in */
    {
        nodeStyle->value = TY_(tmbstrdup)(doc->allocator, s2);
    }
    else /* copy style of child */
    {
        av = TY_(NewAttributeEx)( doc, "style", s2, '"' );
        TY_(InsertAttributeAtStart)( node, av );
    }
}
|
||
|
|
||
|
/*
  Map the value of a font size attribute to a CSS font-size value.

  - ""                 -> NULL
  - leading digit 0..6 -> entry of the absolute table (NULL for 3)
  - "-" + digit 0..6   -> entry of the shrink table, else "smaller"
  - any other leading character: the second character, if a digit
    0..6, selects an entry of the grow table, else "larger"

  Only the first two characters of size are examined. The returned
  strings are static; nothing needs to be freed.
*/
static ctmbstr FontSize2Name(ctmbstr size)
{
    static const ctmbstr absolute[7] =
    {
        "60%", "70%", "80%", NULL,
        "120%", "150%", "200%"
    };

    /* increment of 0.8 */
    static const ctmbstr shrink[] =
    {
        "100%", "80%", "64%", "51%",
        "40%", "32%", "26%"
    };

    /* increment of 1.2 */
    static const ctmbstr grow[] =
    {
        "100%", "120%", "144%", "172%",
        "207%", "248%", "298%"
    };

    const ctmbstr *relative;
    ctmbstr fallback;

    if (size[0] == '\0')
        return NULL;

    if (size[0] >= '0' && size[0] <= '6')
        return absolute[size[0] - '0'];

    if (size[0] == '-')
    {
        relative = shrink;
        fallback = "smaller"; /*"70%"; */
    }
    else
    {
        relative = grow;
        fallback = "larger"; /* "140%" */
    }

    if (size[1] >= '0' && size[1] <= '6')
        return relative[size[1] - '0'];

    return fallback;
}
|
||
|
|
||
|
/*
  Add "font-family: <face>" to node's style. The declaration is
  formatted into a fixed 256 character buffer with tmbsnprintf.
*/
static void AddFontFace( TidyDocImpl* doc, Node *node, ctmbstr face )
{
    tmbchar decl[256];

    TY_(tmbsnprintf)(decl, sizeof(decl), "font-family: %s", face );
    TY_(AddStyleProperty)( doc, node, decl );
}
|
||
|
|
||
|
/*
  Apply a font size to node.

  A <p> with size "6", "5" or "4" is turned into <h1>, <h2> or
  <h3> respectively: the element name is replaced and the tag is
  looked up again; no style property is added in that case.

  In every other case the size is translated by FontSize2Name and,
  if that yields a value, "font-size: <value>" is added to node's
  style.
*/
static void AddFontSize( TidyDocImpl* doc, Node* node, ctmbstr size )
{
    ctmbstr heading = NULL;
    ctmbstr cssSize;
    tmbchar decl[64];

    if (nodeIsP(node))
    {
        if (TY_(tmbstrcmp)(size, "6") == 0)
            heading = "h1";
        else if (TY_(tmbstrcmp)(size, "5") == 0)
            heading = "h2";
        else if (TY_(tmbstrcmp)(size, "4") == 0)
            heading = "h3";

        if (heading != NULL)
        {
            TidyDocFree(doc, node->element);
            node->element = TY_(tmbstrdup)(doc->allocator, heading);
            TY_(FindTag)(doc, node);
            return;
        }
    }

    cssSize = FontSize2Name(size);
    if (cssSize == NULL)
        return;

    TY_(tmbsnprintf)(decl, sizeof(decl), "font-size: %s", cssSize);
    TY_(AddStyleProperty)( doc, node, decl );
}
|
||
|
|
||
|
/*
  Add "color: <color>" to node's style. The declaration is
  formatted into a fixed 128 character buffer with tmbsnprintf.
*/
static void AddFontColor( TidyDocImpl* doc, Node *node, ctmbstr color)
{
    tmbchar decl[128];

    TY_(tmbsnprintf)(decl, sizeof(decl), "color: %s", color);
    TY_(AddStyleProperty)( doc, node, decl );
}
|
||
|
|
||
|
/* force alignment value to lower case */
|
||
|
static void AddAlign( TidyDocImpl* doc, Node *node, ctmbstr align )
|
||
|
{
|
||
|
uint i;
|
||
|
tmbchar buf[128];
|
||
|
|
||
|
TY_(tmbstrcpy)( buf, "text-align: " );
|
||
|
for ( i = 12; i < sizeof(buf)/sizeof(buf[0])-1; ++i )
|
||
|
{
|
||
|
if ( (buf[i] = (tmbchar)TY_(ToLower)(*align++)) == '\0' )
|
||
|
break;
|
||
|
}
|
||
|
buf[i] = '\0';
|
||
|
TY_(AddStyleProperty)( doc, node, buf );
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
add style properties to node corresponding to
|
||
|
the font face, size and color attributes
|
||
|
*/
|
||
|
static void AddFontStyles( TidyDocImpl* doc, Node *node, AttVal *av)
|
||
|
{
|
||
|
while (av)
|
||
|
{
|
||
|
if (AttrHasValue(av))
|
||
|
{
|
||
|
if (attrIsFACE(av))
|
||
|
AddFontFace( doc, node, av->value );
|
||
|
else if (attrIsSIZE(av))
|
||
|
AddFontSize( doc, node, av->value );
|
||
|
else if (attrIsCOLOR(av))
|
||
|
AddFontColor( doc, node, av->value );
|
||
|
}
|
||
|
av = av->next;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
Symptom: <p align=center>
|
||
|
Action: <p style="text-align: center">
|
||
|
*/
|
||
|
static void TextAlign( TidyDocImpl* doc, Node* node )
|
||
|
{
|
||
|
AttVal *av, *prev;
|
||
|
|
||
|
prev = NULL;
|
||
|
|
||
|
for (av = node->attributes; av; av = av->next)
|
||
|
{
|
||
|
if (attrIsALIGN(av))
|
||
|
{
|
||
|
if (prev)
|
||
|
prev->next = av->next;
|
||
|
else
|
||
|
node->attributes = av->next;
|
||
|
|
||
|
if (av->value)
|
||
|
AddAlign( doc, node, av->value );
|
||
|
|
||
|
TY_(FreeAttribute)(doc, av);
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
prev = av;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
Symptom: <table bgcolor="red">
|
||
|
Action: <table style="background-color: red">
|
||
|
*/
|
||
|
static void TableBgColor( TidyDocImpl* doc, Node* node )
|
||
|
{
|
||
|
AttVal* attr;
|
||
|
tmbchar buf[256];
|
||
|
|
||
|
if (NULL != (attr = TY_(AttrGetById)(node, TidyAttr_BGCOLOR)))
|
||
|
{
|
||
|
TY_(tmbsnprintf)(buf, sizeof(buf), "background-color: %s", attr->value );
|
||
|
TY_(RemoveAttribute)( doc, node, attr );
|
||
|
TY_(AddStyleProperty)( doc, node, buf );
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
The clean up rules use the pnode argument to return the
|
||
|
next node when the original node has been deleted
|
||
|
*/
|
||
|
|
||
|
/*
|
||
|
Symptom: <dir> <li> where <li> is only child
|
||
|
Action: coerce <dir> <li> to <div> with indent.
|
||
|
*/
|
||
|
|
||
|
static Bool Dir2Div( TidyDocImpl* doc, Node *node, Node **ARG_UNUSED(pnode))
|
||
|
{
|
||
|
Node *child;
|
||
|
|
||
|
if ( nodeIsDIR(node) || nodeIsUL(node) || nodeIsOL(node) )
|
||
|
{
|
||
|
child = node->content;
|
||
|
|
||
|
if (child == NULL)
|
||
|
return no;
|
||
|
|
||
|
/* check child has no peers */
|
||
|
|
||
|
if (child->next)
|
||
|
return no;
|
||
|
|
||
|
if ( !nodeIsLI(child) )
|
||
|
return no;
|
||
|
|
||
|
if ( !child->implicit )
|
||
|
return no;
|
||
|
|
||
|
/* coerce dir to div */
|
||
|
node->tag = TY_(LookupTagDef)( TidyTag_DIV );
|
||
|
TidyDocFree( doc, node->element );
|
||
|
node->element = TY_(tmbstrdup)(doc->allocator, "div");
|
||
|
TY_(AddStyleProperty)( doc, node, "margin-left: 2em" );
|
||
|
StripOnlyChild( doc, node );
|
||
|
return yes;
|
||
|
}
|
||
|
|
||
|
return no;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
Symptom: <center>
|
||
|
Action: replace <center> by <div style="text-align: center">
|
||
|
*/
|
||
|
|
||
|
static Bool Center2Div( TidyDocImpl* doc, Node *node, Node **pnode)
|
||
|
{
|
||
|
if ( nodeIsCENTER(node) )
|
||
|
{
|
||
|
#if 0 // 00000000 what is this doing inside an nodeIsCENTER(node)??? 0000000
|
||
|
if ( cfgBool(doc, TidyDropFontTags) )
|
||
|
{
|
||
|
if (node->content)
|
||
|
{
|
||
|
Node *last = node->last;
|
||
|
DiscardContainer( doc, node, pnode );
|
||
|
|
||
|
node = TY_(InferredTag)(doc, TidyTag_BR);
|
||
|
TY_(InsertNodeAfterElement)(last, node);
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
Node *prev = node->prev, *next = node->next,
|
||
|
*parent = node->parent;
|
||
|
DiscardContainer( doc, node, pnode );
|
||
|
|
||
|
node = TY_(InferredTag)(doc, TidyTag_BR);
|
||
|
if (next)
|
||
|
TY_(InsertNodeBeforeElement)(next, node);
|
||
|
else if (prev)
|
||
|
TY_(InsertNodeAfterElement)(prev, node);
|
||
|
else
|
||
|
TY_(InsertNodeAtStart)(parent, node);
|
||
|
}
|
||
|
|
||
|
return yes;
|
||
|
}
|
||
|
#endif // 00000000 what is this doing inside an nodeIsCENTER(node)??? 0000000
|
||
|
RenameElem( doc, node, TidyTag_DIV );
|
||
|
TY_(AddStyleProperty)( doc, node, "text-align: center" );
|
||
|
return yes;
|
||
|
}
|
||
|
|
||
|
return no;
|
||
|
}
|
||
|
|
||
|
/* Copy child attributes to node. Duplicate attributes are overwritten.
|
||
|
Unique attributes (such as ID) disable the action.
|
||
|
Attributes style and class are not dealt with. A call to MergeStyles
|
||
|
will do that.
|
||
|
*/
|
||
|
static Bool CopyAttrs( TidyDocImpl* doc, Node *node, Node *child)
|
||
|
{
|
||
|
AttVal *av1, *av2;
|
||
|
TidyAttrId id;
|
||
|
|
||
|
/* Detect attributes that cannot be merged or overwritten. */
|
||
|
if (TY_(AttrGetById)(child, TidyAttr_ID) != NULL
|
||
|
&& TY_(AttrGetById)(node, TidyAttr_ID) != NULL)
|
||
|
return no;
|
||
|
|
||
|
/* Move child attributes to node. Attributes in node
|
||
|
can be overwritten or merged. */
|
||
|
for (av2 = child->attributes; av2; )
|
||
|
{
|
||
|
/* Dealt by MergeStyles. */
|
||
|
if (attrIsSTYLE(av2) || attrIsCLASS(av2))
|
||
|
{
|
||
|
av2 = av2->next;
|
||
|
continue;
|
||
|
}
|
||
|
/* Avoid duplicates in node */
|
||
|
if ((id=AttrId(av2)) != TidyAttr_UNKNOWN
|
||
|
&& (av1=TY_(AttrGetById)(node, id))!= NULL)
|
||
|
TY_(RemoveAttribute)( doc, node, av1 );
|
||
|
|
||
|
/* Move attribute from child to node */
|
||
|
TY_(DetachAttribute)( child, av2 );
|
||
|
av1 = av2;
|
||
|
av2 = av2->next;
|
||
|
av1->next = NULL;
|
||
|
TY_(InsertAttributeAtEnd)( node, av1 );
|
||
|
}
|
||
|
|
||
|
return yes;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
Symptom <XX><XX>...</XX></XX>
|
||
|
Action: merge the two XXs
|
||
|
|
||
|
For instance, this is useful after nested <dir>s used by Word
|
||
|
for indenting have been converted to <div>s
|
||
|
|
||
|
If state is "no", no merging.
|
||
|
If state is "yes", inner element is discarded. Only Style and Class
|
||
|
attributes are merged using MergeStyles().
|
||
|
If state is "auto", atttibutes are merged as described in CopyAttrs().
|
||
|
Style and Class attributes are merged using MergeStyles().
|
||
|
*/
|
||
|
static Bool MergeNestedElements( TidyDocImpl* doc,
|
||
|
TidyTagId Id, TidyTriState state, Node *node,
|
||
|
Node **ARG_UNUSED(pnode))
|
||
|
{
|
||
|
Node *child;
|
||
|
|
||
|
if ( state == TidyNoState
|
||
|
|| !TagIsId(node, Id) )
|
||
|
return no;
|
||
|
|
||
|
child = node->content;
|
||
|
|
||
|
if ( child == NULL
|
||
|
|| child->next != NULL
|
||
|
|| !TagIsId(child, Id) )
|
||
|
return no;
|
||
|
|
||
|
if ( state == TidyAutoState
|
||
|
&& CopyAttrs(doc, node, child) == no )
|
||
|
return no;
|
||
|
|
||
|
MergeStyles( doc, node, child );
|
||
|
StripOnlyChild( doc, node );
|
||
|
return yes;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
Symptom: <ul><li><ul>...</ul></li></ul>
|
||
|
Action: discard outer list
|
||
|
*/
|
||
|
|
||
|
static Bool NestedList( TidyDocImpl* doc, Node *node, Node **pnode )
|
||
|
{
|
||
|
Node *child, *list;
|
||
|
|
||
|
if ( nodeIsUL(node) || nodeIsOL(node) )
|
||
|
{
|
||
|
child = node->content;
|
||
|
|
||
|
if (child == NULL)
|
||
|
return no;
|
||
|
|
||
|
/* check child has no peers */
|
||
|
|
||
|
if (child->next)
|
||
|
return no;
|
||
|
|
||
|
list = child->content;
|
||
|
|
||
|
if (!list)
|
||
|
return no;
|
||
|
|
||
|
if (list->tag != node->tag)
|
||
|
return no;
|
||
|
|
||
|
/* check list has no peers */
|
||
|
if (list->next)
|
||
|
return no;
|
||
|
|
||
|
*pnode = list; /* Set node to resume iteration */
|
||
|
|
||
|
/* move inner list node into position of outer node */
|
||
|
list->prev = node->prev;
|
||
|
list->next = node->next;
|
||
|
list->parent = node->parent;
|
||
|
TY_(FixNodeLinks)(list);
|
||
|
|
||
|
/* get rid of outer ul and its li */
|
||
|
child->content = NULL;
|
||
|
TY_(FreeNode)( doc, child ); /* See test #427841. */
|
||
|