A small menubar app that allows you to switch between R versions quickly (if you have multiple versions of R framework installed). https://rud.is/rswitch
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

772 lines
27 KiB

5 years ago
//
// HtmlTreeBuilder.swift
// SwiftSoup
//
// Created by Nabil Chatbi on 24/10/16.
// Copyright © 2016 Nabil Chatbi.. All rights reserved.
//
import Foundation
/**
* HTML Tree Builder; creates a DOM from Tokens.
*/
class HtmlTreeBuilder: TreeBuilder {
// MARK: Tag-name lookup tables used by the scope and stack searches

/// Tag names that end a default scope search (`inScope`).
public static let TagsSearchInScope: [String] = ["applet", "caption", "html", "table", "td", "th", "marquee", "object"]
/// Extra scope terminators used by `inListItemScope`.
private static let TagSearchList: [String] = ["ol", "ul"]
/// Extra scope terminators used by `inButtonScope`.
private static let TagSearchButton: [String] = ["button"]
/// Scope terminators used by `inTableScope`.
private static let TagSearchTableScope: [String] = ["html", "table"]
/// The only tag names `inSelectScope` is allowed to walk past.
private static let TagSearchSelectScope: [String] = ["optgroup", "option"]
/// Tag names whose end tags may be implied (`generateImpliedEndTags`).
private static let TagSearchEndTags: [String] = ["dd", "dt", "li", "option", "optgroup", "p", "rp", "rt"]
/// Tag names reported as "special" by `isSpecial`.
private static let TagSearchSpecial: [String] = [
    "address", "applet", "area", "article", "aside",
    "base", "basefont", "bgsound", "blockquote", "body", "br", "button",
    "caption", "center", "col", "colgroup", "command",
    "dd", "details", "dir", "div", "dl", "dt",
    "embed",
    "fieldset", "figcaption", "figure", "footer", "form", "frame", "frameset",
    "h1", "h2", "h3", "h4", "h5", "h6", "head", "header", "hgroup", "hr", "html",
    "iframe", "img", "input", "isindex",
    "li", "link", "listing",
    "marquee", "menu", "meta",
    "nav", "noembed", "noframes", "noscript",
    "object", "ol",
    "p", "param", "plaintext", "pre",
    "script", "section", "select", "style", "summary",
    "table", "tbody", "td", "textarea", "tfoot", "th", "thead", "title", "tr",
    "ul",
    "wbr",
    "xmp"
]

// MARK: Parser state

/// The current insertion-mode state.
private var _state: HtmlTreeBuilderState = .Initial
/// The state saved by `markInsertionMode()`.
private var _originalState: HtmlTreeBuilderState = .Initial
/// Set once a `<base href>` has supplied the base URI (see `maybeSetBaseUri`).
private var baseUriSetFromDoc: Bool = false
/// The current head element.
private var headElement: Element?
/// The current form element.
private var formElement: FormElement?
/// Fragment parse context; may be nil even when parsing a fragment.
private var contextElement: Element?
/// Active (open) formatting elements; a nil entry is a marker.
private var formattingElements: [Element?] = []
/// Characters seen inside a table that are waiting to be shifted out.
private var pendingTableCharacters: [String] = []
/// Reusable empty end tag, emitted after self-closing start tags.
private var emptyEnd: Token.EndTag = Token.EndTag()
/// Whether it is still OK to switch into a frameset.
private var _framesetOk: Bool = true
/// Whether upcoming inserts should be foster-parented.
private var fosterInserts: Bool = false
/// Whether a fragment of HTML (rather than a document) is being parsed.
private var fragmentParsing: Bool = false
/// Creates a tree builder; forwards directly to the superclass initialiser.
public override init() {
super.init()
}
/// Returns the parse settings this builder uses by default: `ParseSettings.htmlDefault`.
public override func defaultSettings() -> ParseSettings {
return ParseSettings.htmlDefault
}
/// Parses a complete document.
///
/// Resets the insertion mode to `Initial` and clears the "base URI already taken
/// from the document" flag, then hands over to the superclass parser.
/// - Returns: the document produced by `super.parse`.
/// - Throws: whatever `super.parse` throws.
override func parse(_ input: String, _ baseUri: String, _ errors: ParseErrorList, _ settings: ParseSettings) throws -> Document {
    baseUriSetFromDoc = false
    _state = .Initial
    return try super.parse(input, baseUri, errors, settings)
}
/// Parses an HTML fragment, optionally in the context of an element.
///
/// With a context element, a synthetic `html` root is appended to the document and
/// pushed on the stack, the tokeniser is put into the state matching the context tag,
/// and the nearest `FormElement` in the context's ancestor chain becomes the current form.
/// - Parameter context: the element the fragment is parsed inside; may be nil.
/// - Returns: the children of the synthetic root when a context was given, otherwise
///   the children of the document.
func parseFragment(_ inputFragment: String, _ context: Element?, _ baseUri: String, _ errors: ParseErrorList, _ settings: ParseSettings) throws -> Array<Node> {
    _state = HtmlTreeBuilderState.Initial
    initialiseParse(inputFragment, baseUri, errors, settings)
    contextElement = context
    fragmentParsing = true

    // No context: parse straight into the document.
    guard let context = context else {
        try runParser()
        return doc.getChildNodes()
    }

    // Quirks setup: inherit the quirks mode of the context's owner document, if any.
    if let owner = context.ownerDocument() {
        doc.quirksMode(owner.quirksMode())
    }

    // Initialise the tokeniser state from the context tag.
    let contextTag: String = context.tagName()
    if StringUtil.inString(contextTag, haystack: "title", "textarea") {
        tokeniser.transition(TokeniserState.Rcdata)
    } else if StringUtil.inString(contextTag, haystack: "iframe", "noembed", "noframes", "style", "xmp") {
        tokeniser.transition(TokeniserState.Rawtext)
    } else if contextTag == "script" {
        tokeniser.transition(TokeniserState.ScriptData)
    } else {
        // "noscript" (would be rawtext if scripting were enabled), "plaintext"
        // and every other tag all start in the Data state.
        tokeniser.transition(TokeniserState.Data)
    }

    let root: Element = try Element(Tag.valueOf("html", settings), baseUri)
    try doc.appendChild(root)
    stack.append(root)
    resetInsertionMode()

    // Set the form element to the nearest form on the context's ancestor chain
    // (the context itself first), so form controls are associated correctly.
    let ancestry: Elements = context.parents()
    ancestry.add(0, context)
    for candidate: Element in ancestry.array() {
        if let form = candidate as? FormElement {
            formElement = form
            break
        }
    }

    try runParser()
    return root.getChildNodes()
}
/// Records `token` as the current token and lets the current insertion-mode state process it.
/// - Returns: the result of the state's `process` call.
@discardableResult
public override func process(_ token: Token) throws -> Bool {
    currentToken = token
    let activeState = _state
    return try activeState.process(token, self)
}
/// Records `token` as the current token and processes it with the given `state`
/// rather than the builder's current state. The builder's state is not changed here.
/// - Returns: the result of `state.process`.
@discardableResult
func process(_ token: Token, _ state: HtmlTreeBuilderState)throws->Bool {
currentToken = token
return try state.process(token, self)
}
/// Switches the builder to the given insertion-mode state.
func transition(_ state: HtmlTreeBuilderState) {
self._state = state
}
/// Returns the current insertion-mode state.
func state() -> HtmlTreeBuilderState {
return _state
}
/// Saves the current state so it can later be read back through `originalState()`.
func markInsertionMode() {
_originalState = _state
}
/// Returns the state saved by the most recent `markInsertionMode()` (initially `Initial`).
func originalState() -> HtmlTreeBuilderState {
return _originalState
}
/// Sets the "frameset OK" flag.
func framesetOk(_ framesetOk: Bool) {
self._framesetOk = framesetOk
}
/// Returns the "frameset OK" flag (true until cleared through the setter).
func framesetOk() -> Bool {
return _framesetOk
}
/// Returns the document being built.
func getDocument() -> Document {
return doc
}
/// Returns the base URI currently in effect for the parse.
func getBaseUri() -> String {
return baseUri
}
/// Adopts the `href` of a `<base>` element as the base URI, at most once per parse.
///
/// Does nothing if a base URI was already taken from the document, or if the
/// element's absolute `href` is empty (e.g. `<base target>`).
/// - Throws: errors from `absUrl` or `Document.setBaseUri`.
func maybeSetBaseUri(_ base: Element) throws {
    // Only the first <base href> in a parse is honoured.
    guard !baseUriSetFromDoc else { return }

    let resolved: String = try base.absUrl("href")
    guard !resolved.isEmpty else { return }

    baseUri = resolved
    baseUriSetFromDoc = true
    // Set on the doc too, so elements it creates get the updated base and
    // all descendants are updated.
    try doc.setBaseUri(resolved)
}
/// Returns true once `parseFragment` has been invoked on this builder.
func isFragmentParsing() -> Bool {
return fragmentParsing
}
/// Records a parse error for the current token in the given state.
///
/// Nothing is recorded when the error list cannot take more errors or when
/// there is no current token.
func error(_ state: HtmlTreeBuilderState) {
    guard errors.canAddError(), let token = currentToken else { return }
    errors.add(ParseError(reader.getPos(), "Unexpected token [\(token.tokenType())] when in state [\(state.rawValue)]"))
}
/// Inserts an element for a start tag and pushes it on the stack of open elements.
///
/// A self-closing start tag is inserted via `insertEmpty`, pushed, and immediately
/// followed by an emitted empty end tag of the same name so the element gets closed.
/// When the spec expects an empty tag, callers go to `insertEmpty` directly and no
/// fake end tag is produced.
/// - Returns: the inserted element.
@discardableResult
func insert(_ startTag: Token.StartTag) throws -> Element {
    guard startTag.isSelfClosing() else {
        // Regular start tag: build the element with normalised attributes.
        try Validate.notNull(obj: startTag._attributes)
        let created: Element = try Element(Tag.valueOf(startTag.name(), settings), baseUri, settings.normalizeAttributes(startTag._attributes))
        try insert(created)
        return created
    }

    // Empty / unknown self-closing tag.
    let placeholder: Element = try insertEmpty(startTag)
    stack.append(placeholder)
    // Handles <script />, which otherwise needs breakout steps from script data.
    tokeniser.transition(TokeniserState.Data)
    // Ensure we get out of whatever state we are in; emitted for yielded processing.
    try tokeniser.emit(emptyEnd.reset().name(placeholder.tagName()))
    return placeholder
}
/// Creates an attribute-less element for the named tag, inserts it and pushes it on the stack.
/// - Returns: the new element.
@discardableResult
func insertStartTag(_ startTagName: String) throws -> Element {
    let created: Element = try Element(Tag.valueOf(startTagName, settings), baseUri)
    try insert(created)
    return created
}
/// Inserts `el` into the tree via `insertNode` and then pushes it on the stack of open elements.
func insert(_ el: Element)throws {
try insertNode(el)
stack.append(el)
}
/// Inserts an element for `startTag` without pushing it on the stack.
///
/// For a self-closing start tag the tokeniser's self-closing flag is acknowledged when
/// the tag is either unknown (which is then also marked self-closing for output) or a
/// known self-closing tag. A known tag that is not self-closing is left unacknowledged,
/// which lets the error propagate.
/// NOTE(review): unlike `insert(_ startTag:)`, attributes are not passed through
/// `settings.normalizeAttributes` here — confirm whether that is intentional.
/// - Returns: the inserted element.
@discardableResult
func insertEmpty(_ startTag: Token.StartTag) throws -> Element {
    let tag: Tag = try Tag.valueOf(startTag.name(), settings)
    try Validate.notNull(obj: startTag._attributes)
    let created: Element = Element(tag, baseUri, startTag._attributes)
    try insertNode(created)

    guard startTag.isSelfClosing() else { return created }

    if !tag.isKnownTag() {
        // Unknown tag: remember that it is self-closing for output; not a distinct error.
        tag.setSelfClosing()
        tokeniser.acknowledgeSelfClosingFlag()
    } else if tag.isSelfClosing() {
        tokeniser.acknowledgeSelfClosingFlag()
    }
    return created
}
/// Creates a `FormElement` for `startTag`, makes it the current form and inserts it.
/// - Parameter onStack: when true the form is also pushed on the stack of open elements.
/// - Returns: the new form element.
@discardableResult
func insertForm(_ startTag: Token.StartTag, _ onStack: Bool) throws -> FormElement {
    let formTag: Tag = try Tag.valueOf(startTag.name(), settings)
    try Validate.notNull(obj: startTag._attributes)
    let form: FormElement = FormElement(formTag, baseUri, startTag._attributes)
    setFormElement(form)
    try insertNode(form)
    guard onStack else { return form }
    stack.append(form)
    return form
}
/// Inserts a comment node carrying the token's data.
func insert(_ commentToken: Token.Comment) throws {
    try insertNode(Comment(commentToken.getData(), baseUri))
}
/// Appends the token's character data to the current element.
///
/// Inside `script` and `style` the data goes in as a `DataNode`; everywhere else as a
/// `TextNode`. This bypasses `insertNode`, because character nodes are never
/// foster-parented here and a stack is expected to exist.
/// - Throws: if the token carries no data, or if appending fails.
func insert(_ characterToken: Token.Char) throws {
    let parentName: String? = currentElement()?.tagName()
    let isRawData = (parentName == "script" || parentName == "style")

    try Validate.notNull(obj: characterToken.getData())
    let text = characterToken.getData()!

    let node: Node
    if isRawData {
        node = DataNode(text, baseUri)
    } else {
        node = TextNode(text, baseUri)
    }
    try currentElement()?.appendChild(node)
}
/// Places `node` in the tree and links form controls to the current form.
///
/// The destination is the document when the stack is still empty (doctype, comments),
/// the foster parent when foster inserts are on, and the current element otherwise.
private func insertNode(_ node: Node) throws {
    if stack.isEmpty {
        try doc.appendChild(node)
    } else if isFosterInserts() {
        try insertInFosterParent(node)
    } else {
        try currentElement()?.appendChild(node)
    }

    // Connect form-listed controls to the current form element, if there is one.
    if let control = node as? Element, control.tag().isFormListed(), let form = formElement {
        form.addElement(control)
    }
}
/// Removes and returns the top (last) element of the stack of open elements.
/// The stack must not be empty.
@discardableResult
func pop() -> Element {
    return stack.removeLast()
}
/// Pushes `element` on top of the stack of open elements.
func push(_ element: Element) {
stack.append(element)
}
/// Returns the stack of open elements (bottom first, current element last).
func getStack()->Array<Element> {
return stack
}
/// Returns true if `el` is currently on the stack of open elements.
@discardableResult
func onStack(_ el: Element) -> Bool {
return isElementInQueue(stack, el)
}
/// Returns true if `queue` contains an entry equal to `element`, searching from the end.
private func isElementInQueue(_ queue: Array<Element?>, _ element: Element?) -> Bool {
    for candidate in queue.reversed() where candidate == element {
        return true
    }
    return false
}
/// Returns the topmost element on the stack whose node name equals `elName`, or nil.
func getFromStack(_ elName: String) -> Element? {
    for candidate in stack.reversed() where candidate.nodeName() == elName {
        return candidate
    }
    return nil
}
/// Removes the topmost occurrence of `el` from the stack.
/// - Returns: true if an entry was removed, false if `el` was not on the stack.
@discardableResult
func removeFromStack(_ el: Element) -> Bool {
    for index in stack.indices.reversed() where stack[index] == el {
        stack.remove(at: index)
        return true
    }
    return false
}
/// Pops elements until one named `elName` has been popped (inclusive).
/// If no such element exists the whole stack is emptied.
func popStackToClose(_ elName: String) {
    while let popped = stack.popLast() {
        if popped.nodeName() == elName {
            return
        }
    }
}
/// Variadic convenience: forwards the names to the array-based `popStackToClose`.
func popStackToClose(_ elNames: String...) {
popStackToClose(elNames)
}
/// Pops elements until one whose name is in `elNames` has been popped (inclusive).
/// If none matches the whole stack is emptied.
func popStackToClose(_ elNames: [String]) {
    while let popped = stack.popLast() {
        if StringUtil.inString(popped.nodeName(), elNames) {
            return
        }
    }
}
/// Pops elements until the top of the stack is named `elName`; that element stays.
/// If no such element exists the whole stack is emptied.
func popStackToBefore(_ elName: String) {
    while let top = stack.last, top.nodeName() != elName {
        stack.removeLast()
    }
}
/// Clears the stack back to a `table` element (see `clearStackToContext`).
func clearStackToTableContext() {
clearStackToContext("table")
}
/// Clears the stack back to a `tbody`, `tfoot` or `thead` element (see `clearStackToContext`).
func clearStackToTableBodyContext() {
clearStackToContext("tbody", "tfoot", "thead")
}
/// Clears the stack back to a `tr` element (see `clearStackToContext`).
func clearStackToTableRowContext() {
clearStackToContext("tr")
}
/// Variadic convenience: forwards the names to the array-based `clearStackToContext`.
private func clearStackToContext(_ nodeNames: String...) {
clearStackToContext(nodeNames)
}
/// Pops elements until the top of the stack is one of `nodeNames` or `html`;
/// the matching element is left on the stack.
private func clearStackToContext(_ nodeNames: [String]) {
    while let top = stack.last {
        let topName = top.nodeName()
        if topName == "html" || StringUtil.inString(topName, nodeNames) {
            return
        }
        stack.removeLast()
    }
}
/// Returns the element immediately below `el` on the stack of open elements
/// (the entry at the preceding index, i.e. the one pushed just before it).
///
/// - Parameter el: the element to look up; its topmost occurrence is used.
/// - Returns: the preceding stack entry, or nil when `el` is not on the stack or
///   is the bottom entry and so has nothing before it.
func aboveOnStack(_ el: Element) -> Element? {
    for pos in stack.indices.reversed() where stack[pos] == el {
        // Guard the bottom entry: there is no index -1 to read.
        return pos > 0 ? stack[pos - 1] : nil
    }
    return nil
}
/// Inserts `input` on the stack directly after the last occurrence of `after`.
/// - Throws: a validation error if `after` is not on the stack.
func insertOnStackAfter(_ after: Element, _ input: Element) throws {
    let anchorIndex: Int = stack.lastIndexOf(after)
    try Validate.isTrue(val: anchorIndex != -1)
    let insertionIndex = anchorIndex + 1
    stack.insert(input, at: insertionIndex)
}
/// Replaces the last occurrence of `out` on the stack with `input`.
/// - Throws: a validation error if `out` is not on the stack.
func replaceOnStack(_ out: Element, _ input: Element) throws {
    stack = try replaceInQueue(stack, out, input)
}
/// Returns a copy of `queue` in which the last occurrence of `out` is replaced by `input`.
/// - Throws: a validation error if `out` is not in the queue.
private func replaceInQueue(_ queue: Array<Element>, _ out: Element, _ input: Element) throws -> Array<Element> {
    let slot: Int = queue.lastIndexOf(out)
    try Validate.isTrue(val: slot != -1)
    var updated = queue
    updated[slot] = input
    return updated
}
/// Returns a copy of `queue` (which may contain nil markers) in which the last entry
/// equal to `out` is replaced by `input`.
/// - Throws: a validation error if `out` is not in the queue.
private func replaceInQueue(_ queue: Array<Element?>, _ out: Element, _ input: Element) throws -> Array<Element?> {
    // Search from the end so the first hit is the last occurrence.
    var slot: Int = -1
    for index in queue.indices.reversed() where out == queue[index] {
        slot = index
        break
    }
    try Validate.isTrue(val: slot != -1)
    var updated = queue
    updated[slot] = input
    return updated
}
/// Resets the insertion mode from the stack of open elements.
///
/// Walks the stack from the top. The first node whose name maps to an insertion mode
/// decides the transition. At the bottom of the stack (`last`) the fragment context
/// element stands in for the stack entry while fragment parsing; if nothing matched by
/// then, the mode becomes `InBody`.
///
/// Two differences from a literal `if`/`else if` chain over the names:
/// - `td` and `th` only select `InCell` when the node is not the last one. Because `&&`
///   binds tighter than `||`, writing `td || th && !last` would exempt `td` from the
///   `!last` check.
/// - The context element is not force-unwrapped, so reaching the bottom of the stack
///   outside fragment parsing (or with a nil context) no longer traps. With no context
///   node, no tag matches and the `last` fallback applies.
func resetInsertionMode() {
    for pos in stack.indices.reversed() {
        let last = (pos == 0)
        var node: Element? = stack[pos]
        if last && fragmentParsing {
            node = contextElement
        }
        let name: String = node?.nodeName() ?? ""

        switch name {
        case "select":
            transition(HtmlTreeBuilderState.InSelect)
            return // frag
        case "td", "th":
            if !last {
                transition(HtmlTreeBuilderState.InCell)
                return
            }
        case "tr":
            transition(HtmlTreeBuilderState.InRow)
            return
        case "tbody", "thead", "tfoot":
            transition(HtmlTreeBuilderState.InTableBody)
            return
        case "caption":
            transition(HtmlTreeBuilderState.InCaption)
            return
        case "colgroup":
            transition(HtmlTreeBuilderState.InColumnGroup)
            return // frag
        case "table":
            transition(HtmlTreeBuilderState.InTable)
            return
        case "head":
            transition(HtmlTreeBuilderState.InBody)
            return // frag
        case "body":
            transition(HtmlTreeBuilderState.InBody)
            return
        case "frameset":
            transition(HtmlTreeBuilderState.InFrameset)
            return // frag
        case "html":
            transition(HtmlTreeBuilderState.BeforeHead)
            return // frag
        default:
            break
        }

        // Nothing matched (or a td/th context was the last node): fall back at the bottom.
        if last {
            transition(HtmlTreeBuilderState.InBody)
            return // frag
        }
    }
}
// todo: tidy up in specific scope methods
/// One-slot buffer that carries a single target name into the array-based scope search.
private var specificScopeTarget: [String?] = [nil]
/// Single-target convenience over the array-based `inSpecificScope`.
/// Stores `targetName` in `specificScopeTarget` and searches with that buffer.
private func inSpecificScope(_ targetName: String, _ baseTypes: [String], _ extraTypes: [String]?) throws -> Bool {
    specificScopeTarget = [targetName]
    return try inSpecificScope(specificScopeTarget, baseTypes, extraTypes)
}
/// Searches the stack from the top for an element named in `targetNames`.
///
/// - Returns: true when a target is found before any scope boundary; false when an
///   element named in `baseTypes` or `extraTypes` is reached first.
/// - Throws: a validation failure if the stack is exhausted without hitting a target
///   or a boundary.
private func inSpecificScope(_ targetNames: [String?], _ baseTypes: [String], _ extraTypes: [String]?) throws -> Bool {
    for candidate in stack.reversed() {
        let candidateName: String = candidate.nodeName()
        if StringUtil.inString(candidateName, targetNames) {
            return true
        }
        if StringUtil.inString(candidateName, baseTypes) {
            return false
        }
        if let extras = extraTypes, StringUtil.inString(candidateName, extras) {
            return false
        }
    }
    try Validate.fail(msg: "Should not be reachable")
    return false
}
/// Returns true if any of `targetNames` is in the default scope
/// (boundaries: `TagsSearchInScope`, no extra boundaries).
func inScope(_ targetNames: [String])throws->Bool {
return try inSpecificScope(targetNames, HtmlTreeBuilder.TagsSearchInScope, nil)
}
/// Returns true if `targetName` is in the default scope, with no extra boundaries.
func inScope(_ targetName: String)throws->Bool {
return try inScope(targetName, nil)
}
/// Returns true if `targetName` is in scope, using `TagsSearchInScope` plus the
/// optional `extras` as scope boundaries.
func inScope(_ targetName: String, _ extras: [String]?)throws->Bool {
return try inSpecificScope(targetName, HtmlTreeBuilder.TagsSearchInScope, extras)
// todo: in mathml namespace: mi, mo, mn, ms, mtext, annotation-xml
// todo: in svg namespace: foreignObject, desc, title
}
/// Scope check that additionally treats `ol` and `ul` (`TagSearchList`) as boundaries.
func inListItemScope(_ targetName: String)throws->Bool {
return try inScope(targetName, HtmlTreeBuilder.TagSearchList)
}
/// Scope check that additionally treats `button` (`TagSearchButton`) as a boundary.
func inButtonScope(_ targetName: String)throws->Bool {
return try inScope(targetName, HtmlTreeBuilder.TagSearchButton)
}
/// Scope check whose only boundaries are `html` and `table` (`TagSearchTableScope`).
func inTableScope(_ targetName: String)throws->Bool {
return try inSpecificScope(targetName, HtmlTreeBuilder.TagSearchTableScope, nil)
}
/// Select-scope check: walks the stack from the top, passing only over `optgroup`
/// and `option` elements.
///
/// - Returns: true when an element named `targetName` is reached; false as soon as
///   any other element outside `TagSearchSelectScope` is met.
/// - Throws: a validation failure if the stack is exhausted first.
func inSelectScope(_ targetName: String) throws -> Bool {
    for candidate in stack.reversed() {
        let candidateName: String = candidate.nodeName()
        if candidateName.equals(targetName) {
            return true
        }
        // Every element except optgroup/option ends the scope.
        guard StringUtil.inString(candidateName, HtmlTreeBuilder.TagSearchSelectScope) else {
            return false
        }
    }
    try Validate.fail(msg: "Should not be reachable")
    return false
}
/// Records `headElement` as the current head element.
func setHeadElement(_ headElement: Element) {
self.headElement = headElement
}
/// Returns the current head element, or nil if none has been set.
func getHeadElement() -> Element? {
return headElement
}
/// Returns whether inserts are currently being foster-parented.
func isFosterInserts() -> Bool {
return fosterInserts
}
/// Turns foster-parenting of subsequent inserts on or off.
func setFosterInserts(_ fosterInserts: Bool) {
self.fosterInserts = fosterInserts
}
/// Returns the current form element, or nil if there is none.
func getFormElement() -> FormElement? {
return formElement
}
/// Sets (or, with nil, clears) the current form element.
func setFormElement(_ formElement: FormElement?) {
self.formElement = formElement
}
/// Starts a fresh, empty list of pending table characters.
func newPendingTableCharacters() {
    pendingTableCharacters = []
}
/// Returns the pending table characters collected so far.
func getPendingTableCharacters()->Array<String> {
return pendingTableCharacters
}
/// Replaces the list of pending table characters.
func setPendingTableCharacters(_ pendingTableCharacters: Array<String>) {
self.pendingTableCharacters = pendingTableCharacters
}
/**
 11.2.5.2 Closing elements that have implied end tags.

 When the steps below require the UA to generate implied end tags, then, while the current node is a dd element, a
 dt element, an li element, an option element, an optgroup element, a p element, an rp element, or an rt element,
 the UA must pop the current node off the stack of open elements.

 Popping stops at the first current element that is not in that list, that is named `excludeTag`,
 or when the stack runs out of elements.

 @param excludeTag If a step requires the UA to generate implied end tags but lists an element to exclude from the
 process, then the UA must perform the above steps as if that element was not in the above list.
 Pass nil to exclude nothing.
 */
func generateImpliedEndTags(_ excludeTag: String?) {
    while let current = currentElement() {
        let name: String = current.nodeName()
        // The excluded element is treated as if it were not in the implied-end-tag list.
        if let excluded = excludeTag, name.equals(excluded) {
            break
        }
        guard StringUtil.inString(name, HtmlTreeBuilder.TagSearchEndTags) else {
            break
        }
        pop()
    }
}
/// Convenience overload: calls `generateImpliedEndTags(_:)` with no excluded tag (nil).
func generateImpliedEndTags() {
generateImpliedEndTags(nil)
}
/// Returns true if the element's node name is in the `TagSearchSpecial` list.
func isSpecial(_ el: Element) -> Bool {
    // todo: mathml's mi, mo, mn
    // todo: svg's foreignObject, desc, title
    return StringUtil.inString(el.nodeName(), HtmlTreeBuilder.TagSearchSpecial)
}
/// Returns the last entry of the active formatting elements list.
/// The result is nil when the list is empty or when its last entry is a marker.
func lastFormattingElement() -> Element? {
    if formattingElements.isEmpty {
        return nil
    }
    return formattingElements[formattingElements.endIndex - 1]
}
/// Removes and returns the last entry of the active formatting elements list.
/// The result is nil when the list is empty or when the removed entry was a marker.
func removeLastFormattingElement() -> Element? {
    guard !formattingElements.isEmpty else {
        return nil
    }
    return formattingElements.removeLast()
}
// active formatting elements
/// Appends `input` to the active formatting elements.
///
/// Before appending, entries after the last marker are scanned from the end. When a
/// third entry equivalent to `input` (see `isSameFormattingElement`) is found, that
/// entry — the earliest of the three — is removed.
func pushActiveFormattingElements(_ input: Element) {
    var matches: Int = 0
    for index in formattingElements.indices.reversed() {
        // A nil entry is a marker: stop scanning.
        guard let candidate = formattingElements[index] else {
            break
        }
        if isSameFormattingElement(input, candidate) {
            matches += 1
        }
        if matches == 3 {
            formattingElements.remove(at: index)
            break
        }
    }
    formattingElements.append(input)
}
/// Returns true if `a` and `b` have the same node name and equal attributes.
/// An `a` without attributes never matches. Namespaces are not compared yet.
private func isSameFormattingElement(_ a: Element, _ b: Element) -> Bool {
    guard a.attributes != nil else {
        return false
    }
    guard a.nodeName().equals(b.nodeName()) else {
        return false
    }
    // todo: namespaces — a.namespace().equals(b.namespace())
    return a.getAttributes()!.equals(o: b.getAttributes())
}
/// Reconstructs the active formatting elements.
///
/// Does nothing when the list is empty, or when its last entry is a marker or is already
/// on the stack of open elements. Otherwise it first rewinds to the nearest earlier entry
/// that is a marker or on the stack (or to the start of the list). It then walks forward
/// to the end of the list, and for each entry inserts a new element with the same name
/// and attributes and stores that new element in the entry's slot.
/// The step numbers in the comments are the ones used by the original author.
func reconstructFormattingElements()throws {
let last: Element? = lastFormattingElement()
if (last == nil || onStack(last!)) {
return
}
var entry: Element? = last
let size: Int = formattingElements.count
var pos: Int = size - 1
var skip: Bool = false
// Rewind phase: move `pos` back until a marker / on-stack entry, or the list start.
while (true) {
if (pos == 0) { // step 4. if none before, skip to 8
skip = true
break
}
pos -= 1
entry = formattingElements[pos] // step 5. one earlier than entry
if (entry == nil || onStack(entry!)) // step 6 - stop at a marker or an entry that is on the stack
{break} // jump to 8, else continue back to 4
}
// Advance phase: recreate each entry from `pos` through the end of the list.
while(true) {
if (!skip) // step 7: one later than entry
{
pos += 1
entry = formattingElements[pos]
}
try Validate.notNull(obj: entry) // should not occur, as we break at last element
// 8. create new element from element, 9 insert into current node, onto stack
skip = false // can only skip increment from 4.
let newEl: Element = try insertStartTag(entry!.nodeName()) // todo: avoid fostering here?
// newEl.namespace(entry.namespace()) // todo: namespaces
newEl.getAttributes()?.addAll(incoming: entry!.getAttributes())
// 10. replace entry with new entry
formattingElements[pos] = newEl
// 11. stop once the last entry has been replaced; otherwise loop back to step 7
if (pos == size-1)
{break}
}
}
/// Removes entries from the end of the active formatting elements list up to and
/// including the last marker, or until the list is empty.
func clearFormattingElementsToLastMarker() {
    while !formattingElements.isEmpty {
        // A nil result here means a marker was just removed.
        if removeLastFormattingElement() == nil {
            return
        }
    }
}
/// Removes the last entry equal to `el` from the active formatting elements, if any.
func removeFromActiveFormattingElements(_ el: Element?) {
    for index in formattingElements.indices.reversed() where formattingElements[index] == el {
        formattingElements.remove(at: index)
        return
    }
}
/// Returns true if `el` is in the active formatting elements list.
func isInActiveFormattingElements(_ el: Element) -> Bool {
return isElementInQueue(formattingElements, el)
}
/// Returns the most recent active formatting element named `nodeName`, looking no
/// further back than the last marker. Returns nil if none is found.
func getActiveFormattingElement(_ nodeName: String) -> Element? {
    for entry in formattingElements.reversed() {
        // A nil entry is a scope marker: stop searching.
        guard let candidate = entry else {
            return nil
        }
        if candidate.nodeName().equals(nodeName) {
            return candidate
        }
    }
    return nil
}
/// Replaces the last occurrence of `out` in the active formatting elements with `input`.
/// - Throws: a validation error if `out` is not in the list.
func replaceActiveFormattingElement(_ out: Element, _ input: Element) throws {
    formattingElements = try replaceInQueue(formattingElements, out, input)
}
/// Appends a marker (a nil entry) to the active formatting elements list.
func insertMarkerToFormattingElements() {
formattingElements.append(nil)
}
/// Inserts `input` at the foster-parent position.
///
/// - If the topmost `table` on the stack has a parent, `input` is placed directly
///   before that table.
/// - If that table has no parent, `input` is appended to the stack entry before the
///   table (`aboveOnStack`), if there is one.
/// - With no `table` on the stack (fragment case), `input` is appended to the bottom
///   element of the stack.
func insertInFosterParent(_ input: Node) throws {
    guard let lastTable = getFromStack("table") else {
        // no table == frag
        try stack[0].appendChild(input)
        return
    }

    if lastTable.parent() != nil {
        try lastTable.before(input)
    } else {
        try aboveOnStack(lastTable)?.appendChild(input)
    }
}
}