diff --git a/html5lib/html5parser.py b/html5lib/html5parser.py index 74d829d9..f06a1008 100644 --- a/html5lib/html5parser.py +++ b/html5lib/html5parser.py @@ -205,45 +205,44 @@ def mainLoop(self): prev_token = None new_token = token while new_token is not None: - prev_token = new_token - currentNode = self.tree.openElements[-1] if self.tree.openElements else None - currentNodeNamespace = currentNode.namespace if currentNode else None - currentNodeName = currentNode.name if currentNode else None - type = new_token["type"] - if type == ParseErrorToken: self.parseError(new_token["data"], new_token.get("datavars", {})) - new_token = None - else: - if (len(self.tree.openElements) == 0 or - currentNodeNamespace == self.tree.defaultNamespace or - (self.isMathMLTextIntegrationPoint(currentNode) and - ((type == StartTagToken and - token["name"] not in frozenset(["mglyph", "malignmark"])) or - type in (CharactersToken, SpaceCharactersToken))) or - (currentNodeNamespace == namespaces["mathml"] and - currentNodeName == "annotation-xml" and - type == StartTagToken and - token["name"] == "svg") or - (self.isHTMLIntegrationPoint(currentNode) and - type in (StartTagToken, CharactersToken, SpaceCharactersToken))): - phase = self.phase - else: - phase = self.phases["inForeignContent"] - - if type == CharactersToken: - new_token = phase.processCharacters(new_token) - elif type == SpaceCharactersToken: - new_token = phase.processSpaceCharacters(new_token) - elif type == StartTagToken: - new_token = phase.processStartTag(new_token) - elif type == EndTagToken: - new_token = phase.processEndTag(new_token) - elif type == CommentToken: - new_token = phase.processComment(new_token) - elif type == DoctypeToken: - new_token = phase.processDoctype(new_token) + break + + prev_token = new_token + currentNode = self.tree.openElements[-1] if self.tree.openElements else None + + in_foreign_content = True + if currentNode is None: + in_foreign_content = False + elif currentNode.namespace == self.tree.defaultNamespace: + in_foreign_content = False + elif self.isHTMLIntegrationPoint(currentNode): + if type in (StartTagToken, CharactersToken, SpaceCharactersToken): + in_foreign_content = False + elif self.isMathMLTextIntegrationPoint(currentNode): + if type == StartTagToken and token["name"] not in frozenset(["mglyph", "malignmark"]): + in_foreign_content = False + elif type in (CharactersToken, SpaceCharactersToken): + in_foreign_content = False + elif currentNode.namespace == namespaces["mathml"] and currentNode.name == "annotation-xml": + if type == StartTagToken and token["name"] == "svg": + in_foreign_content = False + + phase = self.phases["inForeignContent"] if in_foreign_content else self.phase + if type == CharactersToken: + new_token = phase.processCharacters(new_token) + elif type == SpaceCharactersToken: + new_token = phase.processSpaceCharacters(new_token) + elif type == StartTagToken: + new_token = phase.processStartTag(new_token) + elif type == EndTagToken: + new_token = phase.processEndTag(new_token) + elif type == CommentToken: + new_token = phase.processComment(new_token) + elif type == DoctypeToken: + new_token = phase.processDoctype(new_token) if (type == StartTagToken and prev_token["selfClosing"] and not prev_token["selfClosingAcknowledged"]):