Skip to content
71 changes: 35 additions & 36 deletions html5lib/html5parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,45 +205,44 @@ def mainLoop(self):
prev_token = None
new_token = token
while new_token is not None:
prev_token = new_token
currentNode = self.tree.openElements[-1] if self.tree.openElements else None
currentNodeNamespace = currentNode.namespace if currentNode else None
currentNodeName = currentNode.name if currentNode else None

type = new_token["type"]

if type == ParseErrorToken:
self.parseError(new_token["data"], new_token.get("datavars", {}))
new_token = None
else:
if (len(self.tree.openElements) == 0 or
currentNodeNamespace == self.tree.defaultNamespace or
(self.isMathMLTextIntegrationPoint(currentNode) and
((type == StartTagToken and
token["name"] not in frozenset(["mglyph", "malignmark"])) or
type in (CharactersToken, SpaceCharactersToken))) or
(currentNodeNamespace == namespaces["mathml"] and
currentNodeName == "annotation-xml" and
type == StartTagToken and
token["name"] == "svg") or
(self.isHTMLIntegrationPoint(currentNode) and
type in (StartTagToken, CharactersToken, SpaceCharactersToken))):
phase = self.phase
else:
phase = self.phases["inForeignContent"]

if type == CharactersToken:
new_token = phase.processCharacters(new_token)
elif type == SpaceCharactersToken:
new_token = phase.processSpaceCharacters(new_token)
elif type == StartTagToken:
new_token = phase.processStartTag(new_token)
elif type == EndTagToken:
new_token = phase.processEndTag(new_token)
elif type == CommentToken:
new_token = phase.processComment(new_token)
elif type == DoctypeToken:
new_token = phase.processDoctype(new_token)
break

prev_token = new_token
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

NB: This is technically a behaviour change, since prev_token could previously have referenced a ParseErrorToken. That said, I don't believe the code that inspects prev_token is ever relevant for error tokens: the self-closing check that follows only reads prev_token when type == StartTagToken.

currentNode = self.tree.openElements[-1] if self.tree.openElements else None

in_foreign_content = True
if currentNode is None:
in_foreign_content = False
elif currentNode.namespace == self.tree.defaultNamespace:
in_foreign_content = False
elif self.isHTMLIntegrationPoint(currentNode):
if type in (StartTagToken, CharactersToken, SpaceCharactersToken):
in_foreign_content = False
elif self.isMathMLTextIntegrationPoint(currentNode):
if type == StartTagToken and token["name"] not in frozenset(["mglyph", "malignmark"]):
in_foreign_content = False
elif type in (CharactersToken, SpaceCharactersToken):
in_foreign_content = False
elif currentNode.namespace == namespaces["mathml"] and currentNode.name == "annotation-xml":
if type == StartTagToken and token["name"] == "svg":
in_foreign_content = False

phase = self.phases["inForeignContent"] if in_foreign_content else self.phase
if type == CharactersToken:
new_token = phase.processCharacters(new_token)
elif type == SpaceCharactersToken:
new_token = phase.processSpaceCharacters(new_token)
elif type == StartTagToken:
new_token = phase.processStartTag(new_token)
elif type == EndTagToken:
new_token = phase.processEndTag(new_token)
elif type == CommentToken:
new_token = phase.processComment(new_token)
elif type == DoctypeToken:
new_token = phase.processDoctype(new_token)

if (type == StartTagToken and prev_token["selfClosing"] and
not prev_token["selfClosingAcknowledged"]):
Expand Down