// Java tutorial
/* * Copyright 2016 Google Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.google.template.soy.passes; import static com.google.common.base.Ascii.toLowerCase; import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Preconditions.checkNotNull; import static com.google.common.base.Preconditions.checkState; import static com.google.common.collect.Multimaps.asMap; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Ascii; import com.google.common.base.CharMatcher; import com.google.common.base.Function; import com.google.common.base.Functions; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Iterables; import com.google.common.collect.ListMultimap; import com.google.common.collect.MultimapBuilder; import com.google.common.collect.Sets; import com.google.template.soy.base.SourceLocation; import com.google.template.soy.base.internal.IdGenerator; import com.google.template.soy.data.SanitizedContent.ContentKind; import com.google.template.soy.error.ErrorReporter; import com.google.template.soy.error.ErrorReporter.Checkpoint; import com.google.template.soy.error.SoyErrorKind; import com.google.template.soy.soytree.AbstractSoyNodeVisitor; import com.google.template.soy.soytree.CallNode; import com.google.template.soy.soytree.CallParamContentNode; import com.google.template.soy.soytree.CallParamValueNode; import 
com.google.template.soy.soytree.CssNode; import com.google.template.soy.soytree.DebuggerNode; import com.google.template.soy.soytree.ForNode; import com.google.template.soy.soytree.ForeachNode; import com.google.template.soy.soytree.ForeachNonemptyNode; import com.google.template.soy.soytree.HtmlAttributeNode; import com.google.template.soy.soytree.HtmlAttributeValueNode; import com.google.template.soy.soytree.HtmlAttributeValueNode.Quotes; import com.google.template.soy.soytree.HtmlCloseTagNode; import com.google.template.soy.soytree.HtmlOpenTagNode; import com.google.template.soy.soytree.IfCondNode; import com.google.template.soy.soytree.IfNode; import com.google.template.soy.soytree.LetContentNode; import com.google.template.soy.soytree.LetValueNode; import com.google.template.soy.soytree.LogNode; import com.google.template.soy.soytree.MsgFallbackGroupNode; import com.google.template.soy.soytree.MsgHtmlTagNode; import com.google.template.soy.soytree.PrintNode; import com.google.template.soy.soytree.RawTextNode; import com.google.template.soy.soytree.SoyFileNode; import com.google.template.soy.soytree.SoyNode; import com.google.template.soy.soytree.SoyNode.BlockNode; import com.google.template.soy.soytree.SoyNode.Kind; import com.google.template.soy.soytree.SoyNode.ParentSoyNode; import com.google.template.soy.soytree.SoyNode.StandaloneNode; import com.google.template.soy.soytree.SoyTreeUtils; import com.google.template.soy.soytree.SwitchCaseNode; import com.google.template.soy.soytree.SwitchNode; import com.google.template.soy.soytree.TagName; import com.google.template.soy.soytree.TemplateNode; import com.google.template.soy.soytree.XidNode; import java.util.ArrayList; import java.util.Collections; import java.util.EnumSet; import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Set; import javax.annotation.Nullable; /** * Rewrites templates and blocks of {@code kind="html"} or {@code kind="attributes"} to contain * {@link 
HtmlOpenTagNode}, {@link HtmlCloseTagNode}, {@link HtmlAttributeNode}, and {@link * HtmlAttributeValueNode}. * * <p>The strategy is to parse the raw text nodes for html tokens and then trigger AST rewriting * based on what we find. * * <p>An obvious question upon reading this should be "Why isn't this implemented in the grammar?" * the answer is that soy has some language features that interfere with writing such a grammar. * * <ul> * <li>Soy has special commands for manipulating text, notably: {@code {nil}, {sp}, {\n}} cause * difficulties when writing grammar productions for html elements. This would be manageable * by preventing the use of these commands within html tags (though some of them such as * {@code {sp}} are popular so there are some compatibility concerns) * <li>Soy has a {@code {literal}...{/literal}} command. such commands often contain html tags so * within the grammar we would need to start writing grammar production which match the * contents of literal blocks. This is possible but would require duplicating all the lexical * states for literals. (Also we would need to deal with tags split across literal blocks e.g. * {@code <div{literal} a="foo">{/literal}}). * <li>Transitioning between lexical states after seeing a {@code <script>} tag or after parsing a * {@code kind="html"} attributes is complex (And 'semantic lexical transitions' are not * recommended). * </ul> * * <p>On the other hand by implementing this here, a lot of these issues go away since all the text * has already been processed. Of course this doesn't mean it is easy since we need to implement our * own parser and state tracking system that is normally handled by the javacc grammar. * * <p>This class tries to be faithful to the <a * href="https://www.w3.org/TR/html5/syntax.html#syntax">Html syntax standard</a>. Though we do not * attempt to implement the contextual element model, and matching tags is handled by a different * pass, the {@link StrictHtmlValidationPass}. 
* * <p>TODO(lukes): there are some missing features: * * <ul> * <li>Remove the parsing of {@link MsgHtmlTagNode} from the parser and move it here * </ul> */ @VisibleForTesting public final class HtmlRewritePass extends CompilerFilePass { /** * If set to true, causes every state transition to be logged to stderr. Useful when debugging. */ private static final boolean DEBUG = false; private static final SoyErrorKind BLOCK_CHANGES_CONTEXT = SoyErrorKind .of("{0} changes context from ''{1}'' to ''{2}''.{3}"); private static final SoyErrorKind BLOCK_ENDS_IN_INVALID_STATE = SoyErrorKind .of("''{0}'' block ends in an invalid state ''{1}''"); private static final SoyErrorKind BLOCK_TRANSITION_DISALLOWED = SoyErrorKind .of("{0} started in ''{1}'', cannot create a {2}."); private static final SoyErrorKind CONDITIONAL_BLOCK_ISNT_GUARANTEED_TO_PRODUCE_ONE_ATTRIBUTE_VALUE = SoyErrorKind .of("expected exactly one attribute value, the {0} isn''t guaranteed to produce exactly " + "one"); private static final SoyErrorKind EXPECTED_ATTRIBUTE_VALUE = SoyErrorKind.of("expected an attribute value"); private static final SoyErrorKind EXPECTED_WS_EQ_OR_CLOSE_AFTER_ATTRIBUTE_NAME = SoyErrorKind .of("expected whitespace, ''='' or tag close after an attribute name"); private static final SoyErrorKind EXPECTED_WS_OR_CLOSE_AFTER_TAG_OR_ATTRIBUTE = SoyErrorKind .of("expected whitespace or tag close after a tag name or attribute"); private static final SoyErrorKind FOUND_END_OF_ATTRIBUTE_STARTED_IN_ANOTHER_BLOCK = SoyErrorKind .of("found the end of an html attribute that was started in another block. Html attributes " + "should be opened and closed in the same block"); private static final SoyErrorKind FOUND_END_TAG_STARTED_IN_ANOTHER_BLOCK = SoyErrorKind .of("found the end of a tag that was started in another block. 
Html tags should be opened " + "and closed in the same block"); private static final SoyErrorKind FOUND_EQ_WITH_ATTRIBUTE_IN_ANOTHER_BLOCK = SoyErrorKind .of("found an ''='' character in a different block than the attribute name."); private static final SoyErrorKind GENERIC_UNEXPECTED_CHAR = SoyErrorKind .of("unexpected character, expected ''{0}'' instead"); private static final SoyErrorKind ILLEGAL_HTML_ATTRIBUTE_CHARACTER = SoyErrorKind .of("illegal unquoted attribute value character"); private static final SoyErrorKind INVALID_IDENTIFIER = SoyErrorKind .of("invalid html identifier, ''{0}'' is an illegal character"); private static final SoyErrorKind INVALID_LOCATION_FOR_CONTROL_FLOW = SoyErrorKind .of("invalid location for a ''{0}'' node, {1}"); private static final SoyErrorKind INVALID_LOCATION_FOR_NONPRINTABLE = SoyErrorKind .of("invalid location for a non-printable node: {0}"); private static final SoyErrorKind INVALID_TAG_NAME = SoyErrorKind .of("tag names may only be raw text or print nodes, consider extracting a '''{'let...'' " + "variable"); private static final SoyErrorKind SELF_CLOSING_CLOSE_TAG = SoyErrorKind .of("close tags should not be self closing"); private static final SoyErrorKind UNEXPECTED_CLOSE_TAG_CONTENT = SoyErrorKind .of("unexpected close tag content, only whitespace is allowed in close tags"); private static final SoyErrorKind UNEXPECTED_WS_AFTER_LT = SoyErrorKind .of("unexpected whitespace after ''<'', did you mean ''<''?"); /** Represents features of the parser states. */ private enum StateFeature { /** Means the state is part of an html 'tag' of a node (but not, inside an attribute value). */ TAG, INVALID_END_STATE_FOR_BLOCK; } /** * Represents the contexutal state of the parser. * * <p>NOTE: {@link #reconcile(State)} depends on the ordering. So don't change the order without * also inspecting {@link #reconcile(State)}. 
*/
  private enum State {
    // The declaration order below is significant: reconcile(State) compares states with
    // compareTo, which for enums follows declaration order.
    NONE,
    PCDATA,
    RCDATA_SCRIPT,
    RCDATA_TEXTAREA,
    RCDATA_TITLE,
    RCDATA_STYLE,
    HTML_COMMENT,
    CDATA,
    /**
     * {@code <!doctype}, {@code <!element}, or {@code <?xml>}: these work like normal tags but
     * don't require attribute values to be matched with attribute names
     */
    XML_DECLARATION,
    SINGLE_QUOTED_XML_ATTRIBUTE_VALUE,
    DOUBLE_QUOTED_XML_ATTRIBUTE_VALUE,
    HTML_TAG_NAME,
    /**
     * This state is weird - it is for the position right after the attribute name, i.e. directly
     * after {@code foo} in:
     *
     * <pre>foo ="bar"
     *         ^
     * </pre>
     *
     * <p>The state right after the attribute name. This is normally not useful (we could
     * transition right to BEFORE_ATTRIBUTE_VALUE by looking ahead for an '=' character), but we
     * need it so we can have dynamic attribute names. e.g. {xid foo}=bar.
     */
    AFTER_ATTRIBUTE_NAME(StateFeature.TAG),
    BEFORE_ATTRIBUTE_VALUE(
        StateFeature.INVALID_END_STATE_FOR_BLOCK),
    SINGLE_QUOTED_ATTRIBUTE_VALUE,
    DOUBLE_QUOTED_ATTRIBUTE_VALUE,
    UNQUOTED_ATTRIBUTE_VALUE,
    AFTER_TAG_NAME_OR_ATTRIBUTE(
        StateFeature.TAG),
    BEFORE_ATTRIBUTE_NAME(StateFeature.TAG),;

    /**
     * Gets the {@link State} for the given kind.
     *
     * @param kind the content kind of a block; may be {@code null}
     * @return {@link #NONE} for a {@code null} kind and for CSS, JS, TEXT, TRUSTED_RESOURCE_URI
     *     and URI; {@link #BEFORE_ATTRIBUTE_NAME} for ATTRIBUTES; {@link #PCDATA} for HTML
     * @throws AssertionError if {@code kind} is not one of the constants handled by the switch
     */
    static State fromKind(@Nullable ContentKind kind) {
      if (kind == null) {
        return NONE;
      }
      switch (kind) {
        case ATTRIBUTES:
          return BEFORE_ATTRIBUTE_NAME;
        case HTML:
          return PCDATA;
          // You might be thinking that some of these should be RCDATA_STYLE or RCDATA_SCRIPT, but
          // that wouldn't be accurate since rcdata is specific to the context of js on an html page
          // in a script tag. General js has different limitations and the autoescaper knows how to
          // escape js into rcdata_style
        case CSS:
        case JS:
        case TEXT:
        case TRUSTED_RESOURCE_URI:
        case URI:
          return NONE;
      }
      // Only reachable when kind is a constant that none of the cases above names.
      throw new AssertionError("unhandled kind: " + kind);
    }

    /** The features attached to this state; empty for states declared without arguments. */
    private final ImmutableSet<StateFeature> stateTypes;

    /** Stores the given features as an immutable enum set. */
    State(StateFeature... stateTypes) {
      EnumSet<StateFeature> set = EnumSet.noneOf(StateFeature.class);
      Collections.addAll(set, stateTypes);
      this.stateTypes = Sets.immutableEnumSet(set);
    }

    /**
     * Given 2 states, return a state that is compatible with both of them.
This is useful for * calculating states when 2 branches of a conditional don't end in the same state. Returns * {@code null} if no such state exists. */ @Nullable State reconcile(State that) { checkNotNull(that); if (that == this) { return this; } if (this.compareTo(that) > 0) { return that.reconcile(this); } // the order of comparisons here depends on the compareTo above to ensure 'this < that' if (this == BEFORE_ATTRIBUTE_VALUE && (that == UNQUOTED_ATTRIBUTE_VALUE || that == AFTER_TAG_NAME_OR_ATTRIBUTE || that == BEFORE_ATTRIBUTE_NAME)) { // These aren't exactly compatible, but rather are an allowed transition because // 1. before an unquoted attribute value and in an unquoted attribute value are not that // different // 2. a complete attribute value is a reasonable thing to constitute a block. This enables // code like class={if $foo}"bar"{else}"baz"{/if} // and it depends on additional support in the handling of control flow nodes. return that; } if (isTagState() && that.isTagState()) { return AFTER_TAG_NAME_OR_ATTRIBUTE; } switch (this) { case NONE: case PCDATA: case RCDATA_STYLE: case RCDATA_TITLE: case RCDATA_SCRIPT: case RCDATA_TEXTAREA: case DOUBLE_QUOTED_ATTRIBUTE_VALUE: case SINGLE_QUOTED_ATTRIBUTE_VALUE: case BEFORE_ATTRIBUTE_VALUE: case HTML_COMMENT: case HTML_TAG_NAME: case XML_DECLARATION: case CDATA: case DOUBLE_QUOTED_XML_ATTRIBUTE_VALUE: case SINGLE_QUOTED_XML_ATTRIBUTE_VALUE: case AFTER_ATTRIBUTE_NAME: case UNQUOTED_ATTRIBUTE_VALUE: case BEFORE_ATTRIBUTE_NAME: case AFTER_TAG_NAME_OR_ATTRIBUTE: // these all require exact matches return null; } throw new AssertionError("unexpected state: " + this); } boolean isTagState() { return stateTypes.contains(StateFeature.TAG); } boolean isInvalidForEndOfBlock() { return stateTypes.contains(StateFeature.INVALID_END_STATE_FOR_BLOCK); } @Override public String toString() { return Ascii.toLowerCase(name().replace('_', ' ')); } } private final ErrorReporter errorReporter; private final boolean enabled; public 
HtmlRewritePass(ImmutableList<String> experimentalFeatures, ErrorReporter errorReporter) { // TODO(lukes): this is currently conditionally enabled for stricthtml to enable testing. // Turn it on unconditionally. this.enabled = experimentalFeatures.contains("stricthtml"); this.errorReporter = errorReporter; } @Override public void run(SoyFileNode file, IdGenerator nodeIdGen) { if (enabled) { new Visitor(nodeIdGen, file.getFilePath(), errorReporter).exec(file); } else { // otherwise, run on a copy of the node. // this will cause all of our edits to be discarded new Visitor(nodeIdGen, file.getFilePath(), errorReporter).exec(SoyTreeUtils.cloneNode(file)); } } private static final class Visitor extends AbstractSoyNodeVisitor<Void> { /** * Matches a subset of the inverse of {@link #TAG_IDENTIFIER_MATCHER} which indicate a parsing * error instead of a delimiter. */ static final CharMatcher INVALID_IDENTIFIER_CHAR_MATCHER = CharMatcher.anyOf("\0'\"").precomputed(); /** * Matches raw text in a tag that isn't a special character or whitespace. * * <p>This is based on the attribute parsing spec: * https://www.w3.org/TR/html5/syntax.html#attributes-0 */ static final CharMatcher TAG_IDENTIFIER_MATCHER = // delimiter charachters CharMatcher.whitespace().or(CharMatcher.anyOf(">=/")).or(INVALID_IDENTIFIER_CHAR_MATCHER) .or(new CharMatcher() { @Override public boolean matches(char c) { return Character.getType(c) == Character.CONTROL; } }).negate().precomputed(); // see https://www.w3.org/TR/html5/syntax.html#attributes-0 /** Matches all the characters that are allowed to appear in an unquoted attribute value. */ static final CharMatcher UNQUOTED_ATTRIBUTE_VALUE_MATCHER = CharMatcher.whitespace() .or(CharMatcher.anyOf("<>='\"`")).negate().precomputed(); /** Matches the delimiter characters of an unquoted attribute value. 
*/
    static final CharMatcher UNQUOTED_ATTRIBUTE_VALUE_DELIMITER =
        CharMatcher.whitespace()
            .or(CharMatcher.is('>')).precomputed();

    // Single-character "scan until" matchers: each matches everything except the one character
    // that the corresponding handler is scanning for.
    static final CharMatcher NOT_DOUBLE_QUOTE = CharMatcher.isNot('"').precomputed();
    static final CharMatcher NOT_SINGLE_QUOTE = CharMatcher.isNot('\'').precomputed();
    static final CharMatcher NOT_LT = CharMatcher.isNot('<').precomputed();
    static final CharMatcher NOT_RSQUARE_BRACE = CharMatcher.isNot(']').precomputed();
    static final CharMatcher NOT_HYPHEN = CharMatcher.isNot('-').precomputed();

    /** Matches everything except {@code >} and the two quote characters. */
    static final CharMatcher XML_DECLARATION_NON_DELIMITERS =
        CharMatcher.noneOf(">\"'").precomputed();

    final IdGenerator nodeIdGen;
    final String filePath;
    final AstEdits edits = new AstEdits();
    final ErrorReporter errorReporter;

    // RawText handling fields.
    // The raw text node currently being scanned, and its text.
    RawTextNode currentRawTextNode;
    String currentRawText;
    // Start of the not-yet-consumed region of currentRawText; never greater than the index.
    int currentRawTextOffset;
    // Position of the character currently being examined in currentRawText.
    int currentRawTextIndex;
    // NOTE(review): not assigned anywhere in this part of the file -- presumably set while
    // visiting the enclosing template or block; confirm.
    ParsingContext context;

    Visitor(IdGenerator nodeIdGen, String filePath, ErrorReporter errorReporter) {
      this.nodeIdGen = nodeIdGen;
      this.filePath = filePath;
      this.errorReporter = errorReporter;
    }

    /**
     * For RawText we need to examine every character.
     *
     * <p>We track an index and an offset into the current RawTextNode (currentRawTextIndex and
     * currentRawTextOffset respectively). 'advance' methods move the index and 'consume' methods
     * optionally move the index and always set the offset == index (they 'consume' the text
     * between the offset and the index).
     *
     * <p>handle* methods will 'handle the current state'
     *
     * <ul>
     *   <li>Precondition: They are in the given state and not at the end of the input
     *   <li>Postcondition: They have either advanced the current index or changed states
     *       (generally both)
     * </ul>
     *
     * <p>NOTE: a consequence of these conditions is that they are only guaranteed to be able to
     * consume a single character.
     *
     * <p>At the end of visiting a raw text node, all the input will be consumed.
*/
    @Override
    protected void visitRawTextNode(RawTextNode node) {
      // Reset the scanning cursor for this node.
      currentRawTextNode = node;
      currentRawText = node.getRawText();
      currentRawTextOffset = 0;
      currentRawTextIndex = 0;
      // Index at which the previous loop iteration started; -1 means "no previous iteration".
      int prevStartIndex = -1;
      while (currentRawTextIndex < currentRawText.length()) {
        int startIndex = currentRawTextIndex;
        // if whitespace was trimmed prior to the current character (e.g. leading whitespace)
        // handle it.
        // However, we should only handle it once, otherwise state transitions which don't consume
        // input may cause the same joined whitespace to be handled multiple times.
        if (startIndex != prevStartIndex && currentRawTextNode.missingWhitespaceAt(startIndex)) {
          handleJoinedWhitespace(currentPoint());
        }
        prevStartIndex = startIndex;
        // Dispatch on the state at the start of this iteration; each handler must either advance
        // the index or change the state (verified below).
        State startState = context.getState();
        switch (startState) {
          case NONE:
            // no replacements, no parsing, just jump to the end
            currentRawTextIndex = currentRawTextOffset = currentRawText.length();
            break;
          case PCDATA:
            handlePcData();
            break;
          case DOUBLE_QUOTED_ATTRIBUTE_VALUE:
            handleQuotedAttributeValue(true);
            break;
          case SINGLE_QUOTED_ATTRIBUTE_VALUE:
            handleQuotedAttributeValue(false);
            break;
          case BEFORE_ATTRIBUTE_VALUE:
            handleBeforeAttributeValue();
            break;
          case AFTER_TAG_NAME_OR_ATTRIBUTE:
            handleAfterTagNameOrAttribute();
            break;
          case BEFORE_ATTRIBUTE_NAME:
            handleBeforeAttributeName();
            break;
          case UNQUOTED_ATTRIBUTE_VALUE:
            handleUnquotedAttributeValue();
            break;
          case AFTER_ATTRIBUTE_NAME:
            handleAfterAttributeName();
            break;
          case HTML_TAG_NAME:
            handleHtmlTagName();
            break;
          case RCDATA_STYLE:
            handleRcData(TagName.RcDataTagName.STYLE);
            break;
          case RCDATA_TITLE:
            handleRcData(TagName.RcDataTagName.TITLE);
            break;
          case RCDATA_SCRIPT:
            handleRcData(TagName.RcDataTagName.SCRIPT);
            break;
          case RCDATA_TEXTAREA:
            handleRcData(TagName.RcDataTagName.TEXTAREA);
            break;
          case CDATA:
            handleCData();
            break;
          case HTML_COMMENT:
            handleHtmlComment();
            break;
          case XML_DECLARATION:
            handleXmlDeclaration();
            break;
          case DOUBLE_QUOTED_XML_ATTRIBUTE_VALUE:
            handleXmlAttributeQuoted(true);
            break;
          case SINGLE_QUOTED_XML_ATTRIBUTE_VALUE:
            handleXmlAttributeQuoted(false);
            break;
          default:
            throw new UnsupportedOperationException("cant yet handle: " + startState);
        }
        if (context.getState() == startState && startIndex == currentRawTextIndex) {
          // sanity! make sure we are making progress. Calling handle* should ensure that we
          // advance at least one character or change states. (generally both will happen, but
          // after producing an error we may only switch states).
          throw new IllegalStateException(
              "failed to make progress in state: " + startState.name() + " at " + currentLocation());
        }
        // Invariant: the consumed prefix can never extend past the scan position.
        if (currentRawTextOffset > currentRawTextIndex) {
          throw new IllegalStateException(
              "offset is greater than index! offset: "
                  + currentRawTextOffset
                  + " index: "
                  + currentRawTextIndex);
        }
      }
      if (currentRawTextIndex != currentRawText.length()) {
        throw new AssertionError("failed to visit all of the raw text");
      }
      // An offset of 0 means nothing was ever consumed from this node, in which case no
      // replacement is recorded here.
      if (currentRawTextOffset < currentRawTextIndex && currentRawTextOffset != 0) {
        // This handles all the states that just advance to the end without consuming
        // TODO(lukes): maybe this should be an error and all such states will need to consume?
        RawTextNode suffix = consumeAsRawText();
        edits.replace(node, suffix);
      }
      // handle trailing joined whitespace.
      if (currentRawTextNode.missingWhitespaceAt(currentRawText.length())) {
        handleJoinedWhitespace(currentRawTextNode.getSourceLocation().getEndPoint());
      }
    }

    /** Called to handle whitespace that was completely removed from a raw text node.
*/
    void handleJoinedWhitespace(SourceLocation.Point point) {
      // The removed whitespace only matters in states where whitespace acts as a delimiter; all
      // other states fall into the shared no-op at the bottom of the switch.
      switch (context.getState()) {
        case UNQUOTED_ATTRIBUTE_VALUE:
          // whitespace terminates an unquoted value, so finish the value first
          context.createUnquotedAttributeValue(point);
          // fall-through
        case AFTER_TAG_NAME_OR_ATTRIBUTE:
          context.setState(State.BEFORE_ATTRIBUTE_NAME, point);
          return;
        case AFTER_ATTRIBUTE_NAME:
          int currentChar = currentChar();
          // We are at the end of the raw text, or it is some character other than whitespace or an
          // equals sign -> BEFORE_ATTRIBUTE_NAME
          if (currentChar == -1
              || (!CharMatcher.whitespace().matches((char) currentChar)
                  && '=' != (char) currentChar)) {
            context.setState(State.BEFORE_ATTRIBUTE_NAME, point);
            return;
          }
          // fall through
        case BEFORE_ATTRIBUTE_VALUE:
        case BEFORE_ATTRIBUTE_NAME:
        case DOUBLE_QUOTED_XML_ATTRIBUTE_VALUE:
        case HTML_TAG_NAME:
        case HTML_COMMENT:
        case CDATA:
        case NONE:
        case SINGLE_QUOTED_ATTRIBUTE_VALUE:
        case DOUBLE_QUOTED_ATTRIBUTE_VALUE:
          // TODO(lukes): line joining and whitespace removal can happen within quoted attribute
          // values... we could take steps to undo it... should we?
        case PCDATA:
        case RCDATA_SCRIPT:
        case RCDATA_STYLE:
        case RCDATA_TEXTAREA:
        case RCDATA_TITLE:
        case SINGLE_QUOTED_XML_ATTRIBUTE_VALUE:
        case XML_DECLARATION:
          // no op
          return;
      }
      // Only reachable if the state is a constant that the switch above does not list.
      throw new AssertionError();
    }

    /**
     * Handle rcdata blocks (script, style, title, textarea).
     *
     * <p>Scans for {@code </tagName} and if it finds it, enters {@link State#PCDATA}.
     *
     * @param tagName the rcdata tag whose close tag ends the block
     */
    void handleRcData(TagName.RcDataTagName tagName) {
      boolean foundLt = advanceWhileMatches(NOT_LT);
      if (foundLt) {
        if (matchPrefixIgnoreCase("</" + tagName, false /* don't advance */)) {
          // we don't advance. instead we just switch to pcdata and since the current index is on
          // a '<' character, this will cause us to parse a close tag, which is what we want
          context.setState(State.PCDATA, currentPoint());
        } else {
          // some other '<': skip it and keep scanning on the next call
          advance();
        }
      }
    }

    /**
     * Handle cdata.
     *
     * <p>Scans for {@code ]]>} and if it finds it, enters {@link State#PCDATA}.
*/ void handleCData() { boolean foundBrace = advanceWhileMatches(NOT_RSQUARE_BRACE); if (foundBrace) { if (matchPrefix("]]>", true)) { context.setState(State.PCDATA, currentPointOrEnd()); } else { advance(); } } } /** * Handle html comments. * * <p>Scans for {@code -->} and if it finds it, enters {@link State#PCDATA}. */ void handleHtmlComment() { boolean foundHyphen = advanceWhileMatches(NOT_HYPHEN); if (foundHyphen) { if (matchPrefix("-->", true)) { context.setState(State.PCDATA, currentPointOrEnd()); } else { advance(); } } } /** * Handle {@link State#XML_DECLARATION}. * * <p>This is for things like {@code <!DOCTYPE HTML PUBLIC * "http://www.w3.org/TR/html4/strict.dtd">}. . We are looking for the end or a quoted * 'attribute'. */ void handleXmlDeclaration() { boolean foundDelimiter = advanceWhileMatches(XML_DECLARATION_NON_DELIMITERS); if (foundDelimiter) { int c = currentChar(); SourceLocation.Point currentPoint = currentPoint(); advance(); if (c == '"') { context.setState(State.DOUBLE_QUOTED_XML_ATTRIBUTE_VALUE, currentPoint); } else if (c == '\'') { context.setState(State.SINGLE_QUOTED_XML_ATTRIBUTE_VALUE, currentPoint); } else if (c == '>') { context.setState(State.PCDATA, currentPoint); } else { throw new AssertionError("unexpected character: " + c); } } } /** Handle an xml quoted attribute. We just scan for the appropriate quote character. */ void handleXmlAttributeQuoted(boolean doubleQuoted) { boolean foundQuote = advanceWhileMatches(doubleQuoted ? NOT_DOUBLE_QUOTE : NOT_SINGLE_QUOTE); if (foundQuote) { advance(); context.setState(State.XML_DECLARATION, currentPoint()); } } /** * Handle pcdata. * * <p>Scan for {@code <} if we find it we know we are at the start of a tag, a comment, cdata or * an xml declaration. Create a text node for everything up to the beginning of the tag-like * thing. 
*/
    void handlePcData() {
      boolean foundLt = advanceWhileMatches(NOT_LT);
      // everything scanned so far is plain text; null if the '<' was the very first character
      RawTextNode node = consumeAsRawText();
      if (node != null) {
        edits.replace(currentRawTextNode, node);
      }
      // if there is more, then we stopped advancing because we hit a '<' character
      if (foundLt) {
        SourceLocation.Point ltPoint = currentPoint();
        // The order of these checks matters: "<!--" and "<![cdata" must be tried before the
        // more general "<!" prefix.
        if (matchPrefix("<!--", true)) {
          context.setState(State.HTML_COMMENT, ltPoint);
        } else if (matchPrefixIgnoreCase("<![cdata", true)) {
          context.setState(State.CDATA, ltPoint);
        } else if (matchPrefix("<!", true) || matchPrefix("<?", true)) {
          context.setState(State.XML_DECLARATION, ltPoint);
        } else {
          // if it isn't either of those special cases, enter a tag
          boolean isCloseTag = matchPrefix("</", false);
          context.startTag(currentRawTextNode, isCloseTag, currentPoint());
          advance(); // go past the '<'
          if (isCloseTag) {
            advance(); // go past the '/'
          }
          consume(); // set the offset to the current index
          context.setState(State.HTML_TAG_NAME, ltPoint);
        }
      }
    }

    /**
     * Handle parsing an html tag name.
     *
     * <p>Look for an html identifier and transition to {@code AFTER_TAG_NAME_OR_ATTRIBUTE}. (The
     * transition itself is not visible in this method; presumably it is performed by {@code
     * context.setTagName} -- confirm.)
     */
    void handleHtmlTagName() {
      // special case error message handle whitespace following a <, report an error and assume it
      // wasn't the start of a tag.
      if (consumeWhitespace()) {
        errorReporter.report(context.tagStartLocation(), UNEXPECTED_WS_AFTER_LT);
        context.reset();
        context.setState(State.PCDATA, currentPointOrEnd());
        return;
      }
      RawTextNode node = consumeHtmlIdentifier();
      if (node == null) {
        // we must have immediately come across a delimiter like <, =, >, ' or "
        // NOTE(review): consumeHtmlIdentifier already reports GENERIC_UNEXPECTED_CHAR and skips
        // the offending character when it returns null, so this reports a second error, located
        // at the character after it (possibly past the end of the text) -- confirm this is
        // intended.
        errorReporter.report(currentLocation(), GENERIC_UNEXPECTED_CHAR, "an html tag");
        // use a placeholder name so that a tag name is still set
        context.setTagName(
            new TagName(new RawTextNode(nodeIdGen.genId(), "$parse-error$", currentLocation())));
      } else {
        context.setTagName(new TagName(node));
      }
    }

    /**
     * Handle the state immediately after an attribute or tag.
     *
     * <p>Look for either whitespace or the end of the tag.
*/
    void handleAfterTagNameOrAttribute() {
      if (consumeWhitespace()) {
        context.setState(State.BEFORE_ATTRIBUTE_NAME, currentPointOrEnd());
        return;
      }
      if (!tryCreateTagEnd()) {
        errorReporter.report(currentLocation(), EXPECTED_WS_OR_CLOSE_AFTER_TAG_OR_ATTRIBUTE);
        // transition to a new state and try to keep going, note, we don't consume the current
        // character
        context.setState(State.BEFORE_ATTRIBUTE_NAME, currentPoint());
        advance(); // move ahead
      }
    }

    /**
     * Handle the state where we are right before an attribute.
     *
     * <p>This state is kind of confusing, it just means we are in the middle of a tag and are
     * definitely after some whitespace.
     */
    void handleBeforeAttributeName() {
      if (tryCreateTagEnd()) {
        return;
      }
      if (consumeWhitespace()) {
        // if we consumed whitespace, return and keep going.
        // we don't necessarily expect whitespace, but it is ok if there is extra whitespace here
        // this can happen in the case of kind="attributes" blocks which start in this state, or
        // if raw text nodes are split strangely.
        // We have to return in case we hit the end of the raw text.
        return;
      }
      RawTextNode identifier = consumeHtmlIdentifier();
      if (identifier == null) {
        // consumeHtmlIdentifier will have already reported an error
        context.resetAttribute();
        return;
      }
      context.startAttribute(identifier);
    }

    /**
     * Handle immediately after an attribute name.
     *
     * <p>Look for an '=' sign to signal the presence of an attribute value
     */
    void handleAfterAttributeName() {
      // remember whether whitespace followed the name: it decides the next state below
      boolean ws = consumeWhitespace();
      int current = currentChar();
      if (current == '=') {
        SourceLocation.Point equalsSignPoint = currentPoint();
        advance();
        consume(); // eat the '='
        consumeWhitespace();
        // NOTE(review): no state is set in this branch; presumably setEqualsSignLocation moves
        // the context on to the attribute value -- confirm.
        context.setEqualsSignLocation(equalsSignPoint, currentPointOrEnd());
      } else {
        // we must have seen some non '=' character (or the end of the text), it doesn't matter
        // what it is, switch to the next state. (creation of the attribute will happen
        // automatically if it hasn't already).
        context.setState(
            ws ? State.BEFORE_ATTRIBUTE_NAME : State.AFTER_TAG_NAME_OR_ATTRIBUTE,
            currentPointOrEnd());
      }
    }

    /**
     * Handle immediately before an attribute value.
     *
     * <p>Look for a quote character to signal the beginning of a quoted attribute value or switch
     * to UNQUOTED_ATTRIBUTE_VALUE to handle that.
     */
    void handleBeforeAttributeValue() {
      // per https://www.w3.org/TR/html5/syntax.html#attributes-0
      // we are allowed an arbitrary amount of whitespace preceding an attribute value.
      boolean ws = consumeWhitespace();
      if (ws) {
        // return without changing states to handle eof conditions
        return;
      }
      int c = currentChar();
      if (c == '\'' || c == '"') {
        SourceLocation.Point quotePoint = currentPoint();
        context.startQuotedAttributeValue(
            currentRawTextNode,
            quotePoint,
            c == '"' ? State.DOUBLE_QUOTED_ATTRIBUTE_VALUE : State.SINGLE_QUOTED_ATTRIBUTE_VALUE);
        // step over the opening quote so that it is not part of the value
        advance();
        consume();
      } else {
        // no input is consumed here; the state change alone counts as progress
        context.setState(State.UNQUOTED_ATTRIBUTE_VALUE, currentPoint());
      }
    }

    /**
     * Handle unquoted attribute values.
     *
     * <p>Search for whitespace or the end of the tag as a delimiter.
     */
    void handleUnquotedAttributeValue() {
      boolean foundDelimiter = advanceWhileMatches(UNQUOTED_ATTRIBUTE_VALUE_MATCHER);
      RawTextNode node = consumeAsRawText();
      if (node != null) {
        context.addAttributeValuePart(node);
      }
      if (foundDelimiter) {
        context.createUnquotedAttributeValue(currentPoint());
        char c = (char) currentChar();
        // We stopped on a character that may not appear in an unquoted value. Whitespace and '>'
        // are legitimate delimiters and are left for the next state; anything else is an error.
        if (!UNQUOTED_ATTRIBUTE_VALUE_DELIMITER.matches(c)) {
          errorReporter.report(currentLocation(), ILLEGAL_HTML_ATTRIBUTE_CHARACTER);
          // go past it and consume it
          advance();
          consume();
        }
      }
      // otherwise keep going, to support things like
      // <div a=a{$p}>
      // <div a={$p}a>
      // <div a=a{$p}a>
    }

    /**
     * Handle a quoted attribute value.
     *
     * <p>These are easy we just look for the end quote.
     *
     * @param doubleQuoted {@code true} if the value is delimited by {@code "}, {@code false} for
     *     {@code '}
     * @throws AbortParsingBlockError if the closing quote is found but the context reports no
     *     quoted attribute value parts
     */
    void handleQuotedAttributeValue(boolean doubleQuoted) {
      boolean hasQuote = advanceWhileMatches(doubleQuoted ? NOT_DOUBLE_QUOTE : NOT_SINGLE_QUOTE);
      RawTextNode data = consumeAsRawText();
      if (data != null) {
        context.addAttributeValuePart(data);
      }
      if (hasQuote) {
        if (context.hasQuotedAttributeValueParts()) {
          context.createQuotedAttributeValue(currentRawTextNode, doubleQuoted, currentPoint());
        } else {
          errorReporter.report(currentLocation(), FOUND_END_OF_ATTRIBUTE_STARTED_IN_ANOTHER_BLOCK);
          throw new AbortParsingBlockError();
        }
        // consume the quote
        advance();
        consume();
      }
    }

    /**
     * Attempts to finish the current tag, returns true if it did.
     *
     * <p>A tag ends at {@code >} or at the self-closing sequence {@code />}.
     *
     * @throws AbortParsingBlockError if a tag end is found but the context has no tag start
     */
    boolean tryCreateTagEnd() {
      int c = currentChar();
      if (c == '>') {
        if (context.hasTagStart()) {
          SourceLocation.Point point = currentPoint();
          context.setState(context.createTag(currentRawTextNode, false, point), point);
        } else {
          errorReporter.report(currentLocation(), FOUND_END_TAG_STARTED_IN_ANOTHER_BLOCK);
          throw new AbortParsingBlockError();
        }
        advance();
        consume();
        return true;
      } else if (matchPrefix("/>", false)) {
        // position the index on the '>' so that the end location of the tag calculated by
        // currentPoint is accurate
        advance();
        if (context.hasTagStart()) {
          SourceLocation.Point point = currentPoint();
          context.setState(context.createTag(currentRawTextNode, true, point), point);
        } else {
          errorReporter.report(currentLocation(), FOUND_END_TAG_STARTED_IN_ANOTHER_BLOCK);
          throw new AbortParsingBlockError();
        }
        // consume the rest of the '/>'
        advance();
        consume();
        return true;
      }
      return false;
    }

    /**
     * Advances the index for as long as the current character matches {@code c}.
     *
     * <p>Returns {@code true} if we haven't reached the end of the string, i.e. we stopped on a
     * character that does not match.
     */
    boolean advanceWhileMatches(CharMatcher c) {
      int next = currentChar();
      while (next != -1 && c.matches((char) next)) {
        advance();
        next = currentChar();
      }
      return next != -1;
    }

    /**
     * Eats all whitespace from the input prefix. Returns {@code true} if we matched any whitespace
     * characters.
*/ boolean consumeWhitespace() { int startIndex = currentRawTextIndex; advanceWhileMatches(CharMatcher.whitespace()); consume(); edits.remove(currentRawTextNode); // mark the current node for removal return currentRawTextIndex != startIndex; } /** * Scans until the next whitespace, > or />, validates that the matched text is an html * identifier and returns it. */ @Nullable RawTextNode consumeHtmlIdentifier() { // rather than use a regex to match the prefix, we just consume all non-whitespace/non-meta // characters and then validate the text afterwards. boolean foundDelimiter = advanceWhileMatches(TAG_IDENTIFIER_MATCHER); RawTextNode node = consumeAsRawText(); if (node != null) { if (foundDelimiter && INVALID_IDENTIFIER_CHAR_MATCHER.matches((char) currentChar())) { errorReporter.report(currentLocation(), INVALID_IDENTIFIER, (char) currentChar()); // consume the bad char advance(); consume(); } } else { errorReporter.report(currentLocation(), GENERIC_UNEXPECTED_CHAR, "an html identifier"); // consume the character advance(); consume(); } return node; } /** * Returns [{@link #currentRawTextOffset}, {@link #currentRawTextIndex}) as a RawTextNode, or * {@code null} if it is empty. */ @Nullable RawTextNode consumeAsRawText() { if (currentRawTextIndex == currentRawTextOffset) { return null; } edits.remove(currentRawTextNode); RawTextNode node = currentRawTextNode.substring(nodeIdGen.genId(), currentRawTextOffset, currentRawTextIndex); consume(); return node; } /** Returns the location of the current character. */ SourceLocation currentLocation() { return currentRawTextNode.substringLocation(currentRawTextIndex, currentRawTextIndex + 1); } /** The {@code SourceLocation.Point} of the {@code currentRawTextIndex}. */ SourceLocation.Point currentPoint() { return currentRawTextNode.locationOf(currentRawTextIndex); } /** * The {@code SourceLocation.Point} of the {@code currentRawTextIndex} or the end of the raw * text if we are at the end. 
*/ SourceLocation.Point currentPointOrEnd() { if (currentRawText.length() > currentRawTextIndex) { return currentPoint(); } return currentRawTextNode.getSourceLocation().getEndPoint(); } /** Returns the current character or {@code -1} if we are at the end of the output. */ int currentChar() { if (currentRawTextIndex < currentRawText.length()) { return currentRawText.charAt(currentRawTextIndex); } return -1; } /** Advances the {@link #currentRawTextIndex} by {@code n} */ void advance(int n) { checkArgument(n > 0); for (int i = 0; i < n; i++) { advance(); } } /** Advances the {@link #currentRawTextIndex} by {@code 1} */ void advance() { if (currentRawTextIndex >= currentRawText.length()) { throw new AssertionError("already advanced to the end, shouldn't advance any more"); } currentRawTextIndex++; } /** Sets the {@link #currentRawTextOffset} to be equal to {@link #currentRawTextIndex}. */ void consume() { currentRawTextOffset = currentRawTextIndex; } /** * Returns true if the beginning of the input matches the given prefix. * * @param advance if the prefix matches, advance the length of the prefix. */ boolean matchPrefix(String prefix, boolean advance) { if (currentRawText.startsWith(prefix, currentRawTextIndex)) { if (advance) { advance(prefix.length()); } return true; } return false; } /** * Returns true if the beginning of the input matches the given prefix ignoring ASCII case. * * @param advance if the prefix matches, advance the length of the prefix. 
*/ boolean matchPrefixIgnoreCase(String s, boolean advance) { if (currentRawTextIndex + s.length() <= currentRawText.length()) { // we use an explicit loop instead of Ascii.equalsIgnoringCase + substring to avoid the // allocations implied by substring for (int i = 0; i < s.length(); i++) { char c1 = s.charAt(i); char c2 = currentRawText.charAt(i + currentRawTextIndex); if (c1 != c2 && toLowerCase(c1) != toLowerCase(c2)) { return false; } } if (advance) { advance(s.length()); } return true; } return false; } // scoped blocks, each one of these can enter/exit a new state @Override protected void visitTemplateNode(TemplateNode node) { // reset everything for each template edits.clear(); context = null; Checkpoint checkPoint = errorReporter.checkpoint(); visitScopedBlock(node.getContentKind(), node, "template"); // we only rewrite the template if there were no new errors while parsing it if (!errorReporter.errorsSince(checkPoint)) { edits.apply(); } } @Override protected void visitLetValueNode(LetValueNode node) { processNonPrintableNode(node); } @Override protected void visitLetContentNode(LetContentNode node) { visitScopedBlock(node.getContentKind(), node, "let"); processNonPrintableNode(node); } @Override protected void visitDebuggerNode(DebuggerNode node) { processNonPrintableNode(node); } @Override protected void visitCallParamContentNode(CallParamContentNode node) { visitScopedBlock(node.getContentKind(), node, "param"); } @Override protected void visitCallParamValueNode(CallParamValueNode node) { // do nothing } @Override protected void visitCallNode(CallNode node) { visitChildren(node); processPrintableNode(node); if (context.getState() == State.PCDATA) { node.setIsPcData(true); } } @Override protected void visitMsgFallbackGroupNode(MsgFallbackGroupNode node) { // TODO(lukes): Start parsing html tags in {msg} nodes here instead of in the parser. 
// to avoid doing it now, we don't visit children processPrintableNode(node); } // control flow blocks @Override protected void visitForNode(ForNode node) { visitControlFlowStructure(node, ImmutableList.<BlockNode>of(node), "for loop", Functions.constant("loop body"), false /* no guarantee that the children will execute exactly once. */, false /* no guarantee that the children will execute at least once. */); } @Override protected void visitForeachNode(ForeachNode node) { visitControlFlowStructure(node, node.getChildren(), "foreach loop", new Function<BlockNode, String>() { @Override public String apply(@Nullable BlockNode input) { if (input instanceof ForeachNonemptyNode) { return "loop body"; } return "ifempty block"; } }, false /* no guarantee that the children will only execute once. */, node.hasIfEmptyBlock() /* one branch will execute if there is an ifempty block. */); } @Override protected void visitIfNode(final IfNode node) { boolean hasElse = node.hasElse(); visitControlFlowStructure(node, node.getChildren(), "if", new Function<BlockNode, String>() { @Override public String apply(@Nullable BlockNode input) { if (input instanceof IfCondNode) { if (node.getChild(0) == input) { return "if block"; } return "elseif block"; } return "else block"; } }, // one and only one child will execute if we have an else hasElse, hasElse); } @Override protected void visitSwitchNode(SwitchNode node) { boolean hasDefault = node.hasDefaultCase(); visitControlFlowStructure(node, node.getChildren(), "switch", new Function<BlockNode, String>() { @Override public String apply(@Nullable BlockNode input) { if (input instanceof SwitchCaseNode) { return "case block"; } return "default block"; } }, // one and only one child will execute if we have a default hasDefault, hasDefault); } @Override protected void visitLogNode(LogNode node) { // we don't need to create a new context, just set the state to NONE there is no transition // from NONE to anything else. 
State oldState = context.setState(State.NONE, node.getSourceLocation().getBeginPoint()); visitChildren(node); context.setState(oldState, node.getSourceLocation().getEndPoint()); processNonPrintableNode(node); } @Override protected void visitCssNode(CssNode node) { processPrintableNode(node); } @Override protected void visitPrintNode(PrintNode node) { processPrintableNode(node); // no need to visit children. The only children are PrintDirectiveNodes which are more like // expressions than soy nodes. } @Override protected void visitXidNode(XidNode node) { processPrintableNode(node); } void processNonPrintableNode(StandaloneNode node) { switch (context.getState()) { case AFTER_TAG_NAME_OR_ATTRIBUTE: case BEFORE_ATTRIBUTE_NAME: case AFTER_ATTRIBUTE_NAME: context.addTagChild(node); break; case BEFORE_ATTRIBUTE_VALUE: errorReporter.report(node.getSourceLocation(), INVALID_LOCATION_FOR_NONPRINTABLE, "move it before the start of the tag or after the tag name"); break; case HTML_TAG_NAME: errorReporter.report(node.getSourceLocation(), INVALID_LOCATION_FOR_NONPRINTABLE, "it creates ambiguity with an unquoted attribute value"); break; case UNQUOTED_ATTRIBUTE_VALUE: case DOUBLE_QUOTED_ATTRIBUTE_VALUE: case SINGLE_QUOTED_ATTRIBUTE_VALUE: context.addAttributeValuePart(node); break; case HTML_COMMENT: case NONE: case PCDATA: case RCDATA_SCRIPT: case RCDATA_STYLE: case RCDATA_TEXTAREA: case RCDATA_TITLE: case XML_DECLARATION: case CDATA: case DOUBLE_QUOTED_XML_ATTRIBUTE_VALUE: case SINGLE_QUOTED_XML_ATTRIBUTE_VALUE: // do nothing break; default: throw new AssertionError(); } } /** Printable nodes are things like {xid..} and {print ..}. 
*/ void processPrintableNode(StandaloneNode node) { checkState(node.getKind() != Kind.RAW_TEXT_NODE); switch (context.getState()) { case AFTER_TAG_NAME_OR_ATTRIBUTE: errorReporter.report(node.getSourceLocation(), EXPECTED_WS_OR_CLOSE_AFTER_TAG_OR_ATTRIBUTE); break; case AFTER_ATTRIBUTE_NAME: errorReporter.report(node.getSourceLocation(), EXPECTED_WS_EQ_OR_CLOSE_AFTER_ATTRIBUTE_NAME); break; case BEFORE_ATTRIBUTE_NAME: context.startAttribute(node); break; case HTML_TAG_NAME: if (node.getKind() == Kind.PRINT_NODE) { context.setTagName(new TagName((PrintNode) node)); edits.remove(node); } else { errorReporter.report(node.getSourceLocation(), INVALID_TAG_NAME); } break; case BEFORE_ATTRIBUTE_VALUE: // we didn't see a quote, so just turn this into an attribute value. context.setState(State.UNQUOTED_ATTRIBUTE_VALUE, node.getSourceLocation().getBeginPoint()); // fall through case DOUBLE_QUOTED_ATTRIBUTE_VALUE: case SINGLE_QUOTED_ATTRIBUTE_VALUE: case UNQUOTED_ATTRIBUTE_VALUE: context.addAttributeValuePart(node); break; case HTML_COMMENT: case NONE: case PCDATA: case RCDATA_SCRIPT: case RCDATA_STYLE: case RCDATA_TEXTAREA: case RCDATA_TITLE: case XML_DECLARATION: case CDATA: case DOUBLE_QUOTED_XML_ATTRIBUTE_VALUE: case SINGLE_QUOTED_XML_ATTRIBUTE_VALUE: // do nothing break; default: throw new AssertionError("unexpected state: " + context.getState()); } } @Override protected void visitSoyFileNode(SoyFileNode node) { visitChildren(node); } @Override protected void visitSoyNode(SoyNode node) { throw new UnsupportedOperationException(node.getKind() + " isn't supported yet"); } /** Visits a block whose content is in an entirely separate content scope. 
*/ void visitScopedBlock(ContentKind blockKind, BlockNode parent, String name) { State startState = State.fromKind(blockKind); Checkpoint checkpoint = errorReporter.checkpoint(); ParsingContext newCtx = newParsingContext(name, startState, parent.getSourceLocation().getBeginPoint()); ParsingContext oldCtx = setContext(newCtx); visitBlock(startState, parent, name, checkpoint); setContext(oldCtx); // restore } /** * Visits a control flow structure like an if, switch or a loop. * * <p>The main thing this is responsible for is calculating what state to enter after the * control flow is complete. * * @param parent The parent node, each child will be a block representing one of the branches * @param children The child blocks. We don't use {@code parent.getChildren()} directly to make * it possible to handle ForNodes using this method. * @param overallName The name, for error reporting purposes, to assign to the control flow * structure * @param blockNamer A function to provide a name for each child block, the key is the index of * the block * @param willExactlyOneBranchExecuteOnce Whether or not it is guaranteed that exactly one * branch of the structure will execute exactly one time. * @param willAtLeastOneBranchExecute Whether or not it is guaranteed that at least one of the * branches will execute (as opposed to no branches executing). */ void visitControlFlowStructure(StandaloneNode parent, List<? extends BlockNode> children, String overallName, Function<? super BlockNode, String> blockNamer, boolean willExactlyOneBranchExecuteOnce, boolean willAtLeastOneBranchExecute) { // this insane case can happen with SwitchNodes. 
if (children.isEmpty()) { return; } State startingState = context.getState(); State endingState = visitBranches(children, blockNamer); SourceLocation.Point endPoint = parent.getSourceLocation().getEndPoint(); switch (startingState) { case AFTER_TAG_NAME_OR_ATTRIBUTE: case BEFORE_ATTRIBUTE_NAME: case AFTER_ATTRIBUTE_NAME: context.addTagChild(parent); // at this point we are in AFTER_TAG_NAME_OR_ATTRIBUTE, switch to whatever the branches // ended in, the reconcilation logic may have calculated a better state (like // BEFORE_ATTRIBUTE_NAME). context.setState(endingState, endPoint); break; case HTML_TAG_NAME: errorReporter.report(parent.getSourceLocation(), INVALID_LOCATION_FOR_CONTROL_FLOW, overallName, "html tag names can only be constants or print nodes"); // give up on parsing this tag :( throw new AbortParsingBlockError(); case BEFORE_ATTRIBUTE_VALUE: if (!willExactlyOneBranchExecuteOnce) { errorReporter.report(parent.getSourceLocation(), CONDITIONAL_BLOCK_ISNT_GUARANTEED_TO_PRODUCE_ONE_ATTRIBUTE_VALUE, overallName); // we continue and pretend like everything was ok } // theoretically we might want to support x={if $p}y{else}z{/if}w, in which case this // should be an attribute value part. 
We could support this if the branch ending state // was UNQUOTED_ATTRIBUTE_VALUE and at least one of the branches will execute if (willAtLeastOneBranchExecute && endingState == State.UNQUOTED_ATTRIBUTE_VALUE) { context.addAttributeValuePart(parent); context.setState(State.UNQUOTED_ATTRIBUTE_VALUE, endPoint); } else { context.setAttributeValue(parent); if (willAtLeastOneBranchExecute && endingState == State.BEFORE_ATTRIBUTE_NAME) { context.setState(State.BEFORE_ATTRIBUTE_NAME, endPoint); } } break; case UNQUOTED_ATTRIBUTE_VALUE: case DOUBLE_QUOTED_ATTRIBUTE_VALUE: case SINGLE_QUOTED_ATTRIBUTE_VALUE: context.addAttributeValuePart(parent); // no need to tweak any state, addAttributeValuePart doesn't modify anything break; case HTML_COMMENT: case NONE: case PCDATA: case RCDATA_SCRIPT: case RCDATA_STYLE: case RCDATA_TEXTAREA: case RCDATA_TITLE: case XML_DECLARATION: case CDATA: case DOUBLE_QUOTED_XML_ATTRIBUTE_VALUE: case SINGLE_QUOTED_XML_ATTRIBUTE_VALUE: // do nothing break; default: throw new AssertionError("unexpected control flow starting state: " + startingState); } } /** Visit the branches of a control flow structure. */ State visitBranches(List<? extends BlockNode> children, Function<? super BlockNode, String> blockNamer) { Checkpoint checkpoint = errorReporter.checkpoint(); State startState = context.getState(); State endingState = null; for (BlockNode block : children) { String blockName = blockNamer.apply(block); ParsingContext newCtx = newParsingContext(blockName, startState, block.getSourceLocation().getBeginPoint()); ParsingContext oldCtx = setContext(newCtx); endingState = visitBlock(startState, block, blockName, checkpoint); setContext(oldCtx); // restore } if (errorReporter.errorsSince(checkpoint)) { return startState; } return endingState; } /** Visits a block and returns the finalState. 
*/ State visitBlock(State startState, BlockNode node, String blockName, Checkpoint checkpoint) { try { visitChildren(node); } catch (AbortParsingBlockError abortProcessingError) { // we reported some error and just gave up on the block // try to switch back to a reasonable state based on the start state and keep going. switch (startState) { case AFTER_ATTRIBUTE_NAME: case AFTER_TAG_NAME_OR_ATTRIBUTE: case BEFORE_ATTRIBUTE_NAME: case BEFORE_ATTRIBUTE_VALUE: case SINGLE_QUOTED_ATTRIBUTE_VALUE: case DOUBLE_QUOTED_ATTRIBUTE_VALUE: case UNQUOTED_ATTRIBUTE_VALUE: case HTML_TAG_NAME: context.resetAttribute(); context.setState(State.BEFORE_ATTRIBUTE_NAME, node.getSourceLocation().getEndPoint()); break; case CDATA: case DOUBLE_QUOTED_XML_ATTRIBUTE_VALUE: case HTML_COMMENT: case NONE: case PCDATA: case RCDATA_SCRIPT: case RCDATA_STYLE: case RCDATA_TEXTAREA: case RCDATA_TITLE: case SINGLE_QUOTED_XML_ATTRIBUTE_VALUE: case XML_DECLARATION: context.reset(); context.setState(startState, node.getSourceLocation().getEndPoint()); break; } } context.finishBlock(); State finalState = context.getState(); SourceLocation.Point finalStateTransitionPoint = context.getStateTransitionPoint(); if (finalState.isInvalidForEndOfBlock()) { errorReporter.report(node.getSourceLocation(), BLOCK_ENDS_IN_INVALID_STATE, blockName, finalState); finalState = startState; } if (!errorReporter.errorsSince(checkpoint)) { State reconciled = startState.reconcile(finalState); if (reconciled == null) { String suggestion = reconciliationFailureHint(startState, finalState); errorReporter.report(finalStateTransitionPoint.asLocation(filePath), BLOCK_CHANGES_CONTEXT, blockName, startState, finalState, suggestion != null ? 
" " + suggestion : ""); } else { finalState = reconciled; reparentNodes(node, context, finalState); } } else { // an error occured, restore the start state to help avoid an error explosion finalState = startState; } context.setState(finalState, node.getSourceLocation().getEndPoint()); return finalState; } /** * After visiting a block, this will transfer all the partial values in the blockCtx to the * parent. * * @param parent The block node * @param blockCtx The context after visiting the block * @param finalState The reconciled state after visiting the block */ static void reparentNodes(BlockNode parent, ParsingContext blockCtx, State finalState) { // if there were no errors we may need to conditionally add new children, this only really // applies to attributes which may be partially finished (to allow for things like // foo=a{if $x}b{/if} // TODO(lukes): consider eliminating this method by moving the logic for reparenting into // ParsingContext and do it as part of creating the nodes. switch (finalState) { case AFTER_TAG_NAME_OR_ATTRIBUTE: blockCtx.maybeFinishPendingAttribute(parent.getSourceLocation().getEndPoint()); // fall-through case BEFORE_ATTRIBUTE_NAME: case AFTER_ATTRIBUTE_NAME: blockCtx.reparentDirectTagChildren(parent); break; case UNQUOTED_ATTRIBUTE_VALUE: case DOUBLE_QUOTED_ATTRIBUTE_VALUE: case SINGLE_QUOTED_ATTRIBUTE_VALUE: blockCtx.reparentAttributeValueChildren(parent); break; case HTML_COMMENT: case NONE: case PCDATA: case RCDATA_SCRIPT: case RCDATA_STYLE: case RCDATA_TEXTAREA: case RCDATA_TITLE: case XML_DECLARATION: case CDATA: case DOUBLE_QUOTED_XML_ATTRIBUTE_VALUE: case SINGLE_QUOTED_XML_ATTRIBUTE_VALUE: // do nothing.. there should be nothing in context break; case BEFORE_ATTRIBUTE_VALUE: case HTML_TAG_NAME: // impossible? 
default: throw new AssertionError("found non-empty context for unexpected state: " + blockCtx.getState()); } blockCtx.checkEmpty("context not fully reparented after '%s'", finalState); } /** Gives a hint when we fail to reconcile to states. */ static String reconciliationFailureHint(State startState, State finalState) { switch (finalState) { case PCDATA: // we could suggest for this one based on the start state maybe? return null; // no suggestion case BEFORE_ATTRIBUTE_VALUE: return "Expected an attribute value before the end of the block"; case CDATA: return didYouForgetToCloseThe("CDATA section"); case SINGLE_QUOTED_ATTRIBUTE_VALUE: case DOUBLE_QUOTED_ATTRIBUTE_VALUE: case DOUBLE_QUOTED_XML_ATTRIBUTE_VALUE: case SINGLE_QUOTED_XML_ATTRIBUTE_VALUE: return didYouForgetToCloseThe("attribute value"); case HTML_COMMENT: return didYouForgetToCloseThe("html comment"); case RCDATA_SCRIPT: return didYouForgetToCloseThe("<script> block"); case RCDATA_STYLE: return didYouForgetToCloseThe("<style> block"); case RCDATA_TEXTAREA: return didYouForgetToCloseThe("<textare> block"); case RCDATA_TITLE: return didYouForgetToCloseThe("<title> block"); case HTML_TAG_NAME: // kind of crazy case AFTER_ATTRIBUTE_NAME: case AFTER_TAG_NAME_OR_ATTRIBUTE: case BEFORE_ATTRIBUTE_NAME: case XML_DECLARATION: case UNQUOTED_ATTRIBUTE_VALUE: // if this wasn't reconciled, it means they probably forgot to close the tag if (startState == State.PCDATA) { return "Did you forget to close the tag?"; } return null; case NONE: // should be impossible, there are no transitions into NONE from non-NONE } throw new AssertionError("unexpected final state: " + finalState); } static String didYouForgetToCloseThe(String thing) { return "Did you forget to close the " + thing + "?"; } ParsingContext setContext(ParsingContext ctx) { ParsingContext old = context; context = ctx; return old; } /** @param state The state to start the context in. 
*/ ParsingContext newParsingContext(String blockName, State state, SourceLocation.Point startPoint) { return new ParsingContext(blockName, state, startPoint, filePath, edits, errorReporter, nodeIdGen); } } /** * A class to record all the edits to the AST we need to make. * * <p>Instead of editing the AST as we go, we record the edits and wait until the end, this makes * a few things easier. * * <ul> * <li>we don't have to worry about editing nodes while visiting them * <li>we can easily avoid making any edits if errors were recorded. * <p>This is encapsulated in its own class so we can easily pass it around and to provide * some encapsulation. * </ul> */ private static final class AstEdits { final Set<StandaloneNode> toRemove = new LinkedHashSet<>(); final ListMultimap<StandaloneNode, StandaloneNode> replacements = MultimapBuilder.linkedHashKeys() .arrayListValues().build(); final ListMultimap<ParentSoyNode<StandaloneNode>, StandaloneNode> newChildren = MultimapBuilder .linkedHashKeys().arrayListValues().build(); /** Apply all edits. */ void apply() { for (StandaloneNode nodeToRemove : toRemove) { ParentSoyNode<StandaloneNode> parent = nodeToRemove.getParent(); int index = parent.getChildIndex(nodeToRemove); // NOTE: we need to remove the child before adding the new children to handle the case // where we are doing a no-op replacement or the replacement nodes contains nodeToRemove. // no-op replacements can occur when there are pcdata sections that contain no tags parent.removeChild(index); List<StandaloneNode> children = replacements.get(nodeToRemove); if (!children.isEmpty()) { parent.addChildren(index, children); } } for (Map.Entry<ParentSoyNode<StandaloneNode>, List<StandaloneNode>> entry : asMap(newChildren) .entrySet()) { entry.getKey().addChildren(entry.getValue()); } clear(); } /** Mark a node for removal. */ void remove(StandaloneNode node) { checkNotNull(node); // only record this if the node is actually in the tree already. 
Sometimes we call remove // on new nodes that don't have parents yet. if (node.getParent() != null) { toRemove.add(node); } } /** Add children to the given parent. */ void addChildren(ParentSoyNode<StandaloneNode> parent, Iterable<StandaloneNode> children) { checkNotNull(parent); newChildren.putAll(parent, children); } /** Adds the child to the given parent. */ void addChild(ParentSoyNode<StandaloneNode> parent, StandaloneNode child) { checkNotNull(parent); checkNotNull(child); newChildren.put(parent, child); } /** Replace a given node with the new nodes. */ void replace(StandaloneNode oldNode, Iterable<StandaloneNode> newNodes) { checkState(oldNode.getParent() != null, "oldNode must be in the tree in order to replace it"); remove(oldNode); replacements.putAll(oldNode, newNodes); } /** Replace a given node with the new node. */ void replace(StandaloneNode oldNode, StandaloneNode newNode) { replace(oldNode, ImmutableList.of(newNode)); } /** Clear all the edits. */ void clear() { toRemove.clear(); replacements.clear(); newChildren.clear(); } } /** * Parsing context records the current {@link State} as well as all the extra information needed * to produce our new nodes. * * <p>This is extracted into a separate class so we can create new ones when visiting branches. * * <p>This isn't a perfect abstraction. It is set up to parse full HTML tags, but sometimes we * only want to parse attributes or attribute values (or parts of attribute values). In theory, we * could split it into 3-4 classes one for each case, but I'm not sure it simplifies things over * the current solution. See {@link Visitor#reparentNodes} for how we handle those cases. * * <p>The handling of attributes is particularly tricky. It is difficult to decide when to * complete an attribute (that is, create the {@link HtmlAttributeNode}). 
In theory it should be * obvious, the attribute is 'done' when we see one of the following: * * <ul> * <li>A non '=' character after an attribute name (a value-less attribute) * <li>A ' or " character after a quoted attribute value * <li>A whitespace character after an unquoted attribute value * </ul> * * However, we want to support various control flow for creating attribute values. For example, * <code>href={if $v2}"/foo2"{else}"/foo"{/if}</code>. Here when we see the closing double quote * characters we know that the attribute value is done, but it is too early to close the * attribute. So we need to delay. Thus the rules for when we 'finish' an attribute are: * * <ul> * <li>If a block starts in {@link State#BEFORE_ATTRIBUTE_VALUE} then the block must be the * attribute value * <li>If we see the beginning of a new attribute, we should finish the previous one * <li>If we see the end of a tag, we should finish the previous attribute. * <li>At the end of a block, we should complete attribute nodes if the block started in a tag * state * </ul> */ private static final class ParsingContext { final String blockName; final State startingState; final String filePath; final IdGenerator nodeIdGen; final ErrorReporter errorReporter; final AstEdits edits; // The current parser state. State state; SourceLocation.Point stateTransitionPoint; // for tracking the current tag being built /** Whether the current tag is a close tag. e.g. {@code </div>} */ boolean isCloseTag; SourceLocation.Point tagStartPoint; RawTextNode tagStartNode; TagName tagName; // TODO(lukes): consider lazily allocating these lists. /** All the 'direct' children of the current tag. 
*/ final List<StandaloneNode> directTagChildren = new ArrayList<>(); // for tracking the current attribute being built StandaloneNode attributeName; SourceLocation.Point equalsSignLocation; StandaloneNode attributeValue; // For the attribute value, where the quoted attribute value started /** Where the open quote of a quoted attribute value starts. */ SourceLocation.Point quotedAttributeValueStart; /** all the direct children of the attribute value. */ final List<StandaloneNode> attributeValueChildren = new ArrayList<>(); ParsingContext(String blockName, State startingState, SourceLocation.Point startPoint, String filePath, AstEdits edits, ErrorReporter errorReporter, IdGenerator nodeIdGen) { this.blockName = checkNotNull(blockName); this.startingState = checkNotNull(startingState); this.state = checkNotNull(startingState); this.stateTransitionPoint = checkNotNull(startPoint); this.filePath = checkNotNull(filePath); this.nodeIdGen = checkNotNull(nodeIdGen); this.edits = checkNotNull(edits); this.errorReporter = checkNotNull(errorReporter); } /** Called at the end of a block to finish any pending attribute nodes. */ void finishBlock() { if (startingState.isTagState()) { maybeFinishPendingAttribute(stateTransitionPoint); } } /** Attaches the attributeValueChildren to the parent. */ void reparentAttributeValueChildren(BlockNode parent) { edits.addChildren(parent, attributeValueChildren); attributeValueChildren.clear(); } /** Attaches the directTagChildren to the parent. */ void reparentDirectTagChildren(BlockNode parent) { if (attributeValue != null) { edits.addChild(parent, attributeValue); attributeValue = null; } edits.addChildren(parent, directTagChildren); directTagChildren.clear(); } /** Returns true if this has accumulated parts of an unquoted attribute value. */ boolean hasUnquotedAttributeValueParts() { return quotedAttributeValueStart == null && !attributeValueChildren.isEmpty(); } /** Returns true if this has accumulated parts of a quoted attribute value. 
*/ boolean hasQuotedAttributeValueParts() { return quotedAttributeValueStart != null; } boolean hasTagStart() { return tagStartNode != null && tagStartPoint != null; } /** Sets the given node as a direct child of the tag currently being built. */ void addTagChild(StandaloneNode node) { maybeFinishPendingAttribute(node.getSourceLocation().getBeginPoint()); checkNotNull(node); directTagChildren.add(node); edits.remove(node); setState(State.AFTER_TAG_NAME_OR_ATTRIBUTE, node.getSourceLocation().getEndPoint()); } /** Asserts that the context is empty. */ void checkEmpty(String fmt, Object... args) { StringBuilder error = null; if (!directTagChildren.isEmpty()) { error = format(error, "Expected directTagChildren to be empty, got: %s", directTagChildren); } if (attributeName != null) { error = format(error, "Expected attributeName to be null, got: %s", attributeName); } if (equalsSignLocation != null) { error = format(error, "Expected equalsSignLocation to be null, got: %s", equalsSignLocation); } if (attributeValue != null) { error = format(error, "Expected attributeValue to be null, got: %s", attributeValue); } if (!attributeValueChildren.isEmpty()) { error = format(error, "Expected attributeValueChildren to be empty, got: %s", attributeValueChildren); } if (tagStartPoint != null) { error = format(error, "Expected tagStartPoint to be null, got: %s", tagStartPoint); } if (tagName != null) { error = format(error, "Expected tagName to be null, got: %s", tagName); } if (tagStartNode != null) { error = format(error, "Expected tagStartNode to be null, got: %s", tagStartNode); } if (quotedAttributeValueStart != null) { error = format(error, "Expected quotedAttributeValueStart to be null, got: %s", quotedAttributeValueStart); } if (tagName != null) { error = format(error, "Expected tagName to be null, got: %s", tagName); } if (error != null) { throw new IllegalStateException(String.format(fmt + "\n", args) + error); } } private static StringBuilder format(StringBuilder error, 
String fmt, Object... args) { if (error == null) { error = new StringBuilder(); } error.append(String.format(fmt, args)); error.append('\n'); return error; } /** Resets all parsing state, this is useful for error recovery. */ void reset() { tagStartPoint = null; tagStartNode = null; tagName = null; directTagChildren.clear(); resetAttribute(); } void resetAttribute() { attributeName = null; equalsSignLocation = null; attributeValue = null; quotedAttributeValueStart = null; attributeValueChildren.clear(); } /** * Records the start of an html tag * * @param tagStartNode The node where it started * @param isCloseTag is is a close tag * @param point the source location of the {@code <} character. */ void startTag(RawTextNode tagStartNode, boolean isCloseTag, SourceLocation.Point point) { checkState(this.tagStartPoint == null); checkState(this.tagStartNode == null); checkState(this.directTagChildren.isEmpty()); // need to check if it is safe to transition into a tag. // this is only true if our starting location is pcdata if (startingState != State.PCDATA) { errorReporter.report(point.asLocation(filePath), BLOCK_TRANSITION_DISALLOWED, blockName, startingState, "tag"); throw new AbortParsingBlockError(); } this.tagStartPoint = checkNotNull(point); this.tagStartNode = checkNotNull(tagStartNode); this.isCloseTag = isCloseTag; } /** Returns the tag start location, for error reporting. */ SourceLocation tagStartLocation() { return tagStartPoint.asLocation(filePath); } /** Sets the tag name of the tag currently being built. 
*/ void setTagName(TagName tagName) { this.tagName = checkNotNull(tagName); edits.remove(tagName.getNode()); setState(State.AFTER_TAG_NAME_OR_ATTRIBUTE, tagName.getTagLocation().getEndPoint()); } void startAttribute(StandaloneNode attrName) { maybeFinishPendingAttribute(attrName.getSourceLocation().getBeginPoint()); checkNotNull(attrName); checkState(attributeName == null); if (startingState == State.BEFORE_ATTRIBUTE_VALUE) { errorReporter.report(attrName.getSourceLocation(), BLOCK_TRANSITION_DISALLOWED, blockName, startingState, "attribute"); throw new AbortParsingBlockError(); } edits.remove(attrName); attributeName = attrName; setState(State.AFTER_ATTRIBUTE_NAME, attrName.getSourceLocation().getEndPoint()); } void setEqualsSignLocation(SourceLocation.Point equalsSignPoint, SourceLocation.Point stateTransitionPoint) { checkNotNull(equalsSignPoint); if (attributeName == null) { // the attribute must have been started in another block errorReporter.report(stateTransitionPoint.asLocation(filePath), FOUND_EQ_WITH_ATTRIBUTE_IN_ANOTHER_BLOCK); throw new AbortParsingBlockError(); } checkState(equalsSignLocation == null); equalsSignLocation = equalsSignPoint; setState(State.BEFORE_ATTRIBUTE_VALUE, stateTransitionPoint); } void setAttributeValue(StandaloneNode node) { checkNotNull(node); checkState(attributeValue == null, "attribute value already set at: %s", node.getSourceLocation()); edits.remove(node); attributeValue = node; setState(State.AFTER_TAG_NAME_OR_ATTRIBUTE, node.getSourceLocation().getEndPoint()); } /** * Records the start of a quoted attribute value. * * @param node The node where it started * @param point The source location where it started. * @param nextState The next state, either {@link State#DOUBLE_QUOTED_ATTRIBUTE_VALUE} or {@link * State#SINGLE_QUOTED_ATTRIBUTE_VALUE}. 
*/ void startQuotedAttributeValue(RawTextNode node, SourceLocation.Point point, State nextState) { checkState(!hasQuotedAttributeValueParts()); checkState(!hasUnquotedAttributeValueParts()); edits.remove(node); quotedAttributeValueStart = checkNotNull(point); setState(nextState, point); } /** Adds a new attribute value part and marks the node for removal. */ void addAttributeValuePart(StandaloneNode node) { attributeValueChildren.add(node); edits.remove(node); } /** Completes an unquoted attribute value. */ void createUnquotedAttributeValue(SourceLocation.Point endPoint) { if (!hasUnquotedAttributeValueParts()) { if (attributeName != null) { errorReporter.report(endPoint.asLocation(filePath), EXPECTED_ATTRIBUTE_VALUE); } else { errorReporter.report(endPoint.asLocation(filePath), FOUND_END_OF_ATTRIBUTE_STARTED_IN_ANOTHER_BLOCK); throw new AbortParsingBlockError(); } resetAttribute(); setState(State.AFTER_TAG_NAME_OR_ATTRIBUTE, endPoint); return; } HtmlAttributeValueNode valueNode = new HtmlAttributeValueNode(nodeIdGen.genId(), getLocationOf(attributeValueChildren), Quotes.NONE); edits.addChildren(valueNode, attributeValueChildren); attributeValueChildren.clear(); setAttributeValue(valueNode); } /** Completes a quoted attribute value. */ void createQuotedAttributeValue(RawTextNode end, boolean doubleQuoted, SourceLocation.Point endPoint) { HtmlAttributeValueNode valueNode = new HtmlAttributeValueNode(nodeIdGen.genId(), new SourceLocation(filePath, quotedAttributeValueStart, endPoint), doubleQuoted ? Quotes.DOUBLE : Quotes.SINGLE); edits.remove(end); edits.addChildren(valueNode, attributeValueChildren); attributeValueChildren.clear(); quotedAttributeValueStart = null; setAttributeValue(valueNode); } /** * Creates an HtmlOpenTagNode or an HtmlCloseTagNode * * @param tagEndNode The node where the tag ends * @param selfClosing Whether it is self closing, e.g. {@code <div />} is self closing * @param endPoint The point where the {@code >} character is. 
* @return The state to transition into, typically this is {@link State#PCDATA} but could be one * of the rcdata states for special tags. */ State createTag(RawTextNode tagEndNode, boolean selfClosing, SourceLocation.Point endPoint) { maybeFinishPendingAttribute(endPoint); ParentSoyNode<StandaloneNode> replacement; SourceLocation sourceLocation = new SourceLocation(filePath, tagStartPoint, endPoint); if (isCloseTag) { // TODO(lukes): move the error reporting into the caller? if (!directTagChildren.isEmpty()) { errorReporter.report(directTagChildren.get(0).getSourceLocation(), UNEXPECTED_CLOSE_TAG_CONTENT); } if (selfClosing) { errorReporter.report(endPoint.asLocation(filePath).offsetStartCol(-1), SELF_CLOSING_CLOSE_TAG); } replacement = new HtmlCloseTagNode(nodeIdGen.genId(), tagName, sourceLocation); } else { replacement = new HtmlOpenTagNode(nodeIdGen.genId(), tagName, sourceLocation, selfClosing); } // Depending on the tag name, we may need to enter a special state after the tag. State nextState = State.PCDATA; if (!selfClosing && !isCloseTag) { TagName.RcDataTagName rcDataTag = tagName.getRcDataTagName(); if (rcDataTag != null) { switch (rcDataTag) { case SCRIPT: nextState = State.RCDATA_SCRIPT; break; case STYLE: nextState = State.RCDATA_STYLE; break; case TEXTAREA: nextState = State.RCDATA_TEXTAREA; break; case TITLE: nextState = State.RCDATA_TITLE; break; default: throw new AssertionError(rcDataTag); } } } edits.remove(tagEndNode); edits.addChild(replacement, tagName.getNode()); edits.addChildren(replacement, directTagChildren); // cast is safe because Html(Open|Close)TagNode implement StandaloneNode edits.replace(tagStartNode, (StandaloneNode) replacement); directTagChildren.clear(); tagStartPoint = null; tagName = null; tagStartNode = null; checkEmpty("Expected state to be empty after completing a tag"); return nextState; } void maybeFinishPendingAttribute(SourceLocation.Point currentPoint) { // For quoted attribute values we should have already finished 
them (when we saw the closing // quote). But for unquoted attribute values we delay closing them until we see a delimiter // so create one now if we have parts. if (hasUnquotedAttributeValueParts()) { createUnquotedAttributeValue(currentPoint); } else if (hasQuotedAttributeValueParts()) { // if there is a quoted attribute, it should have been finished // which means the only way we could get here is if the attribute was not finished // in a block errorReporter.report(currentPoint.asLocation(filePath), FOUND_END_OF_ATTRIBUTE_STARTED_IN_ANOTHER_BLOCK); throw new AbortParsingBlockError(); } if (attributeName != null) { SourceLocation location = attributeName.getSourceLocation(); HtmlAttributeNode attribute; if (attributeValue != null) { attribute = new HtmlAttributeNode(nodeIdGen.genId(), location, checkNotNull(equalsSignLocation)); location = location.extend(attributeValue.getSourceLocation()); edits.addChild(attribute, attributeName); edits.addChild(attribute, attributeValue); } else { attribute = new HtmlAttributeNode(nodeIdGen.genId(), location, null); edits.addChild(attribute, attributeName); } attributeName = null; equalsSignLocation = null; attributeValue = null; // We don't call addDirectTagChild to avoid // 1. calling maybeFinishPendingAttribute recursively // 2. to avoid changing the state field directTagChildren.add(attribute); edits.remove(attribute); } } /** * Changes the state of this context. * * @param s the new state * @param point the current location where the transition ocurred. 
* @return the previous state
     */
    State setState(State s, SourceLocation.Point point) {
      State previous = state;
      state = checkNotNull(s);
      stateTransitionPoint = checkNotNull(point);
      if (DEBUG) {
        String trace =
            point.asLocation(filePath) + "\tState: " + s.name() + " errors: " + errorReporter;
        System.err.println(trace);
      }
      return previous;
    }

    /**
     * Returns the current state.
     *
     * @throws IllegalStateException if no state has been set
     */
    State getState() {
      State current = state;
      checkState(current != null);
      return current;
    }

    /**
     * Returns the point recorded by the most recent state transition.
     *
     * @throws IllegalStateException if no transition point has been recorded
     */
    SourceLocation.Point getStateTransitionPoint() {
      SourceLocation.Point transitionPoint = stateTransitionPoint;
      checkState(transitionPoint != null);
      return transitionPoint;
    }

    /**
     * Returns a location that starts at the first node's location and, when there is more than one
     * node, is extended by the last node's location.
     *
     * @param nodes the nodes to span; the first element is read unconditionally
     */
    static SourceLocation getLocationOf(List<StandaloneNode> nodes) {
      SourceLocation first = nodes.get(0).getSourceLocation();
      if (nodes.size() <= 1) {
        return first;
      }
      return first.extend(Iterables.getLast(nodes).getSourceLocation());
    }
  }

  /** A custom error to halt processing of a given control flow block. */
  private static final class AbortParsingBlockError extends Error {}
}