com.google.template.soy.pysrc.internal.GeneratePySanitizeEscapingDirectiveCode.java Source code

Java tutorial

Introduction

Here is the source code for com.google.template.soy.pysrc.internal.GeneratePySanitizeEscapingDirectiveCode.java

Source

/*
 * Copyright 2014 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.google.template.soy.pysrc.internal;

import com.google.common.base.CaseFormat;
import com.google.template.soy.shared.internal.AbstractGenerateSoyEscapingDirectiveCode;
import com.google.template.soy.shared.internal.DirectiveDigest;
import com.google.template.soy.shared.restricted.EscapingConventions;
import com.google.template.soy.shared.restricted.EscapingConventions.EscapingLanguage;
import com.google.template.soy.shared.restricted.TagWhitelist;

import java.io.IOException;
import java.util.regex.Pattern;

import javax.annotation.ParametersAreNonnullByDefault;

/**
 * Generates Python code in generated_sanitize.py used by the public functions in sanitize.py.
 *
 * <p>
 * This is an ant task and can be invoked as:
 * <pre>{@code
 *   <taskdef name="gen.escape.directives"
 *    classname="com.google.template.soy.pysrc.internal.GeneratePySanitizeEscapingDirectiveCode">
 *     <classpath>
 *       <!-- classpath to Soy classes and dependencies -->
 *     </classpath>
 *   </taskdef>
 *   <gen.escape.directives>
 *     <input path="one or more Python files that use the generated helpers"/>
 *     <output path="the output Python file"/>
 *   </gen.escape.directives>
 * }</pre>
 *
 * <p>
 * In the above, the first {@code <taskdef>} is an Ant builtin which links the element named
 * {@code <gen.escape.directives>} to this class.
 * <p>
 * That element contains zero or more {@code <input>}s which are Python source files that may
 * use the helper functions generated by this task.
 * <p>
 * There must be exactly one {@code <output>} element which specifies where the output should be
 * written.  That output contains the input sources and the generated helper functions.
 *
 */
@ParametersAreNonnullByDefault
public final class GeneratePySanitizeEscapingDirectiveCode extends AbstractGenerateSoyEscapingDirectiveCode {

    /** Returns the target language of the generated code, which is always Python. */
    @Override
    protected EscapingLanguage getLanguage() {
        return EscapingLanguage.PYTHON;
    }

    /** Returns the token that starts a line comment in Python. */
    @Override
    protected String getLineCommentSyntax() {
        return "#";
    }

    /** Returns the statement terminator; empty because Python statements need none. */
    @Override
    protected String getLineEndSyntax() {
        return "";
    }

    /** Returns the text that opens a compiled regex expression (a single-quoted raw string). */
    @Override
    protected String getRegexStart() {
        return "re.compile(r'";
    }

    /** Returns the text that closes the expression opened by {@link #getRegexStart()}. */
    @Override
    protected String getRegexEnd() {
        return "', re.U)";
    }

    /**
     * Escapes {@code input} so it can be placed between the quotes of a Python string literal.
     *
     * <p>Backslashes and both quote characters are prefixed with a backslash.  Backspace, form
     * feed, newline, carriage return and tab are emitted as their symbolic Python escapes rather
     * than as a backslash followed by the raw control character: a backslash followed by a raw
     * newline is a line continuation in Python (the newline would be dropped), and a backslash
     * followed by a raw tab, backspace or form feed is not a recognized escape sequence.
     *
     * @param input the raw text to embed
     * @return the escaped text, without surrounding quotes
     */
    @Override
    protected String escapeOutputString(String input) {
        // Give the string builder a little bit of extra space to account for new escape characters.
        StringBuilder result = new StringBuilder((int) (input.length() * 1.2));
        for (int i = 0; i < input.length(); i++) {
            char c = input.charAt(i);
            switch (c) {
            case '\\':
            case '\'':
            case '"':
                result.append('\\').append(c);
                break;
            case '\b':
                result.append("\\b");
                break;
            case '\f':
                result.append("\\f");
                break;
            case '\n':
                result.append("\\n");
                break;
            case '\r':
                result.append("\\r");
                break;
            case '\t':
                result.append("\\t");
                break;
            default:
                result.append(c);
                break;
            }
        }

        return result.toString();
    }

    /**
     * Converts a Java regular expression into Python source that compiles the equivalent regex.
     *
     * <p>Raw CR, LF, U+2028 and U+2029 characters in the pattern are rewritten as regex escapes and
     * Java's end-of-input anchor is rewritten to Python's spelling.  The CASE_INSENSITIVE and
     * MULTILINE flags are carried over; any other flags are not translated.
     *
     * @param javaPattern the pattern to convert
     * @return a Python {@code re.compile(...)} expression
     * @throws IllegalArgumentException if the pattern uses {@link Pattern#DOTALL}
     */
    @Override
    protected String convertFromJavaRegex(Pattern javaPattern) {
        int flags = javaPattern.flags();
        // DOTALL is not allowed to keep the syntax simple (it's also not available in JavaScript).
        // Checked first so that no work is done for a pattern that will be rejected.
        if ((flags & Pattern.DOTALL) != 0) {
            throw new IllegalArgumentException("Pattern " + javaPattern + " uses DOTALL.");
        }

        String body = javaPattern.pattern()
                .replace("\r", "\\r")
                .replace("\n", "\\n")
                .replace("\u2028", "\\u2028")
                .replace("\u2029", "\\u2029")
                .replace("\\z", "\\Z");

        // NOTE(review): the body is embedded in a triple-double-quoted raw string, so a pattern
        // containing three consecutive double quotes or ending in a double quote would produce
        // invalid Python -- confirm no convention uses such a pattern.
        StringBuilder buffer = new StringBuilder(body.length() + 40);
        // Default to using unicode character classes.
        buffer.append("re.compile(r\"\"\"").append(body).append("\"\"\", re.U");
        if ((flags & Pattern.CASE_INSENSITIVE) != 0) {
            buffer.append(" | re.I");
        }
        if ((flags & Pattern.MULTILINE) != 0) {
            buffer.append(" | re.M");
        }
        buffer.append(")");
        return buffer.toString();
    }

    /**
     * Appends the header of the generated Python module: the {@code unicode_literals} future
     * import, the {@code re} and {@code urllib} imports, and an alias of {@code str} to
     * {@code unicode} that is skipped when {@code unicode} is not defined.
     *
     * @param outputCode the buffer receiving the generated code
     */
    @Override
    protected void generatePrefix(StringBuilder outputCode) {
        // Emulate Python 3 style unicode string literals, and import necessary libraries.
        outputCode.append("from __future__ import unicode_literals\n")
                .append("\n")
                .append("import re\n")
                .append("import urllib\n")
                .append("\n")
                .append("try:\n")
                .append("  str = unicode\n")
                .append("except NameError:\n")
                .append("  pass\n\n");
    }

    /**
     * Appends the name of the escape map variable for {@code mapName}.
     *
     * @param outputCode the buffer receiving the generated code
     * @param mapName the suffix identifying the escape map
     */
    @Override
    protected void generateCharacterMapSignature(StringBuilder outputCode, String mapName) {
        outputCode.append("_ESCAPE_MAP_FOR_").append(mapName);
    }

    /**
     * Appends an assignment of {@code matcher} to the module-level matcher variable for
     * {@code name}.
     *
     * @param outputCode the buffer receiving the generated code
     * @param name the suffix identifying the matcher
     * @param matcher the Python expression to assign
     */
    @Override
    protected void generateMatcher(StringBuilder outputCode, String name, String matcher) {
        outputCode.append("\n_MATCHER_FOR_").append(name).append(" = ").append(matcher).append("\n");
    }

    /**
     * Appends an assignment of {@code filter} to the module-level filter variable for
     * {@code name}.
     *
     * @param outputCode the buffer receiving the generated code
     * @param name the suffix identifying the filter
     * @param filter the Python expression to assign
     */
    @Override
    protected void generateFilter(StringBuilder outputCode, String name, String filter) {
        outputCode.append("\n_FILTER_FOR_").append(name).append(" = ").append(filter).append("\n");
    }

    /**
     * Appends a Python function that takes a regex match and returns the entry of the escape map
     * for {@code mapName} keyed by the matched text.
     *
     * @param outputCode the buffer receiving the generated code
     * @param mapName the suffix identifying both the replacer and its escape map
     */
    @Override
    protected void generateReplacerFunction(StringBuilder outputCode, String mapName) {
        outputCode.append("\ndef _REPLACER_FOR_").append(mapName).append("(match):\n")
                .append("  ch = match.group(0)\n")
                .append("  return _ESCAPE_MAP_FOR_").append(mapName).append("[ch]\n")
                .append("\n");
    }

    /**
     * Appends a Python helper that coerces its argument with {@code str} and delegates to an
     * existing Python function.
     *
     * @param outputCode the buffer receiving the generated code
     * @param identifier the lower-camel-case directive name; converted to lower_underscore to
     *     form the helper's name
     * @param existingFunction the Python callable to delegate to
     */
    @Override
    protected void useExistingLibraryFunction(StringBuilder outputCode, String identifier,
            String existingFunction) {
        String fnName = CaseFormat.LOWER_CAMEL.to(CaseFormat.LOWER_UNDERSCORE, identifier);
        outputCode.append("\ndef ").append(fnName).append("_helper(v):\n")
                .append("  return ").append(existingFunction).append("(str(v))\n")
                .append("\n");
    }

    /**
     * Appends the Python helper function for one escaping directive.
     *
     * <p>The generated function coerces its argument with {@code str}, returns the digest's
     * innocuous output if a filter is configured and does not match, and otherwise applies the
     * digest's matcher/replacer pair (or returns the value unchanged when there is none).
     *
     * @param outputCode the buffer receiving the generated code
     * @param digest describes the directive's name, filter, matcher and escape map
     * @throws UnsupportedOperationException if the digest has a non-ASCII prefix
     */
    @Override
    protected void generateHelperFunction(StringBuilder outputCode, DirectiveDigest digest) {
        // Reject unsupported digests before emitting anything so that a failure does not leave a
        // half-written function in the output buffer.
        if (digest.getNonAsciiPrefix() != null) {
            // TODO: We can add a second replace of all non-ascii codepoints below.
            throw new UnsupportedOperationException("Non ASCII prefix escapers not implemented yet.");
        }

        String name = digest.getDirectiveName();
        String fnName = CaseFormat.LOWER_CAMEL.to(CaseFormat.LOWER_UNDERSCORE, name);
        outputCode.append("\ndef ").append(fnName).append("_helper(value):\n").append("  value = str(value)\n");

        String filterName = digest.getFilterName();
        if (filterName != null) {
            outputCode.append("  if not _FILTER_FOR_").append(filterName).append(".search(value):\n");
            // TODO(dcphillips): Raising a debugging assertion error could be useful.
            // NOTE(review): the innocuous output is embedded without escaping; this assumes it
            // never contains a quote or backslash -- confirm against the escaping conventions.
            outputCode.append("    return '").append(digest.getInnocuousOutput()).append("'\n").append("\n");
        }

        if (digest.getEscapesName() != null) {
            String escapeMapName = digest.getEscapesName();
            String matcherName = digest.getMatcherName();
            outputCode.append("  return _MATCHER_FOR_").append(matcherName).append(".sub(\n")
                    .append("      _REPLACER_FOR_").append(escapeMapName).append(", value)\n");
        } else {
            outputCode.append("  return value\n");
        }
        outputCode.append("\n");
    }

    /**
     * Appends patterns and constants needed by escaping functions that are not part of any one
     * escaping convention: the HTML tag regex, the {@code <} regex and the safe tag whitelist.
     *
     * @param outputCode the buffer receiving the generated code
     */
    @Override
    protected void generateCommonConstants(StringBuilder outputCode) {
        outputCode.append("_HTML_TAG_REGEX = ")
                .append(convertFromJavaRegex(EscapingConventions.HTML_TAG_CONTENT))
                .append("\n\n")
                .append("_LT_REGEX = re.compile('<')\n")
                .append("\n")
                .append("_SAFE_TAG_WHITELIST = ")
                .append(toPyStringTuple(TagWhitelist.FORMATTING.asSet()))
                .append("\n\n");
    }

    /**
     * Renders the given strings as a Python tuple expression, writing each element with
     * {@code writeStringLiteral} and separating elements with {@code ", "}.
     *
     * <p>A single-element input gets a trailing comma, because Python parses a lone parenthesized
     * literal as a plain string rather than a one-element tuple.
     */
    private String toPyStringTuple(Iterable<String> strings) {
        StringBuilder sb = new StringBuilder();
        int count = 0;
        sb.append('(');
        for (String str : strings) {
            if (count > 0) {
                sb.append(", ");
            }
            writeStringLiteral(str, sb);
            count++;
        }
        if (count == 1) {
            // Without the comma the parentheses are only grouping and the value is a string.
            sb.append(',');
        }
        sb.append(')');
        return sb.toString();
    }

    /**
     * A non Ant interface for this class.
     *
     * @param args command-line arguments, passed unchanged to {@code configure}
     * @throws IOException if configuring or executing the generator fails with an I/O error
     */
    public static void main(String[] args) throws IOException {
        GeneratePySanitizeEscapingDirectiveCode generator = new GeneratePySanitizeEscapingDirectiveCode();
        generator.configure(args);
        generator.execute();
    }
}