/*
    The MIT License (MIT)

    Copyright (c) 2016 Torsten Louland

    Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
    documentation files (the "Software"), to deal in the Software without restriction, including without limitation
    the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
    to permit persons to whom the Software is furnished to do so, subject to the following conditions:

    The above copyright notice and this permission notice shall be included in all copies or substantial portions
    of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
    THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
    CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.
*/

/**
 * Created by Torsten Louland on 21/10/2016.
 */
package com.satisfyingstructures.J2S;

import com.satisfyingstructures.J2S.antlr.Java8Parser;

import org.antlr.v4.runtime.*;
import org.antlr.v4.runtime.misc.Interval;
import org.antlr.v4.runtime.tree.ParseTree;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/// J2SRewriter provides additional rewriter utility functions specific to the needs of J2S
public class J2SRewriter extends ParseTreeRewriter {

    public final String lineBreak;    // the standard linebreak used in the token stream
    public final String singleIndent; // the standard single indent used in the token stream

    public J2SRewriter(ParseTree tree, TokenStream tokens) {
        super(tree, tokens);
        lineBreak = discoverLineBreakType(tokens);
        singleIndent = discoverSingelIndentType(tokens);
    }
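    // Construction sketch (illustrative only): the lexer class name, the start rule and the input file below are
    // assumptions about the surrounding J2S project, not anything guaranteed by this file; the grammar must also
    // keep its WS and LB tokens in the stream for the discovery methods below to see them.
    //
    //     CharStream cs = CharStreams.fromFileName("Example.java");   // ANTLR 4.7+; earlier releases use ANTLRFileStream
    //     Java8Lexer lexer = new Java8Lexer(cs);                      // assumed generated lexer
    //     CommonTokenStream tokens = new CommonTokenStream(lexer);
    //     Java8Parser parser = new Java8Parser(tokens);
    //     ParseTree tree = parser.compilationUnit();                  // assumed start rule
    //     J2SRewriter rewriter = new J2SRewriter(tree, tokens);       // discovers lineBreak and singleIndent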
    static String discoverLineBreakType(TokenStream tokens) {
        String s = "\n";
        for (int i = 0, sz = tokens.size(); i < sz; i++) {
            Token t = tokens.get(i);
            if (null == t || t.getType() != Java8Parser.LB)
                continue;
            s = t.getText();
            if (-1 != (i = s.indexOf(s.charAt(0), 1))) // remove any later repetitions
                s = s.substring(0, i);
            break;
        }
        return s;
    }

    static String discoverSingelIndentType(TokenStream tokens) {
        String singleIndent = "\t";
        int depth = 0;
        List<Token> recentIndentsByDepth = new ArrayList<>();
        Map<String, Integer> countOfIndentStyle = new HashMap<>();
        int countOfIndents = 0;
        for (int i = 0, sz = tokens.size(); i < sz; i++) {
            Token t = tokens.get(i);
            if (null == t)
                continue;
            switch (t.getType()) {
                case Java8Parser.RBRACE:
                    break;
                case Java8Parser.LBRACE:
                    depth++;
                    for (int j = recentIndentsByDepth.size(); j <= depth; j++)
                        recentIndentsByDepth.add(j, null);
                    // fall through
                default:
                    continue;
            }
            if (i > 2) {
                Token tWS = tokens.get(i - 1);
                if (tWS.getType() == Java8Parser.WS && tokens.get(i - 2).getType() == Java8Parser.LB) {
                    recentIndentsByDepth.set(depth, tWS);
                    if (depth + 1 < recentIndentsByDepth.size()
                        && null != (t = recentIndentsByDepth.get(depth + 1))) {
                        String deep = tWS.getText();
                        String deeper = t.getText();
                        if (deeper.startsWith(deep)) {
                            singleIndent = deeper.substring(deep.length());
                            Integer count = countOfIndentStyle.get(singleIndent);
                            if (null == count)
                                countOfIndentStyle.put(singleIndent, 1);
                            else {
                                float share = count.floatValue() / (float) countOfIndents;
                                if (countOfIndents >= 4 && share == 1)
                                    return singleIndent; // winner, consistent use
                                if (countOfIndents > 10 && share > .9)
                                    return singleIndent; // winner, inconsistent use
                                if (countOfIndents > 20 && share > .7)
                                    return singleIndent; // winner, variable use
                                countOfIndentStyle.put(singleIndent, count + 1);
                            }
                            countOfIndents++;
                        }
                        recentIndentsByDepth.set(depth + 1, null);
                    }
                }
            }
            depth--;
        }
        // No early winner, so pick the most frequent
        int best = 0;
        for (Map.Entry<String, Integer> indentStyle : countOfIndentStyle.entrySet()) {
            int count = indentStyle.getValue();
            countOfIndents -= count; // --> remaining
            if (best < count) {
                best = count;
                singleIndent = indentStyle.getKey();
                if (countOfIndents < count)
                    return singleIndent; // can't be beaten now
            }
        }
        return singleIndent;
    }

    // tokens

    public Token getTokenFollowing(Token token) {
        int index = null != token ? token.getTokenIndex() + 1 : tokens.size();
        if (index < tokens.size())
            return tokens.get(index);
        return null;
    }

    public Token getTokenPreceding(Token token) {
        int index = null != token ? token.getTokenIndex() - 1 : -1;
        if (index >= 0)
            return tokens.get(index);
        return null;
    }

    // insert comments

    enum CommentWhere {
        beforeLineBreak, // whole-line comment on a line inserted before the line containing the token
        afterLineBreak,  // whole-line comment on a line inserted after the line containing the token
        beforeToken,     // insert /* comment */ before the token
        afterToken,      // insert /* comment */ after the token
        atEndOfLine      // append // comment at the end of the token's line
    }

    public void insertComment(String comment, ParseTree pt, CommentWhere where) {
        int idx = where == CommentWhere.beforeToken || where == CommentWhere.beforeLineBreak
                ? pt.getSourceInterval().a
                : pt.getSourceInterval().b;
        insertComment(comment, idx, where);
    }

    public void insertComment(String comment, Token token, CommentWhere where) {
        insertComment(comment, token.getTokenIndex(), where);
    }

    public void insertComment(String comment, int tokenIndex, CommentWhere where) {
        Token token, tokenWS;
        String s;
        switch (where) {
            case beforeLineBreak:
                for (token = null; tokenIndex >= 0; tokenIndex--)
                    if (Java8Parser.LB == (token = tokens.get(tokenIndex)).getType())
                        break;
                s = getText(token); // have to get text, because it could be multiline
                if (Java8Parser.WS == (tokenWS = getTokenFollowing(token)).getType())
                    s += getText(tokenWS); // Add indent
                s += "// " + comment + lineBreak;
                replace(token, s);
                break;
            case beforeToken:
                insertBefore(tokenIndex, "/* " + comment + " */ ");
                break;
            case afterToken:
                insertAfter(tokenIndex, " /* " + comment + " */");
                break;
            case afterLineBreak:
            case atEndOfLine:
                for (token = null; tokenIndex < tokens.size(); tokenIndex++)
                    if (Java8Parser.LB == (token = tokens.get(tokenIndex)).getType())
                        break;
                if (where == CommentWhere.afterLineBreak) {
                    s = "";
                    if (Java8Parser.WS == (tokenWS = getTokenFollowing(token)).getType())
                        s = getText(tokenWS); // Add indent
                    insertBefore(token, lineBreak + s + "// " + comment);
                } else {
                    insertBefore(token, " // " + comment);
                }
                break;
        }
    }
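    // Placement sketch for insertComment (illustrative): given a token t for the identifier x in a line reading
    // "int x = 1;", the five CommentWhere modes produce roughly the following (exact indentation depends on the
    // surrounding WS tokens):
    //
    //     insertComment("note", t, CommentWhere.beforeToken);     // int /* note */ x = 1;
    //     insertComment("note", t, CommentWhere.afterToken);      // int x /* note */ = 1;
    //     insertComment("note", t, CommentWhere.atEndOfLine);     // int x = 1; // note
    //     insertComment("note", t, CommentWhere.beforeLineBreak); // a "// note" line inserted above the line
    //     insertComment("note", t, CommentWhere.afterLineBreak);  // a "// note" line inserted below the line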
    // delete

    public void deleteAndAdjustWhitespace(Token token) {
        int i = token.getTokenIndex();
        deleteAndAdjustWhitespace(i, i);
    }

    public void deleteAndAdjustWhitespace(ParseTree pt) {
        Interval interval = pt.getSourceInterval();
        deleteAndAdjustWhitespace(interval.a, interval.b);
    }

    public void deleteAndAdjustWhitespace(int i, int j) {
        replaceAndAdjustWhitespace(i, j, "");
    }

    public void replaceAndAdjustWhitespace(Token token, String text) {
        int i = token.getTokenIndex();
        replaceAndAdjustWhitespace(i, i, text);
    }

    // replace

    public void replaceAndAdjustWhitespace(ParseTree pt, String text) {
        Interval interval = pt.getSourceInterval();
        replaceAndAdjustWhitespace(interval.a, interval.b, text);
    }

    boolean recommendKeepSeparate(int codePointL, int codePointR) {
        boolean idCharL = codePointL == '`' || Character.isJavaIdentifierPart(codePointL);
        boolean idCharR = codePointR == '`' || Character.isJavaIdentifierPart(codePointR);
        // ...characters may be from an identifier that has already been wrapped in back ticks to resolve a clash
        // with a Swift keyword, hence we also consider a back tick to be an identifier character.
        boolean keepSeparate = false;
        if (idCharL && idCharR)
            keepSeparate = true;
        else if (idCharR) {
            // crude and not comprehensive, but good enough for now...
            switch (codePointL) {
                case '=': case ',': case '?': case '}': case '*': case '/': case '+': case '-':
                    keepSeparate = true;
                    break;
            }
        } else if (idCharL) {
            switch (codePointR) {
                case '=': case ',': case '?': case '{': case '!': case '*': case '/': case '+': case '-':
                    keepSeparate = true;
                    break;
            }
        }
        return keepSeparate;
    }
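    // Separation sketch (illustrative): recommendKeepSeparate answers "do these two adjacent code points need a
    // space between them so that the rewritten text does not merge or read badly?" For example:
    //
    //     recommendKeepSeparate('a', 'b');  // true  - two identifier characters would merge into one identifier
    //     recommendKeepSeparate('=', 'x');  // true  - '=' is in the keep-separate set when followed by an identifier
    //     recommendKeepSeparate('(', 'x');  // false - '(' is not in that set, so no space is required
    //     recommendKeepSeparate('a', '(');  // false - call syntax: no space wanted before '('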
    public void replaceAndAdjustWhitespace(int from, int to, String text) {
        // The aim here is to prevent text merging with adjacent tokens (and to separate where it looks reasonable).
        //
        // It would be much better if TokenStreamRewriter recorded alterations as token insertions, deletions, moves
        // and alterations, so that lexical and structural information was retained as much as possible; then we
        // would just need a final pass before the end of processing to insert essential whitespace tokens. However,
        // it doesn't. The problem is that because the insertions and replacements are expressed as strings, lexical
        // information is lost, and we can no longer be certain what the type of an adjacent token is - it may well
        // have already been rewritten. Hence we have to fall back to testing the adjacent token text - what a pain.
        // Which leads to this fragile, messy code.
        //
        // Furthermore, when we scan backwards and forwards for adjacent non-empty tokens, we can find 'residue':
        // where a whole rule context has been replaced with an empty string, getText for the first token returns
        // empty, but getText for the following token (i.e. inside the subtree of the context) can return the
        // original token text - the rewriter doesn't expect us to be asking for this token's text, and does not
        // apply the correct transform - so we see something that will not actually be output in the final rewritten
        // stream. We can't trust what we see. (Root cause is TokenStreamRewriter.reduceToSingleOperationPerIndex -
        // the operation per index only returns the right operation for the first index of the range it changes.)
        // Hard one to work around. OK, worked around it - here's how: in ParseTreeRewriter we maintain a
        // changed-interval map; when we scan through adjacent text, if a token is in a changed interval we get the
        // whole changed text, otherwise we get the token text.
        //
        // Gotcha: adding separation whitespace is ok, but removing excess separation can be problematic: on a later
        // call to getText(), the rewriter can consolidate accumulated rewrite operations, such that a reduction of
        // the trailing WS token gets merged with whatever transformed text there is for the preceding token, i.e.
        // combined replacement text for a widened range of tokens, extending beyond the original range of
        // significant (i.e. non-white) tokens; if there is a further transformation on the significant range of
        // tokens, a replacement will be requested up to the end of the last significant token, but this now falls
        // in the middle of the consolidated replacement op, hence TokenStreamRewriter considers it an overlapping
        // replacement request and throws an exception. What a pain. The ideal time for adjusting whitespace would
        // be in an extra pass at the end, if we still had lexical information about our replacements; but we do
        // not. :-(

        String textL, textR;
        Interval interval;
        int i, tokenIdxL, tokenIdxR, lenL, lenR, wsL, wsR, wsWanted;
        int tokenIdxL_WSFrom, tokenIdxL_WSTo, tokenIdxR_WSFrom, tokenIdxR_WSTo;
        int codePointL, codePointR;
        boolean leftIsIndent;
        boolean keepSeparate;

        text = text.trim();

        // Assess left: count whitespace and stop when the first non-white character is reached
        tokenIdxL = from - 1;
        textL = null;
        tokenIdxL_WSFrom = tokenIdxL_WSTo = -1;
        codePointL = 0;
        wsL = 0;
        i = -1;
        assessL:
        while (0 <= tokenIdxL) {
            if (null != (interval = getChangedIntervalContaining(tokenIdxL, tokenIdxL)))
                textL = getText(interval);
            else
                textL = getText(tokenIdxL, tokenIdxL);
            for (lenL = textL.length(), i = lenL - 1; 0 <= i; i--) {
                codePointL = textL.codePointAt(i);
                // if (Character.SPACE_SEPARATOR != Character.getType(codePointL))
                // ...doesn't work as expected - getType('\t') primary category is not SPACE_SEPARATOR, so test explicitly
                if (' ' != codePointL && '\t' != codePointL)
                    break assessL;
                wsL++;
                tokenIdxL_WSFrom = null != interval ? interval.a : tokenIdxL;
                if (tokenIdxL_WSTo == -1)
                    tokenIdxL_WSTo = null != interval ? interval.b : tokenIdxL;
            }
            tokenIdxL = null != interval ? interval.a - 1 : tokenIdxL - 1;
            textL = null;
        }
        // leftIsIndent = Character.LINE_SEPARATOR == Character.getType(codePointL)
        // ...doesn't work as expected - getType('\n') primary category is CONTROL not LINE_SEPARATOR, so test explicitly
        leftIsIndent = codePointL == '\n' || codePointL == '\r' || (0 > tokenIdxL && 0 > i);

        // Assess right: count whitespace and stop when the first non-white character is reached
        tokenIdxR = to + 1;
        textR = null;
        tokenIdxR_WSFrom = tokenIdxR_WSTo = -1;
        codePointR = 0;
        wsR = 0;
        assessR:
        while (tokenIdxR < tokens.size()) {
            if (null != (interval = getChangedIntervalContaining(tokenIdxR, tokenIdxR)))
                textR = getText(interval);
            else
                textR = getText(tokenIdxR, tokenIdxR);
            for (i = 0, lenR = textR.length(); i < lenR; i++) {
                codePointR = textR.codePointAt(i);
                // if (Character.SPACE_SEPARATOR != Character.getType(codePointR))
                // ...doesn't work as expected - getType('\t') primary category is not SPACE_SEPARATOR, so test explicitly
                if (' ' != codePointR && '\t' != codePointR)
                    break assessR;
                wsR++;
                if (tokenIdxR_WSFrom == -1)
                    tokenIdxR_WSFrom = null != interval ? interval.a : tokenIdxR;
                tokenIdxR_WSTo = null != interval ? interval.b : tokenIdxR;
            }
            tokenIdxR = null != interval ? interval.b + 1 : tokenIdxR + 1;
            textR = null;
        }
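        // Worked example (illustrative): with non-empty replacement text whose first character is an identifier
        // character, and an identifier character on the left with no intervening whitespace, wsL == 0 while
        // wsWanted == 1, so a single space is prepended to the text; if instead two spaces were found, wsL == 2
        // and the trailing whitespace run is trimmed back to the one space wanted.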
        if (0 < text.length()) {
            keepSeparate = recommendKeepSeparate(codePointL, text.codePointAt(0));
            wsWanted = keepSeparate ? 1 : 0;
            if (!leftIsIndent && wsL != wsWanted) {
                if (wsL == 0) {
                    // with no whitespace on the left, we can use the replacement text
                    text = " " + text;
                } else {
                    // we have an excess of whitespace
                    String s = tokenIdxL == tokenIdxL_WSTo ? textL : getText(tokenIdxL_WSFrom, tokenIdxL_WSTo);
                    s = s.substring(0, s.length() - (wsL - wsWanted)); // trim excess from end
                    replace(tokenIdxL_WSFrom, tokenIdxL_WSTo, s);
                }
            }
            // Same again for the RHS
            keepSeparate = recommendKeepSeparate(text.codePointAt(text.length() - 1), codePointR);
            wsWanted = keepSeparate ? 1 : 0;
            if (wsR != wsWanted) {
                if (wsR == 0) {
                    // with no whitespace on the right, we can use the replacement text
                    text += " ";
                } else {
                    // we have an excess of whitespace
                    String s = tokenIdxR == tokenIdxR_WSFrom ? textR : getText(tokenIdxR_WSFrom, tokenIdxR_WSTo);
                    s = s.substring(wsR - wsWanted); // trim excess from start
                    replace(tokenIdxR_WSFrom, tokenIdxR_WSTo, s);
                }
            }
        } else if (0 < from && to < tokens.size() - 1) {
            keepSeparate = recommendKeepSeparate(codePointL, codePointR);
            wsWanted = keepSeparate && !leftIsIndent ? 1 : 0;
            if (leftIsIndent)
                ; // do nothing to the left
            else if (wsL == wsWanted)
                wsWanted -= wsL;
            else if (wsL > wsWanted) {
                // we have an excess of whitespace
                String s = tokenIdxL == tokenIdxL_WSTo ? textL : getText(tokenIdxL_WSFrom, tokenIdxL_WSTo);
                s = s.substring(0, s.length() - (wsL - wsWanted)); // trim excess from end
                replace(tokenIdxL_WSFrom, tokenIdxL_WSTo, s);
                wsWanted = 0;
            }
            if (wsR > wsWanted) {
                String s = tokenIdxR == tokenIdxR_WSFrom ? textR : getText(tokenIdxR_WSFrom, tokenIdxR_WSTo);
                s = s.substring(wsR - wsWanted); // trim excess from start
                replace(tokenIdxR_WSFrom, tokenIdxR_WSTo, s);
            } else if (wsR < wsWanted) {
                text = " ";
            }
        }

        // Finally, replace the requested interval
        replace(from, to, text);
    }
}