org.sonar.duplications.detector.suffixtree.Search.java Source code

Introduction

Here is the source code for org.sonar.duplications.detector.suffixtree.Search.java
Source

/*
 * Sonar, open source software quality management tool.
 * Copyright (C) 2008-2012 SonarSource
 * mailto:contact AT sonarsource DOT com
 *
 * Sonar is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 3 of the License, or (at your option) any later version.
 *
 * Sonar is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Sonar; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 */
package org.sonar.duplications.detector.suffixtree;

import com.google.common.collect.Lists;

import java.util.*;

/**
 * Traverses the suffix tree built for a {@link TextSet} and reports its inner nodes
 * to a {@link Collector}.
 * <p>
 * The work is done in three steps (see {@link #compute()}): a depth-first traversal that
 * records the depth of every leaf and the range of leaves below every node, a sort of the
 * inner nodes by descending depth, and a final pass that reports each inner node for which
 * {@link #containsOrigin(Node)} holds.
 * </p>
 */
public final class Search {

    /**
     * Orders nodes by descending depth (deepest first).
     * Uses explicit comparisons rather than subtraction, so the result never depends
     * on the difference of two depths fitting into an {@code int}.
     */
    private static final Comparator<Node> DEPTH_COMPARATOR = new Comparator<Node>() {
        @Override
        public int compare(Node o1, Node o2) {
            if (o1.depth == o2.depth) {
                return 0;
            }
            return o1.depth > o2.depth ? -1 : 1;
        }
    };

    private final SuffixTree tree;
    private final TextSet text;
    private final Collector reporter;

    /**
     * Depth of every leaf, in the order in which leaves are reached by {@link #dfs()}.
     * {@code Node.startSize} (inclusive) and {@code Node.endSize} (exclusive) are indexes into this list.
     */
    private final List<Integer> leafDepths = Lists.newArrayList();

    /**
     * All nodes which are neither leaves nor the root, first in order of discovery by {@link #dfs()},
     * then re-sorted by descending depth in {@link #compute()}.
     */
    private final List<Node> innerNodes = Lists.newArrayList();

    /**
     * Builds a suffix tree for the given text, and reports to the given collector
     * the groups found in its inner nodes.
     *
     * @param text text to analyse
     * @param reporter receiver of the groups, see {@link Collector}
     */
    public static void perform(TextSet text, Collector reporter) {
        new Search(SuffixTree.create(text), text, reporter).compute();
    }

    private Search(SuffixTree tree, TextSet text, Collector reporter) {
        this.tree = tree;
        this.text = text;
        this.reporter = reporter;
    }

    private void compute() {
        // O(N)
        dfs();

        // O(N * log(N))
        Collections.sort(innerNodes, DEPTH_COMPARATOR);

        // O(N)
        visitInnerNodes();
    }

    /**
     * Depth-first search (DFS), iterative with an explicit stack.
     * <p>
     * Assigns {@code depth} to every node below the root, appends the depth of every leaf to
     * {@link #leafDepths}, collects inner nodes into {@link #innerNodes} and sets
     * {@code startSize} / {@code endSize} of leaves and inner nodes.
     * </p>
     */
    private void dfs() {
        // Looked up once: the root is only needed to exclude it from the inner nodes.
        Node root = tree.getRootNode();
        Deque<Node> stack = new ArrayDeque<Node>();
        stack.addLast(root);
        while (!stack.isEmpty()) {
            Node node = stack.removeLast();
            node.startSize = leafDepths.size();
            if (node.getEdges().isEmpty()) {
                // leaf
                leafDepths.add(node.depth);
                node.endSize = leafDepths.size();
                continue;
            }
            if (!node.equals(root)) {
                // inner node = not leaf and not root
                innerNodes.add(node);
            }
            for (Edge edge : node.getEdges()) {
                Node endNode = edge.getEndNode();
                endNode.depth = node.depth + edge.getSpan() + 1;
                stack.addLast(endNode);
            }
        }
        // At this point all inner nodes are ordered by the time of entering, so we visit them from last to first:
        // every child has been processed (has its endSize) before its parent is.
        for (int i = innerNodes.size() - 1; i >= 0; i--) {
            Node node = innerNodes.get(i);
            int max = -1;
            for (Edge edge : node.getEdges()) {
                max = Math.max(edge.getEndNode().endSize, max);
            }
            node.endSize = max;
        }
    }

    /**
     * Each inner-node represents prefix of some suffixes, thus substring of text.
     * Reports every inner node accepted by {@link #containsOrigin(Node)}.
     */
    private void visitInnerNodes() {
        for (Node node : innerNodes) {
            if (containsOrigin(node)) {
                report(node);
            }
        }
    }

    /**
     * Start position, in the text of the tree, of the part which corresponds to the leaf with given index.
     *
     * @param leafIndex index in {@link #leafDepths}
     * @return length of the text minus depth of the leaf
     */
    private int startOfPart(int leafIndex) {
        return tree.text.length() - leafDepths.get(leafIndex);
    }

    /**
     * Checks whether at least one part of the given node ends at a position for which
     * {@link TextSet#isInsideOrigin(int)} returns {@code true}.
     * <p>
     * This check is intentionally kept separate from {@link #report(Node)}: it can't be done
     * in {@link Collector#endOfGroup()}, because it might lead to creation of unnecessary new objects.
     * Computation of positions is shared through {@link #startOfPart(int)}.
     * </p>
     */
    private boolean containsOrigin(Node node) {
        for (int i = node.startSize; i < node.endSize; i++) {
            int end = startOfPart(i) + node.depth;
            if (text.isInsideOrigin(end)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Sends the given node to the collector as one group:
     * one call of {@link Collector#part(int, int)} per leaf in range {@code [startSize, endSize)},
     * surrounded by {@link Collector#startOfGroup(int, int)} and {@link Collector#endOfGroup()}.
     */
    private void report(Node node) {
        reporter.startOfGroup(node.endSize - node.startSize, node.depth);
        for (int i = node.startSize; i < node.endSize; i++) {
            int start = startOfPart(i);
            reporter.part(start, start + node.depth);
        }
        reporter.endOfGroup();
    }

    /**
     * Receiver of the groups found by {@link Search#perform(TextSet, Collector)}.
     * For every reported node the methods are invoked in the following order:
     * {@link #startOfGroup(int, int)}, then {@link #part(int, int)} once per part,
     * then {@link #endOfGroup()}.
     */
    public abstract static class Collector {

        /**
         * Invoked at the beginning of processing for current node.
         * <p>
         * Length - is a depth of node. And nodes are visited in descending order of depth,
         * thus we guarantee that length will not increase between two sequential calls of this method
         * (can be equal or less than previous value).
         * </p>
         *
         * @param size number of parts in group
         * @param length length of each part in group
         */
        abstract void startOfGroup(int size, int length);

        /**
         * Invoked as many times as leaves in the subtree, where current node is root.
         *
         * @param start start position in generalised text
         * @param end end position in generalised text
         */
        abstract void part(int start, int end);

        /**
         * Invoked at the end of processing for current node.
         */
        abstract void endOfGroup();

    }

}