// Java tutorial  (stray non-code text; commented out so the file compiles)
/* * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. * * Copyright 2009-2010 Sun Microsystems, Inc. All rights reserved. * * The contents of this file are subject to the terms of either the GNU * General Public License Version 2 only ("GPL") or the Common * Development and Distribution License("CDDL") (collectively, the * "License"). You may not use this file except in compliance with the * License. You can obtain a copy of the License at * http://www.netbeans.org/cddl-gplv2.html * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the * specific language governing permissions and limitations under the * License. When distributing the software, include this License Header * Notice in each file and include the License file at * nbbuild/licenses/CDDL-GPL-2-CP. Sun designates this * particular file as subject to the "Classpath" exception as provided * by Sun in the GPL Version 2 section of the License file that * accompanied this code. If applicable, add the following below the * License Header, with the fields enclosed by brackets [] replaced by * your own identifying information: * "Portions Copyrighted [year] [name of copyright owner]" * * If you wish your version of this file to be governed by only the CDDL * or only the GPL Version 2, indicate your decision by adding * "[Contributor] elects to include this software in this distribution * under the [CDDL or GPL Version 2] license." If you do not indicate a * single choice of license, a recipient has the option to distribute * your version of this file under either the CDDL, the GPL Version 2 or * to extend the choice of license to its licensees as provided above. * However, if you add GPL Version 2 code and therefore, elected the GPL * Version 2 license, then the option applies only if the new code is * made subject to such option by the copyright holder. * * Contributor(s): * * Portions Copyrighted 2009-2010 Sun Microsystems, Inc. 
*/
package org.netbeans.modules.jackpot30.impl.duplicates;

import com.sun.source.tree.CompilationUnitTree;
import com.sun.source.tree.IdentifierTree;
import com.sun.source.tree.NewClassTree;
import com.sun.source.tree.Tree;
import com.sun.source.tree.VariableTree;
import com.sun.source.util.SourcePositions;
import com.sun.source.util.TreePath;
import com.sun.source.util.TreePathScanner;
import com.sun.source.util.Trees;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.UnsupportedEncodingException;
import java.net.URL;
import java.security.DigestOutputStream;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.NoSuchElementException;
import java.util.Set;
import java.util.TreeMap;
import java.util.concurrent.atomic.AtomicBoolean;
import javax.lang.model.element.Element;
import javax.lang.model.element.Modifier;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermEnum;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.FSDirectory;
import org.netbeans.api.annotations.common.CheckForNull;
import org.netbeans.api.annotations.common.NonNull;
import org.netbeans.api.java.classpath.ClassPath;
import org.netbeans.api.java.classpath.GlobalPathRegistry;
import org.netbeans.api.java.source.CompilationInfo;
import org.netbeans.api.progress.ProgressHandle;
import org.netbeans.modules.jackpot30.common.api.LuceneHelpers.BitSetCollector;
import org.netbeans.modules.jackpot30.impl.duplicates.indexing.DuplicatesCustomIndexerImpl;
import org.netbeans.modules.jackpot30.impl.duplicates.indexing.DuplicatesIndex;
import org.netbeans.modules.parsing.impl.indexing.CacheFolder;
import org.openide.filesystems.FileObject;
import org.openide.filesystems.FileUtil;
import org.openide.filesystems.URLMapper;
import org.openide.util.Exceptions;

/**
 * Finds duplicated code snippets across source roots by querying the
 * per-root "duplicates" Lucene indices (field {@code duplicatesGeneralized}
 * holds an MD5-prefix:value hash of a generalized AST fragment).
 *
 * @author lahvac
 */
public class ComputeDuplicates {

    /**
     * Computes duplicates over every source root currently registered on the
     * global {@link ClassPath#SOURCE} path (i.e. all opened projects).
     *
     * @param progress progress handle updated with coarse phase messages
     * @param cancel   cooperative cancellation flag
     * @return lazy iterator over found duplicate descriptions
     * @throws IOException on index access failure
     */
    public Iterator<? extends DuplicateDescription> computeDuplicatesForAllOpenedProjects(ProgressHandle progress, AtomicBoolean cancel) throws IOException {
        Set<URL> urls = new HashSet<URL>();

        for (ClassPath cp : GlobalPathRegistry.getDefault().getPaths(ClassPath.SOURCE)) {
            for (ClassPath.Entry e : cp.entries()) {
                urls.add(e.getURL());
            }
        }

        long start = System.currentTimeMillis();

        try {
            return computeDuplicates(urls, progress, cancel);
        } finally {
            // crude timing diagnostic; intentionally kept on stderr
            System.err.println("duplicates for all open projects: " + (System.currentTimeMillis() - start));
        }
    }

    /**
     * Updates the duplicates index for each given source root, opens a reader
     * per root, and returns an iterator producing duplicates seen in at least
     * two places.
     *
     * @param forURLs  source roots to inspect
     * @param progress progress handle updated with coarse phase messages
     * @param cancel   cooperative cancellation flag
     * @return lazy iterator over found duplicate descriptions
     * @throws IOException on index access failure
     */
    public Iterator<? extends DuplicateDescription> computeDuplicates(Set<URL> forURLs, ProgressHandle progress, AtomicBoolean cancel) throws IOException {
        Map<IndexReader, FileObject> readers2Roots = new LinkedHashMap<IndexReader, FileObject>();

        progress.progress("Updating indices");

        for (URL u : forURLs) {
            try {
                //TODO: needs to be removed for server mode
                new DuplicatesCustomIndexerImpl.FactoryImpl().updateIndex(u, cancel); //TODO: show updating progress to the user

                File cacheRoot = cacheRoot(u);
                File dir = new File(cacheRoot, DuplicatesIndex.NAME);
                // List the directory exactly once: the original called listFiles()
                // twice, re-scanning the disk and racing between the null check
                // and the length check.
                File[] children = dir.listFiles();

                if (children != null && children.length > 0) {
                    IndexReader reader = IndexReader.open(FSDirectory.open(dir), true);

                    readers2Roots.put(reader, URLMapper.findFileObject(u));
                }
            } catch (IOException ex) {
                // best-effort per root: a broken index must not abort the others
                Exceptions.printStackTrace(ex);
            }
        }

        progress.progress("Searching for duplicates");

        MultiReader r = new MultiReader(readers2Roots.keySet().toArray(new IndexReader[0]));
        List<String> dd = new ArrayList<String>(getDuplicatedValues(r, "duplicatesGeneralized", cancel));

        sortHashes(dd);

        //TODO: only show valuable duplicates?:
        return new DuplicatesIterator(readers2Roots, dd, 2);
    }

    /** Variant used by external callers: iterates duplicates of the given hashes only. */
    public static Iterator<? extends DuplicateDescription> XXXduplicatesOf(Map<IndexReader, FileObject> readers2Roots, Collection<String> hashes) {
        List<String> hashesList = new ArrayList<String>(hashes);

        sortHashes(hashesList);

        return new DuplicatesIterator(readers2Roots, hashesList, 1);
    }

    /**
     * Resolves the on-disk cache folder that holds the duplicates index for
     * the given source root, or {@code null} when it does not exist yet.
     */
    private static File cacheRoot(URL sourceRoot) throws IOException {
        FileObject dataFolder = CacheFolder.getDataFolder(sourceRoot);
        FileObject cacheFO = dataFolder.getFileObject(DuplicatesIndex.NAME + "/" + DuplicatesIndex.VERSION);
        File cache = cacheFO != null ? FileUtil.toFile(cacheFO) : null;

        return cache;
    }

    /**
     * Lazily materializes {@link DuplicateDescription}s from a sorted list of
     * candidate hashes, skipping candidates subsumed by an already-reported,
     * more valuable duplicate.
     */
    private static final class DuplicatesIterator implements Iterator<DuplicateDescription> {
        private final Map<IndexReader, FileObject> readers2Roots;
        private final Iterator<String> duplicateCandidates;
        private final int minDuplicates;
        // duplicates reported so far; consulted for subsumption checks
        private final List<DuplicateDescription> result = new LinkedList<DuplicateDescription>();
        // look-ahead slot filled by hasNext(), consumed by next()
        private DuplicateDescription next;

        public DuplicatesIterator(Map<IndexReader, FileObject> readers2Roots, Iterable<String> duplicateCandidates, int minDuplicates) {
            this.readers2Roots = readers2Roots;
            this.duplicateCandidates = duplicateCandidates.iterator();
            this.minDuplicates = minDuplicates;
        }

        /**
         * Searches every index for the next candidate hash and returns a new
         * description with at least {@code minDuplicates} occurrences, or
         * {@code null} when the candidates are exhausted.
         */
        private DuplicateDescription nextDescription() throws IOException {
            while (duplicateCandidates.hasNext()) {
                String longest = duplicateCandidates.next();
                List<Span> foundDuplicates = new LinkedList<Span>();
                Query query = new TermQuery(new Term("duplicatesGeneralized", longest));

                for (Entry<IndexReader, FileObject> e : readers2Roots.entrySet()) {
                    Searcher s = new IndexSearcher(e.getKey());
                    BitSet matchingDocuments = new BitSet(e.getKey().maxDoc());
                    Collector c = new BitSetCollector(matchingDocuments);

                    s.search(query, c);

                    for (int docNum = matchingDocuments.nextSetBit(0); docNum >= 0; docNum = matchingDocuments.nextSetBit(docNum + 1)) {
                        final Document doc = e.getKey().document(docNum);
                        // the per-document hash values are stored sorted, so binary search is valid
                        int pos = Arrays.binarySearch(doc.getValues("duplicatesGeneralized"), longest);

                        if (pos < 0) {
                            continue;
                        }

                        String spanSpec = doc.getValues("duplicatesPositions")[pos];
                        String relPath = doc.getField("duplicatesPath").stringValue();

                        for (String spanPart : spanSpec.split(";")) {
                            Span span = Span.of(e.getValue().getFileObject(relPath), spanPart);

                            if (span != null) {
                                foundDuplicates.add(span);
                            }
                        }
                    }
                }

                if (foundDuplicates.size() >= minDuplicates) {
                    DuplicateDescription current = DuplicateDescription.of(foundDuplicates, getValue(longest), longest);
                    boolean add = true;

                    for (Iterator<DuplicateDescription> it = result.iterator(); it.hasNext();) {
                        DuplicateDescription existing = it.next();

                        if (subsumes(existing, current)) {
                            add = false;
                            break;
                        }

                        if (subsumes(current, existing)) {
                            //can happen? (note that the duplicates are sorted by value)
                            it.remove();
                        }
                    }

                    if (add) {
                        result.add(current);
                        return current;
                    }
                }
            }

            return null;
        }

        public boolean hasNext() {
            if (next == null) {
                try {
                    next = nextDescription();
                } catch (IOException ex) {
                    // NOTE(review): IOException is reported and treated as end-of-iteration;
                    // kept as-is since callers do not expect a checked exception here
                    Exceptions.printStackTrace(ex);
                }
            }

            return next != null;
        }

        public DuplicateDescription next() {
            if (!hasNext()) {
                throw new NoSuchElementException();
            }

            DuplicateDescription r = next;

            next = null;
            return r;
        }

        public void remove() {
            throw new UnsupportedOperationException("Not supported.");
        }
    }

    /**
     * Returns the values of {@code field} that occur in at least two documents
     * of {@code ir}; returns an empty list immediately when cancelled.
     */
    private static List<String> getDuplicatedValues(IndexReader ir, String field, AtomicBoolean cancel) throws IOException {
        List<String> values = new ArrayList<String>();
        TermEnum terms = ir.terms(new Term(field));

        try {
            do {
                if (cancel.get()) return Collections.emptyList();

                final Term term = terms.term();

                // term() is null when the enumeration is already exhausted
                // (e.g. the index has no terms at or after "field"); the
                // original dereferenced it unconditionally and could NPE.
                if (term == null || !field.equals(term.field())) {
                    break;
                }

                if (terms.docFreq() < 2) continue;

                values.add(term.text());
            } while (terms.next());
        } finally {
            // TermEnum holds index resources; the original never closed it
            terms.close();
        }

        return values;
    }

    /** Extracts the numeric "value" suffix from an {@code XXXXXXXX:value} hash. */
    private static long getValue(String encoded) {
        return Long.parseLong(encoded.substring(encoded.lastIndexOf(":") + 1));
    }

    /** Sorts hashes by descending value (most valuable duplicates first). */
    private static void sortHashes(List<String> hashes) {
        Collections.sort(hashes, new Comparator<String>() {
            public int compare(String arg0, String arg1) {
                // explicit comparison: the original used
                // (int) Math.signum(getValue(arg1) - getValue(arg0)),
                // whose subtraction can overflow for extreme long values
                long v0 = getValue(arg0);
                long v1 = getValue(arg1);

                return v1 < v0 ? -1 : (v1 > v0 ? 1 : 0);
            }
        });
    }

    /**
     * Returns true when {@code bigger} covers the same files as
     * {@code smaller} and one of its spans strictly contains a span of
     * {@code smaller}.
     */
    private static boolean subsumes(DuplicateDescription bigger, DuplicateDescription smaller) {
        Set<FileObject> bFiles = new HashSet<FileObject>();

        for (Span s : bigger.dupes) {
            bFiles.add(s.file);
        }

        Set<FileObject> sFiles = new HashSet<FileObject>();

        for (Span s : smaller.dupes) {
            sFiles.add(s.file);
        }

        if (!bFiles.equals(sFiles)) return false;

        Span testAgainst = bigger.dupes.get(0);

        for (Span s : smaller.dupes) {
            // NOTE(review): identity comparison of FileObjects — presumably relies on
            // FileObject instances being canonical per file; confirm before changing to equals()
            if (s.file == testAgainst.file) {
                if ((testAgainst.startOff <= s.startOff && testAgainst.endOff > s.endOff)
                        || (testAgainst.startOff < s.startOff && testAgainst.endOff >= s.endOff)) {
                    return true;
                }
            }
        }

        return false;
    }

    /** Convenience overload taking a {@link CompilationInfo}. */
    public static Map<String, long[]> encodeGeneralized(CompilationInfo info) {
        return encodeGeneralized(info.getTrees(), info.getCompilationUnit());
    }

    /**
     * Hashes every sufficiently valuable subtree of {@code cut} into an
     * {@code MD5prefix:value} key mapped to the flat list of
     * (start, end) source positions of the subtrees producing that key.
     */
    public static Map<String, long[]> encodeGeneralized(final Trees trees, final CompilationUnitTree cut) {
        final SourcePositions sp = trees.getSourcePositions();
        final Map<String, Collection<Long>> positions = new HashMap<String, Collection<Long>>();

        new TreePathScanner<Void, Void>() {
            @Override
            public Void scan(Tree tree, Void p) {
                if (tree == null) return null;

                if (getCurrentPath() != null) {
                    DigestOutputStream baos = null;
                    PrintWriter out = null;

                    try {
                        baos = new DigestOutputStream(new ByteArrayOutputStream(), MessageDigest.getInstance("MD5"));
                        out = new PrintWriter(new OutputStreamWriter(baos, "UTF-8"));

                        GeneralizePattern gen = new GeneralizePattern(out, trees);

                        gen.scan(new TreePath(getCurrentPath(), tree), null);
                        out.close();

                        if (gen.value >= MINIMAL_VALUE) {
                            StringBuilder text = new StringBuilder();
                            byte[] bytes = baos.getMessageDigest().digest();

                            // first 4 digest bytes -> 8 hex chars, then ":value"
                            for (int cntr = 0; cntr < 4; cntr++) {
                                text.append(String.format("%02X", bytes[cntr]));
                            }

                            text.append(':').append(gen.value);

                            String enc = text.toString();
                            Collection<Long> spanSpecs = positions.get(enc);

                            if (spanSpecs == null) {
                                positions.put(enc, spanSpecs = new LinkedList<Long>());
                            }

                            long start = sp.getStartPosition(cut, tree);

                            // NOTE(review): stores (start, end) pairs, while Span.of parses
                            // "start:length" — the index writer presumably converts; verify there
                            spanSpecs.add(start);
                            spanSpecs.add(sp.getEndPosition(cut, tree));
                        }
                    } catch (UnsupportedEncodingException ex) {
                        Exceptions.printStackTrace(ex);
                    } catch (NoSuchAlgorithmException ex) {
                        Exceptions.printStackTrace(ex);
                    } finally {
                        // null checks: the original closed unconditionally and would
                        // NPE here if MessageDigest.getInstance threw before assignment
                        if (out != null) {
                            out.close();
                        }
                        if (baos != null) {
                            try {
                                baos.close();
                            } catch (IOException ex) {
                                Exceptions.printStackTrace(ex);
                            }
                        }
                    }
                }

                return super.scan(tree, p);
            }
        }.scan(cut, null);

        Map<String, long[]> result = new TreeMap<String, long[]>();

        for (Entry<String, Collection<Long>> e : positions.entrySet()) {
            long[] spans = new long[e.getValue().size()];
            int idx = 0;

            for (Long l : e.getValue()) {
                spans[idx++] = l;
            }

            result.put(e.getKey(), spans);
        }

        return result;
    }

    /**
     * Prints a generalized form of a subtree to {@code to}: tree kinds plus
     * identifiers, with private/local/parameter names replaced by
     * positional variables ({@code $0}, {@code $1}, ...), so that
     * structurally equal fragments hash identically. {@code value} counts
     * the fragment's non-generalized nodes and is used as a size threshold.
     */
    private static final class GeneralizePattern extends TreePathScanner<Void, Void> {

        public final Map<Tree, Tree> tree2Variable = new HashMap<Tree, Tree>();
        private final Map<Element, String> element2Variable = new HashMap<Element, String>();
        private final PrintWriter to;
        private final Trees javacTrees;
        // "size" of the scanned fragment; read by encodeGeneralized via gen.value
        private long value;
        private int currentVariableIndex = 0;

        public GeneralizePattern(PrintWriter to, Trees javacTrees) {
            this.to = to;
            this.javacTrees = javacTrees;
        }

        /** Returns the stable positional variable name assigned to {@code el}. */
        private @NonNull String getVariable(@NonNull Element el) {
            String var = element2Variable.get(el);

            if (var == null) {
                element2Variable.put(el, var = "$" + currentVariableIndex++);
            }

            return var;
        }

        /** Private members and local/parameter/exception variables are generalized. */
        private boolean shouldBeGeneralized(@NonNull Element el) {
            if (el.getModifiers().contains(Modifier.PRIVATE)) {
                return true;
            }

            switch (el.getKind()) {
                case LOCAL_VARIABLE:
                case EXCEPTION_PARAMETER:
                case PARAMETER:
                    return true;
            }

            return false;
        }

        @Override
        public Void scan(Tree tree, Void p) {
            if (tree != null) {
                to.append(tree.getKind().name());
                value++;
            }

            return super.scan(tree, p);
        }

        @Override
        public Void visitIdentifier(IdentifierTree node, Void p) {
            Element e = javacTrees.getElement(getCurrentPath());

            if (e != null && shouldBeGeneralized(e)) {
                to.append(getVariable(e));
                // generalized identifiers do not add to the fragment's value
                value--;
                return null;
            } else {
                to.append(node.getName());
            }

            return super.visitIdentifier(node, p);
        }

        @Override
        public Void visitVariable(VariableTree node, Void p) {
            Element e = javacTrees.getElement(getCurrentPath());

            if (e != null && shouldBeGeneralized(e)) {
                to.append(getVariable(e));
            } else {
                to.append(node.getName());
            }

            return super.visitVariable(node, p);
        }

        @Override
        public Void visitNewClass(NewClassTree node, Void p) {
            // anonymous/constructor subtrees are excluded from the pattern entirely
            return null;
        }
    }

    // minimal generalized-fragment value for a subtree to be hashed at all
    private static final int MINIMAL_VALUE = 10;

    /** A duplicated fragment: its occurrences, its value, and its hash key. */
    public static final class DuplicateDescription {

        public final List<Span> dupes;
        public final long value;
        public final String hash;

        private DuplicateDescription(List<Span> dupes, long value, String hash) {
            this.dupes = dupes;
            this.value = value;
            this.hash = hash;
        }

        public static DuplicateDescription of(List<Span> dupes, long value, String hash) {
            return new DuplicateDescription(dupes, value, hash);
        }
    }

    /** A source region (offset range) inside one file. */
    public static final class Span {
        public final FileObject file;
        public final int startOff;
        public final int endOff;

        public Span(FileObject file, int startOff, int endOff) {
            this.file = file;
            this.startOff = startOff;
            this.endOff = endOff;
        }

        /**
         * Parses a {@code "start:length"} spec; returns {@code null} for
         * negative offsets (malformed index entries).
         */
        public static @CheckForNull Span of(FileObject file, String spanSpec) {
            String[] split = spanSpec.split(":");
            int start = Integer.valueOf(split[0]);
            int end = start + Integer.valueOf(split[1]);

            if (start < 0 || end < 0) return null; //XXX

            return new Span(file, start, end);
        }
    }
}