Java tutorial
/* * Copyright 2014 Google Inc. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.google.devtools.kythe.analyzers.java; import com.google.common.collect.HashMultiset; import com.google.common.collect.Lists; import com.google.common.collect.Multiset; import com.google.devtools.kythe.analyzers.base.CorpusPath; import com.google.devtools.kythe.analyzers.base.EdgeKind; import com.google.devtools.kythe.analyzers.base.EntrySet; import com.google.devtools.kythe.analyzers.base.FactEmitter; import com.google.devtools.kythe.analyzers.base.KytheEntrySets; import com.google.devtools.kythe.analyzers.base.NodeKind; import com.google.devtools.kythe.analyzers.java.SourceText.Positions; import com.google.devtools.kythe.common.FormattingLogger; import com.google.devtools.kythe.platform.java.helpers.SignatureGenerator; import com.google.devtools.kythe.platform.shared.StatisticsCollector; import com.google.devtools.kythe.proto.Analysis.CompilationUnit.FileInput; import com.google.devtools.kythe.proto.Diagnostic; import com.google.devtools.kythe.proto.Link; import com.google.devtools.kythe.proto.MarkedSource; import com.google.devtools.kythe.proto.Storage.VName; import com.google.devtools.kythe.util.KytheURI; import com.google.devtools.kythe.util.Span; import com.sun.tools.javac.code.Flags; import com.sun.tools.javac.code.Symbol; import com.sun.tools.javac.code.Symbol.ClassSymbol; import com.sun.tools.javac.code.Symbol.MethodSymbol; import com.sun.tools.javac.code.Symbol.PackageSymbol; import com.sun.tools.javac.code.Type; import com.sun.tools.javac.code.TypeTag; import com.sun.tools.javac.tree.JCTree; import java.io.UnsupportedEncodingException; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Optional; import java.util.Set; import javax.annotation.Nullable; import javax.lang.model.element.ElementKind; import javax.lang.model.element.Modifier; import javax.lang.model.element.Name; import javax.tools.JavaFileObject; /** Specialization of {@link KytheEntrySets} for Java. */ public class JavaEntrySets extends KytheEntrySets { private static final FormattingLogger logger = FormattingLogger.getLogger(JavaEntrySets.class); private final Map<Symbol, EntrySet> symbolNodes = new HashMap<>(); private final Map<Symbol, Integer> symbolHashes = new HashMap<>(); private final Map<Symbol, Set<String>> symbolSigs = new HashMap<Symbol, Set<String>>(); private final boolean ignoreVNamePaths; private final String overrideJdkCorpus; private Map<String, Integer> sourceToWildcardCounter = new HashMap<>(); public JavaEntrySets(StatisticsCollector statistics, FactEmitter emitter, VName compilationVName, List<FileInput> requiredInputs, boolean ignoreVNamePaths, String overrideJdkCorpus) { super(statistics, emitter, compilationVName, requiredInputs); this.ignoreVNamePaths = ignoreVNamePaths; this.overrideJdkCorpus = overrideJdkCorpus; } /** * The only place the integer index for nested classes/anonymous classes is stored is in the * flatname of the symbol. (This index is determined at compile time using linear search; see * 'localClassName' in Check.java). The simple name can't be relied on; for nested classes it * drops the name of the parent class (so 'pkg.OuterClass$Inner' yields only 'Inner') and for anonymous * classes it's blank. For multiply-nested classes, we'll see tokens like 'OuterClass$Inner$1$1'. */ private String getIdentToken(Symbol sym) { String flatName = sym.flatName().toString(); int lastDot = flatName.lastIndexOf('.'); // A$1 is a valid variable/method name, so make sure we only look at $ in class names. int lastCash = (sym instanceof ClassSymbol) ? flatName.lastIndexOf('$') : -1; int lastTok = lastDot > lastCash ? lastDot : lastCash; String identToken = lastTok < 0 ? flatName : flatName.substring(lastTok + 1); if (!identToken.isEmpty() && Character.isDigit(identToken.charAt(0))) { identToken = "(anon " + identToken + ")"; } return identToken; } /** * Returns the Symbol for sym's parent in qualified names, assuming that we'll be using * getIdentToken() to print nodes. * * <p>We're going through this extra effort to try and give people unsurprising qualified names. * To do that we have to deal with javac's mangling (in {@link #getIdentToken} above), since for * anonymous classes javac only stores mangled symbols. The code as written will emit only dotted * fully-qualified names, even for inner or anonymous classes, and considers concrete type, * package, or method names to be appropriate dot points. (If we weren't careful here we might, * for example, observe nodes in a qualified name corresponding to variables that are initialized * to anonymous classes.) This reflects the nesting structure from the Java side, not the JVM * side. */ @Nullable private Symbol getQualifiedNameParent(Symbol sym) { sym = sym.owner; while (sym != null) { switch (sym.kind) { case TYP: if (!sym.type.hasTag(TypeTag.TYPEVAR)) { return sym; } break; case PCK: case MTH: return sym; // TODO(T227): resolve non-exhaustive switch statements w/o defaults default: break; } sym = sym.owner; } return null; } /** * Returns a {@link MarkedSource} instance for sym's type (or its return type, if sym is a * method). If there is no appropriate type for sym, returns null. Generates links with * signatureGenerator. */ @Nullable private MarkedSource markType(SignatureGenerator signatureGenerator, Symbol sym) { // TODO(zarko): Mark up any annotations. Type type = sym.type; if (type == null || sym == type.tsym) { return null; } boolean wasArray = false; if (type.getReturnType() != null) { type = type.getReturnType(); } if (type.hasTag(TypeTag.ARRAY) && ((Type.ArrayType) type).elemtype != null) { wasArray = true; type = ((Type.ArrayType) type).elemtype; } MarkedSource.Builder builder = MarkedSource.newBuilder().setKind(MarkedSource.Kind.TYPE); if (type.hasTag(TypeTag.CLASS)) { MarkedSource.Builder context = MarkedSource.newBuilder(); String identToken = buildContext(context, type.tsym); builder.addChild(context.build()); builder.addChild(MarkedSource.newBuilder().setKind(MarkedSource.Kind.IDENTIFIER) .setPreText(identToken + (wasArray ? "[] " : " ")).build()); Optional<String> signature = signatureGenerator.getSignature(type.tsym); if (signature.isPresent()) { EntrySet node = getNode(signatureGenerator, type.tsym, signature.get(), null); builder.addLink(Link.newBuilder().addDefinition(new KytheURI(node.getVName()).toString())); } } else { builder.addChild(MarkedSource.newBuilder().setKind(MarkedSource.Kind.IDENTIFIER) .setPreText(type.toString() + (wasArray ? "[] " : " ")).build()); } return builder.build(); } /** * Sets the provided {@link MarkedSource.Builder} to a CONTEXT node, populating it with the * fully-qualified parent scope for sym. Returns the identifier corresponding to sym. */ private String buildContext(MarkedSource.Builder context, Symbol sym) { context.setKind(MarkedSource.Kind.CONTEXT).setPostChildText(".").setAddFinalListToken(true); String identToken = getIdentToken(sym); Symbol parent = getQualifiedNameParent(sym); List<MarkedSource> parents = Lists.newArrayList(); while (parent != null) { String parentName = getIdentToken(parent); if (!parentName.isEmpty()) { parents.add(MarkedSource.newBuilder().setKind(MarkedSource.Kind.IDENTIFIER).setPreText(parentName) .build()); } parent = getQualifiedNameParent(parent); } for (int i = 0; i < parents.size(); ++i) { context.addChild(parents.get(parents.size() - i - 1)); } return identToken; } /** * Returns a node for the given {@link Symbol} and its signature. A new node is created and * emitted if necessary. If non-null, msBuilder will be used to generate a signature. */ public EntrySet getNode(SignatureGenerator signatureGenerator, Symbol sym, String signature, MarkedSource.Builder msBuilder) { checkSignature(sym, signature); EntrySet node; if ((node = symbolNodes.get(sym)) != null) { return node; } ClassSymbol enclClass = sym.enclClass(); VName v = lookupVName(enclClass); if ((v == null || overrideJdkCorpus != null) && fromJDK(sym)) { v = VName.newBuilder().setCorpus(overrideJdkCorpus != null ? overrideJdkCorpus : "jdk").build(); } if (v == null) { node = getNameAndEmit(signature); // NAME node was already emitted } else { if (ignoreVNamePaths) { v = v.toBuilder().setPath(enclClass != null ? enclClass.toString() : "").build(); } MarkedSource.Builder markedSource = msBuilder == null ? MarkedSource.newBuilder() : msBuilder; MarkedSource markedType = markType(signatureGenerator, sym); if (markedType != null) { markedSource.addChild(markedType); } MarkedSource.Builder context = MarkedSource.newBuilder(); String identToken = buildContext(context, sym); markedSource.addChild(context.build()); switch (sym.getKind()) { case TYPE_PARAMETER: markedSource.addChild(MarkedSource.newBuilder().setKind(MarkedSource.Kind.IDENTIFIER) .setPreText("<" + sym.getSimpleName().toString() + ">").build()); break; case CONSTRUCTOR: case METHOD: String methodName; if (sym.getKind() == ElementKind.CONSTRUCTOR && enclClass != null) { methodName = enclClass.getSimpleName().toString(); } else { methodName = sym.getSimpleName().toString(); } markedSource.addChild(MarkedSource.newBuilder().setKind(MarkedSource.Kind.IDENTIFIER) .setPreText(methodName).build()); markedSource.addChild(MarkedSource.newBuilder().setKind(MarkedSource.Kind.PARAMETER_LOOKUP_BY_PARAM) .setPreText("(").setPostChildText(", ").setPostText(")").build()); break; default: markedSource.addChild(MarkedSource.newBuilder().setKind(MarkedSource.Kind.IDENTIFIER) .setPreText(identToken).build()); break; } NodeKind kind = elementNodeKind(sym.getKind()); NodeBuilder builder = kind != null ? newNode(kind) : newNode(sym.getKind().toString()); node = builder.setCorpusPath(CorpusPath.fromVName(v)).addSignatureSalt(signature) .addSignatureSalt("" + hashSymbol(sym)).setProperty("code", markedSource.build()).build(); node.emit(getEmitter()); } symbolNodes.put(sym, node); return node; } /** Emits and returns a new {@link EntrySet} representing Javadoc. */ public EntrySet newDocAndEmit(Positions filePositions, String text, Iterable<EntrySet> params) { VName fileVName = getFileVName(getDigest(filePositions.getSourceFile())); byte[] encodedText; try { encodedText = text.getBytes("UTF-8"); } catch (UnsupportedEncodingException ex) { encodedText = new byte[0]; } NodeBuilder builder = newNode(NodeKind.DOC).setCorpusPath(CorpusPath.fromVName(fileVName)) .setProperty("text", encodedText).addSignatureSalt(text); params.forEach(param -> builder.addSignatureSalt(param.getVName())); EntrySet node = emitAndReturn(builder); emitOrdinalEdges(node, EdgeKind.PARAM, params); return node; } /** Emits and returns a new {@link EntrySet} representing the Java file. */ public EntrySet newFileNodeAndEmit(Positions file) { return newFileNodeAndEmit(getDigest(file.getSourceFile()), file.getData(), file.getEncoding()); } /** Emits and returns a new {@link EntrySet} representing a Java package. */ public EntrySet newPackageNodeAndEmit(PackageSymbol sym) { return newPackageNodeAndEmit(sym.getQualifiedName().toString()); } /** Emits and returns a new {@link EntrySet} representing a Java package. */ public EntrySet newPackageNodeAndEmit(String name) { EntrySet node = emitAndReturn(newNode(NodeKind.PACKAGE).addSignatureSalt(name).setProperty("code", MarkedSource.newBuilder().setPreText(name).setKind(MarkedSource.Kind.IDENTIFIER).build())); return node; } /** Emits and returns a new {@link EntrySet} for the given wildcard. */ public EntrySet newWildcardNodeAndEmit(JCTree.JCWildcard wild, String sourcePath) { int counter = sourceToWildcardCounter.getOrDefault(sourcePath, 0); sourceToWildcardCounter.put(sourcePath, counter + 1); return emitAndReturn(newNode(NodeKind.ABS_VAR).addSignatureSalt(sourcePath + counter)); } /** Returns and emits a Java anchor for the given offset span. */ public EntrySet newAnchorAndEmit(Positions filePositions, Span loc) { return newAnchorAndEmit(filePositions, loc, null); } /** Returns and emits a Java anchor for the given offset span. */ public EntrySet newAnchorAndEmit(Positions filePositions, Span loc, Span snippet) { return newAnchorAndEmit(getFileVName(getDigest(filePositions.getSourceFile())), loc, snippet); } /** Returns and emits a Java anchor for the given identifier. */ public EntrySet newAnchorAndEmit(Positions filePositions, Name name, int startOffset, Span snippet) { Span span = filePositions.findIdentifier(name, startOffset); return span == null ? null : newAnchorAndEmit(getFileVName(getDigest(filePositions.getSourceFile())), span, snippet); } /** Emits and returns a DIAGNOSTIC node attached to the given file. */ public EntrySet emitDiagnostic(Positions filePositions, Diagnostic d) { return emitDiagnostic(getFileVName(getDigest(filePositions.getSourceFile())), d); } /** Returns the equivalent {@link NodeKind} for the given {@link ElementKind}. */ @Nullable private static NodeKind elementNodeKind(ElementKind kind) { switch (kind) { case CLASS: return NodeKind.RECORD_CLASS; case ENUM: return NodeKind.SUM_ENUM_CLASS; case ENUM_CONSTANT: return NodeKind.CONSTANT; case ANNOTATION_TYPE: case INTERFACE: return NodeKind.INTERFACE; case EXCEPTION_PARAMETER: return NodeKind.VARIABLE_EXCEPTION; case FIELD: return NodeKind.VARIABLE_FIELD; case LOCAL_VARIABLE: return NodeKind.VARIABLE_LOCAL; case PARAMETER: return NodeKind.VARIABLE_PARAMETER; case RESOURCE_VARIABLE: return NodeKind.VARIABLE_RESOURCE; case CONSTRUCTOR: return NodeKind.FUNCTION_CONSTRUCTOR; case METHOD: return NodeKind.FUNCTION; case TYPE_PARAMETER: return NodeKind.ABS_VAR; default: // TODO(T227): handle all cases, make this exceptional, and remove all null checks return null; } } // Returns a consistent hash for the given symbol across separate compilations and JVM instances. private int hashSymbol(Symbol sym) { // This method is necessary because Symbol, and most other javac internals, do not overload the // Object#hashCode() method and the default implementation, System#identityHashCode(Object), is // practically useless because it can change across JVM instances. This method instead only // uses stable hashing methods such as String#hashCode(), Multiset#hashCode(), and // Integer#hashCode(). if (symbolHashes.containsKey(sym)) { return symbolHashes.get(sym); } Multiset<Integer> hashes = HashMultiset.create(); if (sym.members() != null) { for (Symbol member : sym.members().getSymbols()) { if (member.isPrivate() || member instanceof MethodSymbol && ((MethodSymbol) member).isStaticOrInstanceInit() || ((member.flags_field & (Flags.BRIDGE | Flags.SYNTHETIC)) != 0)) { // Ignore initializers, private members, and synthetic members. It's possible these do // not appear in the symbol's scope outside of its .java source compilation (i.e. they do // not appear in dependent compilations for Bazel's java rules). continue; } // We can't recursively get the result of hashSymbol(member) since the extractor removes all // .class files not directly used by a compilation meaning that member may not be complete. hashes.add(member.getSimpleName().toString().hashCode()); hashes.add(member.kind.ordinal()); } } hashes.add(sym.getQualifiedName().toString().hashCode()); hashes.add(sym.getKind().ordinal()); for (Modifier mod : sym.getModifiers()) { hashes.add(mod.ordinal()); } int h = hashes.hashCode(); symbolHashes.put(sym, h); return h; } @Nullable private VName lookupVName(@Nullable ClassSymbol cls) { if (cls == null) { return null; } VName clsVName = lookupVName(getDigest(cls.classfile)); return clsVName != null ? clsVName : lookupVName(getDigest(cls.sourcefile)); } @Nullable private static String getDigest(@Nullable JavaFileObject sourceFile) { if (sourceFile == null) { return null; } // This matches our {@link CustomFileObject#toUri()} logic return sourceFile.toUri().getHost(); } /** Ensures that a particular {@link Symbol} is only associated with a single signature. */ private void checkSignature(Symbol sym, String signature) { // TODO(schroederc): remove this check in production releases if (!symbolSigs.containsKey(sym)) { symbolSigs.put(sym, new HashSet<String>()); } Set<String> signatures = symbolSigs.get(sym); signatures.add(signature); if (signatures.size() > 1) { throw new IllegalStateException("Multiple signatures found for " + sym + ": " + signatures); } } private static boolean fromJDK(@Nullable Symbol sym) { if (sym == null || sym.enclClass() == null) { return false; } String cls = sym.enclClass().className(); return cls.startsWith("java.") || cls.startsWith("javax.") || cls.startsWith("com.sun.") || cls.startsWith("sun."); } }