Java tutorial

The listing below is OpenGrok's IndexDatabase class (package
org.opensolaris.opengrok.index), which creates and updates the Lucene index
databases used for source search; currently one index database is kept per
project.
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * See LICENSE.txt included in this distribution for the specific
 * language governing permissions and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at LICENSE.txt.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2008, 2014, Oracle and/or its affiliates. All rights reserved.
 */
package org.opensolaris.opengrok.index;

import java.io.BufferedInputStream;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.zip.GZIPOutputStream;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockFactory;
import org.apache.lucene.store.NoLockFactory;
import org.apache.lucene.store.SimpleFSLockFactory;
import org.apache.lucene.util.BytesRef;
import org.opensolaris.opengrok.analysis.AnalyzerGuru;
import org.opensolaris.opengrok.analysis.Ctags;
import org.opensolaris.opengrok.analysis.Definitions;
import org.opensolaris.opengrok.analysis.FileAnalyzer;
import org.opensolaris.opengrok.analysis.FileAnalyzer.Genre;
import org.opensolaris.opengrok.configuration.Project;
import org.opensolaris.opengrok.configuration.RuntimeEnvironment;
import org.opensolaris.opengrok.history.HistoryException;
import org.opensolaris.opengrok.history.HistoryGuru;
import org.opensolaris.opengrok.search.QueryBuilder;
import org.opensolaris.opengrok.search.SearchEngine;
import org.opensolaris.opengrok.util.IOUtils;
import org.opensolaris.opengrok.web.Util;

/**
 * This class is used to create / update the index databases. Currently we use
 * one index database per project.
 *
 * @author Trond Norbye
 * @author Lubos Kosco, update for Lucene 4.x
 */
public class IndexDatabase {

    private Project project;
    private FSDirectory indexDirectory;
    private IndexWriter writer;
    private TermsEnum uidIter;
    private IgnoredNames ignoredNames;
    private Filter includedNames;
    private AnalyzerGuru analyzerGuru;
    private File xrefDir;
    private boolean interrupted;
    private List<IndexChangedListener> listeners;
    private File dirtyFile;
    private final Object lock = new Object();
    private boolean dirty;
    private boolean running;
    private List<String> directories;
    static final Logger log = Logger.getLogger(IndexDatabase.class.getName());
    private Ctags ctags;
    private LockFactory lockfact;
    private final BytesRef emptyBR = new BytesRef("");

    // Directory where we store indexes
    public static final String INDEX_DIR = "index";

    /**
     * Create a new instance of the Index Database. Use this constructor if you
     * don't use any projects.
     *
     * @throws java.io.IOException if an error occurs while creating directories
     */
    public IndexDatabase() throws IOException {
        this(null);
    }

    /**
     * Create a new instance of an Index Database for a given project.
     *
     * @param project the project to create the database for
     * @throws java.io.IOException if an error occurs while creating
     * directories
     */
    public IndexDatabase(Project project) throws IOException {
        this.project = project;
        lockfact = new SimpleFSLockFactory();
        initialize();
    }

    /**
     * Update the index database for all of the projects. Print progress to
     * standard out.
     *
     * @param executor An executor to run the job
     * @throws IOException if an error occurs
     */
    public static void updateAll(ExecutorService executor) throws IOException {
        updateAll(executor, null);
    }

    /**
     * Update the index database for all of the projects.
     *
     * @param executor An executor to run the job
     * @param listener where to signal the changes to the database
     * @throws IOException if an error occurs
     */
    static void updateAll(ExecutorService executor, IndexChangedListener listener) throws IOException {
        RuntimeEnvironment env = RuntimeEnvironment.getInstance();
        List<IndexDatabase> dbs = new ArrayList<>();

        if (env.hasProjects()) {
            for (Project project : env.getProjects()) {
                dbs.add(new IndexDatabase(project));
            }
        } else {
            dbs.add(new IndexDatabase());
        }

        for (IndexDatabase d : dbs) {
            final IndexDatabase db = d;
            if (listener != null) {
                db.addIndexChangedListener(listener);
            }
            executor.submit(new Runnable() {
                @Override
                public void run() {
                    try {
                        db.update();
                    } catch (Throwable e) {
                        log.log(Level.SEVERE, "Problem updating lucene index database: ", e);
                    }
                }
            });
        }
    }

    /**
     * Update the index database for a number of sub-directories.
     *
     * @param executor An executor to run the job
     * @param listener where to signal the changes to the database
     * @param paths list of paths to be indexed
     * @throws IOException if an error occurs
     */
    public static void update(ExecutorService executor, IndexChangedListener listener, List<String> paths) throws IOException {
        RuntimeEnvironment env = RuntimeEnvironment.getInstance();
        List<IndexDatabase> dbs = new ArrayList<>();

        for (String path : paths) {
            Project project = Project.getProject(path);
            if (project == null && env.hasProjects()) {
                log.log(Level.WARNING, "Could not find a project for \"{0}\"", path);
            } else {
                IndexDatabase db;
                try {
                    if (project == null) {
                        db = new IndexDatabase();
                    } else {
                        db = new IndexDatabase(project);
                    }

                    int idx = dbs.indexOf(db);
                    if (idx != -1) {
                        db = dbs.get(idx);
                    }

                    if (db.addDirectory(path)) {
                        if (idx == -1) {
                            dbs.add(db);
                        }
                    } else {
                        log.log(Level.WARNING, "Directory does not exist \"{0}\"", path);
                    }
\"{0}\"", path); } } catch (IOException e) { log.log(Level.WARNING, "An error occured while updating index", e); } } for (final IndexDatabase db : dbs) { db.addIndexChangedListener(listener); executor.submit(new Runnable() { @Override public void run() { try { db.update(); } catch (Throwable e) { log.log(Level.SEVERE, "An error occured while updating index", e); } } }); } } } @SuppressWarnings("PMD.CollapsibleIfStatements") private void initialize() throws IOException { synchronized (this) { RuntimeEnvironment env = RuntimeEnvironment.getInstance(); File indexDir = new File(env.getDataRootFile(), INDEX_DIR); if (project != null) { indexDir = new File(indexDir, project.getPath()); } if (!indexDir.exists() && !indexDir.mkdirs()) { // to avoid race conditions, just recheck.. if (!indexDir.exists()) { throw new FileNotFoundException( "Failed to create root directory [" + indexDir.getAbsolutePath() + "]"); } } if (!env.isUsingLuceneLocking()) { lockfact = NoLockFactory.getNoLockFactory(); } indexDirectory = FSDirectory.open(indexDir, lockfact); ignoredNames = env.getIgnoredNames(); includedNames = env.getIncludedNames(); analyzerGuru = new AnalyzerGuru(); if (env.isGenerateHtml()) { xrefDir = new File(env.getDataRootFile(), "xref"); } listeners = new ArrayList<>(); dirtyFile = new File(indexDir, "dirty"); dirty = dirtyFile.exists(); directories = new ArrayList<>(); } } /** * By default the indexer will traverse all directories in the project. If * you add directories with this function update will just process the * specified directories. * * @param dir The directory to scan * @return <code>true</code> if the file is added, false otherwise */ @SuppressWarnings("PMD.UseStringBufferForStringAppends") public boolean addDirectory(String dir) { String directory = dir; if (directory.startsWith("\\")) { directory = directory.replace('\\', '/'); } else if (directory.charAt(0) != '/') { directory = "/" + directory; } File file = new File(RuntimeEnvironment.getInstance().getSourceRootFile(), directory); if (file.exists()) { directories.add(directory); return true; } return false; } /** * Update the content of this index database * * @throws IOException if an error occurs * @throws HistoryException if an error occurs when accessing the history */ public void update() throws IOException, HistoryException { synchronized (lock) { if (running) { throw new IOException("Indexer already running!"); } running = true; interrupted = false; } String ctgs = RuntimeEnvironment.getInstance().getCtags(); if (ctgs != null) { ctags = new Ctags(); ctags.setBinary(ctgs); } if (ctags == null) { log.severe("Unable to run ctags! 
searching definitions will not work!"); } if (ctags != null) { String filename = RuntimeEnvironment.getInstance().getCTagsExtraOptionsFile(); if (filename != null) { ctags.setCTagsExtraOptionsFile(filename); } } try { Analyzer analyzer = AnalyzerGuru.getAnalyzer(); IndexWriterConfig iwc = new IndexWriterConfig(SearchEngine.LUCENE_VERSION, analyzer); iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); iwc.setRAMBufferSizeMB(RuntimeEnvironment.getInstance().getRamBufferSize()); writer = new IndexWriter(indexDirectory, iwc); writer.commit(); // to make sure index exists on the disk if (directories.isEmpty()) { if (project == null) { directories.add(""); } else { directories.add(project.getPath()); } } for (String dir : directories) { File sourceRoot; if ("".equals(dir)) { sourceRoot = RuntimeEnvironment.getInstance().getSourceRootFile(); } else { sourceRoot = new File(RuntimeEnvironment.getInstance().getSourceRootFile(), dir); } HistoryGuru.getInstance().ensureHistoryCacheExists(sourceRoot); String startuid = Util.path2uid(dir, ""); IndexReader reader = DirectoryReader.open(indexDirectory); // open existing index Terms terms = null; int numDocs = reader.numDocs(); if (numDocs > 0) { Fields uFields = MultiFields.getFields(reader);//reader.getTermVectors(0); terms = uFields.terms(QueryBuilder.U); } try { if (numDocs > 0) { uidIter = terms.iterator(uidIter); TermsEnum.SeekStatus stat = uidIter.seekCeil(new BytesRef(startuid)); //init uid if (stat == TermsEnum.SeekStatus.END) { uidIter = null; log.log(Level.WARNING, "Couldn't find a start term for {0}, empty u field?", startuid); } } // The code below traverses the tree to get total count. int file_cnt = 0; if (RuntimeEnvironment.getInstance().isPrintProgress()) { log.log(Level.INFO, "Counting files in {0} ...", dir); file_cnt = indexDown(sourceRoot, dir, true, 0, 0); if (log.isLoggable(Level.INFO)) { log.log(Level.INFO, "Need to process: {0} files for {1}", new Object[] { file_cnt, dir }); } } indexDown(sourceRoot, dir, false, 0, file_cnt); while (uidIter != null && uidIter.term() != null && uidIter.term().utf8ToString().startsWith(startuid)) { removeFile(); BytesRef next = uidIter.next(); if (next == null) { uidIter = null; } } } finally { reader.close(); } } } finally { if (writer != null) { try { writer.prepareCommit(); writer.commit(); writer.close(); } catch (IOException e) { log.log(Level.WARNING, "An error occured while closing writer", e); } } if (ctags != null) { try { ctags.close(); } catch (IOException e) { log.log(Level.WARNING, "An error occured while closing ctags process", e); } } synchronized (lock) { running = false; } } if (!isInterrupted() && isDirty()) { if (RuntimeEnvironment.getInstance().isOptimizeDatabase()) { optimize(); } RuntimeEnvironment env = RuntimeEnvironment.getInstance(); File timestamp = new File(env.getDataRootFile(), "timestamp"); String purpose = "used for timestamping the index database."; if (timestamp.exists()) { if (!timestamp.setLastModified(System.currentTimeMillis())) { log.log(Level.WARNING, "Failed to set last modified time on ''{0}'', {1}", new Object[] { timestamp.getAbsolutePath(), purpose }); } } else { if (!timestamp.createNewFile()) { log.log(Level.WARNING, "Failed to create file ''{0}'', {1}", new Object[] { timestamp.getAbsolutePath(), purpose }); } } } } /** * Optimize all index databases * * @param executor An executor to run the job * @throws IOException if an error occurs */ static void optimizeAll(ExecutorService executor) throws IOException { List<IndexDatabase> dbs = new ArrayList<>(); 
        RuntimeEnvironment env = RuntimeEnvironment.getInstance();
        if (env.hasProjects()) {
            for (Project project : env.getProjects()) {
                dbs.add(new IndexDatabase(project));
            }
        } else {
            dbs.add(new IndexDatabase());
        }

        for (IndexDatabase d : dbs) {
            final IndexDatabase db = d;
            if (db.isDirty()) {
                executor.submit(new Runnable() {
                    @Override
                    public void run() {
                        try {
                            db.update();
                        } catch (Throwable e) {
                            log.log(Level.SEVERE, "Problem updating lucene index database: ", e);
                        }
                    }
                });
            }
        }
    }

    /**
     * Optimize the index database.
     */
    public void optimize() {
        synchronized (lock) {
            if (running) {
                log.warning("Optimize terminated... Someone else is updating / optimizing it!");
                return;
            }
            running = true;
        }
        IndexWriter wrt = null;
        try {
            log.info("Optimizing the index ... ");
            Analyzer analyzer = new StandardAnalyzer();
            IndexWriterConfig conf = new IndexWriterConfig(SearchEngine.LUCENE_VERSION, analyzer);
            conf.setOpenMode(OpenMode.CREATE_OR_APPEND);

            wrt = new IndexWriter(indexDirectory, conf);
            wrt.forceMerge(1); // this is deprecated and not needed anymore
            log.info("done");
            synchronized (lock) {
                if (dirtyFile.exists() && !dirtyFile.delete()) {
                    log.log(Level.FINE, "Failed to remove \"dirty-file\": {0}",
                            dirtyFile.getAbsolutePath());
                }
                dirty = false;
            }
        } catch (IOException e) {
            log.log(Level.SEVERE, "ERROR: optimizing index: {0}", e);
        } finally {
            if (wrt != null) {
                try {
                    wrt.close();
                } catch (IOException e) {
                    log.log(Level.WARNING, "An error occurred while closing writer", e);
                }
            }
            synchronized (lock) {
                running = false;
            }
        }
    }

    private boolean isDirty() {
        synchronized (lock) {
            return dirty;
        }
    }

    private void setDirty() {
        synchronized (lock) {
            try {
                if (!dirty) {
                    if (!dirtyFile.createNewFile() && !dirtyFile.exists()) {
                        log.log(Level.FINE, "Failed to create \"dirty-file\": {0}",
                                dirtyFile.getAbsolutePath());
                    }
                    dirty = true;
                }
            } catch (IOException e) {
                log.log(Level.FINE, "When creating dirty file: ", e);
            }
        }
    }

    /**
     * Remove a stale file (uidIter.term().text()) from the index database (and
     * the xref file).
     *
     * @throws java.io.IOException if an error occurs
     */
    private void removeFile() throws IOException {
        String path = Util.uid2url(uidIter.term().utf8ToString());

        for (IndexChangedListener listener : listeners) {
            listener.fileRemove(path);
        }
        writer.deleteDocuments(new Term(QueryBuilder.U, uidIter.term()));
        writer.prepareCommit();
        writer.commit();

        File xrefFile;
        if (RuntimeEnvironment.getInstance().isCompressXref()) {
            xrefFile = new File(xrefDir, path + ".gz");
        } else {
            xrefFile = new File(xrefDir, path);
        }
        File parent = xrefFile.getParentFile();

        if (!xrefFile.delete() && xrefFile.exists()) {
            log.log(Level.INFO, "Failed to remove obsolete xref-file: {0}",
                    xrefFile.getAbsolutePath());
        }

        // Remove the parent directory if it's empty
        if (parent.delete()) {
            log.log(Level.FINE, "Removed empty xref dir:{0}", parent.getAbsolutePath());
        }

        setDirty();
        for (IndexChangedListener listener : listeners) {
            listener.fileRemoved(path);
        }
    }

    /**
     * Add a file to the Lucene index (and generate a xref file).
     *
     * @param file The file to add
     * @param path The path to the file (from source root)
     * @throws java.io.IOException if an error occurs
     */
    private void addFile(File file, String path) throws IOException {
        FileAnalyzer fa;
        try (InputStream in = new BufferedInputStream(new FileInputStream(file))) {
            fa = AnalyzerGuru.getAnalyzer(in, path);
        }

        for (IndexChangedListener listener : listeners) {
            listener.fileAdd(path, fa.getClass().getSimpleName());
        }
        fa.setCtags(ctags);
        fa.setProject(Project.getProject(path));

        Document doc = new Document();
        try (Writer xrefOut = getXrefWriter(fa, path)) {
            analyzerGuru.populateDocument(doc, file, path, fa, xrefOut);
        } catch (Exception e) {
            log.log(Level.INFO,
                    "Skipped file ''{0}'' because the analyzer didn''t "
                    + "understand it.", path);
            log.log(Level.FINE, "Exception from analyzer " + fa.getClass().getName(), e);
            cleanupResources(doc);
            return;
        }

        try {
            writer.addDocument(doc, fa);
        } catch (Throwable t) {
            cleanupResources(doc);
            throw t;
        }

        setDirty();
        for (IndexChangedListener listener : listeners) {
            listener.fileAdded(path, fa.getClass().getSimpleName());
        }
    }

    /**
     * Do a best effort to clean up all resources allocated when populating
     * a Lucene document. On normal execution, these resources should be
     * closed automatically by the index writer once it's done with them, but
     * we may not get that far if something fails.
     *
     * @param doc the document whose resources to clean up
     */
    private void cleanupResources(Document doc) {
        for (IndexableField f : doc) {
            // If the field takes input from a reader, close the reader.
            IOUtils.close(f.readerValue());

            // If the field takes input from a token stream, close the
            // token stream.
            if (f instanceof Field) {
                IOUtils.close(((Field) f).tokenStreamValue());
            }
        }
    }

    /**
     * Check if I should accept this file into the index database.
     *
     * @param file the file to check
     * @return true if the file should be included, false otherwise
     */
    private boolean accept(File file) {
        if (!includedNames.isEmpty()
                // the filter should not affect directory names
                && (!(file.isDirectory() || includedNames.match(file)))) {
            return false;
        }

        if (ignoredNames.ignore(file)) {
            return false;
        }

        String absolutePath = file.getAbsolutePath();

        if (!file.canRead()) {
            log.log(Level.WARNING, "Warning: could not read {0}", absolutePath);
            return false;
        }

        try {
            String canonicalPath = file.getCanonicalPath();
            if (!absolutePath.equals(canonicalPath)
                    && !acceptSymlink(absolutePath, canonicalPath)) {
                log.log(Level.FINE, "Skipped symlink ''{0}'' -> ''{1}''",
                        new Object[]{absolutePath, canonicalPath});
                return false;
            }
            // below will only let go files and directories, anything else is
            // considered special and is not added
            if (!file.isFile() && !file.isDirectory()) {
                log.log(Level.WARNING, "Warning: ignored special file {0}", absolutePath);
                return false;
            }
        } catch (IOException exp) {
            log.log(Level.WARNING, "Warning: Failed to resolve name: {0}", absolutePath);
            log.log(Level.FINE, "Stack Trace: ", exp);
        }

        if (file.isDirectory()) {
            // always accept directories so that their files can be examined
            return true;
        }

        if (HistoryGuru.getInstance().hasHistory(file)) {
            // versioned files should always be accepted
            return true;
        }

        // this is an unversioned file, check if it should be indexed
        return !RuntimeEnvironment.getInstance().isIndexVersionedFilesOnly();
    }

    boolean accept(File parent, File file) {
        try {
            File f1 = parent.getCanonicalFile();
            File f2 = file.getCanonicalFile();
            if (f1.equals(f2)) {
                log.log(Level.INFO, "Skipping links to itself...: {0} {1}",
                        new Object[]{parent.getAbsolutePath(), file.getAbsolutePath()});
                return false;
            }

            // Now, let's verify that it's not a link back up the chain...
            File t1 = f1;
            while ((t1 = t1.getParentFile()) != null) {
                if (f2.equals(t1)) {
                    log.log(Level.INFO, "Skipping links to parent...: {0} {1}",
                            new Object[]{parent.getAbsolutePath(), file.getAbsolutePath()});
                    return false;
                }
            }

            return accept(file);
        } catch (IOException ex) {
            log.log(Level.WARNING, "Warning: Failed to resolve name: {0} {1}",
                    new Object[]{parent.getAbsolutePath(), file.getAbsolutePath()});
        }
        return false;
    }

    /**
     * Check if I should accept the path containing a symlink.
     *
     * @param absolutePath the path with a symlink to check
     * @param canonicalPath the canonical path to the file
     * @return true if the file should be accepted, false otherwise
     */
    private boolean acceptSymlink(String absolutePath, String canonicalPath) throws IOException {
        // Always accept local symlinks
        if (isLocal(canonicalPath)) {
            return true;
        }

        for (String allowedSymlink : RuntimeEnvironment.getInstance().getAllowedSymlinks()) {
            if (absolutePath.startsWith(allowedSymlink)) {
                String allowedTarget = new File(allowedSymlink).getCanonicalPath();
                if (canonicalPath.startsWith(allowedTarget)
                        && absolutePath.substring(allowedSymlink.length())
                                .equals(canonicalPath.substring(allowedTarget.length()))) {
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Check if a file is local to the current project. If we don't have
     * projects, check if the file is in the source root.
     *
     * @param path the path to a file
     * @return true if the file is local to the current repository
     */
    private boolean isLocal(String path) {
        RuntimeEnvironment env = RuntimeEnvironment.getInstance();
        String srcRoot = env.getSourceRootPath();

        boolean local = false;

        if (path.startsWith(srcRoot)) {
            if (env.hasProjects()) {
                String relPath = path.substring(srcRoot.length());
                if (project.equals(Project.getProject(relPath))) {
                    // File is under the current project, so it's local.
                    local = true;
                }
            } else {
                // File is under source root, and we don't have projects, so
                // consider it local.
                local = true;
            }
        }

        return local;
    }

    /**
     * Generate indexes recursively.
     *
     * @param dir the root directory to generate indexes for
     * @param parent the path of the directory (from source root)
     * @param count_only if true will just traverse the source root and count
     * files
     * @param cur_count current count during the traversal of the tree
     * @param est_total estimate total files to process
     * @return the cumulative number of files encountered
     */
    private int indexDown(File dir, String parent, boolean count_only,
            int cur_count, int est_total) throws IOException {
        int lcur_count = cur_count;
        if (isInterrupted()) {
            return lcur_count;
        }

        if (!accept(dir)) {
            return lcur_count;
        }

        File[] files = dir.listFiles();
        if (files == null) {
            log.log(Level.SEVERE, "Failed to get file listing for: {0}",
                    dir.getAbsolutePath());
            return lcur_count;
        }
        Arrays.sort(files, new Comparator<File>() {
            @Override
            public int compare(File p1, File p2) {
                return p1.getName().compareTo(p2.getName());
            }
        });

        for (File file : files) {
            if (accept(dir, file)) {
                String path = parent + '/' + file.getName();

                if (file.isDirectory()) {
                    lcur_count = indexDown(file, path, count_only, lcur_count, est_total);
                } else {
                    lcur_count++;
                    if (count_only) {
                        continue;
                    }

                    if (RuntimeEnvironment.getInstance().isPrintProgress()
                            && est_total > 0 && log.isLoggable(Level.INFO)) {
                        log.log(Level.INFO, "Progress: {0} ({1}%)",
                                new Object[]{lcur_count, (lcur_count * 100.0f / est_total)});
                    }

                    if (uidIter != null) {
                        // construct uid for doc
                        String uid = Util.path2uid(path,
                                DateTools.timeToString(file.lastModified(),
                                        DateTools.Resolution.MILLISECOND));
                        BytesRef buid = new BytesRef(uid);
                        // remove stale docs whose uid sorts before this file's uid
                        while (uidIter != null && uidIter.term() != null
                                && uidIter.term().compareTo(emptyBR) != 0
                                && uidIter.term().compareTo(buid) < 0) {
                            removeFile();
                            BytesRef next = uidIter.next();
                            if (next == null) {
                                uidIter = null;
                            }
                        }

                        if (uidIter != null && uidIter.term() != null
                                && uidIter.term().bytesEquals(buid)) {
                            // keep matching docs
                            BytesRef next = uidIter.next();
                            if (next == null) {
                                uidIter = null;
                            }
                            continue;
                        }
                    }

                    try {
                        addFile(file, path);
                    } catch (Exception e) {
                        log.log(Level.WARNING,
                                "Failed to add file " + file.getAbsolutePath(), e);
                    }
                }
            }
        }

        return lcur_count;
    }

    /**
     * Interrupt the index generation (and the index generation will stop as
     * soon as possible).
     */
    public void interrupt() {
        synchronized (lock) {
            interrupted = true;
        }
    }

    private boolean isInterrupted() {
        synchronized (lock) {
            return interrupted;
        }
    }

    /**
     * Register an object to receive events when modifications are made to the
     * index database.
     *
     * @param listener the object to receive the events
     */
    public void addIndexChangedListener(IndexChangedListener listener) {
        listeners.add(listener);
    }

    /**
     * Remove an object from the lists of objects to receive events when
     * modifications are made to the index database.
     *
     * @param listener the object to remove
     */
    public void removeIndexChangedListener(IndexChangedListener listener) {
        listeners.remove(listener);
    }

    /**
     * List all files in all of the index databases.
     *
     * @throws IOException if an error occurs
     */
    public static void listAllFiles() throws IOException {
        listAllFiles(null);
    }

    /**
     * List all files in some of the index databases.
     *
     * @param subFiles Subdirectories for the various projects to list the files
     * for (or null or an empty list to dump all projects)
     * @throws IOException if an error occurs
     */
    public static void listAllFiles(List<String> subFiles) throws IOException {
        RuntimeEnvironment env = RuntimeEnvironment.getInstance();
        if (env.hasProjects()) {
            if (subFiles == null || subFiles.isEmpty()) {
                for (Project project : env.getProjects()) {
                    IndexDatabase db = new IndexDatabase(project);
                    db.listFiles();
                }
            } else {
                for (String path : subFiles) {
                    Project project = Project.getProject(path);
                    if (project == null) {
                        log.log(Level.WARNING, "Warning: Could not find a project for \"{0}\"", path);
                    } else {
                        IndexDatabase db = new IndexDatabase(project);
                        db.listFiles();
                    }
                }
            }
        } else {
            IndexDatabase db = new IndexDatabase();
            db.listFiles();
        }
    }

    /**
     * List all of the files in this index database.
     *
     * @throws IOException If an IO error occurs while reading from the database
     */
    public void listFiles() throws IOException {
        IndexReader ireader = null;
        TermsEnum iter = null;
        Terms terms = null;

        try {
            ireader = DirectoryReader.open(indexDirectory); // open existing index
            int numDocs = ireader.numDocs();
            if (numDocs > 0) {
                Fields uFields = MultiFields.getFields(ireader); //reader.getTermVectors(0);
                terms = uFields.terms(QueryBuilder.U);
            }
            iter = terms.iterator(iter); // init uid iterator
            while (iter != null && iter.term() != null) {
                log.fine(Util.uid2url(iter.term().utf8ToString()));
                BytesRef next = iter.next();
                if (next == null) {
                    iter = null;
                }
            }
        } finally {
            if (ireader != null) {
                try {
                    ireader.close();
                } catch (IOException e) {
                    log.log(Level.WARNING, "An error occurred while closing index reader", e);
                }
            }
        }
    }

    static void listFrequentTokens() throws IOException {
        listFrequentTokens(null);
    }

    static void listFrequentTokens(List<String> subFiles) throws IOException {
        final int limit = 4;

        RuntimeEnvironment env = RuntimeEnvironment.getInstance();
        if (env.hasProjects()) {
            if (subFiles == null || subFiles.isEmpty()) {
                for (Project project : env.getProjects()) {
                    IndexDatabase db = new IndexDatabase(project);
                    db.listTokens(limit);
                }
            } else {
                for (String path : subFiles) {
                    Project project = Project.getProject(path);
                    if (project == null) {
                        log.log(Level.WARNING, "Warning: Could not find a project for \"{0}\"", path);
                    } else {
                        IndexDatabase db = new IndexDatabase(project);
                        db.listTokens(limit);
                    }
                }
            }
        } else {
            IndexDatabase db = new IndexDatabase();
            db.listTokens(limit);
        }
    }

    public void listTokens(int freq) throws IOException {
        IndexReader ireader = null;
        TermsEnum iter = null;
        Terms terms = null;

        try {
            ireader = DirectoryReader.open(indexDirectory);
            int numDocs = ireader.numDocs();
            if (numDocs > 0) {
                Fields uFields = MultiFields.getFields(ireader); //reader.getTermVectors(0);
                terms = uFields.terms(QueryBuilder.DEFS);
            }
            iter = terms.iterator(iter); // init uid iterator
            while (iter != null && iter.term() != null) {
(iter.term().field().startsWith("f")) { if (iter.docFreq() > 16 && iter.term().utf8ToString().length() > freq) { log.warning(iter.term().utf8ToString()); } BytesRef next = iter.next(); if (next == null) { iter = null; } /*} else { break; }*/ } } finally { if (ireader != null) { try { ireader.close(); } catch (IOException e) { log.log(Level.WARNING, "An error occured while closing index reader", e); } } } } /** * Get an indexReader for the Index database where a given file * * @param path the file to get the database for * @return The index database where the file should be located or null if it * cannot be located. */ public static IndexReader getIndexReader(String path) { IndexReader ret = null; RuntimeEnvironment env = RuntimeEnvironment.getInstance(); File indexDir = new File(env.getDataRootFile(), INDEX_DIR); if (env.hasProjects()) { Project p = Project.getProject(path); if (p == null) { return null; } indexDir = new File(indexDir, p.getPath()); } try { FSDirectory fdir = FSDirectory.open(indexDir, NoLockFactory.getNoLockFactory()); if (indexDir.exists() && DirectoryReader.indexExists(fdir)) { ret = DirectoryReader.open(fdir); } } catch (Exception ex) { log.log(Level.SEVERE, "Failed to open index: {0}", indexDir.getAbsolutePath()); log.log(Level.FINE, "Stack Trace: ", ex); } return ret; } /** * Get the latest definitions for a file from the index. * * @param file the file whose definitions to find * @return definitions for the file, or {@code null} if they could not be * found * @throws IOException if an error happens when accessing the index * @throws ParseException if an error happens when building the Lucene query * @throws ClassNotFoundException if the class for the stored definitions * instance cannot be found */ public static Definitions getDefinitions(File file) throws IOException, ParseException, ClassNotFoundException { RuntimeEnvironment env = RuntimeEnvironment.getInstance(); String path = env.getPathRelativeToSourceRoot(file, 0); //sanitize windows path delimiters //in order not to conflict with Lucene escape character path = path.replace("\\", "/"); IndexReader ireader = getIndexReader(path); if (ireader == null) { // No index, no definitions... return null; } try { Query q = new QueryBuilder().setPath(path).build(); IndexSearcher searcher = new IndexSearcher(ireader); TopDocs top = searcher.search(q, 1); if (top.totalHits == 0) { // No hits, no definitions... return null; } Document doc = searcher.doc(top.scoreDocs[0].doc); String foundPath = doc.get(QueryBuilder.PATH); // Only use the definitions if we found an exact match. if (path.equals(foundPath)) { IndexableField tags = doc.getField(QueryBuilder.TAGS); if (tags != null) { return Definitions.deserialize(tags.binaryValue().bytes); } } } finally { ireader.close(); } // Didn't find any definitions. return null; } @Override public boolean equals(Object obj) { if (obj == null) { return false; } if (getClass() != obj.getClass()) { return false; } final IndexDatabase other = (IndexDatabase) obj; if (this.project != other.project && (this.project == null || !this.project.equals(other.project))) { return false; } return true; } @Override public int hashCode() { int hash = 7; hash = 41 * hash + (this.project == null ? 0 : this.project.hashCode()); return hash; } /** * Get a writer to which the xref can be written, or null if no xref * should be produced for files of this type. 
     */
    private Writer getXrefWriter(FileAnalyzer fa, String path) throws IOException {
        Genre g = fa.getFactory().getGenre();
        if (xrefDir != null && (g == Genre.PLAIN || g == Genre.XREFABLE)) {
            File xrefFile = new File(xrefDir, path);
            // If mkdirs() returns false, the failure is most likely
            // because the file already exists. But to check for the
            // file first and only add it if it doesn't exist would
            // only increase the file IO...
            if (!xrefFile.getParentFile().mkdirs()) {
                assert xrefFile.getParentFile().exists();
            }

            RuntimeEnvironment env = RuntimeEnvironment.getInstance();
            boolean compressed = env.isCompressXref();
            File file = new File(xrefDir, path + (compressed ? ".gz" : ""));
            return new BufferedWriter(new OutputStreamWriter(
                    compressed
                            ? new GZIPOutputStream(new FileOutputStream(file))
                            : new FileOutputStream(file)));
        }

        // no Xref for this analyzer
        return null;
    }
}
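
To see how the public entry points above are typically wired together, here is a minimal, hypothetical driver. It assumes OpenGrok's RuntimeEnvironment (source root, data root, ctags binary, projects) has already been configured; the class name IndexDatabaseExample, the pool size, the timeout, and the file path are illustrative only, not part of OpenGrok.

import java.io.File;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

import org.opensolaris.opengrok.analysis.Definitions;
import org.opensolaris.opengrok.index.IndexDatabase;

public class IndexDatabaseExample {

    public static void main(String[] args) throws Exception {
        // Assumption: RuntimeEnvironment (source root, data root, ctags
        // binary, projects) has been configured before this point.
        ExecutorService executor = Executors.newFixedThreadPool(2);

        // Queue an update of every index database: one per project, or a
        // single database covering the whole source root when no projects
        // are defined. Each update runs as a task on the executor.
        IndexDatabase.updateAll(executor);

        // Wait for the submitted update tasks to finish.
        executor.shutdown();
        executor.awaitTermination(1, TimeUnit.HOURS);

        // Read back the stored ctags definitions for one indexed file.
        // The path is purely illustrative; it must live under the
        // configured source root.
        Definitions defs = IndexDatabase.getDefinitions(
                new File("/source/myproject/src/Foo.java"));
        System.out.println("Found definitions: " + (defs != null));
    }
}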
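
The update paths also notify IndexChangedListener callbacks: fileAdd/fileAdded around insertion and fileRemove/fileRemoved around deletion. A logging listener might look like the sketch below, assuming the interface declares exactly the four callbacks invoked in this file; an instance can be passed to update(executor, listener, paths) to trace indexing progress.

import org.opensolaris.opengrok.index.IndexChangedListener;

// A minimal sketch of a listener that traces index changes. The four
// methods mirror the calls made by IndexDatabase: fileAdd()/fileRemove()
// fire before the work is done, fileAdded()/fileRemoved() afterwards.
public class LoggingIndexListener implements IndexChangedListener {

    @Override
    public void fileAdd(String path, String analyzer) {
        System.out.println("Adding " + path + " (analyzer: " + analyzer + ")");
    }

    @Override
    public void fileAdded(String path, String analyzer) {
        System.out.println("Added " + path);
    }

    @Override
    public void fileRemove(String path) {
        System.out.println("Removing " + path);
    }

    @Override
    public void fileRemoved(String path) {
        System.out.println("Removed " + path);
    }
}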