List of usage examples for org.apache.lucene.index.Fields#terms(String)
/**
 * Looks up the {@link Terms} associated with the given field name.
 *
 * NOTE(review): several callers shown below null-check the result before
 * using it, so this presumably returns {@code null} when the field has no
 * terms -- confirm against the Lucene Javadoc for the version in use.
 *
 * @param field name of the field whose terms are requested
 * @return the terms of {@code field}
 * @throws IOException declared by the signature; the conditions are not visible here
 */
public abstract Terms terms(String field) throws IOException;
From source file:org.opengrok.indexer.index.IndexDatabase.java
License:Open Source License
/** * Update the content of this index database * * @throws IOException if an error occurs */// w ww. j av a 2 s.c om public void update() throws IOException { synchronized (lock) { if (running) { throw new IOException("Indexer already running!"); } running = true; interrupted = false; } RuntimeEnvironment env = RuntimeEnvironment.getInstance(); reader = null; writer = null; settings = null; uidIter = null; postsIter = null; acceptedNonlocalSymlinks.clear(); IOException finishingException = null; try { Analyzer analyzer = AnalyzerGuru.getAnalyzer(); IndexWriterConfig iwc = new IndexWriterConfig(analyzer); iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); iwc.setRAMBufferSizeMB(env.getRamBufferSize()); /** * Most data in OpenGrok is indexed but not stored, so use the best * compression on the minority of data that is stored, since it * should not have a detrimental impact on overall throughput. */ iwc.setCodec(new Lucene70Codec(Lucene50StoredFieldsFormat.Mode.BEST_COMPRESSION)); writer = new IndexWriter(indexDirectory, iwc); writer.commit(); // to make sure index exists on the disk completer = new PendingFileCompleter(); if (directories.isEmpty()) { if (project == null) { directories.add(""); } else { directories.add(project.getPath()); } } for (String dir : directories) { File sourceRoot; if ("".equals(dir)) { sourceRoot = env.getSourceRootFile(); } else { sourceRoot = new File(env.getSourceRootFile(), dir); } if (env.isHistoryEnabled()) { try { HistoryGuru.getInstance().ensureHistoryCacheExists(sourceRoot); } catch (HistoryException ex) { String exmsg = String.format("Failed to ensureHistoryCacheExists() for %s", sourceRoot); LOGGER.log(Level.SEVERE, exmsg, ex); continue; } } dir = Util.fixPathIfWindows(dir); String startuid = Util.path2uid(dir, ""); reader = DirectoryReader.open(indexDirectory); // open existing index settings = readAnalysisSettings(); if (settings == null) { settings = new IndexAnalysisSettings(); } Terms terms = null; int numDocs = reader.numDocs(); 
if (numDocs > 0) { Fields uFields = MultiFields.getFields(reader);//reader.getTermVectors(0); terms = uFields.terms(QueryBuilder.U); } try { if (terms != null) { uidIter = terms.iterator(); TermsEnum.SeekStatus stat = uidIter.seekCeil(new BytesRef(startuid)); //init uid if (stat == TermsEnum.SeekStatus.END) { uidIter = null; LOGGER.log(Level.WARNING, "Couldn''t find a start term for {0}, empty u field?", startuid); } } // The actual indexing happens in indexParallel(). IndexDownArgs args = new IndexDownArgs(); Statistics elapsed = new Statistics(); LOGGER.log(Level.INFO, "Starting traversal of directory {0}", dir); indexDown(sourceRoot, dir, args); showFileCount(dir, args, elapsed); args.cur_count = 0; elapsed = new Statistics(); LOGGER.log(Level.INFO, "Starting indexing of directory {0}", dir); indexParallel(dir, args); elapsed.report(LOGGER, String.format("Done indexing of directory %s", dir)); // Remove data for the trailing terms that indexDown() // did not traverse. These correspond to files that have been // removed and have higher ordering than any present files. 
while (uidIter != null && uidIter.term() != null && uidIter.term().utf8ToString().startsWith(startuid)) { removeFile(true); BytesRef next = uidIter.next(); if (next == null) { uidIter = null; } } markProjectIndexed(project); } finally { reader.close(); } } try { finishWriting(); } catch (IOException e) { finishingException = e; } } catch (RuntimeException ex) { LOGGER.log(Level.SEVERE, "Failed with unexpected RuntimeException", ex); throw ex; } finally { completer = null; try { if (writer != null) { writer.close(); } } catch (IOException e) { if (finishingException == null) { finishingException = e; } LOGGER.log(Level.WARNING, "An error occurred while closing writer", e); } finally { writer = null; synchronized (lock) { running = false; } } } if (finishingException != null) { throw finishingException; } if (!isInterrupted() && isDirty()) { if (env.isOptimizeDatabase()) { optimize(); } env.setIndexTimestamp(); } }
From source file:org.opengrok.indexer.index.IndexDatabase.java
License:Open Source License
/** * Get all files in this index database. * * @throws IOException If an IO error occurs while reading from the database * @return set of files in this index database */// w w w . j ava 2s. c om public Set<String> getFiles() throws IOException { IndexReader ireader = null; TermsEnum iter = null; Terms terms; Set<String> files = new HashSet<>(); try { ireader = DirectoryReader.open(indexDirectory); // open existing index int numDocs = ireader.numDocs(); if (numDocs > 0) { Fields uFields = MultiFields.getFields(ireader);//reader.getTermVectors(0); terms = uFields.terms(QueryBuilder.U); iter = terms.iterator(); // init uid iterator } while (iter != null && iter.term() != null) { String value = iter.term().utf8ToString(); if (value.isEmpty()) { iter.next(); continue; } files.add(Util.uid2url(value)); BytesRef next = iter.next(); if (next == null) { iter = null; } } } finally { if (ireader != null) { try { ireader.close(); } catch (IOException e) { LOGGER.log(Level.WARNING, "An error occurred while closing index reader", e); } } } return files; }
From source file:org.opengrok.indexer.index.IndexDatabase.java
License:Open Source License
public void listTokens(int freq) throws IOException { IndexReader ireader = null;//from www . j a v a 2 s .co m TermsEnum iter = null; Terms terms; try { ireader = DirectoryReader.open(indexDirectory); int numDocs = ireader.numDocs(); if (numDocs > 0) { Fields uFields = MultiFields.getFields(ireader);//reader.getTermVectors(0); terms = uFields.terms(QueryBuilder.DEFS); iter = terms.iterator(); // init uid iterator } while (iter != null && iter.term() != null) { //if (iter.term().field().startsWith("f")) { if (iter.docFreq() > 16 && iter.term().utf8ToString().length() > freq) { LOGGER.warning(iter.term().utf8ToString()); } BytesRef next = iter.next(); if (next == null) { iter = null; } } } finally { if (ireader != null) { try { ireader.close(); } catch (IOException e) { LOGGER.log(Level.WARNING, "An error occurred while closing index reader", e); } } } }
From source file:org.opensolaris.opengrok.index.IndexDatabase.java
License:Open Source License
/** * Update the content of this index database * * @throws IOException if an error occurs * @throws HistoryException if an error occurs when accessing the history *//*from www .ja v a2 s . c om*/ public void update() throws IOException, HistoryException { synchronized (lock) { if (running) { throw new IOException("Indexer already running!"); } running = true; interrupted = false; } String ctgs = RuntimeEnvironment.getInstance().getCtags(); if (ctgs != null) { ctags = new Ctags(); ctags.setBinary(ctgs); } if (ctags == null) { log.severe("Unable to run ctags! searching definitions will not work!"); } if (ctags != null) { String filename = RuntimeEnvironment.getInstance().getCTagsExtraOptionsFile(); if (filename != null) { ctags.setCTagsExtraOptionsFile(filename); } } try { Analyzer analyzer = AnalyzerGuru.getAnalyzer(); IndexWriterConfig iwc = new IndexWriterConfig(SearchEngine.LUCENE_VERSION, analyzer); iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); iwc.setRAMBufferSizeMB(RuntimeEnvironment.getInstance().getRamBufferSize()); writer = new IndexWriter(indexDirectory, iwc); writer.commit(); // to make sure index exists on the disk if (directories.isEmpty()) { if (project == null) { directories.add(""); } else { directories.add(project.getPath()); } } for (String dir : directories) { File sourceRoot; if ("".equals(dir)) { sourceRoot = RuntimeEnvironment.getInstance().getSourceRootFile(); } else { sourceRoot = new File(RuntimeEnvironment.getInstance().getSourceRootFile(), dir); } HistoryGuru.getInstance().ensureHistoryCacheExists(sourceRoot); String startuid = Util.path2uid(dir, ""); IndexReader reader = DirectoryReader.open(indexDirectory); // open existing index Terms terms = null; int numDocs = reader.numDocs(); if (numDocs > 0) { Fields uFields = MultiFields.getFields(reader);//reader.getTermVectors(0); terms = uFields.terms(QueryBuilder.U); } try { if (numDocs > 0) { uidIter = terms.iterator(uidIter); TermsEnum.SeekStatus stat = uidIter.seekCeil(new 
BytesRef(startuid)); //init uid if (stat == TermsEnum.SeekStatus.END) { uidIter = null; log.log(Level.WARNING, "Couldn't find a start term for {0}, empty u field?", startuid); } } // The code below traverses the tree to get total count. int file_cnt = 0; if (RuntimeEnvironment.getInstance().isPrintProgress()) { log.log(Level.INFO, "Counting files in {0} ...", dir); file_cnt = indexDown(sourceRoot, dir, true, 0, 0); if (log.isLoggable(Level.INFO)) { log.log(Level.INFO, "Need to process: {0} files for {1}", new Object[] { file_cnt, dir }); } } indexDown(sourceRoot, dir, false, 0, file_cnt); while (uidIter != null && uidIter.term() != null && uidIter.term().utf8ToString().startsWith(startuid)) { removeFile(); BytesRef next = uidIter.next(); if (next == null) { uidIter = null; } } } finally { reader.close(); } } } finally { if (writer != null) { try { writer.prepareCommit(); writer.commit(); writer.close(); } catch (IOException e) { log.log(Level.WARNING, "An error occured while closing writer", e); } } if (ctags != null) { try { ctags.close(); } catch (IOException e) { log.log(Level.WARNING, "An error occured while closing ctags process", e); } } synchronized (lock) { running = false; } } if (!isInterrupted() && isDirty()) { if (RuntimeEnvironment.getInstance().isOptimizeDatabase()) { optimize(); } RuntimeEnvironment env = RuntimeEnvironment.getInstance(); File timestamp = new File(env.getDataRootFile(), "timestamp"); String purpose = "used for timestamping the index database."; if (timestamp.exists()) { if (!timestamp.setLastModified(System.currentTimeMillis())) { log.log(Level.WARNING, "Failed to set last modified time on ''{0}'', {1}", new Object[] { timestamp.getAbsolutePath(), purpose }); } } else { if (!timestamp.createNewFile()) { log.log(Level.WARNING, "Failed to create file ''{0}'', {1}", new Object[] { timestamp.getAbsolutePath(), purpose }); } } } }
From source file:org.opensolaris.opengrok.index.IndexDatabase.java
License:Open Source License
/** * List all of the files in this index database * * @throws IOException If an IO error occurs while reading from the database *//*from w w w.j av a 2 s .c o m*/ public void listFiles() throws IOException { IndexReader ireader = null; TermsEnum iter = null; Terms terms = null; try { ireader = DirectoryReader.open(indexDirectory); // open existing index int numDocs = ireader.numDocs(); if (numDocs > 0) { Fields uFields = MultiFields.getFields(ireader);//reader.getTermVectors(0); terms = uFields.terms(QueryBuilder.U); } iter = terms.iterator(iter); // init uid iterator while (iter != null && iter.term() != null) { log.fine(Util.uid2url(iter.term().utf8ToString())); BytesRef next = iter.next(); if (next == null) { iter = null; } } } finally { if (ireader != null) { try { ireader.close(); } catch (IOException e) { log.log(Level.WARNING, "An error occured while closing index reader", e); } } } }
From source file:org.opensolaris.opengrok.index.IndexDatabase.java
License:Open Source License
public void listTokens(int freq) throws IOException { IndexReader ireader = null;/*from w w w . j av a2 s.c om*/ TermsEnum iter = null; Terms terms = null; try { ireader = DirectoryReader.open(indexDirectory); int numDocs = ireader.numDocs(); if (numDocs > 0) { Fields uFields = MultiFields.getFields(ireader);//reader.getTermVectors(0); terms = uFields.terms(QueryBuilder.DEFS); } iter = terms.iterator(iter); // init uid iterator while (iter != null && iter.term() != null) { //if (iter.term().field().startsWith("f")) { if (iter.docFreq() > 16 && iter.term().utf8ToString().length() > freq) { log.warning(iter.term().utf8ToString()); } BytesRef next = iter.next(); if (next == null) { iter = null; } /*} else { break; }*/ } } finally { if (ireader != null) { try { ireader.close(); } catch (IOException e) { log.log(Level.WARNING, "An error occured while closing index reader", e); } } } }
From source file:org.pageseeder.flint.lucene.search.Terms.java
License:Apache License
/** * Loads all the fuzzy terms in the list of terms given the reader. * * @param reader Index reader to use./*from w ww . j ava 2 s . co m*/ * @param values The list of terms to load. * @param term The term to use. * * @throws IOException If an error is thrown by the fuzzy term enumeration. */ public static void fuzzy(IndexReader reader, List<String> values, Term term, int minSimilarity) throws IOException { AttributeSource atts = new AttributeSource(); Fields fields = MultiFields.getFields(reader); org.apache.lucene.index.Terms terms = fields == null ? null : fields.terms(term.field()); if (terms == null) return; FuzzyTermsEnum fuzzy = new FuzzyTermsEnum(terms, atts, term, minSimilarity, 0, false); BytesRef val; BytesRef searched = term.bytes(); while ((val = fuzzy.next()) != null) { if (!searched.bytesEquals(val)) values.add(val.utf8ToString()); } }
From source file:org.pageseeder.flint.lucene.search.Terms.java
License:Apache License
/**
 * Collects the fuzzy matches of a term into a bucket together with their
 * document frequency.
 *
 * <p>The terms of {@code term.field()} are enumerated through a
 * {@code FuzzyTermsEnum}; for every enumerated value whose bytes differ from
 * the searched term, a new {@code Term} holding a deep copy of the bytes is
 * added to {@code bucket} with {@code reader.docFreq()} of that term.
 * Nothing is added when the reader exposes no fields or the field has no
 * terms.
 *
 * @param reader        Index reader to use.
 * @param bucket        Where to store the terms.
 * @param term          The term to search for.
 * @param minSimilarity Passed through unchanged to the {@code FuzzyTermsEnum} constructor.
 *
 * @throws IOException If an error is thrown by the fuzzy term enumeration.
 */
@Beta
public static void fuzzy(IndexReader reader, Bucket<Term> bucket, Term term, int minSimilarity)
        throws IOException {
    AttributeSource attributes = new AttributeSource();
    Fields allFields = MultiFields.getFields(reader);
    org.apache.lucene.index.Terms fieldTerms = null;
    if (allFields != null) {
        fieldTerms = allFields.terms(term.field());
    }
    if (fieldTerms == null) {
        return;
    }

    FuzzyTermsEnum matches = new FuzzyTermsEnum(fieldTerms, attributes, term, minSimilarity, 0, true);
    BytesRef target = term.bytes();
    for (BytesRef candidate = matches.next(); candidate != null; candidate = matches.next()) {
        // The searched term itself is not reported.
        if (target.bytesEquals(candidate)) {
            continue;
        }
        // Copy the bytes before storing them in the new Term.
        Term match = new Term(term.field(), BytesRef.deepCopyOf(candidate));
        bucket.add(match, reader.docFreq(match));
    }
}
From source file:org.pageseeder.flint.lucene.search.Terms.java
License:Apache License
/**
 * Collects, as strings, the terms of a field that start with a given prefix.
 *
 * <p>The terms of {@code term.field()} are intersected with the automaton
 * built by {@code PrefixQuery.toAutomaton(term.bytes())}; every enumerated
 * value is appended to {@code values}. Nothing is added when the reader
 * exposes no fields or the field has no terms.
 *
 * <p>NOTE(review): the single-argument {@code CompiledAutomaton} constructor
 * is used; confirm it interprets the automaton labels the way
 * {@code PrefixQuery.toAutomaton} produces them for non-ASCII prefixes.
 *
 * @param reader Index reader to use.
 * @param values The list that receives the matching terms.
 * @param term   The term whose field and bytes define the prefix.
 *
 * @throws IOException If an error is thrown by the prefix term enumeration.
 */
public static void prefix(IndexReader reader, List<String> values, Term term) throws IOException {
    Fields allFields = MultiFields.getFields(reader);
    org.apache.lucene.index.Terms fieldTerms = null;
    if (allFields != null) {
        fieldTerms = allFields.terms(term.field());
    }
    if (fieldTerms == null) {
        return;
    }

    CompiledAutomaton prefixAutomaton = new CompiledAutomaton(PrefixQuery.toAutomaton(term.bytes()));
    TermsEnum matches = fieldTerms.intersect(prefixAutomaton, null);
    for (BytesRef candidate = matches.next(); candidate != null; candidate = matches.next()) {
        values.add(candidate.utf8ToString());
    }
}
From source file:org.pageseeder.flint.lucene.search.Terms.java
License:Apache License
/** * Loads all the prefix terms in the list of terms given the reader. * * @param reader Index reader to use./*w w w. j ava 2 s. c o m*/ * @param bucket Where to store the terms. * @param term The term to use. * * @throws IOException If an error is thrown by the prefix term enumeration. */ public static void prefix(IndexReader reader, Bucket<Term> bucket, Term term) throws IOException { Fields fields = MultiFields.getFields(reader); org.apache.lucene.index.Terms terms = fields == null ? null : fields.terms(term.field()); if (terms == null) return; TermsEnum prefixes = terms.intersect(new CompiledAutomaton(PrefixQuery.toAutomaton(term.bytes())), term.bytes()); BytesRef val; while ((val = prefixes.next()) != null) { Term t = new Term(term.field(), BytesRef.deepCopyOf(val)); bucket.add(t, reader.docFreq(t)); } }