List of usage examples for org.apache.lucene.index IndexReader document
public final Document document(int docID) throws IOException
Returns the stored fields of the nth Document in this index.
From source file: org.sonatype.nexus.index.packer.DefaultIndexPacker.java
License:Open Source License
/**
 * Copies every live (non-deleted) document from {@code r} into a freshly
 * created legacy-format index in {@code targetdir}, converting each document
 * via {@code updateLegacyDocument} before writing. The target index is
 * optimized and flushed before the writer is closed.
 *
 * @param r         source reader to copy documents from
 * @param targetdir directory that receives the new legacy index
 * @param context   indexing context passed through to the document conversion
 */
static void copyLegacyDocuments(IndexReader r, Directory targetdir, IndexingContext context)
        throws CorruptIndexException, LockObtainFailedException, IOException {
    IndexWriter writer = null;
    try {
        // create=true: the target index is rebuilt from scratch
        writer = new IndexWriter(targetdir, false, new NexusLegacyAnalyzer(), true);

        for (int docNum = 0; docNum < r.maxDoc(); docNum++) {
            if (r.isDeleted(docNum)) {
                continue; // skip tombstoned documents
            }
            writer.addDocument(updateLegacyDocument(r.document(docNum), context));
        }

        writer.optimize();
        writer.flush();
    } finally {
        // null-safe close even if writer construction failed
        IndexUtils.close(writer);
    }
}
From source file:org.sonatype.nexus.index.updater.DefaultIndexUpdater.java
License:Open Source License
private static void copyUpdatedDocuments(final Directory sourcedir, final Directory targetdir, final IndexingContext context) throws CorruptIndexException, LockObtainFailedException, IOException { IndexWriter w = null;//from ww w .j ava 2 s .co m IndexReader r = null; try { r = IndexReader.open(sourcedir); w = new IndexWriter(targetdir, false, new NexusAnalyzer(), true); for (int i = 0; i < r.maxDoc(); i++) { if (!r.isDeleted(i)) { w.addDocument(IndexUtils.updateDocument(r.document(i), context)); } } w.optimize(); w.flush(); } finally { IndexUtils.close(w); IndexUtils.close(r); } }
From source file:org.sonatype.nexus.index.updater.DefaultIndexUpdater.java
License:Open Source License
private static void filterDirectory(final Directory directory, final DocumentFilter filter) throws IOException { IndexReader r = null; try {//from ww w . j a v a 2s. c om r = IndexReader.open(directory); int numDocs = r.numDocs(); for (int i = 0; i < numDocs; i++) { if (r.isDeleted(i)) { continue; } Document d = r.document(i); if (!filter.accept(d)) { r.deleteDocument(i); } } } finally { IndexUtils.close(r); } IndexWriter w = null; try { // analyzer is unimportant, since we are not adding/searching to/on index, only reading/deleting w = new IndexWriter(directory, new NexusAnalyzer()); w.optimize(); w.flush(); } finally { IndexUtils.close(w); } }
From source file:org.sonatype.nexus.index.updater.IndexDataWriter.java
License:Open Source License
/**
 * Writes every live (non-deleted) document from {@code r} — or, when
 * {@code docIndexes} is non-null, only the documents with those ids — and
 * returns the number of documents actually written.
 *
 * @param r          reader to pull documents from
 * @param docIndexes explicit document ids to write, or {@code null} for all
 * @return count of documents written
 */
public int writeDocuments(IndexReader r, List<Integer> docIndexes) throws IOException {
    int n = 0;
    if (docIndexes == null) {
        // FIX: iterate up to maxDoc(), not numDocs(). numDocs() excludes deleted
        // documents, so whenever the index contains deletions the loop bound was
        // smaller than the highest doc id and trailing live documents were skipped.
        // (The sibling copy methods in this codebase correctly use maxDoc().)
        for (int i = 0; i < r.maxDoc(); i++) {
            if (!r.isDeleted(i)) {
                writeDocument(r.document(i));
                n++;
            }
        }
    } else {
        for (int i : docIndexes) {
            if (!r.isDeleted(i)) {
                writeDocument(r.document(i));
                n++;
            }
        }
    }
    return n;
}
From source file:org.sonatype.nexus.ReindexIT.java
License:Open Source License
/**
 * Shifts the LAST_MODIFIED field of every live document in the context's index
 * by {@code shiftDays} days (positive or negative), then optimizes and commits
 * the context and shifts the context timestamp by the same amount.
 * A no-op when {@code shiftDays} is 0.
 *
 * @param ctx       indexing context whose documents are rewritten in place
 * @param shiftDays number of days to add to each document's last-modified value
 */
protected void shiftContextInTime(IndexingContext ctx, int shiftDays) throws IOException {
    if (shiftDays != 0) {
        final IndexWriter iw = ctx.getIndexWriter();
        final IndexSearcher is = ctx.acquireIndexSearcher();
        try {
            final IndexReader ir = is.getIndexReader();
            for (int docNum = 0; docNum < ir.maxDoc(); docNum++) {
                if (!ir.isDeleted(docNum)) {
                    Document doc = ir.document(docNum);
                    String lastModified = doc.get(ArtifactInfo.LAST_MODIFIED);
                    // only documents that actually carry a last-modified value are shifted
                    if (lastModified != null) {
                        long lm = Long.parseLong(lastModified);
                        lm = lm + (shiftDays * A_DAY_MILLIS);
                        // replace the field rather than add a duplicate
                        doc.removeFields(ArtifactInfo.LAST_MODIFIED);
                        doc.add(new Field(ArtifactInfo.LAST_MODIFIED, Long.toString(lm), Field.Store.YES,
                                Field.Index.NO));
                        // UINFO uniquely addresses the document being replaced
                        iw.updateDocument(new Term(ArtifactInfo.UINFO, doc.get(ArtifactInfo.UINFO)), doc);
                    }
                }
            }
            ctx.optimize();
            ctx.commit();
            // shift the context timestamp too, falling back to "now" when unset
            if (ctx.getTimestamp() != null) {
                ctx.updateTimestamp(true, new Date(ctx.getTimestamp().getTime() + (shiftDays * A_DAY_MILLIS)));
            } else {
                ctx.updateTimestamp(true, new Date(System.currentTimeMillis() + (shiftDays * A_DAY_MILLIS)));
            }
        } finally {
            // always hand the searcher back to the context
            ctx.releaseIndexSearcher(is);
        }
    }
}
From source file:org.talend.dataquality.semantic.api.LocalDictionaryCache.java
License:Open Source License
private List<DQDocument> dqDocListFromTopDocs(String categoryName, TopDocs docs) throws IOException { mgr.maybeRefresh();//from www . java 2 s . c om IndexSearcher searcher = mgr.acquire(); IndexReader reader = searcher.getIndexReader(); List<DQDocument> dqDocList = new ArrayList<>(); for (ScoreDoc scoreDoc : docs.scoreDocs) { Document luceneDoc = reader.document(scoreDoc.doc); DQDocument dqDoc = DictionaryUtils.dictionaryEntryFromDocument(luceneDoc, categoryName); dqDocList.add(dqDoc); } mgr.release(searcher); return dqDocList; }
From source file:org.talend.dataquality.semantic.api.LocalDictionaryCache.java
License:Open Source License
private Set<String> doSuggestValues(String categoryName, String input, int num, boolean isPrefixSearch) { String jointInput = DictionarySearcher.getJointTokens(input); String queryString = isPrefixSearch ? jointInput + "*" : "*" + jointInput + "*"; final BooleanQuery booleanQuery = new BooleanQuery(); final Query catQuery = new TermQuery(new Term(DictionarySearcher.F_WORD, categoryName)); booleanQuery.add(catQuery, BooleanClause.Occur.MUST); final Query wildcardQuery = new WildcardQuery(new Term(DictionarySearcher.F_SYNTERM, queryString)); booleanQuery.add(wildcardQuery, BooleanClause.Occur.MUST); Set<String> results = new TreeSet<String>(); try {/*w w w.j av a2 s . c om*/ mgr.maybeRefresh(); IndexSearcher searcher = mgr.acquire(); IndexReader reader = searcher.getIndexReader(); TopDocs topDocs = searcher.search(booleanQuery, num); mgr.release(searcher); for (int i = 0; i < topDocs.scoreDocs.length; i++) { Document doc = reader.document(topDocs.scoreDocs[i].doc); IndexableField[] fields = doc.getFields(DictionarySearcher.F_RAW); for (IndexableField f : fields) { final String str = f.stringValue(); if (isPrefixSearch) { if (StringUtils.startsWithIgnoreCase(str, input) || StringUtils .startsWithIgnoreCase(DictionarySearcher.getJointTokens(str), jointInput)) { results.add(str); } } else {// infix search if (StringUtils.containsIgnoreCase(str, input) || StringUtils .containsIgnoreCase(DictionarySearcher.getJointTokens(str), jointInput)) { results.add(str); } } } } } catch (IOException e) { LOGGER.trace(e.getMessage(), e); } return results; }
From source file:org.talend.dataquality.standardization.migration.FirstNameIndexMigrator.java
License:Open Source License
/** * regenerate all indexes recursively./*from w w w . jav a 2 s. c om*/ * * @param inputFolder * @param outputFolder * @throws java.io.IOException */ private int regenerate(File inputFolder, File outputFolder) throws IOException { FSDirectory indexDir = FSDirectory.open(inputFolder); CheckIndex check = new CheckIndex(indexDir); Status status = check.checkIndex(); if (status.missingSegments) { for (File f : inputFolder.listFiles()) { if (f.isDirectory()) { File out = new File(outputFolder.getAbsolutePath() + "/" + f.getName()); out.mkdir(); regenerate(f, out); } } } else { System.out.println("REGENERATE: " + inputFolder.getAbsoluteFile()); FSDirectory outputDir = FSDirectory.open(outputFolder); IndexWriterConfig config = new IndexWriterConfig(Version.LATEST, analyzer); IndexWriter writer = new IndexWriter(outputDir, config); IndexReader reader = DirectoryReader.open(indexDir); Document doc = null; // for any other indexes, regenerate with new Analyzer, but no // changes to document. for (int i = 0; i < reader.maxDoc(); i++) { doc = reader.document(i); if (IS_MIGRATING_FIRSTNAME_INDEX) { Document newDoc = generateFirstNameDoc(doc); if (newDoc != null) { writer.addDocument(newDoc); } } else { writer.addDocument(doc); } } System.out.println("count: " + count); writer.commit(); writer.close(); outputDir.close(); // copy all other files such as "readMe.txt" for (File file : inputFolder.listFiles()) { if (file.isFile() && !isLuceneIndexFile(file)) { // copy to destination folder copyFile(file, outputFolder); } } } return 0; }
From source file:org.talend.dataquality.standardization.migration.IndexMigrator.java
License:Open Source License
/** * regenerate all indexes recursively.//from w w w . ja va2 s . c om * * @param inputFolder * @param outputFolder * @throws java.io.IOException */ private int regenerate(File inputFolder, File outputFolder) throws IOException { FSDirectory inputDir = FSDirectory.open(inputFolder); CheckIndex check = new CheckIndex(inputDir); Status status = check.checkIndex(); if (status.missingSegments) { for (File f : inputFolder.listFiles()) { if (f.isDirectory()) { File out = new File(outputFolder.getAbsolutePath() + "/" + f.getName()); out.mkdir(); regenerate(f, out); } } } else { System.out.println("REGENERATE: " + inputFolder.getPath()); FSDirectory outputDir = FSDirectory.open(outputFolder); analyzer = new StandardAnalyzer(CharArraySet.EMPTY_SET); IndexWriterConfig config = new IndexWriterConfig(Version.LATEST, analyzer); IndexWriter writer = new IndexWriter(outputDir, config); IndexReader reader = DirectoryReader.open(inputDir); // for any other indexes, regenerate with new Analyzer, but no // changes to document. Collection<String> fieldNames = new ArrayList<String>(); int count = 0; Bits liveDocs = MultiFields.getLiveDocs(reader); for (int i = 0; i < reader.maxDoc(); i++) { if (liveDocs != null && !liveDocs.get(i)) { continue; } Document doc = reader.document(i); List<IndexableField> fields = doc.getFields(); for (int k = 0; k < fields.size(); k++) { fieldNames.add(fields.get(k).name()); } if (fieldNames.contains(F_WORD)) { // for "out of the box" indexes, regenerate the index with 2 // extra fields ("SYNTERM" and "WORDTERM") for better scoring. 
String word = doc.getValues(F_WORD)[0]; String[] synonyms = doc.getValues(F_SYN); Set<String> synonymSet = new HashSet<String>(); for (String syn : synonyms) { if (!syn.equals(word)) { synonymSet.add(syn); } } Document newDoc = generateDocument(word, synonymSet); writer.addDocument(newDoc); } else { writer.addDocument(doc); } count++; } System.out.println("count: " + count); reader.close(); writer.commit(); writer.close(); outputDir.close(); // copy all other files such as "readMe.txt" for (File file : inputFolder.listFiles()) { if (file.isFile() && !isLuceneIndexFile(file)) { // copy to destination folder copyFile(file, outputFolder); } } } return 0; }
From source file:org.tallison.lucene.search.concordance.TestSimpleAnalyzerUtil.java
License:Apache License
/**
 * Verifies how testSimple() resolves character-offset ranges that fall into
 * the offset gaps between multi-valued field instances: negative offsets,
 * offsets beyond the end, ranges entirely inside a gap, and ranges that start
 * or end inside a gap.
 */
public void testHitInGaps() throws Exception {
    String[] values = new String[] { "abc", "def", "ghi", "jkl" };
    List<String[]> docs = new ArrayList<>();
    docs.add(values);
    // one document with four field values, indexed with a custom offset gap
    Directory directory = getDirectory(customCharOffsetGapAnalyzer, docs);
    String joiner = " | ";
    int gap = customCharOffsetGapAnalyzer.getOffsetGap(FIELD);
    IndexReader reader = DirectoryReader.open(directory);
    Document d = reader.document(0);
    String[] fieldValues = d.getValues(FIELD);
    assertEquals("two negs", "", testSimple(-10, -1, fieldValues, gap, joiner));
    assertEquals("two way beyonds", "", testSimple(1000, 1020, fieldValues, gap, joiner));
    assertEquals("two in betweens", " | ", testSimple(100, 110, fieldValues, gap, joiner));
    assertEquals("one neg", "abc", testSimple(-20, 3, fieldValues, gap, joiner));
    assertEquals("end < start 1", "", testSimple(3, -20, fieldValues, gap, joiner));
    assertEquals("end < start 2", "", testSimple(3, 2, fieldValues, gap, joiner));
    assertEquals("end in between", "abc", testSimple(0, 50, fieldValues, gap, joiner));
    //TODO: these used to be "def"; need to fix
    assertEquals("start in between", " | def", testSimple(5, 219, fieldValues, gap, joiner));
    assertEquals("start in between and end in between1", " | def",
            testSimple(5, 300, fieldValues, gap, joiner));
    assertEquals("start in between and end in between2", " | def | ghi",
            testSimple(5, 600, fieldValues, gap, joiner));
    assertEquals("", "def | ghi | jkl", testSimple(216, 10000, fieldValues, gap, joiner));
    reader.close();
    directory.close();
}