Example usage for org.apache.lucene.index IndexReader document

List of usage examples for org.apache.lucene.index IndexReader document

Introduction

In this page you can find the example usage for org.apache.lucene.index IndexReader document.

Prototype




public final Document document(int docID) throws IOException 

Source Link

Document

Returns the stored fields of the nth Document in this index.

Usage

From source file:org.sonatype.nexus.index.packer.DefaultIndexPacker.java

License:Open Source License

static void copyLegacyDocuments(IndexReader r, Directory targetdir, IndexingContext context)
        throws CorruptIndexException, LockObtainFailedException, IOException {
    // Rebuild a legacy-format index in targetdir: every live document in r is
    // converted via updateLegacyDocument and written with the legacy analyzer.
    IndexWriter w = null;
    try {
        w = new IndexWriter(targetdir, false, new NexusLegacyAnalyzer(), true);

        final int maxDoc = r.maxDoc();
        for (int docNum = 0; docNum < maxDoc; docNum++) {
            if (r.isDeleted(docNum)) {
                continue; // skip tombstoned documents
            }
            w.addDocument(updateLegacyDocument(r.document(docNum), context));
        }

        w.optimize();
        w.flush();
    } finally {
        // Null-safe close: the writer may be null if its constructor threw.
        IndexUtils.close(w);
    }
}

From source file:org.sonatype.nexus.index.updater.DefaultIndexUpdater.java

License:Open Source License

private static void copyUpdatedDocuments(final Directory sourcedir, final Directory targetdir,
        final IndexingContext context) throws CorruptIndexException, LockObtainFailedException, IOException {
    // Open the source index read-only and copy each live document — after
    // running it through IndexUtils.updateDocument — into a fresh target index.
    IndexWriter w = null;
    IndexReader r = null;
    try {
        r = IndexReader.open(sourcedir);
        w = new IndexWriter(targetdir, false, new NexusAnalyzer(), true);

        final int maxDoc = r.maxDoc();
        for (int docNum = 0; docNum < maxDoc; docNum++) {
            if (r.isDeleted(docNum)) {
                continue; // skip tombstoned documents
            }
            w.addDocument(IndexUtils.updateDocument(r.document(docNum), context));
        }

        w.optimize();
        w.flush();
    } finally {
        // Null-safe closes: either handle may be null if opening it threw.
        IndexUtils.close(w);
        IndexUtils.close(r);
    }
}

From source file:org.sonatype.nexus.index.updater.DefaultIndexUpdater.java

License:Open Source License

/**
 * Removes from the given index directory every document the filter rejects,
 * then optimizes the index so the deletions are physically expunged.
 *
 * @param directory the Lucene directory to filter in place
 * @param filter decides which documents are kept ({@code accept == true})
 * @throws IOException on any index read/write failure
 */
private static void filterDirectory(final Directory directory, final DocumentFilter filter) throws IOException {
    // Pass 1: mark rejected documents as deleted.
    IndexReader r = null;
    try {
        r = IndexReader.open(directory);

        // BUG FIX: iterate up to maxDoc(), not numDocs(). Document ids run in
        // [0, maxDoc()); numDocs() excludes already-deleted documents, so when
        // deletions existed the old bound silently skipped the tail ids.
        int maxDoc = r.maxDoc();

        for (int i = 0; i < maxDoc; i++) {
            if (r.isDeleted(i)) {
                continue;
            }

            Document d = r.document(i);

            if (!filter.accept(d)) {
                r.deleteDocument(i);
            }
        }
    } finally {
        IndexUtils.close(r);
    }

    // Pass 2: reopen with a writer and optimize to expunge the deletions.
    IndexWriter w = null;
    try {
        // analyzer is unimportant, since we are not adding/searching to/on index, only reading/deleting
        w = new IndexWriter(directory, new NexusAnalyzer());

        w.optimize();

        w.flush();
    } finally {
        IndexUtils.close(w);
    }
}

From source file:org.sonatype.nexus.index.updater.IndexDataWriter.java

License:Open Source License

/**
 * Writes documents from the reader to the output stream.
 *
 * @param r source index reader
 * @param docIndexes explicit document ids to write, or {@code null} to write
 *        every live document in the index
 * @return the number of documents actually written
 * @throws IOException on read or write failure
 */
public int writeDocuments(IndexReader r, List<Integer> docIndexes) throws IOException {
    int n = 0;

    if (docIndexes == null) {
        // BUG FIX: the bound must be maxDoc(), not numDocs(). Document ids
        // run in [0, maxDoc()); numDocs() excludes deleted documents, so with
        // deletions present the old bound skipped the tail of the id space.
        // (The sibling copy loops in this codebase already use maxDoc().)
        final int maxDoc = r.maxDoc();
        for (int i = 0; i < maxDoc; i++) {
            if (!r.isDeleted(i)) {
                writeDocument(r.document(i));
                n++;
            }
        }
    } else {
        for (int i : docIndexes) {
            if (!r.isDeleted(i)) {
                writeDocument(r.document(i));
                n++;
            }
        }
    }

    return n;
}

From source file:org.sonatype.nexus.ReindexIT.java

License:Open Source License

/**
 * Shifts every live document's LAST_MODIFIED field by {@code shiftDays} days
 * and moves the context timestamp by the same amount. Test helper used to
 * simulate an index that was built in the past or future.
 *
 * @param ctx the indexing context whose index and timestamp are rewritten
 * @param shiftDays number of days to shift (may be negative); 0 is a no-op
 * @throws IOException on index read/write failure
 */
protected void shiftContextInTime(IndexingContext ctx, int shiftDays) throws IOException {
    if (shiftDays != 0) {
        final IndexWriter iw = ctx.getIndexWriter();
        final IndexSearcher is = ctx.acquireIndexSearcher();
        try {
            final IndexReader ir = is.getIndexReader();
            for (int docNum = 0; docNum < ir.maxDoc(); docNum++) {
                if (!ir.isDeleted(docNum)) {
                    Document doc = ir.document(docNum);

                    String lastModified = doc.get(ArtifactInfo.LAST_MODIFIED);

                    if (lastModified != null) {
                        long lm = Long.parseLong(lastModified);

                        lm = lm + (shiftDays * A_DAY_MILLIS);

                        // Replace the field in place rather than adding a duplicate.
                        doc.removeFields(ArtifactInfo.LAST_MODIFIED);

                        doc.add(new Field(ArtifactInfo.LAST_MODIFIED, Long.toString(lm), Field.Store.YES,
                                Field.Index.NO));

                        // UINFO appears to be the unique key — updateDocument
                        // deletes the old doc and adds the shifted copy.
                        iw.updateDocument(new Term(ArtifactInfo.UINFO, doc.get(ArtifactInfo.UINFO)), doc);
                    }
                }
            }

            ctx.optimize();

            ctx.commit();

            // shift timestamp too
            if (ctx.getTimestamp() != null) {
                ctx.updateTimestamp(true, new Date(ctx.getTimestamp().getTime() + (shiftDays * A_DAY_MILLIS)));
            } else {
                ctx.updateTimestamp(true, new Date(System.currentTimeMillis() + (shiftDays * A_DAY_MILLIS)));
            }
        } finally {
            // Always release the searcher, even if the rewrite loop throws.
            ctx.releaseIndexSearcher(is);
        }
    }
}

From source file:org.talend.dataquality.semantic.api.LocalDictionaryCache.java

License:Open Source License

/**
 * Converts Lucene hits into dictionary documents for the given category.
 *
 * @param categoryName category the entries belong to
 * @param docs search hits whose stored documents are loaded and converted
 * @return one DQDocument per hit, in hit order
 * @throws IOException on index read failure
 */
private List<DQDocument> dqDocListFromTopDocs(String categoryName, TopDocs docs) throws IOException {
    mgr.maybeRefresh();
    IndexSearcher searcher = mgr.acquire();
    try {
        IndexReader reader = searcher.getIndexReader();
        List<DQDocument> dqDocList = new ArrayList<>(docs.scoreDocs.length);
        for (ScoreDoc scoreDoc : docs.scoreDocs) {
            Document luceneDoc = reader.document(scoreDoc.doc);
            dqDocList.add(DictionaryUtils.dictionaryEntryFromDocument(luceneDoc, categoryName));
        }
        return dqDocList;
    } finally {
        // BUG FIX: release was previously not in a finally block, so any
        // IOException from reader.document() leaked the acquired searcher
        // (SearcherManager reference count never dropped).
        mgr.release(searcher);
    }
}

From source file:org.talend.dataquality.semantic.api.LocalDictionaryCache.java

License:Open Source License

/**
 * Suggests raw dictionary values for a category whose joint-token form matches
 * the input, either as a prefix (isPrefixSearch) or as an infix wildcard.
 *
 * @param categoryName category to search within
 * @param input user-typed fragment to match
 * @param num maximum number of index hits to examine
 * @param isPrefixSearch true for prefix matching, false for infix matching
 * @return matching raw values, sorted (TreeSet); empty on I/O failure
 */
private Set<String> doSuggestValues(String categoryName, String input, int num, boolean isPrefixSearch) {
    String jointInput = DictionarySearcher.getJointTokens(input);
    String queryString = isPrefixSearch ? jointInput + "*" : "*" + jointInput + "*";

    final BooleanQuery booleanQuery = new BooleanQuery();
    final Query catQuery = new TermQuery(new Term(DictionarySearcher.F_WORD, categoryName));
    booleanQuery.add(catQuery, BooleanClause.Occur.MUST);
    final Query wildcardQuery = new WildcardQuery(new Term(DictionarySearcher.F_SYNTERM, queryString));
    booleanQuery.add(wildcardQuery, BooleanClause.Occur.MUST);

    Set<String> results = new TreeSet<String>();

    try {
        mgr.maybeRefresh();
        IndexSearcher searcher = mgr.acquire();
        try {
            IndexReader reader = searcher.getIndexReader();
            TopDocs topDocs = searcher.search(booleanQuery, num);
            // BUG FIX: the searcher used to be released BEFORE the documents
            // were read from its reader. Once released, the SearcherManager
            // may close the underlying reader, making reader.document() a
            // use-after-release. Hold the searcher until all reads finish.
            for (int i = 0; i < topDocs.scoreDocs.length; i++) {
                Document doc = reader.document(topDocs.scoreDocs[i].doc);
                IndexableField[] fields = doc.getFields(DictionarySearcher.F_RAW);
                for (IndexableField f : fields) {
                    final String str = f.stringValue();
                    if (isPrefixSearch) {
                        if (StringUtils.startsWithIgnoreCase(str, input) || StringUtils
                                .startsWithIgnoreCase(DictionarySearcher.getJointTokens(str), jointInput)) {
                            results.add(str);
                        }
                    } else {// infix search
                        if (StringUtils.containsIgnoreCase(str, input) || StringUtils
                                .containsIgnoreCase(DictionarySearcher.getJointTokens(str), jointInput)) {
                            results.add(str);
                        }
                    }
                }
            }
        } finally {
            mgr.release(searcher);
        }
    } catch (IOException e) {
        // Best-effort suggestion: on I/O failure return whatever was collected.
        LOGGER.trace(e.getMessage(), e);
    }
    return results;
}

From source file:org.talend.dataquality.standardization.migration.FirstNameIndexMigrator.java

License:Open Source License

/**
 * regenerate all indexes recursively./*from  w w w  . jav  a 2  s.  c  om*/
 * 
 * @param inputFolder
 * @param outputFolder
 * @throws java.io.IOException
 */
/**
 * Regenerates all indexes recursively: folders without index segments are
 * recursed into; folders holding an index are rewritten into outputFolder
 * with the current analyzer (and, when IS_MIGRATING_FIRSTNAME_INDEX is set,
 * with first-name documents transformed by generateFirstNameDoc).
 *
 * @param inputFolder folder containing an index or index subfolders
 * @param outputFolder destination mirroring inputFolder's layout
 * @return always 0 (kept for interface compatibility)
 * @throws java.io.IOException on index read/write failure
 */
private int regenerate(File inputFolder, File outputFolder) throws IOException {
    FSDirectory indexDir = FSDirectory.open(inputFolder);
    CheckIndex check = new CheckIndex(indexDir);
    Status status = check.checkIndex();
    if (status.missingSegments) {
        // No segments here: this is a container folder — recurse.
        for (File f : inputFolder.listFiles()) {
            if (f.isDirectory()) {
                File out = new File(outputFolder.getAbsolutePath() + "/" + f.getName());
                out.mkdir();
                regenerate(f, out);
            }
        }
    } else {
        System.out.println("REGENERATE: " + inputFolder.getAbsoluteFile());
        FSDirectory outputDir = FSDirectory.open(outputFolder);

        IndexWriterConfig config = new IndexWriterConfig(Version.LATEST, analyzer);
        IndexWriter writer = new IndexWriter(outputDir, config);

        IndexReader reader = DirectoryReader.open(indexDir);

        Document doc = null;
        // for any other indexes, regenerate with new Analyzer, but no
        // changes to document.
        for (int i = 0; i < reader.maxDoc(); i++) {
            doc = reader.document(i);

            if (IS_MIGRATING_FIRSTNAME_INDEX) {
                Document newDoc = generateFirstNameDoc(doc);
                if (newDoc != null) {
                    writer.addDocument(newDoc);
                }
            } else {
                writer.addDocument(doc);
            }
        }
        // NOTE(review): `count` is not updated in this method — presumably a
        // field incremented inside generateFirstNameDoc; verify.
        System.out.println("count: " + count);

        // BUG FIX: the reader was never closed (resource leak); the sibling
        // IndexMigrator.regenerate closes its reader before committing.
        reader.close();
        writer.commit();
        writer.close();
        outputDir.close();

        // copy all other files such as "readMe.txt"
        for (File file : inputFolder.listFiles()) {
            if (file.isFile() && !isLuceneIndexFile(file)) {
                // copy to destination folder
                copyFile(file, outputFolder);
            }
        }
    }
    return 0;
}

From source file:org.talend.dataquality.standardization.migration.IndexMigrator.java

License:Open Source License

/**
 * regenerate all indexes recursively.//from   w w  w .  ja  va2  s  .  c om
 * 
 * @param inputFolder
 * @param outputFolder
 * @throws java.io.IOException
 */
/**
 * Regenerates all indexes recursively: folders without index segments are
 * recursed into; folders holding an index are rewritten into outputFolder
 * with a fresh StandardAnalyzer. Documents carrying F_WORD are rebuilt via
 * generateDocument with extra scoring fields; others are copied verbatim.
 *
 * @param inputFolder folder containing an index or index subfolders
 * @param outputFolder destination mirroring inputFolder's layout
 * @return always 0 (kept for interface compatibility)
 * @throws java.io.IOException on index read/write failure
 */
private int regenerate(File inputFolder, File outputFolder) throws IOException {
    FSDirectory inputDir = FSDirectory.open(inputFolder);
    CheckIndex check = new CheckIndex(inputDir);
    Status status = check.checkIndex();
    if (status.missingSegments) {
        // No segments here: this is a container folder — recurse.
        for (File f : inputFolder.listFiles()) {
            if (f.isDirectory()) {
                File out = new File(outputFolder.getAbsolutePath() + "/" + f.getName());
                out.mkdir();
                regenerate(f, out);
            }
        }
    } else {
        System.out.println("REGENERATE: " + inputFolder.getPath());
        FSDirectory outputDir = FSDirectory.open(outputFolder);

        analyzer = new StandardAnalyzer(CharArraySet.EMPTY_SET);
        IndexWriterConfig config = new IndexWriterConfig(Version.LATEST, analyzer);
        IndexWriter writer = new IndexWriter(outputDir, config);

        IndexReader reader = DirectoryReader.open(inputDir);

        int count = 0;
        Bits liveDocs = MultiFields.getLiveDocs(reader);
        for (int i = 0; i < reader.maxDoc(); i++) {
            if (liveDocs != null && !liveDocs.get(i)) {
                continue; // skip deleted documents
            }
            Document doc = reader.document(i);

            // BUG FIX: fieldNames was previously declared OUTSIDE the loop and
            // never cleared, so it accumulated field names across all earlier
            // documents — once any document contained F_WORD, every subsequent
            // document wrongly took the F_WORD branch. Collect per document.
            Collection<String> fieldNames = new ArrayList<String>();
            List<IndexableField> fields = doc.getFields();
            for (int k = 0; k < fields.size(); k++) {
                fieldNames.add(fields.get(k).name());
            }

            if (fieldNames.contains(F_WORD)) {
                // for "out of the box" indexes, regenerate the index with 2
                // extra fields ("SYNTERM" and "WORDTERM") for better scoring.
                String word = doc.getValues(F_WORD)[0];
                String[] synonyms = doc.getValues(F_SYN);
                Set<String> synonymSet = new HashSet<String>();
                for (String syn : synonyms) {
                    if (!syn.equals(word)) {
                        synonymSet.add(syn);
                    }
                }
                Document newDoc = generateDocument(word, synonymSet);
                writer.addDocument(newDoc);
            } else {
                writer.addDocument(doc);
            }
            count++;
        }
        System.out.println("count: " + count);

        reader.close();
        writer.commit();
        writer.close();
        outputDir.close();

        // copy all other files such as "readMe.txt"
        for (File file : inputFolder.listFiles()) {
            if (file.isFile() && !isLuceneIndexFile(file)) {
                // copy to destination folder
                copyFile(file, outputFolder);
            }
        }
    }
    return 0;
}

From source file:org.tallison.lucene.search.concordance.TestSimpleAnalyzerUtil.java

License:Apache License

/**
 * Verifies testSimple's handling of character offsets that fall outside or
 * between field values, using an analyzer with a custom character offset gap.
 * Indexes one document with four values, then probes ranges that are negative,
 * past the end, inside gaps, or spanning value boundaries.
 */
public void testHitInGaps() throws Exception {
    String[] values = new String[] { "abc", "def", "ghi", "jkl" };
    List<String[]> docs = new ArrayList<>();
    docs.add(values);

    Directory directory = getDirectory(customCharOffsetGapAnalyzer, docs);

    String joiner = " | ";
    int gap = customCharOffsetGapAnalyzer.getOffsetGap(FIELD);
    IndexReader reader = DirectoryReader.open(directory);
    Document d = reader.document(0);
    String[] fieldValues = d.getValues(FIELD);

    // Ranges entirely outside the field values yield the empty string.
    assertEquals("two negs", "", testSimple(-10, -1, fieldValues, gap, joiner));

    assertEquals("two way beyonds", "", testSimple(1000, 1020, fieldValues, gap, joiner));

    // A range entirely inside an offset gap yields just the joiner.
    assertEquals("two in betweens", " | ", testSimple(100, 110, fieldValues, gap, joiner));

    assertEquals("one neg", "abc", testSimple(-20, 3, fieldValues, gap, joiner));
    assertEquals("end < start 1", "", testSimple(3, -20, fieldValues, gap, joiner));
    assertEquals("end < start 2", "", testSimple(3, 2, fieldValues, gap, joiner));
    assertEquals("end in between", "abc", testSimple(0, 50, fieldValues, gap, joiner));
    //TODO: these used to be "def"; need to fix
    assertEquals("start in between", " | def", testSimple(5, 219, fieldValues, gap, joiner));
    assertEquals("start in between and end in between1", " | def",
            testSimple(5, 300, fieldValues, gap, joiner));
    assertEquals("start in between and end in between2", " | def | ghi",
            testSimple(5, 600, fieldValues, gap, joiner));
    assertEquals("", "def | ghi | jkl", testSimple(216, 10000, fieldValues, gap, joiner));

    reader.close();
    directory.close();

}