Example usage for org.apache.lucene.index IndexReader maxDoc

List of usage examples for org.apache.lucene.index IndexReader maxDoc

Introduction

On this page you can find an example usage of org.apache.lucene.index IndexReader maxDoc.

Prototype

public abstract int maxDoc();

Source Link

Document

Returns one greater than the largest possible document number.

Usage

From source file:org.sonatype.nexus.index.updater.DefaultIndexUpdater.java

License:Open Source License

/**
 * Copies every live (non-deleted) document from the source directory into a
 * freshly created index in the target directory, refreshing each document via
 * IndexUtils.updateDocument against the given indexing context.
 *
 * @param sourcedir directory holding the index to read from
 * @param targetdir directory the rewritten index is created in
 * @param context   indexing context used to update each copied document
 * @throws IOException on index read/write failure
 */
private static void copyUpdatedDocuments(final Directory sourcedir, final Directory targetdir,
        final IndexingContext context) throws CorruptIndexException, LockObtainFailedException, IOException {
    IndexWriter writer = null;
    IndexReader reader = null;
    try {
        reader = IndexReader.open(sourcedir);
        // last argument true: (re)create the target index from scratch
        writer = new IndexWriter(targetdir, false, new NexusAnalyzer(), true);

        final int docCount = reader.maxDoc();
        for (int docNum = 0; docNum < docCount; docNum++) {
            if (reader.isDeleted(docNum)) {
                continue; // skip documents flagged as deleted
            }
            writer.addDocument(IndexUtils.updateDocument(reader.document(docNum), context));
        }

        writer.optimize();
        writer.flush();
    } finally {
        // release both ends even when copying fails midway
        IndexUtils.close(writer);
        IndexUtils.close(reader);
    }
}

From source file:org.sonatype.nexus.ReindexIT.java

License:Open Source License

/**
 * Shifts the LAST_MODIFIED timestamp of every live document in the given
 * indexing context by {@code shiftDays} days (negative values shift into the
 * past), then shifts the context timestamp by the same amount. No-op when
 * {@code shiftDays} is 0.
 *
 * @param ctx       the indexing context whose documents are rewritten
 * @param shiftDays number of days to shift by; may be negative
 * @throws IOException on index read/write failure
 */
protected void shiftContextInTime(IndexingContext ctx, int shiftDays) throws IOException {
    if (shiftDays != 0) {
        final IndexWriter iw = ctx.getIndexWriter();
        final IndexSearcher is = ctx.acquireIndexSearcher();
        try {
            final IndexReader ir = is.getIndexReader();
            // maxDoc() is one past the largest doc id; deleted slots are skipped below
            for (int docNum = 0; docNum < ir.maxDoc(); docNum++) {
                if (!ir.isDeleted(docNum)) {
                    Document doc = ir.document(docNum);

                    String lastModified = doc.get(ArtifactInfo.LAST_MODIFIED);

                    if (lastModified != null) {
                        long lm = Long.parseLong(lastModified);

                        // NOTE(review): assumes A_DAY_MILLIS is a long constant;
                        // if it is an int, shiftDays * A_DAY_MILLIS could overflow
                        // before widening — confirm its declaration.
                        lm = lm + (shiftDays * A_DAY_MILLIS);

                        // replace the field rather than mutating it in place
                        doc.removeFields(ArtifactInfo.LAST_MODIFIED);

                        doc.add(new Field(ArtifactInfo.LAST_MODIFIED, Long.toString(lm), Field.Store.YES,
                                Field.Index.NO));

                        // UINFO term identifies the artifact, so this replaces the old doc
                        iw.updateDocument(new Term(ArtifactInfo.UINFO, doc.get(ArtifactInfo.UINFO)), doc);
                    }
                }
            }

            ctx.optimize();

            ctx.commit();

            // shift timestamp too
            if (ctx.getTimestamp() != null) {
                ctx.updateTimestamp(true, new Date(ctx.getTimestamp().getTime() + (shiftDays * A_DAY_MILLIS)));
            } else {
                ctx.updateTimestamp(true, new Date(System.currentTimeMillis() + (shiftDays * A_DAY_MILLIS)));
            }
        } finally {
            // always release the searcher acquired above
            ctx.releaseIndexSearcher(is);
        }
    }
}

From source file:org.springmodules.lucene.index.core.DefaultLuceneIndexTemplate.java

License:Apache License

/**
 * Returns one greater than the largest possible document number of the index
 * managed by the configured index factory.
 *
 * @return the reader's maxDoc value
 */
public int getMaxDoc() {
    final IndexReader reader = IndexReaderFactoryUtils.getIndexReader(indexFactory);
    try {
        return reader.maxDoc();
    } finally {
        // hand the reader back to the factory even if maxDoc() throws
        IndexReaderFactoryUtils.releaseIndexReader(indexFactory, reader);
    }
}

From source file:org.talend.dataquality.standardization.migration.FirstNameIndexMigrator.java

License:Open Source License

/**
 * regenerate all indexes recursively.//from w  ww  .ja va 2s  .com
 * 
 * @param inputFolder
 * @param outputFolder
 * @throws java.io.IOException
 */
/**
 * Regenerates all indexes recursively: folders that are not themselves a
 * Lucene index are descended into; real indexes are rewritten into the output
 * folder with the new analyzer (first-name indexes get a rewritten document).
 *
 * Fix: the IndexReader opened on the source index was never closed (resource
 * leak); it is now closed once copying completes, matching the sibling
 * IndexMigrator implementation.
 *
 * @param inputFolder  folder containing the index (or index subfolders)
 * @param outputFolder destination folder for the regenerated index
 * @return always 0
 * @throws java.io.IOException on index read/write failure
 */
private int regenerate(File inputFolder, File outputFolder) throws IOException {
    FSDirectory indexDir = FSDirectory.open(inputFolder);
    CheckIndex check = new CheckIndex(indexDir);
    Status status = check.checkIndex();
    if (status.missingSegments) {
        // not an index at this level: recurse into subfolders
        for (File f : inputFolder.listFiles()) {
            if (f.isDirectory()) {
                File out = new File(outputFolder.getAbsolutePath() + "/" + f.getName());
                out.mkdir();
                regenerate(f, out);
            }
        }
    } else {
        System.out.println("REGENERATE: " + inputFolder.getAbsoluteFile());
        FSDirectory outputDir = FSDirectory.open(outputFolder);

        IndexWriterConfig config = new IndexWriterConfig(Version.LATEST, analyzer);
        IndexWriter writer = new IndexWriter(outputDir, config);

        IndexReader reader = DirectoryReader.open(indexDir);

        Document doc = null;
        // for any other indexes, regenerate with new Analyzer, but no
        // changes to document.
        for (int i = 0; i < reader.maxDoc(); i++) {
            doc = reader.document(i);

            if (IS_MIGRATING_FIRSTNAME_INDEX) {
                // the first-name index gets a transformed document
                Document newDoc = generateFirstNameDoc(doc);
                if (newDoc != null) {
                    writer.addDocument(newDoc);
                }
            } else {
                writer.addDocument(doc);
            }
        }
        System.out.println("count: " + count);

        // FIX: close the reader (was previously leaked)
        reader.close();
        writer.commit();
        writer.close();
        outputDir.close();

        // copy all other files such as "readMe.txt"
        for (File file : inputFolder.listFiles()) {
            if (file.isFile() && !isLuceneIndexFile(file)) {
                // copy to destination folder
                copyFile(file, outputFolder);
            }
        }
    }
    return 0;
}

From source file:org.talend.dataquality.standardization.migration.IndexMigrator.java

License:Open Source License

/**
 * regenerate all indexes recursively./*from w ww  .  j  a v  a 2 s.co m*/
 * 
 * @param inputFolder
 * @param outputFolder
 * @throws java.io.IOException
 */
/**
 * Regenerates all indexes recursively: folders that are not themselves a
 * Lucene index are descended into; real indexes are rewritten into the output
 * folder with the new analyzer. "Out of the box" indexes (documents carrying
 * a F_WORD field) are rebuilt with two extra fields ("SYNTERM" and
 * "WORDTERM") for better scoring.
 *
 * Fix: the field-name collection was previously declared OUTSIDE the document
 * loop and never cleared, so once any document contained F_WORD every later
 * document was treated as if it had it too — and doc.getValues(F_WORD)[0]
 * could then throw ArrayIndexOutOfBoundsException for documents without the
 * field. The set is now built per document.
 *
 * @param inputFolder  folder containing the index (or index subfolders)
 * @param outputFolder destination folder for the regenerated index
 * @return always 0
 * @throws java.io.IOException on index read/write failure
 */
private int regenerate(File inputFolder, File outputFolder) throws IOException {
    FSDirectory inputDir = FSDirectory.open(inputFolder);
    CheckIndex check = new CheckIndex(inputDir);
    Status status = check.checkIndex();
    if (status.missingSegments) {
        // not an index at this level: recurse into subfolders
        for (File f : inputFolder.listFiles()) {
            if (f.isDirectory()) {
                File out = new File(outputFolder.getAbsolutePath() + "/" + f.getName());
                out.mkdir();
                regenerate(f, out);
            }
        }
    } else {
        System.out.println("REGENERATE: " + inputFolder.getPath());
        FSDirectory outputDir = FSDirectory.open(outputFolder);

        analyzer = new StandardAnalyzer(CharArraySet.EMPTY_SET);
        IndexWriterConfig config = new IndexWriterConfig(Version.LATEST, analyzer);
        IndexWriter writer = new IndexWriter(outputDir, config);

        IndexReader reader = DirectoryReader.open(inputDir);

        // for any other indexes, regenerate with new Analyzer, but no
        // changes to document.
        int count = 0;
        Bits liveDocs = MultiFields.getLiveDocs(reader);
        for (int i = 0; i < reader.maxDoc(); i++) {
            if (liveDocs != null && !liveDocs.get(i)) {
                continue; // skip deleted documents
            }
            Document doc = reader.document(i);

            // FIX: collect the field names of THIS document only (see Javadoc)
            Set<String> fieldNames = new HashSet<String>();
            List<IndexableField> fields = doc.getFields();
            for (int k = 0; k < fields.size(); k++) {
                fieldNames.add(fields.get(k).name());
            }

            if (fieldNames.contains(F_WORD)) {
                // for "out of the box" indexes, regenerate the index with 2
                // extra fields ("SYNTERM" and "WORDTERM") for better scoring.
                String word = doc.getValues(F_WORD)[0];
                String[] synonyms = doc.getValues(F_SYN);
                Set<String> synonymSet = new HashSet<String>();
                for (String syn : synonyms) {
                    if (!syn.equals(word)) {
                        synonymSet.add(syn);
                    }
                }
                Document newDoc = generateDocument(word, synonymSet);
                writer.addDocument(newDoc);
            } else {
                writer.addDocument(doc);
            }
            count++;
        }
        System.out.println("count: " + count);

        reader.close();
        writer.commit();
        writer.close();
        outputDir.close();

        // copy all other files such as "readMe.txt"
        for (File file : inputFolder.listFiles()) {
            if (file.isFile() && !isLuceneIndexFile(file)) {
                // copy to destination folder
                copyFile(file, outputFolder);
            }
        }
    }
    return 0;
}

From source file:org.toubassi.femtozip.lucene.IndexDocumentList.java

License:Apache License

/**
 * Builds a sampled view over the stored values of one field in an index.
 * Scans documents from firstDoc upward, skips deleted documents, thins the
 * rest down to roughly numSamples entries, and records the sampled doc ids
 * plus a cumulative count of field instances per sampled document.
 *
 * @param reader     reader over the index to sample
 * @param numSamples target number of documents to sample
 * @param firstDoc   first document id to consider
 * @param fieldName  stored field whose values are collected
 * @throws IOException on index read failure
 */
public IndexDocumentList(IndexReader reader, int numSamples, int firstDoc, String fieldName)
        throws IOException {
    this.reader = reader;
    this.fieldName = fieldName;
    numDocs = reader.numDocs();
    float samplingRate = ((float) numSamples) / numDocs;

    ArrayList<Integer> sampledIds = new ArrayList<Integer>();
    ArrayList<Integer> perDocFieldCounts = new ArrayList<Integer>();

    int scanned = 0;
    int sampled = 0;
    final int limit = reader.maxDoc();
    for (int docId = firstDoc; docId < limit; docId++) {
        scanned++;

        if (reader.isDeleted(docId)) {
            continue;
        }

        // keep only as many documents as the sampling rate allows
        if (((int) (scanned * samplingRate)) <= sampled) {
            continue;
        }
        sampled++;

        Document doc = reader.document(docId);
        Field[] fields = doc.getFields(fieldName);
        if (fields.length > 0 && fields[0].isStored()) {
            sampledIds.add(docId);
            perDocFieldCounts.add(fields.length);
        }
    }

    docIds = new int[sampledIds.size()];
    for (int i = 0; i < docIds.length; i++) {
        docIds[i] = sampledIds.get(i);
    }

    // turn per-document counts into a cumulative (prefix-sum) array
    fieldCounts = new int[perDocFieldCounts.size()];
    int running = 0;
    for (int i = 0; i < fieldCounts.length; i++) {
        running += perDocFieldCounts.get(i);
        fieldCounts[i] = running;
    }
}

From source file:org.toubassi.femtozip.lucene.IndexDumper.java

License:Apache License

/**
 * Dumps sampled stored-field contents of the index at indexPath to stdout.
 * Deleted documents are skipped, the survivors are thinned down to roughly
 * numSamples entries, and for each sampled document every stored,
 * uncompressed field (optionally restricted via fieldsToDump) is printed as
 * "name length bytes".
 *
 * @throws IOException on index read failure
 */
protected void dump() throws IOException {
    IndexReader reader = IndexReader.open(indexPath);

    Collection<?> allFields = reader.getFieldNames(IndexReader.FieldOption.ALL);
    String[] fieldNames = new String[allFields.size()];
    allFields.toArray(fieldNames);

    numDocs = reader.numDocs();
    int maxDocId = reader.maxDoc();
    float samplingRate = ((float) numSamples) / numDocs;

    int scanned = 0;
    int sampled = 0;
    for (int docId = 0; docId < maxDocId; docId++) {
        if (reader.isDeleted(docId)) {
            continue;
        }

        scanned++;

        // thin the live documents down to roughly numSamples
        if (((int) (scanned * samplingRate)) <= sampled) {
            continue;
        }
        sampled++;

        Document doc = reader.document(docId);

        System.out.println("DOCUMENT: " + docId);

        for (String fieldName : fieldNames) {
            // honor the optional field restriction list
            if (fieldsToDump != null && fieldsToDump.indexOf(fieldName) == -1) {
                continue;
            }

            for (Field field : doc.getFields(fieldName)) {
                if (!field.isStored() || field.isCompressed()) {
                    // TODO if its compressed, uncompress it and benchmark it.
                    continue;
                }

                byte[] raw;
                if (field.isBinary()) {
                    raw = new byte[field.getBinaryLength()];
                    System.arraycopy(field.getBinaryValue(), field.getBinaryOffset(), raw, 0,
                            field.getBinaryLength());
                } else {
                    raw = field.stringValue().getBytes("UTF-8");
                }

                if (raw.length > 0) {
                    System.out.print("    " + fieldName + " " + raw.length + " ");
                    System.out.write(raw);
                    System.out.println();
                }
            }
        }
    }

    reader.close();
}

From source file:org.toubassi.femtozip.lucene.StoredFieldDumper.java

License:Apache License

/**
 * Dumps every stored, uncompressed field of every document into a per-field
 * output file named outputBasePath + "_" + fieldName.
 *
 * Fix: the reader and the lazily opened output streams were previously leaked
 * if an I/O error occurred mid-dump; they are now released in a finally block.
 *
 * @throws IOException on index read or file write failure
 */
protected void dump() throws IOException {
    IndexReader reader = IndexReader.open(indexPath);

    Collection<?> allFields = reader.getFieldNames(IndexReader.FieldOption.ALL);
    String[] fieldNames = new String[allFields.size()];
    allFields.toArray(fieldNames);

    Map<String, OutputStream> output = new HashMap<String, OutputStream>();

    long lastStatusTime = 0;

    try {
        // NOTE(review): maxDoc() includes deleted doc ids; unlike the other
        // dumpers this loop does not check isDeleted(docId) — confirm the
        // index has no deletions before relying on this.
        for (int docId = 0, count = reader.maxDoc(); docId < count; docId++) {
            Document doc = reader.document(docId);

            // progress heartbeat at most once every 5 seconds
            if (System.currentTimeMillis() - lastStatusTime > 5000) {
                lastStatusTime = System.currentTimeMillis();
                System.out.println("Processing docId " + docId + " of " + count);
            }

            for (String fieldName : fieldNames) {
                Field[] fields = doc.getFields(fieldName);

                for (Field field : fields) {

                    if (!field.isStored() || field.isCompressed()) {
                        // TODO if its compressed, uncompress it and benchmark it.
                        continue;
                    }

                    byte[] bytes;

                    if (field.isBinary()) {
                        bytes = new byte[field.getBinaryLength()];
                        System.arraycopy(field.getBinaryValue(), field.getBinaryOffset(), bytes, 0,
                                field.getBinaryLength());
                    } else {
                        String value = field.stringValue();
                        bytes = value.getBytes("UTF-8");
                    }

                    // lazily open one output stream per field name
                    OutputStream out = output.get(fieldName);
                    if (out == null) {
                        FileOutputStream fileOut = new FileOutputStream(outputBasePath + "_" + fieldName);
                        out = new BufferedOutputStream(fileOut);
                        output.put(fieldName, out);
                    }

                    out.write(bytes);
                }
            }
        }
    } finally {
        // FIX: release resources even when the dump fails midway
        reader.close();

        for (Map.Entry<String, OutputStream> entry : output.entrySet()) {
            entry.getValue().close();
        }
    }
}

From source file:org.toubassi.femtozip.lucene.StoredFieldExploder.java

License:Apache License

/**
 * Writes the value of the configured field for up to numSamples live
 * documents, each to its own file "&lt;n&gt;.&lt;fieldName&gt;" under
 * outputBasePath.
 *
 * Fixes: the per-document FileOutputStream and the reader were previously
 * leaked when a write failed; both are now closed in finally blocks. The
 * unused allFields/fieldNames locals were removed.
 *
 * @throws IOException on index read or file write failure
 */
protected void dump() throws IOException {
    IndexReader reader = IndexReader.open(indexPath);
    try {
        int numProcessed = 0;

        for (int docId = 0, count = reader.maxDoc(); docId < count && numProcessed < numSamples; docId++) {
            if (reader.isDeleted(docId)) {
                continue;
            }

            Document doc = reader.document(docId);
            Field field = doc.getField(fieldName);

            if (field != null) {
                FileOutputStream out = new FileOutputStream(
                        outputBasePath + File.separator + (numProcessed + 1) + "." + fieldName);
                try {
                    if (field.isBinary()) {
                        out.write(field.getBinaryValue(), field.getBinaryOffset(), field.getBinaryLength());
                    } else {
                        out.write(field.stringValue().getBytes("UTF-8"));
                    }
                } finally {
                    // FIX: close the file even if the write fails
                    out.close();
                }

                numProcessed++;
            }
        }
    } finally {
        // FIX: close the reader even when the dump fails midway
        reader.close();
    }
}

From source file:org.zanata.hibernate.search.LocaleFilter.java

License:Open Source License

@Override
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
    OpenBitSet bitSet = new OpenBitSet(reader.maxDoc());
    log.debug("getDocIdSet for {}", locale);
    Term term = new Term("locale", locale.toString());
    TermDocs termDocs = reader.termDocs(term);
    while (termDocs.next()) {
        bitSet.set(termDocs.doc());/*w  ww .  j  a  v  a2 s. c  o m*/
    }
    return bitSet;
}