Usage examples for the Lucene method org.apache.lucene.index.IndexReader#maxDoc().
public abstract int maxDoc();
From source file:org.sonatype.nexus.index.updater.DefaultIndexUpdater.java
License:Open Source License
/**
 * Copies every live (non-deleted) document from {@code sourcedir} into a freshly
 * created index in {@code targetdir}, passing each document through
 * {@code IndexUtils.updateDocument} so it is rewritten for the given context.
 *
 * @param sourcedir directory holding the existing index to read
 * @param targetdir directory that receives the rebuilt index (recreated from scratch)
 * @param context   indexing context used to update each copied document
 * @throws CorruptIndexException if the source index is corrupt
 * @throws LockObtainFailedException if the target index write lock cannot be obtained
 * @throws IOException on any other index I/O failure
 */
private static void copyUpdatedDocuments(final Directory sourcedir, final Directory targetdir,
        final IndexingContext context)
        throws CorruptIndexException, LockObtainFailedException, IOException {
    IndexWriter w = null;
    IndexReader r = null;
    try {
        r = IndexReader.open(sourcedir);
        // Lucene 2.x constructor; the final 'true' presumably requests creation of a
        // brand-new index in targetdir — TODO confirm against the IndexWriter version in use.
        w = new IndexWriter(targetdir, false, new NexusAnalyzer(), true);
        // maxDoc() is an exclusive upper bound on document ids; ids belonging to
        // deleted documents are skipped via isDeleted().
        for (int i = 0; i < r.maxDoc(); i++) {
            if (!r.isDeleted(i)) {
                w.addDocument(IndexUtils.updateDocument(r.document(i), context));
            }
        }
        w.optimize();
        w.flush();
    } finally {
        // Null-safe closes; run even when the copy above fails part-way.
        IndexUtils.close(w);
        IndexUtils.close(r);
    }
}
From source file:org.sonatype.nexus.ReindexIT.java
License:Open Source License
/**
 * Shifts every live document's LAST_MODIFIED value by {@code shiftDays} days,
 * rewriting each affected document in place, then optimizes/commits the context
 * and shifts the context timestamp by the same amount. No-op when shiftDays == 0.
 */
protected void shiftContextInTime(IndexingContext ctx, int shiftDays) throws IOException {
    if (shiftDays != 0) {
        final IndexWriter iw = ctx.getIndexWriter();
        final IndexSearcher is = ctx.acquireIndexSearcher();
        try {
            final IndexReader ir = is.getIndexReader();
            // Walk all doc ids up to maxDoc(), skipping deleted slots.
            for (int docNum = 0; docNum < ir.maxDoc(); docNum++) {
                if (!ir.isDeleted(docNum)) {
                    Document doc = ir.document(docNum);
                    String lastModified = doc.get(ArtifactInfo.LAST_MODIFIED);
                    if (lastModified != null) {
                        long lm = Long.parseLong(lastModified);
                        lm = lm + (shiftDays * A_DAY_MILLIS);
                        // Replace the field rather than add a second value;
                        // Store.YES + Index.NO keeps it stored but not searchable.
                        doc.removeFields(ArtifactInfo.LAST_MODIFIED);
                        doc.add(new Field(ArtifactInfo.LAST_MODIFIED, Long.toString(lm), Field.Store.YES,
                                Field.Index.NO));
                        // NOTE(review): UINFO appears to be the unique key for an
                        // artifact record, so updateDocument replaces the old version
                        // — confirm against ArtifactInfo's docs.
                        iw.updateDocument(new Term(ArtifactInfo.UINFO, doc.get(ArtifactInfo.UINFO)), doc);
                    }
                }
            }
            ctx.optimize();
            ctx.commit();
            // shift timestamp too
            if (ctx.getTimestamp() != null) {
                ctx.updateTimestamp(true, new Date(ctx.getTimestamp().getTime() + (shiftDays * A_DAY_MILLIS)));
            } else {
                ctx.updateTimestamp(true, new Date(System.currentTimeMillis() + (shiftDays * A_DAY_MILLIS)));
            }
        } finally {
            // Always return the searcher to the context, even on failure.
            ctx.releaseIndexSearcher(is);
        }
    }
}
From source file:org.springmodules.lucene.index.core.DefaultLuceneIndexTemplate.java
License:Apache License
/**
 * Returns the index's {@code maxDoc()} value — one greater than the largest
 * document id, counting deleted documents. The reader is borrowed from the
 * factory and always released, even if the call fails.
 */
public int getMaxDoc() {
    final IndexReader borrowedReader = IndexReaderFactoryUtils.getIndexReader(indexFactory);
    try {
        return borrowedReader.maxDoc();
    } finally {
        IndexReaderFactoryUtils.releaseIndexReader(indexFactory, borrowedReader);
    }
}
From source file:org.talend.dataquality.standardization.migration.FirstNameIndexMigrator.java
License:Open Source License
/** * regenerate all indexes recursively.//from w ww .ja va 2s .com * * @param inputFolder * @param outputFolder * @throws java.io.IOException */ private int regenerate(File inputFolder, File outputFolder) throws IOException { FSDirectory indexDir = FSDirectory.open(inputFolder); CheckIndex check = new CheckIndex(indexDir); Status status = check.checkIndex(); if (status.missingSegments) { for (File f : inputFolder.listFiles()) { if (f.isDirectory()) { File out = new File(outputFolder.getAbsolutePath() + "/" + f.getName()); out.mkdir(); regenerate(f, out); } } } else { System.out.println("REGENERATE: " + inputFolder.getAbsoluteFile()); FSDirectory outputDir = FSDirectory.open(outputFolder); IndexWriterConfig config = new IndexWriterConfig(Version.LATEST, analyzer); IndexWriter writer = new IndexWriter(outputDir, config); IndexReader reader = DirectoryReader.open(indexDir); Document doc = null; // for any other indexes, regenerate with new Analyzer, but no // changes to document. for (int i = 0; i < reader.maxDoc(); i++) { doc = reader.document(i); if (IS_MIGRATING_FIRSTNAME_INDEX) { Document newDoc = generateFirstNameDoc(doc); if (newDoc != null) { writer.addDocument(newDoc); } } else { writer.addDocument(doc); } } System.out.println("count: " + count); writer.commit(); writer.close(); outputDir.close(); // copy all other files such as "readMe.txt" for (File file : inputFolder.listFiles()) { if (file.isFile() && !isLuceneIndexFile(file)) { // copy to destination folder copyFile(file, outputFolder); } } } return 0; }
From source file:org.talend.dataquality.standardization.migration.IndexMigrator.java
License:Open Source License
/** * regenerate all indexes recursively./*from w ww . j a v a 2 s.co m*/ * * @param inputFolder * @param outputFolder * @throws java.io.IOException */ private int regenerate(File inputFolder, File outputFolder) throws IOException { FSDirectory inputDir = FSDirectory.open(inputFolder); CheckIndex check = new CheckIndex(inputDir); Status status = check.checkIndex(); if (status.missingSegments) { for (File f : inputFolder.listFiles()) { if (f.isDirectory()) { File out = new File(outputFolder.getAbsolutePath() + "/" + f.getName()); out.mkdir(); regenerate(f, out); } } } else { System.out.println("REGENERATE: " + inputFolder.getPath()); FSDirectory outputDir = FSDirectory.open(outputFolder); analyzer = new StandardAnalyzer(CharArraySet.EMPTY_SET); IndexWriterConfig config = new IndexWriterConfig(Version.LATEST, analyzer); IndexWriter writer = new IndexWriter(outputDir, config); IndexReader reader = DirectoryReader.open(inputDir); // for any other indexes, regenerate with new Analyzer, but no // changes to document. Collection<String> fieldNames = new ArrayList<String>(); int count = 0; Bits liveDocs = MultiFields.getLiveDocs(reader); for (int i = 0; i < reader.maxDoc(); i++) { if (liveDocs != null && !liveDocs.get(i)) { continue; } Document doc = reader.document(i); List<IndexableField> fields = doc.getFields(); for (int k = 0; k < fields.size(); k++) { fieldNames.add(fields.get(k).name()); } if (fieldNames.contains(F_WORD)) { // for "out of the box" indexes, regenerate the index with 2 // extra fields ("SYNTERM" and "WORDTERM") for better scoring. 
String word = doc.getValues(F_WORD)[0]; String[] synonyms = doc.getValues(F_SYN); Set<String> synonymSet = new HashSet<String>(); for (String syn : synonyms) { if (!syn.equals(word)) { synonymSet.add(syn); } } Document newDoc = generateDocument(word, synonymSet); writer.addDocument(newDoc); } else { writer.addDocument(doc); } count++; } System.out.println("count: " + count); reader.close(); writer.commit(); writer.close(); outputDir.close(); // copy all other files such as "readMe.txt" for (File file : inputFolder.listFiles()) { if (file.isFile() && !isLuceneIndexFile(file)) { // copy to destination folder copyFile(file, outputFolder); } } } return 0; }
From source file:org.toubassi.femtozip.lucene.IndexDocumentList.java
License:Apache License
/**
 * Builds a sampled view over the stored values of {@code fieldName}.
 *
 * Scans documents from {@code firstDoc} up to {@code reader.maxDoc()}, skipping
 * deleted documents, and keeps roughly {@code numSamples} of the live documents
 * (chosen by a running sampling-rate comparison). For each kept document the doc
 * id and its field count are recorded; field counts are then converted to a
 * cumulative (prefix-sum) array so the i-th entry is the total number of field
 * values up to and including document i.
 *
 * @param reader     open index reader (retained, not closed here)
 * @param numSamples approximate number of documents to sample
 * @param firstDoc   first document id to consider
 * @param fieldName  stored field whose values this list exposes
 * @throws IOException on index read failure
 */
public IndexDocumentList(IndexReader reader, int numSamples, int firstDoc, String fieldName)
        throws IOException {
    this.reader = reader;
    this.fieldName = fieldName;
    numDocs = reader.numDocs();
    // Fraction of live docs to keep. NOTE(review): if numDocs is 0 this is
    // NaN/Infinity, but the loop below then samples nothing, so it is benign.
    float samplingRate = ((float) numSamples) / numDocs;
    ArrayList<Integer> docIdsList = new ArrayList<Integer>();
    ArrayList<Integer> fieldCountList = new ArrayList<Integer>();
    int numDocsScanned = 0, numDocsSampled = 0;
    for (int i = firstDoc, count = reader.maxDoc(); i < count; i++) {
        numDocsScanned++;
        if (reader.isDeleted(i)) {
            continue;
        }
        // Sample only when the target count (scanned * rate) has pulled ahead
        // of how many we have actually taken so far.
        if (((int) (numDocsScanned * samplingRate)) <= numDocsSampled) {
            continue;
        }
        numDocsSampled++;
        Document doc = reader.document(i);
        Field fields[] = doc.getFields(fieldName);
        if (fields.length > 0) {
            // Only stored fields can be read back later.
            if (fields[0].isStored()) {
                docIdsList.add(i);
                fieldCountList.add(fields.length);
            }
        }
    }
    docIds = new int[docIdsList.size()];
    for (int i = 0, count = docIdsList.size(); i < count; i++) {
        docIds[i] = docIdsList.get(i);
    }
    // Convert per-document counts into a cumulative prefix-sum array.
    fieldCounts = new int[fieldCountList.size()];
    for (int i = 0, count = fieldCountList.size(); i < count; i++) {
        fieldCounts[i] = fieldCountList.get(i);
        if (i > 0) {
            fieldCounts[i] += fieldCounts[i - 1];
        }
    }
}
From source file:org.toubassi.femtozip.lucene.IndexDumper.java
License:Apache License
/**
 * Dumps a sample of stored field values from the index at {@code indexPath}
 * to stdout. Deleted documents are skipped; roughly {@code numSamples} live
 * documents are printed, each as "DOCUMENT: <id>" followed by one line per
 * non-empty stored, uncompressed field value. If {@code fieldsToDump} is set,
 * only those fields are printed.
 */
protected void dump() throws IOException {
    IndexReader reader = IndexReader.open(indexPath);
    Collection<?> allFields = reader.getFieldNames(IndexReader.FieldOption.ALL);
    String[] fieldNames = new String[allFields.size()];
    allFields.toArray(fieldNames);
    numDocs = reader.numDocs();
    int maxDocId = reader.maxDoc();
    // Fraction of live docs to print; compared against a running tally below.
    float samplingRate = ((float) numSamples) / numDocs;
    int numDocsScanned = 0;
    int numDocsSampled = 0;
    for (int docId = 0; docId < maxDocId; docId++) {
        if (reader.isDeleted(docId)) {
            continue;
        }
        numDocsScanned++;
        // Sample only when the target count has pulled ahead of the taken count.
        if (((int) (numDocsScanned * samplingRate)) <= numDocsSampled) {
            continue;
        }
        numDocsSampled++;
        Document doc = reader.document(docId);
        System.out.println("DOCUMENT: " + docId);
        for (String fieldName : fieldNames) {
            if (fieldsToDump != null && fieldsToDump.indexOf(fieldName) == -1) {
                continue;
            }
            Field[] fields = doc.getFields(fieldName);
            for (Field field : fields) {
                if (!field.isStored() || field.isCompressed()) {
                    // TODO if its compressed, uncompress it and benchmark it.
                    continue;
                }
                byte[] bytes;
                if (field.isBinary()) {
                    // Copy out the binary slice (value may be offset into a shared buffer).
                    bytes = new byte[field.getBinaryLength()];
                    System.arraycopy(field.getBinaryValue(), field.getBinaryOffset(), bytes, 0,
                            field.getBinaryLength());
                } else {
                    String value = field.stringValue();
                    bytes = value.getBytes("UTF-8");
                }
                if (bytes.length > 0) {
                    System.out.print("   " + fieldName + " " + bytes.length + " ");
                    System.out.write(bytes);
                    System.out.println();
                }
            }
        }
    }
    reader.close();
}
From source file:org.toubassi.femtozip.lucene.StoredFieldDumper.java
License:Apache License
protected void dump() throws IOException { IndexReader reader = IndexReader.open(indexPath); Collection<?> allFields = reader.getFieldNames(IndexReader.FieldOption.ALL); String[] fieldNames = new String[allFields.size()]; allFields.toArray(fieldNames);/*ww w. jav a2s .c o m*/ Map<String, OutputStream> output = new HashMap<String, OutputStream>(); long lastStatusTime = 0; for (int docId = 0, count = reader.maxDoc(); docId < count; docId++) { Document doc = reader.document(docId); if (System.currentTimeMillis() - lastStatusTime > 5000) { lastStatusTime = System.currentTimeMillis(); System.out.println("Processing docId " + docId + " of " + count); } for (String fieldName : fieldNames) { Field[] fields = doc.getFields(fieldName); for (Field field : fields) { if (!field.isStored() || field.isCompressed()) { // TODO if its compressed, uncompress it and benchmark it. continue; } byte[] bytes; if (field.isBinary()) { bytes = new byte[field.getBinaryLength()]; System.arraycopy(field.getBinaryValue(), field.getBinaryOffset(), bytes, 0, field.getBinaryLength()); } else { String value = field.stringValue(); bytes = value.getBytes("UTF-8"); } OutputStream out = output.get(fieldName); if (out == null) { FileOutputStream fileOut = new FileOutputStream(outputBasePath + "_" + fieldName); out = new BufferedOutputStream(fileOut); output.put(fieldName, out); } out.write(bytes); } } } reader.close(); for (Map.Entry<String, OutputStream> entry : output.entrySet()) { entry.getValue().close(); } }
From source file:org.toubassi.femtozip.lucene.StoredFieldExploder.java
License:Apache License
/**
 * Explodes up to {@code numSamples} stored values of {@code fieldName} into
 * individual files named "1.<field>", "2.<field>", ... under
 * {@code outputBasePath}. Deleted documents and documents lacking the field
 * are skipped.
 *
 * @throws IOException on index read or file write failure
 */
protected void dump() throws IOException {
    IndexReader reader = IndexReader.open(indexPath);
    Collection<?> allFields = reader.getFieldNames(IndexReader.FieldOption.ALL);
    String[] fieldNames = new String[allFields.size()];
    allFields.toArray(fieldNames);
    int numProcessed = 0;
    // Stop as soon as numSamples documents have been written.
    for (int docId = 0, count = reader.maxDoc(); docId < count && numProcessed < numSamples; docId++) {
        if (reader.isDeleted(docId)) {
            continue;
        }
        Document doc = reader.document(docId);
        Field field = doc.getField(fieldName);
        if (field != null) {
            // NOTE(review): out.close() is not in a finally block, so a write
            // failure leaks the stream — consider try/finally here.
            FileOutputStream out = new FileOutputStream(
                    outputBasePath + File.separator + (numProcessed + 1) + "." + fieldName);
            if (field.isBinary()) {
                out.write(field.getBinaryValue(), field.getBinaryOffset(), field.getBinaryLength());
            } else {
                out.write(field.stringValue().getBytes("UTF-8"));
            }
            out.close();
            numProcessed++;
        }
    }
    reader.close();
}
From source file:org.zanata.hibernate.search.LocaleFilter.java
License:Open Source License
/**
 * Builds the set of document ids whose "locale" field exactly matches this
 * filter's locale, as an {@code OpenBitSet} sized to {@code reader.maxDoc()}.
 *
 * @param reader index reader to enumerate matching documents from
 * @return bit set with one bit set per matching document id
 * @throws IOException on index read failure
 */
@Override
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
    OpenBitSet bitSet = new OpenBitSet(reader.maxDoc());
    log.debug("getDocIdSet for {}", locale);
    Term term = new Term("locale", locale.toString());
    TermDocs termDocs = reader.termDocs(term);
    try {
        while (termDocs.next()) {
            bitSet.set(termDocs.doc());
        }
    } finally {
        // FIX: TermDocs holds index resources and must be closed; the original
        // enumerated it without ever closing it.
        termDocs.close();
    }
    return bitSet;
}