Example usage for org.apache.lucene.index IndexableField readerValue

List of usage examples for org.apache.lucene.index IndexableField readerValue

Introduction

On this page you can find example usages of org.apache.lucene.index.IndexableField#readerValue().

Prototype

public Reader readerValue();

Source Link

Document

Returns a non-null Reader if this field has a Reader value; otherwise returns null.

Usage

From source file:com.meizu.nlp.classification.utils.DatasetSplitter.java

License:Apache License

/**
 * Split a given index into 3 indexes for training, test and cross validation tasks respectively.
 *
 * <p>Documents are assigned in a round-robin-ish fashion: every other document goes to the
 * test index until it reaches {@code size * testRatio}, then documents go to the cross
 * validation index until it reaches {@code size * crossValidationRatio}, and the remainder
 * goes to the training index.</p>
 *
 * @param originalIndex        an {@link org.apache.lucene.index.LeafReader} on the source index
 * @param trainingIndex        a {@link Directory} used to write the training index
 * @param testIndex            a {@link Directory} used to write the test index
 * @param crossValidationIndex a {@link Directory} used to write the cross validation index
 * @param analyzer             {@link Analyzer} used to create the new docs
 * @param fieldNames           names of fields that need to be put in the new indexes or <code>null</code> if all should be used
 * @throws IOException if any writing operation fails on any of the indexes
 */
public void split(LeafReader originalIndex, Directory trainingIndex, Directory testIndex,
        Directory crossValidationIndex, Analyzer analyzer, String... fieldNames) throws IOException {

    // create IWs for train / test / cv IDXs
    IndexWriter testWriter = new IndexWriter(testIndex, new IndexWriterConfig(analyzer));
    IndexWriter cvWriter = new IndexWriter(crossValidationIndex, new IndexWriterConfig(analyzer));
    IndexWriter trainingWriter = new IndexWriter(trainingIndex, new IndexWriterConfig(analyzer));

    try {
        int size = originalIndex.maxDoc();

        IndexSearcher indexSearcher = new IndexSearcher(originalIndex);
        TopDocs topDocs = indexSearcher.search(new MatchAllDocsQuery(), Integer.MAX_VALUE);

        // set the type to be indexed, stored, with term vectors
        FieldType ft = new FieldType(TextField.TYPE_STORED);
        ft.setStoreTermVectors(true);
        ft.setStoreTermVectorOffsets(true);
        ft.setStoreTermVectorPositions(true);

        int b = 0;

        // iterate over existing documents
        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {

            // fetch the stored document once per hit instead of once per field
            Document originalDoc = originalIndex.document(scoreDoc.doc);

            // create a new document for indexing
            Document doc = new Document();
            if (fieldNames != null && fieldNames.length > 0) {
                // copy only the requested fields; skip any field that is not
                // stored in this document (previously this would have thrown an NPE)
                for (String fieldName : fieldNames) {
                    IndexableField storedField = originalDoc.getField(fieldName);
                    if (storedField != null) {
                        doc.add(new Field(fieldName, storedField.stringValue(), ft));
                    }
                }
            } else {
                // copy every stored field, picking the first available value
                // representation: reader, binary, string, then numeric
                for (IndexableField storableField : originalDoc.getFields()) {
                    if (storableField.readerValue() != null) {
                        doc.add(new Field(storableField.name(), storableField.readerValue(), ft));
                    } else if (storableField.binaryValue() != null) {
                        doc.add(new Field(storableField.name(), storableField.binaryValue(), ft));
                    } else if (storableField.stringValue() != null) {
                        doc.add(new Field(storableField.name(), storableField.stringValue(), ft));
                    } else if (storableField.numericValue() != null) {
                        doc.add(new Field(storableField.name(), storableField.numericValue().toString(), ft));
                    }
                }
            }

            // add it to one of the IDXs
            if (b % 2 == 0 && testWriter.maxDoc() < size * testRatio) {
                testWriter.addDocument(doc);
            } else if (cvWriter.maxDoc() < size * crossValidationRatio) {
                cvWriter.addDocument(doc);
            } else {
                trainingWriter.addDocument(doc);
            }
            b++;
        }
    } catch (Exception e) {
        throw new IOException(e);
    } finally {
        // commit and close each writer in its own finally so that a failure
        // on one writer no longer leaks (or masks the exception of) the others
        try {
            testWriter.commit();
            testWriter.close();
        } finally {
            try {
                cvWriter.commit();
                cvWriter.close();
            } finally {
                trainingWriter.commit();
                trainingWriter.close();
            }
        }
    }
}

From source file:org.opencms.search.CmsLuceneDocument.java

License:Open Source License

/**
 * Returns the raw bytes of the content blob field, or <code>null</code> if the
 * document has no such field, the field was not indexed from a reader, or the
 * reader could not be drained.
 *
 * @see org.opencms.search.I_CmsSearchDocument#getContentBlob()
 */
public byte[] getContentBlob() {

    IndexableField fieldContentBlob = m_doc.getField(CmsSearchField.FIELD_CONTENT_BLOB);
    if (fieldContentBlob != null) {
        try {
            // fetch the reader once: a Reader is stateful, so calling
            // readerValue() twice (check + use) is best avoided
            java.io.Reader reader = fieldContentBlob.readerValue();
            if (reader != null) {
                return IOUtils.toByteArray(reader);
            }
        } catch (IOException e) {
            // best effort: fall through and return null when the blob cannot be read
            // NOTE(review): consider logging this exception instead of swallowing it silently
        }
    }
    return null;
}

From source file:org.opengrok.indexer.index.IndexDatabase.java

License:Open Source License

/**
 * Do a best effort to clean up all resources allocated when populating
 * a Lucene document. On normal execution, these resources should be
 * closed automatically by the index writer once it's done with them, but
 * we may not get that far if something fails.
 *
 * @param doc the document whose resources to clean up
 */
private static void cleanupResources(Document doc) {
    for (IndexableField field : doc) {
        // Close the reader backing this field, if it was populated from one.
        IOUtils.close(field.readerValue());

        // Concrete Field instances may also carry a token stream; close that too.
        if (field instanceof Field) {
            IOUtils.close(((Field) field).tokenStreamValue());
        }
    }
}

From source file:org.opensolaris.opengrok.index.IndexDatabase.java

License:Open Source License

/**
 * Do a best effort to clean up all resources allocated when populating
 * a Lucene document. On normal execution, these resources should be
 * closed automatically by the index writer once it's done with them, but
 * we may not get that far if something fails.
 *
 * @param doc the document whose resources to clean up
 */
private void cleanupResources(Document doc) {
    for (IndexableField f : doc) {
        // If the field takes input from a reader, close the reader.
        // readerValue() may return null; IOUtils.close is presumably
        // null-safe since no check is done here — TODO confirm.
        IOUtils.close(f.readerValue());

        // If the field takes input from a token stream, close the
        // token stream. Only concrete Field instances expose one.
        if (f instanceof Field) {
            IOUtils.close(((Field) f).tokenStreamValue());
        }
    }
}