Example usage for org.apache.lucene.document Field readerValue

List of usage examples for org.apache.lucene.document Field readerValue

Introduction

On this page you can find example usages of org.apache.lucene.document Field readerValue.

Prototype

@Override
public Reader readerValue() 

Source Link

Document

The value of the field as a Reader, or null.

Usage

From source file:engine.easy.indexer.writer.EasySearchIndexWriter.java

License:Apache License

/**
 * Read the extra data field information
 * /*from  w  w  w.j a v a 2 s.c om*/
 * @return it returns the no: of token streams for the extra data field information.
  * @throws IOException if the file would have any IO operation.
 */
private int[] extraData(Field field, Analyzer analyzer) throws IOException {
    if (!field.isIndexed())
        return null;
    if (!field.isTokenized())
        return (new int[] { 1, 1 });
    String strv = field.stringValue();
    int v[];
    if (strv == null) {
        Reader readerv = field.readerValue();
        if (readerv == null) {
            TokenStream tsv = field.tokenStreamValue();
            if (tsv == null) {
                throw new IllegalArgumentException(
                        (new StringBuilder("Cannot obtain field value. field_name: ")).append(field.name())
                                .append(".").toString());
            } else {
                v = countTokenStream(tsv);
                return v;
            }
        }
        strv = readAll(readerv);
        if (strv == null)
            throw new IllegalArgumentException((new StringBuilder("Cannot obtain field value. field_name: "))
                    .append(field.name()).append(".").toString());

        field.setValue(strv);
    }
    BufferedReader reader = new BufferedReader(new StringReader(strv));
    TokenStream ts = analyzer.tokenStream(field.name(), reader);
    v = countTokenStream(ts);
    ts.close();
    reader.close();
    return v;
}

From source file:lucli.LuceneMethods.java

License:Apache License

/**
 * Tokenizes every indexed, analyzed field of the given document, counts term
 * frequencies, and prints the ten most frequent terms with their counts.
 *
 * @param doc the document whose fields are inverted
 * @throws IOException if tokenization fails
 * @throws IllegalArgumentException if a field has neither a String nor a
 *         Reader value
 */
private void invertDocument(Document doc) throws IOException {

    Map<String, Integer> tokenMap = new HashMap<String, Integer>();
    // Cap on token positions processed per field.
    final int maxFieldLength = 10000;

    Analyzer analyzer = createAnalyzer();
    Iterator fields = doc.getFields().iterator();
    while (fields.hasNext()) {
        Field field = (Field) fields.next();
        String fieldName = field.name();

        // Only analyzed (tokenized) indexed fields are inverted.
        // (The original comment said "un-tokenized field", which was wrong.)
        if (field.isIndexed() && field.isTokenized()) {
            Reader reader; // find or make Reader
            if (field.readerValue() != null) {
                reader = field.readerValue();
            } else if (field.stringValue() != null) {
                reader = new StringReader(field.stringValue());
            } else {
                throw new IllegalArgumentException("field must have either String or Reader value");
            }

            int position = 0;
            // Tokenize the field and accumulate term counts.
            TokenStream stream = analyzer.tokenStream(fieldName, reader);
            TermAttribute termAtt = (TermAttribute) stream.addAttribute(TermAttribute.class);
            PositionIncrementAttribute posIncrAtt = (PositionIncrementAttribute) stream
                    .addAttribute(PositionIncrementAttribute.class);

            try {
                while (stream.incrementToken()) {
                    // Equivalent to the original (increment - 1) followed by ++.
                    position += posIncrAtt.getPositionIncrement();
                    String term = termAtt.term();
                    Integer count = tokenMap.get(term);
                    tokenMap.put(term,
                            count == null ? Integer.valueOf(1) : Integer.valueOf(count.intValue() + 1));
                    if (position > maxFieldLength) {
                        break;
                    }
                }
            } finally {
                stream.close();
            }
        }
    }
    Entry[] sortedHash = getSortedMapEntries(tokenMap);
    for (int ii = 0; ii < sortedHash.length && ii < 10; ii++) {
        Entry currentEntry = sortedHash[ii];
        message((ii + 1) + ":" + currentEntry.getKey() + " " + currentEntry.getValue());
    }
}

From source file:org.apache.jackrabbit.core.query.lucene.AbstractIndex.java

License:Apache License

/**
 * Returns a document that is finished with text extraction and is ready to
 * be added to the index.
 *
 * @param doc the document to check.
 * @return <code>doc</code> if it is finished already or a stripped down
 *         copy of <code>doc</code> without text extractors.
 * @throws IOException if the document cannot be added to the indexing
 *                     queue.
 */
private Document getFinishedDocument(Document doc) throws IOException {
    if (Util.isDocumentReady(doc)) {
        return doc;
    }
    Document stripped = new Document();
    Iterator fields = doc.getFields().iterator();
    while (fields.hasNext()) {
        Field original = (Field) fields.next();
        Field.TermVector tv = getTermVectorParameter(original);
        Field.Store stored = getStoreParameter(original);
        Field.Index indexed = getIndexParameter(original);
        Field replacement = null;
        if (original.readerValue() != null) {
            // every reader is swapped for an empty string reader
            replacement = new Field(original.name(), new StringReader(""), tv);
        } else if (original.stringValue() != null) {
            replacement = new Field(original.name(), original.stringValue(), stored, indexed, tv);
        } else if (original.isBinary()) {
            replacement = new Field(original.name(), original.binaryValue(), stored);
        }
        if (replacement != null) {
            replacement.setOmitNorms(original.getOmitNorms());
            stripped.add(replacement);
        }
    }
    // hand the original document to the indexing queue for later processing
    Document pending = indexingQueue.addDocument(doc);
    if (pending != null) {
        // a document for this node was already queued -> dispose it
        Util.disposeDocument(pending);
    }
    // serve the stripped-down copy for now
    return stripped;
}

From source file:org.apache.jackrabbit.core.query.lucene.AbstractIndex.java

License:Apache License

/**
 * Returns a document that is finished with text extraction and is ready to
 * be added to the index./*w w w.j ava  2  s.  c  om*/
 *
 * @param doc the document to check.
 * @return <code>doc</code> if it is finished already or a stripped down
 *         copy of <code>doc</code> without text extractors.
 * @throws IOException if the document cannot be added to the indexing
 *                     queue.
 */
private Document getFinishedDocument(Document doc) throws IOException {
    if (!Util.isDocumentReady(doc)) {
        Document copy = new Document();
        for (Enumeration fields = doc.fields(); fields.hasMoreElements();) {
            Field f = (Field) fields.nextElement();
            Field field = null;
            Field.TermVector tv = getTermVectorParameter(f);
            Field.Store stored = getStoreParameter(f);
            Field.Index indexed = getIndexParameter(f);
            if (f.readerValue() != null) {
                // replace all readers with empty string reader
                field = new Field(f.name(), new StringReader(""), tv);
            } else if (f.stringValue() != null) {
                field = new Field(f.name(), f.stringValue(), stored, indexed, tv);
            } else if (f.isBinary()) {
                field = new Field(f.name(), f.binaryValue(), stored);
            }
            if (field != null) {
                field.setOmitNorms(f.getOmitNorms());
                copy.add(field);
            }
        }
        // schedule the original document for later indexing
        Document existing = indexingQueue.addDocument(doc);
        if (existing != null) {
            // the queue already contained a pending document for this
            // node. -> dispose the document
            Util.disposeDocument(existing);
        }
        // use the stripped down copy for now
        doc = copy;
    }
    return doc;
}

From source file:org.dspace.search.DSIndexer.java

License:BSD License

/**
 * Closes every Reader-backed field value of the given document, logging
 * (rather than propagating) any close failure.
 *
 * @param doc the document whose reader values should be closed; may be null
 */
private static void closeAllReaders(Document doc) {
    if (doc == null) {
        return;
    }
    List fields = doc.getFields();
    if (fields == null) {
        return;
    }
    int closed = 0;
    for (Field field : (List<Field>) fields) {
        Reader value = field.readerValue();
        if (value == null) {
            continue;
        }
        try {
            value.close();
            closed++;
        } catch (IOException e) {
            log.error("Unable to close reader", e);
        }
    }
    if (closed > 0) {
        log.debug("closed " + closed + " readers");
    }
}

From source file:org.hibernate.search.indexes.serialization.impl.LuceneWorkSerializerImpl.java

License:LGPL

/**
 * Dispatches the field to the serializer method matching its value type:
 * binary, string, serializable reader, or token stream.
 *
 * FIXME: the new Field implementation appears to allow several data types on
 * the same field at once; if so, this else-if cascade is no longer
 * appropriate. Investigate.
 */
private void serializeField(Serializer serializer, Field fieldable) {
    if (fieldable.binaryValue() != null) {
        serializer.addFieldWithBinaryData(new LuceneFieldContext(fieldable));
    } else if (fieldable.stringValue() != null) {
        serializer.addFieldWithStringData(new LuceneFieldContext(fieldable));
    } else if (fieldable.readerValue() instanceof Serializable) {
        // instanceof is false for null, so no explicit null check is needed
        serializer.addFieldWithSerializableReaderData(new LuceneFieldContext(fieldable));
    } else if (fieldable.readerValue() != null) {
        throw log.conversionFromReaderToStringNotYetImplemented();
    } else if (fieldable.tokenStreamValue() != null) {
        serializer.addFieldWithTokenStreamData(new LuceneFieldContext(fieldable));
    } else {
        throw log.unknownFieldType(fieldable.getClass());
    }
}

From source file:org.hibernate.search.test.serialization.SerializationTest.java

License:Open Source License

/**
 * Asserts that the (de)serialized {@code copy} carries the same metadata and
 * values as the original {@code field}: name, binary data, boost, norms and
 * term-frequency flags, index/store/term-vector settings, and the reader,
 * token-stream, and string values.
 */
private void assertNormalField(Field field, Field copy) {
    assertThat(copy.name()).isEqualTo(field.name());
    assertThat(copy.getBinaryLength()).isEqualTo(field.getBinaryLength());
    assertThat(copy.getBinaryOffset()).isEqualTo(field.getBinaryOffset());
    assertThat(copy.getBinaryValue()).isEqualTo(field.getBinaryValue());
    assertThat(copy.getBoost()).isEqualTo(field.getBoost());
    assertThat(copy.getOmitNorms()).isEqualTo(field.getOmitNorms());
    assertThat(copy.getOmitTermFreqAndPositions()).isEqualTo(field.getOmitTermFreqAndPositions());
    assertThat(copy.isBinary()).isEqualTo(field.isBinary());
    assertThat(copy.isIndexed()).isEqualTo(field.isIndexed());
    assertThat(copy.isLazy()).isEqualTo(field.isLazy());
    assertThat(copy.isStoreOffsetWithTermVector()).isEqualTo(field.isStoreOffsetWithTermVector());
    assertThat(copy.isStorePositionWithTermVector()).isEqualTo(field.isStorePositionWithTermVector());
    assertThat(copy.isStored()).isEqualTo(field.isStored());
    assertThat(copy.isTokenized()).isEqualTo(field.isTokenized());
    assertThat(compareReaders(copy.readerValue(), field.readerValue())).isTrue();
    // NOTE(review): argument order is (original, copy) here but (copy, original)
    // for the reader comparison above — confirm compareTokenStreams is symmetric.
    assertThat(compareTokenStreams(field.tokenStreamValue(), copy.tokenStreamValue())).isTrue();
    assertThat(copy.stringValue()).isEqualTo(field.stringValue());

    assertThat(copy.isTermVectorStored()).isEqualTo(field.isTermVectorStored());
}

From source file:org.hibernate.search.test.util.SerializationTestHelper.java

License:LGPL

/**
 * Asserts that the (de)serialized {@code copy} equals the {@code original}
 * field: name, binary value, boost, field type, and the reader, token-stream,
 * and string values.
 */
private static void assertFieldEquality(Field original, Field copy) {
    assertThat(copy.name()).isEqualTo(original.name());
    assertThat(copy.binaryValue()).isEqualTo(original.binaryValue());
    assertThat(copy.boost()).isEqualTo(original.boost());
    assertFieldType(copy.fieldType(), original.fieldType());
    assertThat(compareReaders(copy.readerValue(), original.readerValue())).isTrue();
    // NOTE(review): argument order is (original, copy) here but (copy, original)
    // for the reader comparison above — confirm compareTokenStreams is symmetric.
    assertThat(compareTokenStreams(original.tokenStreamValue(), copy.tokenStreamValue())).isTrue();
    assertThat(copy.stringValue()).isEqualTo(original.stringValue());
}