List of usage examples for org.apache.lucene.document.Field#readerValue()
@Override
public Reader readerValue()
From source file:engine.easy.indexer.writer.EasySearchIndexWriter.java
License:Apache License
/** * Read the extra data field information * /*from w w w.j a v a 2 s.c om*/ * @return it returns the no: of token streams for the extra data field information. * @throws IOException if the file would have any IO operation. */ private int[] extraData(Field field, Analyzer analyzer) throws IOException { if (!field.isIndexed()) return null; if (!field.isTokenized()) return (new int[] { 1, 1 }); String strv = field.stringValue(); int v[]; if (strv == null) { Reader readerv = field.readerValue(); if (readerv == null) { TokenStream tsv = field.tokenStreamValue(); if (tsv == null) { throw new IllegalArgumentException( (new StringBuilder("Cannot obtain field value. field_name: ")).append(field.name()) .append(".").toString()); } else { v = countTokenStream(tsv); return v; } } strv = readAll(readerv); if (strv == null) throw new IllegalArgumentException((new StringBuilder("Cannot obtain field value. field_name: ")) .append(field.name()).append(".").toString()); field.setValue(strv); } BufferedReader reader = new BufferedReader(new StringReader(strv)); TokenStream ts = analyzer.tokenStream(field.name(), reader); v = countTokenStream(ts); ts.close(); reader.close(); return v; }
From source file:lucli.LuceneMethods.java
License:Apache License
private void invertDocument(Document doc) throws IOException { Map tokenMap = new HashMap(); final int maxFieldLength = 10000; Analyzer analyzer = createAnalyzer(); Iterator fields = doc.getFields().iterator(); final Token reusableToken = new Token(); while (fields.hasNext()) { Field field = (Field) fields.next(); String fieldName = field.name(); if (field.isIndexed()) { if (field.isTokenized()) { // un-tokenized field Reader reader; // find or make Reader if (field.readerValue() != null) reader = field.readerValue(); else if (field.stringValue() != null) reader = new StringReader(field.stringValue()); else throw new IllegalArgumentException("field must have either String or Reader value"); int position = 0; // Tokenize field and add to postingTable TokenStream stream = analyzer.tokenStream(fieldName, reader); TermAttribute termAtt = (TermAttribute) stream.addAttribute(TermAttribute.class); PositionIncrementAttribute posIncrAtt = (PositionIncrementAttribute) stream .addAttribute(PositionIncrementAttribute.class); try { while (stream.incrementToken()) { position += (posIncrAtt.getPositionIncrement() - 1); position++;//from w ww.j a va 2 s . co m String name = termAtt.term(); Integer Count = (Integer) tokenMap.get(name); if (Count == null) { // not in there yet tokenMap.put(name, new Integer(1)); //first one } else { int count = Count.intValue(); tokenMap.put(name, new Integer(count + 1)); } if (position > maxFieldLength) break; } } finally { stream.close(); } } } } Entry[] sortedHash = getSortedMapEntries(tokenMap); for (int ii = 0; ii < sortedHash.length && ii < 10; ii++) { Entry currentEntry = sortedHash[ii]; message((ii + 1) + ":" + currentEntry.getKey() + " " + currentEntry.getValue()); } }
From source file:org.apache.jackrabbit.core.query.lucene.AbstractIndex.java
License:Apache License
/** * Returns a document that is finished with text extraction and is ready to * be added to the index.//from w w w. j a v a 2s . com * * @param doc the document to check. * @return <code>doc</code> if it is finished already or a stripped down * copy of <code>doc</code> without text extractors. * @throws IOException if the document cannot be added to the indexing * queue. */ private Document getFinishedDocument(Document doc) throws IOException { if (!Util.isDocumentReady(doc)) { Document copy = new Document(); for (Iterator fields = doc.getFields().iterator(); fields.hasNext();) { Field f = (Field) fields.next(); Field field = null; Field.TermVector tv = getTermVectorParameter(f); Field.Store stored = getStoreParameter(f); Field.Index indexed = getIndexParameter(f); if (f.readerValue() != null) { // replace all readers with empty string reader field = new Field(f.name(), new StringReader(""), tv); } else if (f.stringValue() != null) { field = new Field(f.name(), f.stringValue(), stored, indexed, tv); } else if (f.isBinary()) { field = new Field(f.name(), f.binaryValue(), stored); } if (field != null) { field.setOmitNorms(f.getOmitNorms()); copy.add(field); } } // schedule the original document for later indexing Document existing = indexingQueue.addDocument(doc); if (existing != null) { // the queue already contained a pending document for this // node. -> dispose the document Util.disposeDocument(existing); } // use the stripped down copy for now doc = copy; } return doc; }
From source file:org.apache.jackrabbit.core.query.lucene.AbstractIndex.java
License:Apache License
/** * Returns a document that is finished with text extraction and is ready to * be added to the index./*w w w.j ava 2 s. c om*/ * * @param doc the document to check. * @return <code>doc</code> if it is finished already or a stripped down * copy of <code>doc</code> without text extractors. * @throws IOException if the document cannot be added to the indexing * queue. */ private Document getFinishedDocument(Document doc) throws IOException { if (!Util.isDocumentReady(doc)) { Document copy = new Document(); for (Enumeration fields = doc.fields(); fields.hasMoreElements();) { Field f = (Field) fields.nextElement(); Field field = null; Field.TermVector tv = getTermVectorParameter(f); Field.Store stored = getStoreParameter(f); Field.Index indexed = getIndexParameter(f); if (f.readerValue() != null) { // replace all readers with empty string reader field = new Field(f.name(), new StringReader(""), tv); } else if (f.stringValue() != null) { field = new Field(f.name(), f.stringValue(), stored, indexed, tv); } else if (f.isBinary()) { field = new Field(f.name(), f.binaryValue(), stored); } if (field != null) { field.setOmitNorms(f.getOmitNorms()); copy.add(field); } } // schedule the original document for later indexing Document existing = indexingQueue.addDocument(doc); if (existing != null) { // the queue already contained a pending document for this // node. -> dispose the document Util.disposeDocument(existing); } // use the stripped down copy for now doc = copy; } return doc; }
From source file:org.dspace.search.DSIndexer.java
License:BSD License
/**
 * Closes every Reader held by the document's fields. Close failures are
 * logged rather than propagated so the remaining readers still get closed.
 *
 * @param doc the document whose field readers should be closed; may be null
 */
private static void closeAllReaders(Document doc) {
    if (doc == null) {
        return;
    }
    int count = 0;
    List fields = doc.getFields();
    if (fields != null) {
        for (Field field : (List<Field>) fields) {
            Reader r = field.readerValue();
            if (r == null) {
                continue;
            }
            try {
                r.close();
                count++;
            } catch (IOException e) {
                // Best-effort cleanup: log and keep closing the rest.
                log.error("Unable to close reader", e);
            }
        }
    }
    if (count > 0) {
        log.debug("closed " + count + " readers");
    }
}
From source file:org.hibernate.search.indexes.serialization.impl.LuceneWorkSerializerImpl.java
License:LGPL
private void serializeField(Serializer serializer, Field fieldable) { //FIXME it seems like in new Field implementation it's possible to have multiple data types at the same time. Investigate? //The following sequence of else/ifs would not be appropriate. if (fieldable.binaryValue() != null) { serializer.addFieldWithBinaryData(new LuceneFieldContext(fieldable)); } else if (fieldable.stringValue() != null) { serializer.addFieldWithStringData(new LuceneFieldContext(fieldable)); } else if (fieldable.readerValue() != null && fieldable.readerValue() instanceof Serializable) { serializer.addFieldWithSerializableReaderData(new LuceneFieldContext(fieldable)); } else if (fieldable.readerValue() != null) { throw log.conversionFromReaderToStringNotYetImplemented(); } else if (fieldable.tokenStreamValue() != null) { serializer.addFieldWithTokenStreamData(new LuceneFieldContext(fieldable)); } else {/*ww w .j ava2 s .c om*/ throw log.unknownFieldType(fieldable.getClass()); } }
From source file:org.hibernate.search.test.serialization.SerializationTest.java
License:Open Source License
/**
 * Asserts that a field reconstructed through serialization exposes exactly
 * the same state as the original non-numeric field.
 *
 * @param field the original field
 * @param copy  the field rebuilt from the serialized form
 */
private void assertNormalField(Field field, Field copy) {
    // Identity and binary payload.
    assertThat(copy.name()).isEqualTo(field.name());
    assertThat(copy.getBinaryLength()).isEqualTo(field.getBinaryLength());
    assertThat(copy.getBinaryOffset()).isEqualTo(field.getBinaryOffset());
    assertThat(copy.getBinaryValue()).isEqualTo(field.getBinaryValue());
    // Scoring and norm-related flags.
    assertThat(copy.getBoost()).isEqualTo(field.getBoost());
    assertThat(copy.getOmitNorms()).isEqualTo(field.getOmitNorms());
    assertThat(copy.getOmitTermFreqAndPositions()).isEqualTo(field.getOmitTermFreqAndPositions());
    // Storage / indexing flags.
    assertThat(copy.isBinary()).isEqualTo(field.isBinary());
    assertThat(copy.isIndexed()).isEqualTo(field.isIndexed());
    assertThat(copy.isLazy()).isEqualTo(field.isLazy());
    assertThat(copy.isStoreOffsetWithTermVector()).isEqualTo(field.isStoreOffsetWithTermVector());
    assertThat(copy.isStorePositionWithTermVector()).isEqualTo(field.isStorePositionWithTermVector());
    assertThat(copy.isStored()).isEqualTo(field.isStored());
    assertThat(copy.isTokenized()).isEqualTo(field.isTokenized());
    // Value payloads: reader, token stream, and string forms.
    assertThat(compareReaders(copy.readerValue(), field.readerValue())).isTrue();
    assertThat(compareTokenStreams(field.tokenStreamValue(), copy.tokenStreamValue())).isTrue();
    assertThat(copy.stringValue()).isEqualTo(field.stringValue());
    assertThat(copy.isTermVectorStored()).isEqualTo(field.isTermVectorStored());
}
From source file:org.hibernate.search.test.util.SerializationTestHelper.java
License:LGPL
/**
 * Asserts that a copied field is equal to the original across name, binary
 * value, boost, field type, reader/token-stream/string payloads.
 *
 * @param original the source field
 * @param copy     the field expected to mirror it
 */
private static void assertFieldEquality(Field original, Field copy) {
    assertThat(copy.name()).isEqualTo(original.name());
    assertThat(copy.binaryValue()).isEqualTo(original.binaryValue());
    assertThat(copy.boost()).isEqualTo(original.boost());
    // Delegate the structured FieldType comparison to the dedicated helper.
    assertFieldType(copy.fieldType(), original.fieldType());
    // Value payloads need content-aware comparison helpers.
    assertThat(compareReaders(copy.readerValue(), original.readerValue())).isTrue();
    assertThat(compareTokenStreams(original.tokenStreamValue(), copy.tokenStreamValue())).isTrue();
    assertThat(copy.stringValue()).isEqualTo(original.stringValue());
}