List of usage examples for org.apache.lucene.document FieldType setStoreTermVectorPayloads
public void setStoreTermVectorPayloads(boolean value)
Pass true to also store token payloads in the term vector for this field. From source file: alix.lucene.Alix.java
License:Open Source License
/** * Parse field type String// w ww.ja va2 s. c om * * @param name Name of the field * @param value Value of the field * @param options a string composed of letters in any order following Luke convention to describe fields * IdfpoPSV * I: Indexed * d: docs * f: freqs * p: pos * o: offset * P: payloads * S: Stored * V: TermVector */ public static FieldType fieldType(String options) { FieldType type; if (options == null) return new FieldType(); if ("S".equals(options)) { type = new FieldType(); type.setStored(true); return type; } if (options.contains("S")) { type = new FieldType(TextField.TYPE_STORED); } else { type = new FieldType(TextField.TYPE_NOT_STORED); } // optimize ? type.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); if (options.contains("p")) { type.setStoreTermVectorPositions(true); } if (options.contains("o")) { type.setTokenized(true); type.setStoreTermVectors(true); type.setStoreTermVectorOffsets(true); } if (options.contains("P")) { type.setTokenized(true); type.setStoreTermVectors(true); type.setStoreTermVectorPositions(true); type.setStoreTermVectorPayloads(true); } if (options.contains("V")) { type.setTokenized(true); type.setStoreTermVectors(true); } return type; }
From source file:api.startup.PDFIndexer.java
License:Open Source License
/** * Indexes a single document and writes it to the given index writer * @param writer - the index writer to writer * @param metadata - the document/* w w w. j a v a 2 s . c o m*/ * @throws IOException */ static void indexDoc(IndexWriter writer, DocumentMetadata metadata) throws IOException { Path file = Paths.get(metadata.getFilename()); try { Document doc = new Document(); Field pathField = new StringField(Constants.FIELD_PATH, file.toString(), Field.Store.YES); doc.add(pathField); // Add Document metadata // doc.add(new StringField(Constants.FIELD_AUTHOR, metadata.getAuthor(), Field.Store.YES)); doc.add(new StringField(Constants.FIELD_TITLE, metadata.getTitle(), Field.Store.YES)); doc.add(new StringField(Constants.FIELD_CONFERENCE, metadata.getConference(), Field.Store.YES)); // End of Document Metadata // Field modified = new LongField(Constants.FIELD_MODIFIED, Files.getLastModifiedTime(file).toMillis(), Field.Store.YES); doc.add(modified); PDFTextExtractor extractor = new PDFTextExtractor(); // Get the string contents String textContents = extractor.extractText(file.toString()); // Store the string contents FieldType contentsType = new FieldType(); contentsType.setStored(true); contentsType.setTokenized(true); contentsType.setStoreTermVectors(true); contentsType.setStoreTermVectorPositions(true); contentsType.setStoreTermVectorPayloads(true); contentsType.setStoreTermVectorOffsets(true); contentsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); Field contents = new Field(Constants.FIELD_CONTENTS, textContents, contentsType); doc.add(contents); if (writer.getConfig().getOpenMode() == IndexWriterConfig.OpenMode.CREATE) { // New index, so we just add the document (no old document can be there): log.info("adding " + file + " to index"); writer.addDocument(doc); } else { // Existing index (an old copy of this document may have been indexed) so // we use updateDocument instead to replace the old one matching the exact // path, if present: 
log.info("updating " + file + " in index"); writer.updateDocument(new Term(Constants.FIELD_PATH, file.toString()), doc); } } catch (IOException e) { log.error("Failed to read file " + metadata.getFilename()); } }
From source file:com.github.hotware.lucene.extension.bean.field.BeanInformationCacheImpl.java
License:BEER-WARE LICENSE
private FieldInformation buildFieldInformation(BeanField bf, Field field, Class<?> fieldClass) { com.github.hotware.lucene.extension.bean.type.Type typeWrapper; try {// w ww . ja v a 2 s . c om // TODO: maybe cache these? typeWrapper = (com.github.hotware.lucene.extension.bean.type.Type) bf.type().newInstance(); } catch (InstantiationException | IllegalAccessException e) { throw new RuntimeException(e); } FieldType fieldType = new FieldType(); fieldType.setIndexed(bf.index()); fieldType.setStored(bf.store()); fieldType.setTokenized(bf.tokenized()); fieldType.setStoreTermVectors(bf.storeTermVectors()); fieldType.setStoreTermVectorPositions(bf.storeTermVectorPositions()); fieldType.setStoreTermVectorOffsets(bf.storeTermVectorOffsets()); fieldType.setStoreTermVectorPayloads(bf.storeTermVectorPayloads()); fieldType.setOmitNorms(bf.omitNorms()); fieldType.setIndexOptions(bf.indexOptions()); typeWrapper.configureFieldType(fieldType); fieldType.freeze(); return new FieldInformation(new FrozenField(field), fieldClass, fieldType, bf); }
From source file:com.qwazr.search.field.CustomFieldType.java
License:Apache License
/**
 * Builds a Lucene FieldType from the (all-optional) field definition and
 * hands a CustomField carrying the value to the consumer. Attributes left
 * null in the definition keep Lucene's defaults.
 */
@Override
final public void fillValue(final Object value, final FieldConsumer consumer) {
    final FieldType luceneType = new FieldType();
    // Basic storage / analysis flags.
    if (fieldDef.stored != null)
        luceneType.setStored(fieldDef.stored);
    if (fieldDef.tokenized != null)
        luceneType.setTokenized(fieldDef.tokenized);
    if (fieldDef.omit_norms != null)
        luceneType.setOmitNorms(fieldDef.omit_norms);
    // Term vector flags.
    if (fieldDef.store_termvectors != null)
        luceneType.setStoreTermVectors(fieldDef.store_termvectors);
    if (fieldDef.store_termvector_offsets != null)
        luceneType.setStoreTermVectorOffsets(fieldDef.store_termvector_offsets);
    if (fieldDef.store_termvector_positions != null)
        luceneType.setStoreTermVectorPositions(fieldDef.store_termvector_positions);
    if (fieldDef.store_termvector_payloads != null)
        luceneType.setStoreTermVectorPayloads(fieldDef.store_termvector_payloads);
    // Index / doc-values configuration.
    if (fieldDef.numeric_type != null)
        luceneType.setNumericType(fieldDef.numeric_type);
    if (fieldDef.index_options != null)
        luceneType.setIndexOptions(fieldDef.index_options);
    if (fieldDef.docvalues_type != null)
        luceneType.setDocValuesType(fieldDef.docvalues_type);
    consumer.accept(new CustomField(fieldName, luceneType, value));
}
From source file:com.rocana.lucene.codec.v1.TestBlockPostingsFormat2.java
License:Apache License
private Document newDocument() { Document doc = new Document(); for (IndexOptions option : IndexOptions.values()) { if (option == IndexOptions.NONE) { continue; }/*from ww w . j av a 2s.com*/ FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); // turn on tvs for a cross-check, since we rely upon checkindex in this test (for now) ft.setStoreTermVectors(true); ft.setStoreTermVectorOffsets(true); ft.setStoreTermVectorPositions(true); ft.setStoreTermVectorPayloads(true); ft.setIndexOptions(option); doc.add(new Field(option.toString(), "", ft)); } return doc; }
From source file:com.rocana.lucene.codec.v1.TestBlockPostingsFormat3.java
License:Apache License
/**
 * Indexes MAXDOC documents across eight fields that exercise every
 * IndexOptions level (docs / freqs / positions / offsets) with and without
 * payloads, then verifies the index, force-merges to one segment, and
 * verifies again.
 */
public void test() throws Exception {
    Directory dir = newDirectory();
    // Per-field analyzer: fields whose name mentions payloads get a mock
    // payload filter (fixed- or variable-length), others plain tokens.
    Analyzer analyzer = new Analyzer(Analyzer.PER_FIELD_REUSE_STRATEGY) {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer tokenizer = new MockTokenizer();
            if (fieldName.contains("payloadsFixed")) {
                TokenFilter filter = new MockFixedLengthPayloadFilter(new Random(0), tokenizer, 1);
                return new TokenStreamComponents(tokenizer, filter);
            } else if (fieldName.contains("payloadsVariable")) {
                TokenFilter filter = new MockVariableLengthPayloadFilter(new Random(0), tokenizer);
                return new TokenStreamComponents(tokenizer, filter);
            } else {
                return new TokenStreamComponents(tokenizer);
            }
        }
    };
    IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
    // Force the postings format under test for every field.
    iwc.setCodec(TestUtil.alwaysPostingsFormat(new RocanaLucene50PostingsFormat()));
    // TODO we could actually add more fields implemented with different PFs
    // or, just put this test into the usual rotation?
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
    Document doc = new Document();
    // One FieldType per indexing level; term vectors are turned on for a
    // cross-check against the postings.
    FieldType docsOnlyType = new FieldType(TextField.TYPE_NOT_STORED);
    // turn this on for a cross-check
    docsOnlyType.setStoreTermVectors(true);
    docsOnlyType.setIndexOptions(IndexOptions.DOCS);
    FieldType docsAndFreqsType = new FieldType(TextField.TYPE_NOT_STORED);
    // turn this on for a cross-check
    docsAndFreqsType.setStoreTermVectors(true);
    docsAndFreqsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
    FieldType positionsType = new FieldType(TextField.TYPE_NOT_STORED);
    // turn these on for a cross-check
    positionsType.setStoreTermVectors(true);
    positionsType.setStoreTermVectorPositions(true);
    positionsType.setStoreTermVectorOffsets(true);
    positionsType.setStoreTermVectorPayloads(true);
    // offsetsType = positionsType plus offsets in the postings themselves.
    FieldType offsetsType = new FieldType(positionsType);
    offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
    Field field1 = new Field("field1docs", "", docsOnlyType);
    Field field2 = new Field("field2freqs", "", docsAndFreqsType);
    Field field3 = new Field("field3positions", "", positionsType);
    Field field4 = new Field("field4offsets", "", offsetsType);
    Field field5 = new Field("field5payloadsFixed", "", positionsType);
    Field field6 = new Field("field6payloadsVariable", "", positionsType);
    Field field7 = new Field("field7payloadsFixedOffsets", "", offsetsType);
    Field field8 = new Field("field8payloadsVariableOffsets", "", offsetsType);
    doc.add(field1);
    doc.add(field2);
    doc.add(field3);
    doc.add(field4);
    doc.add(field5);
    doc.add(field6);
    doc.add(field7);
    doc.add(field8);
    // Reuse the same Document/Field instances, only swapping the string
    // value each iteration; every field gets identical text.
    for (int i = 0; i < MAXDOC; i++) {
        String stringValue = Integer.toString(i) + " verycommon " + English.intToEnglish(i).replace('-', ' ')
                + " " + TestUtil.randomSimpleString(random());
        field1.setStringValue(stringValue);
        field2.setStringValue(stringValue);
        field3.setStringValue(stringValue);
        field4.setStringValue(stringValue);
        field5.setStringValue(stringValue);
        field6.setStringValue(stringValue);
        field7.setStringValue(stringValue);
        field8.setStringValue(stringValue);
        iw.addDocument(doc);
    }
    iw.close();
    verify(dir);
    // for some extra coverage, checkIndex before we forceMerge
    TestUtil.checkIndex(dir);
    // Reopen in APPEND mode and collapse to a single segment, then verify
    // the merged index too.
    iwc = newIndexWriterConfig(analyzer);
    iwc.setCodec(TestUtil.alwaysPostingsFormat(new RocanaLucene50PostingsFormat()));
    iwc.setOpenMode(OpenMode.APPEND);
    IndexWriter iw2 = new IndexWriter(dir, iwc);
    iw2.forceMerge(1);
    iw2.close();
    verify(dir);
    dir.close();
}
From source file:com.tuplejump.stargate.cassandra.CassandraUtils.java
License:Apache License
/**
 * Translates column Properties into a Lucene FieldType; the Cassandra
 * validator is consulted (via Fields.setNumericType) to decide whether the
 * field is numeric, in which case a precision step is applied as well.
 */
public static FieldType fieldType(Properties properties, AbstractType validator) {
    final FieldType type = new FieldType();
    // Core indexing flags.
    type.setIndexed(properties.isIndexed());
    type.setStored(properties.isStored());
    type.setTokenized(properties.isTokenized());
    type.setOmitNorms(properties.isOmitNorms());
    type.setIndexOptions(properties.getIndexOptions());
    // Term vector flags.
    type.setStoreTermVectors(properties.isStoreTermVectors());
    type.setStoreTermVectorOffsets(properties.isStoreTermVectorOffsets());
    type.setStoreTermVectorPayloads(properties.isStoreTermVectorPayloads());
    type.setStoreTermVectorPositions(properties.isStoreTermVectorPositions());
    // Numeric columns additionally carry a precision step for range queries.
    Fields.setNumericType(validator, type);
    if (type.numericType() != null) {
        type.setNumericPrecisionStep(properties.getNumericPrecisionStep());
    }
    return type;
}
From source file:com.tuplejump.stargate.lucene.LuceneUtils.java
License:Apache License
public static FieldType dynamicFieldType(Properties properties) { FieldType fieldType = new FieldType(); fieldType.setIndexed(properties.isIndexed()); fieldType.setTokenized(properties.isTokenized()); fieldType.setStored(properties.isStored()); fieldType.setStoreTermVectors(properties.isStoreTermVectors()); fieldType.setStoreTermVectorOffsets(properties.isStoreTermVectorOffsets()); fieldType.setStoreTermVectorPayloads(properties.isStoreTermVectorPayloads()); fieldType.setStoreTermVectorPositions(properties.isStoreTermVectorPositions()); fieldType.setOmitNorms(properties.isOmitNorms()); fieldType.setIndexOptions(properties.getIndexOptions()); if (properties.getType().isNumeric()) { switch (properties.getType()) { case integer: fieldType.setNumericType(FieldType.NumericType.INT); break; case bigint: fieldType.setNumericType(FieldType.NumericType.LONG); break; case decimal: fieldType.setNumericType(FieldType.NumericType.FLOAT); break; default://from w w w . j av a 2s.c o m fieldType.setNumericType(FieldType.NumericType.DOUBLE); break; } fieldType.setNumericPrecisionStep(properties.getNumericPrecisionStep()); } return fieldType; }
From source file:edu.co.usbcali.ir.processes.Indexer.java
/**
 * Builds the Lucene document for one file: contents, file name, and
 * canonical path, all stored and indexed with full term vectors.
 *
 * @throws IOException if the file contents or canonical path cannot be read
 */
private Document getDocument(File file) throws IOException {
    // Shared type: stored + tokenized, DOCS-only postings, but full term
    // vectors (positions, offsets, payloads).
    final FieldType fieldType = new FieldType();
    fieldType.setIndexOptions(IndexOptions.DOCS);
    fieldType.setStored(true);
    fieldType.setTokenized(true);
    fieldType.setStoreTermVectors(true);
    fieldType.setStoreTermVectorPositions(true);
    fieldType.setStoreTermVectorOffsets(true);
    fieldType.setStoreTermVectorPayloads(true);
    final Document document = new Document();
    document.add(new Field(LuceneConstants.CONTENTS, getContent(file), fieldType));
    document.add(new Field(LuceneConstants.FILE_NAME, file.getName(), fieldType));
    document.add(new Field(LuceneConstants.FILE_PATH, file.getCanonicalPath(), fieldType));
    return document;
}
From source file:lab_mri.CranIndexer.java
/** * doc_file index_dir//from w w w.j av a 2 s . c om * * * @param args */ public static void main(String args[]) { File index_dir = new File("/home/luigi/NetBeansProjects/LAB_mri/inv_index"); String doc_file = "/home/luigi/NetBeansProjects/LAB_mri/CRAN/cran.all.1400"; try { SearchEngine se = new SearchEngine(index_dir); se.open(); File inputFile = new File(doc_file); BufferedReader reader = new BufferedReader(new FileReader(inputFile)); FieldType ft = new FieldType(); ft.stored(); ft.setIndexed(true); //done as default ft.setStoreTermVectors(true); ft.setStoreTermVectorPositions(true); ft.setStoreTermVectorOffsets(true); ft.setStoreTermVectorPayloads(true); String id = null; StringBuilder title = new StringBuilder(); StringBuilder authors = new StringBuilder(); StringBuilder affiliation = new StringBuilder(); StringBuilder abst = new StringBuilder(); char code = ' '; int c = 0; while (reader.ready()) { String line = reader.readLine(); if (line.startsWith(".I")) { if (id != null) { System.out.println(id + "\t" + abst); Document doc = new Document(); doc.add(new StringField("id", id, Field.Store.YES)); doc.add(new TextField("title", title.toString(), Field.Store.NO)); doc.add(new TextField("authors", authors.toString(), Field.Store.NO)); doc.add(new TextField("affiliation", affiliation.toString(), Field.Store.NO)); doc.add(new Field("abst", abst.toString(), ft)); se.addDocument(doc); c++; title = new StringBuilder(); authors = new StringBuilder(); affiliation = new StringBuilder(); abst = new StringBuilder(); } id = line.substring(2).trim(); } else if (line.startsWith(".T")) { code = 'T'; } else if (line.startsWith(".A")) { code = 'A'; } else if (line.startsWith(".B")) { code = 'B'; } else if (line.startsWith(".W")) { code = 'W'; } else { switch (code) { case 'T': title.append(line).append(" "); break; case 'A': authors.append(line).append(" "); break; case 'B': affiliation.append(line).append(" "); break; case 'W': abst.append(line).append(" "); break; default: 
break; } } } reader.close(); //store last documents if (id != null) { System.out.println(id + "\t" + title); //store document //put index code here Document doc = new Document(); doc.add(new StringField("id", id, Field.Store.YES)); doc.add(new TextField("title", title.toString(), Field.Store.NO)); doc.add(new TextField("authors", authors.toString(), Field.Store.NO)); doc.add(new TextField("affiliation", affiliation.toString(), Field.Store.NO)); doc.add(new TextField("abst", abst.toString(), Field.Store.NO)); se.addDocument(doc); c++; } System.out.println("Total docs: " + c); se.close(); } catch (IOException ioex) { Logger.getLogger(CranIndexer.class.getName()).log(Level.SEVERE, null, ioex); } }