List of usage examples for org.apache.lucene.index IndexOptions DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS
IndexOptions DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS
To view the source code for org.apache.lucene.index IndexOptions DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, click the Source Link.
From source file:alix.lucene.Alix.java
License:Open Source License
/** * Parse field type String/* w ww . j a va 2s.c o m*/ * * @param name Name of the field * @param value Value of the field * @param options a string composed of letters in any order following Luke convention to describe fields * IdfpoPSV * I: Indexed * d: docs * f: freqs * p: pos * o: offset * P: payloads * S: Stored * V: TermVector */ public static FieldType fieldType(String options) { FieldType type; if (options == null) return new FieldType(); if ("S".equals(options)) { type = new FieldType(); type.setStored(true); return type; } if (options.contains("S")) { type = new FieldType(TextField.TYPE_STORED); } else { type = new FieldType(TextField.TYPE_NOT_STORED); } // optimize ? type.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); if (options.contains("p")) { type.setStoreTermVectorPositions(true); } if (options.contains("o")) { type.setTokenized(true); type.setStoreTermVectors(true); type.setStoreTermVectorOffsets(true); } if (options.contains("P")) { type.setTokenized(true); type.setStoreTermVectors(true); type.setStoreTermVectorPositions(true); type.setStoreTermVectorPayloads(true); } if (options.contains("V")) { type.setTokenized(true); type.setStoreTermVectors(true); } return type; }
From source file:api.startup.PDFIndexer.java
License:Open Source License
/** * Indexes a single document and writes it to the given index writer * @param writer - the index writer to writer * @param metadata - the document// w w w . j a v a2 s .co m * @throws IOException */ static void indexDoc(IndexWriter writer, DocumentMetadata metadata) throws IOException { Path file = Paths.get(metadata.getFilename()); try { Document doc = new Document(); Field pathField = new StringField(Constants.FIELD_PATH, file.toString(), Field.Store.YES); doc.add(pathField); // Add Document metadata // doc.add(new StringField(Constants.FIELD_AUTHOR, metadata.getAuthor(), Field.Store.YES)); doc.add(new StringField(Constants.FIELD_TITLE, metadata.getTitle(), Field.Store.YES)); doc.add(new StringField(Constants.FIELD_CONFERENCE, metadata.getConference(), Field.Store.YES)); // End of Document Metadata // Field modified = new LongField(Constants.FIELD_MODIFIED, Files.getLastModifiedTime(file).toMillis(), Field.Store.YES); doc.add(modified); PDFTextExtractor extractor = new PDFTextExtractor(); // Get the string contents String textContents = extractor.extractText(file.toString()); // Store the string contents FieldType contentsType = new FieldType(); contentsType.setStored(true); contentsType.setTokenized(true); contentsType.setStoreTermVectors(true); contentsType.setStoreTermVectorPositions(true); contentsType.setStoreTermVectorPayloads(true); contentsType.setStoreTermVectorOffsets(true); contentsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); Field contents = new Field(Constants.FIELD_CONTENTS, textContents, contentsType); doc.add(contents); if (writer.getConfig().getOpenMode() == IndexWriterConfig.OpenMode.CREATE) { // New index, so we just add the document (no old document can be there): log.info("adding " + file + " to index"); writer.addDocument(doc); } else { // Existing index (an old copy of this document may have been indexed) so // we use updateDocument instead to replace the old one matching the exact // path, if present: 
log.info("updating " + file + " in index"); writer.updateDocument(new Term(Constants.FIELD_PATH, file.toString()), doc); } } catch (IOException e) { log.error("Failed to read file " + metadata.getFilename()); } }
From source file:com.lucure.core.codec.LucurePostingsWriter.java
License:Apache License
@Override public int setField(FieldInfo fieldInfo) { IndexOptions indexOptions = fieldInfo.getIndexOptions(); fieldHasFreqs = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0; fieldHasPositions = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0; fieldHasOffsets = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0; fieldHasPayloads = fieldInfo.hasPayloads(); skipWriter.setField(fieldHasPositions, fieldHasOffsets, fieldHasPayloads); lastState = emptyState;/*from w w w. j a v a 2 s . c o m*/ if (fieldHasPositions) { if (fieldHasPayloads || fieldHasOffsets) { return 3; // doc + pos + pay FP } else { return 2; // doc + pos FP } } else { return 1; // doc FP } }
From source file:com.o19s.es.ltr.query.LtrQueryTests.java
License:Apache License
/**
 * Creates a field whose type indexes docs, freqs, positions and offsets.
 *
 * @param name   the field name
 * @param value  the field value
 * @param stored {@code Store.YES} to mark the field type as stored; any other value
 *               leaves it unstored
 * @return the new field
 */
private Field newField(String name, String value, Store stored) {
    final boolean keepValue = (stored == Store.YES);

    final FieldType type = new FieldType();
    type.setStored(keepValue);
    type.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);

    return new Field(name, value, type);
}
From source file:com.rapidminer.search.GlobalSearchUtilities.java
License:Open Source License
GlobalSearchUtilities() {
titleFieldType = new FieldType();
titleFieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
titleFieldType.setStored(true);//from w ww . jav a2 s . c o m
titleFieldType.setTokenized(true);
textFieldType = new FieldType();
textFieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
textFieldType.setStored(true);
textFieldType.setTokenized(true);
textFieldType.setStoreTermVectors(true);
textFieldType.setStoreTermVectorPositions(true);
textFieldType.setStoreTermVectorOffsets(true);
textFieldType.setStoreTermVectorPayloads(true);
}
From source file:com.rocana.lucene.codec.v1.RocanaFieldReader.java
License:Apache License
/**
 * Reports whether this field's index options are at least
 * {@link IndexOptions#DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS}.
 *
 * @return {@code true} if the field's index options compare greater than or equal to
 *         the offsets level
 */
@Override
public boolean hasOffsets() {
    final IndexOptions options = fieldInfo.getIndexOptions();
    return options.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
}
From source file:com.rocana.lucene.codec.v1.TestBlockPostingsFormat3.java
License:Apache License
public void test() throws Exception { Directory dir = newDirectory();/*from ww w . j av a 2 s.co m*/ Analyzer analyzer = new Analyzer(Analyzer.PER_FIELD_REUSE_STRATEGY) { @Override protected TokenStreamComponents createComponents(String fieldName) { Tokenizer tokenizer = new MockTokenizer(); if (fieldName.contains("payloadsFixed")) { TokenFilter filter = new MockFixedLengthPayloadFilter(new Random(0), tokenizer, 1); return new TokenStreamComponents(tokenizer, filter); } else if (fieldName.contains("payloadsVariable")) { TokenFilter filter = new MockVariableLengthPayloadFilter(new Random(0), tokenizer); return new TokenStreamComponents(tokenizer, filter); } else { return new TokenStreamComponents(tokenizer); } } }; IndexWriterConfig iwc = newIndexWriterConfig(analyzer); iwc.setCodec(TestUtil.alwaysPostingsFormat(new RocanaLucene50PostingsFormat())); // TODO we could actually add more fields implemented with different PFs // or, just put this test into the usual rotation? RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc); Document doc = new Document(); FieldType docsOnlyType = new FieldType(TextField.TYPE_NOT_STORED); // turn this on for a cross-check docsOnlyType.setStoreTermVectors(true); docsOnlyType.setIndexOptions(IndexOptions.DOCS); FieldType docsAndFreqsType = new FieldType(TextField.TYPE_NOT_STORED); // turn this on for a cross-check docsAndFreqsType.setStoreTermVectors(true); docsAndFreqsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS); FieldType positionsType = new FieldType(TextField.TYPE_NOT_STORED); // turn these on for a cross-check positionsType.setStoreTermVectors(true); positionsType.setStoreTermVectorPositions(true); positionsType.setStoreTermVectorOffsets(true); positionsType.setStoreTermVectorPayloads(true); FieldType offsetsType = new FieldType(positionsType); offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); Field field1 = new Field("field1docs", "", docsOnlyType); Field field2 = new 
Field("field2freqs", "", docsAndFreqsType); Field field3 = new Field("field3positions", "", positionsType); Field field4 = new Field("field4offsets", "", offsetsType); Field field5 = new Field("field5payloadsFixed", "", positionsType); Field field6 = new Field("field6payloadsVariable", "", positionsType); Field field7 = new Field("field7payloadsFixedOffsets", "", offsetsType); Field field8 = new Field("field8payloadsVariableOffsets", "", offsetsType); doc.add(field1); doc.add(field2); doc.add(field3); doc.add(field4); doc.add(field5); doc.add(field6); doc.add(field7); doc.add(field8); for (int i = 0; i < MAXDOC; i++) { String stringValue = Integer.toString(i) + " verycommon " + English.intToEnglish(i).replace('-', ' ') + " " + TestUtil.randomSimpleString(random()); field1.setStringValue(stringValue); field2.setStringValue(stringValue); field3.setStringValue(stringValue); field4.setStringValue(stringValue); field5.setStringValue(stringValue); field6.setStringValue(stringValue); field7.setStringValue(stringValue); field8.setStringValue(stringValue); iw.addDocument(doc); } iw.close(); verify(dir); TestUtil.checkIndex(dir); // for some extra coverage, checkIndex before we forceMerge iwc = newIndexWriterConfig(analyzer); iwc.setCodec(TestUtil.alwaysPostingsFormat(new RocanaLucene50PostingsFormat())); iwc.setOpenMode(OpenMode.APPEND); IndexWriter iw2 = new IndexWriter(dir, iwc); iw2.forceMerge(1); iw2.close(); verify(dir); dir.close(); }
From source file:com.sindicetech.siren.index.codecs.siren10.Siren10PostingsWriter.java
License:Open Source License
@Override public int setField(final FieldInfo fieldInfo) { this.fieldInfo = fieldInfo; this.indexOptions = fieldInfo.getIndexOptions(); if (indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0) { throw new UnsupportedOperationException("this codec cannot index offsets"); }//from w w w .jav a 2 s . c om skipWriter.setIndexOptions(indexOptions); lastSkipFP = 0; lastState = setEmptyState(); return 0; }
From source file:com.spike.text.lucene.util.LuceneTestBookIndexingUtil.java
License:Apache License
/**
 * Builds a Lucene {@link Document} for a book described by a properties file.
 *
 * <p>The properties {@code isbn}, {@code title}, {@code author}, {@code url},
 * {@code subject} and {@code pubmonth} are read from {@code file}. The category is the
 * part of the file's parent path that follows {@code rootDir}, with the platform
 * separator replaced by {@code '/'}. The title, subject, author and category are
 * additionally indexed, unstored, under the {@code contents} field.
 *
 * @param rootDir the base directory; its length is used to strip the prefix from the
 *                file's parent path
 * @param file    the properties file describing one book
 * @return the populated document
 * @throws IOException      if the properties file cannot be read
 * @throws RuntimeException wrapping a {@link ParseException} if {@code pubmonth}
 *                          cannot be parsed as a date
 */
public static Document getDocument(String rootDir, File file) throws IOException {
    Properties props = new Properties();
    // try-with-resources guarantees the stream is closed even if load() fails.
    try (FileInputStream in = new FileInputStream(file)) {
        props.load(in);
    }

    Document doc = new Document();

    // category comes from relative path below the base directory
    String category = file.getParent().substring(rootDir.length());
    category = category.replace(File.separatorChar, '/');

    String isbn = props.getProperty("isbn");
    String title = props.getProperty("title");
    String author = props.getProperty("author");
    String url = props.getProperty("url");
    String subject = props.getProperty("subject");
    String pubmonth = props.getProperty("pubmonth");

    System.out.println(
            title + "\n" + author + "\n" + subject + "\n" + pubmonth + "\n" + category + "\n---------");

    // legacy equivalent: Field.Store.YES, Field.Index.NOT_ANALYZED
    doc.add(LuceneAppUtil.createStringField("isbn", isbn, Store.YES, false, IndexOptions.DOCS, false));

    // legacy equivalent: Field.Store.YES, Field.Index.NOT_ANALYZED
    doc.add(LuceneAppUtil.createStringField("category", category, Store.YES, false, IndexOptions.DOCS, false));

    // legacy equivalent: Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS
    doc.add(LuceneAppUtil.createStringField("title", title, Store.YES, true,
            IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, false));

    // legacy equivalent: Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS,
    // Field.TermVector.WITH_POSITIONS_OFFSETS
    doc.add(LuceneAppUtil.createStringField("title2", title.toLowerCase(), Store.YES, false,
            IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, true));

    // split multiple authors into unique field instances
    String[] authors = author.split(",");
    for (String a : authors) {
        // legacy equivalent: Field.Store.YES, Field.Index.NOT_ANALYZED,
        // Field.TermVector.WITH_POSITIONS_OFFSETS
        doc.add(LuceneAppUtil.createStringField("author", a, Store.YES, false,
                IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, false));
    }

    // legacy equivalent: Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS
    doc.add(LuceneAppUtil.createStringField("url", url, Store.YES, false, IndexOptions.DOCS, true));

    // legacy equivalent: Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS
    doc.add(LuceneAppUtil.createStringField("subject", subject, Store.YES, true,
            IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, true));

    Field pubmonthField = new IntField("pubmonth", Integer.parseInt(pubmonth), Store.YES);
    doc.add(pubmonthField);

    Date d;
    try {
        d = DateTools.stringToDate(pubmonth);
    } catch (ParseException pe) {
        throw new RuntimeException(pe);
    }
    // Milliseconds since the epoch divided by the milliseconds in one day.
    Field pubmonthAsDayField = new IntField("pubmonthAsDay", (int) (d.getTime() / (1000 * 3600 * 24)),
            Store.YES);
    doc.add(pubmonthAsDayField);

    for (String text : new String[] { title, subject, author, category }) {
        // legacy equivalent: Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS
        doc.add(LuceneAppUtil.createStringField("contents", text, Store.NO, true,
                IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, false));
    }

    return doc;
}
From source file:com.vmware.xenon.services.common.Lucene60FieldInfosFormatWithCache.java
License:Open Source License
private static IndexOptions getIndexOptions(IndexInput input, byte b) throws IOException { switch (b) {//from w w w. j a v a2 s . com case 0: return IndexOptions.NONE; case 1: return IndexOptions.DOCS; case 2: return IndexOptions.DOCS_AND_FREQS; case 3: return IndexOptions.DOCS_AND_FREQS_AND_POSITIONS; case 4: return IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS; default: // BUG throw new CorruptIndexException("invalid IndexOptions byte: " + b, input); } }