Example usage for org.apache.lucene.document FieldType setIndexOptions

List of usage examples for org.apache.lucene.document FieldType setIndexOptions

Introduction

In this page you can find the example usage for org.apache.lucene.document FieldType setIndexOptions.

Prototype

public void setIndexOptions(IndexOptions value) 

Source Link

Document

Sets the indexing options for the field:

Usage

From source file:alix.lucene.Alix.java

License:Open Source License

/**
 * Parse field type String/*w w w  . j av  a  2s. c  o m*/
 * 
 * @param name Name of the field
 * @param value Value of the field
 * @param options a string composed of letters in any order following Luke convention to describe fields
 * IdfpoPSV
 * I: Indexed
 * d: docs
 * f: freqs
 * p: pos
 * o: offset
 * P: payloads
 * S: Stored
 * V: TermVector
 */
public static FieldType fieldType(String options) {
    FieldType type;
    if (options == null)
        return new FieldType();
    if ("S".equals(options)) {
        type = new FieldType();
        type.setStored(true);
        return type;
    }
    if (options.contains("S")) {
        type = new FieldType(TextField.TYPE_STORED);
    } else {
        type = new FieldType(TextField.TYPE_NOT_STORED);
    }
    // optimize ?
    type.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
    if (options.contains("p")) {
        type.setStoreTermVectorPositions(true);
    }

    if (options.contains("o")) {
        type.setTokenized(true);
        type.setStoreTermVectors(true);
        type.setStoreTermVectorOffsets(true);
    }
    if (options.contains("P")) {
        type.setTokenized(true);
        type.setStoreTermVectors(true);
        type.setStoreTermVectorPositions(true);
        type.setStoreTermVectorPayloads(true);
    }
    if (options.contains("V")) {
        type.setTokenized(true);
        type.setStoreTermVectors(true);
    }
    return type;
}

From source file:api.startup.PDFIndexer.java

License:Open Source License

/**
 * Indexes a single document and writes it to the given index writer
 * @param writer - the index writer to writer
 * @param metadata - the document/*from  www. jav  a2  s  . c  om*/
 * @throws IOException
 */
static void indexDoc(IndexWriter writer, DocumentMetadata metadata) throws IOException {
    Path file = Paths.get(metadata.getFilename());
    try {
        Document doc = new Document();

        Field pathField = new StringField(Constants.FIELD_PATH, file.toString(), Field.Store.YES);
        doc.add(pathField);

        // Add Document metadata //
        doc.add(new StringField(Constants.FIELD_AUTHOR, metadata.getAuthor(), Field.Store.YES));
        doc.add(new StringField(Constants.FIELD_TITLE, metadata.getTitle(), Field.Store.YES));
        doc.add(new StringField(Constants.FIELD_CONFERENCE, metadata.getConference(), Field.Store.YES));
        // End of Document Metadata //

        Field modified = new LongField(Constants.FIELD_MODIFIED, Files.getLastModifiedTime(file).toMillis(),
                Field.Store.YES);
        doc.add(modified);

        PDFTextExtractor extractor = new PDFTextExtractor();
        // Get the string contents
        String textContents = extractor.extractText(file.toString());

        // Store the string contents
        FieldType contentsType = new FieldType();
        contentsType.setStored(true);
        contentsType.setTokenized(true);
        contentsType.setStoreTermVectors(true);
        contentsType.setStoreTermVectorPositions(true);
        contentsType.setStoreTermVectorPayloads(true);
        contentsType.setStoreTermVectorOffsets(true);
        contentsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
        Field contents = new Field(Constants.FIELD_CONTENTS, textContents, contentsType);
        doc.add(contents);

        if (writer.getConfig().getOpenMode() == IndexWriterConfig.OpenMode.CREATE) {
            // New index, so we just add the document (no old document can be there):
            log.info("adding " + file + " to index");
            writer.addDocument(doc);
        } else {
            // Existing index (an old copy of this document may have been indexed) so
            // we use updateDocument instead to replace the old one matching the exact
            // path, if present:
            log.info("updating " + file + " in index");
            writer.updateDocument(new Term(Constants.FIELD_PATH, file.toString()), doc);
        }
    } catch (IOException e) {
        log.error("Failed to read file " + metadata.getFilename());
    }

}

From source file:cc.twittertools.index.IndexStatuses.java

License:Apache License

@SuppressWarnings("static-access")
public static void main(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(new Option(HELP_OPTION, "show help"));
    options.addOption(new Option(OPTIMIZE_OPTION, "merge indexes into a single segment"));
    options.addOption(new Option(STORE_TERM_VECTORS_OPTION, "store term vectors"));

    options.addOption(OptionBuilder.withArgName("dir").hasArg().withDescription("source collection directory")
            .create(COLLECTION_OPTION));
    options.addOption(//from w ww .  java2 s  . c o  m
            OptionBuilder.withArgName("dir").hasArg().withDescription("index location").create(INDEX_OPTION));
    options.addOption(OptionBuilder.withArgName("file").hasArg().withDescription("file with deleted tweetids")
            .create(DELETES_OPTION));
    options.addOption(OptionBuilder.withArgName("id").hasArg().withDescription("max id").create(MAX_ID_OPTION));

    CommandLine cmdline = null;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        System.exit(-1);
    }

    if (cmdline.hasOption(HELP_OPTION) || !cmdline.hasOption(COLLECTION_OPTION)
            || !cmdline.hasOption(INDEX_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(IndexStatuses.class.getName(), options);
        System.exit(-1);
    }

    String collectionPath = cmdline.getOptionValue(COLLECTION_OPTION);
    String indexPath = cmdline.getOptionValue(INDEX_OPTION);

    final FieldType textOptions = new FieldType();
    textOptions.setIndexed(true);
    textOptions.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
    textOptions.setStored(true);
    textOptions.setTokenized(true);
    if (cmdline.hasOption(STORE_TERM_VECTORS_OPTION)) {
        textOptions.setStoreTermVectors(true);
    }

    LOG.info("collection: " + collectionPath);
    LOG.info("index: " + indexPath);

    LongOpenHashSet deletes = null;
    if (cmdline.hasOption(DELETES_OPTION)) {
        deletes = new LongOpenHashSet();
        File deletesFile = new File(cmdline.getOptionValue(DELETES_OPTION));
        if (!deletesFile.exists()) {
            System.err.println("Error: " + deletesFile + " does not exist!");
            System.exit(-1);
        }
        LOG.info("Reading deletes from " + deletesFile);

        FileInputStream fin = new FileInputStream(deletesFile);
        byte[] ignoreBytes = new byte[2];
        fin.read(ignoreBytes); // "B", "Z" bytes from commandline tools
        BufferedReader br = new BufferedReader(new InputStreamReader(new CBZip2InputStream(fin)));

        String s;
        while ((s = br.readLine()) != null) {
            if (s.contains("\t")) {
                deletes.add(Long.parseLong(s.split("\t")[0]));
            } else {
                deletes.add(Long.parseLong(s));
            }
        }
        br.close();
        fin.close();
        LOG.info("Read " + deletes.size() + " tweetids from deletes file.");
    }

    long maxId = Long.MAX_VALUE;
    if (cmdline.hasOption(MAX_ID_OPTION)) {
        maxId = Long.parseLong(cmdline.getOptionValue(MAX_ID_OPTION));
        LOG.info("index: " + maxId);
    }

    long startTime = System.currentTimeMillis();
    File file = new File(collectionPath);
    if (!file.exists()) {
        System.err.println("Error: " + file + " does not exist!");
        System.exit(-1);
    }

    StatusStream stream = new JsonStatusCorpusReader(file);

    Directory dir = FSDirectory.open(new File(indexPath));
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_43, IndexStatuses.ANALYZER);
    config.setOpenMode(OpenMode.CREATE);

    IndexWriter writer = new IndexWriter(dir, config);
    int cnt = 0;
    Status status;
    try {
        while ((status = stream.next()) != null) {
            if (status.getText() == null) {
                continue;
            }

            // Skip deletes tweetids.
            if (deletes != null && deletes.contains(status.getId())) {
                continue;
            }

            if (status.getId() > maxId) {
                continue;
            }

            cnt++;
            Document doc = new Document();
            doc.add(new LongField(StatusField.ID.name, status.getId(), Field.Store.YES));
            doc.add(new LongField(StatusField.EPOCH.name, status.getEpoch(), Field.Store.YES));
            doc.add(new TextField(StatusField.SCREEN_NAME.name, status.getScreenname(), Store.YES));

            doc.add(new Field(StatusField.TEXT.name, status.getText(), textOptions));

            doc.add(new IntField(StatusField.FRIENDS_COUNT.name, status.getFollowersCount(), Store.YES));
            doc.add(new IntField(StatusField.FOLLOWERS_COUNT.name, status.getFriendsCount(), Store.YES));
            doc.add(new IntField(StatusField.STATUSES_COUNT.name, status.getStatusesCount(), Store.YES));

            long inReplyToStatusId = status.getInReplyToStatusId();
            if (inReplyToStatusId > 0) {
                doc.add(new LongField(StatusField.IN_REPLY_TO_STATUS_ID.name, inReplyToStatusId,
                        Field.Store.YES));
                doc.add(new LongField(StatusField.IN_REPLY_TO_USER_ID.name, status.getInReplyToUserId(),
                        Field.Store.YES));
            }

            String lang = status.getLang();
            if (!lang.equals("unknown")) {
                doc.add(new TextField(StatusField.LANG.name, status.getLang(), Store.YES));
            }

            long retweetStatusId = status.getRetweetedStatusId();
            if (retweetStatusId > 0) {
                doc.add(new LongField(StatusField.RETWEETED_STATUS_ID.name, retweetStatusId, Field.Store.YES));
                doc.add(new LongField(StatusField.RETWEETED_USER_ID.name, status.getRetweetedUserId(),
                        Field.Store.YES));
                doc.add(new IntField(StatusField.RETWEET_COUNT.name, status.getRetweetCount(), Store.YES));
                if (status.getRetweetCount() < 0 || status.getRetweetedStatusId() < 0) {
                    LOG.warn("Error parsing retweet fields of " + status.getId());
                }
            }

            writer.addDocument(doc);
            if (cnt % 100000 == 0) {
                LOG.info(cnt + " statuses indexed");
            }
        }

        LOG.info(String.format("Total of %s statuses added", cnt));

        if (cmdline.hasOption(OPTIMIZE_OPTION)) {
            LOG.info("Merging segments...");
            writer.forceMerge(1);
            LOG.info("Done!");
        }

        LOG.info("Total elapsed time: " + (System.currentTimeMillis() - startTime) + "ms");
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        writer.close();
        dir.close();
        stream.close();
    }
}

From source file:com.basistech.lucene.tools.LuceneQueryToolTest.java

License:Apache License

@BeforeClass
public static void oneTimeSetup() throws IOException, ParseException {
    LuceneQueryToolTest.showOutput = false; // for debugging tests
    Directory dir = new RAMDirectory();
    Analyzer analyzer = new StandardAnalyzer();
    IndexWriterConfig config = new IndexWriterConfig(analyzer);
    IndexWriter writer = new IndexWriter(dir, config);
    Document doc = new Document();
    doc.add(new Field("longest-mention", "Bill Clinton", StringField.TYPE_STORED));
    doc.add(new Field("context", "Hillary Clinton Arkansas", TextField.TYPE_NOT_STORED));
    writer.addDocument(doc);/*from w w w .  ja v a 2s  .c o m*/
    doc = new Document();
    doc.add(new Field("longest-mention", "George W. Bush", StringField.TYPE_STORED));
    doc.add(new Field("context", "Texas Laura Bush", TextField.TYPE_NOT_STORED));
    writer.addDocument(doc);
    doc = new Document();
    doc.add(new Field("longest-mention", "George H. W. Bush", StringField.TYPE_STORED));
    doc.add(new Field("context", "Barbara Bush Texas", TextField.TYPE_NOT_STORED));
    writer.addDocument(doc);
    doc = new Document();
    doc.add(new Field("bbb", "foo", StringField.TYPE_STORED));
    doc.add(new Field("bbb", "bar", StringField.TYPE_STORED));
    doc.add(new Field("aaa", "foo", StringField.TYPE_STORED));
    FieldType typeUnindexed = new FieldType(StringField.TYPE_STORED);
    typeUnindexed.setIndexOptions(IndexOptions.NONE);
    doc.add(new Field("zzz", "foo", typeUnindexed));
    writer.addDocument(doc);
    writer.close();
    reader = DirectoryReader.open(dir);
}

From source file:com.epam.catgenome.dao.index.FeatureIndexDao.java

License:Open Source License

private void addCommonDocumentFields(Document document, FeatureIndexEntry entry, final Long featureFileId) {
    document.add(new SortedStringField(FeatureIndexFields.FEATURE_ID.getFieldName(), entry.getFeatureId()));

    FieldType fieldType = new FieldType();
    fieldType.setOmitNorms(true);//  w w  w  .java 2s. c  o  m
    fieldType.setIndexOptions(IndexOptions.DOCS);
    fieldType.setStored(true);
    fieldType.setTokenized(false);
    fieldType.setDocValuesType(DocValuesType.SORTED);
    fieldType.freeze();
    Field field = new Field(FeatureIndexFields.CHROMOSOME_ID.getFieldName(),
            entry.getChromosome() != null ? new BytesRef(entry.getChromosome().getId().toString())
                    : new BytesRef(""),
            fieldType);
    document.add(field);
    document.add(new SortedStringField(FeatureIndexFields.CHROMOSOME_NAME.getFieldName(),
            entry.getChromosome().getName(), true));

    document.add(new SortedIntPoint(FeatureIndexFields.START_INDEX.getFieldName(), entry.getStartIndex()));
    document.add(new StoredField(FeatureIndexFields.START_INDEX.getFieldName(), entry.getStartIndex()));
    document.add(new SortedDocValuesField(FeatureIndexFields.START_INDEX.getGroupName(),
            new BytesRef(entry.getStartIndex().toString())));

    document.add(new SortedIntPoint(FeatureIndexFields.END_INDEX.getFieldName(), entry.getEndIndex()));
    document.add(new StoredField(FeatureIndexFields.END_INDEX.getFieldName(), entry.getEndIndex()));
    document.add(new SortedDocValuesField(FeatureIndexFields.END_INDEX.getGroupName(),
            new BytesRef(entry.getStartIndex().toString())));

    document.add(new StringField(FeatureIndexFields.FEATURE_TYPE.getFieldName(),
            entry.getFeatureType() != null ? entry.getFeatureType().getFileValue() : "", Field.Store.YES));
    document.add(new StringField(FeatureIndexFields.FILE_ID.getFieldName(), featureFileId.toString(),
            Field.Store.YES));

    document.add(new StringField(FeatureIndexFields.FEATURE_NAME.getFieldName(),
            entry.getFeatureName() != null ? entry.getFeatureName().toLowerCase() : "", Field.Store.YES));
    document.add(new SortedDocValuesField(FeatureIndexFields.FEATURE_NAME.getFieldName(),
            new BytesRef(entry.getFeatureName() != null ? entry.getFeatureName() : "")));

    document.add(new SortedSetDocValuesFacetField(FeatureIndexFields.CHR_ID.getFieldName(),
            entry.getChromosome().getId().toString()));

    document.add(new SortedStringField(FeatureIndexFields.UID.getFieldName(), entry.getUuid().toString()));
    document.add(new SortedSetDocValuesFacetField(FeatureIndexFields.F_UID.getFieldName(),
            entry.getUuid().toString()));
}

From source file:com.github.hotware.lucene.extension.bean.field.BeanInformationCacheImpl.java

License:BEER-WARE LICENSE

private FieldInformation buildFieldInformation(BeanField bf, Field field, Class<?> fieldClass) {
    com.github.hotware.lucene.extension.bean.type.Type typeWrapper;
    try {/*from   www  .  j a  v  a  2 s  .  c om*/
        // TODO: maybe cache these?
        typeWrapper = (com.github.hotware.lucene.extension.bean.type.Type) bf.type().newInstance();
    } catch (InstantiationException | IllegalAccessException e) {
        throw new RuntimeException(e);
    }
    FieldType fieldType = new FieldType();
    fieldType.setIndexed(bf.index());
    fieldType.setStored(bf.store());
    fieldType.setTokenized(bf.tokenized());
    fieldType.setStoreTermVectors(bf.storeTermVectors());
    fieldType.setStoreTermVectorPositions(bf.storeTermVectorPositions());
    fieldType.setStoreTermVectorOffsets(bf.storeTermVectorOffsets());
    fieldType.setStoreTermVectorPayloads(bf.storeTermVectorPayloads());
    fieldType.setOmitNorms(bf.omitNorms());
    fieldType.setIndexOptions(bf.indexOptions());
    typeWrapper.configureFieldType(fieldType);
    fieldType.freeze();
    return new FieldInformation(new FrozenField(field), fieldClass, fieldType, bf);
}

From source file:com.o19s.es.ltr.query.LtrQueryTests.java

License:Apache License

private Field newField(String name, String value, Store stored) {
    FieldType tagsFieldType = new FieldType();
    tagsFieldType.setStored(stored == Store.YES);
    IndexOptions idxOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
    tagsFieldType.setIndexOptions(idxOptions);
    return new Field(name, value, tagsFieldType);
}

From source file:com.orientechnologies.spatial.engine.OLuceneSpatialIndexEngineAbstract.java

License:Apache License

protected Document newGeoDocument(OIdentifiable oIdentifiable, Shape shape) {

    FieldType ft = new FieldType();
    ft.setIndexOptions(IndexOptions.DOCS);
    ft.setStored(true);/*from   w w  w . j a  v a2  s  .c o  m*/

    Document doc = new Document();

    doc.add(OLuceneIndexType.createField(RID, oIdentifiable.getIdentity().toString(), Field.Store.YES,
            Field.Index.NOT_ANALYZED_NO_NORMS));
    for (IndexableField f : strategy.createIndexableFields(shape)) {
        doc.add(f);
    }

    doc.add(new StoredField(strategy.getFieldName(), ctx.toString(shape)));
    return doc;
}

From source file:com.ponysdk.sample.client.page.addon.SelectizeAddon.java

License:Apache License

public SelectizeAddon() {
    super(Element.newInput());
    setTerminalHandler(this);

    ///*from w  ww .  ja  v a  2 s .c o  m*/
    final Analyzer analyzer = new StandardAnalyzer();
    final Directory directory = new RAMDirectory();

    final IndexWriterConfig config = new IndexWriterConfig(analyzer);
    IndexWriter writer;
    try {
        writer = new IndexWriter(directory, config);
        final Document doc = new Document();
        final String text = "Test de ouf";

        final FieldType fieldType = new FieldType();
        fieldType.setIndexOptions(IndexOptions.NONE);
        fieldType.setStored(true);
        fieldType.setTokenized(false);
        doc.add(new Field("id", "12", fieldType));
        doc.add(new Field("fieldname", text, TextField.TYPE_STORED));

        writer.addDocument(doc);

        addAssetsType(writer);
        addTenor(writer);
        addClients(writer);
        addSide(writer);

        writer.close();
    } catch (final IOException e1) {
        // TODO Auto-generated catch block
        e1.printStackTrace();
    }

    try {
        // Now search the index:
        final DirectoryReader ireader = DirectoryReader.open(directory);
        isearcher = new IndexSearcher(ireader);
        // Parse a simple query that searches for "text":
        // final QueryParser parser = new QueryParser("fieldname",
        // analyzer);
        // parser.setFuzzyMinSim(2f);

        final Term term = new Term("fieldname", "indesfed");
        final Query query = new FuzzyQuery(term);
        // final TopDocs hits = isearcher.search(query, 1000).scoreDocs;

        // final Query query = parser.parse("indeed");
        final ScoreDoc[] hits = isearcher.search(query, 1000).scoreDocs;
        // Iterate through the results:
        for (final ScoreDoc hit : hits) {
            System.err.println("Score : " + hit.score);
            final Document hitDoc = isearcher.doc(hit.doc);
            System.err.println("Found document" + hitDoc.getField("fieldname").stringValue());
        }
        // ireader.close();
        // directory.close();
    } catch (final Exception exception) {
        exception.printStackTrace();
    }

    // <input type="text" id="input-tags3" class="demo-default"
    // value="science,biology,chemistry,physics">
}

From source file:com.ponysdk.sample.client.page.addon.SelectizeAddon.java

License:Apache License

private void addSide(final IndexWriter writer) throws IOException {
    final Document doc1 = new Document();
    final FieldType fieldType1 = new FieldType();
    fieldType1.setIndexOptions(IndexOptions.NONE);
    fieldType1.setStored(true);/*from www .ja v  a  2 s .  com*/
    fieldType1.setTokenized(false);
    doc1.add(new Field("id", "sell", fieldType1));
    doc1.add(new Field("fieldname", "Sell", TextField.TYPE_STORED));
    doc1.add(new Field("fieldname", "S", TextField.TYPE_STORED));
    doc1.add(new Field("type", Type.SIDE.name(), TextField.TYPE_STORED));
    doc1.add(new Field("desc", "side", TextField.TYPE_STORED));
    writer.addDocument(doc1);

    final Document doc2 = new Document();
    final FieldType fieldType2 = new FieldType();
    fieldType2.setIndexOptions(IndexOptions.NONE);
    fieldType2.setStored(true);
    fieldType2.setTokenized(false);
    doc2.add(new Field("id", "buy", fieldType2));
    doc2.add(new Field("fieldname", "Buy", TextField.TYPE_STORED));
    doc2.add(new Field("fieldname", "B", TextField.TYPE_STORED));
    doc2.add(new Field("type", Type.SIDE.name(), TextField.TYPE_STORED));
    doc2.add(new Field("desc", "side", TextField.TYPE_STORED));
    writer.addDocument(doc2);

}