Example usage for org.apache.lucene.document FieldType setOmitNorms

List of usage examples for org.apache.lucene.document FieldType setOmitNorms

Introduction

In this page you can find the example usage for org.apache.lucene.document FieldType setOmitNorms.

Prototype

public void setOmitNorms(boolean value) 

Source Link

Document

Set to true to omit normalization values for the field.

Usage

From source file:au.org.ala.names.search.ALANameIndexer.java

License:Open Source License

/**
 * Creates a Lucene document for the ALA name index covering: identifiers, the
 * scientific name (raw and canonical form), rank, synonymy, the Linnaean
 * classification with stored ids, nested-set left/right values, authorship and
 * soundex expressions used for fuzzy (taxamatch) searching.
 *
 * @param boost index-time boost applied to the name fields
 * @return the populated document, or {@code null} when the name is blacklisted
 */
private Document createALAIndexDocument(String name, String id, String lsid, String rank, String rankString,
        String kingdom, String kid, String phylum, String pid, String clazz, String cid, String order,
        String oid, String family, String fid, String genus, String gid, String species, String sid,
        String left, String right, String acceptedConcept, String specificEpithet, String infraspecificEpithet,
        String author, float boost) {
    if (isBlacklisted(name)) {
        System.out.println(name + " has been blacklisted");
        return null;
    }
    Document doc = new Document();

    //Add the ids
    doc.add(new StringField(NameIndexField.ID.toString(), id, Store.YES));

    doc.add(new StringField(NameIndexField.LSID.toString(), lsid, Store.YES));
    if (lsid.startsWith("ALA")) {
        doc.add(new TextField(NameIndexField.ALA.toString(), "T", Store.NO));
    }

    //Add the scientific name information (boosted)
    Field f = new TextField(NameIndexField.NAME.toString(), name, Store.YES);
    f.setBoost(boost);
    doc.add(f);

    //rank information
    if (StringUtils.isNotEmpty(rank)) {
        doc.add(new StringField(NameIndexField.RANK_ID.toString(), rank, Store.YES));
    }
    if (StringUtils.isNotEmpty(rankString)) {
        doc.add(new StringField(NameIndexField.RANK.toString(), rankString, Store.YES));
    }

    //handle the synonyms: a non-empty accepted concept marks this entry as a synonym
    if (StringUtils.isNotEmpty(acceptedConcept)) {
        doc.add(new StringField(NameIndexField.ACCEPTED.toString(), acceptedConcept, Store.YES));
        doc.add(new TextField(NameIndexField.iS_SYNONYM.toString(), "T", Store.NO));
    } else {
        doc.add(new TextField(NameIndexField.iS_SYNONYM.toString(), "F", Store.NO));
    }

    //Add the classification information (analysed rank value + stored id per level)
    addClassificationField(doc, RankType.KINGDOM.getRank(), kingdom, "kid", kid);
    addClassificationField(doc, RankType.PHYLUM.getRank(), phylum, "pid", pid);
    addClassificationField(doc, RankType.CLASS.getRank(), clazz, "cid", cid);
    addClassificationField(doc, RankType.ORDER.getRank(), order, "oid", oid);
    addClassificationField(doc, RankType.FAMILY.getRank(), family, "fid", fid);
    addClassificationField(doc, RankType.GENUS.getRank(), genus, "gid", gid);
    addClassificationField(doc, RankType.SPECIES.getRank(), species, "sid", sid);

    //nested-set values used for "is descendant of" queries
    if (StringUtils.trimToNull(left) != null) {
        doc.add(new StringField("left", left, Store.YES));
    }
    if (StringUtils.trimToNull(right) != null) {
        doc.add(new StringField("right", right, Store.YES));
    }

    //Add the author information
    if (StringUtils.isNotEmpty(author)) {
        //TODO think about whether we need to treat the author string with the taxamatch
        doc.add(new TextField(NameIndexField.AUTHOR.toString(), author, Store.YES));
    }

    //Generate the canonical
    //add the canonical form of the name
    try {
        ParsedName cn = parser.parse(name);
        //if(cn != null && !cn.hasProblem() && !cn.isIndetermined()){
        if (cn != null && cn.isParsableType() && !cn.isIndetermined() && cn.getType() != NameType.informal
                && !"6500".equals(rank) && cn.getType() != NameType.doubtful)// a scientific name with some informal addition like "cf." or indetermined like Abies spec. ALSO prevent subgenus because they parse down to genus plus author
        {
            Field f2 = new TextField(NameIndexField.NAME.toString(), cn.canonicalName(), Store.YES);
            f2.setBoost(boost);
            doc.add(f2);
            if (specificEpithet == null && cn.isBinomial()) {
                //determine the epithets from the parse; the outer condition already
                //guarantees specificEpithet is null, so assign it unconditionally
                genus = cn.getGenusOrAbove();
                specificEpithet = cn.getSpecificEpithet();
                if (infraspecificEpithet == null)
                    infraspecificEpithet = cn.getInfraSpecificEpithet();
            }
        }
        //check to see if the concept represents a phrase name
        if (cn instanceof ALAParsedName) {
            //set up the field type that is stored and Index.ANALYZED_NO_NORMS
            FieldType ft = new FieldType(TextField.TYPE_STORED);
            ft.setOmitNorms(true);
            ALAParsedName alapn = (ALAParsedName) cn;
            if ((!"sp.".equals(alapn.rank)) && alapn.specificEpithet != null) {
                doc.add(new Field(NameIndexField.SPECIFIC.toString(), alapn.getSpecificEpithet(), ft));
            } else if ((!"sp.".equals(alapn.rank)) && alapn.specificEpithet == null) {
                log.warn(lsid + " " + name + " has an empty specific for non sp. phrase");
            }
            if (StringUtils.trimToNull(alapn.getLocationPhraseDesciption()) != null) {
                doc.add(new Field(NameIndexField.PHRASE.toString(), alapn.cleanPhrase, ft));
            }
            if (alapn.getPhraseVoucher() != null) {
                doc.add(new Field(NameIndexField.VOUCHER.toString(), alapn.cleanVoucher, ft));
            }
            if (StringUtils.isBlank(genus) && StringUtils.isNotBlank(alapn.getGenusOrAbove())) {
                //add the genus to the index as it is necessary to match on the phrase name.
                doc.add(new TextField(RankType.GENUS.getRank(), alapn.getGenusOrAbove(), Store.YES));
            }
        }
    } catch (org.gbif.ecat.parser.UnparsableException e) {
        //check to see if the name is a virus in which case an extra name is added without the virus key word
        if (e.type == NameType.virus) {
            doc.add(new TextField(NameIndexField.NAME.toString(),
                    ALANameSearcher.virusStopPattern.matcher(name).replaceAll(" "), Store.YES));
        }
    } catch (Exception e) {
        //don't fail the whole indexing run for a single unparsable name,
        //but record it through the logger instead of printStackTrace()
        log.error("Unable to generate canonical form for " + name, e);
    }

    //add the sound expressions for the name if required
    try {
        if (StringUtils.isNotBlank(genus)) {
            doc.add(new TextField(NameIndexField.GENUS_EX.toString(),
                    TaxonNameSoundEx.treatWord(genus, "genus"), Store.YES));
        }
        if (StringUtils.isNotBlank(specificEpithet)) {
            doc.add(new TextField(NameIndexField.SPECIES_EX.toString(),
                    TaxonNameSoundEx.treatWord(specificEpithet, "species"), Store.YES));
        } else if (StringUtils.isNotBlank(genus)) {
            doc.add(new TextField(NameIndexField.SPECIES_EX.toString(), "<null>", Store.YES));
        }
        if (StringUtils.isNotBlank(infraspecificEpithet)) {
            doc.add(new TextField(NameIndexField.INFRA_EX.toString(),
                    TaxonNameSoundEx.treatWord(infraspecificEpithet, "species"), Store.YES));
        } else if (StringUtils.isNotBlank(specificEpithet)) {
            //make searching for an empty infraspecific soudex easier
            doc.add(new TextField(NameIndexField.INFRA_EX.toString(), "<null>", Store.YES));
        }
    } catch (Exception e) {
        log.warn(lsid + " " + name + " has issues creating a soundex: " + e.getMessage());
    }

    return doc;
}

/**
 * Adds one level of the Linnaean classification: the analysed, stored rank
 * value plus its stored identifier (e.g. kingdom + "kid") when present.
 * No-op when the rank value is blank.
 */
private void addClassificationField(Document doc, String rankField, String value, String idField,
        String idValue) {
    if (StringUtils.trimToNull(value) != null) {
        doc.add(new TextField(rankField, value, Store.YES));
        if (StringUtils.isNotBlank(idValue)) {
            doc.add(new StoredField(idField, idValue));
        }
    }
}

From source file:brightsolid.solr.plugins.TestTargetPositionQueryFuzzy.java

License:Apache License

@Override
public void setUp() throws Exception {
    super.setUp();
    directory = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), directory);

    //single reusable field: stored, analysed, norms omitted on both the
    //template type and the concrete field's own type
    FieldType storedNoNorms = new FieldType(org.apache.lucene.document.TextField.TYPE_STORED);
    storedNoNorms.setOmitNorms(true);
    Field field = newField("field", "", storedNoNorms);
    field.fieldType().setOmitNorms(true);

    Document doc = new Document();
    doc.add(field);

    //index the same document instance with three permutations of the value
    for (String value : new String[] { "one two threx", "two threx one", "threx one twp" }) {
        field.setStringValue(value);
        writer.addDocument(doc);
    }

    reader = writer.getReader();
    writer.close();
    searcher = newSearcher(reader);
}

From source file:brightsolid.solr.plugins.TestTargetPositionQuerySimple.java

License:Apache License

@Override
public void setUp() throws Exception {
    super.setUp();
    directory = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), directory);

    //stored, analysed text field with norms omitted on both the template
    //type and the concrete field's own type
    FieldType storedNoNorms = new FieldType(org.apache.lucene.document.TextField.TYPE_STORED);
    storedNoNorms.setOmitNorms(true);
    Field field = newField("field", "", storedNoNorms);
    field.fieldType().setOmitNorms(true);

    Document doc = new Document();
    doc.add(field);

    //index one document per value, reusing the same document/field instance
    String[] values = { "two", "one", "one two", "two one", "one two three", "three two one", "one one two" };
    for (String value : values) {
        field.setStringValue(value);
        writer.addDocument(doc);
    }

    reader = writer.getReader();
    writer.close();
    searcher = newSearcher(reader);
}

From source file:brightsolid.solr.plugins.TestTargetPositionQuerySynonyms.java

License:Apache License

@Override
public void setUp() throws Exception {
    super.setUp();

    //three synonym rings: one/uno/un, two/dos/too, three/free/tres
    String testFile = "one, uno, un\n" + "two, dos, too\n" + "three, free, tres";

    SolrSynonymParser synonymParser = new SolrSynonymParser(true, true, new MockAnalyzer(random()));
    synonymParser.parse(new StringReader(testFile));
    final SynonymMap map = synonymParser.build();

    //whitespace tokenizer followed by a synonym filter over the map above
    Analyzer analyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
            Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
            return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, false));
        }
    };

    directory = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), directory, analyzer);

    FieldType storedNoNorms = new FieldType(org.apache.lucene.document.TextField.TYPE_STORED);
    storedNoNorms.setOmitNorms(true);
    Field field = newField("field", "", storedNoNorms);
    field.fieldType().setOmitNorms(true);

    Document doc = new Document();
    doc.add(field);

    //index the three rotations of "one two three"
    for (String value : new String[] { "one two three", "two three one", "three one two" }) {
        field.setStringValue(value);
        writer.addDocument(doc);
    }

    reader = writer.getReader();
    writer.close();
    searcher = newSearcher(reader);
}

From source file:com.czw.search.lucene.example.xmlparser.FormBasedXmlQueryDemo.java

License:Apache License

/**
 * Creates a RAM-based example index from the bundled /WEB-INF/data.tsv file
 * (tab-separated rows: location, salary, type, description) and opens a
 * searcher over it.
 *
 * @throws IOException if the data file cannot be read or the index written
 */
private void openExampleIndex() throws IOException {
    //Create a RAM-based index from our test data file
    RAMDirectory rd = new RAMDirectory();
    IndexWriterConfig iwConfig = new IndexWriterConfig(analyzer);
    InputStream dataIn = getServletContext().getResourceAsStream("/WEB-INF/data.tsv");
    //stored, analysed but without norms: field length must not affect scoring
    final FieldType textNoNorms = new FieldType(TextField.TYPE_STORED);
    textNoNorms.setOmitNorms(true);
    //try-with-resources: the original leaked the writer, reader and stream
    //whenever an IOException was thrown mid-load
    try (IndexWriter writer = new IndexWriter(rd, iwConfig);
            BufferedReader br = new BufferedReader(new InputStreamReader(dataIn, StandardCharsets.UTF_8))) {
        String line = br.readLine();
        while (line != null) {
            line = line.trim();
            if (line.length() > 0) {
                //parse row and create a document
                StringTokenizer st = new StringTokenizer(line, "\t");
                Document doc = new Document();
                doc.add(new Field("location", st.nextToken(), textNoNorms));
                doc.add(new Field("salary", st.nextToken(), textNoNorms));
                doc.add(new Field("type", st.nextToken(), textNoNorms));
                doc.add(new Field("description", st.nextToken(), textNoNorms));
                writer.addDocument(doc);
            }
            line = br.readLine();
        }
    }

    //open searcher
    // this example never closes it reader!
    IndexReader reader = DirectoryReader.open(rd);
    searcher = new IndexSearcher(reader);
}

From source file:com.epam.catgenome.dao.index.FeatureIndexDao.java

License:Open Source License

/**
 * Adds the fields shared by every feature index entry: feature id, chromosome
 * id/name, start/end positions (point + stored + group-sortable doc values),
 * feature type, file id, feature name (lower-cased for search, raw for
 * sorting) and the entry UUID (stored + facet).
 */
private void addCommonDocumentFields(Document document, FeatureIndexEntry entry, final Long featureFileId) {
    document.add(new SortedStringField(FeatureIndexFields.FEATURE_ID.getFieldName(), entry.getFeatureId()));

    //stored, untokenized, norm-free keyword field with sorted doc values
    FieldType fieldType = new FieldType();
    fieldType.setOmitNorms(true);
    fieldType.setIndexOptions(IndexOptions.DOCS);
    fieldType.setStored(true);
    fieldType.setTokenized(false);
    fieldType.setDocValuesType(DocValuesType.SORTED);
    fieldType.freeze();
    Field field = new Field(FeatureIndexFields.CHROMOSOME_ID.getFieldName(),
            entry.getChromosome() != null ? new BytesRef(entry.getChromosome().getId().toString())
                    : new BytesRef(""),
            fieldType);
    document.add(field);
    //guard consistently with CHROMOSOME_ID above: the original dereferenced
    //getChromosome() unconditionally here and would NPE on a null chromosome
    if (entry.getChromosome() != null) {
        document.add(new SortedStringField(FeatureIndexFields.CHROMOSOME_NAME.getFieldName(),
                entry.getChromosome().getName(), true));
        document.add(new SortedSetDocValuesFacetField(FeatureIndexFields.CHR_ID.getFieldName(),
                entry.getChromosome().getId().toString()));
    }

    document.add(new SortedIntPoint(FeatureIndexFields.START_INDEX.getFieldName(), entry.getStartIndex()));
    document.add(new StoredField(FeatureIndexFields.START_INDEX.getFieldName(), entry.getStartIndex()));
    document.add(new SortedDocValuesField(FeatureIndexFields.START_INDEX.getGroupName(),
            new BytesRef(entry.getStartIndex().toString())));

    document.add(new SortedIntPoint(FeatureIndexFields.END_INDEX.getFieldName(), entry.getEndIndex()));
    document.add(new StoredField(FeatureIndexFields.END_INDEX.getFieldName(), entry.getEndIndex()));
    //bug fix: the END_INDEX group doc values were previously built from
    //getStartIndex(), making group sorting on end position wrong
    document.add(new SortedDocValuesField(FeatureIndexFields.END_INDEX.getGroupName(),
            new BytesRef(entry.getEndIndex().toString())));

    document.add(new StringField(FeatureIndexFields.FEATURE_TYPE.getFieldName(),
            entry.getFeatureType() != null ? entry.getFeatureType().getFileValue() : "", Field.Store.YES));
    document.add(new StringField(FeatureIndexFields.FILE_ID.getFieldName(), featureFileId.toString(),
            Field.Store.YES));

    document.add(new StringField(FeatureIndexFields.FEATURE_NAME.getFieldName(),
            entry.getFeatureName() != null ? entry.getFeatureName().toLowerCase() : "", Field.Store.YES));
    document.add(new SortedDocValuesField(FeatureIndexFields.FEATURE_NAME.getFieldName(),
            new BytesRef(entry.getFeatureName() != null ? entry.getFeatureName() : "")));

    document.add(new SortedStringField(FeatureIndexFields.UID.getFieldName(), entry.getUuid().toString()));
    document.add(new SortedSetDocValuesFacetField(FeatureIndexFields.F_UID.getFieldName(),
            entry.getUuid().toString()));
}

From source file:com.FormBasedXmlQueryDemo.java

License:Apache License

/**
 * Creates a RAM-based example index from the bundled /WEB-INF/data.tsv file
 * (tab-separated rows: location, salary, type, description) and opens a
 * searcher over it. Lucene 4.0 variant.
 *
 * @throws IOException if the data file cannot be read or the index written
 */
private void openExampleIndex() throws IOException {
    //Create a RAM-based index from our test data file
    RAMDirectory rd = new RAMDirectory();
    IndexWriterConfig iwConfig = new IndexWriterConfig(Version.LUCENE_40, analyzer);
    InputStream dataIn = getServletContext().getResourceAsStream("/WEB-INF/data.tsv");
    //stored, analysed but without norms: field length must not affect scoring
    final FieldType textNoNorms = new FieldType(TextField.TYPE_STORED);
    textNoNorms.setOmitNorms(true);
    //try-with-resources: the original leaked the writer, reader and stream
    //whenever an IOException was thrown mid-load
    try (IndexWriter writer = new IndexWriter(rd, iwConfig);
            BufferedReader br = new BufferedReader(new InputStreamReader(dataIn, IOUtils.CHARSET_UTF_8))) {
        String line = br.readLine();
        while (line != null) {
            line = line.trim();
            if (line.length() > 0) {
                //parse row and create a document
                StringTokenizer st = new StringTokenizer(line, "\t");
                Document doc = new Document();
                doc.add(new Field("location", st.nextToken(), textNoNorms));
                doc.add(new Field("salary", st.nextToken(), textNoNorms));
                doc.add(new Field("type", st.nextToken(), textNoNorms));
                doc.add(new Field("description", st.nextToken(), textNoNorms));
                writer.addDocument(doc);
            }
            line = br.readLine();
        }
    }

    //open searcher
    // this example never closes it reader!
    IndexReader reader = DirectoryReader.open(rd);
    searcher = new IndexSearcher(reader);
}

From source file:com.github.hotware.lucene.extension.bean.field.BeanInformationCacheImpl.java

License:BEER-WARE LICENSE

/**
 * Builds the {@link FieldInformation} for a bean field: instantiates the
 * type-wrapper declared on the {@link BeanField} annotation, translates the
 * annotation's attributes into a frozen Lucene {@link FieldType}, and lets the
 * wrapper apply its own type-specific configuration before freezing.
 *
 * @throws RuntimeException if the type-wrapper cannot be instantiated
 */
private FieldInformation buildFieldInformation(BeanField bf, Field field, Class<?> fieldClass) {
    com.github.hotware.lucene.extension.bean.type.Type typeWrapper;
    try {
        // TODO: maybe cache these?
        // Class#newInstance() is deprecated and sneaky-throws the constructor's
        // checked exceptions; go through the declared no-arg constructor instead.
        typeWrapper = (com.github.hotware.lucene.extension.bean.type.Type) bf.type().getDeclaredConstructor()
                .newInstance();
    } catch (ReflectiveOperationException e) {
        throw new RuntimeException(e);
    }
    FieldType fieldType = new FieldType();
    fieldType.setIndexed(bf.index());
    fieldType.setStored(bf.store());
    fieldType.setTokenized(bf.tokenized());
    fieldType.setStoreTermVectors(bf.storeTermVectors());
    fieldType.setStoreTermVectorPositions(bf.storeTermVectorPositions());
    fieldType.setStoreTermVectorOffsets(bf.storeTermVectorOffsets());
    fieldType.setStoreTermVectorPayloads(bf.storeTermVectorPayloads());
    fieldType.setOmitNorms(bf.omitNorms());
    fieldType.setIndexOptions(bf.indexOptions());
    //give the wrapper the last word before the type is frozen
    typeWrapper.configureFieldType(fieldType);
    fieldType.freeze();
    return new FieldInformation(new FrozenField(field), fieldClass, fieldType, bf);
}

From source file:com.qwazr.search.field.CustomFieldType.java

License:Apache License

/**
 * Builds a {@link FieldType} from every non-null attribute of the field
 * definition and hands a {@link CustomField} carrying the value to the
 * consumer. Attributes left null in the definition keep Lucene's defaults.
 */
@Override
final public void fillValue(final Object value, final FieldConsumer consumer) {
    final FieldType fieldType = new FieldType();
    if (fieldDef.stored != null) {
        fieldType.setStored(fieldDef.stored);
    }
    if (fieldDef.tokenized != null) {
        fieldType.setTokenized(fieldDef.tokenized);
    }
    if (fieldDef.store_termvectors != null) {
        fieldType.setStoreTermVectors(fieldDef.store_termvectors);
    }
    if (fieldDef.store_termvector_offsets != null) {
        fieldType.setStoreTermVectorOffsets(fieldDef.store_termvector_offsets);
    }
    if (fieldDef.store_termvector_positions != null) {
        fieldType.setStoreTermVectorPositions(fieldDef.store_termvector_positions);
    }
    if (fieldDef.store_termvector_payloads != null) {
        fieldType.setStoreTermVectorPayloads(fieldDef.store_termvector_payloads);
    }
    if (fieldDef.omit_norms != null) {
        fieldType.setOmitNorms(fieldDef.omit_norms);
    }
    if (fieldDef.numeric_type != null) {
        fieldType.setNumericType(fieldDef.numeric_type);
    }
    if (fieldDef.index_options != null) {
        fieldType.setIndexOptions(fieldDef.index_options);
    }
    if (fieldDef.docvalues_type != null) {
        fieldType.setDocValuesType(fieldDef.docvalues_type);
    }
    consumer.accept(new CustomField(fieldName, fieldType, value));
}

From source file:com.rocana.lucene.codec.v1.TestRocanaPerFieldPostingsFormat2.java

License:Apache License

/**
 * Stress test: repeatedly reopens the writer in CREATE_OR_APPEND mode and
 * indexes documents whose fields use randomly varying tokenization and norm
 * settings, so the per-field codec must handle mixed configurations.
 */
@Test
public void testStressPerFieldCodec() throws IOException {
    Directory directory = newDirectory(random());
    final int docsPerRound = 97;
    int rounds = atLeast(1);
    for (int round = 0; round < rounds; round++) {
        int fieldsPerDoc = TestUtil.nextInt(random(), 30, 60);
        IndexWriterConfig config = newIndexWriterConfig(random(), new MockAnalyzer(random()));
        config.setOpenMode(OpenMode.CREATE_OR_APPEND);
        IndexWriter writer = newWriter(directory, config);
        for (int d = 0; d < docsPerRound; d++) {
            final Document doc = new Document();
            for (int fieldNum = 0; fieldNum < fieldsPerDoc; fieldNum++) {
                //randomise tokenisation and norms per field
                FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
                customType.setTokenized(random().nextBoolean());
                customType.setOmitNorms(random().nextBoolean());
                doc.add(newField("" + fieldNum, TestUtil.randomRealisticUnicodeString(random(), 128),
                        customType));
            }
            writer.addDocument(doc);
        }
        if (random().nextBoolean()) {
            writer.forceMerge(1);
        }
        writer.commit();
        //each round appends exactly docsPerRound documents
        assertEquals((round + 1) * docsPerRound, writer.maxDoc());
        writer.close();
    }
    directory.close();
}