List of usage examples for org.apache.lucene.document.FieldType.setOmitNorms
public void setOmitNorms(boolean value)
Pass true to omit normalization values for the field. From source file: au.org.ala.names.search.ALANameIndexer.java
License:Open Source License
private Document createALAIndexDocument(String name, String id, String lsid, String rank, String rankString, String kingdom, String kid, String phylum, String pid, String clazz, String cid, String order, String oid, String family, String fid, String genus, String gid, String species, String sid, String left, String right, String acceptedConcept, String specificEpithet, String infraspecificEpithet, String author, float boost) { ////ww w . j a v a2 s. c o m if (isBlacklisted(name)) { System.out.println(name + " has been blacklisted"); return null; } Document doc = new Document(); //Add the ids doc.add(new StringField(NameIndexField.ID.toString(), id, Store.YES)); doc.add(new StringField(NameIndexField.LSID.toString(), lsid, Store.YES)); if (lsid.startsWith("ALA")) { doc.add(new TextField(NameIndexField.ALA.toString(), "T", Store.NO)); } //Add the scientific name information Field f = new TextField(NameIndexField.NAME.toString(), name, Store.YES); f.setBoost(boost); doc.add(f); //rank information if (StringUtils.isNotEmpty(rank)) { doc.add(new StringField(NameIndexField.RANK_ID.toString(), rank, Store.YES)); } if (StringUtils.isNotEmpty(rankString)) { doc.add(new StringField(NameIndexField.RANK.toString(), rankString, Store.YES)); } //handle the synonyms if (StringUtils.isNotEmpty(acceptedConcept)) { doc.add(new StringField(NameIndexField.ACCEPTED.toString(), acceptedConcept, Store.YES)); doc.add(new TextField(NameIndexField.iS_SYNONYM.toString(), "T", Store.NO)); } else { doc.add(new TextField(NameIndexField.iS_SYNONYM.toString(), "F", Store.NO)); } //Add the classification information if (StringUtils.trimToNull(kingdom) != null) { doc.add(new TextField(RankType.KINGDOM.getRank(), kingdom, Store.YES)); if (StringUtils.isNotBlank(kid)) { doc.add(new StoredField("kid", kid)); } } if (StringUtils.trimToNull(phylum) != null) { doc.add(new TextField(RankType.PHYLUM.getRank(), phylum, Store.YES)); if (StringUtils.isNotBlank(pid)) { doc.add(new StoredField("pid", pid)); } } 
if (StringUtils.trimToNull(clazz) != null) { doc.add(new TextField(RankType.CLASS.getRank(), clazz, Store.YES)); if (StringUtils.isNotBlank(cid)) { doc.add(new StoredField("cid", cid)); } } if (StringUtils.trimToNull(order) != null) { doc.add(new TextField(RankType.ORDER.getRank(), order, Store.YES)); if (StringUtils.isNotBlank(oid)) { doc.add(new StoredField("oid", oid)); } } if (StringUtils.trimToNull(family) != null) { doc.add(new TextField(RankType.FAMILY.getRank(), family, Store.YES)); if (StringUtils.isNotBlank(fid)) { doc.add(new StoredField("fid", fid)); } } if (StringUtils.trimToNull(genus) != null) { doc.add(new TextField(RankType.GENUS.getRank(), genus, Store.YES)); if (StringUtils.isNotBlank(gid)) { doc.add(new StoredField("gid", gid)); } } if (StringUtils.trimToNull(species) != null) { doc.add(new TextField(RankType.SPECIES.getRank(), species, Store.YES)); if (StringUtils.isNotBlank(sid)) { doc.add(new StoredField("sid", sid)); } } if (StringUtils.trimToNull(left) != null) { doc.add(new StringField("left", left, Store.YES)); } if (StringUtils.trimToNull(right) != null) { doc.add(new StringField("right", right, Store.YES)); } //Add the author information if (StringUtils.isNotEmpty(author)) { //TODO think about whether we need to treat the author string with the taxamatch doc.add(new TextField(NameIndexField.AUTHOR.toString(), author, Store.YES)); } //Generate the canonical //add the canonical form of the name try { ParsedName cn = parser.parse(name); //if(cn != null && !cn.hasProblem() && !cn.isIndetermined()){ if (cn != null && cn.isParsableType() && !cn.isIndetermined() && cn.getType() != NameType.informal && !"6500".equals(rank) && cn.getType() != NameType.doubtful)// a scientific name with some informal addition like "cf." or indetermined like Abies spec. 
ALSO prevent subgenus because they parse down to genus plus author { Field f2 = new TextField(NameIndexField.NAME.toString(), cn.canonicalName(), Store.YES); f2.setBoost(boost); doc.add(f2); if (specificEpithet == null && cn.isBinomial()) { //check to see if we need to determine the epithets from the parse genus = cn.getGenusOrAbove(); if (specificEpithet == null) specificEpithet = cn.getSpecificEpithet(); if (infraspecificEpithet == null) infraspecificEpithet = cn.getInfraSpecificEpithet(); } } //check to see if the concept represents a phrase name if (cn instanceof ALAParsedName) { //set up the field type that is stored and Index.ANALYZED_NO_NORMS FieldType ft = new FieldType(TextField.TYPE_STORED); ft.setOmitNorms(true); ALAParsedName alapn = (ALAParsedName) cn; if ((!"sp.".equals(alapn.rank)) && alapn.specificEpithet != null) { doc.add(new Field(NameIndexField.SPECIFIC.toString(), alapn.getSpecificEpithet(), ft)); } else if ((!"sp.".equals(alapn.rank)) && alapn.specificEpithet == null) { log.warn(lsid + " " + name + " has an empty specific for non sp. phrase"); } if (StringUtils.trimToNull(alapn.getLocationPhraseDesciption()) != null) { doc.add(new Field(NameIndexField.PHRASE.toString(), alapn.cleanPhrase, ft)); } if (alapn.getPhraseVoucher() != null) { doc.add(new Field(NameIndexField.VOUCHER.toString(), alapn.cleanVoucher, ft)); } if (StringUtils.isBlank(genus) && StringUtils.isNotBlank(alapn.getGenusOrAbove())) { //add the genus to the index as it is necessary to match on the phrase name. 
doc.add(new TextField(RankType.GENUS.getRank(), alapn.getGenusOrAbove(), Store.YES)); } } } catch (org.gbif.ecat.parser.UnparsableException e) { //check to see if the name is a virus in which case an extra name is added without the virus key word if (e.type == NameType.virus) { doc.add(new TextField(NameIndexField.NAME.toString(), ALANameSearcher.virusStopPattern.matcher(name).replaceAll(" "), Store.YES)); } } catch (Exception e) { e.printStackTrace(); //throw e; } //add the sound expressions for the name if required try { if (StringUtils.isNotBlank(genus)) { doc.add(new TextField(NameIndexField.GENUS_EX.toString(), TaxonNameSoundEx.treatWord(genus, "genus"), Store.YES)); } if (StringUtils.isNotBlank(specificEpithet)) { doc.add(new TextField(NameIndexField.SPECIES_EX.toString(), TaxonNameSoundEx.treatWord(specificEpithet, "species"), Store.YES)); } else if (StringUtils.isNotBlank(genus)) { doc.add(new TextField(NameIndexField.SPECIES_EX.toString(), "<null>", Store.YES)); } if (StringUtils.isNotBlank(infraspecificEpithet)) { doc.add(new TextField(NameIndexField.INFRA_EX.toString(), TaxonNameSoundEx.treatWord(infraspecificEpithet, "species"), Store.YES)); } else if (StringUtils.isNotBlank(specificEpithet)) { //make searching for an empty infraspecific soudex easier doc.add(new TextField(NameIndexField.INFRA_EX.toString(), "<null>", Store.YES)); } } catch (Exception e) { log.warn(lsid + " " + name + " has issues creating a soundex: " + e.getMessage()); } return doc; }
From source file:brightsolid.solr.plugins.TestTargetPositionQueryFuzzy.java
License:Apache License
/**
 * Creates the test index: three documents that reuse a single stored, norm-less text field
 * named "field", then opens a reader and searcher over it.
 */
@Override
public void setUp() throws Exception {
    super.setUp();
    directory = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), directory);

    // Stored text type with norms switched off.
    FieldType noNormsType = new FieldType(org.apache.lucene.document.TextField.TYPE_STORED);
    noNormsType.setOmitNorms(true);

    Field reusedField = newField("field", "", noNormsType);
    reusedField.fieldType().setOmitNorms(true);

    Document document = new Document();
    document.add(reusedField);

    // The same Document/Field pair is re-indexed with a new value each time.
    String[] contents = { "one two threx", "two threx one", "threx one twp" };
    for (String content : contents) {
        reusedField.setStringValue(content);
        writer.addDocument(document);
    }

    reader = writer.getReader();
    writer.close();
    searcher = newSearcher(reader);
}
From source file:brightsolid.solr.plugins.TestTargetPositionQuerySimple.java
License:Apache License
/**
 * Creates the test index: seven documents that reuse a single stored, norm-less text field
 * named "field", then opens a reader and searcher over it.
 */
@Override
public void setUp() throws Exception {
    super.setUp();
    directory = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), directory);

    // Stored text type with norms switched off.
    FieldType noNormsType = new FieldType(org.apache.lucene.document.TextField.TYPE_STORED);
    noNormsType.setOmitNorms(true);

    Field reusedField = newField("field", "", noNormsType);
    reusedField.fieldType().setOmitNorms(true);

    Document document = new Document();
    document.add(reusedField);

    // The same Document/Field pair is re-indexed with a new value each time.
    String[] contents = {
        "two",
        "one",
        "one two",
        "two one",
        "one two three",
        "three two one",
        "one one two"
    };
    for (String content : contents) {
        reusedField.setStringValue(content);
        writer.addDocument(document);
    }

    reader = writer.getReader();
    writer.close();
    searcher = newSearcher(reader);
}
From source file:brightsolid.solr.plugins.TestTargetPositionQuerySynonyms.java
License:Apache License
@Override public void setUp() throws Exception { super.setUp(); String testFile = "one, uno, un\n" + "two, dos, too\n" + "three, free, tres"; SolrSynonymParser parser = new SolrSynonymParser(true, true, new MockAnalyzer(random())); parser.parse(new StringReader(testFile)); final SynonymMap map = parser.build(); Analyzer analyzer = new Analyzer() { @Override//from w w w .j av a2s.c o m protected TokenStreamComponents createComponents(String fieldName, Reader reader) { Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true); return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, false)); } }; directory = newDirectory(); RandomIndexWriter iw = new RandomIndexWriter(random(), directory, analyzer); Document doc = new Document(); FieldType newType = new FieldType(org.apache.lucene.document.TextField.TYPE_STORED); newType.setOmitNorms(true); Field field = newField("field", "", newType); field.fieldType().setOmitNorms(true); doc.add(field); field.setStringValue("one two three"); iw.addDocument(doc); field.setStringValue("two three one"); iw.addDocument(doc); field.setStringValue("three one two"); iw.addDocument(doc); reader = iw.getReader(); iw.close(); searcher = newSearcher(reader); }
From source file:com.czw.search.lucene.example.xmlparser.FormBasedXmlQueryDemo.java
License:Apache License
private void openExampleIndex() throws IOException { //Create a RAM-based index from our test data file RAMDirectory rd = new RAMDirectory(); IndexWriterConfig iwConfig = new IndexWriterConfig(analyzer); IndexWriter writer = new IndexWriter(rd, iwConfig); InputStream dataIn = getServletContext().getResourceAsStream("/WEB-INF/data.tsv"); BufferedReader br = new BufferedReader(new InputStreamReader(dataIn, StandardCharsets.UTF_8)); String line = br.readLine();//from ww w.j av a 2 s. co m final FieldType textNoNorms = new FieldType(TextField.TYPE_STORED); textNoNorms.setOmitNorms(true); while (line != null) { line = line.trim(); if (line.length() > 0) { //parse row and create a document StringTokenizer st = new StringTokenizer(line, "\t"); Document doc = new Document(); doc.add(new Field("location", st.nextToken(), textNoNorms)); doc.add(new Field("salary", st.nextToken(), textNoNorms)); doc.add(new Field("type", st.nextToken(), textNoNorms)); doc.add(new Field("description", st.nextToken(), textNoNorms)); writer.addDocument(doc); } line = br.readLine(); } writer.close(); //open searcher // this example never closes it reader! IndexReader reader = DirectoryReader.open(rd); searcher = new IndexSearcher(reader); }
From source file:com.epam.catgenome.dao.index.FeatureIndexDao.java
License:Open Source License
/**
 * Adds the fields shared by all feature index entries to {@code document}: feature id,
 * chromosome id and name, start and end index (point, stored and grouping variants),
 * feature type, file id, feature name, and the UID and facet fields.
 *
 * @param document      the document being populated
 * @param entry         the feature index entry supplying the values
 * @param featureFileId id of the feature file, stored in the FILE_ID field
 */
private void addCommonDocumentFields(Document document, FeatureIndexEntry entry, final Long featureFileId) {
    document.add(new SortedStringField(FeatureIndexFields.FEATURE_ID.getFieldName(), entry.getFeatureId()));

    // Untokenized, stored, docs-only field type with sorted doc values and no norms.
    FieldType fieldType = new FieldType();
    fieldType.setOmitNorms(true);
    fieldType.setIndexOptions(IndexOptions.DOCS);
    fieldType.setStored(true);
    fieldType.setTokenized(false);
    fieldType.setDocValuesType(DocValuesType.SORTED);
    fieldType.freeze();

    // NOTE(review): the chromosome is null-checked here but dereferenced without a check for
    // CHROMOSOME_NAME and CHR_ID below — confirm whether a null chromosome is really possible.
    Field field = new Field(FeatureIndexFields.CHROMOSOME_ID.getFieldName(),
            entry.getChromosome() != null ? new BytesRef(entry.getChromosome().getId().toString())
                    : new BytesRef(""),
            fieldType);
    document.add(field);
    document.add(new SortedStringField(FeatureIndexFields.CHROMOSOME_NAME.getFieldName(),
            entry.getChromosome().getName(), true));

    document.add(new SortedIntPoint(FeatureIndexFields.START_INDEX.getFieldName(), entry.getStartIndex()));
    document.add(new StoredField(FeatureIndexFields.START_INDEX.getFieldName(), entry.getStartIndex()));
    document.add(new SortedDocValuesField(FeatureIndexFields.START_INDEX.getGroupName(),
            new BytesRef(entry.getStartIndex().toString())));

    document.add(new SortedIntPoint(FeatureIndexFields.END_INDEX.getFieldName(), entry.getEndIndex()));
    document.add(new StoredField(FeatureIndexFields.END_INDEX.getFieldName(), entry.getEndIndex()));
    // The END_INDEX grouping field must carry the end index; it previously reused the start index.
    document.add(new SortedDocValuesField(FeatureIndexFields.END_INDEX.getGroupName(),
            new BytesRef(entry.getEndIndex().toString())));

    document.add(new StringField(FeatureIndexFields.FEATURE_TYPE.getFieldName(),
            entry.getFeatureType() != null ? entry.getFeatureType().getFileValue() : "", Field.Store.YES));
    document.add(new StringField(FeatureIndexFields.FILE_ID.getFieldName(), featureFileId.toString(),
            Field.Store.YES));

    // The indexed feature name is lower-cased; the doc-values copy keeps the original case.
    document.add(new StringField(FeatureIndexFields.FEATURE_NAME.getFieldName(),
            entry.getFeatureName() != null ? entry.getFeatureName().toLowerCase() : "", Field.Store.YES));
    document.add(new SortedDocValuesField(FeatureIndexFields.FEATURE_NAME.getFieldName(),
            new BytesRef(entry.getFeatureName() != null ? entry.getFeatureName() : "")));

    document.add(new SortedSetDocValuesFacetField(FeatureIndexFields.CHR_ID.getFieldName(),
            entry.getChromosome().getId().toString()));
    document.add(new SortedStringField(FeatureIndexFields.UID.getFieldName(), entry.getUuid().toString()));
    document.add(new SortedSetDocValuesFacetField(FeatureIndexFields.F_UID.getFieldName(),
            entry.getUuid().toString()));
}
From source file:com.FormBasedXmlQueryDemo.java
License:Apache License
private void openExampleIndex() throws IOException { //Create a RAM-based index from our test data file RAMDirectory rd = new RAMDirectory(); IndexWriterConfig iwConfig = new IndexWriterConfig(Version.LUCENE_40, analyzer); IndexWriter writer = new IndexWriter(rd, iwConfig); InputStream dataIn = getServletContext().getResourceAsStream("/WEB-INF/data.tsv"); BufferedReader br = new BufferedReader(new InputStreamReader(dataIn, IOUtils.CHARSET_UTF_8)); String line = br.readLine();// w ww . j a va 2s .c o m final FieldType textNoNorms = new FieldType(TextField.TYPE_STORED); textNoNorms.setOmitNorms(true); while (line != null) { line = line.trim(); if (line.length() > 0) { //parse row and create a document StringTokenizer st = new StringTokenizer(line, "\t"); Document doc = new Document(); doc.add(new Field("location", st.nextToken(), textNoNorms)); doc.add(new Field("salary", st.nextToken(), textNoNorms)); doc.add(new Field("type", st.nextToken(), textNoNorms)); doc.add(new Field("description", st.nextToken(), textNoNorms)); writer.addDocument(doc); } line = br.readLine(); } writer.close(); //open searcher // this example never closes it reader! IndexReader reader = DirectoryReader.open(rd); searcher = new IndexSearcher(reader); }
From source file:com.github.hotware.lucene.extension.bean.field.BeanInformationCacheImpl.java
License:BEER-WARE LICENSE
private FieldInformation buildFieldInformation(BeanField bf, Field field, Class<?> fieldClass) { com.github.hotware.lucene.extension.bean.type.Type typeWrapper; try {/*from www. jav a 2 s.c om*/ // TODO: maybe cache these? typeWrapper = (com.github.hotware.lucene.extension.bean.type.Type) bf.type().newInstance(); } catch (InstantiationException | IllegalAccessException e) { throw new RuntimeException(e); } FieldType fieldType = new FieldType(); fieldType.setIndexed(bf.index()); fieldType.setStored(bf.store()); fieldType.setTokenized(bf.tokenized()); fieldType.setStoreTermVectors(bf.storeTermVectors()); fieldType.setStoreTermVectorPositions(bf.storeTermVectorPositions()); fieldType.setStoreTermVectorOffsets(bf.storeTermVectorOffsets()); fieldType.setStoreTermVectorPayloads(bf.storeTermVectorPayloads()); fieldType.setOmitNorms(bf.omitNorms()); fieldType.setIndexOptions(bf.indexOptions()); typeWrapper.configureFieldType(fieldType); fieldType.freeze(); return new FieldInformation(new FrozenField(field), fieldClass, fieldType, bf); }
From source file:com.qwazr.search.field.CustomFieldType.java
License:Apache License
/**
 * Builds a Lucene {@code FieldType} from the settings of {@code fieldDef} that are not
 * {@code null} and passes a {@code CustomField} carrying that type and the value to the consumer.
 *
 * @param value    the value to wrap in the custom field
 * @param consumer receives the created field
 */
@Override
public final void fillValue(final Object value, final FieldConsumer consumer) {
    final FieldType luceneType = new FieldType();

    // Only explicitly configured settings are applied; everything else keeps the FieldType default.
    if (fieldDef.stored != null) {
        luceneType.setStored(fieldDef.stored);
    }
    if (fieldDef.tokenized != null) {
        luceneType.setTokenized(fieldDef.tokenized);
    }
    if (fieldDef.store_termvectors != null) {
        luceneType.setStoreTermVectors(fieldDef.store_termvectors);
    }
    if (fieldDef.store_termvector_offsets != null) {
        luceneType.setStoreTermVectorOffsets(fieldDef.store_termvector_offsets);
    }
    if (fieldDef.store_termvector_positions != null) {
        luceneType.setStoreTermVectorPositions(fieldDef.store_termvector_positions);
    }
    if (fieldDef.store_termvector_payloads != null) {
        luceneType.setStoreTermVectorPayloads(fieldDef.store_termvector_payloads);
    }
    if (fieldDef.omit_norms != null) {
        luceneType.setOmitNorms(fieldDef.omit_norms);
    }
    if (fieldDef.numeric_type != null) {
        luceneType.setNumericType(fieldDef.numeric_type);
    }
    if (fieldDef.index_options != null) {
        luceneType.setIndexOptions(fieldDef.index_options);
    }
    if (fieldDef.docvalues_type != null) {
        luceneType.setDocValuesType(fieldDef.docvalues_type);
    }

    final CustomField customField = new CustomField(fieldName, luceneType, value);
    consumer.accept(customField);
}
From source file:com.rocana.lucene.codec.v1.TestRocanaPerFieldPostingsFormat2.java
License:Apache License
/**
 * Repeatedly appends batches of documents with many randomly configured fields
 * (tokenized or not, norms omitted or not) to the same directory and checks after every
 * round that the writer's document count matches the number of documents added so far.
 */
@Test
public void testStressPerFieldCodec() throws IOException {
    Directory dir = newDirectory(random());
    final int docsPerRound = 97;
    int numRounds = atLeast(1);

    for (int round = 0; round < numRounds; round++) {
        int fieldsPerDoc = TestUtil.nextInt(random(), 30, 60);
        IndexWriterConfig config = newIndexWriterConfig(random(), new MockAnalyzer(random()));
        config.setOpenMode(OpenMode.CREATE_OR_APPEND);
        IndexWriter writer = newWriter(dir, config);

        for (int docIdx = 0; docIdx < docsPerRound; docIdx++) {
            final Document doc = new Document();
            for (int fieldIdx = 0; fieldIdx < fieldsPerDoc; fieldIdx++) {
                // Each field gets its own randomly configured, not-stored text type.
                FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
                customType.setTokenized(random().nextBoolean());
                customType.setOmitNorms(random().nextBoolean());
                String content = TestUtil.randomRealisticUnicodeString(random(), 128);
                doc.add(newField(Integer.toString(fieldIdx), content, customType));
            }
            writer.addDocument(doc);
        }

        if (random().nextBoolean()) {
            writer.forceMerge(1);
        }
        writer.commit();

        int expectedDocs = (round + 1) * docsPerRound;
        assertEquals(expectedDocs, writer.maxDoc());
        writer.close();
    }
    dir.close();
}