List of usage examples for org.apache.lucene.analysis.miscellaneous PerFieldAnalyzerWrapper PerFieldAnalyzerWrapper
public PerFieldAnalyzerWrapper(Analyzer defaultAnalyzer, Map<String, Analyzer> fieldAnalyzers)
From source file:at.ac.univie.mminf.luceneSKOS.test.termexpansion.LabelbasedTermExpansionTest.java
License:Apache License
/**
 * This test indexes a sample metadata record (=lucene document) having a
 * "title", "description", and "subject" field.
 * <p/>
 * A search for "arms" returns that record as a result because "arms" is
 * defined as an alternative label for "weapons", the term which is
 * contained in the subject field.
 *
 * @throws IOException if indexing or searching fails
 */
@Test
public void labelBasedTermExpansion() throws IOException {
    /* defining the document to be indexed */
    Document doc = new Document();
    doc.add(new Field("title", "Spearhead", TextField.TYPE_STORED));
    doc.add(new Field("description",
            "Roman iron spearhead. The spearhead was attached to one end of a wooden shaft..."
                    + "The spear was mainly a thrusting weapon, but could also be thrown. "
                    + "It was the principal weapon of the auxiliary soldier... "
                    + "(second - fourth century, Arbeia Roman Fort).",
            TextField.TYPE_NOT_STORED));
    doc.add(new Field("subject", "weapons", TextField.TYPE_NOT_STORED));

    /* setting up the SKOS analyzer */
    String skosFile = "src/test/resources/skos_samples/ukat_examples.n3";
    String indexPath = "build/";

    /* ExpansionType.LABEL -> the field to be analyzed (expanded) contains plain labels
     * (the original comment said URI, copy-pasted from the URI-based test) */
    Analyzer skosAnalyzer = new SKOSAnalyzer(indexPath, skosFile, ExpansionType.LABEL);

    /* Define different analyzers for different fields */
    Map<String, Analyzer> analyzerPerField = new HashMap<>();
    analyzerPerField.put("subject", skosAnalyzer);
    PerFieldAnalyzerWrapper indexAnalyzer =
            new PerFieldAnalyzerWrapper(new SimpleAnalyzer(), analyzerPerField);

    /* setting up a writer with a default (simple) analyzer */
    writer = new IndexWriter(new RAMDirectory(), new IndexWriterConfig(indexAnalyzer));

    /* adding the document to the index */
    writer.addDocument(doc);

    /* defining a query that searches over all fields */
    BooleanQuery.Builder builder = new BooleanQuery.Builder();
    builder.add(new TermQuery(new Term("title", "arms")), BooleanClause.Occur.SHOULD)
            .add(new TermQuery(new Term("description", "arms")), BooleanClause.Occur.SHOULD)
            .add(new TermQuery(new Term("subject", "arms")), BooleanClause.Occur.SHOULD);

    /* creating a new searcher */
    searcher = new IndexSearcher(DirectoryReader.open(writer, false));
    TopDocs results = searcher.search(builder.build(), 10);

    /* the document matches because "arms" is among the expanded terms */
    assertEquals(1, results.totalHits);

    /* defining a query that searches for a broader concept */
    Query query = new TermQuery(new Term("subject", "military equipment"));
    results = searcher.search(query, 10);

    /* ... also returns the document as result */
    assertEquals(1, results.totalHits);
}
From source file:at.ac.univie.mminf.luceneSKOS.test.termexpansion.URIbasedTermExpansionTest.java
License:Apache License
/** * This test indexes a sample metadata record (=lucene document) having a * "title", "description", and "subject" field, which is semantically * enriched by a URI pointing to a SKOS concept "weapons". * <p/>/* w w w.j ava2 s . c om*/ * A search for "arms" returns that record as a result because "arms" is * defined as an alternative label (altLabel) for the concept "weapons". * * @throws IOException */ @Test public void uriBasedTermExpansion() throws IOException { /* defining the document to be indexed */ Document doc = new Document(); doc.add(new Field("title", "Spearhead", TextField.TYPE_STORED)); doc.add(new Field("description", "Roman iron spearhead. The spearhead was attached to one end of a wooden shaft..." + "The spear was mainly a thrusting weapon, but could also be thrown. " + "It was the principal weapon of the auxiliary soldier... " + "(second - fourth century, Arbeia Roman Fort).", TextField.TYPE_NOT_STORED)); doc.add(new Field("subject", "http://www.ukat.org.uk/thesaurus/concept/859", TextField.TYPE_NOT_STORED)); /* setting up the SKOS analyzer */ String skosFile = "src/test/resources/skos_samples/ukat_examples.n3"; String indexPath = "build/"; /* ExpansionType.URI->the field to be analyzed (expanded) contains URIs */ Analyzer skosAnalyzer = new SKOSAnalyzer(indexPath, skosFile, ExpansionType.URI); /* Define different analyzers for different fields */ Map<String, Analyzer> analyzerPerField = new HashMap<>(); analyzerPerField.put("subject", skosAnalyzer); PerFieldAnalyzerWrapper indexAnalyzer = new PerFieldAnalyzerWrapper(new SimpleAnalyzer(), analyzerPerField); /* setting up a writer with a default (simple) analyzer */ writer = new IndexWriter(new RAMDirectory(), new IndexWriterConfig(indexAnalyzer)); /* adding the document to the index */ writer.addDocument(doc); /* defining a query that searches over all fields */ BooleanQuery.Builder builder = new BooleanQuery.Builder(); builder.add(new TermQuery(new Term("title", "arms")), 
BooleanClause.Occur.SHOULD) .add(new TermQuery(new Term("description", "arms")), BooleanClause.Occur.SHOULD) .add(new TermQuery(new Term("subject", "arms")), BooleanClause.Occur.SHOULD); /* creating a new searcher */ searcher = new IndexSearcher(DirectoryReader.open(writer, false)); TopDocs results = searcher.search(builder.build(), 10); /* the document matches because "arms" is among the expanded terms */ assertEquals(1, results.totalHits); /* defining a query that searches for a broader concept */ Query query = new TermQuery(new Term("subject", "military equipment")); results = searcher.search(query, 10); /* ... also returns the document as result */ assertEquals(1, results.totalHits); }
From source file:com.fuerve.villageelder.common.Lucene.java
License:Apache License
/** * Gets the per-field Lucene analyzer common to indexing and search. * @return The constructed instance of the Lucene per-field analyzer. *///www . jav a2 s . c om public static final Analyzer getPerFieldAnalyzer() { if (analyzer == null) { Map<String, Analyzer> analyzerPerField = new HashMap<String, Analyzer>(); analyzerPerField.put("Author", new SimpleAnalyzer(LUCENE_VERSION)); analyzerPerField.put("Revision", new WhitespaceAnalyzer(LUCENE_VERSION)); analyzerPerField.put("RevisionNumber", new WhitespaceAnalyzer(LUCENE_VERSION)); analyzerPerField.put("Date", new WhitespaceAnalyzer(LUCENE_VERSION)); analyzerPerField.put("Message", new StandardAnalyzer(LUCENE_VERSION)); analyzerPerField.put("ChangedPath", new KeywordAnalyzer()); analyzerPerField.put("Path", new KeywordAnalyzer()); analyzerPerField.put("Change", new KeywordAnalyzer()); analyzerPerField.put("CopyPath", new KeywordAnalyzer()); analyzerPerField.put("CopyRevisionNumber", new WhitespaceAnalyzer(LUCENE_VERSION)); analyzerPerField.put("CopyRevision", new WhitespaceAnalyzer(LUCENE_VERSION)); analyzer = new PerFieldAnalyzerWrapper(new SimpleAnalyzer(LUCENE_VERSION), analyzerPerField); } return analyzer; }
From source file:com.gemstone.gemfire.cache.lucene.internal.LuceneIndexCreationProfileJUnitTest.java
License:Apache License
private Analyzer getPerFieldAnalyzerWrapper(Map<String, Analyzer> fieldAnalyzers) { return new PerFieldAnalyzerWrapper(new StandardAnalyzer(), fieldAnalyzers); }
From source file:com.gemstone.gemfire.cache.lucene.internal.LuceneServiceImpl.java
License:Apache License
@Override public void createIndex(String indexName, String regionPath, Map<String, Analyzer> analyzerPerField) { Analyzer analyzer = new PerFieldAnalyzerWrapper(new StandardAnalyzer(), analyzerPerField); String[] fields = (String[]) analyzerPerField.keySet() .toArray(new String[analyzerPerField.keySet().size()]); createIndex(indexName, regionPath, analyzer, fields); }
From source file:com.gemstone.gemfire.cache.lucene.internal.LuceneServiceImplJUnitTest.java
License:Apache License
@Test public void testCreateIndexForPRWithAnalyzer() throws IOException, ParseException { getService();// ww w.j a va 2 s .co m StandardAnalyzer sa = new StandardAnalyzer(); KeywordAnalyzer ka = new KeywordAnalyzer(); Map<String, Analyzer> analyzerPerField = new HashMap<String, Analyzer>(); analyzerPerField.put("field1", ka); analyzerPerField.put("field2", sa); analyzerPerField.put("field3", sa); // field2 and field3 will use StandardAnalyzer PerFieldAnalyzerWrapper analyzer2 = new PerFieldAnalyzerWrapper(sa, analyzerPerField); service.createIndex("index1", "PR1", analyzerPerField); createPR("PR1", false); LuceneIndexImpl index1 = (LuceneIndexImpl) service.getIndex("index1", "PR1"); assertTrue(index1 instanceof LuceneIndexForPartitionedRegion); LuceneIndexForPartitionedRegion index1PR = (LuceneIndexForPartitionedRegion) index1; assertEquals("index1", index1.getName()); assertEquals("/PR1", index1.getRegionPath()); String[] fields1 = index1.getFieldNames(); assertEquals(3, fields1.length); Analyzer analyzer = index1PR.getAnalyzer(); assertTrue(analyzer instanceof PerFieldAnalyzerWrapper); RepositoryManager RepositoryManager = index1PR.getRepositoryManager(); assertTrue(RepositoryManager != null); final String fileRegionName = LuceneServiceImpl.getUniqueIndexName("index1", "/PR1") + ".files"; final String chunkRegionName = LuceneServiceImpl.getUniqueIndexName("index1", "/PR1") + ".chunks"; PartitionedRegion filePR = (PartitionedRegion) cache.getRegion(fileRegionName); PartitionedRegion chunkPR = (PartitionedRegion) cache.getRegion(chunkRegionName); assertTrue(filePR != null); assertTrue(chunkPR != null); }
From source file:com.github.hotware.lucene.extension.bean.converter.BeanConverterImpl.java
License:BEER-WARE LICENSE
@Override public PerFieldAnalyzerWrapper getPerFieldAnalyzerWrapper(Class<?> clazz) { this.lock.lock(); try {/*from w w w. j ava2 s . c o m*/ PerFieldAnalyzerWrapper ret; if (!this.perFieldAnalyzerWrapperCache.containsKey(clazz)) { Analyzer defaultAnalyzer = Constants.DEFAULT_ANALYZER; Map<String, Analyzer> fieldAnalyzers = new HashMap<String, Analyzer>(); for (FieldInformation info : this.cache.getFieldInformations(clazz)) { BeanField bf = info.getBeanField(); String fieldName = bf.name(); if (fieldName.equals(Constants.DEFAULT_NAME)) { fieldName = info.getField().getName(); } Analyzer analyzer; try { analyzer = ((AnalyzerProvider) bf.analyzerProvider().newInstance()).getAnalyzer(info); } catch (InstantiationException | IllegalAccessException e) { throw new RuntimeException(e); } if (!analyzer.equals(defaultAnalyzer)) { fieldAnalyzers.put(fieldName, analyzer); } } ret = new PerFieldAnalyzerWrapper(defaultAnalyzer, fieldAnalyzers); this.perFieldAnalyzerWrapperCache.put(clazz, ret); } else { ret = this.perFieldAnalyzerWrapperCache.get(clazz); } return ret; } finally { this.lock.unlock(); } }
From source file:com.github.mosuka.apache.lucene.example.utils.LuceneExampleUtil.java
License:Apache License
public static PerFieldAnalyzerWrapper createAnalyzerWrapper() { PerFieldAnalyzerWrapper analyzerWrapper = new PerFieldAnalyzerWrapper(getAnalyzerMap().get("text"), getAnalyzerMap());/*w ww . j a va2 s . co m*/ return analyzerWrapper; }
From source file:com.mathworks.xzheng.analysis.keyword.KeywordAnalyzerTest.java
License:Apache License
public void testPerFieldAnalyzer() throws Exception { Map<String, Analyzer> analyzerPerField = new HashMap<String, Analyzer>(); analyzerPerField.put("partnum", new KeywordAnalyzer()); PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(new SimpleAnalyzer(Version.LUCENE_46), analyzerPerField);/*from w ww.ja v a2 s .co m*/ Query query = new QueryParser(Version.LUCENE_46, "description", analyzer).parse("partnum:Q36 AND SPACE"); assertEquals("Q36 kept as-is", "+partnum:Q36 +space", query.toString("description")); assertEquals("doc found!", 1, TestUtil.hitCount(searcher, query)); }
From source file:com.shaie.annots.example.AnnotatorTokenFilterExample.java
License:Apache License
@SuppressWarnings("resource") private static Analyzer createAnalyzer() { final Analyzer colorAnnotatorAnalyzer = new ColorAnnotatorAnalyzer(); final Analyzer animalAnnotatorAnalyzer = new AnimalAnnotatorAnalyzer(); final Analyzer defaultAnalyzer = new WhitespaceAnalyzer(); return new PerFieldAnalyzerWrapper(defaultAnalyzer, ImmutableMap.<String, Analyzer>of(COLOR_FIELD, colorAnnotatorAnalyzer, ANIMAL_FIELD, animalAnnotatorAnalyzer)); }