Usage examples for the org.apache.lucene.index.IndexWriterConfig constructor:
public IndexWriterConfig(Analyzer analyzer)
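Before the project-specific examples below, a minimal self-contained sketch of the constructor in use. Class and field names here (MinimalIndexWriterConfigExample, the "index" path, the "body" field) are illustrative placeholders, not taken from any of the sources that follow.

import java.nio.file.Paths;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

// Minimal sketch: wrap an Analyzer in an IndexWriterConfig and hand it to an IndexWriter.
public class MinimalIndexWriterConfigExample {
    public static void main(String[] args) throws Exception {
        Directory dir = FSDirectory.open(Paths.get("index")); // placeholder index path
        Analyzer analyzer = new StandardAnalyzer();

        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
        iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); // create if missing, otherwise append

        try (IndexWriter writer = new IndexWriter(dir, iwc)) {
            Document doc = new Document();
            doc.add(new TextField("body", "hello lucene", Field.Store.YES));
            writer.addDocument(doc);
        }
        dir.close();
    }
}

The pattern in every example below is the same: construct the config from an Analyzer, optionally set the open mode and other options, then pass it to the IndexWriter constructor.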
From source file:com.czw.search.lucene.example.facet.SimpleSortedSetFacetsExample.java
License:Apache License
/** Build the example index. */
private void index() throws IOException {
    IndexWriter indexWriter = new IndexWriter(indexDir,
            new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE));

    // 'config' is assumed to be the FacetsConfig field declared elsewhere in this example class.
    Document doc = new Document();
    doc.add(new SortedSetDocValuesFacetField("Author", "Bob"));
    doc.add(new SortedSetDocValuesFacetField("Publish Year", "2010"));
    indexWriter.addDocument(config.build(doc));

    doc = new Document();
    doc.add(new SortedSetDocValuesFacetField("Author", "Lisa"));
    doc.add(new SortedSetDocValuesFacetField("Publish Year", "2010"));
    indexWriter.addDocument(config.build(doc));

    doc = new Document();
    doc.add(new SortedSetDocValuesFacetField("Author", "Lisa"));
    doc.add(new SortedSetDocValuesFacetField("Publish Year", "2012"));
    indexWriter.addDocument(config.build(doc));

    doc = new Document();
    doc.add(new SortedSetDocValuesFacetField("Author", "Susan"));
    doc.add(new SortedSetDocValuesFacetField("Publish Year", "2012"));
    indexWriter.addDocument(config.build(doc));

    doc = new Document();
    doc.add(new SortedSetDocValuesFacetField("Author", "Frank"));
    doc.add(new SortedSetDocValuesFacetField("Publish Year", "1999"));
    indexWriter.addDocument(config.build(doc));

    indexWriter.close();
}
From source file:com.czw.search.lucene.example.xmlparser.FormBasedXmlQueryDemo.java
License:Apache License
private void openExampleIndex() throws IOException {
    // Create a RAM-based index from our test data file
    RAMDirectory rd = new RAMDirectory();
    IndexWriterConfig iwConfig = new IndexWriterConfig(analyzer);
    IndexWriter writer = new IndexWriter(rd, iwConfig);
    InputStream dataIn = getServletContext().getResourceAsStream("/WEB-INF/data.tsv");
    BufferedReader br = new BufferedReader(new InputStreamReader(dataIn, StandardCharsets.UTF_8));
    String line = br.readLine();
    final FieldType textNoNorms = new FieldType(TextField.TYPE_STORED);
    textNoNorms.setOmitNorms(true);
    while (line != null) {
        line = line.trim();
        if (line.length() > 0) {
            // parse row and create a document
            StringTokenizer st = new StringTokenizer(line, "\t");
            Document doc = new Document();
            doc.add(new Field("location", st.nextToken(), textNoNorms));
            doc.add(new Field("salary", st.nextToken(), textNoNorms));
            doc.add(new Field("type", st.nextToken(), textNoNorms));
            doc.add(new Field("description", st.nextToken(), textNoNorms));
            writer.addDocument(doc);
        }
        line = br.readLine();
    }
    writer.close();

    // Open the searcher (note: this example never closes its reader).
    IndexReader reader = DirectoryReader.open(rd);
    searcher = new IndexSearcher(reader);
}
From source file:com.difference.historybook.index.lucene.LuceneIndex.java
License:Apache License
/**
 * Constructor for LuceneIndex
 *
 * @param dataDirectory Path to the directory to create an index directory within.
 * @throws IndexException
 */
public LuceneIndex(Path dataDirectory) throws IndexException {
    //TODO: Check to make sure directory is read/writable
    path = dataDirectory.resolve(INDEXDIR);
    try {
        dir = FSDirectory.open(path);
        analyzer = new StandardAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
        iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        writer = new IndexWriter(dir, iwc);
        reader = DirectoryReader.open(writer, false);
        searcher = new IndexSearcher(reader);
        parser = new QueryParser(IndexDocumentAdapter.FIELD_SEARCH, analyzer);
    } catch (IOException e) {
        LOG.error(e.getLocalizedMessage());
        throw new IndexException(e);
    }
}
From source file:com.epam.catgenome.dao.index.FeatureIndexDao.java
License:Open Source License
/**
 * Stores features from a specified feature file to the specified project's Lucene index
 * Sample query: featureId:rs44022* AND (variationType:del OR variationType:ins)
 *
 * @param featureFileId a FeatureFile, for which features to save
 * @param projectId a project, for which to write an index
 * @param entries a list of FeatureIndexEntry to write to index
 * @throws IOException
 */
public void writeLuceneIndexForProject(final Long featureFileId, final long projectId,
        final List<? extends FeatureIndexEntry> entries) throws IOException {
    try (StandardAnalyzer analyzer = new StandardAnalyzer();
            Directory index = fileManager.createIndexForProject(projectId);
            IndexWriter writer = new IndexWriter(index, new IndexWriterConfig(analyzer)
                    .setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND))) {
        FacetsConfig facetsConfig = new FacetsConfig();
        facetsConfig.setIndexFieldName(FeatureIndexFields.CHR_ID.getFieldName(),
                FeatureIndexFields.FACET_CHR_ID.getFieldName());

        for (FeatureIndexEntry entry : entries) {
            Document document = new Document();
            addCommonDocumentFields(document, entry, featureFileId);

            if (entry instanceof VcfIndexEntry) {
                addVcfDocumentFields(document, entry);
            }

            writer.addDocument(facetsConfig.build(document));
        }
    }
}
From source file:com.epam.catgenome.dao.index.FeatureIndexDao.java
License:Open Source License
/**
 * Stores features from a specified feature file to its Lucene index
 * Sample query: featureId:rs44022* AND (variationType:del OR variationType:ins)
 *
 * @param featureFile a FeatureFile, for which features to save
 * @param entries a list of FeatureIndexEntry to write to index
 * @throws IOException
 */
public void writeLuceneIndexForFile(final FeatureFile featureFile,
        final List<? extends FeatureIndexEntry> entries) throws IOException {
    try (StandardAnalyzer analyzer = new StandardAnalyzer();
            Directory index = fileManager.createIndexForFile(featureFile);
            IndexWriter writer = new IndexWriter(index, new IndexWriterConfig(analyzer)
                    .setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND))) {
        FacetsConfig facetsConfig = new FacetsConfig();
        facetsConfig.setIndexFieldName(FeatureIndexFields.CHR_ID.getFieldName(),
                FeatureIndexFields.FACET_CHR_ID.getFieldName());
        facetsConfig.setIndexFieldName(FeatureIndexFields.F_UID.getFieldName(),
                FeatureIndexFields.FACET_UID.getFieldName());

        for (FeatureIndexEntry entry : entries) {
            Document document = new Document();
            addCommonDocumentFields(document, entry, featureFile.getId());

            if (entry instanceof VcfIndexEntry) {
                addVcfDocumentFields(document, entry);
            }

            writer.addDocument(facetsConfig.build(document));
        }
    }
}
From source file:com.epam.catgenome.dao.index.FeatureIndexDao.java
License:Open Source License
/**
 * Deletes features from specified feature files from a project's index
 *
 * @param projectId a project to delete index entries from
 * @param fileIds a list of Pairs of feature type to file ID whose entries to delete. To delete gene file
 *                entries, pass FeatureType.GENE
 */
public void deleteFromIndexByFileId(final long projectId, List<Pair<FeatureType, Long>> fileIds) {
    if (fileIds == null || fileIds.isEmpty() || !fileManager.indexForProjectExists(projectId)) {
        return;
    }

    try (StandardAnalyzer analyzer = new StandardAnalyzer();
            Directory index = fileManager.getIndexForProject(projectId);
            IndexWriter writer = new IndexWriter(index, new IndexWriterConfig(analyzer)
                    .setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND))) {
        if (fileManager.indexForProjectExists(projectId)) {
            for (Pair<FeatureType, Long> id : fileIds) {
                deleteDocumentByTypeAndId(id.getKey(), id.getValue(), writer);
            }
        }
    } catch (IOException e) {
        LOGGER.error("Exception while deleting from index:", e);
    }
}
From source file:com.flycode.CRIBSearch.SearchEngine.Demo.IndexFiles.java
License:Apache License
/** Index all text files under a directory. */
public static void main(String[] args) {
    String usage = "java com.flycode.CRIBSearch.demo.IndexFiles"
            + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n"
            + "This indexes the documents in DOCS_PATH, creating a Lucene index"
            + " in INDEX_PATH that can be searched with SearchFiles class";
    String indexPath = "index";
    String docsPath = null;
    boolean create = true;
    for (int i = 0; i < args.length; i++) {
        if ("-index".equals(args[i])) {
            indexPath = args[i + 1];
            i++;
        } else if ("-docs".equals(args[i])) {
            docsPath = args[i + 1];
            i++;
        } else if ("-update".equals(args[i])) {
            create = false;
        }
    }

    if (docsPath == null) {
        System.err.println("Usage: " + usage);
        System.exit(1);
    }

    final Path docDir = Paths.get(docsPath);
    if (!Files.isReadable(docDir)) {
        System.out.println("Document directory '" + docDir.toAbsolutePath()
                + "' does not exist or is not readable, please check the path");
        System.exit(1);
    }

    Date start = new Date();
    try {
        System.out.println("Indexing to directory '" + indexPath + "'...");

        Directory dir = FSDirectory.open(Paths.get(indexPath));
        Analyzer analyzer = new StandardAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);

        if (create) {
            // Create a new index in the directory, removing any
            // previously indexed documents:
            iwc.setOpenMode(OpenMode.CREATE);
        } else {
            // Add new documents to an existing index:
            iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        }

        // Optional: for better indexing performance, if you
        // are indexing many documents, increase the RAM
        // buffer. But if you do this, increase the max heap
        // size given to the JVM (e.g. add -Xmx512m or -Xmx1g):
        //
        // iwc.setRAMBufferSizeMB(256.0);

        IndexWriter writer = new IndexWriter(dir, iwc);
        indexDocs(writer, docDir);

        // NOTE: if you want to maximize search performance,
        // you can optionally call forceMerge here. This can be
        // a terribly costly operation, so generally it's only
        // worth it when your index is relatively static (i.e.
        // you're done adding documents to it):
        //
        // writer.forceMerge(1);

        writer.close();

        Date end = new Date();
        System.out.println(end.getTime() - start.getTime() + " total milliseconds");
    } catch (IOException e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}
From source file:com.fun.sb.demo.lucene.IndexFiles.java
License:Apache License
/** Index all text files under a directory. */
public static void main(String[] args) {
    String usage = "java org.apache.lucene.demo.IndexFiles"
            + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n"
            + "This indexes the documents in DOCS_PATH, creating a Lucene index"
            + " in INDEX_PATH that can be searched with SearchFiles";
    String indexPath = "/Users/baidu/temp/index/";
    String docsPath = "/Users/baidu/temp/";
    boolean create = true;
    for (int i = 0; i < args.length; i++) {
        if ("-index".equals(args[i])) {
            indexPath = args[i + 1];
            i++;
        } else if ("-docs".equals(args[i])) {
            docsPath = args[i + 1];
            i++;
        } else if ("-update".equals(args[i])) {
            create = false;
        }
    }

    if (docsPath == null) {
        System.err.println("Usage: " + usage);
        System.exit(1);
    }

    final Path docDir = Paths.get(docsPath);
    if (!Files.isReadable(docDir)) {
        System.out.println("Document directory '" + docDir.toAbsolutePath()
                + "' does not exist or is not readable, please check the path");
        System.exit(1);
    }

    Date start = new Date();
    try {
        System.out.println("Indexing to directory '" + indexPath + "'...");

        Directory dir = FSDirectory.open(Paths.get(indexPath));
        Analyzer analyzer = new StandardAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);

        if (create) {
            // Create a new index in the directory, removing any
            // previously indexed documents:
            iwc.setOpenMode(OpenMode.CREATE);
        } else {
            // Add new documents to an existing index:
            iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        }

        // Optional: for better indexing performance, if you
        // are indexing many documents, increase the RAM
        // buffer. But if you do this, increase the max heap
        // size given to the JVM (e.g. add -Xmx512m or -Xmx1g):
        //
        // iwc.setRAMBufferSizeMB(256.0);

        IndexWriter writer = new IndexWriter(dir, iwc);
        indexDocs(writer, docDir);

        // NOTE: if you want to maximize search performance,
        // you can optionally call forceMerge here. This can be
        // a terribly costly operation, so generally it's only
        // worth it when your index is relatively static (i.e.
        // you're done adding documents to it):
        //
        // writer.forceMerge(1);

        writer.close();

        Date end = new Date();
        System.out.println(end.getTime() - start.getTime() + " total milliseconds");
    } catch (IOException e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}
From source file:com.gemstone.gemfire.cache.lucene.internal.distributed.DistributedScoringJUnitTest.java
License:Apache License
private IndexRepositoryImpl createIndexRepo() throws IOException {
    ConcurrentHashMap<String, File> fileRegion = new ConcurrentHashMap<String, File>();
    ConcurrentHashMap<ChunkKey, byte[]> chunkRegion = new ConcurrentHashMap<ChunkKey, byte[]>();
    RegionDirectory dir = new RegionDirectory(fileRegion, chunkRegion);

    IndexWriterConfig config = new IndexWriterConfig(analyzer);
    IndexWriter writer = new IndexWriter(dir, config);

    return new IndexRepositoryImpl(region, writer, mapper);
}
From source file:com.gemstone.gemfire.cache.lucene.internal.IndexRepositoryFactory.java
License:Apache License
public IndexRepository createIndexRepository(final Integer bucketId, PartitionedRegion userRegion,
        PartitionedRegion fileRegion, PartitionedRegion chunkRegion, LuceneSerializer serializer,
        Analyzer analyzer, LuceneIndexStats indexStats, FileSystemStats fileSystemStats) throws IOException {
    final IndexRepository repo;
    BucketRegion fileBucket = getMatchingBucket(fileRegion, bucketId);
    BucketRegion chunkBucket = getMatchingBucket(chunkRegion, bucketId);
    if (fileBucket == null || chunkBucket == null) {
        return null;
    }

    RegionDirectory dir = new RegionDirectory(fileBucket, chunkBucket, fileSystemStats);
    IndexWriterConfig config = new IndexWriterConfig(analyzer);
    IndexWriter writer = new IndexWriter(dir, config);

    repo = new IndexRepositoryImpl(fileBucket, writer, serializer, indexStats);

    return repo;
}