List of usage examples for the org.apache.lucene.index.IndexWriter constructor IndexWriter(Directory, IndexWriterConfig)
public IndexWriter(Directory d, IndexWriterConfig conf) throws IOException
Constructs a new IndexWriter per the settings given in conf: d is the index directory, conf holds the configuration settings according to which the IndexWriter should be initialized.
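Before the project examples below, here is a minimal, self-contained sketch of this constructor. It assumes a Lucene 5.x-or-later style API (IndexWriterConfig taking only an Analyzer, FSDirectory.open taking a Path); the class name and the "example-index" path are placeholders, not taken from any example on this page.

import java.nio.file.Paths;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class MinimalIndexWriterExample {
    public static void main(String[] args) throws Exception {
        // Open (or create) an index under ./example-index and append to it if it already exists.
        try (Directory dir = FSDirectory.open(Paths.get("example-index"));
                IndexWriter writer = new IndexWriter(dir,
                        new IndexWriterConfig(new StandardAnalyzer())
                                .setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND))) {
            Document doc = new Document();
            doc.add(new TextField("contents", "hello lucene", Field.Store.YES));
            writer.addDocument(doc);
            writer.commit(); // make the new document visible to readers opened afterwards
        }
    }
}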
From source file:com.epam.catgenome.dao.index.FeatureIndexDao.java
License:Open Source License
/**
 * Stores features from a specified feature file to the specified project's Lucene index
 * Sample query: featureId:rs44022* AND (variationType:del OR variationType:ins)
 *
 * @param featureFileId a FeatureFile, for which features to save
 * @param projectId a project, for which to write an index
 * @param entries a list of FeatureIndexEntry to write to index
 * @throws IOException
 */
public void writeLuceneIndexForProject(final Long featureFileId, final long projectId,
        final List<? extends FeatureIndexEntry> entries) throws IOException {
    try (StandardAnalyzer analyzer = new StandardAnalyzer();
            Directory index = fileManager.createIndexForProject(projectId);
            IndexWriter writer = new IndexWriter(index, new IndexWriterConfig(analyzer)
                    .setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND))) {
        FacetsConfig facetsConfig = new FacetsConfig();
        facetsConfig.setIndexFieldName(FeatureIndexFields.CHR_ID.getFieldName(),
                FeatureIndexFields.FACET_CHR_ID.getFieldName());

        for (FeatureIndexEntry entry : entries) {
            Document document = new Document();
            addCommonDocumentFields(document, entry, featureFileId);

            if (entry instanceof VcfIndexEntry) {
                addVcfDocumentFields(document, entry);
            }

            writer.addDocument(facetsConfig.build(document));
        }
    }
}
From source file:com.epam.catgenome.dao.index.FeatureIndexDao.java
License:Open Source License
/**
 * Stores features from a specified feature file to its Lucene index
 * Sample query: featureId:rs44022* AND (variationType:del OR variationType:ins)
 *
 * @param featureFile a FeatureFile, for which features to save
 * @param entries a list of FeatureIndexEntry to write to index
 * @throws IOException
 */
public void writeLuceneIndexForFile(final FeatureFile featureFile,
        final List<? extends FeatureIndexEntry> entries) throws IOException {
    try (StandardAnalyzer analyzer = new StandardAnalyzer();
            Directory index = fileManager.createIndexForFile(featureFile);
            IndexWriter writer = new IndexWriter(index, new IndexWriterConfig(analyzer)
                    .setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND))) {
        FacetsConfig facetsConfig = new FacetsConfig();
        facetsConfig.setIndexFieldName(FeatureIndexFields.CHR_ID.getFieldName(),
                FeatureIndexFields.FACET_CHR_ID.getFieldName());
        facetsConfig.setIndexFieldName(FeatureIndexFields.F_UID.getFieldName(),
                FeatureIndexFields.FACET_UID.getFieldName());

        for (FeatureIndexEntry entry : entries) {
            Document document = new Document();
            addCommonDocumentFields(document, entry, featureFile.getId());

            if (entry instanceof VcfIndexEntry) {
                addVcfDocumentFields(document, entry);
            }

            writer.addDocument(facetsConfig.build(document));
        }
    }
}
From source file:com.epam.catgenome.dao.index.FeatureIndexDao.java
License:Open Source License
/**
 * Deletes features from specified feature files from project's index
 *
 * @param projectId a project to delete index entries
 * @param fileIds a list of Pair of feature types to file Ids, which entries to delete. To delete gene file
 *                entries, pass FeatureType.GENE
 */
public void deleteFromIndexByFileId(final long projectId, List<Pair<FeatureType, Long>> fileIds) {
    if (fileIds == null || fileIds.isEmpty() || !fileManager.indexForProjectExists(projectId)) {
        return;
    }

    try (StandardAnalyzer analyzer = new StandardAnalyzer();
            Directory index = fileManager.getIndexForProject(projectId);
            IndexWriter writer = new IndexWriter(index, new IndexWriterConfig(analyzer)
                    .setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND))) {
        if (fileManager.indexForProjectExists(projectId)) {
            for (Pair<FeatureType, Long> id : fileIds) {
                deleteDocumentByTypeAndId(id.getKey(), id.getValue(), writer);
            }
        }
    } catch (IOException e) {
        LOGGER.error("Exception while deleting from index:", e);
    }
}
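The helper deleteDocumentByTypeAndId used above is not part of this listing. Assuming it deletes matching documents by term, the underlying IndexWriter call might look roughly like this hypothetical helper (the "fileId" field name is a placeholder, not taken from the project):

// Hypothetical helper, not the project's actual implementation: remove every document
// whose fileId field matches the given id via IndexWriter.deleteDocuments(Term).
private static void deleteByFileId(IndexWriter writer, long fileId) throws IOException {
    writer.deleteDocuments(new Term("fileId", String.valueOf(fileId)));
}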
From source file:com.epimorphics.server.indexers.LuceneIndex.java
License:Apache License
protected synchronized IndexWriter getIndexWriter() {
    if (writer == null) {
        try {
            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_40);
            IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_40, analyzer);
            // The open mode must be set before the IndexWriter is constructed;
            // setting it afterwards (as the original code did) has no effect.
            config.setOpenMode(OpenMode.CREATE_OR_APPEND);
            writer = new IndexWriter(indexDir, config);
        } catch (Exception e) {
            throw new EpiException(e);
        }
    }
    return writer;
}
From source file:com.evoapps.indexPubData.IndexFiles.java
License:Apache License
void indextitle(String id, String title) {
    iwc = new IndexWriterConfig(Version.LUCENE_40, analyzer);
    if (create) {
        // Create a new index in the directory, removing any
        // previously indexed documents:
        iwc.setOpenMode(OpenMode.CREATE);
    } else {
        // Add new documents to an existing index:
        iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
    }

    try {
        writer = new IndexWriter(dir, iwc);
    } catch (IOException e1) {
        e1.printStackTrace();
        return; // no writer to work with
    }

    Document doc = new Document();
    Field pathField = new StringField("path", id, Field.Store.YES);
    doc.add(pathField);
    // doc.add(new LongField("modified", file.lastModified(), Field.Store.NO));
    // doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8"))));
    // check the role of UTF-8 string in the doc.add() method
    doc.add(new TextField("contents", title, Field.Store.YES));

    try {
        if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
            // New index, so we just add the document (no old document can be there):
            // System.out.println("adding " + file);
            writer.addDocument(doc);
        } else {
            // Existing index (an old copy of this document may have been indexed) so
            // we use updateDocument instead to replace the old one matching the exact
            // path, if present:
            // System.out.println("updating " + file);
            writer.updateDocument(new Term("path", id), doc);
        }
        // Close the writer in both branches (the original only closed it on the update path).
        writer.close();
    } catch (IOException e) {
        e.printStackTrace();
    }
}
From source file:com.evoapps.indexPubData.IndexFiles.java
License:Apache License
public void indexDocumentList(ArrayList<com.evoapps.indexPubData.Document> list) {
    Iterator<com.evoapps.indexPubData.Document> it = list.iterator();
    com.evoapps.indexPubData.Document document;

    iwc = new IndexWriterConfig(Version.LUCENE_40, analyzer);
    if (create) {
        // Create a new index in the directory, removing any
        // previously indexed documents:
        iwc.setOpenMode(OpenMode.CREATE);
    } else {
        // Add new documents to an existing index:
        iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
    }

    try {
        writer = new IndexWriter(dir, iwc);
    } catch (IOException e1) {
        e1.printStackTrace();
        return; // no writer to work with
    }

    while (it.hasNext()) {
        document = it.next();
        Document doc = new Document();
        Field pathField = new StringField("path", document.getTitle(), Field.Store.YES);
        doc.add(pathField);
        // doc.add(new LongField("modified", file.lastModified(), Field.Store.NO));
        // doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8"))));
        // check the role of UTF-8 string in the doc.add() method
        System.out.println("path>>" + document.getTitle() + "content>>" + document.getContent());
        doc.add(new TextField("contents", document.getContent(), Field.Store.YES));
        System.out.println("doc" + doc.getFields());

        try {
            if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
                // New index, so we just add the document (no old document can be there):
                System.out.println("adding " + document.getContent());
                writer.addDocument(doc);
            } else {
                // Existing index (an old copy of this document may have been indexed) so
                // we use updateDocument instead to replace the old one matching the exact
                // path, if present:
                // System.out.println("updating " + file);
                writer.updateDocument(new Term("path", document.getTitle()), doc);
                // Note: the original closed the writer here, which would make every
                // subsequent updateDocument call in this loop fail; close it once, below.
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    try {
        writer.close();
    } catch (IOException e) {
        e.printStackTrace();
    }
}
From source file:com.example.search.IndexFiles.java
License:Apache License
/** Index all text files under a directory. */
public static void main(String[] args) {
    String usage = "java org.apache.lucene.demo.IndexFiles"
            + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n"
            + "This indexes the documents in DOCS_PATH, creating a Lucene index "
            + "in INDEX_PATH that can be searched with SearchFiles";
    String indexPath = "index";
    // String docsPath = "E:\\work\\webExp\\exp\\sina_ansi";
    String docsPath = ".\\doc";
    boolean create = true;

    /*
    for (int i = 0; i < args.length; i++) {
        if ("-index".equals(args[i])) {
            indexPath = args[i + 1];
            i++;
        } else if ("-docs".equals(args[i])) {
            docsPath = args[i + 1];
            i++;
        } else if ("-update".equals(args[i])) {
            create = false;
        }
    }
    */

    /*
    if (docsPath == null) {
        System.err.println("Usage: " + usage);
        System.exit(1);
    }
    */

    final File docDir = new File(docsPath);
    if (!docDir.exists() || !docDir.canRead()) {
        System.out.println("Document directory '" + docDir.getAbsolutePath()
                + "' does not exist or is not readable, please check the path");
        System.exit(1);
    }

    Date start = new Date();
    try {
        System.out.println("Indexing to directory '" + indexPath + "'...");

        Directory dir = FSDirectory.open(new File(indexPath));
        // Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);
        Analyzer analyzer = new ICTCLASAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_31, analyzer);

        if (create) {
            // Create a new index in the directory, removing any
            // previously indexed documents:
            iwc.setOpenMode(OpenMode.CREATE);
        } else {
            // Add new documents to an existing index:
            iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        }

        // Optional: for better indexing performance, if you
        // are indexing many documents, increase the RAM
        // buffer. But if you do this, increase the max heap
        // size to the JVM (eg add -Xmx512m or -Xmx1g):
        //
        // iwc.setRAMBufferSizeMB(256.0);

        IndexWriter writer = new IndexWriter(dir, iwc);
        indexDocs(writer, docDir);

        // NOTE: if you want to maximize search performance,
        // you can optionally call forceMerge here. This can be
        // a terribly costly operation, so generally it's only
        // worth it when your index is relatively static (ie
        // you're done adding documents to it):
        //
        // writer.forceMerge(1);

        writer.close();

        Date end = new Date();
        System.out.println(end.getMinutes() - start.getMinutes() + " total minutes");
        System.out.println(end.getTime() - start.getTime() + " total milliseconds");
    } catch (IOException e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}
From source file:com.flycode.CRIBSearch.SearchEngine.Demo.IndexFiles.java
License:Apache License
/** Index all text files under a directory. */
public static void main(String[] args) {
    String usage = "java com.flycode.CRIBSearch.demo.IndexFiles"
            + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n"
            + "This indexes the documents in DOCS_PATH, creating a Lucene index "
            + "in INDEX_PATH that can be searched with SearchFiles class";
    String indexPath = "index";
    String docsPath = null;
    boolean create = true;
    for (int i = 0; i < args.length; i++) {
        if ("-index".equals(args[i])) {
            indexPath = args[i + 1];
            i++;
        } else if ("-docs".equals(args[i])) {
            docsPath = args[i + 1];
            i++;
        } else if ("-update".equals(args[i])) {
            create = false;
        }
    }

    if (docsPath == null) {
        System.err.println("Usage: " + usage);
        System.exit(1);
    }

    final Path docDir = Paths.get(docsPath);
    if (!Files.isReadable(docDir)) {
        System.out.println("Document directory '" + docDir.toAbsolutePath()
                + "' does not exist or is not readable, please check the path");
        System.exit(1);
    }

    Date start = new Date();
    try {
        System.out.println("Indexing to directory '" + indexPath + "'...");

        Directory dir = FSDirectory.open(Paths.get(indexPath));
        Analyzer analyzer = new StandardAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);

        if (create) {
            // Create a new index in the directory, removing any
            // previously indexed documents:
            iwc.setOpenMode(OpenMode.CREATE);
        } else {
            // Add new documents to an existing index:
            iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        }

        // Optional: for better indexing performance, if you
        // are indexing many documents, increase the RAM
        // buffer. But if you do this, increase the max heap
        // size to the JVM (eg add -Xmx512m or -Xmx1g):
        //
        // iwc.setRAMBufferSizeMB(256.0);

        IndexWriter writer = new IndexWriter(dir, iwc);
        indexDocs(writer, docDir);

        // NOTE: if you want to maximize search performance,
        // you can optionally call forceMerge here. This can be
        // a terribly costly operation, so generally it's only
        // worth it when your index is relatively static (ie
        // you're done adding documents to it):
        //
        // writer.forceMerge(1);

        writer.close();

        Date end = new Date();
        System.out.println(end.getTime() - start.getTime() + " total milliseconds");
    } catch (IOException e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}
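For reference, invoking this indexer follows the usage string printed above; the paths below are placeholders, and the exact main-class name depends on how the project is packaged:

java com.flycode.CRIBSearch.demo.IndexFiles -index /tmp/crib-index -docs /path/to/docs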
From source file:com.FormBasedXmlQueryDemo.java
License:Apache License
private void openExampleIndex() throws IOException {
    // Create a RAM-based index from our test data file
    RAMDirectory rd = new RAMDirectory();
    IndexWriterConfig iwConfig = new IndexWriterConfig(Version.LUCENE_40, analyzer);
    IndexWriter writer = new IndexWriter(rd, iwConfig);
    InputStream dataIn = getServletContext().getResourceAsStream("/WEB-INF/data.tsv");
    BufferedReader br = new BufferedReader(new InputStreamReader(dataIn, IOUtils.CHARSET_UTF_8));
    String line = br.readLine();
    final FieldType textNoNorms = new FieldType(TextField.TYPE_STORED);
    textNoNorms.setOmitNorms(true);
    while (line != null) {
        line = line.trim();
        if (line.length() > 0) {
            // parse row and create a document
            StringTokenizer st = new StringTokenizer(line, "\t");
            Document doc = new Document();
            doc.add(new Field("location", st.nextToken(), textNoNorms));
            doc.add(new Field("salary", st.nextToken(), textNoNorms));
            doc.add(new Field("type", st.nextToken(), textNoNorms));
            doc.add(new Field("description", st.nextToken(), textNoNorms));
            writer.addDocument(doc);
        }
        line = br.readLine();
    }
    writer.close();

    // open searcher
    // note: this example never closes its reader!
    IndexReader reader = DirectoryReader.open(rd);
    searcher = new IndexSearcher(reader);
}
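The code itself notes that the reader is never closed. One possible way to release it, assuming the surrounding class is a servlet and the reader is kept in a field (both assumptions, not part of the original demo), is a sketch like this:

// Hypothetical cleanup, not part of the original demo: keep the reader in a field
// when openExampleIndex() creates it, then release it when the servlet shuts down.
private IndexReader exampleReader; // assumed field assigned in openExampleIndex()

@Override
public void destroy() {
    try {
        if (exampleReader != null) {
            exampleReader.close();
        }
    } catch (IOException e) {
        // nothing useful to do at shutdown
    }
    super.destroy();
}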
From source file:com.foundationdb.lucene.SimpleTest.java
License:Open Source License
@Test
public void indexBasic() throws Exception {
    StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_44);
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_44, analyzer);
    // recreate the index on each execution
    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    config.setCodec(new FDBCodec());
    FDBDirectory dir = createDirectoryForMethod();
    IndexWriter writer = new IndexWriter(dir, config);
    try {
        writer.addDocument(Arrays.asList(
                new TextField("title", "The title of my first document", Store.YES),
                new TextField("content", "The content of the first document", Store.NO)));
        writer.addDocument(Arrays.asList(
                new TextField("title", "The title of the second document", Store.YES),
                new TextField("content", "And this is the content", Store.NO)));
    } finally {
        writer.close();
    }
    assertDocumentsAreThere(dir, 2);
}
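The assertDocumentsAreThere helper is not shown in this listing; a plausible sketch, assuming it simply checks the live document count of the index, could be:

// Assumed shape of the helper used above, not the project's actual code:
// open a reader on the directory and compare the number of live documents.
private static void assertDocumentsAreThere(Directory dir, int expected) throws IOException {
    try (IndexReader reader = DirectoryReader.open(dir)) {
        assertEquals(expected, reader.numDocs());
    }
}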