List of usage examples for org.apache.lucene.index IndexWriterConfig setOpenMode
public IndexWriterConfig setOpenMode(OpenMode openMode)
From source file:edu.ur.ir.index.DefaultPowerPointTextExtractorTest.java
License:Apache License
/** * Test basic search within a plain text document * /*w w w. ja v a2s . c om*/ * @param description * @throws Exception */ public void testIndexPowerPointDocument() throws Exception { RepositoryBasedTestHelper repoHelper = new RepositoryBasedTestHelper(); Repository repo = repoHelper.createRepository("localFileServer", "displayName", "file_database", "my_repository", properties.getProperty("a_repo_path"), "default_folder"); // create the first file to store in the temporary folder String tempDirectory = properties.getProperty("ir_core_temp_directory"); File directory = new File(tempDirectory); // helper to create the file FileUtil testUtil = new FileUtil(); testUtil.createDirectory(directory); String baseLocation = properties.getProperty("ir_core_location"); String powerPointFile = properties.getProperty("power_point_file"); File f1 = new File(baseLocation + powerPointFile); assert f1 != null : "File should not be null"; assert f1.canRead() : "Should be able to read the file " + f1.getAbsolutePath(); FileInfo info = repo.getFileDatabase().addFile(f1, "indexed_power_point_file"); info.setExtension("ppt"); FileTextExtractor documentCreator = new DefaultPowerPointTextExtractor(); assert documentCreator.canExtractText(info.getExtension()) : "Cannot create document for extension " + info.getExtension(); String text = documentCreator.getText(new File(info.getFullPath())); Document doc = new Document(); doc.add(new Field("body", text, Field.Store.NO, Field.Index.ANALYZED)); assert doc != null : "Document should be created"; // create the lucene directory in memory Directory dir; try { dir = new RAMDirectory(); } catch (Exception e1) { throw new RuntimeException(e1); } // store the document IndexWriter writer = null; try { IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_35, new StandardWithACIIFoldingFilter()); indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE); writer = new IndexWriter(dir, indexWriterConfig); 
writer.addDocument(doc); writer.close(); } catch (Exception e) { throw new RuntimeException(e); } finally { if (writer != null) { try { writer.close(); } catch (Exception e) { // do nothing } } } // search the document try { int hits = executeQuery("body", "irFile", dir); assert hits == 1 : "Hit count should equal 1 but equals " + hits; hits = executeQuery("body", "hello", dir); assert hits == 1 : "Hit count should equal 1 but equals " + hits; } catch (Exception e) { throw new RuntimeException(e); } repoHelper.cleanUpRepository(); }
From source file:edu.ur.ir.index.DefaultPowerPointXmlTextExtractorTest.java
License:Apache License
/** * Test basic search within a plain text document * // w w w . j a va 2s. c o m * @param description * @throws Exception */ public void testIndexPowerPointDocument() throws Exception { RepositoryBasedTestHelper repoHelper = new RepositoryBasedTestHelper(); Repository repo = repoHelper.createRepository("localFileServer", "displayName", "file_database", "my_repository", properties.getProperty("a_repo_path"), "default_folder"); // create the first file to store in the temporary folder String tempDirectory = properties.getProperty("ir_core_temp_directory"); File directory = new File(tempDirectory); // helper to create the file FileUtil testUtil = new FileUtil(); testUtil.createDirectory(directory); String baseLocation = properties.getProperty("ir_core_location"); String powerPointXmlFile = properties.getProperty("power_point_xml_file"); File f1 = new File(baseLocation + powerPointXmlFile); assert f1 != null : "File should not be null"; assert f1.canRead() : "Should be able to read the file " + f1.getAbsolutePath(); FileInfo info = repo.getFileDatabase().addFile(f1, "indexed_power_point_file"); info.setExtension("pptx"); FileTextExtractor documentCreator = new DefaultPowerPointXmlTextExtractor(); assert documentCreator.canExtractText(info.getExtension()) : "Cannot create document for extension " + info.getExtension(); String text = documentCreator.getText(new File(info.getFullPath())); Document doc = new Document(); doc.add(new Field("body", text, Field.Store.NO, Field.Index.ANALYZED)); assert doc != null : "Document should be created"; // create the lucene directory in memory Directory dir; try { dir = new RAMDirectory(); } catch (Exception e1) { throw new RuntimeException(e1); } // store the document IndexWriter writer = null; try { IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_35, new StandardWithACIIFoldingFilter()); indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE); writer = new IndexWriter(dir, indexWriterConfig); 
writer.addDocument(doc); writer.close(); } catch (Exception e) { throw new RuntimeException(e); } finally { if (writer != null) { try { writer.close(); } catch (Exception e) { // do nothing } } } // search the document try { int hits = executeQuery("body", "irFile", dir); assert hits == 1 : "Hit count should equal 1 but equals " + hits; hits = executeQuery("body", "hello", dir); assert hits == 1 : "Hit count should equal 1 but equals " + hits; } catch (Exception e) { throw new RuntimeException(e); } repoHelper.cleanUpRepository(); }
From source file:edu.ur.ir.index.DefaultRtfTextExtractorTest.java
License:Apache License
/** * Test basic search within a plain text document * /* ww w . j av a 2s .c om*/ * @param description * @throws Exception */ public void testIndexRtfDocument() throws Exception { RepositoryBasedTestHelper repoHelper = new RepositoryBasedTestHelper(); Repository repo = repoHelper.createRepository("localFileServer", "displayName", "file_database", "my_repository", properties.getProperty("a_repo_path"), "default_folder"); // create the first file to store in the temporary folder String tempDirectory = properties.getProperty("ir_core_temp_directory"); File directory = new File(tempDirectory); // helper to create the file FileUtil testUtil = new FileUtil(); testUtil.createDirectory(directory); String baseLocation = properties.getProperty("ir_core_location"); String rtfFile = properties.getProperty("rtf_file"); File f1 = new File(baseLocation + rtfFile); assert f1 != null : "File should not be null"; assert f1.canRead() : "Should be able to read the file " + f1.getAbsolutePath(); FileInfo info = repo.getFileDatabase().addFile(f1, "indexed_doc_file"); info.setExtension("rtf"); FileTextExtractor documentCreator = new DefaultRtfTextExtractor(); assert documentCreator.canExtractText(info.getExtension()) : "Cannot create document for extension " + info.getExtension(); String text = documentCreator.getText(new File(info.getFullPath())); Document doc = new Document(); doc.add(new Field("body", text, Field.Store.NO, Field.Index.ANALYZED)); assert doc != null : "Document should be created"; // create the lucene directory in memory Directory dir; try { dir = new RAMDirectory(); } catch (Exception e1) { throw new RuntimeException(e1); } // store the document IndexWriter writer = null; try { IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_35, new StandardWithACIIFoldingFilter()); indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE); writer = new IndexWriter(dir, indexWriterConfig); writer.addDocument(doc); writer.close(); } catch (Exception 
e) { throw new RuntimeException(e); } finally { if (writer != null) { try { writer.close(); } catch (Exception e) { // do nothing } } } // search the document try { int hits = executeQuery("body", "irFile", dir); assert hits == 1 : "Hit count should equal 1 but equals " + hits; hits = executeQuery("body", "hello", dir); assert hits == 1 : "Hit count should equal 1 but equals " + hits; } catch (Exception e) { throw new RuntimeException(e); } repoHelper.cleanUpRepository(); }
From source file:edu.ur.ir.index.DefaultWordTextExtractorTest.java
License:Apache License
/** * Test basic search within a plain text document * //from w ww .j ava 2 s . c o m * @param description * @throws Exception */ public void testIndexWordDocument() throws Exception { RepositoryBasedTestHelper repoHelper = new RepositoryBasedTestHelper(); Repository repo = repoHelper.createRepository("localFileServer", "displayName", "file_database", "my_repository", properties.getProperty("a_repo_path"), "default_folder"); // create the first file to store in the temporary folder String tempDirectory = properties.getProperty("ir_core_temp_directory"); File directory = new File(tempDirectory); // helper to create the file FileUtil testUtil = new FileUtil(); testUtil.createDirectory(directory); String baseLocation = properties.getProperty("ir_core_location"); String wordFile = properties.getProperty("word_file"); File f1 = new File(baseLocation + wordFile); assert f1 != null : "File should not be null"; assert f1.canRead() : "Should be able to read the file " + f1.getAbsolutePath(); FileInfo info = repo.getFileDatabase().addFile(f1, "indexed_doc_file"); info.setExtension("doc"); FileTextExtractor documentCreator = new DefaultWordTextExtractor(); assert documentCreator.canExtractText(info.getExtension()) : "Cannot create document for extension " + info.getExtension(); String text = documentCreator.getText(new File(info.getFullPath())); Document doc = new Document(); doc.add(new Field("body", text, Field.Store.NO, Field.Index.ANALYZED)); assert doc != null : "Document should be created"; // create the lucene directory in memory Directory dir; try { dir = new RAMDirectory(); } catch (Exception e1) { throw new RuntimeException(e1); } // store the document IndexWriter writer = null; try { IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_35, new StandardWithACIIFoldingFilter()); indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE); writer = new IndexWriter(dir, indexWriterConfig); writer.addDocument(doc); writer.close(); } catch 
(Exception e) { throw new RuntimeException(e); } finally { if (writer != null) { try { writer.close(); } catch (Exception e) { // do nothing } } } // search the document try { int hits = executeQuery("body", "irFile", dir); assert hits == 1 : "Hit count should equal 1 but equals " + hits; hits = executeQuery("body", "hello", dir); assert hits == 1 : "Hit count should equal 1 but equals " + hits; } catch (Exception e) { throw new RuntimeException(e); } repoHelper.cleanUpRepository(); }
From source file:edu.ur.ir.index.DefaultWordXmlTextExtractorTest.java
License:Apache License
/** * Test basic search within a plain text document * //from www . ja v a 2 s . c o m * @param description * @throws Exception */ public void testIndexWordXmlDocument() throws Exception { RepositoryBasedTestHelper repoHelper = new RepositoryBasedTestHelper(); Repository repo = repoHelper.createRepository("localFileServer", "displayName", "file_database", "my_repository", properties.getProperty("a_repo_path"), "default_folder"); // create the first file to store in the temporary folder String tempDirectory = properties.getProperty("ir_core_temp_directory"); File directory = new File(tempDirectory); // helper to create the file FileUtil testUtil = new FileUtil(); testUtil.createDirectory(directory); String baseLocation = properties.getProperty("ir_core_location"); String wordXmlFile = properties.getProperty("word_xml_file"); File f1 = new File(baseLocation + wordXmlFile); assert f1 != null : "File should not be null"; assert f1.canRead() : "Should be able to read the file " + f1.getAbsolutePath(); FileInfo info = repo.getFileDatabase().addFile(f1, "indexed_docx_file"); info.setExtension("docx"); FileTextExtractor documentCreator = new DefaultWordXmlTextExtractor(); assert documentCreator.canExtractText(info.getExtension()) : "Cannot create document for extension " + info.getExtension(); String text = documentCreator.getText(new File(info.getFullPath())); Document doc = new Document(); doc.add(new Field("body", text, Field.Store.NO, Field.Index.ANALYZED)); assert doc != null : "Document should be created"; // create the lucene directory in memory Directory dir; try { dir = new RAMDirectory(); } catch (Exception e1) { throw new RuntimeException(e1); } // store the document IndexWriter writer = null; try { IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_35, new StandardWithACIIFoldingFilter()); indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE); writer = new IndexWriter(dir, indexWriterConfig); writer.addDocument(doc); 
writer.close(); } catch (Exception e) { throw new RuntimeException(e); } finally { if (writer != null) { try { writer.close(); } catch (Exception e) { // do nothing } } } // search the document try { int hits = executeQuery("body", "irFile", dir); assert hits == 1 : "Hit count should equal 1 but equals " + hits; hits = executeQuery("body", "hello", dir); assert hits == 1 : "Hit count should equal 1 but equals " + hits; } catch (Exception e) { throw new RuntimeException(e); } repoHelper.cleanUpRepository(); }
From source file:edu.ur.ir.user.service.DefaultUserGroupIndexService.java
License:Apache License
/**
 * Adds the given user groups to the Lucene index stored in the given folder.
 * Can be used to re-index all user groups.
 *
 * Failures while opening, writing or committing are logged and reported via
 * the error email service; they are not rethrown to the caller.
 *
 * @param userGroups - user groups to (re-)index
 * @param userGroupIndexFolder - folder location of the index
 * @param overwriteExistingIndex - if true the writer is opened in CREATE mode,
 *        otherwise the writer configuration's default open mode is used
 */
public void add(List<IrUserGroup> userGroups, File userGroupIndexFolder, boolean overwriteExistingIndex) {
    // convert every group to a document before the index is touched
    LinkedList<Document> pending = new LinkedList<Document>();
    for (IrUserGroup group : userGroups) {
        log.debug("Adding user group " + group);
        pending.add(getDocument(group));
    }

    IndexWriter indexWriter = null;
    Directory indexDirectory = null;
    try {
        indexDirectory = FSDirectory.open(userGroupIndexFolder);

        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_35, analyzer);
        if (overwriteExistingIndex) {
            config.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
        }
        indexWriter = new IndexWriter(indexDirectory, config);

        for (Document document : pending) {
            indexWriter.addDocument(document);
        }
        indexWriter.commit();
    } catch (Exception e) {
        log.error(e);
        errorEmailService.sendError(e);
    } finally {
        if (indexWriter != null) {
            try {
                indexWriter.close();
            } catch (Exception closeFailure) {
                log.error(closeFailure);
                // the writer could not be closed; release a lock left on the directory, if any
                try {
                    if (IndexWriter.isLocked(indexDirectory)) {
                        IndexWriter.unlock(indexDirectory);
                    }
                } catch (IOException unlockFailure) {
                    log.error(unlockFailure);
                }
            }
        }
        if (indexDirectory != null) {
            try {
                indexDirectory.close();
            } catch (Exception directoryCloseFailure) {
                log.error(directoryCloseFailure);
            }
        }
    }
}
From source file:edu.utsa.sifter.Indexer.java
License:Apache License
/**
 * Opens (creating if necessary) the index at {@code path} and returns a
 * writer for it, configured in CREATE_OR_APPEND mode with the RAM buffer
 * size and thread-state limit taken from {@code conf}.
 *
 * On success the directory is left open and the caller receives only the
 * writer. On failure the directory is closed before the exception propagates.
 *
 * @param path      file-system location of the index
 * @param stopwords passed to {@code getStopList} to obtain the analyzer's stop words
 * @param conf      supplies INDEXING_BUFFER_SIZE and THREAD_POOL_SIZE
 * @return a writer on the index at {@code path}
 * @throws IOException if the directory or the writer cannot be opened
 */
static IndexWriter getIndexWriter(final String path, final String stopwords, final SifterConfig conf)
        throws IOException {
    final Directory dir = FSDirectory.open(new File(path));
    boolean opened = false;
    try {
        StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_44, getStopList(stopwords));
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_44, analyzer);
        iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        iwc.setRAMBufferSizeMB(conf.INDEXING_BUFFER_SIZE);
        iwc.setMaxThreadStates(conf.THREAD_POOL_SIZE);

        IndexWriter writer = new IndexWriter(dir, iwc);
        opened = true;
        return writer;
    } finally {
        if (!opened) {
            // do not leak the directory handle when setup fails
            try {
                dir.close();
            } catch (IOException ignored) {
                // the failure that brought us here is the one worth reporting
            }
        }
    }
}
From source file:edu.utsa.sifter.som.MainSOM.java
License:Apache License
/**
 * Opens (creating if necessary) the index in {@code somIdx} and returns a
 * writer for it, configured in CREATE_OR_APPEND mode with the RAM buffer
 * size taken from {@code conf}.
 *
 * On success the directory is left open and the caller receives only the
 * writer. On failure the directory is closed before the exception propagates.
 *
 * @param somIdx folder holding the index
 * @param conf   supplies INDEXING_BUFFER_SIZE
 * @return a writer on the index in {@code somIdx}
 * @throws CorruptIndexException declared for callers; raised by Lucene if the index is corrupt
 * @throws IOException if the directory or the writer cannot be opened
 */
IndexWriter createWriter(final File somIdx, final SifterConfig conf) throws CorruptIndexException, IOException {
    final Directory dir = FSDirectory.open(somIdx);
    boolean opened = false;
    try {
        StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_40);
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_40, analyzer);
        iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        iwc.setRAMBufferSizeMB(conf.INDEXING_BUFFER_SIZE);

        IndexWriter writer = new IndexWriter(dir, iwc);
        opened = true;
        return writer;
    } finally {
        if (!opened) {
            // do not leak the directory handle when setup fails
            try {
                dir.close();
            } catch (IOException ignored) {
                // the failure that brought us here is the one worth reporting
            }
        }
    }
}
From source file:edu.virginia.cs.index.AnswerIndexer.java
/** * Creates the initial index files on disk * * @param indexPath/*from w ww. ja va 2 s . com*/ * @return * @throws IOException */ private static IndexWriter setupIndex(String indexPath) throws IOException { Analyzer analyzer = new SpecialAnalyzer(); IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_46, analyzer); config.setOpenMode(IndexWriterConfig.OpenMode.CREATE); config.setRAMBufferSizeMB(2048.0); FSDirectory dir = FSDirectory.open(new File(indexPath)); IndexWriter writer = new IndexWriter(dir, config); return writer; }
From source file:edu.virginia.cs.index.PostLinkIndexer.java
/**
 * Opens an index writer on {@code indexPath} in CREATE mode (per Lucene's
 * documentation this creates a new index or overwrites an existing one),
 * using a {@code SpecialAnalyzer} and a 2048 MB RAM buffer.
 *
 * On success the directory is left open and the caller receives only the
 * writer. If the writer cannot be created the directory is closed before
 * the exception propagates.
 *
 * @param indexPath file-system location of the index
 * @return a writer on the index at {@code indexPath}
 * @throws IOException if the directory or the writer cannot be opened
 */
private static IndexWriter setupIndex(String indexPath) throws IOException {
    Analyzer analyzer = new SpecialAnalyzer();
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_46, analyzer);
    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    config.setRAMBufferSizeMB(2048.0);

    FSDirectory dir = FSDirectory.open(new File(indexPath));
    boolean opened = false;
    try {
        IndexWriter writer = new IndexWriter(dir, config);
        opened = true;
        return writer;
    } finally {
        if (!opened) {
            // do not leak the directory handle when the writer cannot be created
            try {
                dir.close();
            } catch (IOException ignored) {
                // the failure that brought us here is the one worth reporting
            }
        }
    }
}