Usage examples for org.apache.lucene.index.IndexWriterConfig.setOpenMode
public IndexWriterConfig setOpenMode(OpenMode openMode)
From source file:crawler.WebCrawler.java
License:Open Source License
/** * Constructor.//from w w w . j a v a2 s. c o m */ public WebCrawler() { threadNumber = configurator.propertyInteger("threadNumber"); // System.out.println("threadNumber " + threadNumber); verbose = configurator.propertyBoolean("verbose"); // System.out.println("verbose " + verbose); logFilePath = configurator.property("logFilePath"); redirect = (logFilePath.trim().isEmpty() ? false : true); // System.out.println("logFilePath " + logFilePath); storagePath = configurator.property("storagePath"); // System.out.println("storagePath " + storagePath); indexPath = configurator.property("indexPath"); // System.out.println("indexPath " + indexPath); maximumFileNumber = configurator.propertyInteger("maximumFileNumber"); // System.out.println("maximumFileNumber " + maximumFileNumber); timeout = configurator.propertyInteger("timeout"); // System.out.println("timeout " + timeout); depth = configurator.propertyInteger("depth"); // System.out.println("depth " + depth); followImgLinks = configurator.propertyBoolean("followImgLinks"); // System.out.println("followImgLinks " + followImgLinks); sitemapAssisted = configurator.propertyBoolean("sitemapAssisted"); // System.out.println("sitemapAssisted " + sitemapAssisted); agent = configurator.property("agent"); // System.out.println("agent " + agent); executor = Executors.newFixedThreadPool(this.threadNumber); futures = new LinkedList<Future<?>>(); visited = new ConcurrentHashMap<String, URL>(); try { Directory directory = new NIOFSDirectory(new File(this.indexPath)); Version lv = Version.LUCENE_41; Analyzer a = new EnglishAnalyzer(lv); IndexWriterConfig iwc = new IndexWriterConfig(lv, a); iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); iwc.setWriteLockTimeout(20000); luceneIndexWriter = new IndexWriter(directory, iwc); } catch (IOException ioe) { ioe.printStackTrace(); } }
From source file:cs412.project.search.IndexFiles.java
License:Apache License
public IndexFiles(String docsPath, String indexPath) { boolean create = true; if (docsPath == null) { System.exit(1);//from www.jav a2 s.c om } final File docDir = new File(docsPath); if (!docDir.exists() || !docDir.canRead()) { System.out.println("Document directory '" + docDir.getAbsolutePath() + "' does not exist or is not readable, please check the path"); System.exit(1); } Date start = new Date(); try { System.out.println("Indexing to directory '" + indexPath + "'..."); Directory dir = FSDirectory.open(new File(indexPath).toPath()); Analyzer analyzer = new StandardAnalyzer(); IndexWriterConfig iwc = new IndexWriterConfig(analyzer); if (create) { // Create a new index in the directory, removing any // previously indexed documents: iwc.setOpenMode(OpenMode.CREATE); } else { // Add new documents to an existing index: iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); } // Optional: for better indexing performance, if you // are indexing many documents, increase the RAM // buffer. But if you do this, increase the max heap // size to the JVM (eg add -Xmx512m or -Xmx1g): // // iwc.setRAMBufferSizeMB(256.0); IndexWriter writer = new IndexWriter(dir, iwc); indexDocs(writer, docDir); // NOTE: if you want to maximize search performance, // you can optionally call forceMerge here. This can be // a terribly costly operation, so generally it's only // worth it when your index is relatively static (ie // you're done adding documents to it): // // writer.forceMerge(1); writer.close(); Date end = new Date(); System.out.println(end.getTime() - start.getTime() + " total milliseconds"); } catch (IOException e) { System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage()); } }
From source file:cs412.project.search.IndexFiles.java
License:Apache License
/** Index all text files under a directory. */ public static void main(String[] args) { String usage = "java org.apache.lucene.demo.IndexFiles" + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n" + "This indexes the documents in DOCS_PATH, creating a Lucene index" + "in INDEX_PATH that can be searched with SearchFiles"; String indexPath = "index"; // String docsPath = "H:\\data set 4"; //CHANGE BELOW TO YOUR PATH String docsPath = "Split Files/"; boolean create = true; for (int i = 0; i < args.length; i++) { if ("-index".equals(args[i])) { indexPath = args[i + 1];/*from w ww.j a v a 2 s .c o m*/ i++; } else if ("-docs".equals(args[i])) { docsPath = args[i + 1]; i++; } else if ("-update".equals(args[i])) { create = false; } } if (docsPath == null) { System.err.println("Usage: " + usage); System.exit(1); } final File docDir = new File(docsPath); if (!docDir.exists() || !docDir.canRead()) { System.out.println("Document directory '" + docDir.getAbsolutePath() + "' does not exist or is not readable, please check the path"); System.exit(1); } Date start = new Date(); try { System.out.println("Indexing to directory '" + indexPath + "'..."); Directory dir = FSDirectory.open(new File(indexPath).toPath()); Analyzer analyzer = new StandardAnalyzer(); IndexWriterConfig iwc = new IndexWriterConfig(analyzer); if (create) { // Create a new index in the directory, removing any // previously indexed documents: iwc.setOpenMode(OpenMode.CREATE); } else { // Add new documents to an existing index: iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); } // Optional: for better indexing performance, if you // are indexing many documents, increase the RAM // buffer. But if you do this, increase the max heap // size to the JVM (eg add -Xmx512m or -Xmx1g): // // iwc.setRAMBufferSizeMB(256.0); IndexWriter writer = new IndexWriter(dir, iwc); indexDocs(writer, docDir); // NOTE: if you want to maximize search performance, // you can optionally call forceMerge here. 
This can be // a terribly costly operation, so generally it's only // worth it when your index is relatively static (ie // you're done adding documents to it): // // writer.forceMerge(1); writer.close(); Date end = new Date(); System.out.println(end.getTime() - start.getTime() + " total milliseconds"); } catch (IOException e) { System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage()); } }
From source file:cs571.proj1.IndexFiles.java
License:Apache License
/** Index all text files under a directory. */ public static void main(String[] args) { String usage = "java org.apache.lucene.demo.IndexFiles" + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n" + "This indexes the documents in DOCS_PATH, creating a Lucene index" + "in INDEX_PATH that can be searched with SearchFiles"; String indexPath = "index"; String docsPath = null;/*from w w w. j a v a 2 s . c o m*/ boolean create = true; for (int i = 0; i < args.length; i++) { if ("-index".equals(args[i])) { indexPath = args[i + 1]; i++; } else if ("-docs".equals(args[i])) { docsPath = args[i + 1]; i++; } else if ("-update".equals(args[i])) { create = false; } else if ("-tfidf".equals(args[i])) { tfidf = true; } else if ("-bm25".equals(args[i])) { bm25 = true; } } if (docsPath == null) { System.err.println("Usage: " + usage); System.exit(1); } final Path docDir = Paths.get(docsPath); if (!Files.isReadable(docDir)) { System.out.println("Document directory '" + docDir.toAbsolutePath() + "' does not exist or is not readable, please check the path"); System.exit(1); } Date start = new Date(); try { System.out.println("Indexing to directory '" + indexPath + "'..."); Directory dir = FSDirectory.open(Paths.get(indexPath)); Analyzer analyzer = new StandardAnalyzer(); IndexWriterConfig iwc = new IndexWriterConfig(analyzer); if (tfidf) iwc.setSimilarity(new TFIDF()); if (bm25) iwc.setSimilarity(new BM25()); if (create) { // Create a new index in the directory, removing any // previously indexed documents: iwc.setOpenMode(OpenMode.CREATE); } else { // Add new documents to an existing index: iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); } // Optional: for better indexing performance, if you // are indexing many documents, increase the RAM // buffer. 
But if you do this, increase the max heap // size to the JVM (eg add -Xmx512m or -Xmx1g): // // iwc.setRAMBufferSizeMB(256.0); IndexWriter writer = new IndexWriter(dir, iwc); indexDocs(writer, docDir); // NOTE: if you want to maximize search performance, // you can optionally call forceMerge here. This can be // a terribly costly operation, so generally it's only // worth it when your index is relatively static (ie // you're done adding documents to it): // // writer.forceMerge(1); writer.close(); Date end = new Date(); System.out.println(end.getTime() - start.getTime() + " total milliseconds"); System.out.println("Total # of Docs Indexed: " + numOfDocuments); } catch (IOException e) { System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage()); } }
From source file:dbn.db.FullTextTrigger.java
/**
 * Get the Lucene index access
 *
 * <p>Initialises, on first use, the shared static state needed to work with the index:
 * {@code indexPath}, {@code directory}, {@code indexWriter}, {@code indexReader} and
 * {@code indexSearcher}. When {@code indexPath} and {@code indexWriter} are already set
 * the method returns without taking the write lock.
 *
 * <p>An {@link IOException} or {@link SQLException} raised during initialisation is
 * logged and rethrown as a new {@link SQLException} carrying the original as its cause.
 * The "no longer active" check happens before the try block, so that exception is
 * thrown as-is without being logged.
 *
 * @param conn SQL connection
 * @throws SQLException Unable to access the Lucene index
 */
private static void getIndexAccess(Connection conn) throws SQLException {
    if (!isActive) {
        throw new SQLException("NRS is no longer active");
    }
    // Take the update lock unless this thread already holds the write lock; remember
    // whether we took it so the finally block releases only what was acquired here.
    // NOTE(review): exact semantics of hasLock()/updateLock() come from the project's
    // lock class, which is not visible in this chunk - confirm.
    boolean obtainedUpdateLock = false;
    if (!indexLock.writeLock().hasLock()) {
        indexLock.updateLock().lock();
        obtainedUpdateLock = true;
    }
    try {
        // First check is made without the write lock; each piece of state is checked
        // again individually once the write lock is held.
        if (indexPath == null || indexWriter == null) {
            indexLock.writeLock().lock();
            try {
                if (indexPath == null) {
                    // Presumably assigns indexPath as a side effect - verify in getIndexPath.
                    getIndexPath(conn);
                }
                if (directory == null) {
                    directory = FSDirectory.open(indexPath);
                }
                if (indexWriter == null) {
                    IndexWriterConfig config = new IndexWriterConfig(analyzer);
                    // Reuse an existing index if present, otherwise create a new one.
                    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
                    indexWriter = new IndexWriter(directory, config);
                    // Write (or overwrite) a single control document and commit before
                    // opening the reader.
                    // NOTE(review): presumably this guarantees the index has a commit
                    // for DirectoryReader.open to read - confirm.
                    Document document = new Document();
                    document.add(new StringField("_QUERY", "_CONTROL_DOCUMENT_", Field.Store.YES));
                    indexWriter.updateDocument(new Term("_QUERY", "_CONTROL_DOCUMENT_"), document);
                    indexWriter.commit();
                    // NOTE(review): if anything from here on throws, indexWriter is
                    // already non-null, so later calls skip this branch and
                    // indexReader/indexSearcher are never assigned by this method.
                    indexReader = DirectoryReader.open(directory);
                    indexSearcher = new IndexSearcher(indexReader);
                }
            } finally {
                indexLock.writeLock().unlock();
            }
        }
    } catch (IOException | SQLException exc) {
        Logger.logErrorMessage("Unable to access the Lucene index", exc);
        throw new SQLException("Unable to access the Lucene index", exc);
    } finally {
        if (obtainedUpdateLock) {
            indexLock.updateLock().unlock();
        }
    }
}
From source file:de.blizzy.documentr.search.AllDocIdsCollectorTest.java
License:Open Source License
/**
 * Prepares the fixture: an in-memory index holding three documents produced by
 * {@code createDocument()}, plus a reader opened on that index.
 *
 * @throws IOException if writing to or opening the index fails
 */
@Before
public void setUp() throws IOException {
    directory = new RAMDirectory();

    Version luceneVersion = Version.LUCENE_40;
    IndexWriterConfig config =
            new IndexWriterConfig(luceneVersion, new StandardAnalyzer(luceneVersion))
                    .setOpenMode(OpenMode.CREATE_OR_APPEND);

    IndexWriter indexWriter = new IndexWriter(directory, config);
    for (int i = 0; i < 3; i++) {
        indexWriter.addDocument(createDocument());
    }
    indexWriter.commit();
    indexWriter.close(true);

    reader = DirectoryReader.open(directory);
}
From source file:de.blizzy.documentr.search.GetSearchHitTaskTest.java
License:Open Source License
@Before public void setUp() throws IOException { directory = new RAMDirectory(); StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_40); IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_40, analyzer); writerConfig.setOpenMode(OpenMode.CREATE_OR_APPEND); IndexWriter writer = new IndexWriter(directory, writerConfig); writer.addDocument(createDocument("project", "branch", "home", //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ new String[] { "tag1", "tag2" }, //$NON-NLS-1$ //$NON-NLS-2$ "title", "some text")); //$NON-NLS-1$ //$NON-NLS-2$ writer.commit();//from www . java2 s . c o m writer.close(true); reader = DirectoryReader.open(directory); Query query = new TermQuery(new Term("text", "some")); //$NON-NLS-1$ //$NON-NLS-2$ task = new GetSearchHitTask(query, reader, 0, analyzer); }
From source file:de.blizzy.documentr.search.InaccessibleDocIdsCollectorTest.java
License:Open Source License
@Before public void setUp() throws IOException { directory = new RAMDirectory(); StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_40); IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_40, analyzer); writerConfig.setOpenMode(OpenMode.CREATE_OR_APPEND); IndexWriter writer = new IndexWriter(directory, writerConfig); writer.addDocument(createDocument("project", "branch1", "home")); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ writer.addDocument(createDocument("project", "branch2", "home")); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ writer.commit();/*from w w w . j a v a2 s . co m*/ writer.close(true); reader = DirectoryReader.open(directory); collector = new InaccessibleDocIdsCollector(Permission.VIEW, authentication, permissionEvaluator); }
From source file:de.blizzy.documentr.search.PageIndex.java
License:Open Source License
@PostConstruct public void init() throws IOException { File indexDir = new File(settings.getDocumentrDataDir(), "index"); //$NON-NLS-1$ File pageIndexDir = new File(indexDir, "page"); //$NON-NLS-1$ FileUtils.forceMkdir(pageIndexDir);//w w w . j av a 2s. co m directory = FSDirectory.open(pageIndexDir); Analyzer defaultAnalyzer = new EnglishAnalyzer(Version.LUCENE_40); Map<String, Analyzer> fieldAnalyzers = Maps.newHashMap(); fieldAnalyzers.put(ALL_TEXT_SUGGESTIONS, new StandardAnalyzer(Version.LUCENE_40)); analyzer = new PerFieldAnalyzerWrapper(defaultAnalyzer, fieldAnalyzers); IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_40, analyzer); config.setOpenMode(OpenMode.CREATE_OR_APPEND); writer = new IndexWriter(directory, config); writer.commit(); readerManager = new ReaderManager(directory); searcherManager = new SearcherManager(directory, null); log.info("checking if index is empty"); //$NON-NLS-1$ if (getNumDocuments() == 0) { reindexEverything(); } }
From source file:de.blizzy.documentr.search.PagePermissionFilterTest.java
License:Open Source License
@Before public void setUp() throws IOException { directory = new RAMDirectory(); StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_40); IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_40, analyzer); writerConfig.setOpenMode(OpenMode.CREATE_OR_APPEND); IndexWriter writer = new IndexWriter(directory, writerConfig); writer.addDocument(createDocument("project", "branch1", "home")); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ writer.addDocument(createDocument("project", "branch2", "home")); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ writer.commit();// ww w . j av a 2s. co m writer.close(true); reader = DirectoryReader.open(directory); BitSet docs = new BitSet(); docs.set(1); Bits docIds = new DocIdBitSet(docs); filter = new PagePermissionFilter(docIds); }