List of usage examples for org.apache.lucene.index IndexWriterConfig setOpenMode
public IndexWriterConfig setOpenMode(OpenMode openMode)
From source file:kbp2013.index.IndexWikipediaCorpus_v2.java
License:Open Source License
/** * //from w ww. j a v a 2s. c o m * * @param args * @throws IOException * @throws Exception */ public static void main(String[] args) throws IOException, Exception { initializeFromDefault(); System.out.println("Indexing Wikipedia Dump to directory '" + wikiluceneIndex + "'..."); INDEX_DIR = new File(wikiluceneIndex); if (INDEX_DIR.exists() && create == 1) { System.out.println("Cannot save index to '" + INDEX_DIR + "' directory, please delete it first"); System.exit(1); } if (wikidump.endsWith(".bzip2") == false) { System.out.println("NOTICE: The Wikipedia dump must be in bzip2 format."); System.exit(0); } Directory dir = FSDirectory.open(new File(wikiluceneIndex)); // Open lucene stuff Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_44); IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_44, analyzer); // configure Lucene Stuff iwc.setMaxThreadStates(100); // manage append mode if (create == 0) { // add new document to an existing index iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); // if appending, check index if (checkindex == 1) { System.out.println("Checking index ..."); CheckIndex ci = new CheckIndex(dir); ci.checkIndex(); System.out.println("End of Checking index"); } } else { iwc.setOpenMode(OpenMode.CREATE); } // build writer IndexWriter writer = new IndexWriter(dir, iwc); // -------------------------- // // Open the Wikipedia Dump // //--------------------------- //Processing the large xml file in bzip2 format InputStream fileInputStream = new BufferedInputStream(new FileInputStream(wikidump)); BZip2InputStream inputStream = new BZip2InputStream(fileInputStream, false); InputStreamReader isr = new InputStreamReader(inputStream); BufferedReader reader = new BufferedReader(isr); String line; //temporary stores the content of each file StringBuilder pageBuffer = new StringBuilder(); //contains the title of the current page String docTitle = ""; //contains the content of the current page String content = ""; int docCount = 0; // number of 
documents that have been stored Date start = new Date(); //log the time when the indexing process starts while ((line = reader.readLine()) != null) { if (StringUtils.contains(line, "</page>") == true) { if (pageBuffer.length() > 0) { //get the title of the page int startIndex = pageBuffer.toString().indexOf("<title>") + 7; int endIndex = pageBuffer.toString().indexOf("</title>"); docTitle = pageBuffer.toString().substring(startIndex, endIndex); //get the content of the page int startPageIndex = pageBuffer.toString().indexOf("<page>"); content = pageBuffer.toString().substring(startPageIndex) + "</page>"; //verify the namespace of the page, it should be 0 int namespaceValue = Integer .parseInt(content.substring(content.indexOf("<ns>") + 4, content.indexOf("</ns>"))); if (namespaceValue != 0) { //reset buffer pageBuffer = new StringBuilder(); continue; } //verify that it is not a redirect page if (content.indexOf("<text xml:space=\"preserve\">#REDIRECT") != -1) { //reset buffer pageBuffer = new StringBuilder(); continue; } else { indexDocument(writer, content, docTitle.toLowerCase()); System.err.println("Processed " + docCount + " documents"); } docCount++; } //reset buffer pageBuffer = new StringBuilder(); } pageBuffer.append(line); } fileInputStream.close(); writer.close(); Date end = new Date(); // close properly the index writer // !! Caution !! in case of error, if this is not closed, the index is corrupted // and have to be regenerated reader.close(); System.err.println(end.getTime() - start.getTime() + " total milliseconds"); }
From source file:kpl.db.FullTextTrigger.java
/**
 * Get the Lucene index access.
 *
 * <p>Takes the update lock unless the current thread already holds the write lock, and,
 * if the index path or writer has not been set up yet, initializes the index path,
 * directory, writer, reader and searcher while holding the write lock. The writer,
 * reader and searcher fields are only assigned once the whole set-up has succeeded, so a
 * failure part-way through does not leave a writer without a matching reader/searcher.
 *
 * @param   conn                SQL connection
 * @throws  SQLException        Unable to access the Lucene index
 */
private static void getIndexAccess(Connection conn) throws SQLException {
    if (!isActive) {
        throw new SQLException("KRS is no longer active");
    }
    boolean obtainedUpdateLock = false;
    if (!indexLock.writeLock().hasLock()) {
        indexLock.updateLock().lock();
        obtainedUpdateLock = true;
    }
    try {
        if (indexPath == null || indexWriter == null) {
            indexLock.writeLock().lock();
            try {
                if (indexPath == null) {
                    getIndexPath(conn);
                }
                if (directory == null) {
                    directory = FSDirectory.open(indexPath);
                }
                if (indexWriter == null) {
                    IndexWriterConfig config = new IndexWriterConfig(analyzer);
                    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
                    IndexWriter writer = new IndexWriter(directory, config);
                    try {
                        // Write a control document and commit so that the directory
                        // holds a commit before the reader is opened on it.
                        Document document = new Document();
                        document.add(new StringField("_QUERY", "_CONTROL_DOCUMENT_", Field.Store.YES));
                        writer.updateDocument(new Term("_QUERY", "_CONTROL_DOCUMENT_"), document);
                        writer.commit();
                        DirectoryReader reader = DirectoryReader.open(directory);
                        IndexSearcher searcher = new IndexSearcher(reader);
                        // Publish only after every step succeeded; the writer goes last
                        // because the guard above tests it.
                        indexReader = reader;
                        indexSearcher = searcher;
                        indexWriter = writer;
                    } catch (IOException | RuntimeException exc) {
                        // Release the writer so a later call can retry from scratch.
                        try {
                            writer.rollback();
                        } catch (IOException | RuntimeException closeExc) {
                            exc.addSuppressed(closeExc);
                        }
                        throw exc;
                    }
                }
            } finally {
                indexLock.writeLock().unlock();
            }
        }
    } catch (IOException | SQLException exc) {
        Logger.logErrorMessage("Unable to access the Lucene index", exc);
        throw new SQLException("Unable to access the Lucene index", exc);
    } finally {
        if (obtainedUpdateLock) {
            indexLock.updateLock().unlock();
        }
    }
}
From source file:l3.IndexFiles.java
License:Apache License
/** Index all text files under a directory. */ public static void main(String[] args) { String usage = "java org.apache.lucene.demo.IndexFiles" + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n" + "This indexes the documents in DOCS_PATH, creating a Lucene index" + "in INDEX_PATH that can be searched with SearchFiles"; String indexPath = "index"; String docsPath = null;/*from w ww . j ava 2 s .c om*/ boolean create = true; for (int i = 0; i < args.length; i++) { if ("-index".equals(args[i])) { indexPath = args[i + 1]; i++; } else if ("-docs".equals(args[i])) { docsPath = args[i + 1]; i++; } else if ("-update".equals(args[i])) { create = false; } } if (docsPath == null) { System.err.println("Usage: " + usage); System.exit(1); } final File docDir = new File(docsPath); if (!docDir.exists() || !docDir.canRead()) { System.out.println("Document directory '" + docDir.getAbsolutePath() + "' does not exist or is not readable, please check the path"); System.exit(1); } Date start = new Date(); try { System.out.println("Indexing to directory '" + indexPath + "'..."); Directory dir = FSDirectory.open(new File(indexPath)); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_43); IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_43, analyzer); if (create) { // Create a new index in the directory, removing any // previously indexed documents: iwc.setOpenMode(OpenMode.CREATE); } else { // Add new documents to an existing index: iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); } // Optional: for better indexing performance, if you // are indexing many documents, increase the RAM // buffer. But if you do this, increase the max heap // size to the JVM (eg add -Xmx512m or -Xmx1g): // // iwc.setRAMBufferSizeMB(256.0); IndexWriter writer = new IndexWriter(dir, iwc); indexDocs(writer, docDir); // NOTE: if you want to maximize search performance, // you can optionally call forceMerge here. 
This can be // a terribly costly operation, so generally it's only // worth it when your index is relatively static (ie // you're done adding documents to it): // // writer.forceMerge(1); writer.close(); Date end = new Date(); System.out.println(end.getTime() - start.getTime() + " total milliseconds"); } catch (IOException e) { System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage()); } }
From source file:l4.CompoundIndexFiles.java
License:Apache License
/** Index all text files under a directory. */ public static void main(String[] args) { String usage = "java org.apache.lucene.demo.IndexFiles" + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n" + "This indexes the documents in DOCS_PATH, creating a Lucene index" + "in INDEX_PATH that can be searched with SearchFiles"; String indexPath = "index"; String docsPath = null;//w ww . j a va2 s.c o m boolean create = true; for (int i = 0; i < args.length; i++) { if ("-index".equals(args[i])) { indexPath = args[i + 1]; i++; } else if ("-docs".equals(args[i])) { docsPath = args[i + 1]; i++; } else if ("-update".equals(args[i])) { create = false; } } if (docsPath == null) { System.err.println("Usage: " + usage); System.exit(1); } final File docDir = new File(docsPath); if (!docDir.exists() || !docDir.canRead()) { System.out.println("Document directory '" + docDir.getAbsolutePath() + "' does not exist or is not readable, please check the path"); System.exit(1); } Date start = new Date(); try { System.out.println("Indexing to directory '" + indexPath + "'..."); Directory dir = FSDirectory.open(new File(indexPath)); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_43); IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_43, analyzer); // TieredMergePolicy mergePolicy = new TieredMergePolicy(); // iwc.setMergePolicy(mergePolicy.setNoCFSRatio(1.0d)); if (create) { // Create a new index in the directory, removing any // previously indexed documents: iwc.setOpenMode(OpenMode.CREATE); } else { // Add new documents to an existing index: iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); } // Optional: for better indexing performance, if you // are indexing many documents, increase the RAM // buffer. 
But if you do this, increase the max heap // size to the JVM (eg add -Xmx512m or -Xmx1g): // // iwc.setRAMBufferSizeMB(256.0); IndexWriter writer = new IndexWriter(dir, iwc); indexDocs(writer, docDir); // NOTE: if you want to maximize search performance, // you can optionally call forceMerge here. This can be // a terribly costly operation, so generally it's only // worth it when your index is relatively static (ie // you're done adding documents to it): // // writer.forceMerge(1); writer.close(); Date end = new Date(); System.out.println(end.getTime() - start.getTime() + " total milliseconds"); } catch (IOException e) { System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage()); } }
From source file:lab_mri.SearchEngine.java
/**
 * Opens the index writer on {@code dir}, creating the index if it does not exist and
 * appending to it otherwise, and stores it in {@code writer}.
 *
 * @throws IOException if the directory or the writer cannot be opened
 */
public void open() throws IOException {
    final IndexWriterConfig writerConfig =
            new IndexWriterConfig(Version.LATEST, new CustomAnalyzer())
                    .setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    final FSDirectory indexDirectory = FSDirectory.open(dir);
    writer = new IndexWriter(indexDirectory, writerConfig);
}
From source file:lia.chapter2.IndexingTest.java
License:Apache License
private IndexWriter getWriter() throws IOException { // 2 final IndexWriterConfig indexWriterConfig = new IndexWriterConfig(new StandardAnalyzer()); indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE); return new IndexWriter(directory, indexWriterConfig); }
From source file:lia.recent.IndexFiles.java
License:Apache License
/** Index all text files under a directory. */ public static void main(String[] args) { String usage = "java org.apache.lucene.demo.IndexFiles" + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n" + "This indexes the documents in DOCS_PATH, creating a Lucene index" + "in INDEX_PATH that can be searched with SearchFiles"; String indexPath = "index"; String docsPath = "data"; boolean create = true; for (int i = 0; i < args.length; i++) { if ("-index".equals(args[i])) { indexPath = args[i + 1];/* www .j av a 2 s . c o m*/ i++; } else if ("-docs".equals(args[i])) { docsPath = args[i + 1]; i++; } else if ("-update".equals(args[i])) { create = false; } } if (docsPath == null) { System.err.println("Usage: " + usage); System.exit(1); } final File docDir = new File(docsPath); if (!docDir.exists() || !docDir.canRead()) { System.out.println("Document directory '" + docDir.getAbsolutePath() + "' does not exist or is not readable, please check the path"); System.exit(1); } Date start = new Date(); try { System.out.println("Indexing to directory '" + indexPath + "'..."); Directory dir = FSDirectory.open(new File(indexPath)); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_46); IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_46, analyzer); if (create) { // Create a new index in the directory, removing any // previously indexed documents: iwc.setOpenMode(OpenMode.CREATE); } else { // Add new documents to an existing index: iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); } // Optional: for better indexing performance, if you // are indexing many documents, increase the RAM // buffer. But if you do this, increase the max heap // size to the JVM (eg add -Xmx512m or -Xmx1g): // // iwc.setRAMBufferSizeMB(256.0); IndexWriter writer = new IndexWriter(dir, iwc); indexDocs(writer, docDir); // NOTE: if you want to maximize search performance, // you can optionally call forceMerge here. 
This can be // a terribly costly operation, so generally it's only // worth it when your index is relatively static (ie // you're done adding documents to it): // // writer.forceMerge(1); writer.close(); Date end = new Date(); System.out.println(end.getTime() - start.getTime() + " total milliseconds"); } catch (IOException e) { System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage()); } }
From source file:lsre.utils.LuceneUtils.java
License:Open Source License
/** * Creates an IndexWriter for given index path, with given analyzer. * * @param directory the path to the index directory * @param create set to true if you want to create a new index * @param analyzer gives the analyzer used for the Indexwriter. * @return an IndexWriter/*from ww w .j a v a 2 s.c om*/ * @throws IOException */ public static IndexWriter createIndexWriter(Directory directory, boolean create, AnalyzerType analyzer) throws IOException { // set the analyzer according to the method params Analyzer tmpAnalyzer = null; if (analyzer == AnalyzerType.SimpleAnalyzer) tmpAnalyzer = new SimpleAnalyzer(); // LetterTokenizer with LowerCaseFilter else if (analyzer == AnalyzerType.WhitespaceAnalyzer) tmpAnalyzer = new WhitespaceAnalyzer(); // WhitespaceTokenizer else if (analyzer == AnalyzerType.KeywordAnalyzer) tmpAnalyzer = new KeywordAnalyzer(); // entire string as one token. else if (analyzer == AnalyzerType.StandardAnalyzer) tmpAnalyzer = new StandardAnalyzer(); // The config IndexWriterConfig config = new IndexWriterConfig(tmpAnalyzer); config.setRAMBufferSizeMB(512); config.setCommitOnClose(true); if (create) config.setOpenMode(IndexWriterConfig.OpenMode.CREATE); // overwrite if it exists. else config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); // create new if none is there, append otherwise. config.setCodec(new LsreCustomCodec()); return new IndexWriter(directory, config); }
From source file:lsre.utils.LuceneUtils.java
License:Open Source License
public static IndexWriter createIndexWriter(Directory directory, boolean create, AnalyzerType analyzer, double RAMBufferSize) throws IOException { // set the analyzer according to the method params Analyzer tmpAnalyzer = null;/*from w w w . j a v a 2 s. co m*/ if (analyzer == AnalyzerType.SimpleAnalyzer) tmpAnalyzer = new SimpleAnalyzer(); else if (analyzer == AnalyzerType.WhitespaceAnalyzer) tmpAnalyzer = new WhitespaceAnalyzer(); // The config IndexWriterConfig config = new IndexWriterConfig(tmpAnalyzer); if (create) config.setOpenMode(IndexWriterConfig.OpenMode.CREATE); // overwrite if it exists. else config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); // create new if none is there, append otherwise. config.setRAMBufferSizeMB(RAMBufferSize); config.setCodec(new LsreCustomCodec()); return new IndexWriter(directory, config); }
From source file:lucene.demo.search.FileIndexer.java
License:Apache License
public void index() { final File docDir = new File(docsPath); if (!docDir.exists() || !docDir.canRead()) { System.out.println("Document directory '" + docDir.getAbsolutePath() + "' does not exist or is not readable, please check the path"); System.exit(1);/*from w ww .j a v a2s. c o m*/ } Date start = new Date(); try { System.out.println("Indexing to directory '" + indexPath + "'..."); Directory dir = FSDirectory.open(new File(indexPath)); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_48); IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_48, analyzer); if (createMode) { // Create a new index in the directory, removing any // previously indexed documents: iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE); } else { // Add new documents to an existing index: iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); } // Optional: for better indexing performance, if you // are indexing many documents, increase the RAM // buffer. But if you do this, increase the max heap // size to the JVM (eg add -Xmx512m or -Xmx1g): // // iwc.setRAMBufferSizeMB(256.0); IndexWriter writer = new IndexWriter(dir, iwc); indexDocs(writer, docDir); writer.close(); Date end = new Date(); System.out.println(end.getTime() - start.getTime() + " total milliseconds"); } catch (IOException e) { System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage()); } }