List of usage examples for the org.apache.lucene.index.IndexWriterConfig constructor IndexWriterConfig(Analyzer)
public IndexWriterConfig(Analyzer analyzer)
From source file:cs412.project.search.IndexFiles.java
License:Apache License
public IndexFiles(String docsPath, String indexPath) { boolean create = true; if (docsPath == null) { System.exit(1);// w w w . j a v a 2 s . co m } final File docDir = new File(docsPath); if (!docDir.exists() || !docDir.canRead()) { System.out.println("Document directory '" + docDir.getAbsolutePath() + "' does not exist or is not readable, please check the path"); System.exit(1); } Date start = new Date(); try { System.out.println("Indexing to directory '" + indexPath + "'..."); Directory dir = FSDirectory.open(new File(indexPath).toPath()); Analyzer analyzer = new StandardAnalyzer(); IndexWriterConfig iwc = new IndexWriterConfig(analyzer); if (create) { // Create a new index in the directory, removing any // previously indexed documents: iwc.setOpenMode(OpenMode.CREATE); } else { // Add new documents to an existing index: iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); } // Optional: for better indexing performance, if you // are indexing many documents, increase the RAM // buffer. But if you do this, increase the max heap // size to the JVM (eg add -Xmx512m or -Xmx1g): // // iwc.setRAMBufferSizeMB(256.0); IndexWriter writer = new IndexWriter(dir, iwc); indexDocs(writer, docDir); // NOTE: if you want to maximize search performance, // you can optionally call forceMerge here. This can be // a terribly costly operation, so generally it's only // worth it when your index is relatively static (ie // you're done adding documents to it): // // writer.forceMerge(1); writer.close(); Date end = new Date(); System.out.println(end.getTime() - start.getTime() + " total milliseconds"); } catch (IOException e) { System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage()); } }
From source file:cs412.project.search.IndexFiles.java
License:Apache License
/** Index all text files under a directory. */ public static void main(String[] args) { String usage = "java org.apache.lucene.demo.IndexFiles" + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n" + "This indexes the documents in DOCS_PATH, creating a Lucene index" + "in INDEX_PATH that can be searched with SearchFiles"; String indexPath = "index"; // String docsPath = "H:\\data set 4"; //CHANGE BELOW TO YOUR PATH String docsPath = "Split Files/"; boolean create = true; for (int i = 0; i < args.length; i++) { if ("-index".equals(args[i])) { indexPath = args[i + 1];//from w w w .j a v a 2s. c om i++; } else if ("-docs".equals(args[i])) { docsPath = args[i + 1]; i++; } else if ("-update".equals(args[i])) { create = false; } } if (docsPath == null) { System.err.println("Usage: " + usage); System.exit(1); } final File docDir = new File(docsPath); if (!docDir.exists() || !docDir.canRead()) { System.out.println("Document directory '" + docDir.getAbsolutePath() + "' does not exist or is not readable, please check the path"); System.exit(1); } Date start = new Date(); try { System.out.println("Indexing to directory '" + indexPath + "'..."); Directory dir = FSDirectory.open(new File(indexPath).toPath()); Analyzer analyzer = new StandardAnalyzer(); IndexWriterConfig iwc = new IndexWriterConfig(analyzer); if (create) { // Create a new index in the directory, removing any // previously indexed documents: iwc.setOpenMode(OpenMode.CREATE); } else { // Add new documents to an existing index: iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); } // Optional: for better indexing performance, if you // are indexing many documents, increase the RAM // buffer. But if you do this, increase the max heap // size to the JVM (eg add -Xmx512m or -Xmx1g): // // iwc.setRAMBufferSizeMB(256.0); IndexWriter writer = new IndexWriter(dir, iwc); indexDocs(writer, docDir); // NOTE: if you want to maximize search performance, // you can optionally call forceMerge here. 
This can be // a terribly costly operation, so generally it's only // worth it when your index is relatively static (ie // you're done adding documents to it): // // writer.forceMerge(1); writer.close(); Date end = new Date(); System.out.println(end.getTime() - start.getTime() + " total milliseconds"); } catch (IOException e) { System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage()); } }
From source file:cs571.proj1.IndexFiles.java
License:Apache License
/** Index all text files under a directory. */ public static void main(String[] args) { String usage = "java org.apache.lucene.demo.IndexFiles" + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n" + "This indexes the documents in DOCS_PATH, creating a Lucene index" + "in INDEX_PATH that can be searched with SearchFiles"; String indexPath = "index"; String docsPath = null;//from w w w . j a va2 s. co m boolean create = true; for (int i = 0; i < args.length; i++) { if ("-index".equals(args[i])) { indexPath = args[i + 1]; i++; } else if ("-docs".equals(args[i])) { docsPath = args[i + 1]; i++; } else if ("-update".equals(args[i])) { create = false; } else if ("-tfidf".equals(args[i])) { tfidf = true; } else if ("-bm25".equals(args[i])) { bm25 = true; } } if (docsPath == null) { System.err.println("Usage: " + usage); System.exit(1); } final Path docDir = Paths.get(docsPath); if (!Files.isReadable(docDir)) { System.out.println("Document directory '" + docDir.toAbsolutePath() + "' does not exist or is not readable, please check the path"); System.exit(1); } Date start = new Date(); try { System.out.println("Indexing to directory '" + indexPath + "'..."); Directory dir = FSDirectory.open(Paths.get(indexPath)); Analyzer analyzer = new StandardAnalyzer(); IndexWriterConfig iwc = new IndexWriterConfig(analyzer); if (tfidf) iwc.setSimilarity(new TFIDF()); if (bm25) iwc.setSimilarity(new BM25()); if (create) { // Create a new index in the directory, removing any // previously indexed documents: iwc.setOpenMode(OpenMode.CREATE); } else { // Add new documents to an existing index: iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); } // Optional: for better indexing performance, if you // are indexing many documents, increase the RAM // buffer. 
But if you do this, increase the max heap // size to the JVM (eg add -Xmx512m or -Xmx1g): // // iwc.setRAMBufferSizeMB(256.0); IndexWriter writer = new IndexWriter(dir, iwc); indexDocs(writer, docDir); // NOTE: if you want to maximize search performance, // you can optionally call forceMerge here. This can be // a terribly costly operation, so generally it's only // worth it when your index is relatively static (ie // you're done adding documents to it): // // writer.forceMerge(1); writer.close(); Date end = new Date(); System.out.println(end.getTime() - start.getTime() + " total milliseconds"); System.out.println("Total # of Docs Indexed: " + numOfDocuments); } catch (IOException e) { System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage()); } }
From source file:csdn.lucene.first.version.Indexer.java
License:Apache License
public Indexer(String indexDir) throws IOException { // open file in indexDir Path pathA = Paths.get(indexDir); Directory dir = FSDirectory.open(pathA); IndexWriterConfig config = new IndexWriterConfig(new IKAnalyzer(true)); writer = new IndexWriter(dir, config); }
From source file:dbn.db.FullTextTrigger.java
/**
 * Get the Lucene index access.
 *
 * <p>Lazily initialises the shared index state ({@code indexPath}, {@code directory},
 * {@code indexWriter}, {@code indexReader}, {@code indexSearcher}). The emptiness check is made
 * once before and again after taking the write lock, so initialisation work is skipped when
 * the state is already populated by the time the write lock is held.
 *
 * @param conn SQL connection, passed on to {@code getIndexPath} when the index path is not set
 * @throws SQLException Unable to access the Lucene index, or {@code isActive} is false
 */
private static void getIndexAccess(Connection conn) throws SQLException {
    if (!isActive) {
        throw new SQLException("NRS is no longer active");
    }
    // Take the update lock unless writeLock().hasLock() reports true
    // (presumably: the current thread already holds the write lock - TODO confirm).
    boolean obtainedUpdateLock = false;
    if (!indexLock.writeLock().hasLock()) {
        indexLock.updateLock().lock();
        obtainedUpdateLock = true;
    }
    try {
        if (indexPath == null || indexWriter == null) {
            indexLock.writeLock().lock();
            try {
                // Each piece is re-checked under the write lock before it is initialised.
                if (indexPath == null) {
                    getIndexPath(conn);
                }
                if (directory == null) {
                    directory = FSDirectory.open(indexPath);
                }
                if (indexWriter == null) {
                    IndexWriterConfig config = new IndexWriterConfig(analyzer);
                    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
                    indexWriter = new IndexWriter(directory, config);
                    // Write (or replace) a single control document and commit before the
                    // reader is opened. NOTE(review): presumably this guarantees a commit
                    // exists so DirectoryReader.open succeeds on a new index - confirm.
                    Document document = new Document();
                    document.add(new StringField("_QUERY", "_CONTROL_DOCUMENT_", Field.Store.YES));
                    indexWriter.updateDocument(new Term("_QUERY", "_CONTROL_DOCUMENT_"), document);
                    indexWriter.commit();
                    // NOTE(review): if opening the reader throws, indexWriter stays non-null
                    // and this branch is skipped on later calls - verify that reader and
                    // searcher are recovered elsewhere.
                    indexReader = DirectoryReader.open(directory);
                    indexSearcher = new IndexSearcher(indexReader);
                }
            } finally {
                indexLock.writeLock().unlock();
            }
        }
    } catch (IOException | SQLException exc) {
        // Log, then surface every failure as an SQLException carrying the original cause.
        Logger.logErrorMessage("Unable to access the Lucene index", exc);
        throw new SQLException("Unable to access the Lucene index", exc);
    } finally {
        // Only release the update lock if this call acquired it.
        if (obtainedUpdateLock) {
            indexLock.updateLock().unlock();
        }
    }
}
From source file:de.anycook.db.lucene.FulltextIndex.java
License:Open Source License
/**
 * Creates a new writer configuration bound to this instance's {@code analyzer}.
 *
 * @return a freshly constructed {@link IndexWriterConfig}
 */
private IndexWriterConfig createIndexWriterConfig() {
    final IndexWriterConfig writerConfig = new IndexWriterConfig(analyzer);
    return writerConfig;
}
From source file:de.citec.lucene.CreateIndex.java
public static void main(String[] args) throws IOException { Analyzer analyzer = null;//from w ww . j a v a 2 s. c om List<String> files = new ArrayList<>(); files.add("/Users/swalter/Documents/EsaDeutsch/new_copus_german.txt"); String indexPath = "/Users/swalter/Documents/EsaDeutsch/Index/"; Language language = Language.DE; Directory dir = FSDirectory.open(Paths.get(indexPath)); if (language.equals(Language.DE)) analyzer = new GermanAnalyzer(); if (language.equals(Language.ES)) analyzer = new SpanishAnalyzer(); if (language.equals(Language.EN)) analyzer = new EnglishAnalyzer(); IndexWriterConfig iwc = new IndexWriterConfig(analyzer); iwc.setOpenMode(OpenMode.CREATE); iwc.setRAMBufferSizeMB(12000); try (IndexWriter writer = new IndexWriter(dir, iwc)) { files.forEach(f -> { try { indexDocs(writer, Paths.get(f)); } catch (IOException ex) { Logger.getLogger(CreateIndex.class.getName()).log(Level.SEVERE, null, ex); } }); } System.out.println(counter); }
From source file:de.citec.sc.sentence.preprocessing.lucene.CreateIndex.java
public static void main(String[] args) throws IOException { Analyzer analyzer = null;//from w w w . j a va 2s .co m List<String> files = new ArrayList<>(); files.add("/Users/swalter/Downloads/german_sentences_reduced.txt"); String indexPath = "/Users/swalter/Index/GermanIndexReduced/"; Language language = Language.DE; Directory dir = FSDirectory.open(Paths.get(indexPath)); //files.add("/home/bettina/CITEC/MATOLL/preprocessSentences/idealSentences/idealSents_mecab_jdepp_rmvPunct_CoNLLU"); //String indexPath = "/home/bettina/CITEC/MATOLL/preprocessSentences/idealSentences/index"; //Language language = Language.JA; //Directory dir = FSDirectory.open(Paths.get(indexPath)); if (language.equals(Language.DE)) analyzer = new GermanAnalyzer(); if (language.equals(Language.ES)) analyzer = new SpanishAnalyzer(); if (language.equals(Language.EN)) analyzer = new EnglishAnalyzer(); if (language.equals(Language.JA)) analyzer = new JapaneseAnalyzer(); IndexWriterConfig iwc = new IndexWriterConfig(analyzer); iwc.setOpenMode(OpenMode.CREATE); iwc.setRAMBufferSizeMB(12000); try (IndexWriter writer = new IndexWriter(dir, iwc)) { files.forEach(f -> { try { indexDocs(writer, Paths.get(f), language); } catch (IOException ex) { Logger.getLogger(CreateIndex.class.getName()).log(Level.SEVERE, null, ex); } }); } }
From source file:de.dfki.km.leech.lucene.ToLuceneContentHandler.java
License:Open Source License
synchronized protected IndexWriter getCurrentWriter() throws CorruptIndexException, LockObtainFailedException, IOException { if (getSplitAndMergeIndex() <= 0) return m_initialLuceneWriter; if (m_luceneWriter.maxDoc() < getSplitAndMergeIndex()) return m_luceneWriter; Directory directory = m_initialLuceneWriter.getDirectory(); Path fOurTmpDir = null;/* w ww . j a va 2 s . co m*/ if (directory instanceof FSDirectory) { if (m_luceneWriter != m_initialLuceneWriter) m_llIndexWriter2Close.add(m_luceneWriter); String strTmpPath = ((FSDirectory) directory).getDirectory().toAbsolutePath().toString(); // if(strTmpPath.charAt(strTmpPath.length() - 1) == '/' || strTmpPath.charAt(strTmpPath.length() - 1) == '\\') // strTmpPath = strTmpPath.substring(0, strTmpPath.length() - 1); strTmpPath += "_" + (m_hsTmpLuceneWriterPaths2Merge.size() + 1); fOurTmpDir = Paths.get(strTmpPath); } else { // wir brauchen was temporres File parentDir = new File(System.getProperty("java.io.tmpdir")); fOurTmpDir = Paths.get(parentDir.getAbsolutePath() + "/leechTmp/" + UUID.randomUUID().toString().replaceAll("\\W", "_")); } Logger.getLogger(ToLuceneContentHandler.class.getName()).info("Current index exceeds " + m_iSplitIndexDocumentCount + " documents. Will create another temporary one under " + fOurTmpDir); @SuppressWarnings("deprecation") IndexWriterConfig config = new IndexWriterConfig(m_initialLuceneWriter.getConfig().getAnalyzer()); config.setOpenMode(OpenMode.CREATE); m_luceneWriter = new IndexWriter(new SimpleFSDirectory(fOurTmpDir), config); m_hsTmpLuceneWriterPaths2Merge.add(fOurTmpDir.toAbsolutePath().toString()); return m_luceneWriter; }
From source file:de.dfki.km.leech.parser.incremental.IncrementalCrawlingHistory.java
License:Open Source License
/** * Creates all writer, reader, and searcher objects if necessary * /*w ww .j av a 2 s . c om*/ * @throws CorruptIndexException * @throws LockObtainFailedException * @throws IOException */ public void openLuceneStuff() throws CorruptIndexException, LockObtainFailedException, IOException { if (m_indexWriter == null) { IndexWriterConfig config = new IndexWriterConfig(new KeywordAnalyzer()); config.setOpenMode(OpenMode.CREATE_OR_APPEND); m_indexWriter = new IndexWriter(new SimpleFSDirectory(Paths.get(m_strHistoryPath)), config); } if (m_indexReader == null) m_indexReader = DirectoryReader.open(m_indexWriter, true, true); if (m_indexSearcher == null) m_indexSearcher = new IndexSearcher(m_indexReader); }