Usage examples for the org.apache.lucene.index.IndexWriter constructor:
public IndexWriter(Directory d, IndexWriterConfig conf) throws IOException
Constructs a new IndexWriter that writes to the directory d, configured by conf.
. From source file:collene.TestLuceneAssumptions.java
License:Apache License
@Test public void testCanSeeUpdatesAfterAdd() throws Exception { // this verifies that any reader can see updates after documents are added. File fdir = TestUtil.getRandomTempDir(); pleaseDelete.add(fdir);// w w w.j a v a2s. c o m Directory dir = FSDirectory.open(fdir); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_9); IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_9, analyzer); config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); IndexWriter writer = new IndexWriter(dir, config); Document doc0 = new Document(); Document doc1 = new Document(); doc0.add(new Field("f0", "aaa", TextField.TYPE_STORED)); doc1.add(new Field("f0", "bbb", TextField.TYPE_STORED)); List<Document> docs = Lists.newArrayList(doc0, doc1); writer.addDocuments(docs, analyzer); IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(writer, false)); QueryParser parser = new QueryParser(Version.LUCENE_4_9, "f0", new StandardAnalyzer(Version.LUCENE_4_9)); Query query = parser.parse("bbb"); TopDocs topDocs = searcher.search(query, 10); Assert.assertEquals(1, topDocs.totalHits); Assert.assertEquals(1, topDocs.scoreDocs.length); writer.close(); dir.close(); }
From source file:collene.TestLuceneAssumptions.java
License:Apache License
@Test public void listAfterEachStep() throws Exception { File fdir = TestUtil.getRandomTempDir(); pleaseDelete.add(fdir);//w w w. j a va2 s.co m Directory dir = FSDirectory.open(fdir); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_9); IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_9, analyzer); config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); //System.out.println("Before creating writer"); dump(fdir, dir); IndexWriter writer = new IndexWriter(dir, config); //System.out.println("After creating writer"); dump(fdir, dir); List<Document> docs = new ArrayList<Document>(); for (int i = 0; i < 50000; i++) { Document doc = new Document(); for (int f = 0; f < 5; f++) { doc.add(new Field("field_" + f, TestUtil.randomString(128), TextField.TYPE_STORED)); } docs.add(doc); } writer.addDocuments(docs, analyzer); docs.clear(); //System.out.println("After doc add 0"); dump(fdir, dir); for (int i = 0; i < 50000; i++) { Document doc = new Document(); for (int f = 0; f < 5; f++) { doc.add(new Field("field_" + f, TestUtil.randomString(128), TextField.TYPE_STORED)); } docs.add(doc); } writer.addDocuments(docs, analyzer); docs.clear(); //System.out.println("After doc add 1"); dump(fdir, dir); writer.commit(); //System.out.println("After commit"); dump(fdir, dir); writer.forceMerge(1, true); //System.out.println("Right after merge"); dump(fdir, dir); try { Thread.currentThread().sleep(5000); } catch (Exception ex) { } //System.out.println("After sleeping after merge"); dump(fdir, dir); writer.close(); //System.out.println("After writer close"); dump(fdir, dir); dir.close(); //System.out.println("After dir close"); dump(fdir, dir); }
From source file:collene.TestShakespeare.java
License:Apache License
@Test public void rest() throws IOException, ParseException { File shakespeareDir = new File("src/test/resources/shakespeare"); File[] files = shakespeareDir.listFiles(new FileFilter() { @Override/*from w ww .ja va 2s. c o m*/ public boolean accept(File pathname) { return !pathname.isHidden(); } }); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_9); IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_9, analyzer); config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); IndexWriter writer = new IndexWriter(directory, config); long startIndexTime = System.currentTimeMillis(); final int flushLines = 200; int totalLines = 0; Collection<Document> documents = new ArrayList<Document>(); for (File f : files) { String play = f.getName(); int lineNumber = 1; BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(f))); String line = reader.readLine(); while (line != null) { // index it. Document doc = new Document(); doc.add(new NumericDocValuesField("line", lineNumber)); doc.add(new Field("play", play, TextField.TYPE_STORED)); doc.add(new Field("content", line, TextField.TYPE_STORED)); documents.add(doc); totalLines += 1; if (totalLines % flushLines == 0) { writer.addDocuments(documents); documents.clear(); } lineNumber += 1; line = reader.readLine(); } reader.close(); } if (documents.size() > 0) { writer.addDocuments(documents); } long endIndexTime = System.currentTimeMillis(); System.out.println( String.format("Index for %s took %d ms", directory.toString(), endIndexTime - startIndexTime)); //System.out.println(String.format("%s committed", directory.getClass().getSimpleName())); // writer.forceMerge(1); // System.out.println(String.format("%s merged", directory.getClass().getSimpleName())); // let's search! 
IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(writer, false)); QueryParser parser = new QueryParser(Version.LUCENE_4_9, "content", analyzer); String[] queryTerms = new String[] { "trumpet" }; for (String term : queryTerms) { long searchStart = System.currentTimeMillis(); Query query = parser.parse(term); TopDocs docs = searcher.search(query, 10); long searchEnd = System.currentTimeMillis(); System.out.println(String.format("%s %d total hits in %d", directory.getClass().getSimpleName(), docs.totalHits, searchEnd - searchStart)); for (ScoreDoc doc : docs.scoreDocs) { System.out.println(String.format("%d %.2f %d", doc.doc, doc.score, doc.shardIndex)); } } writer.close(true); //System.out.println(String.format("%s closed", directory.getClass().getSimpleName())); System.out.println("I think these are the files:"); for (String s : directory.listAll()) { System.out.println(s); } directory.close(); }
From source file:com.adanac.module.blog.search.LuceneHelper.java
License:Apache License
/**
 * Builds (CREATE mode: replaces any existing index) a Lucene index under
 * {@code INDEX_PATH + path}. Each entry of {@code dataList} becomes one
 * document with an int "id" field and a stored "indexedContent" field of
 * title + SEPARATOR + content. Failures are logged and swallowed
 * (best-effort indexing, preserved from the original contract).
 *
 * @param path     index subdirectory name appended to INDEX_PATH
 * @param id       map key holding the numeric document id
 * @param title    map key holding the title text
 * @param content  map key holding the body text
 * @param dataList rows to index
 */
private static void generateIndex(String path, String id, String title, String content,
        List<Map<String, String>> dataList) {
    try {
        Directory dir = FSDirectory.open(Paths.get(INDEX_PATH + path));
        Analyzer analyzer = new SmartChineseAnalyzer();
        IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);
        indexWriterConfig.setOpenMode(OpenMode.CREATE);
        // BUG FIX: try-with-resources guarantees the writer (and its index
        // write lock) is released even when indexing a document throws;
        // previously an exception mid-loop leaked the writer.
        try (IndexWriter writer = new IndexWriter(dir, indexWriterConfig)) {
            for (Map<String, String> data : dataList) {
                Document document = new Document();
                Field idField = new IntField("id", Integer.valueOf(data.get(id)), Field.Store.YES);
                Field indexedContentField = new TextField("indexedContent",
                        data.get(title) + SEPARATOR + data.get(content), Field.Store.YES);
                document.add(idField);
                document.add(indexedContentField);
                writer.addDocument(document);
                if (logger.isInfoEnabled()) {
                    logger.info("add index for : [" + data.get(title) + "]");
                }
            }
        }
    } catch (Exception e) {
        logger.error("add index failed ...", e);
    }
}
From source file:com.admarketplace.isg.lucene.demo.IndexFiles.java
License:Apache License
/** Index all text files under a directory. */ public static void main(String[] args) { String usage = "java org.apache.lucene.demo.IndexFiles" + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n" + "This indexes the documents in DOCS_PATH, creating a Lucene index" + "in INDEX_PATH that can be searched with SearchFiles"; String indexPath = "index"; String docsPath = null;/*w ww.j a va 2 s . c om*/ boolean create = true; for (int i = 0; i < args.length; i++) { if ("-index".equals(args[i])) { indexPath = args[i + 1]; i++; } else if ("-docs".equals(args[i])) { docsPath = args[i + 1]; i++; } else if ("-update".equals(args[i])) { create = false; } } if (docsPath == null) { System.err.println("Usage: " + usage); System.exit(1); } final File docDir = new File(docsPath); if (!docDir.exists() || !docDir.canRead()) { System.out.println("Document directory '" + docDir.getAbsolutePath() + "' does not exist or is not readable, please check the path"); System.exit(1); } Date start = new Date(); try { System.out.println("Indexing to directory '" + indexPath + "'..."); Directory dir = FSDirectory.open(new File(indexPath)); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_40); IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_40, analyzer); if (create) { // Create a new index in the directory, removing any // previously indexed documents: iwc.setOpenMode(OpenMode.CREATE); } else { iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); } // Optional: for better indexing performance, if you // are indexing many documents, increase the RAM // buffer. But if you do this, increase the max heap // size to the JVM (eg add -Xmx512m or -Xmx1g): // // iwc.setRAMBufferSizeMB(256.0); IndexWriter writer = new IndexWriter(dir, iwc); indexDocs(writer, docDir); // NOTE: if you want to maximize search performance, // you can optionally call forceMerge here. 
This can be // a terribly costly operation, so generally it's only // worth it when your index is relatively static (ie // you're done adding documents to it): // // writer.forceMerge(1); writer.close(); Date end = new Date(); System.out.println(end.getTime() - start.getTime() + " total milliseconds"); } catch (IOException e) { System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage()); } }
From source file:com.aliasi.lingmed.entrezgene.IndexEntrezGene.java
License:Lingpipe license
public void run() { mLogger.info("Begin indexing"); try {/* w w w . j av a 2s . c o m*/ IndexWriter indexWriter = new IndexWriter(mIndex, mCodec.getAnalyzer()); EntrezGeneIndexer indexer = new EntrezGeneIndexer(indexWriter, mCodec, mType); // save raw XML for <Entrezgene> element Parser<ObjectHandler<EntrezGene>> parser = new EntrezParser(true); parser.setHandler(indexer); if (mDistFileName.endsWith(".xml")) { InputSource inSource = new InputSource(mDistFileName); parser.parse(inSource); } else if (mDistFileName.endsWith(".gz")) { parseGZip(parser, mDistFile); } else { String msg = "Unknown file extension. File=" + mDistFileName; throw new IllegalArgumentException(msg); } mLogger.info("Parsed index, now optimize."); indexer.close(); mLogger.info("Processing complete."); } catch (Exception e) { mLogger.warn("Unexpected Exception: " + e.getMessage()); mLogger.warn("stack trace: " + Logging.logStackTrace(e)); IllegalStateException e2 = new IllegalStateException(e.getMessage()); e2.setStackTrace(e.getStackTrace()); throw e2; } }
From source file:com.aliasi.lingmed.homologene.IndexHomologene.java
License:Lingpipe license
/**
 * Parses the Homologene distribution XML file and indexes each homologene
 * group into the Lucene index {@code mIndex}.
 *
 * @throws IllegalStateException wrapping any failure during parsing/indexing
 */
public void run() {
    mLogger.info("Begin indexing");
    try {
        IndexWriter indexWriter = new IndexWriter(mIndex, mCodec.getAnalyzer());
        HomologeneIndexer indexer = new HomologeneIndexer(indexWriter, mCodec);
        Parser<ObjectHandler<HomologeneGroup>> parser = new HomologeneParser(true);
        parser.setHandler(indexer);

        InputSource inSource = new InputSource(mDistFileName);
        parser.parse(inSource);
        mLogger.info("Parsed index, now optimize.");
        indexer.close();
        mLogger.info("Processing complete.");
    } catch (Exception e) {
        mLogger.warn("Unexpected Exception: " + e.getMessage());
        mLogger.warn("stack trace: " + Logging.logStackTrace(e));
        // BUG FIX: chain the original exception as the cause instead of only
        // copying its stack trace -- preserves the full causal chain.
        throw new IllegalStateException(e.getMessage(), e);
    }
}
From source file:com.aliasi.lingmed.medline.IndexMedline.java
License:Lingpipe license
/** * Run the command. See class documentation above for details on * arguments and behavior.// w ww . j a v a 2 s . c o m */ public void run() { System.out.println("start run"); try { File[] files = getLaterFiles(mDistDir); System.out.println("Total files to process: " + files.length); System.out.println("File names: " + java.util.Arrays.asList(files)); // if (mLogger.isDebugEnabled()) // mLogger.debug("File names: " + java.util.Arrays.asList(files)); if (files.length > 0) { MedlineParser parser = new MedlineParser(true); // true = save raw XML Directory fsDir = FSDirectory.open(mIndex); IndexWriterConfig iwConf = new IndexWriterConfig(Version.LUCENE_36, mCodec.getAnalyzer()); iwConf.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); iwConf.setRAMBufferSizeMB(RAM_BUF_SIZE); if (sIsBaseline) { LogDocMergePolicy ldmp = new LogDocMergePolicy(); ldmp.setMergeFactor(MERGE_FACTOR_HI); iwConf.setMergePolicy(ldmp); } IndexWriter indexWriter = new IndexWriter(fsDir, iwConf); for (File file : files) { System.out.println("processing file: " + file); MedlineIndexer indexer = new MedlineIndexer(indexWriter, mCodec); parser.setHandler(indexer); parseFile(parser, file); indexer.close(); recordFile(indexWriter, file.getName()); System.out.println("completed processing file: " + file); } System.out.println("All files parsed, now optimize index"); indexWriter.forceMerge(1); indexWriter.commit(); indexWriter.close(); } System.out.println("Processing complete."); } catch (Exception e) { // mLogger.warn("Unexpected Exception: "+e.getMessage()); // mLogger.warn("stack trace: "+Logging.logStackTrace(e)); // mLogger.warn("Aborting this run"); IllegalStateException e2 = new IllegalStateException(e.getMessage()); e2.setStackTrace(e.getStackTrace()); throw e2; } }
From source file:com.aliasi.lingmed.medline.OptimizeMedline.java
License:Lingpipe license
/** * Run the command. See class documentation above for details on * arguments and behavior.//from w w w .j a va 2s. c o m */ public void run() { // mLogger.info("start run"); try { Directory fsDir = FSDirectory.open(mIndex); IndexWriterConfig iwConf = new IndexWriterConfig(Version.LUCENE_36, new StandardAnalyzer(Version.LUCENE_36)); iwConf.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); IndexWriter indexWriter = new IndexWriter(fsDir, iwConf); indexWriter.forceMerge(1); indexWriter.commit(); // mLogger.info("Processing complete."); } catch (Exception e) { // mLogger.warn("Unexpected Exception: "+e.getMessage()); // mLogger.warn("stack trace: "+Logging.logStackTrace(e)); // mLogger.warn("Aborting this run"); IllegalStateException e2 = new IllegalStateException(e.getMessage()); e2.setStackTrace(e.getStackTrace()); throw e2; } }
From source file:com.aliasi.lingmed.omim.IndexOmim.java
License:Lingpipe license
public void run() { mLogger.info("Begin indexing"); try {// www.j a va 2s . c om IndexWriter indexWriter = new IndexWriter(mIndex, mCodec.getAnalyzer()); OmimIndexer indexer = new OmimIndexer(indexWriter); Parser<ObjectHandler<OmimRecord>> parser = new OmimParser(true); parser.setHandler(indexer); InputSource inSource = new InputSource(); inSource.setCharacterStream(new FileReader(mDistFile)); parser.parse(inSource); mLogger.info("Parsed index, now optimize."); indexer.close(); mLogger.info("Processing complete."); } catch (Exception e) { mLogger.warn("Unexpected Exception: " + e.getMessage()); mLogger.warn("stack trace: " + Logging.logStackTrace(e)); IllegalStateException e2 = new IllegalStateException(e.getMessage()); e2.setStackTrace(e.getStackTrace()); throw e2; } }