List of usage examples for org.apache.lucene.index IndexWriterConfig setOpenMode
public IndexWriterConfig setOpenMode(OpenMode openMode)
From source file:lucene.demo.search.FileSearcher.java
License:Apache License
/**
 * Deletes every document matching {@code query} from the index at {@code indexPath}.
 *
 * @param query the query selecting documents to remove
 * @throws IOException if the index directory cannot be opened or written
 */
private void removeDocs(Query query) throws IOException {
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_48);
    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_48, analyzer);
    // CREATE_OR_APPEND: operate on the existing index rather than wiping it.
    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    // BUGFIX: try-with-resources guarantees the directory and writer are closed
    // even if deleteDocuments/commit throws (the original leaked both on failure).
    try (Directory dir = FSDirectory.open(new File(indexPath));
            IndexWriter writer = new IndexWriter(dir, iwc)) {
        writer.deleteDocuments(query);
        writer.commit();
    }
}
From source file:luceneindexcreator.LuceneIndexCreator.java
public boolean openIndex() { try {//from w w w .j a v a2 s.c om Directory dir = FSDirectory.open(new File(indexPath)); //populating stopwords with stopwords.txt file CharArraySet stopWords = new CharArraySet(Version.LUCENE_CURRENT, 0, true); try { File file = new File("stopwords.txt"); FileReader fr = new FileReader(file); BufferedReader br = new BufferedReader(fr); String line; while ((line = br.readLine()) != null) { stopWords.add(line); } fr.close(); } catch (IOException e) { e.printStackTrace(); } StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_47, stopWords); IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_47, analyzer); //Always overwrite the directory iwc.setOpenMode(OpenMode.CREATE); indexWriter = new IndexWriter(dir, iwc); return true; } catch (Exception e) { System.err.println("Error opening the index. " + e.getMessage()); } return false; }
From source file:luceneindexer.files.LuceneWriter.java
public boolean openIndex() { try {//ww w . j a va 2s . co m //Open the directory so lucene knows how to deal with it Directory dir = FSDirectory.open(new File(pathToIndex)); //Chose the analyzer we are going to use to write documents to the index. We need to specify the version //of the Lucene index type we want to use Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_44); //Create an index writer configuration. Same thing here with the index version IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_44, analyzer); //we are always going to overwrite the index that is currently in the directory iwc.setOpenMode(OpenMode.CREATE); //let's open that index and get a writer to hand back to the main code indexWriter = new IndexWriter(dir, iwc); return true; } catch (Exception e) { System.out.println("Threw an exception trying to open the index for writing: " + e.getClass() + " :: " + e.getMessage()); return false; } }
From source file:luceneingester.TrecIngester.java
License:Apache License
// NOTE(review): CLI entry point for bulk-indexing a TREC corpus. Flow: parse
// args -> configure EnglishAnalyzer + IndexWriterConfig (APPEND when -update,
// else CREATE) -> run IndexThreads until done -> commit with "userData"="multi"
// commit data -> close writer/dir and print throughput stats. The timing,
// commit-data and thread-lifecycle ordering is load-bearing, so the code is
// left byte-identical below; only this header was added.
public static void main(String[] clArgs) throws Exception { Args args = new Args(clArgs); final String dirPath = args.getString("-indexPath") + "/index"; final String dataDir = args.getString("-dataDir"); final int docCountLimit = args.getInt("-docCountLimit"); // -1 means all docs from the source: final int numThreads = args.getInt("-threadCount"); final boolean verbose = args.getFlag("-verbose"); final boolean printDPS = args.getFlag("-printDPS"); final boolean doUpdate = args.getFlag("-update"); final boolean positions = args.getFlag("-positions"); args.check();//from w w w.java 2s . c om final Analyzer a = new EnglishAnalyzer(); final TrecContentSource trecSource = createTrecSource(dataDir); final Directory dir = FSDirectory.open(Paths.get(dirPath)); System.out.println("Index path: " + dirPath); System.out.println("Doc count limit: " + (docCountLimit == -1 ? "all docs" : "" + docCountLimit)); System.out.println("Threads: " + numThreads); System.out.println("Verbose: " + (verbose ? "yes" : "no")); System.out.println("Positions: " + (positions ? 
"yes" : "no")); if (verbose) { InfoStream.setDefault(new PrintStreamInfoStream(System.out)); } final IndexWriterConfig iwc = new IndexWriterConfig(a); if (doUpdate) { iwc.setOpenMode(IndexWriterConfig.OpenMode.APPEND); } else { iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE); } System.out.println("IW config=" + iwc); final IndexWriter w = new IndexWriter(dir, iwc); IndexThreads threads = new IndexThreads(w, positions, trecSource, numThreads, docCountLimit, printDPS); System.out.println("\nIndexer: start"); final long t0 = System.currentTimeMillis(); threads.start(); while (!threads.done()) { Thread.sleep(100); } threads.stop(); final long t1 = System.currentTimeMillis(); System.out.println( "\nIndexer: indexing done (" + (t1 - t0) / 1000.0 + " sec); total " + w.maxDoc() + " docs"); if (!doUpdate && docCountLimit != -1 && w.maxDoc() != docCountLimit) { throw new RuntimeException("w.maxDoc()=" + w.maxDoc() + " but expected " + docCountLimit); } if (threads.failed.get()) { throw new RuntimeException("exceptions during indexing"); } final long t2; t2 = System.currentTimeMillis(); final Map<String, String> commitData = new HashMap<String, String>(); commitData.put("userData", "multi"); w.setCommitData(commitData); w.commit(); final long t3 = System.currentTimeMillis(); System.out.println("\nIndexer: commit multi (took " + (t3 - t2) / 1000.0 + " sec)"); System.out.println("\nIndexer: at close: " + w.segString()); final long tCloseStart = System.currentTimeMillis(); w.close(); System.out.println("\nIndexer: close took " + (System.currentTimeMillis() - tCloseStart) / 1000.0 + " sec"); dir.close(); final long tFinal = System.currentTimeMillis(); System.out.println("\nIndexer: finished (" + (tFinal - t0) / 1000.0 + " sec)"); System.out.println("\nIndexer: net bytes indexed " + threads.getBytesIndexed()); System.out.println( "\nIndexer: " + (threads.getBytesIndexed() / 1024. / 1024. / 1024. / ((tFinal - t0) / 3600000.)) + " GB/hour plain text"); }
From source file:lucenetew.LuceneTEW.java
/**
 * Builds a fresh Lucene index from the TREC-style XML in {@code sb}, then runs
 * a demo query ("jeep" on field "BRAND") against it and prints the hit count.
 *
 * @param sb raw XML text; cleaned via {@code limpiarXML} before parsing
 * @throws IOException    if the index cannot be written or read
 * @throws ParseException if the demo query cannot be parsed
 */
public static void ParseXML(StringBuilder sb) throws IOException, ParseException {
    String indexPath = "C:\\Users\\Hp Kevin\\Documents\\NetBeansProjects\\LuceneTEW\\Indice";
    Directory dir = FSDirectory.open(Paths.get(indexPath));
    Analyzer analyzer = new StandardAnalyzer();
    IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
    iwc.setOpenMode(OpenMode.CREATE); // always rebuild the index from scratch
    String xml = limpiarXML(sb.toString());
    // BUGFIX: try-with-resources — the original leaked the writer whenever the
    // XML parse failed, because close() was only reached on the success path.
    try (IndexWriter writer = new IndexWriter(dir, iwc)) {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        try {
            DocumentBuilder builder = factory.newDocumentBuilder();
            Document document = builder.parse(new InputSource(new StringReader(xml)));
            NodeList nList = document.getElementsByTagName("DOC");
            // One Lucene document per <DOC>; each child element becomes a stored field.
            for (int i = 0; i < nList.getLength(); i++) {
                NodeList nSubList = nList.item(i).getChildNodes();
                org.apache.lucene.document.Document doc = new org.apache.lucene.document.Document();
                for (int j = 0; j < nSubList.getLength(); j++) {
                    Node tempSubNodo = nSubList.item(j);
                    // Skip non-element nodes such as "#text".
                    if (!tempSubNodo.getNodeName().contains("#")) {
                        doc.add(new StringField(tempSubNodo.getNodeName(),
                                tempSubNodo.getTextContent(), Field.Store.YES));
                        System.out.println(tempSubNodo.getNodeName() + ' ' + tempSubNodo.getTextContent());
                    }
                }
                writer.addDocument(doc);
            }
            System.out.println("Indice Creado");
        } catch (ParserConfigurationException | SAXException | IOException | DOMException e) {
            // BUGFIX: the original swallowed these silently; at least report them.
            System.err.println("Failed to parse/index XML: " + e);
        }
    }
    // Search phase. BUGFIX: the reader was never closed in the original.
    try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(indexPath)))) {
        IndexSearcher searcher = new IndexSearcher(reader);
        Analyzer analyzer2 = new StandardAnalyzer();
        QueryParser parser = new QueryParser("BRAND", analyzer2);
        org.apache.lucene.search.Query query = parser.parse("jeep");
        TopDocs results = searcher.search(query, 100000);
        ScoreDoc[] hits = results.scoreDocs;
        System.out.println(hits.length);
    }
}
From source file:lucenetools.DocIndexer.java
License:Apache License
// NOTE(review): CLI entry point for DocIndexer. Flow: silence log4j -> parse and
// validate command-line options -> build consolidated stop-word, spelling and
// slang tables -> construct the requested analyzer -> optional debug analysis
// (Twitter or plain docs, possibly via MongoDB) with early exit -> write a fresh
// Lucene index (OpenMode.CREATE) from MongoDB or an input directory -> report
// document count and indexing rate. The many System.exit branches and option
// interactions make a restyle risky, so the code is left byte-identical below.
/** * @param args the command line arguments * @throws java.io.IOException/*www. j av a 2 s .com*/ * @throws java.text.ParseException * @throws java.lang.ClassNotFoundException * @throws java.lang.NoSuchMethodException * @throws java.lang.InstantiationException * @throws java.lang.IllegalAccessException * @throws java.lang.reflect.InvocationTargetException */ public static void main(String[] args) throws IOException, ParseException, ClassNotFoundException, NoSuchMethodException, InstantiationException, IllegalAccessException, IllegalArgumentException, InvocationTargetException { // disable the exceedingly verbose node4j output from Tika Logger.getRootLogger().removeAllAppenders(); Logger.getRootLogger().setLevel(Level.OFF); Options opts = new Options(); CommandLine commandLine = new CommandLine(); // if no command line options specified, user wants help if (0 == args.length) { commandLine.showHelp(); System.exit(0); } // extract command line args and store in opts if (!commandLine.parse(args, opts)) System.exit(1); if (opts.showHelp) { commandLine.showHelp(); System.exit(0); } // validate all command line options if (!commandLine.isValid(opts)) System.exit(1); // consolidate stop files into a single CharSetArray String[] stopFiles = { opts.defaultStopFile, opts.userStopFile }; CharArraySet stopWordSet = StopWordSetGenerator.generate(stopFiles); // consolidate spelling files String[] spellingFiles = { opts.defaultSpellFile, opts.userSpellFile }; SpellingFile.consolidate(spellingFiles); HashMap<String, String> spellingHashtable = SpellingFile.getHashtable(); // generate the slang hash map String[] slangFiles = { opts.defaultSlangFile }; SpellingFile.consolidate(slangFiles); HashMap<String, String> slangHashtable = SpellingFile.getHashtable(); // create the user-specified analyzer analyzer = AnalyzerFactory.create(opts.analyzerName, stopWordSet, spellingHashtable, slangHashtable, opts.tokenOpts, opts.modelFile); // check if the analyzer is valid if (analyzer == null) 
{ System.out.println("Error: No analyzer with that name."); System.exit(1); } System.out.println("\nDocIndexer version " + VERSION + ".\n"); commandLine.printOpts(opts); // naive way to determine whether to use Twitter document extraction // or assume each document is a single document isTwitter = opts.analyzerName.toLowerCase().contains("twitter"); long maxMemory = Runtime.getRuntime().maxMemory() / 1024 / 1024; System.out.println("Java runtime max memory: " + maxMemory + " MB."); if (opts.analyze > 0) { // show analysis results then exit if (opts.useMongo) { DocIndexerMongo.showTweetAnalysis(opts.analyze, opts, analyzer); System.exit(0); } else { if (isTwitter) DebugAnalyzer.showTweetAnalysis(opts.analyze, opts.inputDir, analyzer); else DebugAnalyzer.showDocAnalysis(opts.analyze, opts.inputDir, analyzer); System.exit(0); } } long start = System.nanoTime(); try { Path outpath = Paths.get(opts.outputDir); outpath = outpath.toAbsolutePath(); System.out.println("Writing index to: '" + outpath.toString() + "' ...\n"); Directory dir = FSDirectory.open(new File(opts.outputDir)); IndexWriterConfig config = new IndexWriterConfig(Version.LATEST, analyzer); // create a new index in the directory, removing any // previously-indexed documents config.setOpenMode(OpenMode.CREATE); // Optional: for better indexing performance, if you are // indexing many documents, increase the RAM buffer. But if // you do this, increase the max heap size available to the // JVM (eg add -Xmx512m or -Xmxlg). //config.setRAMBufferSizeMB(256.0); IndexWriter writer = new IndexWriter(dir, config); if (opts.useMongo) { // Parse the configuration file and use the connection // details to index the documents. DocIndexerMongo.indexDocs(writer, opts, analyzer); docsIndexed = DocIndexerMongo.getDocsIndexed(); } else { // Index documents from the provided input directory. 
final File docDir = new File(opts.inputDir); rootDir = Paths.get(docDir.getPath()); indexDocs(writer, docDir, isTwitter, opts); } // NOTE: if you want to maximize search performance, you can // optionally call forceMerge here. This can be a terribly // costly operation, so generally it's only worth it when // your index is relatively static (i.e. you are finished // adding documents to it). //writer.forceMerge(1); // commit docs to the index writer.close(); } catch (IOException e) { ErrorReporter.reportException(e.getClass().toString(), e.getMessage()); System.exit(-1); } long end = System.nanoTime(); double elapsed = (end - start) * 1.0e-9; System.out.println("\n\nIndexed " + docsIndexed + " documents."); System.out.printf("Elapsed time: %.2f seconds, avg. rate: %.2f docs/s.\n\n", elapsed, docsIndexed / elapsed); }
From source file:mm.IndexFiles.java
License:Apache License
/** Index all text files under a directory. */ public static void main(String[] args) { String indexPath = "C:\\Users\\mnorhamizan\\Documents\\index"; String docsPath = "C:\\Users\\mnorhamizan\\Documents\\testdata"; boolean create = true; final Path docDir = Paths.get(docsPath); if (!Files.isReadable(docDir)) { System.out.println("Document directory '" + docDir.toAbsolutePath() + "' does not exist or is not readable, please check the path"); System.exit(1);/*from w w w . java 2 s . c o m*/ } Runtime runtime = Runtime.getRuntime(); long usedMemoryBefore = runtime.totalMemory() - runtime.freeMemory(); System.out.println("Used Memory before" + usedMemoryBefore); Date start = new Date(); try { System.out.println("Indexing to directory '" + indexPath + "'..."); Directory dir = FSDirectory.open(Paths.get(indexPath)); Analyzer analyzer = new StandardAnalyzer(); IndexWriterConfig iwc = new IndexWriterConfig(analyzer); if (create) { // Create a new index in the directory, removing any // previously indexed documents: iwc.setOpenMode(OpenMode.CREATE); } else { // Add new documents to an existing index: iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); } // Optional: for better indexing performance, if you // are indexing many documents, increase the RAM // buffer. But if you do this, increase the max heap // size to the JVM (eg add -Xmx512m or -Xmx1g): // // iwc.setRAMBufferSizeMB(256.0); IndexWriter writer = new IndexWriter(dir, iwc); indexDocs(writer, docDir); // NOTE: if you want to maximize search performance, // you can optionally call forceMerge here. 
This can be // a terribly costly operation, so generally it's only // worth it when your index is relatively static (ie // you're done adding documents to it): // // writer.forceMerge(1); writer.close(); long usedMemoryAfter = runtime.totalMemory() - runtime.freeMemory(); System.out.println("Memory increased:" + (usedMemoryAfter - usedMemoryBefore)); Date end = new Date(); System.out.println(end.getTime() - start.getTime() + " total milliseconds"); } catch (IOException e) { System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage()); } }
From source file:model.Index.java
public static void main(String[] args) throws FileNotFoundException, IOException { //set the split word tech Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_43); //indexwriter config info IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_43, analyzer); //open the index, if there is no index, build a new one indexWriterConfig.setOpenMode(OpenMode.CREATE_OR_APPEND); Directory directory = null;//from www . ja v a 2s .co m IndexWriter indexWrite = null; try { //set path of the original data directory = FSDirectory.open(new File(Path.IndexDir)); //if the directory is locked , unlock it if (IndexWriter.isLocked(directory)) { IndexWriter.unlock(directory); } //new a object indexWrite indexWrite = new IndexWriter(directory, indexWriterConfig); } catch (Exception e) { e.printStackTrace(); } PreProcessDoc getDoc = new PreProcessDoc(); WebDocument tempDoc = null; while ((tempDoc = getDoc.nextDocument()) != null) { Document doc = new Document(); doc.add(new TextField("link", tempDoc.getDocLink(), Store.YES)); doc.add(new TextField("content", tempDoc.getDocContent(), Store.YES)); try { //write doc into index indexWrite.addDocument(doc); } catch (Exception e) { e.printStackTrace(); } } //commit the data, if not , it would not be saved try { indexWrite.commit(); //close the resource indexWrite.close(); directory.close(); } catch (Exception e) { e.printStackTrace(); } }
From source file:model.IndexFiles.java
License:Apache License
/** Indexes all text files under the "data" directory into the "index" directory. */
public IndexFiles() {
    String indexPath = "index";
    String docPath = "data";
    boolean create = true;

    // Kept from the original: guards the (currently constant) document path.
    if (docPath == null) {
        System.exit(1);
    }

    final Path docDir = Paths.get(docPath);
    if (!Files.isReadable(docDir)) {
        System.out.println("Document directory '" + docDir.toAbsolutePath()
                + "' does not exist or is not readable, please check the path");
        System.exit(1);
    }

    Date start = new Date();
    try {
        System.out.println("Indexing to directory '" + indexPath + "'...");
        Directory dir = FSDirectory.open(Paths.get(indexPath));
        // Custom English stop-word list applied at both index and query time.
        final List<String> stopWords = Arrays.asList("a", "an", "are", "as", "at", "be", "but", "by",
                "in", "into", "is", "it", "no", "on", "such", "that", "the", "their", "then", "there",
                "these", "they", "to", "was", "will", "with");
        final CharArraySet stopSet = new CharArraySet(stopWords, false);
        Analyzer analyzer = new StandardAnalyzer(stopSet);
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
        // CREATE rebuilds the index from scratch; CREATE_OR_APPEND would
        // instead add to an existing index.
        iwc.setOpenMode(create ? OpenMode.CREATE : OpenMode.CREATE_OR_APPEND);
        // For large corpora a bigger RAM buffer (iwc.setRAMBufferSizeMB(256.0))
        // speeds indexing — raise the JVM heap accordingly (-Xmx512m / -Xmx1g).
        IndexWriter writer = new IndexWriter(dir, iwc);
        indexDocs(writer, docDir);
        // forceMerge(1) would maximize search speed but is very costly; only
        // worthwhile once the index is essentially static.
        writer.close();

        Date end = new Date();
        System.out.println(end.getTime() - start.getTime() + " total milliseconds");
    } catch (IOException e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}
From source file:mri1.MRI1.java
/** * @param args the command line arguments * @throws java.io.IOException/*w w w.ja va 2 s .c o m*/ * @throws org.apache.lucene.queryparser.classic.ParseException */ public static void main(String[] args) throws IOException, ParseException { File dest = new File("dest"); File source = new File("source"); FSDirectory fsdir = FSDirectory.open(dest); IndexWriterConfig iwc = new IndexWriterConfig(Version.LATEST, new StandardAnalyzer()); iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE); try (IndexWriter writer = new IndexWriter(fsdir, iwc)) { File[] listFiles = source.listFiles(); for (File file : listFiles) { System.out.println(file.getName()); if (file.isFile() && file.getName().endsWith(".txt")) { Document doc = new Document(); doc.add(new StringField("path", file.getAbsolutePath(), Field.Store.YES)); doc.add(new TextField("content", new FileReader(file))); writer.addDocument(doc); } } } DirectoryReader idxReader = DirectoryReader.open(fsdir); IndexSearcher searcher = new IndexSearcher(idxReader); //Query q=new TermQuery(new Term("content","system")); QueryParser parser = new QueryParser("content", new StandardAnalyzer()); Query q = parser.parse("basic"); TopDocs topDocs = searcher.search(q, 20); ScoreDoc[] sd = topDocs.scoreDocs; for (ScoreDoc d : sd) { System.out.println(searcher.doc(d.doc).get("path") + "\t" + d.score); } }