List of usage examples for org.apache.lucene.index.IndexWriterConfig#setOpenMode
public IndexWriterConfig setOpenMode(OpenMode openMode)
From source file:index.IndexEx.java
public static void main(String[] args) { String indexPath = "index"; String docsPath = null;//w w w .j a v a 2 s . c o m boolean create = true; final Path docDir = Paths.get(docsPath); if (!Files.isReadable(docDir)) { System.out.println("Document directory '" + docDir.toAbsolutePath() + "' does not exist or is not readable, please check the path"); System.exit(1); } Date start = new Date(); try { System.out.println("Indexing to directory '" + indexPath + "'..."); Directory dir = FSDirectory.open(Paths.get(indexPath)); Analyzer analyzer = new StandardAnalyzer(); IndexWriterConfig iwc = new IndexWriterConfig(analyzer); if (create) { // Create a new index in the directory, removing any // previously indexed documents: iwc.setOpenMode(OpenMode.CREATE); } else { iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); } // iwc.setRAMBufferSizeMB(256.0); IndexWriter writer = new IndexWriter(dir, iwc); indexDocs(writer, docDir); writer.close(); Date end = new Date(); System.out.println(end.getTime() - start.getTime() + " total milliseconds"); } catch (IOException e) { System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage()); } }
From source file:index.IndexNews.java
public static void main(String[] args) throws FileNotFoundException, IOException { //index/* w w w. j ava2 s . c o m*/ String indexPath = "/Users/smita/Documents/ES/index/news/"; String docsPath = null; boolean create = true; String path = "/Users/smita/Documents/data/newsSpace.txt"; Date start = new Date(); // System.out.println("Indexing to directory '" + indexPath + "'..."); Directory dir = FSDirectory.open(Paths.get(indexPath)); Analyzer analyzer = new StandardAnalyzer(); IndexWriterConfig iwc = new IndexWriterConfig(analyzer); if (create) { // Create a new index in the directory, removing any // previously indexed documents: iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE); } else { iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); } // iwc.setRAMBufferSizeMB(256.0); IndexWriter writer = new IndexWriter(dir, iwc); FileInputStream inputStream = null; Scanner sc = null; try { int linecount = 0; inputStream = new FileInputStream(path); sc = new Scanner(inputStream, "UTF-8"); //String hash = sc.nextLine(); while (sc.hasNextLine()) { String item = ""; linecount++; String line = sc.nextLine(); System.out.println("- " + line); // if(line.endsWith("\n")) // { // item=item+" "+line; // line=sc.nextLine(); // //contineous // } // else // { // item=line; // } String agency = line.split("\t")[0]; // String title=line.split("\\t")[2]; // String abs=line.split("\\t")[5]; // String date=line.split("\\t")[7]; //System.out.println("- "+agency); //System.out.println(redirectLbl); // try // { // senseLbl=senseLbl.substring(29,senseLbl.length()-1); // System.out.print("sense: "+senseLbl +": "); //// lbl=lbl.replaceAll("_", " "); //// String cat=line.split(" ")[2]; // //System.out.println(idxLbl); // idxLbl=idxLbl.substring(29,idxLbl.length()-1); // idxLbl=idxLbl.replaceAll("_"," "); // cat=cat.replaceAll("_", " "); //System.out.println(idxLbl); //index line as a doc // Document doc = new Document(); // doc.add(new TextField("idxlbl",idxLbl,Field.Store.YES)); // 
doc.add(new TextField("senses",senseLbl,Field.Store.YES)); // if (writer.getConfig().getOpenMode() == IndexWriterConfig.OpenMode.CREATE) // { // System.out.println("adding " +linecount ); // writer.addDocument(doc); // // } // else // { // System.out.println("updating " ); // //writer.updateDocument(new Term("path", file.toString()), doc); // } // } // // catch(Exception e2){} } // note that Scanner suppresses exceptions if (sc.ioException() != null) { throw sc.ioException(); } } finally { if (inputStream != null) { inputStream.close(); } if (sc != null) { sc.close(); } } //writer.close(); Date end = new Date(); System.out.println(end.getTime() - start.getTime() + " total milliseconds"); }
From source file:index.IndexOmimtsv.java
License:Apache License
/** Index all text files under a directory. */ public static void main(String[] args) { Date start = new Date(); final Path docDir = Paths.get("F:/Ecole(Telecom)/cours telecom/Projet_GMD/bases/omimtsv.tsv"); //final Path docDir = Paths.get("C:/Users/lulu/Desktop/Projet/Donnes/stitch/chemical.sources.v5.0.tsv"); try {// w ww. java2s . c om System.out.println("Indexing to directory '" + "C:/Users/gauthier/Desktop/TELECOM/2A/GMD/Projet/indexOmimtsv" + "'..."); Directory dir = FSDirectory .open(Paths.get("F:/Ecole(Telecom)/cours telecom/Projet_GMD/indexs/indexOmimtsv")); //Directory dir = FSDirectory.open(Paths.get("C:/Users/lulu/Desktop/Projet/Donnes/stitch")); Analyzer analyzer = new StandardAnalyzer(); IndexWriterConfig iwc = new IndexWriterConfig(analyzer); iwc.setOpenMode(OpenMode.CREATE); // Optional: for better indexing performance, if you // are indexing many documents, increase the RAM // buffer. But if you do this, increase the max heap // size to the JVM (eg add -Xmx512m or -Xmx1g): // // iwc.setRAMBufferSizeMB(256.0); IndexWriter writer = new IndexWriter(dir, iwc); indexDocs(writer, docDir); // NOTE: if you want to maximize search performance, // you can optionally call forceMerge here. This can be // a terribly costly operation, so generally it's only // worth it when your index is relatively static (ie // you're done adding documents to it): // // writer.forceMerge(1); writer.close(); Date end = new Date(); System.out.println(end.getTime() - start.getTime() + " total milliseconds"); } catch (IOException e) { System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage()); } }
From source file:index.IndexWikiAbstract.java
public static void main(String[] args) throws FileNotFoundException, IOException { //index/*from w w w. j a va 2s. c o m*/ String indexPath = "/Users/smita/Documents/ES/index/abstract/"; String docsPath = null; boolean create = true; String path = "/Users/smita/Documents/data/dbpedia/long_abstracts_en.nt"; Date start = new Date(); // System.out.println("Indexing to directory '" + indexPath + "'..."); Directory dir = FSDirectory.open(Paths.get(indexPath)); Analyzer analyzer = new StandardAnalyzer(); IndexWriterConfig iwc = new IndexWriterConfig(analyzer); if (create) { // Create a new index in the directory, removing any // previously indexed documents: iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE); } else { iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); } // // iwc.setRAMBufferSizeMB(256.0); IndexWriter writer = new IndexWriter(dir, iwc); FileInputStream inputStream = null; Scanner sc = null; try { int linecount = 0; inputStream = new FileInputStream(path); sc = new Scanner(inputStream, "UTF-8"); String hash = sc.nextLine(); while (sc.hasNextLine()) { linecount++; String line = sc.nextLine(); try { String title = line.split(" ")[0]; String prop = line.split(" ")[1]; String abs = line.substring(title.length() + prop.length() + 2); //System.out.println(abs); abs = abs.substring(0, abs.length() - 6); title = title.replaceAll("_", " "); title = title.substring(29, title.length() - 1); //System.out.println(abs); //index line as a doc Document doc = new Document(); doc.add(new TextField("title", title, Field.Store.YES)); doc.add(new TextField("abs", abs, Field.Store.YES)); if (writer.getConfig().getOpenMode() == IndexWriterConfig.OpenMode.CREATE) { System.out.println("adding " + linecount); writer.addDocument(doc); } else { System.out.println("updating "); //writer.updateDocument(new Term("path", file.toString()), doc); } } catch (Exception e2) { } } // note that Scanner suppresses exceptions if (sc.ioException() != null) { throw sc.ioException(); } } 
finally { if (inputStream != null) { inputStream.close(); } if (sc != null) { sc.close(); } } writer.close(); Date end = new Date(); System.out.println(end.getTime() - start.getTime() + " total milliseconds"); }
From source file:index.RedirectIndex.java
public static void main(String[] args) throws FileNotFoundException, IOException { //index// w w w. ja v a 2 s. c o m String indexPath = "/Users/smita/Documents/ES/index/redirect/"; String docsPath = null; boolean create = true; String path = "/Users/smita/Documents/data/dbpedia/redirects_en.ttl"; Date start = new Date(); // System.out.println("Indexing to directory '" + indexPath + "'..."); Directory dir = FSDirectory.open(Paths.get(indexPath)); Analyzer analyzer = new StandardAnalyzer(); IndexWriterConfig iwc = new IndexWriterConfig(analyzer); if (create) { // Create a new index in the directory, removing any // previously indexed documents: iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE); } else { iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); } // // iwc.setRAMBufferSizeMB(256.0); IndexWriter writer = new IndexWriter(dir, iwc); FileInputStream inputStream = null; Scanner sc = null; try { int linecount = 0; inputStream = new FileInputStream(path); sc = new Scanner(inputStream, "UTF-8"); String hash = sc.nextLine(); while (sc.hasNextLine()) { linecount++; String line = sc.nextLine(); String redirectLbl = line.split(" ")[0]; String idxLbl = line.split(" ")[2]; System.out.println(line); //System.out.println(redirectLbl); // try // { // redirectLbl=redirectLbl.substring(29,redirectLbl.length()-1); // //System.out.print(redirectLbl +": "); //// lbl=lbl.replaceAll("_", " "); //// String cat=line.split(" ")[2]; // //System.out.println(idxLbl); // idxLbl=idxLbl.substring(29,idxLbl.length()-1); // cat=cat.replaceAll("_", " "); //System.out.println(idxLbl); // //index line as a doc // Document doc = new Document(); // doc.add(new TextField("idxlbl",idxLbl,Field.Store.YES)); // doc.add(new TextField("redirect",redirectLbl,Field.Store.YES)); // if (writer.getConfig().getOpenMode() == IndexWriterConfig.OpenMode.CREATE) // { // System.out.println("adding " +linecount ); // writer.addDocument(doc); // // } // else // { // System.out.println("updating " ); // 
//writer.updateDocument(new Term("path", file.toString()), doc); // } // } // // catch(Exception e2){} // } // note that Scanner suppresses exceptions if (sc.ioException() != null) { throw sc.ioException(); } } finally { if (inputStream != null) { inputStream.close(); } if (sc != null) { sc.close(); } } writer.close(); Date end = new Date(); System.out.println(end.getTime() - start.getTime() + " total milliseconds"); }
From source file:indexer.AMIIndexer.java
/**
 * Creates a fresh index under {@code indexDir} and fills it by calling
 * {@code indexAnnotation()}.
 *
 * <p>The index is opened in {@code OpenMode.CREATE}, so an existing index at
 * that location is replaced. The {@code writer} field is set for the
 * duration of the call; the writer and its directory are closed when the
 * method returns, whether normally or with an exception.
 *
 * @throws Exception if opening the index or indexing the annotations fails
 */
public void process() throws Exception {
    System.out.println("Indexing AMI annotations...");

    IndexWriterConfig iwcfg = new IndexWriterConfig(analyzer);
    iwcfg.setOpenMode(IndexWriterConfig.OpenMode.CREATE);

    // Local resources so try-with-resources can close them even when
    // indexAnnotation() throws; the field is what indexAnnotation() uses.
    try (FSDirectory dir = FSDirectory.open(indexDir.toPath());
            IndexWriter indexWriter = new IndexWriter(dir, iwcfg)) {
        writer = indexWriter;
        indexAnnotation();
    }
}
From source file:indexer.files.LuceneWriter.java
public boolean openIndex() { try {/*ww w.jav a 2 s. co m*/ //Abrimos el directorio Directory dir = FSDirectory.open(new File(pathToIndex)); //Elegimos un Analyzer . Y especificamos la versin de Lucene que usamos SpanishAnalyzer analyzer = new SpanishAnalyzer(Version.LUCENE_43, new CharArraySet(Version.LUCENE_43, Arrays.asList( StringUtils.split(FileUtils.readFileToString(new File(this.pathToStopWords), "UTF-8"))), true)); //Creamos un IndexWriterConfig IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_43, analyzer); //Siempre vamos a sobreescribir el indice que tenemos en el directorio iwc.setOpenMode(OpenMode.CREATE); indexWriter = new IndexWriter(dir, iwc); return true; } catch (Exception e) { System.out.println("Ocurrio un problema abriendo el documento para escritura: " + e.getClass() + " :: " + e.getMessage()); return false; } }
From source file:indexer.IndexSplitter.java
/**
 * Sets up the two index writers used for splitting.
 *
 * <p>The target directories are taken from the properties
 * {@code splitindex.pure} and {@code splitindex.mixed}; both indexes are
 * opened in {@code OpenMode.CREATE}.
 *
 * @param propFile properties file, passed on to the superclass constructor
 * @throws Exception if the superclass constructor or opening either index fails
 */
public IndexSplitter(String propFile) throws Exception {
    super(propFile);

    final String purePath = prop.getProperty("splitindex.pure");
    final File pureLocation = new File(purePath);
    final String mixedPath = prop.getProperty("splitindex.mixed");
    final File mixedLocation = new File(mixedPath);

    // One configuration object per writer, each set to CREATE mode.
    final IndexWriterConfig pureConfig = new IndexWriterConfig(Version.LUCENE_4_9, analyzer)
            .setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    final IndexWriterConfig mixedConfig = new IndexWriterConfig(Version.LUCENE_4_9, analyzer)
            .setOpenMode(IndexWriterConfig.OpenMode.CREATE);

    final FSDirectory pureDirectory = FSDirectory.open(pureLocation);
    pureIndexWriter = new IndexWriter(pureDirectory, pureConfig);

    final FSDirectory mixedDirectory = FSDirectory.open(mixedLocation);
    mixedIndexWriter = new IndexWriter(mixedDirectory, mixedConfig);
}
From source file:indexer.LuceneIndexer.java
/** * Indexing the files. This method checks for the directories and then * finishes out after the indexing is complete. * @param global This is for reference to the global class variables * and methods./* ww w . j av a2 s . c om*/ * @param createIndex If true a new index will be created from scratch * and the old index will be destroyed. * @param indexPanel If true it will also print the console printout lines * to the main panel. */ public static void IndexFiles(Global global, Boolean createIndex) { String dataDir = global.dataDir; String indexDir = global.indexDir; //Verifies that the data directory exists if (dataDir == null) { System.err.println("Data Directory Is not accessable, Unable to Index files."); } //Verifies that the data directory is readable and writeable final Path docDir = Paths.get(dataDir); if (!Files.isReadable(docDir)) { System.out.println("Document directory '" + docDir.toAbsolutePath() + "' does not exist or is not readable, please check the path"); } startTime = new Date(); try { System.out.println("Indexing to directory '" + indexDir + "'..."); //Setups the analyzer Analyzer analyzer; try (Directory dir = FSDirectory.open(Paths.get(indexDir))) { analyzer = new StandardAnalyzer(); IndexWriterConfig iwc = new IndexWriterConfig(analyzer); if (createIndex) { // Create a new index in the directory, removing any // previously indexed documents: iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE); } else { // Add new documents to an existing index: iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); } iwc.setRAMBufferSizeMB(global.RAM_BUFFER_SIZE); iwc.setMaxBufferedDocs(global.MAX_BUFFERED_DOCS); LogDocMergePolicy ldmp = new LogDocMergePolicy(); ldmp.setMergeFactor(global.MERGE_FACTOR); iwc.setMergePolicy(ldmp); try (IndexWriter writer = new IndexWriter(dir, iwc)) { hm.clear(); indexDocs(writer, docDir, global); //This is a costly operation, we scheduled the time to apply it if (global.merge) { System.out.println("Starting Merge"); 
writer.forceMerge(1); global.merge = false; } writer.close(); } finishTime = new Date(); long millis = finishTime.getTime() - startTime.getTime(); totalTime = String.format("%02dhr %02dmin %02dsec", TimeUnit.MILLISECONDS.toHours(millis), TimeUnit.MILLISECONDS.toMinutes(millis) - TimeUnit.HOURS.toMinutes(TimeUnit.MILLISECONDS.toHours(millis)), // The change is in this line TimeUnit.MILLISECONDS.toSeconds(millis) - TimeUnit.MINUTES.toSeconds(TimeUnit.MILLISECONDS.toMinutes(millis))); System.out.println(""); System.out.println(""); System.out.println("Start Time: " + global.sdf.format(startTime.getTime())); System.out.println("Building List Time: " + listBuildTime); System.out.println("Indexing Time: " + indexingTime); System.out.println("Total Time: " + totalTime); System.out.println("Number of Documents: " + amountOfDocuments); System.out.println("Finish Time: " + global.sdf.format(finishTime.getTime())); System.out.println(""); } analyzer.close(); } catch (IOException e) { System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage()); log.fatal(" caught a " + e.getClass() + "\n with message: " + e.getMessage()); } }
From source file:indexer.PaperIndexer.java
void processAll() throws Exception { System.out.println("Indexing TREC collection..."); IndexWriterConfig iwcfg = new IndexWriterConfig(analyzer); iwcfg.setOpenMode(IndexWriterConfig.OpenMode.CREATE); File indexDirDocs = new File(indexDir + "/docs/"); File indexDirPara = new File(indexDir + "/para/"); writer = new IndexWriter(FSDirectory.open(indexDirDocs.toPath()), iwcfg); iwcfg = new IndexWriterConfig(analyzer); iwcfg.setOpenMode(IndexWriterConfig.OpenMode.CREATE); paraWriter = new IndexWriter(FSDirectory.open(indexDirPara.toPath()), iwcfg); indexAll();//from w w w . ja va2 s.co m writer.close(); paraWriter.close(); }