List of usage examples for org.apache.lucene.index.IndexWriterConfig.setOpenMode
public IndexWriterConfig setOpenMode(OpenMode openMode)
From source file:indexer.RealValuedVecIndexer.java
public RealValuedVecIndexer(String propFile, String indexDirName) throws Exception { prop = new Properties(); prop.load(new FileReader(propFile)); numDimensions = Integer.parseInt(prop.getProperty("vec.numdimensions")); DocVector.initVectorRange(prop);// w w w. j av a 2s . co m numIntervals = DocVector.numIntervals; boolean syntheticQueries = prop.getProperty("data.source").equals("synthetic"); if (syntheticQueries) { rvgen = new RandomVecGen(prop); indexPath = rvgen.randomSamplesFileName() + ".index"; } else indexPath = prop.getProperty(indexDirName); IndexWriterConfig iwcfg = new IndexWriterConfig(new WhitespaceAnalyzer()); iwcfg.setOpenMode(IndexWriterConfig.OpenMode.CREATE); writer = new IndexWriter(FSDirectory.open(new File(indexPath).toPath()), iwcfg); }
From source file:indexer.SiftSubvecIndexer.java
public SiftSubvecIndexer(String propFile) throws Exception { super(propFile); subSpaceDimension = Integer.parseInt(prop.getProperty("subspace.dimension")); numsubVecs = numDimensions / subSpaceDimension; assert (numsubVecs * subSpaceDimension == numDimensions); subspaceWriters = new IndexWriter[numsubVecs]; for (int i = 0; i < numsubVecs; i++) { IndexWriterConfig iwcfg = new IndexWriterConfig(new WhitespaceAnalyzer()); iwcfg.setOpenMode(IndexWriterConfig.OpenMode.CREATE); File indexDir = new File(this.indexPath + i); if (!indexDir.exists()) indexDir.mkdir();/*w w w . ja v a 2 s .com*/ subspaceWriters[i] = new IndexWriter(FSDirectory.open(indexDir.toPath()), iwcfg); } }
From source file:indexer.SpamRemover.java
public void filterIndex() throws Exception { System.out.println("Filtering index at " + indexDir.getPath()); IndexWriterConfig iwcfg = new IndexWriterConfig(Version.LUCENE_4_9, new WhitespaceAnalyzer(Version.LUCENE_4_9)); iwcfg.setOpenMode(IndexWriterConfig.OpenMode.CREATE); writer = new IndexWriter(FSDirectory.open(outIndexDir), iwcfg); reader = DirectoryReader.open(FSDirectory.open(indexDir)); int nDocs = reader.numDocs(); for (int i = 0; i < nDocs; i++) { Document doc = reader.document(i); String docId = doc.get(ClueWebDocIndexer.FIELD_ID); if (nonSpams.isIndexable(docId)) writer.addDocument(doc);//from w ww.j a v a2 s .co m } reader.close(); writer.close(); }
From source file:Indexing.TRECDocParser.java
void processAll() throws Exception { System.out.println("Indexing TREC collection..."); IndexWriterConfig iwcfg = new IndexWriterConfig(analyzer); iwcfg.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); Path p = indexDir.toPath();//from ww w .ja va2s . co m writer = new IndexWriter(FSDirectory.open(p), iwcfg); indexAll(); writer.close(); }
From source file:indextranslator.BOWTranslator.java
/**
 * Configures the translator from a properties file and opens its indexes and dictionary.
 *
 * <p>Properties read: {@code index} (input index path), {@code translated.index} (output
 * index path), {@code numtranslated_words} (default {@code "3"}),
 * {@code translation.threshold_weight} (default {@code "0.01"}) and {@code dict}
 * (location passed to {@code Dictionary.load}).
 *
 * @param propfile path of the properties file to load
 * @throws Exception if the properties cannot be read or parsed, an index cannot be
 *                   opened, or the dictionary cannot be loaded
 */
public BOWTranslator(String propfile) throws Exception {
    prop = new Properties();
    // Close the reader as soon as the properties are loaded so the file handle is not leaked.
    try (FileReader propReader = new FileReader(propfile)) {
        prop.load(propReader);
    }

    inIndexPath = prop.getProperty("index");
    outIndexPath = prop.getProperty("translated.index");

    IndexWriterConfig iwcfg = new IndexWriterConfig(new PayloadAnalyzer());
    // CREATE starts a new output index, replacing one that already exists.
    iwcfg.setOpenMode(IndexWriterConfig.OpenMode.CREATE);

    writer = new IndexWriter(FSDirectory.open(new File(outIndexPath).toPath()), iwcfg);
    reader = DirectoryReader.open(FSDirectory.open(new File(inIndexPath).toPath()));

    // Load the dict in memory
    dict = new Dictionary(Integer.parseInt(prop.getProperty("numtranslated_words", "3")),
            Float.parseFloat(prop.getProperty("translation.threshold_weight", "0.01")));
    dict.load(prop.getProperty("dict"));
}
From source file:info.boytsov.lucene.CreateIndex.java
License:Open Source License
public static void main(String[] args) throws Exception { if (args.length != 3 && args.length != 4) { printUsage();/* w w w . ja v a 2 s .co m*/ System.exit(1); } String indexType = args[0]; String indexSource = args[1]; int commitInterval = 1000000; if (args.length >= 4) { commitInterval = Integer.parseInt(args[3]); } System.out.println("Commiting after indexing " + commitInterval + " docs"); File outputDir = new File(args[2]); if (!outputDir.exists()) { if (!outputDir.mkdirs()) { System.out.println("couldn't create " + outputDir.getAbsolutePath()); return; } } if (!outputDir.isDirectory()) { System.out.println(outputDir.getAbsolutePath() + " is not a directory!"); return; } if (!outputDir.canWrite()) { System.out.println("Can't write to " + outputDir.getAbsolutePath()); return; } FSDirectory dir = FSDirectory.open(outputDir); StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_46);// default // stop // words IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_46, analyzer); config.setOpenMode(IndexWriterConfig.OpenMode.CREATE);// overwrites // if // needed IndexWriter indexWriter = new IndexWriter(dir, config); DocMaker docMaker = new DocMaker(); Properties properties = new Properties(); properties.setProperty("content.source.forever", "false"); // will // parse // each // document // only // once properties.setProperty("doc.index.props", "true"); // We want to store small-size fields like URL or even title ... properties.setProperty("doc.stored", "true"); // but not the large one (great savings, 3x reduction in space)! properties.setProperty("doc.body.stored", "false"); ContentSource source = CreateSource(indexType, indexSource, properties); if (source == null) { System.err.println("Failed to create a source: " + indexType + "(" + indexSource + ")"); printUsage(); System.exit(1); } Config c = new Config(properties); source.setConfig(c); source.resetInputs();// though this does not seem needed, it is // (gets the file opened?) 
docMaker.setConfig(c, source); int count = 0; System.out.println("Starting Indexing of " + indexType + " source " + indexSource); long start = System.currentTimeMillis(); Document doc; try { while ((doc = docMaker.makeDocument()) != null) { indexWriter.addDocument(doc); ++count; if (count % 5000 == 0) { System.out.println( "Indexed " + count + " documents in " + (System.currentTimeMillis() - start) + " ms"); } if (count % commitInterval == 0) { indexWriter.commit(); System.out.println("Committed"); } } } catch (org.apache.lucene.benchmark.byTask.feeds.NoMoreDataException nmd) { System.out.println("Caught NoMoreDataException! -- Finishing"); // All done } long finish = System.currentTimeMillis(); System.out.println("Indexing " + count + " documents took " + (finish - start) + " ms"); System.out.println("Total data processed: " + source.getTotalBytesCount() + " bytes"); System.out.println("Index should be located at " + dir.getDirectory().getAbsolutePath()); docMaker.close(); indexWriter.commit(); indexWriter.close(); }
From source file:info.johtani.jjug.lucene.sample.IndexerSample.java
License:Apache License
public static void main(String[] args) { String indexDirectory = "./indexdir"; String[] texts = {//from ww w .j a v a 2 s .c om "JJUG?Lucene?????johtani?????", "JJUG CCC?Elasticsearch?Kibana????johtani?????", "Elasticsearch?Kibana4????johtani?????" }; IndexWriter writer = null; try { //?? Directory dir = FSDirectory.open(new File(indexDirectory)); //?? StandardAnalyzer analyzer = new StandardAnalyzer(); //????? IndexWriterConfig config = new IndexWriterConfig(Version.LATEST, analyzer); //CREATE_OR_APPEND????????????? //CREATE????????? //config.setOpenMode(OpenMode.CREATE_OR_APPEND); config.setOpenMode(OpenMode.CREATE); writer = new IndexWriter(dir, config); //? for (String text : texts) { //???? writer.addDocument(getDocument(text)); } // ???List? //List<Document> docs = new ArrayList<Document>(); //docs.add(document); //writer.addDocuments(docs); //writer????????? //writer.commit(); } catch (IOException e) { e.printStackTrace(); } finally { try { //close?? if (writer != null) { writer.close(); } } catch (IOException e) { //ignore } } System.out.println("Finished!"); }
From source file:intelligentWebAlgorithms.algos.search.lucene.LuceneIndexBuilder.java
License:Apache License
/**
 * Opens an index writer over the given directory.
 *
 * <p>The writer uses a {@code StandardAnalyzer}, CREATE_OR_APPEND open mode and the RAM
 * buffer size held in {@code RamBufferSizeMB}.
 *
 * @param file directory that holds (or will hold) the index
 * @return a new writer for that directory
 * @throws IOException if the directory or the writer cannot be opened
 */
private IndexWriter getIndexWriter(File file) throws IOException {
    final FSDirectory indexDirectory = FSDirectory.open(file);

    final StandardAnalyzer standardAnalyzer = new StandardAnalyzer(Version.LUCENE_44);
    final IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_44, standardAnalyzer);
    // setOpenMode returns the config itself, so the second setter can be chained.
    writerConfig.setOpenMode(OpenMode.CREATE_OR_APPEND).setRAMBufferSizeMB(RamBufferSizeMB);

    return new IndexWriter(indexDirectory, writerConfig);
}
From source file:invertedindex.IndexCreater.java
/**
 * Opens the index writer for the directory returned by {@code getIndexPath()}.
 *
 * <p>The writer is configured with the {@code analyzer} field and CREATE open mode, which
 * starts a new index instead of appending to an existing one.
 *
 * @throws IOException if the index directory or the writer cannot be opened
 */
public IndexCreater() throws IOException {
    File indexLocationDir = new File(getIndexPath());
    FSDirectory indexDirectory = FSDirectory.open(indexLocationDir);

    IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_47, analyzer);
    // CREATE replaces whatever index is already stored at the path.
    writerConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE);

    writer = new IndexWriter(indexDirectory, writerConfig);
}
From source file:invertedindex.LineIndexing.java
/**
 * Opens the line-index writer for the directory returned by {@code getLineIndexLocation()}.
 *
 * <p>The writer uses the {@code analyzer} field and CREATE open mode, so a new index is
 * started each time.
 *
 * @throws IOException if the index directory or the writer cannot be opened
 */
public LineIndexing() throws IOException {
    FSDirectory lineIndexDirectory = FSDirectory.open(new File(getLineIndexLocation()));

    // setOpenMode returns the config, so it can be applied in the same expression.
    IndexWriterConfig createConfig = new IndexWriterConfig(Version.LUCENE_47, analyzer)
            .setOpenMode(IndexWriterConfig.OpenMode.CREATE);

    writer = new IndexWriter(lineIndexDirectory, createConfig);
}