List of usage examples for the get methods of org.apache.lucene.benchmark.byTask.utils.Config
public boolean get(String name, boolean dflt)
From source file:com.grantingersoll.intell.index.EnwikiContentSource.java
License:Apache License
/**
 * Reads this content source's settings from the benchmark configuration.
 *
 * <p>{@code keep.image.only.docs} (default {@code true}) is stored in {@code keepImages};
 * {@code docs.file} is mandatory and is resolved to an absolute file.
 *
 * @param config benchmark configuration to read from
 * @throws IllegalArgumentException if {@code docs.file} is not set
 */
@Override
public void setConfig(Config config) {
  super.setConfig(config);
  keepImages = config.get("keep.image.only.docs", true);

  final String docsFile = config.get("docs.file", null);
  if (docsFile != null) {
    file = new File(docsFile).getAbsoluteFile();
    return;
  }
  throw new IllegalArgumentException("docs.file must be set");
}
From source file:com.leavesfly.lia.admin.CreateThreadedIndexTask.java
License:Apache License
public int doLogic() throws IOException { PerfRunData runData = getRunData();// w w w. ja va2s. c o m Config config = runData.getConfig(); IndexWriter writer = new ThreadedIndexWriter(runData.getDirectory(), runData.getAnalyzer(), true, config.get("writer.num.threads", 4), config.get("writer.max.thread.queue.size", 20), IndexWriter.MaxFieldLength.UNLIMITED); CreateIndexTask.setIndexWriterConfig(writer, config); runData.setIndexWriter(writer); return 1; }
From source file:com.mathworks.xzheng.admin.CreateThreadedIndexTask.java
License:Apache License
public int doLogic() throws IOException { PerfRunData runData = getRunData();//from w ww.j a va2 s.c o m Config config = runData.getConfig(); IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_46, runData.getAnalyzer()); writerConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE); writerConfig = CreateIndexTask.createWriterConfig(config, runData, IndexWriterConfig.OpenMode.CREATE, writerConfig.getIndexCommit()); IndexWriter writer = new ThreadedIndexWriter(runData.getDirectory(), writerConfig, config.get("writer.num.threads", 4), config.get("writer.max.thread.queue.size", 20)); runData.setIndexWriter(writer); return 1; }
From source file:com.tamingtext.qa.WexWikiContentSource.java
License:Apache License
/**
 * Reads the mandatory {@code docs.file} setting and resolves it to an absolute file.
 *
 * @param config benchmark configuration to read from
 * @throws IllegalArgumentException if {@code docs.file} is not set
 */
@Override
public void setConfig(Config config) {
  super.setConfig(config);

  final String docsFile = config.get("docs.file", null);
  if (docsFile != null) {
    file = new File(docsFile).getAbsoluteFile();
    return;
  }
  throw new IllegalArgumentException("docs.file must be set");
}
From source file:info.boytsov.lucene.parsers.ClueWeb09ContentSource.java
License:Open Source License
@Override public void setConfig(Config config) { super.setConfig(config); // dirs/*w w w. j a v a 2s. c o m*/ File workDir = new File(config.get("work.dir", "work")); String d = config.get("docs.dir", "trec"); dataDir = new File(d); if (!dataDir.isAbsolute()) { dataDir = new File(workDir, d); } // files collectFiles(dataDir, inputFiles); if (inputFiles.size() == 0) { throw new IllegalArgumentException("No files in dataDir: " + dataDir); } // html parser try { String htmlParserClassName = config.get("html.parser", "org.apache.lucene.benchmark.byTask.feeds.DemoHTMLParser"); htmlParser = Class.forName(htmlParserClassName).asSubclass(HTMLParser.class).newInstance(); } catch (Exception e) { // Should not get here. Throw runtime exception. throw new RuntimeException(e); } }
From source file:info.boytsov.lucene.parsers.EnwikiContentSource.java
License:Apache License
/**
 * Reads this content source's settings from the benchmark configuration.
 *
 * <p>{@code keep.image.only.docs} (default {@code true}) is stored in {@code keepImages}.
 * {@code docs.file} is optional here: when it is absent, {@code file} is left untouched.
 *
 * @param config benchmark configuration to read from
 */
@Override
public void setConfig(Config config) {
  super.setConfig(config);
  keepImages = config.get("keep.image.only.docs", true);

  final String docsFile = config.get("docs.file", null);
  if (docsFile == null) {
    return;
  }
  file = new File(docsFile).getAbsoluteFile();
}
From source file:info.boytsov.lucene.parsers.TrecContentSource.java
License:Apache License
@Override public void setConfig(Config config) { super.setConfig(config); // dirs//w w w. j av a2 s. c o m File workDir = new File(config.get("work.dir", "work")); String d = config.get("docs.dir", "trec"); dataDir = new File(d); if (!dataDir.isAbsolute()) { dataDir = new File(workDir, d); } // files collectFiles(dataDir, inputFiles); if (inputFiles.size() == 0) { throw new IllegalArgumentException("No files in dataDir: " + dataDir); } // trec doc parser try { String trecDocParserClassName = config.get("trec.doc.parser", "org.apache.lucene.benchmark.byTask.feeds.TrecGov2Parser"); trecDocParser = Class.forName(trecDocParserClassName).asSubclass(TrecDocParser.class).newInstance(); } catch (Exception e) { // Should not get here. Throw runtime exception. throw new RuntimeException(e); } // html parser try { String htmlParserClassName = config.get("html.parser", "org.apache.lucene.benchmark.byTask.feeds.DemoHTMLParser"); htmlParser = Class.forName(htmlParserClassName).asSubclass(HTMLParser.class).newInstance(); } catch (Exception e) { // Should not get here. Throw runtime exception. throw new RuntimeException(e); } // encoding if (encoding == null) { encoding = "ISO-8859-1"; } // iteration exclusion in doc name excludeDocnameIteration = config.get("content.source.excludeIteration", false); }
From source file:io.datalayer.lucene.benchmark.CreateThreadedIndexTask.java
License:Apache License
@Override public int doLogic() throws IOException { PerfRunData runData = getRunData();//from ww w .j av a2 s . co m Config config = runData.getConfig(); IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_46, runData.getAnalyzer()); IndexWriter writer = new ThreadedIndexWriter(runData.getDirectory(), conf, true, config.get("writer.num.threads", 4), config.get("writer.max.thread.queue.size", 20)); // CreateIndexTask.configureWriter(writer, config); runData.setIndexWriter(writer); return 1; }
From source file:parsers.ClueWebContentSource.java
License:Open Source License
@Override public void setConfig(Config config) { super.setConfig(config); // dirs// w ww. j a v a2 s. c o m File workDir = new File(config.get("work.dir", "work")); String d = config.get("docs.dir", "trec"); dataDir = new File(d); if (!dataDir.isAbsolute()) { dataDir = new File(workDir, d); } try { // files: accept only WARC files ArrayList<Path> tmpp = new ArrayList<Path>(); collectFiles(dataDir.toPath(), tmpp); ArrayList<File> tmpf = new ArrayList<File>(); for (Path p : tmpp) if (p.toString().endsWith("warc.gz")) { tmpf.add(p.toFile()); } else { System.out.println("Ignoring file: " + p); } tmpf.sort(new PathFileComparator()); for (File f : tmpf) { inputFiles.add(f.toPath()); System.out.println(f); } if (inputFiles.size() == 0) { throw new IllegalArgumentException("No files in dataDir: " + dataDir); } // html parser String htmlParserClassName = config.get("html.parser", "org.apache.lucene.benchmark.byTask.feeds.DemoHTMLParser"); htmlParser = Class.forName(htmlParserClassName).asSubclass(HTMLParser.class).newInstance(); } catch (Exception e) { // Should not get here. Throw runtime exception. throw new RuntimeException(e); } verbose = true; }
From source file:parsers.TrecContentSource.java
License:Apache License
@Override public void setConfig(Config config) { super.setConfig(config); // dirs/*w ww.java2s . c o m*/ File workDir = new File(config.get("work.dir", "work")); String d = config.get("docs.dir", "trec"); dataDir = new File(d); if (!dataDir.isAbsolute()) { dataDir = new File(workDir, d); } try { // files collectFiles(dataDir.toPath(), inputFiles); if (inputFiles.size() == 0) { throw new IllegalArgumentException("No files in dataDir: " + dataDir); } // trec doc parser String trecDocParserClassName = config.get("trec.doc.parser", "org.apache.lucene.benchmark.byTask.feeds.TrecGov2Parser"); trecDocParser = Class.forName(trecDocParserClassName).asSubclass(TrecDocParser.class).newInstance(); } catch (Exception e) { // Should not get here. Throw runtime exception. throw new RuntimeException(e); } // html parser try { String htmlParserClassName = config.get("html.parser", "org.apache.lucene.benchmark.byTask.feeds.DemoHTMLParser"); htmlParser = Class.forName(htmlParserClassName).asSubclass(HTMLParser.class).newInstance(); } catch (Exception e) { // Should not get here. Throw runtime exception. throw new RuntimeException(e); } // encoding if (encoding == null) { encoding = "ISO-8859-1"; } // iteration exclusion in doc name excludeDocnameIteration = config.get("content.source.excludeIteration", true); }