Example usage for org.apache.lucene.benchmark.byTask.utils Config get

List of usage examples for org.apache.lucene.benchmark.byTask.utils Config get

Introduction

This page collects example usages of the get method of org.apache.lucene.benchmark.byTask.utils.Config.

Prototype

public boolean get(String name, boolean dflt) 

Source Link

Document

Returns a boolean property.

Usage

From source file:com.grantingersoll.intell.index.EnwikiContentSource.java

License:Apache License

/**
 * Reads this content source's settings from the benchmark configuration.
 *
 * <p>Stores the {@code keep.image.only.docs} flag (fallback {@code true}) and resolves the
 * mandatory {@code docs.file} property to an absolute {@link File}.
 *
 * @param config benchmark configuration to read from
 * @throws IllegalArgumentException if {@code docs.file} is not set
 */
@Override
public void setConfig(Config config) {
    super.setConfig(config);
    keepImages = config.get("keep.image.only.docs", true);

    final String docsFile = config.get("docs.file", null);
    if (docsFile != null) {
        file = new File(docsFile).getAbsoluteFile();
        return;
    }
    // No usable input without the docs.file property.
    throw new IllegalArgumentException("docs.file must be set");
}

From source file:com.leavesfly.lia.admin.CreateThreadedIndexTask.java

License:Apache License

/**
 * Builds a {@code ThreadedIndexWriter} over the run's directory and analyzer, applies the
 * benchmark writer settings to it, and registers it with the run data.
 *
 * <p>Two numeric settings are read from the configuration: {@code writer.num.threads}
 * (fallback 4) and {@code writer.max.thread.queue.size} (fallback 20).
 *
 * @return always 1
 * @throws IOException declared by the signature; presumably raised when the writer cannot be opened
 */
public int doLogic() throws IOException {
    final PerfRunData perfData = getRunData();
    final Config settings = perfData.getConfig();

    final int numThreads = settings.get("writer.num.threads", 4);
    final int maxQueueSize = settings.get("writer.max.thread.queue.size", 20);

    final IndexWriter threadedWriter = new ThreadedIndexWriter(perfData.getDirectory(), perfData.getAnalyzer(),
            true, numThreads, maxQueueSize, IndexWriter.MaxFieldLength.UNLIMITED);
    CreateIndexTask.setIndexWriterConfig(threadedWriter, settings);
    perfData.setIndexWriter(threadedWriter);
    return 1;
}

From source file:com.mathworks.xzheng.admin.CreateThreadedIndexTask.java

License:Apache License

/**
 * Builds a {@code ThreadedIndexWriter} in CREATE mode and registers it with the run data.
 *
 * <p>The effective writer configuration comes from {@code CreateIndexTask.createWriterConfig};
 * {@code writer.num.threads} (fallback 4) and {@code writer.max.thread.queue.size}
 * (fallback 20) are read from the benchmark configuration.
 *
 * @return always 1
 * @throws IOException declared by the signature; presumably raised when the writer cannot be opened
 */
public int doLogic() throws IOException {
    final PerfRunData perfData = getRunData();
    final Config settings = perfData.getConfig();

    // Provisional config: its only use below is to supply the index-commit argument.
    final IndexWriterConfig seedConfig = new IndexWriterConfig(Version.LUCENE_46, perfData.getAnalyzer());
    seedConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE);

    final IndexWriterConfig effectiveConfig = CreateIndexTask.createWriterConfig(settings, perfData,
            IndexWriterConfig.OpenMode.CREATE, seedConfig.getIndexCommit());

    final int numThreads = settings.get("writer.num.threads", 4);
    final int maxQueueSize = settings.get("writer.max.thread.queue.size", 20);

    final IndexWriter threadedWriter = new ThreadedIndexWriter(perfData.getDirectory(), effectiveConfig,
            numThreads, maxQueueSize);
    perfData.setIndexWriter(threadedWriter);
    return 1;
}

From source file:com.tamingtext.qa.WexWikiContentSource.java

License:Apache License

/**
 * Reads the input file location from the benchmark configuration.
 *
 * @param config benchmark configuration; must define {@code docs.file}
 * @throws IllegalArgumentException if {@code docs.file} is not set
 */
@Override
public void setConfig(Config config) {
    super.setConfig(config);

    final String configuredPath = config.get("docs.file", null);
    final boolean missing = (configuredPath == null);
    if (missing) {
        throw new IllegalArgumentException("docs.file must be set");
    }

    // Store the absolute form of the configured path.
    file = new File(configuredPath).getAbsoluteFile();
}

From source file:info.boytsov.lucene.parsers.ClueWeb09ContentSource.java

License:Open Source License

/**
 * Configures this content source: resolves the data directory, collects the input files,
 * and instantiates the HTML parser.
 *
 * <p>Properties read: {@code work.dir} (fallback {@code "work"}), {@code docs.dir}
 * (fallback {@code "trec"}; a relative value is resolved against {@code work.dir}) and
 * {@code html.parser} (fallback {@code DemoHTMLParser}).
 *
 * @param config benchmark configuration to read from
 * @throws IllegalArgumentException if no input files were collected from the data directory
 * @throws RuntimeException if the configured HTML parser class cannot be loaded, is not an
 *         {@code HTMLParser}, or cannot be instantiated; the original failure is the cause
 */
@Override
public void setConfig(Config config) {
    super.setConfig(config);

    // dirs
    File workDir = new File(config.get("work.dir", "work"));
    String d = config.get("docs.dir", "trec");
    dataDir = new File(d);
    if (!dataDir.isAbsolute()) {
        dataDir = new File(workDir, d);
    }

    // files
    collectFiles(dataDir, inputFiles);
    if (inputFiles.isEmpty()) {
        throw new IllegalArgumentException("No files in dataDir: " + dataDir);
    }

    // html parser
    try {
        String htmlParserClassName = config.get("html.parser",
                "org.apache.lucene.benchmark.byTask.feeds.DemoHTMLParser");
        // getDeclaredConstructor().newInstance() replaces the deprecated Class.newInstance(),
        // which rethrows checked constructor exceptions without declaring them.
        htmlParser = Class.forName(htmlParserClassName).asSubclass(HTMLParser.class)
                .getDeclaredConstructor().newInstance();
    } catch (Exception e) {
        // Reached when html.parser names a class that is missing, of the wrong type,
        // or not instantiable through a no-arg constructor.
        throw new RuntimeException(e);
    }
}

From source file:info.boytsov.lucene.parsers.EnwikiContentSource.java

License:Apache License

/**
 * Reads this content source's settings from the benchmark configuration.
 *
 * <p>Stores the {@code keep.image.only.docs} flag (fallback {@code true}). The
 * {@code docs.file} property is optional here: when it is absent, {@code file} is left
 * untouched.
 *
 * @param config benchmark configuration to read from
 */
@Override
public void setConfig(Config config) {
    super.setConfig(config);
    keepImages = config.get("keep.image.only.docs", true);

    final String docsFile = config.get("docs.file", null);
    if (docsFile == null) {
        // Nothing configured: keep whatever value `file` already has.
        return;
    }
    file = new File(docsFile).getAbsoluteFile();
}

From source file:info.boytsov.lucene.parsers.TrecContentSource.java

License:Apache License

/**
 * Configures this content source: resolves the data directory, collects the input files,
 * instantiates the TREC document parser and the HTML parser, and applies defaults.
 *
 * <p>Properties read: {@code work.dir} (fallback {@code "work"}), {@code docs.dir}
 * (fallback {@code "trec"}; a relative value is resolved against {@code work.dir}),
 * {@code trec.doc.parser} (fallback {@code TrecGov2Parser}), {@code html.parser}
 * (fallback {@code DemoHTMLParser}) and {@code content.source.excludeIteration}
 * (fallback {@code false}). If {@code encoding} is still {@code null} at this point it is
 * set to {@code "ISO-8859-1"}.
 *
 * @param config benchmark configuration to read from
 * @throws IllegalArgumentException if no input files were collected from the data directory
 * @throws RuntimeException if a configured parser class cannot be loaded, has the wrong
 *         type, or cannot be instantiated; the original failure is the cause
 */
@Override
public void setConfig(Config config) {
    super.setConfig(config);
    // dirs
    File workDir = new File(config.get("work.dir", "work"));
    String d = config.get("docs.dir", "trec");
    dataDir = new File(d);
    if (!dataDir.isAbsolute()) {
        dataDir = new File(workDir, d);
    }
    // files
    collectFiles(dataDir, inputFiles);
    if (inputFiles.isEmpty()) {
        throw new IllegalArgumentException("No files in dataDir: " + dataDir);
    }
    // trec doc parser
    try {
        String trecDocParserClassName = config.get("trec.doc.parser",
                "org.apache.lucene.benchmark.byTask.feeds.TrecGov2Parser");
        // getDeclaredConstructor().newInstance() replaces the deprecated Class.newInstance(),
        // which rethrows checked constructor exceptions without declaring them.
        trecDocParser = Class.forName(trecDocParserClassName).asSubclass(TrecDocParser.class)
                .getDeclaredConstructor().newInstance();
    } catch (Exception e) {
        // Reached when trec.doc.parser names a class that is missing, of the wrong type,
        // or not instantiable through a no-arg constructor.
        throw new RuntimeException(e);
    }
    // html parser
    try {
        String htmlParserClassName = config.get("html.parser",
                "org.apache.lucene.benchmark.byTask.feeds.DemoHTMLParser");
        htmlParser = Class.forName(htmlParserClassName).asSubclass(HTMLParser.class)
                .getDeclaredConstructor().newInstance();
    } catch (Exception e) {
        // Same failure modes as above, for the html.parser property.
        throw new RuntimeException(e);
    }
    // encoding
    if (encoding == null) {
        encoding = "ISO-8859-1";
    }
    // iteration exclusion in doc name
    excludeDocnameIteration = config.get("content.source.excludeIteration", false);
}

From source file:io.datalayer.lucene.benchmark.CreateThreadedIndexTask.java

License:Apache License

/**
 * Builds a {@code ThreadedIndexWriter} over the run's directory and registers it with the
 * run data.
 *
 * <p>{@code writer.num.threads} (fallback 4) and {@code writer.max.thread.queue.size}
 * (fallback 20) are read from the benchmark configuration.
 *
 * @return always 1
 * @throws IOException declared by the signature; presumably raised when the writer cannot be opened
 */
@Override
public int doLogic() throws IOException {
    final PerfRunData perfData = getRunData();
    final Config settings = perfData.getConfig();

    final IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_46, perfData.getAnalyzer());
    final int numThreads = settings.get("writer.num.threads", 4);
    final int maxQueueSize = settings.get("writer.max.thread.queue.size", 20);

    final IndexWriter threadedWriter = new ThreadedIndexWriter(perfData.getDirectory(), writerConfig, true,
            numThreads, maxQueueSize);
    // NOTE(review): the original carried a disabled call to
    // CreateIndexTask.configureWriter(writer, config); it stays disabled here.
    perfData.setIndexWriter(threadedWriter);
    return 1;
}

From source file:parsers.ClueWebContentSource.java

License:Open Source License

/**
 * Configures this content source: resolves the data directory, collects and sorts the
 * input files, and instantiates the HTML parser.
 *
 * <p>Properties read: {@code work.dir} (fallback {@code "work"}), {@code docs.dir}
 * (fallback {@code "trec"}; a relative value is resolved against {@code work.dir}) and
 * {@code html.parser} (fallback {@code DemoHTMLParser}). Only collected paths whose string
 * form ends with {@code "warc.gz"} are kept; every other path is reported on standard
 * output and skipped. Accepted files are sorted with {@code PathFileComparator}, echoed to
 * standard output, and appended to {@code inputFiles}. {@code verbose} is unconditionally
 * set to {@code true} at the end.
 *
 * @param config benchmark configuration to read from
 * @throws IllegalArgumentException if no input file remains after filtering
 * @throws RuntimeException if collecting files fails, or if the configured HTML parser
 *         class cannot be loaded, has the wrong type, or cannot be instantiated; the
 *         original failure is the cause
 */
@Override
public void setConfig(Config config) {
    super.setConfig(config);

    // dirs
    File workDir = new File(config.get("work.dir", "work"));
    String d = config.get("docs.dir", "trec");
    dataDir = new File(d);
    if (!dataDir.isAbsolute()) {
        dataDir = new File(workDir, d);
    }

    // files: accept only WARC files
    try {
        // Declared as ArrayList because collectFiles' parameter type is not visible here.
        ArrayList<Path> collected = new ArrayList<Path>();
        collectFiles(dataDir.toPath(), collected);

        ArrayList<File> warcFiles = new ArrayList<File>();
        for (Path p : collected) {
            if (p.toString().endsWith("warc.gz")) {
                warcFiles.add(p.toFile());
            } else {
                System.out.println("Ignoring file: " + p);
            }
        }

        // The comparator operates on File, hence the Path -> File -> Path round trip.
        warcFiles.sort(new PathFileComparator());
        for (File f : warcFiles) {
            inputFiles.add(f.toPath());
            System.out.println(f);
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    // Checked outside the try blocks so the IllegalArgumentException reaches the caller
    // as-is instead of being re-wrapped by a catch-all handler.
    if (inputFiles.isEmpty()) {
        throw new IllegalArgumentException("No files in dataDir: " + dataDir);
    }

    // html parser
    try {
        String htmlParserClassName = config.get("html.parser",
                "org.apache.lucene.benchmark.byTask.feeds.DemoHTMLParser");
        // getDeclaredConstructor().newInstance() replaces the deprecated Class.newInstance().
        htmlParser = Class.forName(htmlParserClassName).asSubclass(HTMLParser.class)
                .getDeclaredConstructor().newInstance();
    } catch (Exception e) {
        // Reached when html.parser names a class that is missing, of the wrong type,
        // or not instantiable through a no-arg constructor.
        throw new RuntimeException(e);
    }

    verbose = true;
}

From source file:parsers.TrecContentSource.java

License:Apache License

/**
 * Configures this content source: resolves the data directory, collects the input files,
 * instantiates the TREC document parser and the HTML parser, and applies defaults.
 *
 * <p>Properties read: {@code work.dir} (fallback {@code "work"}), {@code docs.dir}
 * (fallback {@code "trec"}; a relative value is resolved against {@code work.dir}),
 * {@code trec.doc.parser} (fallback {@code TrecGov2Parser}), {@code html.parser}
 * (fallback {@code DemoHTMLParser}) and {@code content.source.excludeIteration}
 * (fallback {@code true}). If {@code encoding} is still {@code null} at this point it is
 * set to {@code "ISO-8859-1"}.
 *
 * @param config benchmark configuration to read from
 * @throws IllegalArgumentException if no input files were collected from the data directory
 * @throws RuntimeException if collecting files fails, or if a configured parser class
 *         cannot be loaded, has the wrong type, or cannot be instantiated; the original
 *         failure is the cause
 */
@Override
public void setConfig(Config config) {
    super.setConfig(config);
    // dirs
    File workDir = new File(config.get("work.dir", "work"));
    String d = config.get("docs.dir", "trec");
    dataDir = new File(d);
    if (!dataDir.isAbsolute()) {
        dataDir = new File(workDir, d);
    }

    // files
    try {
        collectFiles(dataDir.toPath(), inputFiles);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
    // Checked outside the try blocks so the IllegalArgumentException reaches the caller
    // as-is instead of being re-wrapped by a catch-all handler.
    if (inputFiles.isEmpty()) {
        throw new IllegalArgumentException("No files in dataDir: " + dataDir);
    }

    // trec doc parser
    try {
        String trecDocParserClassName = config.get("trec.doc.parser",
                "org.apache.lucene.benchmark.byTask.feeds.TrecGov2Parser");
        // getDeclaredConstructor().newInstance() replaces the deprecated Class.newInstance().
        trecDocParser = Class.forName(trecDocParserClassName).asSubclass(TrecDocParser.class)
                .getDeclaredConstructor().newInstance();
    } catch (Exception e) {
        // Reached when trec.doc.parser names a class that is missing, of the wrong type,
        // or not instantiable through a no-arg constructor.
        throw new RuntimeException(e);
    }
    // html parser
    try {
        String htmlParserClassName = config.get("html.parser",
                "org.apache.lucene.benchmark.byTask.feeds.DemoHTMLParser");
        htmlParser = Class.forName(htmlParserClassName).asSubclass(HTMLParser.class)
                .getDeclaredConstructor().newInstance();
    } catch (Exception e) {
        // Same failure modes as above, for the html.parser property.
        throw new RuntimeException(e);
    }
    // encoding
    if (encoding == null) {
        encoding = "ISO-8859-1";
    }
    // iteration exclusion in doc name
    excludeDocnameIteration = config.get("content.source.excludeIteration", true);
}