Example usage for org.apache.hadoop.mapred JobConf set

List of usage examples for org.apache.hadoop.mapred JobConf set

Introduction

On this page you can find example usages of the set method of org.apache.hadoop.mapred.JobConf.

Prototype

public void set(String name, String value) 

Source Link

Document

Sets the property called `name` to the given `value`.

Usage

From source file:ivory.ptc.AnchorTextInvertedIndex.java

License:Apache License

/**
 * Configures and runs the "BuildAnchorTextInvertedIndex" MapReduce job.
 *
 * <p>All settings are read from the tool's configuration:
 * {@code Ivory.InputPath}, {@code Ivory.OutputPath}, {@code Ivory.NumMapTasks} (default 1),
 * {@code Ivory.NumReduceTasks} (default 100) and {@code Ivory.WeightingSchemeParameters}.
 * The weighting scheme parameters are split on {@code PARAMETER_SEPARATER} and every
 * resulting token is registered as a distributed-cache file. Any existing output path is
 * deleted before the job is run with {@code JobClient.runJob}.
 *
 * @return 0 after {@code JobClient.runJob} returns
 * @throws NullPointerException if {@code Ivory.WeightingSchemeParameters} is not set
 * @throws Exception if a parameter is not a valid URI, or on any error propagated from Hadoop
 */
@Override
public int runTool() throws Exception {
    JobConf conf = new JobConf(getConf(), AnchorTextInvertedIndex.class);
    FileSystem fs = FileSystem.get(conf);
    String inPath = conf.get("Ivory.InputPath");
    String outPath = conf.get("Ivory.OutputPath");
    Path inputPath = new Path(inPath);
    Path outputPath = new Path(outPath);
    int mapTasks = conf.getInt("Ivory.NumMapTasks", 1);
    int reduceTasks = conf.getInt("Ivory.NumReduceTasks", 100);
    String weightingSchemeParameters = conf.get("Ivory.WeightingSchemeParameters");

    LOG.info("BuildAnchorTextInvertedIndex");
    LOG.info(" - input path: " + inPath);
    LOG.info(" - output path: " + outPath);
    LOG.info(" - number of reducers: " + reduceTasks);
    LOG.info(" - weighting scheme: " + conf.get("Ivory.WeightingScheme"));
    LOG.info(" - weighting scheme parameters: " + weightingSchemeParameters);

    // Same exception type the unguarded split() would raise, but with a message that names
    // the missing property.
    if (weightingSchemeParameters == null) {
        throw new NullPointerException("Ivory.WeightingSchemeParameters must be set");
    }

    // Each parameter is treated as a file URI and shipped through the distributed cache.
    String[] params = weightingSchemeParameters.split(PARAMETER_SEPARATER);
    for (String param : params) {
        DistributedCache.addCacheFile(new URI(param), conf);
    }

    conf.setJobName("BuildAnchorTextInvertedIndex");
    conf.setNumMapTasks(mapTasks);
    conf.setNumReduceTasks(reduceTasks);
    conf.set("mapred.child.java.opts", "-Xmx4096m");
    conf.setInt("mapred.task.timeout", 60000000);

    FileInputFormat.setInputPaths(conf, inputPath);
    FileOutputFormat.setOutputPath(conf, outputPath);

    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(AnchorTextTarget.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(ArrayListWritable.class);
    conf.setMapperClass(MyMapper.class);
    conf.setReducerClass(MyReducer.class);

    // Explicit recursive delete; the one-argument delete(Path) overload is deprecated.
    fs.delete(outputPath, true);
    JobClient.runJob(conf);
    return 0;
}

From source file:ivory.ptc.driver.XMLFormatJudgments.java

License:Apache License

/**
 * Configures and runs the "FormatPseudoJudgments" MapReduce job.
 *
 * <p>The job uses one map task and one reduce task, reads sequence files from the input
 * path and writes text output. The docno mapping is registered as a distributed-cache
 * file, and any existing output path is deleted before the job is configured to write there.
 *
 * @param args exactly three arguments: {@code args[0]} input path, {@code args[1]} output
 *        path, {@code args[2]} URI of the docno mapping
 * @return -1 (after printing usage) if the argument count is wrong, otherwise 0 after
 *         {@code JobClient.runJob} returns
 * @throws Exception if {@code args[2]} is not a valid URI, or on any error propagated from Hadoop
 */
@Override
public int run(String[] args) throws Exception {
    if (args.length != 3) {
        printUsage();
        return -1;
    }
    JobConf conf = new JobConf(getConf(), XMLFormatJudgments.class);
    // Command line arguments
    String inPath = args[0];
    String outPath = args[1];
    String docnoMapping = args[2];
    Path inputPath = new Path(inPath);
    Path outputPath = new Path(outPath);
    int mapTasks = 1;
    int reduceTasks = 1;

    conf.setJobName("FormatPseudoJudgments");
    conf.setNumMapTasks(mapTasks);
    conf.setNumReduceTasks(reduceTasks);
    conf.set("mapred.child.java.opts", "-Xmx2048m");
    DistributedCache.addCacheFile(new URI(docnoMapping), conf);
    // Explicit recursive delete; the one-argument delete(Path) overload is deprecated.
    FileSystem.get(conf).delete(outputPath, true);
    FileInputFormat.setInputPaths(conf, inputPath);
    FileOutputFormat.setOutputPath(conf, outputPath);
    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);
    conf.setMapOutputKeyClass(PseudoQuery.class);
    conf.setMapOutputValueClass(PseudoJudgments.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    conf.setMapperClass(IdentityMapper.class);
    conf.setReducerClass(MyReducer.class);

    JobClient.runJob(conf);
    return 0;
}

From source file:ivory.ptc.driver.XMLFormatQueries.java

License:Apache License

/**
 * Configures and runs the "FormatPseudoQueries" MapReduce job.
 *
 * <p>The job uses one map task and one reduce task, reads sequence files from the input
 * path and writes text output. Any existing output path is deleted before the job is
 * configured to write there.
 *
 * @param args exactly two arguments: {@code args[0]} input path, {@code args[1]} output path
 * @return -1 (after printing usage) if the argument count is wrong, otherwise 0 after
 *         {@code JobClient.runJob} returns
 * @throws Exception on any error propagated from Hadoop
 */
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        printUsage();
        return -1;
    }

    JobConf conf = new JobConf(getConf(), XMLFormatQueries.class);
    // Command line arguments
    String inPath = args[0];
    String outPath = args[1];
    Path inputPath = new Path(inPath);
    Path outputPath = new Path(outPath);
    int mapTasks = 1;
    int reduceTasks = 1;

    conf.setJobName("FormatPseudoQueries");
    conf.setNumMapTasks(mapTasks);
    conf.setNumReduceTasks(reduceTasks);
    conf.set("mapred.child.java.opts", "-Xmx2048m");
    // Explicit recursive delete; the one-argument delete(Path) overload is deprecated.
    FileSystem.get(conf).delete(outputPath, true);
    FileInputFormat.setInputPaths(conf, inputPath);
    FileOutputFormat.setOutputPath(conf, outputPath);
    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);
    conf.setMapOutputKeyClass(PseudoQuery.class);
    conf.setMapOutputValueClass(PseudoJudgments.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    conf.setMapperClass(IdentityMapper.class);
    conf.setReducerClass(MyReducer.class);
    JobClient.runJob(conf);
    return 0;
}

From source file:ivory.ptc.SortedPseudoTestCollection.java

License:Apache License

/**
 * Configures and runs the "SortedPTC" MapReduce job.
 *
 * <p>The input and output paths come from the {@code Ivory.InputPath} and
 * {@code Ivory.OutputPath} properties of the tool's configuration. The job uses one map
 * task and one reduce task and reads and writes sequence files. The remaining
 * {@code Ivory.*} properties are only logged here. Any existing output path is deleted
 * before the job is run.
 *
 * @return 0 after {@code JobClient.runJob} returns
 * @throws Exception on any error propagated from Hadoop
 */
public int runTool() throws Exception {
    JobConf conf = new JobConf(getConf(), SortedPseudoTestCollection.class);
    FileSystem fs = FileSystem.get(conf);
    String inPath = conf.get("Ivory.InputPath");
    String outPath = conf.get("Ivory.OutputPath");
    Path inputPath = new Path(inPath);
    Path outputPath = new Path(outPath);
    int mapTasks = 1;
    int reduceTasks = 1;

    LOG.info("SortedPseudoTestCollection");
    // Log the values already read above instead of looking the properties up a second time.
    LOG.info(" - Input path: " + inPath);
    LOG.info(" - Output path: " + outPath);
    LOG.info(" - JudgmentExtractor: " + conf.get("Ivory.JudgmentExtractor"));
    LOG.info(" - JudgmentExtractorParameters: " + conf.get("Ivory.JudgmentExtractorParameters"));
    LOG.info(" - SamplingCriterion: " + conf.get("Ivory.SamplingCriterion"));
    LOG.info(" - SamplingCriterionParameters: " + conf.get("Ivory.SamplingCriterionParameters"));
    LOG.info(" - QueryScorer: " + conf.get("Ivory.QueryScorer"));

    conf.setJobName("SortedPTC");
    conf.setNumMapTasks(mapTasks);
    conf.setNumReduceTasks(reduceTasks);
    conf.set("mapred.child.java.opts", "-Xmx4096m");

    FileInputFormat.setInputPaths(conf, inputPath);
    FileOutputFormat.setOutputPath(conf, outputPath);

    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    conf.setMapOutputKeyClass(PseudoQuery.class);
    conf.setMapOutputValueClass(PseudoJudgments.class);
    conf.setOutputKeyClass(PseudoQuery.class);
    conf.setOutputValueClass(PseudoJudgments.class);
    conf.setMapperClass(MyMapper.class);
    conf.setReducerClass(MyReducer.class);

    // Explicit recursive delete; the one-argument delete(Path) overload is deprecated.
    fs.delete(outputPath, true);
    JobClient.runJob(conf);
    return 0;
}

From source file:ivory.server.RunDistributedRetrievalServers.java

License:Apache License

/**
 * Runs this tool: launches the retrieval servers listed in an XML configuration file and
 * waits until all of them have signalled that they are ready.
 *
 * <p>Steps:
 * <ol>
 *   <li>collect the {@code id} attribute of every {@code <server>} element of the
 *       configuration file;</li>
 *   <li>delete the config path if it exists and write {@code config-<n>.txt} into it, one
 *       {@code "<sid> <port>"} line per server, ports starting at 7001;</li>
 *   <li>submit a map-only job that takes that file as input;</li>
 *   <li>poll the file system until every {@code <sid>.host} and then every
 *       {@code <sid>.ready} file exists under the config path;</li>
 *   <li>log the content of each {@code <sid>.host} file.</li>
 * </ol>
 *
 * @param args {@code args[0]} is the XML configuration file, {@code args[1]} the config
 *        path; further arguments are ignored
 * @return -1 (after printing usage) if fewer than two arguments are given, otherwise 0
 *         once all servers are ready
 * @throws Exception if a {@code <server>} element has no {@code id} attribute, or on any
 *         parsing, file system, job submission or interruption error
 */
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        printUsage();
        return -1;
    }

    String configFile = args[0];
    String configPath = args[1];

    FileSystem fs = FileSystem.get(getConf());

    Document d = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(fs.open(new Path(configFile)));

    sLogger.info("Reading configuration to determine number of servers to launch:");
    List<String> sids = new ArrayList<String>();
    NodeList servers = d.getElementsByTagName("server");
    for (int i = 0; i < servers.getLength(); i++) {
        Node node = servers.item(i);

        // get server id
        String sid = XMLTools.getAttributeValue(node, "id", null);
        if (sid == null) {
            throw new Exception("Must specify a query id attribute for every server!");
        }

        sLogger.info(" - sid: " + sid);
        sids.add(sid);
    }

    int port = 7000;
    int numServers = sids.size();

    if (fs.exists(new Path(configPath))) {
        fs.delete(new Path(configPath), true);
    }

    String fname = appendPath(configPath, "config-" + numServers + ".txt");
    sLogger.info("Writing configuration to: " + fname);
    StringBuilder sb = new StringBuilder();
    for (String sid : sids) {
        port++;
        sb.append(sid).append(" ").append(port).append("\n");
    }

    // Close the stream even when the write fails so the handle is not leaked.
    FSDataOutputStream out = fs.create(new Path(fname), true);
    try {
        out.writeBytes(sb.toString());
    } finally {
        out.close();
    }

    // Build the job from the tool's configuration so that it sees the same settings as the
    // FileSystem obtained from getConf() above.
    JobConf conf = new JobConf(getConf(), RetrievalServer.class);

    conf.setNumMapTasks(1);
    conf.setNumReduceTasks(0);

    conf.setInputFormat(NLineInputFormat.class);
    conf.setOutputFormat(NullOutputFormat.class);
    conf.setMapperClass(ServerMapper.class);

    FileInputFormat.setInputPaths(conf, new Path(fname));

    conf.set("Ivory.ConfigFile", configFile);
    conf.set("Ivory.ConfigPath", configPath);
    conf.setJobName("RetrievalServers");
    conf.set("mapred.child.java.opts", "-Xmx2048m");
    // conf.set("mapred.job.queue.name", "search");

    JobClient client = new JobClient(conf);
    client.submitJob(conf);

    sLogger.info("Waiting for servers to start up...");

    // poll HDFS for hostnames and ports
    waitForServerFiles(fs, configPath, sids, ".host");

    // poll HDFS for ready signal that the index is ready
    waitForServerFiles(fs, configPath, sids, ".ready");

    sLogger.info("All servers ready!");
    sLogger.info("Host information:");
    for (String sid : sids) {
        String f = appendPath(configPath, sid + ".host");
        sLogger.info(" sid=" + sid + ", " + FSProperty.readString(fs, f));
    }

    return 0;
}

/**
 * Blocks until the file {@code <sid><suffix>} exists under {@code configPath} for every
 * server id, checking every 10 seconds and logging a progress marker after each wait.
 * Returns immediately if all files are already present.
 */
private void waitForServerFiles(FileSystem fs, String configPath, List<String> sids, String suffix)
        throws Exception {
    while (true) {
        boolean allPresent = true;
        for (String sid : sids) {
            if (!fs.exists(new Path(appendPath(configPath, sid + suffix)))) {
                allPresent = false;
                break; // one missing file is enough to know we must keep waiting
            }
        }
        if (allPresent) {
            return;
        }
        Thread.sleep(10000);
        sLogger.info(" ...");
    }
}

From source file:ivory.server.RunRetrievalBroker.java

License:Apache License

/**
 * Runs this tool: starts the retrieval broker as a map-only job and waits until it has
 * signalled that it is ready.
 *
 * <p>The server ids are derived from the names of the {@code *.host} files found directly
 * under the config path and passed to the job as a semicolon-separated list in the
 * {@code serverIDs} property. After submitting the job, the method polls every 5 seconds
 * for {@code broker.ready} under the config path and logs that file's content.
 *
 * @param args exactly two arguments: {@code args[0]} is the server config path,
 *        {@code args[1]} the score merging model ({@code "sort"} or {@code "normalize"})
 * @return -1 if the argument count is wrong (after printing usage) or the config path
 *         cannot be listed, otherwise 0 once the broker is ready
 * @throws RuntimeException if the score merging model is not supported
 * @throws Exception on any file system, job submission or interruption error
 */
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        printUsage();
        return -1;
    }

    String configPath = args[0];

    FileSystem fs = FileSystem.get(getConf());

    sLogger.info("Starting retrieval broker...");
    sLogger.info("server config path: " + configPath);
    FileStatus[] stats = fs.listStatus(new Path(configPath));

    if (stats == null) {
        sLogger.info("Error: " + configPath + " not found!");
        return -1;
    }

    String scoreMergeModel = args[1];
    if (!scoreMergeModel.equals("sort") && !scoreMergeModel.equals("normalize")) {
        throw new RuntimeException("Unsupported score merging model: " + args[1]);
    }

    // Collect the server ids: the file name of every "*.host" entry without its suffix.
    StringBuilder ids = new StringBuilder();
    for (FileStatus stat : stats) {
        String s = stat.getPath().toString();
        if (!s.endsWith(".host")) {
            continue;
        }

        String sid = s.substring(s.lastIndexOf("/") + 1, s.lastIndexOf(".host"));
        sLogger.info("sid=" + sid + ", host=" + s);

        if (ids.length() != 0) {
            ids.append(";");
        }
        ids.append(sid);
    }

    // Build the job from the tool's configuration so that it sees the same settings as the
    // FileSystem obtained from getConf() above.
    JobConf conf = new JobConf(getConf(), RunRetrievalBroker.class);
    conf.setJobName("RetrievalBroker");

    conf.setNumMapTasks(1);
    conf.setNumReduceTasks(0);

    conf.setInputFormat(NullInputFormat.class);
    conf.setOutputFormat(NullOutputFormat.class);
    conf.setMapperClass(ServerMapper.class);

    conf.set("serverIDs", ids.toString());
    conf.set("ServerAddressPath", configPath);
    conf.set("ScoreMergeModel", scoreMergeModel);
    conf.set("mapred.child.java.opts", "-Xmx2048m");

    // Remove a stale ready marker so the wait below only sees the new broker's signal.
    String readyFile = appendPath(configPath, "broker.ready");
    fs.delete(new Path(readyFile), true);

    JobClient client = new JobClient(conf);
    client.submitJob(conf);

    sLogger.info("broker started!");

    while (!fs.exists(new Path(readyFile))) {
        Thread.sleep(5000);
    }

    String s = FSProperty.readString(fs, readyFile);
    sLogger.info("broker ready at " + s);

    return 0;
}

From source file:ivory.smrf.retrieval.distributed.RunDistributedRetrievalServers.java

License:Apache License

/**
 * Runs this tool: launches the retrieval servers listed in an XML configuration file and
 * waits until all of them have signalled that they are ready.
 *
 * <p>Steps:
 * <ol>
 *   <li>collect the {@code id} attribute of every {@code <server>} element of the
 *       configuration file;</li>
 *   <li>delete the config path if it exists and write {@code config-<n>.txt} into it, one
 *       {@code "<sid> <port>"} line per server, ports starting at 7001;</li>
 *   <li>submit a map-only job that takes that file as input;</li>
 *   <li>poll the file system until every {@code <sid>.host} and then every
 *       {@code <sid>.ready} file exists under the config path;</li>
 *   <li>log the content of each {@code <sid>.host} file.</li>
 * </ol>
 *
 * @param args {@code args[0]} is the XML configuration file, {@code args[1]} the config
 *        path; further arguments are ignored
 * @return -1 (after printing usage) if fewer than two arguments are given, otherwise 0
 *         once all servers are ready
 * @throws Exception if a {@code <server>} element has no {@code id} attribute, or on any
 *         parsing, file system, job submission or interruption error
 */
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        printUsage();
        return -1;
    }

    String configFile = args[0];
    String configPath = args[1];

    FileSystem fs = FileSystem.get(getConf());

    Document d = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(fs.open(new Path(configFile)));

    sLogger.info("Reading configuration to determine number of servers to launch:");
    List<String> sids = new ArrayList<String>();
    NodeList servers = d.getElementsByTagName("server");
    for (int i = 0; i < servers.getLength(); i++) {
        Node node = servers.item(i);

        // get server id
        String sid = XMLTools.getAttributeValue(node, "id", null);
        if (sid == null) {
            throw new Exception("Must specify a query id attribute for every server!");
        }

        sLogger.info(" - sid: " + sid);
        sids.add(sid);
    }

    int port = 7000;
    int numServers = sids.size();

    if (fs.exists(new Path(configPath))) {
        fs.delete(new Path(configPath), true);
    }

    String fname = appendPath(configPath, "config-" + numServers + ".txt");
    sLogger.info("Writing configuration to: " + fname);
    StringBuilder sb = new StringBuilder();
    for (String sid : sids) {
        port++;
        sb.append(sid).append(" ").append(port).append("\n");
    }

    // Close the stream even when the write fails so the handle is not leaked.
    FSDataOutputStream out = fs.create(new Path(fname), true);
    try {
        out.writeBytes(sb.toString());
    } finally {
        out.close();
    }

    JobConf conf = new JobConf(getConf(), RetrievalServer.class);

    conf.setNumMapTasks(1);
    conf.setNumReduceTasks(0);

    conf.setInputFormat(NLineInputFormat.class);
    conf.setOutputFormat(NullOutputFormat.class);
    conf.setMapperClass(ServerMapper.class);

    FileInputFormat.setInputPaths(conf, new Path(fname));

    conf.set("Ivory.ConfigFile", configFile);
    conf.set("Ivory.ConfigPath", configPath);
    conf.setJobName("RetrievalServers");
    conf.set("mapred.child.java.opts", "-Xmx2048m");
    // conf.set("mapred.job.queue.name", "search");

    JobClient client = new JobClient(conf);
    client.submitJob(conf);

    sLogger.info("Waiting for servers to start up...");

    // poll HDFS for hostnames and ports
    waitForServerFiles(fs, configPath, sids, ".host");

    // poll HDFS for ready signal that the index is ready
    waitForServerFiles(fs, configPath, sids, ".ready");

    sLogger.info("All servers ready!");
    sLogger.info("Host information:");
    for (String sid : sids) {
        String f = appendPath(configPath, sid + ".host");
        sLogger.info(" sid=" + sid + ", " + FSProperty.readString(fs, f));
    }

    return 0;
}

/**
 * Blocks until the file {@code <sid><suffix>} exists under {@code configPath} for every
 * server id, checking every 10 seconds and logging a progress marker after each wait.
 * Returns immediately if all files are already present.
 */
private void waitForServerFiles(FileSystem fs, String configPath, List<String> sids, String suffix)
        throws Exception {
    while (true) {
        boolean allPresent = true;
        for (String sid : sids) {
            if (!fs.exists(new Path(appendPath(configPath, sid + suffix)))) {
                allPresent = false;
                break; // one missing file is enough to know we must keep waiting
            }
        }
        if (allPresent) {
            return;
        }
        Thread.sleep(10000);
        sLogger.info(" ...");
    }
}

From source file:ivory.smrf.retrieval.distributed.RunQueryBroker.java

License:Apache License

/**
 * Runs this tool: submits the map-only "RunQueryBroker" job and returns without waiting
 * for it to finish.
 *
 * @param args exactly five arguments, in order: server config path, run tag, queries
 *        file path, results file path, number of hits (parsed as an integer)
 * @return -1 if the argument count is wrong (after printing usage) or the config path
 *         cannot be listed, otherwise 0 once the job has been submitted
 * @throws Exception on a malformed hit count or any file system / job submission error
 */
public int run(String[] args) throws Exception {
    if (args.length != 5) {
        printUsage();
        return -1;
    }

    final String serverConfigDir = args[0];
    FileSystem hdfs = FileSystem.get(getConf());

    sLogger.info("server config path: " + serverConfigDir);

    // The listing is only used to verify that the config path can be listed.
    if (hdfs.listStatus(new Path(serverConfigDir)) == null) {
        sLogger.info("Error: " + serverConfigDir + " not found!");
        return -1;
    }

    final int hitsPerQuery = Integer.parseInt(args[4]);

    JobConf job = new JobConf(getConf(), RunQueryBroker.class);
    job.setJobName("RunQueryBroker");

    // A single mapper and no reducers.
    job.setNumMapTasks(1);
    job.setNumReduceTasks(0);
    job.setInputFormat(NullInputFormat.class);
    job.setOutputFormat(NullOutputFormat.class);
    job.setMapperClass(Server.class);

    // Hand the command line arguments to the job through its configuration.
    job.set("QueriesFilePath", args[2]);
    job.set("ConfigPath", serverConfigDir);
    job.set("ResultsFilePath", args[3]);
    job.set("Runtag", args[1]);
    job.setInt("NumHits", hitsPerQuery);
    job.set("mapred.child.java.opts", "-Xmx2048m");

    new JobClient(job).submitJob(job);
    sLogger.info("runner started!");

    return 0;
}

From source file:ivory.smrf.retrieval.distributed.RunRetrievalBroker.java

License:Apache License

/**
 * Runs this tool./*  w w w  .ja v  a  2s. c  om*/
 */
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        printUsage();
        return -1;
    }

    String configPath = args[0];

    FileSystem fs = FileSystem.get(getConf());

    String ids = "";

    sLogger.info("Starting retrieval broker...");
    sLogger.info("server config path: " + configPath);
    FileStatus[] stats = fs.listStatus(new Path(configPath));

    if (stats == null) {
        sLogger.info("Error: " + configPath + " not found!");
        return -1;
    }

    String scoreMergeModel = args[1];
    if (!scoreMergeModel.equals("sort") && !scoreMergeModel.equals("normalize")) {
        throw new RuntimeException("Unsupported score merging model: " + args[1]);
    }

    for (int i = 0; i < stats.length; i++) {
        String s = stats[i].getPath().toString();
        if (!s.endsWith(".host"))
            continue;

        String sid = s.substring(s.lastIndexOf("/") + 1, s.lastIndexOf(".host"));
        sLogger.info("sid=" + sid + ", host=" + s);

        if (ids.length() != 0)
            ids += ";";

        ids += sid;
    }

    JobConf conf = new JobConf(getConf(), RunRetrievalBroker.class);
    conf.setJobName("RetrievalBroker");

    conf.setNumMapTasks(1);
    conf.setNumReduceTasks(0);

    conf.setInputFormat(NullInputFormat.class);
    conf.setOutputFormat(NullOutputFormat.class);
    conf.setMapperClass(ServerMapper.class);

    conf.set("serverIDs", ids);
    conf.set("ServerAddressPath", configPath);
    conf.set("ScoreMergeModel", scoreMergeModel);
    conf.set("mapred.child.java.opts", "-Xmx2048m");

    fs.delete(new Path(appendPath(configPath, "broker.ready")), true);

    JobClient client = new JobClient(conf);
    client.submitJob(conf);

    sLogger.info("broker started!");

    while (true) {
        String f = appendPath(configPath, "broker.ready");
        if (fs.exists(new Path(f))) {
            break;
        }

        Thread.sleep(5000);
    }

    String s = FSProperty.readString(FileSystem.get(conf), appendPath(configPath, "broker.ready"));
    sLogger.info("broker ready at " + s);

    return 0;
}

From source file:ivory.smrf.retrieval.RunQueryBroker.java

License:Apache License

/**
 * Runs this tool./*from   w w w  .j a v a2  s. c  om*/
 */
public int run(String[] args) throws Exception {
    if (args.length != 5) {
        printUsage();
        return -1;
    }

    String configPath = args[0];
    FileSystem fs = FileSystem.get(getConf());

    sLogger.info("server config path: " + configPath);
    FileStatus[] stats = fs.listStatus(new Path(configPath));

    if (stats == null) {
        sLogger.info("Error: " + configPath + " not found!");
        return -1;
    }

    String runtag = args[1];
    String queriesFilePath = args[2];
    String resultsFilePath = args[3];
    int numHits = Integer.parseInt(args[4]);

    JobConf conf = new JobConf(RunQueryBroker.class);
    conf.setJobName("RunQueryBroker");

    conf.setNumMapTasks(1);
    conf.setNumReduceTasks(0);

    conf.setInputFormat(NullInputFormat.class);
    conf.setOutputFormat(NullOutputFormat.class);
    conf.setMapperClass(Server.class);

    conf.set("QueriesFilePath", queriesFilePath);
    conf.set("ConfigPath", configPath);
    conf.set("ResultsFilePath", resultsFilePath);
    conf.set("Runtag", runtag);
    conf.setInt("NumHits", numHits);

    conf.set("mapred.child.java.opts", "-Xmx2048m");

    JobClient client = new JobClient(conf);
    client.submitJob(conf);

    sLogger.info("runner started!");

    return 0;
}