Example usage for org.apache.hadoop.mapred JobConf addResource

Introduction

On this page you can find example usages for org.apache.hadoop.mapred JobConf addResource.

Prototype

public void addResource(String name) 

Document

Add a configuration resource.
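
JobConf inherits addResource from Configuration, which also provides overloads that take a Path, a URL, or an InputStream; several of the examples below use the Path and URL forms. Below is a minimal sketch of the String and Path overloads (the resource name and paths are placeholders, not taken from the examples):

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;

public class AddResourceSketch {
    public static void main(String[] args) {
        JobConf conf = new JobConf();
        // String overload: the named resource is looked up on the classpath.
        conf.addResource("my-site.xml");
        // Path overload: the resource is loaded from the local file system.
        conf.addResource(new Path("/etc/hadoop/conf/core-site.xml"));
        // Resources added later override values from earlier ones, unless a property is marked final.
        System.out.println(conf.get("fs.defaultFS"));
    }
}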

Usage

From source file:org.apache.hyracks.imru.dataflow.Hdtest.java

License:Apache License

public static JobSpecification createJob() throws Exception {
    JobSpecification spec = new JobSpecification();
    spec.setFrameSize(4096);

    String PATH_TO_HADOOP_CONF = "/home/wangrui/a/imru/hadoop-0.20.2/conf";
    String HDFS_INPUT_PATH = "/customer/customer.tbl,/customer_result/part-0";
    JobConf conf = new JobConf();
    conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/core-site.xml"));
    conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/mapred-site.xml"));
    conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/hdfs-site.xml"));
    FileInputFormat.setInputPaths(conf, HDFS_INPUT_PATH);
    conf.setInputFormat(TextInputFormat.class);
    RecordDescriptor recordDesc = new RecordDescriptor(
            new ISerializerDeserializer[] { UTF8StringSerializerDeserializer.INSTANCE });
    InputSplit[] splits = conf.getInputFormat().getSplits(conf, 1);
    HDFSReadOperatorDescriptor readOperator = new HDFSReadOperatorDescriptor(spec, recordDesc, conf, splits,
            new String[] { "NC0", "NC1" }, new IKeyValueParserFactory<LongWritable, Text>() {
                @Override
                public IKeyValueParser<LongWritable, Text> createKeyValueParser(final IHyracksTaskContext ctx) {
                    return new IKeyValueParser<LongWritable, Text>() {
                        TupleWriter tupleWriter;

                        @Override
                        public void open(IFrameWriter writer) throws HyracksDataException {
                            tupleWriter = new TupleWriter(ctx, writer, 1);
                        }

                        @Override
                        public void parse(LongWritable key, Text value, IFrameWriter writer, String fileString)
                                throws HyracksDataException {
                            try {
                                tupleWriter.write(value.getBytes(), 0, value.getLength());
                                tupleWriter.finishField();
                                tupleWriter.finishTuple();
                            } catch (IOException e) {
                                throw new HyracksDataException(e);
                            }
                        }

                        @Override
                        public void close(IFrameWriter writer) throws HyracksDataException {
                            tupleWriter.close();
                        }
                    };
                }

            });

    // createPartitionConstraint(spec, readOperator, new String[] {"NC0"});
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, readOperator, new String[] { "NC0", "NC1" });

    IOperatorDescriptor writer = new HDFSOD(spec, null, null, null);
    // createPartitionConstraint(spec, writer, outSplits);

    spec.connect(new OneToOneConnectorDescriptor(spec), readOperator, 0, writer, 0);

    spec.addRoot(writer);
    return spec;
}

From source file:org.apache.oozie.action.hadoop.LauncherMainTester.java

License:Apache License

private static JobConf createSleepMapperReducerJobConf() {
    JobConf jConf = new JobConf(true);
    jConf.addResource(new Path("file:///", System.getProperty("oozie.action.conf.xml")));
    jConf.setMapperClass(SleepMapperReducerForTest.class);
    jConf.setReducerClass(SleepMapperReducerForTest.class);
    jConf.setOutputKeyClass(Text.class);
    jConf.setOutputValueClass(IntWritable.class);
    jConf.setInputFormat(TextInputFormat.class);
    jConf.setOutputFormat(TextOutputFormat.class);
    jConf.setNumReduceTasks(1);
    jConf.set(SleepMapperReducerForTest.SLEEP_TIME_MILLIS_KEY, "60000");
    return jConf;
}

From source file:org.apache.pig.backend.hadoop.executionengine.HExecutionEngine.java

License:Apache License

public JobConf getLocalConf() {
    JobConf jc = new JobConf(false);

    jc.addResource(CORE_DEFAULT_SITE);
    jc.addResource(MAPRED_DEFAULT_SITE);
    jc.addResource(YARN_DEFAULT_SITE);

    return jc;
}

From source file:org.apache.pig.backend.hadoop.executionengine.HExecutionEngine.java

License:Apache License

public JobConf getExecConf(Properties properties) throws ExecException {
    JobConf jc = null;
    // Check existence of user provided configs
    String isHadoopConfigsOverriden = properties.getProperty("pig.use.overriden.hadoop.configs");
    if (isHadoopConfigsOverriden != null && isHadoopConfigsOverriden.equals("true")) {
        jc = new JobConf(ConfigurationUtil.toConfiguration(properties));
    } else {
        // Check existence of hadoop-site.xml or core-site.xml in
        // classpath if user provided confs are not being used
        Configuration testConf = new Configuration();
        ClassLoader cl = testConf.getClassLoader();
        URL hadoop_site = cl.getResource(HADOOP_SITE);
        URL core_site = cl.getResource(CORE_SITE);

        if (hadoop_site == null && core_site == null) {
            throw new ExecException(
                    "Cannot find hadoop configurations in classpath "
                            + "(neither hadoop-site.xml nor core-site.xml was found in the classpath)."
                            + " If you plan to use local mode, please put -x local option in command line",
                    4010);
        }
        jc = new JobConf();
    }
    jc.addResource("pig-cluster-hadoop-site.xml");
    jc.addResource(YARN_SITE);
    return jc;
}

From source file:org.apache.pig.backend.hadoop.executionengine.MRExecutionEngine.java

License:Apache License

@SuppressWarnings({ "deprecation", "resource" })
private void init(Properties properties) throws ExecException {
    // First set the ssh socket factory
    setSSHFactory();

    String cluster = null;
    String nameNode = null;

    // We need to build a configuration object first in the manner described
    // below and then get back a properties object to inspect the
    // JOB_TRACKER_LOCATION and FILE_SYSTEM_LOCATION. The reason for this is
    // that if we looked only at the existing properties object, we might not
    // get the right settings. So we want to read the configurations in the
    // order specified below and only then look for JOB_TRACKER_LOCATION and
    // FILE_SYSTEM_LOCATION.

    // Hadoop by default specifies two resources, loaded in order from the
    // classpath:
    // 1. hadoop-default.xml : Read-only defaults for hadoop.
    // 2. hadoop-site.xml : Site-specific configuration for a given hadoop
    //    installation.
    // Now add the settings from the "properties" object to override any
    // existing properties. All of the above is accomplished in the method
    // call below.

    JobConf jc = null;
    if (!this.pigContext.getExecType().isLocal()) {
        // Check existence of user provided configs
        String isHadoopConfigsOverriden = properties.getProperty("pig.use.overriden.hadoop.configs");
        if (isHadoopConfigsOverriden != null && isHadoopConfigsOverriden.equals("true")) {
            jc = new JobConf(ConfigurationUtil.toConfiguration(properties));
        } else {
            // Check existence of hadoop-site.xml or core-site.xml in
            // classpath
            // if user provided confs are not being used
            Configuration testConf = new Configuration();
            ClassLoader cl = testConf.getClassLoader();
            URL hadoop_site = cl.getResource(HADOOP_SITE);
            URL core_site = cl.getResource(CORE_SITE);

            if (hadoop_site == null && core_site == null) {
                throw new ExecException(
                        "Cannot find hadoop configurations in classpath (neither hadoop-site.xml nor core-site.xml was found in the classpath)."
                                + " If you plan to use local mode, please put -x local option in command line",
                        4010);
            }
            jc = new JobConf();
        }
        jc.addResource("pig-cluster-hadoop-site.xml");
        jc.addResource(YARN_SITE);

        // Trick to invoke static initializer of DistributedFileSystem to
        // add hdfs-default.xml
        // into configuration
        new DistributedFileSystem();

        // the method below alters the properties object by overriding the
        // hadoop properties with the values from properties and recomputing
        // the properties
        recomputeProperties(jc, properties);
    } else {
        // If we are running in local mode we don't read the hadoop conf file
        if (properties.getProperty("mapreduce.framework.name") == null) {
            properties.setProperty("mapreduce.framework.name", "local");
        }
        properties.setProperty(JOB_TRACKER_LOCATION, LOCAL);
        properties.setProperty(FILE_SYSTEM_LOCATION, "file:///");
        properties.setProperty(ALTERNATIVE_FILE_SYSTEM_LOCATION, "file:///");

        jc = new JobConf(false);
        jc.addResource("core-default.xml");
        jc.addResource("mapred-default.xml");
        jc.addResource("yarn-default.xml");
        recomputeProperties(jc, properties);
    }

    cluster = jc.get(JOB_TRACKER_LOCATION);
    nameNode = jc.get(FILE_SYSTEM_LOCATION);
    if (nameNode == null)
        nameNode = (String) pigContext.getProperties().get(ALTERNATIVE_FILE_SYSTEM_LOCATION);

    if (cluster != null && cluster.length() > 0) {
        if (!cluster.contains(":") && !cluster.equalsIgnoreCase(LOCAL)) {
            cluster = cluster + ":50020";
        }
        properties.setProperty(JOB_TRACKER_LOCATION, cluster);
    }

    if (nameNode != null && nameNode.length() > 0) {
        if (!nameNode.contains(":") && !nameNode.equalsIgnoreCase(LOCAL)) {
            nameNode = nameNode + ":8020";
        }
        properties.setProperty(FILE_SYSTEM_LOCATION, nameNode);
    }

    log.info("Connecting to hadoop file system at: " + (nameNode == null ? LOCAL : nameNode));
    // constructor sets DEFAULT_REPLICATION_FACTOR_KEY
    ds = new HDataStorage(properties);

    if (cluster != null && !cluster.equalsIgnoreCase(LOCAL)) {
        log.info("Connecting to map-reduce job tracker at: " + jc.get(JOB_TRACKER_LOCATION));
    }

    // Set job-specific configuration knobs
    jobConf = jc;
}

From source file:org.apache.pig.backend.hadoop.executionengine.tez.TezExecutionEngine.java

License:Apache License

@Override
public JobConf getExecConf(Properties properties) throws ExecException {
    JobConf jc = super.getExecConf(properties);
    jc.addResource(TezConfiguration.TEZ_SITE_XML);
    return jc;
}

From source file:org.apache.vxquery.xtest.MiniDFS.java

License:Apache License

public void startHDFS() throws IOException {

    FileSystem lfs = FileSystem.getLocal(new Configuration());
    JobConf conf = new JobConf();
    String PATH_TO_HADOOP_CONF = "src/test/resources/hadoop/conf";
    Path hdfs_conf = new Path(PATH_TO_HADOOP_CONF);
    if (!lfs.exists(hdfs_conf)) {
        PATH_TO_HADOOP_CONF = "vxquery-xtest/src/test/resources/hadoop/conf";
        hdfs_conf = new Path(PATH_TO_HADOOP_CONF);
        if (!lfs.exists(hdfs_conf)) {
            PATH_TO_HADOOP_CONF = "../vxquery-xtest/src/test/resources/hadoop/conf";
            hdfs_conf = new Path(PATH_TO_HADOOP_CONF);
        }
    }
    conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/core-site.xml"));
    conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/mapred-site.xml"));
    conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/hdfs-site.xml"));
    int numDataNodes = 1;
    int nameNodePort = 40000;

    // cleanup artifacts created on the local file system
    lfs.delete(new Path("build"), true);
    System.setProperty("hadoop.log.dir", "logs");
    MiniDFSCluster.Builder build = new MiniDFSCluster.Builder(conf);
    build.nameNodePort(nameNodePort);
    build.nameNodeHttpPort(nameNodePort + 34);
    build.numDataNodes(numDataNodes);
    build.checkExitOnShutdown(true);
    build.startupOption(StartupOption.REGULAR);
    build.format(true);
    build.waitSafeMode(true);
    dfsCluster = build.build();

    FileSystem dfs = FileSystem.get(conf);
    String DATA_PATH = "src/test/resources/TestSources/ghcnd";
    Path src = new Path(DATA_PATH);
    if (!lfs.exists(src)) {
        DATA_PATH = "vxquery-xtest/src/test/resources/TestSources/ghcnd";
        src = new Path(DATA_PATH);
        if (!lfs.exists(src)) {
            DATA_PATH = "../vxquery-xtest/src/test/resources/TestSources/ghcnd";
            src = new Path(DATA_PATH);
        }
    }
    dfs.mkdirs(new Path("/tmp"));
    Path dest = new Path("/tmp/vxquery-hdfs-test");
    dfs.copyFromLocalFile(src, dest);
    if (dfs.exists(dest)) {
        System.err.println("Test files copied to HDFS successfully");
    }
}

From source file:org.archive.jbs.Parse.java

License:Apache License

/**
 * Command-line driver.  Runs the Parse as a Hadoop job.
 */
public static void main(String args[]) throws Exception {
    JobConf conf = new JobConf(Parse.class);

    // Load the default set of config properties, including the
    // essential properties needed by the bits of Nutch that we are
    // still using.  These properties can still be over-ridden by
    // command-line args.
    conf.addResource("conf-parse.xml");

    int result = ToolRunner.run(conf, new Parse(), args);

    System.exit(result);
}

From source file:org.deeplearning4j.hadoop.util.HdfsUtils.java

License:Apache License

/**
 * Adapted from
 * http://terrier.org/docs/v3.5/javadoc/org/terrier/utility/io/HadoopUtility.html#saveClassPathToJob%28org.apache.hadoop.mapred.JobConf%29
 * @param jobConf
 * @throws IOException
 */
public static List<Path> saveClassPathToJob(JobConf jobConf) throws Exception {
    String hdfs = getHost(jobConf);

    HdfsLock lock = new HdfsLock(hdfs);
    String hdfs2 = getHdfs(jobConf);
    if (jobConf.get(HDFS_HOST) != null) {
        if (lock.isLocked()) {
            List<Path> ret = lock.getPaths();
            StringBuffer files = new StringBuffer();
            StringBuffer classPath = new StringBuffer();
            for (Path path : ret) {
                files.append(hdfs2 + path.toString());
                files.append(",");
                classPath.append(hdfs2 + path.toString());
                classPath.append(":");
                jobConf.addResource(path.toUri().toURL());
            }
            String classPathToSet = classPath.toString().substring(0, classPath.lastIndexOf(":"));
            String filesToSet = files.toString().substring(0, files.lastIndexOf(","));
            log.info("Setting class path " + classPathToSet);
            log.info("Using files " + filesToSet);
            jobConf.set("mapred.cache.files", filesToSet);
            jobConf.set("mapred.job.classpath.files", classPathToSet);
            return ret;
        }
    }
    List<Path> paths = new ArrayList<Path>();
    log.info("Copying classpath to job");

    final String[] jars = findJarFiles(new String[] { System.getenv().get("CLASSPATH"),
            System.getProperty("java.class.path"), System.getProperty("surefire.test.class.path") });

    final FileSystem defFS = FileSystem.get(jobConf);
    int numFilesWritten = 0;
    for (String jarFile : jars) {
        //class path issues
        if (jarFile.contains("hadoop-client")) {
            log.info("Skipping hadoop-client");
            continue;
        } else if (jarFile.contains("mapreduce-run")) {
            log.info("Skipping map reduce run");
            continue;
        }

        Path srcJarFilePath = new Path("file:///" + jarFile);
        String filename = srcJarFilePath.getName();
        Path tmpJarFilePath = makeFile(jobConf, filename);
        log.info("Uploading " + jarFile + " to " + tmpJarFilePath.toString());
        try {
            defFS.copyFromLocalFile(srcJarFilePath, tmpJarFilePath);
            jobConf.addResource(tmpJarFilePath);
            paths.add(tmpJarFilePath);
            numFilesWritten++;
        } catch (Exception e) {
            for (Path path : paths) {
                if (defFS.exists(path))
                    defFS.delete(path, true);
            }

            lock.close();
            log.error(String.format("Exception writing to hdfs; rolling back %d jar files ", numFilesWritten),
                    e);
            throw new IOException("Couldn't write jar file " + jarFile);
        }
    }
    try {
        lock.create(paths);
    } catch (KeeperException.SessionExpiredException e) {
        lock = new HdfsLock(hdfs);
        lock.create(paths);

    }

    lock.close();
    //resolve any differences by removing  clashing names in the files (archives are removed from files)

    Set<Path> remove = new HashSet<Path>();
    for (Path path : paths) {
        boolean exists = false;
        try {
            exists = defFS.exists(path);
        } catch (IllegalArgumentException e) {
            exists = false;
        }
        if (!exists)
            remove.add(path);
    }
    paths.removeAll(remove);
    return paths;
}

From source file:org.lamapacos.preprocessor.filter.SegmentFilter.java

License:Apache License

@Override
public void configure(JobConf job) {
    setConf(job);
    //conf file
    job.addResource("lamapacos-preprocessor.xml");
    RegexsFilter.setPatterns(job);
    try {
        this.fs = FileSystem.get(getConf());
    } catch (IOException e) {
        LOG.error("IOException:", e);
    }
}