Example usage for org.apache.hadoop.mapred JobConf addResource

Introduction

On this page you can find example usages for org.apache.hadoop.mapred JobConf addResource.

Prototype

public void addResource(String name) 

Document

Add a configuration resource.
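
JobConf inherits addResource from Configuration, which also provides overloads that take a Path, a URL, or an InputStream; several of the examples below use the Path and URL forms. Below is a minimal sketch of the String and Path overloads (the resource name and paths are placeholders, not taken from the examples):

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;

public class AddResourceSketch {
    public static void main(String[] args) {
        JobConf conf = new JobConf();
        // String overload: the named resource is looked up on the classpath.
        conf.addResource("my-site.xml");
        // Path overload: the resource is loaded from the local file system.
        conf.addResource(new Path("/etc/hadoop/conf/core-site.xml"));
        // Resources added later override values from earlier ones, unless a property is marked final.
        System.out.println(conf.get("fs.defaultFS"));
    }
}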

Usage

From source file:org.apache.hyracks.imru.dataflow.Hdtest.java

License:Apache License

public static JobSpecification createJob() throws Exception {
    JobSpecification spec = new JobSpecification();
    spec.setFrameSize(4096);

    String PATH_TO_HADOOP_CONF = "/home/wangrui/a/imru/hadoop-0.20.2/conf";
    String HDFS_INPUT_PATH = "/customer/customer.tbl,/customer_result/part-0";
    JobConf conf = new JobConf();
    conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/core-site.xml"));
    conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/mapred-site.xml"));
    conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/hdfs-site.xml"));
    FileInputFormat.setInputPaths(conf, HDFS_INPUT_PATH);
    conf.setInputFormat(TextInputFormat.class);
    RecordDescriptor recordDesc = new RecordDescriptor(
            new ISerializerDeserializer[] { UTF8StringSerializerDeserializer.INSTANCE });
    InputSplit[] splits = conf.getInputFormat().getSplits(conf, 1);
    HDFSReadOperatorDescriptor readOperator = new HDFSReadOperatorDescriptor(spec, recordDesc, conf, splits,
            new String[] { "NC0", "NC1" }, new IKeyValueParserFactory<LongWritable, Text>() {
                @Override
                public IKeyValueParser<LongWritable, Text> createKeyValueParser(final IHyracksTaskContext ctx) {
                    return new IKeyValueParser<LongWritable, Text>() {
                        TupleWriter tupleWriter;

                        @Override
                        public void open(IFrameWriter writer) throws HyracksDataException {
                            tupleWriter = new TupleWriter(ctx, writer, 1);
                        }

                        @Override
                        public void parse(LongWritable key, Text value, IFrameWriter writer, String fileString)
                                throws HyracksDataException {
                            try {
                                tupleWriter.write(value.getBytes(), 0, value.getLength());
                                tupleWriter.finishField();
                                tupleWriter.finishTuple();
                            } catch (IOException e) {
                                throw new HyracksDataException(e);
                            }
                        }

                        @Override
                        public void close(IFrameWriter writer) throws HyracksDataException {
                            tupleWriter.close();
                        }
                    };
                }

            });

    // createPartitionConstraint(spec, readOperator, new String[] {"NC0"});
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, readOperator, new String[] { "NC0", "NC1" });

    IOperatorDescriptor writer = new HDFSOD(spec, null, null, null);
    // createPartitionConstraint(spec, writer, outSplits);

    spec.connect(new OneToOneConnectorDescriptor(spec), readOperator, 0, writer, 0);

    spec.addRoot(writer);
    return spec;
}

From source file:org.apache.oozie.action.hadoop.LauncherMainTester.java

License:Apache License

private static JobConf createSleepMapperReducerJobConf() {
    JobConf jConf = new JobConf(true);
    jConf.addResource(new Path("file:///", System.getProperty("oozie.action.conf.xml")));
    jConf.setMapperClass(SleepMapperReducerForTest.class);
    jConf.setReducerClass(SleepMapperReducerForTest.class);
    jConf.setOutputKeyClass(Text.class);
    jConf.setOutputValueClass(IntWritable.class);
    jConf.setInputFormat(TextInputFormat.class);
    jConf.setOutputFormat(TextOutputFormat.class);
    jConf.setNumReduceTasks(1);
    jConf.set(SleepMapperReducerForTest.SLEEP_TIME_MILLIS_KEY, "60000");
    return jConf;
}

From source file:org.apache.pig.backend.hadoop.executionengine.HExecutionEngine.java

License:Apache License

public JobConf getLocalConf() {
    JobConf jc = new JobConf(false);

    jc.addResource(CORE_DEFAULT_SITE);
    jc.addResource(MAPRED_DEFAULT_SITE);
    jc.addResource(YARN_DEFAULT_SITE);

    return jc;
}

From source file:org.apache.pig.backend.hadoop.executionengine.HExecutionEngine.java

License:Apache License

public JobConf getExecConf(Properties properties) throws ExecException {
    JobConf jc = null;
    // Check existence of user provided configs
    String isHadoopConfigsOverriden = properties.getProperty("pig.use.overriden.hadoop.configs");
    if (isHadoopConfigsOverriden != null && isHadoopConfigsOverriden.equals("true")) {
        jc = new JobConf(ConfigurationUtil.toConfiguration(properties));
    } else {
        // Check existence of hadoop-site.xml or core-site.xml in
        // classpath if user provided confs are not being used
        Configuration testConf = new Configuration();
        ClassLoader cl = testConf.getClassLoader();
        URL hadoop_site = cl.getResource(HADOOP_SITE);
        URL core_site = cl.getResource(CORE_SITE);

        if (hadoop_site == null && core_site == null) {
            throw new ExecException(
                    "Cannot find hadoop configurations in classpath "
                            + "(neither hadoop-site.xml nor core-site.xml was found in the classpath)."
                            + " If you plan to use local mode, please put -x local option in command line",
                    4010);
        }
        jc = new JobConf();
    }
    jc.addResource("pig-cluster-hadoop-site.xml");
    jc.addResource(YARN_SITE);
    return jc;
}

From source file:org.apache.pig.backend.hadoop.executionengine.MRExecutionEngine.java

License:Apache License

@SuppressWarnings({ "deprecation", "resource" })
private void init(Properties properties) throws ExecException {
    // First set the ssh socket factory
    setSSHFactory();

    String cluster = null;
    String nameNode = null;

    // We need to build a configuration object first in the manner described
    // below and then get back a properties object to inspect the
    // JOB_TRACKER_LOCATION and FILE_SYSTEM_LOCATION. The reason for this is
    // that if we looked only at the existing properties object, we might not
    // get the right settings. So we want to read the configurations in the
    // order specified below and only then look for JOB_TRACKER_LOCATION and
    // FILE_SYSTEM_LOCATION.

    // Hadoop by default specifies two resources, loaded in order from the
    // classpath:
    // 1. hadoop-default.xml : Read-only defaults for hadoop.
    // 2. hadoop-site.xml : Site-specific configuration for a given hadoop
    //    installation.
    // Now add the settings from the "properties" object to override any
    // existing properties. All of the above is accomplished in the method
    // call below.

    JobConf jc = null;
    if (!this.pigContext.getExecType().isLocal()) {
        // Check existence of user provided configs
        String isHadoopConfigsOverriden = properties.getProperty("pig.use.overriden.hadoop.configs");
        if (isHadoopConfigsOverriden != null && isHadoopConfigsOverriden.equals("true")) {
            jc = new JobConf(ConfigurationUtil.toConfiguration(properties));
        } else {
            // Check existence of hadoop-site.xml or core-site.xml in
            // classpath
            // if user provided confs are not being used
            Configuration testConf = new Configuration();
            ClassLoader cl = testConf.getClassLoader();
            URL hadoop_site = cl.getResource(HADOOP_SITE);
            URL core_site = cl.getResource(CORE_SITE);

            if (hadoop_site == null && core_site == null) {
                throw new ExecException(
                        "Cannot find hadoop configurations in classpath (neither hadoop-site.xml nor core-site.xml was found in the classpath)."
                                + " If you plan to use local mode, please put -x local option in command line",
                        4010);
            }
            jc = new JobConf();
        }
        jc.addResource("pig-cluster-hadoop-site.xml");
        jc.addResource(YARN_SITE);

        // Trick to invoke static initializer of DistributedFileSystem to
        // add hdfs-default.xml
        // into configuration
        new DistributedFileSystem();

        // the method below alters the properties object by overriding the
        // hadoop properties with the values from properties and recomputing
        // the properties
        recomputeProperties(jc, properties);
    } else {
        // If we are running in local mode we don't read the hadoop conf file
        if (properties.getProperty("mapreduce.framework.name") == null) {
            properties.setProperty("mapreduce.framework.name", "local");
        }
        properties.setProperty(JOB_TRACKER_LOCATION, LOCAL);
        properties.setProperty(FILE_SYSTEM_LOCATION, "file:///");
        properties.setProperty(ALTERNATIVE_FILE_SYSTEM_LOCATION, "file:///");

        jc = new JobConf(false);
        jc.addResource("core-default.xml");
        jc.addResource("mapred-default.xml");
        jc.addResource("yarn-default.xml");
        recomputeProperties(jc, properties);
    }

    cluster = jc.get(JOB_TRACKER_LOCATION);
    nameNode = jc.get(FILE_SYSTEM_LOCATION);
    if (nameNode == null)
        nameNode = (String) pigContext.getProperties().get(ALTERNATIVE_FILE_SYSTEM_LOCATION);

    if (cluster != null && cluster.length() > 0) {
        if (!cluster.contains(":") && !cluster.equalsIgnoreCase(LOCAL)) {
            cluster = cluster + ":50020";
        }
        properties.setProperty(JOB_TRACKER_LOCATION, cluster);
    }

    if (nameNode != null && nameNode.length() > 0) {
        if (!nameNode.contains(":") && !nameNode.equalsIgnoreCase(LOCAL)) {
            nameNode = nameNode + ":8020";
        }
        properties.setProperty(FILE_SYSTEM_LOCATION, nameNode);
    }

    log.info("Connecting to hadoop file system at: " + (nameNode == null ? LOCAL : nameNode));
    // constructor sets DEFAULT_REPLICATION_FACTOR_KEY
    ds = new HDataStorage(properties);

    if (cluster != null && !cluster.equalsIgnoreCase(LOCAL)) {
        log.info("Connecting to map-reduce job tracker at: " + jc.get(JOB_TRACKER_LOCATION));
    }

    // Set job-specific configuration knobs
    jobConf = jc;
}

From source file:org.apache.pig.backend.hadoop.executionengine.tez.TezExecutionEngine.java

License:Apache License

@Override
public JobConf getExecConf(Properties properties) throws ExecException {
    JobConf jc = super.getExecConf(properties);
    jc.addResource(TezConfiguration.TEZ_SITE_XML);
    return jc;
}

From source file:org.apache.vxquery.xtest.MiniDFS.java

License:Apache License

public void startHDFS() throws IOException {

    FileSystem lfs = FileSystem.getLocal(new Configuration());
    JobConf conf = new JobConf();
    String PATH_TO_HADOOP_CONF = "src/test/resources/hadoop/conf";
    Path hdfs_conf = new Path(PATH_TO_HADOOP_CONF);
    if (!lfs.exists(hdfs_conf)) {
        PATH_TO_HADOOP_CONF = "vxquery-xtest/src/test/resources/hadoop/conf";
        hdfs_conf = new Path(PATH_TO_HADOOP_CONF);
        if (!lfs.exists(hdfs_conf)) {
            PATH_TO_HADOOP_CONF = "../vxquery-xtest/src/test/resources/hadoop/conf";
            hdfs_conf = new Path(PATH_TO_HADOOP_CONF);
        }
    }
    conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/core-site.xml"));
    conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/mapred-site.xml"));
    conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/hdfs-site.xml"));
    int numDataNodes = 1;
    int nameNodePort = 40000;

    // cleanup artifacts created on the local file system
    lfs.delete(new Path("build"), true);
    System.setProperty("hadoop.log.dir", "logs");
    MiniDFSCluster.Builder build = new MiniDFSCluster.Builder(conf);
    build.nameNodePort(nameNodePort);
    build.nameNodeHttpPort(nameNodePort + 34);
    build.numDataNodes(numDataNodes);
    build.checkExitOnShutdown(true);
    build.startupOption(StartupOption.REGULAR);
    build.format(true);
    build.waitSafeMode(true);
    dfsCluster = build.build();

    FileSystem dfs = FileSystem.get(conf);
    String DATA_PATH = "src/test/resources/TestSources/ghcnd";
    Path src = new Path(DATA_PATH);
    if (!lfs.exists(src)) {
        DATA_PATH = "vxquery-xtest/src/test/resources/TestSources/ghcnd";
        src = new Path(DATA_PATH);
        if (!lfs.exists(src)) {
            DATA_PATH = "../vxquery-xtest/src/test/resources/TestSources/ghcnd";
            src = new Path(DATA_PATH);
        }
    }
    dfs.mkdirs(new Path("/tmp"));
    Path dest = new Path("/tmp/vxquery-hdfs-test");
    dfs.copyFromLocalFile(src, dest);
    if (dfs.exists(dest)) {
        System.err.println("Test files copied to HDFS successfully");
    }
}

From source file:org.archive.jbs.Parse.java

License:Apache License

/**
 * Command-line driver.  Runs the Parse as a Hadoop job.
 */
public static void main(String args[]) throws Exception {
    JobConf conf = new JobConf(Parse.class);

    // Load the default set of config properties, including the
    // essential properties needed by the bits of Nutch that we are
    // still using.  These properties can still be over-ridden by
    // command-line args.
    conf.addResource("conf-parse.xml");

    int result = ToolRunner.run(conf, new Parse(), args);

    System.exit(result);
}

From source file:org.deeplearning4j.hadoop.util.HdfsUtils.java

License:Apache License

/**
 * Adapted from
 * http://terrier.org/docs/v3.5/javadoc/org/terrier/utility/io/HadoopUtility.html#saveClassPathToJob%28org.apache.hadoop.mapred.JobConf%29
 * @param jobConf
 * @throws IOException
 */
public static List<Path> saveClassPathToJob(JobConf jobConf) throws Exception {
    String hdfs = getHost(jobConf);

    HdfsLock lock = new HdfsLock(hdfs);
    String hdfs2 = getHdfs(jobConf);
    if (jobConf.get(HDFS_HOST) != null) {
        if (lock.isLocked()) {
            List<Path> ret = lock.getPaths();
            StringBuffer files = new StringBuffer();
            StringBuffer classPath = new StringBuffer();
            for (Path path : ret) {
                files.append(hdfs2 + path.toString());
                files.append(",");
                classPath.append(hdfs2 + path.toString());
                classPath.append(":");
                jobConf.addResource(path.toUri().toURL());
            }
            String classPathToSet = classPath.toString().substring(0, classPath.lastIndexOf(":"));
            String filesToSet = files.toString().substring(0, files.lastIndexOf(","));
            log.info("Setting class path " + classPathToSet);
            log.info("Using files " + filesToSet);
            jobConf.set("mapred.cache.files", filesToSet);
            jobConf.set("mapred.job.classpath.files", classPathToSet);
            return ret;
        }
    }
    List<Path> paths = new ArrayList<Path>();
    log.info("Copying classpath to job");

    final String[] jars = findJarFiles(new String[] { System.getenv().get("CLASSPATH"),
            System.getProperty("java.class.path"), System.getProperty("surefire.test.class.path") });

    final FileSystem defFS = FileSystem.get(jobConf);
    int numFilesWritten = 0;
    for (String jarFile : jars) {
        //class path issues
        if (jarFile.contains("hadoop-client")) {
            log.info("Skipping hadoop-client");
            continue;
        } else if (jarFile.contains("mapreduce-run")) {
            log.info("Skipping map reduce run");
            continue;
        }

        Path srcJarFilePath = new Path("file:///" + jarFile);
        String filename = srcJarFilePath.getName();
        Path tmpJarFilePath = makeFile(jobConf, filename);
        log.info("Uploading " + jarFile + " to " + tmpJarFilePath.toString());
        try {
            defFS.copyFromLocalFile(srcJarFilePath, tmpJarFilePath);
            jobConf.addResource(tmpJarFilePath);
            paths.add(tmpJarFilePath);
            numFilesWritten++;
        } catch (Exception e) {
            for (Path path : paths) {
                if (defFS.exists(path))
                    defFS.delete(path, true);
            }

            lock.close();
            log.error(String.format("Exception writing to hdfs; rolling back %d jar files ", numFilesWritten),
                    e);
            throw new IOException("Couldn't write jar file " + jarFile);
        }
    }
    try {
        lock.create(paths);
    } catch (KeeperException.SessionExpiredException e) {
        lock = new HdfsLock(hdfs);
        lock.create(paths);

    }

    lock.close();
    //resolve any differences by removing  clashing names in the files (archives are removed from files)

    Set<Path> remove = new HashSet<Path>();
    for (Path path : paths) {
        boolean exists = false;
        try {
            exists = defFS.exists(path);
        } catch (IllegalArgumentException e) {
            exists = false;
        }
        if (!exists)
            remove.add(path);
    }
    paths.removeAll(remove);
    return paths;
}

From source file:org.lamapacos.preprocessor.filter.SegmentFilter.java

License:Apache License

@Override
public void configure(JobConf job) {
    setConf(job);
    //conf file
    job.addResource("lamapacos-preprocessor.xml");
    RegexsFilter.setPatterns(job);
    try {
        this.fs = FileSystem.get(getConf());
    } catch (IOException e) {
        LOG.error("IOException:", e);
    }
}