Example usage for org.apache.hadoop.mapred JobConf JobConf

Introduction

On this page you can find usage examples for the no-argument constructor of org.apache.hadoop.mapred.JobConf.

Prototype

public JobConf() 

Document

Construct a map/reduce job configuration.
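
As a point of reference before the examples, here is a minimal, self-contained sketch of constructing a JobConf and setting a few common options; the job name, property key, and paths below are illustrative, not taken from any of the sources that follow.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;

public class JobConfExample {
    public static void main(String[] args) {
        // construct an empty map/reduce job configuration;
        // defaults are loaded from the Hadoop resources on the classpath
        JobConf conf = new JobConf();

        conf.setJobName("jobconf-example");      // illustrative name
        conf.set("mapred.job.tracker", "local"); // run in local mode

        // placeholder input/output locations
        FileInputFormat.setInputPaths(conf, new Path("/tmp/in"));
        FileOutputFormat.setOutputPath(conf, new Path("/tmp/out"));

        System.out.println(conf.getJobName());
    }
}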

Usage

From source file: cascading.flow.hadoop.util.HadoopUtil.java

License: Open Source License

public static JobConf createJobConf(Map<Object, Object> properties, JobConf defaultJobconf) {
    JobConf jobConf = defaultJobconf == null ? new JobConf() : copyJobConf(defaultJobconf);

    if (properties == null)
        return jobConf;

    return copyConfiguration(properties, jobConf);
}
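
A hypothetical call site for this helper; the property key and value are illustrative:

    Map<Object, Object> properties = new HashMap<Object, Object>();
    properties.put("mapred.reduce.tasks", "4");

    // a null default means the result starts from a fresh new JobConf()
    JobConf jobConf = HadoopUtil.createJobConf(properties, null);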

From source file: cascading.flow.hadoop.util.HadoopUtil.java

License: Open Source License

public static Thread getHDFSShutdownHook() {
    Exception caughtException;

    try {
        // we must init the FS so the finalizer is registered
        FileSystem.getLocal(new JobConf());

        Field field = FileSystem.class.getDeclaredField("clientFinalizer");
        field.setAccessible(true);

        Thread finalizer = (Thread) field.get(null);

        if (finalizer != null)
            Runtime.getRuntime().removeShutdownHook(finalizer);

        return finalizer;
    } catch (NoSuchFieldException exception) {
        caughtException = exception;
    } catch (IllegalAccessException exception) {
        caughtException = exception;
    } catch (IOException exception) {
        caughtException = exception;
    }

    LOG.debug("unable to find and remove client hdfs shutdown hook, received exception: {}",
            caughtException.getClass().getName());

    return null;
}
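
Because the hook has been removed from the Runtime, the caller takes over responsibility for running it. A hedged sketch of one way this might be used; the timing is an assumption, not taken from the source:

    Thread hdfsShutdownHook = HadoopUtil.getHDFSShutdownHook();

    // ... run all remaining work ...

    if (hdfsShutdownHook != null)
        hdfsShutdownHook.run(); // close the cached FileSystem clients explicitly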

From source file: cascading.flow.hadoop.util.HadoopUtil.java

License: Open Source License

public static Configuration removePropertiesFrom(Configuration jobConf, String... keys) {
    Map<Object, Object> properties = createProperties(jobConf);

    for (String key : keys)
        properties.remove(key);

    return copyConfiguration(properties, new JobConf());
}
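
A hypothetical call site; the keys being removed are illustrative:

    Configuration cleaned = HadoopUtil.removePropertiesFrom(jobConf,
            "mapred.job.tracker", "fs.default.name");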

From source file: cascading.flow.MapReduceFlowStep.java

License: Open Source License

@Override
protected JobConf getJobConf(JobConf parentConf) throws IOException {
    // init the sink so it is permitted to delete any existing resource
    sink.sinkInit(new JobConf());

    return jobConf; // jobConf is an instance field, not derived from parentConf
}

From source file: cascading.hive.HivePartitionDemo.java

License: Open Source License

public static void main(String[] args) throws Exception {
    Properties properties = new Properties();
    AppProps.setApplicationName(properties, "cascading hive partitioning demo");

    JobConf jobConf = new JobConf();
    FileSystem fs = FileSystem.get(jobConf);
    fs.copyFromLocalFile(false, true, new Path(accessLog), new Path("/tmp/access.log"));

    String[] columnNames = new String[] { "ts", "customer", "bucket", "operation", "key", "region" };
    String[] columnTypes = new String[] { "timestamp", "string", "string", "string", "string", "string" };
    String[] partitionKeys = new String[] { "region" };

    HiveTableDescriptor partitionedDescriptor = new HiveTableDescriptor("mydb", "mytable", columnNames,
            columnTypes, partitionKeys, "\t");

    HiveTap hiveTap = new HiveTap(partitionedDescriptor, partitionedDescriptor.toScheme());

    Tap partitionTap = new HivePartitionTap(hiveTap);

    Fields allFields = new Fields(columnNames);

    Tap input = new Hfs(new TextDelimited(allFields), "hdfs:/tmp/access.log");

    class Echo extends BaseOperation implements Function {
        public Echo(Fields fieldDeclaration) {
            super(2, fieldDeclaration);
        }

        @Override
        public void operate(FlowProcess flowProcess, FunctionCall functionCall) {
            TupleEntry argument = functionCall.getArguments();
            functionCall.getOutputCollector().add(argument.getTuple());
        }
    }

    Pipe pipe = new Each(" import ", allFields, new Echo(allFields), Fields.RESULTS);

    Flow flow = new HadoopFlowConnector().connect(input, partitionTap, pipe);

    flow.complete();

    Class.forName("org.apache.hadoop.hive.jdbc.HiveDriver");

    Connection con = DriverManager.getConnection("jdbc:hive://", "", "");
    Statement stmt = con.createStatement();

    ResultSet rs = stmt.executeQuery("select * from mydb.mytable where region = 'ASIA' ");

    String[] names = partitionedDescriptor.getColumnNames();
    System.out.println("----------------------Hive JDBC--------------------------");
    while (rs.next()) {
        StringBuffer buf = new StringBuffer("JDBC>>> ");
        for (int i = 0; i < names.length; i++) {
            String name = names[i];
            buf.append(name).append("=").append(rs.getObject(i + 1)).append(", ");
        }
        System.out.println(buf.toString());
    }
    System.out.println("---------------------------------------------------------");
    stmt.close();
    con.close();

    // do the same as the JDBC above, but in Cascading.

    class RegionFilter extends BaseOperation implements Filter {
        final String region;

        public RegionFilter(String region) {
            this.region = region;
        }

        @Override
        public boolean isRemove(FlowProcess flowProcess, FilterCall filterCall) {
            if (filterCall.getArguments().getString("region").equals(this.region))
                return false;
            return true;
        }
    }

    Tap requestsInAsiaSink = new Hfs(new TextDelimited(allFields), "hdfs:/tmp/requests-from-asia",
            SinkMode.REPLACE);

    Pipe headPipe = new Each("requests from ASIA", allFields, new RegionFilter("ASIA"));

    Flow headFlow = new HadoopFlowConnector().connect(partitionTap, requestsInAsiaSink, headPipe);

    headFlow.complete();

    TupleEntryIterator tupleEntryIterator = requestsInAsiaSink.openForRead(headFlow.getFlowProcess());

    while (tupleEntryIterator.hasNext()) {
        TupleEntry tupleEntry = tupleEntryIterator.next();
        System.out.println("Cascading>>> " + tupleEntry);
    }
    tupleEntryIterator.close();
}
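
Both reads above return the same rows: one through Hive's JDBC driver, one through a Cascading flow over the partition tap. Note the JDBC URL jdbc:hive:// with an empty host, which the pre-HiveServer2 driver loaded here (org.apache.hadoop.hive.jdbc.HiveDriver) interprets as embedded mode.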

From source file: cascading.hive.HiveViewDemo.java

License: Open Source License

public static void main(String[] args) throws Exception {
    Properties properties = new Properties();
    AppProps.setApplicationName(properties, "cascading hive partitioning demo");

    JobConf jobConf = new JobConf();

    FileSystem fs = FileSystem.get(jobConf);
    fs.copyFromLocalFile(false, true, new Path(accessLog), new Path("/tmp/access.log"));

    String[] columnNames = new String[] { "ts", "customer", "bucket", "operation", "key", "region" };
    String[] columnTypes = new String[] { "timestamp", "string", "string", "string", "string", "string" };
    String[] partitionKeys = new String[] { "region" };

    HiveTableDescriptor partitionedDescriptor = new HiveTableDescriptor("mydb2", "mytable2", columnNames,
            columnTypes, partitionKeys, "\t");

    HiveTap hiveTap = new HiveTap(partitionedDescriptor, partitionedDescriptor.toScheme());

    Tap outputTap = new HivePartitionTap(hiveTap);
    class Echo extends BaseOperation implements Function {
        public Echo(Fields fieldDeclaration) {
            super(2, fieldDeclaration);
        }

        @Override
        public void operate(FlowProcess flowProcess, FunctionCall functionCall) {
            TupleEntry argument = functionCall.getArguments();
            functionCall.getOutputCollector().add(argument.getTuple());
        }
    }

    Fields allFields = new Fields(columnNames);

    Pipe pipe = new Each(" echo ", allFields, new Echo(allFields), Fields.RESULTS);

    Tap input = new Hfs(new TextDelimited(allFields), "hdfs:/tmp/access.log");

    Flow flow = new HadoopFlowConnector().connect(input, outputTap, pipe);

    flow.complete();

    String viewSelect = "select distinct customer from mydb2.mytable2 where region = 'ASIA'";
    String viewDef = "create or replace view customers_in_asia as " + viewSelect;
    HiveViewAnalyzer analyzer = new HiveViewAnalyzer();
    Collection<Tap> inputs = analyzer.asTaps(viewSelect);

    HiveFlow viewflow = new HiveFlow("create view", viewDef, inputs);
    viewflow.complete();

    Class.forName("org.apache.hadoop.hive.jdbc.HiveDriver");

    Connection con = DriverManager.getConnection("jdbc:hive://", "", "");
    Statement stmt = con.createStatement();

    ResultSet rs = stmt.executeQuery("select * from customers_in_asia ");

    System.out.println("----------------------Hive JDBC--------------------------");
    while (rs.next())
        System.out.println("customer=" + rs.getString(1));

    System.out.println("---------------------------------------------------------");
    stmt.close();
    con.close();

}

From source file: cascading.load.Main.java

License: Open Source License

private void printSummary(CascadeStats stats) throws IOException {
    stats.captureDetail();

    OutputStream outputStream = options.hasStatsRoot() ? new ByteArrayOutputStream() : System.out;
    PrintWriter writer = new PrintWriter(outputStream);

    writer.println(options);

    StatsPrinter.printCascadeStats(writer, stats);

    if (options.hasStatsRoot()) {
        String[] lines = outputStream.toString().split("\n");

        Hfs statsTap = new Hfs(new TextLine(), options.getStatsRoot(), SinkMode.REPLACE);

        TupleEntryCollector tapWriter = statsTap.openForWrite(new JobConf());

        for (String line : lines)
            tapWriter.add(new Tuple(line));

        tapWriter.close();
    }
}
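
Note that statsTap.openForWrite(new JobConf()) obtains a TupleEntryCollector outside of any running flow; the fresh JobConf simply supplies the default Hadoop configuration. A minimal sketch of the same write pattern in isolation, assuming the same Cascading version in which the tap's openForWrite accepts a JobConf (the path and content are placeholders):

    Hfs tap = new Hfs(new TextLine(), "/tmp/lines", SinkMode.REPLACE);

    TupleEntryCollector collector = tap.openForWrite(new JobConf());
    collector.add(new Tuple("first line"));
    collector.add(new Tuple("second line"));
    collector.close();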

From source file: cascading.platform.hadoop.HadoopPlatform.java

License: Open Source License

@Override
public synchronized void setUp() throws IOException {
    if (configuration != null)
        return;

    if (!isUseCluster()) {
        LOG.info("not using cluster");
        configuration = new JobConf();

        // enforce the local file system in local mode
        configuration.set("fs.default.name", "file:///");
        configuration.set("mapred.job.tracker", "local");
        configuration.set("mapreduce.jobtracker.staging.root.dir",
                System.getProperty("user.dir") + "/build/tmp/cascading/staging");

        String stagingDir = configuration.get("mapreduce.jobtracker.staging.root.dir");

        if (Util.isEmpty(stagingDir))
            configuration.set("mapreduce.jobtracker.staging.root.dir",
                    System.getProperty("user.dir") + "/build/tmp/cascading/staging");

        fileSys = FileSystem.get(configuration);
    } else {
        LOG.info("using cluster");

        if (Util.isEmpty(System.getProperty("hadoop.log.dir")))
            System.setProperty("hadoop.log.dir", "cascading-hadoop/build/test/log");

        if (Util.isEmpty(System.getProperty("hadoop.tmp.dir")))
            System.setProperty("hadoop.tmp.dir", "cascading-hadoop/build/test/tmp");

        new File(System.getProperty("hadoop.log.dir")).mkdirs(); // ignored

        JobConf conf = new JobConf();

        if (!Util.isEmpty(System.getProperty("mapred.jar"))) {
            LOG.info("using a remote cluster with jar: {}", System.getProperty("mapred.jar"));
            configuration = conf;

            ((JobConf) configuration).setJar(System.getProperty("mapred.jar"));

            if (!Util.isEmpty(System.getProperty("fs.default.name"))) {
                LOG.info("using {}={}", "fs.default.name", System.getProperty("fs.default.name"));
                configuration.set("fs.default.name", System.getProperty("fs.default.name"));
            }

            if (!Util.isEmpty(System.getProperty("mapred.job.tracker"))) {
                LOG.info("using {}={}", "mapred.job.tracker", System.getProperty("mapred.job.tracker"));
                configuration.set("mapred.job.tracker", System.getProperty("mapred.job.tracker"));
            }

            configuration.set("mapreduce.user.classpath.first", "true"); // use test dependencies
            fileSys = FileSystem.get(configuration);
        } else {
            dfs = new MiniDFSCluster(conf, 4, true, null);
            fileSys = dfs.getFileSystem();
            mr = new MiniMRCluster(4, fileSys.getUri().toString(), 1, null, null, conf);

            configuration = mr.createJobConf();
        }

        //      jobConf.set( "mapred.map.max.attempts", "1" );
        //      jobConf.set( "mapred.reduce.max.attempts", "1" );
        configuration.set("mapred.child.java.opts", "-Xmx512m");
        configuration.setInt("mapred.job.reuse.jvm.num.tasks", -1);
        configuration.setInt("jobclient.completion.poll.interval", 50);
        configuration.setInt("jobclient.progress.monitor.poll.interval", 50);
        ((JobConf) configuration).setMapSpeculativeExecution(false);
        ((JobConf) configuration).setReduceSpeculativeExecution(false);
    }

    ((JobConf) configuration).setNumMapTasks(numMappers);
    ((JobConf) configuration).setNumReduceTasks(numReducers);

    Map<Object, Object> globalProperties = getGlobalProperties();

    if (logger != null)
        globalProperties.put("log4j.logger", logger);

    FlowProps.setJobPollingInterval(globalProperties, 10); // should speed up tests

    HadoopPlanner.copyProperties((JobConf) configuration, globalProperties); // copy any external properties

    HadoopPlanner.copyJobConf(properties, (JobConf) configuration); // put all properties on the jobconf
}

From source file: cascading.platform.hadoop2.Hadoop2MR1Platform.java

License: Open Source License

@Override
public synchronized void setUp() throws IOException {
    if (configuration != null)
        return;

    if (!isUseCluster()) {
        LOG.info("not using cluster");
        configuration = new JobConf();

        // enforce settings to make local mode behave the same across distributions
        configuration.set("fs.defaultFS", "file:///");
        configuration.set("mapreduce.framework.name", "local");
        configuration.set("mapreduce.jobtracker.staging.root.dir",
                System.getProperty("user.dir") + "/" + "build/tmp/cascading/staging");

        String stagingDir = configuration.get("mapreduce.jobtracker.staging.root.dir");

        if (Util.isEmpty(stagingDir))
            configuration.set("mapreduce.jobtracker.staging.root.dir",
                    System.getProperty("user.dir") + "/build/tmp/cascading/staging");

        fileSys = FileSystem.get(configuration);
    } else {
        LOG.info("using cluster");

        if (Util.isEmpty(System.getProperty("hadoop.log.dir")))
            System.setProperty("hadoop.log.dir", "build/test/log");

        if (Util.isEmpty(System.getProperty("hadoop.tmp.dir")))
            System.setProperty("hadoop.tmp.dir", "build/test/tmp");

        new File(System.getProperty("hadoop.log.dir")).mkdirs(); // ignored

        JobConf conf = new JobConf();

        if (!Util.isEmpty(System.getProperty("mapred.jar"))) {
            LOG.info("using a remote cluster with jar: {}", System.getProperty("mapred.jar"));
            configuration = conf;

            ((JobConf) configuration).setJar(System.getProperty("mapred.jar"));

            if (!Util.isEmpty(System.getProperty("fs.default.name"))) {
                LOG.info("using {}={}", "fs.default.name", System.getProperty("fs.default.name"));
                configuration.set("fs.default.name", System.getProperty("fs.default.name"));
            }

            if (!Util.isEmpty(System.getProperty("mapred.job.tracker"))) {
                LOG.info("using {}={}", "mapred.job.tracker", System.getProperty("mapred.job.tracker"));
                configuration.set("mapred.job.tracker", System.getProperty("mapred.job.tracker"));
            }

            if (!Util.isEmpty(System.getProperty("fs.defaultFS"))) {
                LOG.info("using {}={}", "fs.defaultFS", System.getProperty("fs.defaultFS"));
                configuration.set("fs.defaultFS", System.getProperty("fs.defaultFS"));
            }

            if (!Util.isEmpty(System.getProperty("yarn.resourcemanager.address"))) {
                LOG.info("using {}={}", "yarn.resourcemanager.address",
                        System.getProperty("yarn.resourcemanager.address"));
                configuration.set("yarn.resourcemanager.address",
                        System.getProperty("yarn.resourcemanager.address"));
            }

            if (!Util.isEmpty(System.getProperty("mapreduce.jobhistory.address"))) {
                LOG.info("using {}={}", "mapreduce.jobhistory.address",
                        System.getProperty("mapreduce.jobhistory.address"));
                configuration.set("mapreduce.jobhistory.address",
                        System.getProperty("mapreduce.jobhistory.address"));
            }

            configuration.set("mapreduce.user.classpath.first", "true"); // use test dependencies
            configuration.set("mapreduce.framework.name", "yarn");

            fileSys = FileSystem.get(configuration);
        } else {
            conf.setBoolean("yarn.is.minicluster", true);
            //      conf.setInt( "yarn.nodemanager.delete.debug-delay-sec", -1 );
            //      conf.set( "yarn.scheduler.capacity.root.queues", "default" );
            //      conf.set( "yarn.scheduler.capacity.root.default.capacity", "100" );
            // disable blacklisting hosts not to fail localhost during unit tests
            conf.setBoolean("yarn.app.mapreduce.am.job.node-blacklisting.enable", false);

            dfs = new MiniDFSCluster(conf, 4, true, null);
            fileSys = dfs.getFileSystem();

            FileSystem.setDefaultUri(conf, fileSys.getUri());

            mr = MiniMRClientClusterFactory.create(this.getClass(), 4, conf);

            configuration = mr.getConfig();
        }

        configuration.set("mapred.child.java.opts", "-Xmx512m");
        configuration.setInt("mapreduce.job.jvm.numtasks", -1);
        configuration.setInt("mapreduce.client.completion.pollinterval", 50);
        configuration.setInt("mapreduce.client.progressmonitor.pollinterval", 50);
        configuration.setBoolean("mapreduce.map.speculative", false);
        configuration.setBoolean("mapreduce.reduce.speculative", false);
    }

    configuration.setInt("mapreduce.job.maps", numMappers);
    configuration.setInt("mapreduce.job.reduces", numReducers);

    Map<Object, Object> globalProperties = getGlobalProperties();

    if (logger != null)
        globalProperties.put("log4j.logger", logger);

    FlowProps.setJobPollingInterval(globalProperties, 10); // should speed up tests

    Hadoop2MR1Planner.copyProperties(configuration, globalProperties); // copy any external properties

    Hadoop2MR1Planner.copyConfiguration(properties, configuration); // put all properties on the jobconf
}
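
Compared to the HadoopPlatform example above, this Hadoop 2 variant configures the newer property names (fs.defaultFS, mapreduce.framework.name, mapreduce.job.maps/reduces) in place of the deprecated Hadoop 1 keys (fs.default.name, mapred.job.tracker, and the setNumMapTasks/setNumReduceTasks setters).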

From source file: cascading.plumber.grids.AbstractGridTest.java

License: Apache License

@Test
public void shouldCopyJobConfIntoProperties() {
    JobConf jobConf = new JobConf();
    jobConf.set(KEY, VALUE);
    new MockGrid().createFlowConnector(jobConf);
}
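
KEY and VALUE are constants defined elsewhere in the test class, and MockGrid is test scaffolding from the same project. As a rough illustration of the behavior the test name describes, copying JobConf entries into a Properties object can be done by iterating the configuration; this is an assumption about the intent, not the project's actual implementation:

    Properties properties = new Properties();

    // JobConf extends Configuration, which is Iterable over its key/value entries
    for (Map.Entry<String, String> entry : jobConf)
        properties.setProperty(entry.getKey(), entry.getValue());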