List of usage examples for org.apache.hadoop.mapred.JobConf: the no-argument constructor
public JobConf()
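Before the project examples below, here is a minimal, self-contained sketch of the no-argument constructor in use. It is illustrative only: the class name JobConfExample is made up, and it assumes a Hadoop MR1-era classpath; the "mapred.job.tracker" key and the FileSystem.get(JobConf) idiom are the same ones several of the examples below rely on.

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.mapred.JobConf;

public class JobConfExample {
    public static void main(String[] args) throws Exception {
        // new JobConf() loads the default Hadoop resources
        // (core-site.xml, mapred-site.xml) found on the classpath
        JobConf jobConf = new JobConf();

        // values can then be overridden programmatically; "local" runs
        // the classic MR1 framework in-process
        jobConf.set("mapred.job.tracker", "local");

        // a freshly constructed JobConf is also a common way to obtain a
        // FileSystem handle, as many of the snippets below do
        FileSystem fs = FileSystem.get(jobConf);
        System.out.println("working directory: " + fs.getWorkingDirectory());
    }
}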
From source file:cascading.flow.hadoop.util.HadoopUtil.java
License:Open Source License
public static JobConf createJobConf(Map<Object, Object> properties, JobConf defaultJobconf) {
    JobConf jobConf = defaultJobconf == null ? new JobConf() : copyJobConf(defaultJobconf);

    if (properties == null)
        return jobConf;

    return copyConfiguration(properties, jobConf);
}
From source file:cascading.flow.hadoop.util.HadoopUtil.java
License:Open Source License
public static Thread getHDFSShutdownHook() {
    Exception caughtException;

    try {
        // we must init the FS so the finalizer is registered
        FileSystem.getLocal(new JobConf());

        Field field = FileSystem.class.getDeclaredField("clientFinalizer");
        field.setAccessible(true);

        Thread finalizer = (Thread) field.get(null);

        if (finalizer != null)
            Runtime.getRuntime().removeShutdownHook(finalizer);

        return finalizer;
    } catch (NoSuchFieldException exception) {
        caughtException = exception;
    } catch (IllegalAccessException exception) {
        caughtException = exception;
    } catch (IOException exception) {
        caughtException = exception;
    }

    LOG.debug("unable to find and remove client hdfs shutdown hook, received exception: {}",
            caughtException.getClass().getName());

    return null;
}
From source file:cascading.flow.hadoop.util.HadoopUtil.java
License:Open Source License
public static Configuration removePropertiesFrom(Configuration jobConf, String... keys) {
    Map<Object, Object> properties = createProperties(jobConf);

    for (String key : keys)
        properties.remove(key);

    return copyConfiguration(properties, new JobConf());
}
From source file:cascading.flow.MapReduceFlowStep.java
License:Open Source License
@Override
protected JobConf getJobConf(JobConf parentConf) throws IOException {
    // initializing the sink against a fresh JobConf allows the sink to delete its output
    sink.sinkInit(new JobConf());

    // jobConf is the step's stored JobConf field, not the parentConf argument
    return jobConf;
}
From source file:cascading.hive.HivePartitionDemo.java
License:Open Source License
public static void main(String[] args) throws Exception {
    Properties properties = new Properties();
    AppProps.setApplicationName(properties, "cascading hive partitioning demo");

    JobConf jobConf = new JobConf();
    FileSystem fs = FileSystem.get(jobConf);
    fs.copyFromLocalFile(false, true, new Path(accessLog), new Path("/tmp/access.log"));

    String[] columnNames = new String[] { "ts", "customer", "bucket", "operation", "key", "region" };
    String[] columnTypes = new String[] { "timestamp", "string", "string", "string", "string", "string" };
    String[] partitionKeys = new String[] { "region" };

    HiveTableDescriptor partitionedDescriptor = new HiveTableDescriptor("mydb", "mytable", columnNames,
            columnTypes, partitionKeys, "\t");
    HiveTap hiveTap = new HiveTap(partitionedDescriptor, partitionedDescriptor.toScheme());
    Tap partitionTap = new HivePartitionTap(hiveTap);

    Fields allFields = new Fields(columnNames);
    Tap input = new Hfs(new TextDelimited(allFields), "hdfs:/tmp/access.log");

    class Echo extends BaseOperation implements Function {
        public Echo(Fields fieldDeclaration) {
            super(2, fieldDeclaration);
        }

        @Override
        public void operate(FlowProcess flowProcess, FunctionCall functionCall) {
            TupleEntry argument = functionCall.getArguments();
            functionCall.getOutputCollector().add(argument.getTuple());
        }
    }

    Pipe pipe = new Each(" import ", allFields, new Echo(allFields), Fields.RESULTS);
    Flow flow = new HadoopFlowConnector().connect(input, partitionTap, pipe);
    flow.complete();

    Class.forName("org.apache.hadoop.hive.jdbc.HiveDriver");
    Connection con = DriverManager.getConnection("jdbc:hive://", "", "");
    Statement stmt = con.createStatement();
    ResultSet rs = stmt.executeQuery("select * from mydb.mytable where region = 'ASIA' ");
    String[] names = partitionedDescriptor.getColumnNames();

    System.out.println("----------------------Hive JDBC--------------------------");
    while (rs.next()) {
        StringBuffer buf = new StringBuffer("JDBC>>> ");
        for (int i = 0; i < names.length; i++) {
            String name = names[i];
            buf.append(name).append("=").append(rs.getObject(i + 1)).append(", ");
        }
        System.out.println(buf.toString());
    }
    System.out.println("---------------------------------------------------------");
    stmt.close();
    con.close();

    // do the same as the JDBC above, but in Cascading.
    class RegionFilter extends BaseOperation implements Filter {
        final String region;

        public RegionFilter(String region) {
            this.region = region;
        }

        @Override
        public boolean isRemove(FlowProcess flowProcess, FilterCall filterCall) {
            if (filterCall.getArguments().getString("region").equals(this.region))
                return false;
            return true;
        }
    }

    Tap requestsInAsiaSink = new Hfs(new TextDelimited(allFields), "hdfs:/tmp/requests-from-asia",
            SinkMode.REPLACE);
    Pipe headPipe = new Each("requests from ASIA", allFields, new RegionFilter("ASIA"));
    Flow headFlow = new HadoopFlowConnector().connect(partitionTap, requestsInAsiaSink, headPipe);
    headFlow.complete();

    TupleEntryIterator tupleEntryIterator = requestsInAsiaSink.openForRead(headFlow.getFlowProcess());
    while (tupleEntryIterator.hasNext()) {
        TupleEntry tupleEntry = tupleEntryIterator.next();
        System.out.println("Cascading>>> " + tupleEntry);
    }
    tupleEntryIterator.close();
}
From source file:cascading.hive.HiveViewDemo.java
License:Open Source License
public static void main(String[] args) throws Exception {
    Properties properties = new Properties();
    AppProps.setApplicationName(properties, "cascading hive partitioning demo");

    JobConf jobConf = new JobConf();
    FileSystem fs = FileSystem.get(jobConf);
    fs.copyFromLocalFile(false, true, new Path(accessLog), new Path("/tmp/access.log"));

    String[] columnNames = new String[] { "ts", "customer", "bucket", "operation", "key", "region" };
    String[] columnTypes = new String[] { "timestamp", "string", "string", "string", "string", "string" };
    String[] partitionKeys = new String[] { "region" };

    HiveTableDescriptor partitionedDescriptor = new HiveTableDescriptor("mydb2", "mytable2", columnNames,
            columnTypes, partitionKeys, "\t");
    HiveTap hiveTap = new HiveTap(partitionedDescriptor, partitionedDescriptor.toScheme());
    Tap outputTap = new HivePartitionTap(hiveTap);

    class Echo extends BaseOperation implements Function {
        public Echo(Fields fieldDeclaration) {
            super(2, fieldDeclaration);
        }

        @Override
        public void operate(FlowProcess flowProcess, FunctionCall functionCall) {
            TupleEntry argument = functionCall.getArguments();
            functionCall.getOutputCollector().add(argument.getTuple());
        }
    }

    Fields allFields = new Fields(columnNames);
    Pipe pipe = new Each(" echo ", allFields, new Echo(allFields), Fields.RESULTS);
    Tap input = new Hfs(new TextDelimited(allFields), "hdfs:/tmp/access.log");

    Flow flow = new HadoopFlowConnector().connect(input, outputTap, pipe);
    flow.complete();

    String viewSelect = "select distinct customer from mydb2.mytable2 where region = 'ASIA'";
    String viewDef = "create or replace view customers_in_asia as " + viewSelect;

    HiveViewAnalyzer analyzer = new HiveViewAnalyzer();
    Collection<Tap> inputs = analyzer.asTaps(viewSelect);
    HiveFlow viewflow = new HiveFlow("create view", viewDef, inputs);
    viewflow.complete();

    Class.forName("org.apache.hadoop.hive.jdbc.HiveDriver");
    Connection con = DriverManager.getConnection("jdbc:hive://", "", "");
    Statement stmt = con.createStatement();
    ResultSet rs = stmt.executeQuery("select * from customers_in_asia ");

    System.out.println("----------------------Hive JDBC--------------------------");
    while (rs.next())
        System.out.println("customer=" + rs.getString(1));
    System.out.println("---------------------------------------------------------");
    stmt.close();
    con.close();
}
From source file:cascading.load.Main.java
License:Open Source License
private void printSummary(CascadeStats stats) throws IOException {
    stats.captureDetail();

    OutputStream outputStream = options.hasStatsRoot() ? new ByteArrayOutputStream() : System.out;
    PrintWriter writer = new PrintWriter(outputStream);

    writer.println(options);

    StatsPrinter.printCascadeStats(writer, stats);

    if (options.hasStatsRoot()) {
        String[] lines = outputStream.toString().split("\n");

        Hfs statsTap = new Hfs(new TextLine(), options.getStatsRoot(), SinkMode.REPLACE);

        TupleEntryCollector tapWriter = statsTap.openForWrite(new JobConf());

        for (String line : lines)
            tapWriter.add(new Tuple(line));

        tapWriter.close();
    }
}
From source file:cascading.platform.hadoop.HadoopPlatform.java
License:Open Source License
@Override
public synchronized void setUp() throws IOException {
    if (configuration != null)
        return;

    if (!isUseCluster()) {
        LOG.info("not using cluster");
        configuration = new JobConf();

        // enforce the local file system in local mode
        configuration.set("fs.default.name", "file:///");
        configuration.set("mapred.job.tracker", "local");
        configuration.set("mapreduce.jobtracker.staging.root.dir",
                System.getProperty("user.dir") + "/build/tmp/cascading/staging");

        String stagingDir = configuration.get("mapreduce.jobtracker.staging.root.dir");

        if (Util.isEmpty(stagingDir))
            configuration.set("mapreduce.jobtracker.staging.root.dir",
                    System.getProperty("user.dir") + "/build/tmp/cascading/staging");

        fileSys = FileSystem.get(configuration);
    } else {
        LOG.info("using cluster");

        if (Util.isEmpty(System.getProperty("hadoop.log.dir")))
            System.setProperty("hadoop.log.dir", "cascading-hadoop/build/test/log");

        if (Util.isEmpty(System.getProperty("hadoop.tmp.dir")))
            System.setProperty("hadoop.tmp.dir", "cascading-hadoop/build/test/tmp");

        new File(System.getProperty("hadoop.log.dir")).mkdirs(); // ignored

        JobConf conf = new JobConf();

        if (!Util.isEmpty(System.getProperty("mapred.jar"))) {
            LOG.info("using a remote cluster with jar: {}", System.getProperty("mapred.jar"));
            configuration = conf;

            ((JobConf) configuration).setJar(System.getProperty("mapred.jar"));

            if (!Util.isEmpty(System.getProperty("fs.default.name"))) {
                LOG.info("using {}={}", "fs.default.name", System.getProperty("fs.default.name"));
                configuration.set("fs.default.name", System.getProperty("fs.default.name"));
            }

            if (!Util.isEmpty(System.getProperty("mapred.job.tracker"))) {
                LOG.info("using {}={}", "mapred.job.tracker", System.getProperty("mapred.job.tracker"));
                configuration.set("mapred.job.tracker", System.getProperty("mapred.job.tracker"));
            }

            configuration.set("mapreduce.user.classpath.first", "true"); // use test dependencies

            fileSys = FileSystem.get(configuration);
        } else {
            dfs = new MiniDFSCluster(conf, 4, true, null);
            fileSys = dfs.getFileSystem();
            mr = new MiniMRCluster(4, fileSys.getUri().toString(), 1, null, null, conf);

            configuration = mr.createJobConf();
        }

        // jobConf.set( "mapred.map.max.attempts", "1" );
        // jobConf.set( "mapred.reduce.max.attempts", "1" );
        configuration.set("mapred.child.java.opts", "-Xmx512m");
        configuration.setInt("mapred.job.reuse.jvm.num.tasks", -1);
        configuration.setInt("jobclient.completion.poll.interval", 50);
        configuration.setInt("jobclient.progress.monitor.poll.interval", 50);
        ((JobConf) configuration).setMapSpeculativeExecution(false);
        ((JobConf) configuration).setReduceSpeculativeExecution(false);
    }

    ((JobConf) configuration).setNumMapTasks(numMappers);
    ((JobConf) configuration).setNumReduceTasks(numReducers);

    Map<Object, Object> globalProperties = getGlobalProperties();

    if (logger != null)
        globalProperties.put("log4j.logger", logger);

    FlowProps.setJobPollingInterval(globalProperties, 10); // should speed up tests

    HadoopPlanner.copyProperties((JobConf) configuration, globalProperties); // copy any external properties

    HadoopPlanner.copyJobConf(properties, (JobConf) configuration); // put all properties on the jobconf
}
From source file:cascading.platform.hadoop2.Hadoop2MR1Platform.java
License:Open Source License
@Override
public synchronized void setUp() throws IOException {
    if (configuration != null)
        return;

    if (!isUseCluster()) {
        LOG.info("not using cluster");
        configuration = new JobConf();

        // enforce settings to make local mode behave the same across distributions
        configuration.set("fs.defaultFS", "file:///");
        configuration.set("mapreduce.framework.name", "local");
        configuration.set("mapreduce.jobtracker.staging.root.dir",
                System.getProperty("user.dir") + "/" + "build/tmp/cascading/staging");

        String stagingDir = configuration.get("mapreduce.jobtracker.staging.root.dir");

        if (Util.isEmpty(stagingDir))
            configuration.set("mapreduce.jobtracker.staging.root.dir",
                    System.getProperty("user.dir") + "/build/tmp/cascading/staging");

        fileSys = FileSystem.get(configuration);
    } else {
        LOG.info("using cluster");

        if (Util.isEmpty(System.getProperty("hadoop.log.dir")))
            System.setProperty("hadoop.log.dir", "build/test/log");

        if (Util.isEmpty(System.getProperty("hadoop.tmp.dir")))
            System.setProperty("hadoop.tmp.dir", "build/test/tmp");

        new File(System.getProperty("hadoop.log.dir")).mkdirs(); // ignored

        JobConf conf = new JobConf();

        if (!Util.isEmpty(System.getProperty("mapred.jar"))) {
            LOG.info("using a remote cluster with jar: {}", System.getProperty("mapred.jar"));
            configuration = conf;

            ((JobConf) configuration).setJar(System.getProperty("mapred.jar"));

            if (!Util.isEmpty(System.getProperty("fs.default.name"))) {
                LOG.info("using {}={}", "fs.default.name", System.getProperty("fs.default.name"));
                configuration.set("fs.default.name", System.getProperty("fs.default.name"));
            }

            if (!Util.isEmpty(System.getProperty("mapred.job.tracker"))) {
                LOG.info("using {}={}", "mapred.job.tracker", System.getProperty("mapred.job.tracker"));
                configuration.set("mapred.job.tracker", System.getProperty("mapred.job.tracker"));
            }

            if (!Util.isEmpty(System.getProperty("fs.defaultFS"))) {
                LOG.info("using {}={}", "fs.defaultFS", System.getProperty("fs.defaultFS"));
                configuration.set("fs.defaultFS", System.getProperty("fs.defaultFS"));
            }

            if (!Util.isEmpty(System.getProperty("yarn.resourcemanager.address"))) {
                LOG.info("using {}={}", "yarn.resourcemanager.address",
                        System.getProperty("yarn.resourcemanager.address"));
                configuration.set("yarn.resourcemanager.address",
                        System.getProperty("yarn.resourcemanager.address"));
            }

            if (!Util.isEmpty(System.getProperty("mapreduce.jobhistory.address"))) {
                LOG.info("using {}={}", "mapreduce.jobhistory.address",
                        System.getProperty("mapreduce.jobhistory.address"));
                configuration.set("mapreduce.jobhistory.address",
                        System.getProperty("mapreduce.jobhistory.address"));
            }

            configuration.set("mapreduce.user.classpath.first", "true"); // use test dependencies
            configuration.set("mapreduce.framework.name", "yarn");

            fileSys = FileSystem.get(configuration);
        } else {
            conf.setBoolean("yarn.is.minicluster", true);
            // conf.setInt( "yarn.nodemanager.delete.debug-delay-sec", -1 );
            // conf.set( "yarn.scheduler.capacity.root.queues", "default" );
            // conf.set( "yarn.scheduler.capacity.root.default.capacity", "100" );
            // disable blacklisting hosts not to fail localhost during unit tests
            conf.setBoolean("yarn.app.mapreduce.am.job.node-blacklisting.enable", false);

            dfs = new MiniDFSCluster(conf, 4, true, null);
            fileSys = dfs.getFileSystem();

            FileSystem.setDefaultUri(conf, fileSys.getUri());

            mr = MiniMRClientClusterFactory.create(this.getClass(), 4, conf);

            configuration = mr.getConfig();
        }

        configuration.set("mapred.child.java.opts", "-Xmx512m");
        configuration.setInt("mapreduce.job.jvm.numtasks", -1);
        configuration.setInt("mapreduce.client.completion.pollinterval", 50);
        configuration.setInt("mapreduce.client.progressmonitor.pollinterval", 50);
        configuration.setBoolean("mapreduce.map.speculative", false);
        configuration.setBoolean("mapreduce.reduce.speculative", false);
    }

    configuration.setInt("mapreduce.job.maps", numMappers);
    configuration.setInt("mapreduce.job.reduces", numReducers);

    Map<Object, Object> globalProperties = getGlobalProperties();

    if (logger != null)
        globalProperties.put("log4j.logger", logger);

    FlowProps.setJobPollingInterval(globalProperties, 10); // should speed up tests

    Hadoop2MR1Planner.copyProperties(configuration, globalProperties); // copy any external properties

    Hadoop2MR1Planner.copyConfiguration(properties, configuration); // put all properties on the jobconf
}
From source file:cascading.plumber.grids.AbstractGridTest.java
License:Apache License
@Test
public void shouldCopyJobConfIntoProperties() {
    JobConf jobConf = new JobConf();
    jobConf.set(KEY, VALUE);

    new MockGrid().createFlowConnector(jobConf);
}