List of usage examples for org.apache.hadoop.mapred.JobConf.set
public void set(String name, String value)
Sets the value of the name property.
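Before the source-file examples below, here is a minimal sketch of the call itself. The property key "example.custom.flag" and its value are made up for illustration; any string key/value pair behaves the same way.

import org.apache.hadoop.mapred.JobConf;

public class JobConfSetExample {
  public static void main(String[] args) {
    JobConf conf = new JobConf();

    // store an arbitrary string property; "example.custom.flag" is a hypothetical key
    conf.set("example.custom.flag", "true");

    // the stored value can be read back with get(); prints "true"
    System.out.println(conf.get("example.custom.flag"));
  }
}

Typed setters such as setBoolean and setInt cover non-string values, as several of the examples below also show.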
From source file:cascading.tap.hadoop.HadoopMR1TapPlatformTest.java
License:Open Source License
@Test
public void testCombinedHfs() throws Exception {
  getPlatform().copyFromLocal(inputFileLower);
  getPlatform().copyFromLocal(inputFileUpper);

  Hfs sourceLower = new Hfs(new TextLine(new Fields("offset", "line")), InputData.inputFileLower);
  Hfs sourceUpper = new Hfs(new TextLine(new Fields("offset", "line")), InputData.inputFileUpper);

  // create a CombinedHfs instance on these files
  Tap source = new MultiSourceTap<Hfs, JobConf, RecordReader>(sourceLower, sourceUpper);

  FlowProcess<JobConf> process = getPlatform().getFlowProcess();
  JobConf conf = process.getConfigCopy();

  // set the combine flag
  conf.setBoolean(HfsProps.COMBINE_INPUT_FILES, true);

  conf.set("cascading.flow.platform", "hadoop"); // only supported on mr based platforms

  // test the input format and the split
  source.sourceConfInit(process, conf);

  InputFormat inputFormat = conf.getInputFormat();

  assertEquals(Hfs.CombinedInputFormat.class, inputFormat.getClass());

  InputSplit[] splits = inputFormat.getSplits(conf, 1);

  assertEquals(1, splits.length);

  validateLength(source.openForRead(process), 10);
}
From source file:cascading.tap.hadoop.io.MultiInputFormat.java
License:Open Source License
/**
 * Used to set the current JobConf with all sub jobs configurations.
 *
 * @param toJob
 * @param fromJobs
 */
public static void addInputFormat(JobConf toJob, JobConf... fromJobs) {
  toJob.setInputFormat(MultiInputFormat.class);

  List<Map<String, String>> configs = new ArrayList<Map<String, String>>();
  List<Path> allPaths = new ArrayList<Path>();

  boolean isLocal = false;

  for (JobConf fromJob : fromJobs) {
    if (fromJob.get("mapred.input.format.class") == null)
      throw new CascadingException(
          "mapred.input.format.class is required, should be set in source Scheme#sourceConfInit");

    configs.add(HadoopUtil.getConfig(toJob, fromJob));
    Collections.addAll(allPaths, FileInputFormat.getInputPaths(fromJob));

    if (!isLocal)
      isLocal = HadoopUtil.isLocal(fromJob);
  }

  if (!allPaths.isEmpty()) // it's possible there aren't any
    FileInputFormat.setInputPaths(toJob, (Path[]) allPaths.toArray(new Path[allPaths.size()]));

  try {
    toJob.set("cascading.multiinputformats", HadoopUtil.serializeBase64(configs, toJob, true));
  } catch (IOException exception) {
    throw new CascadingException("unable to pack input formats", exception);
  }

  if (isLocal)
    HadoopUtil.setLocal(toJob);
}
From source file:cascading.tap.hadoop.MultiInputFormat.java
License:Open Source License
/**
 * Used to set the current JobConf with all sub jobs configurations.
 *
 * @param toJob
 * @param fromJobs
 */
public static void addInputFormat(JobConf toJob, JobConf... fromJobs) {
  toJob.setInputFormat(MultiInputFormat.class);

  List<Map<String, String>> configs = new ArrayList<Map<String, String>>();
  List<Path> allPaths = new ArrayList<Path>();

  boolean isLocal = false;

  for (JobConf fromJob : fromJobs) {
    configs.add(getConfig(toJob, fromJob));
    Collections.addAll(allPaths, FileInputFormat.getInputPaths(fromJob));

    if (!isLocal)
      isLocal = fromJob.get("mapred.job.tracker").equalsIgnoreCase("local");
  }

  FileInputFormat.setInputPaths(toJob, (Path[]) allPaths.toArray(new Path[allPaths.size()]));

  try {
    toJob.set("cascading.multiinputformats", Util.serializeBase64(configs));
  } catch (IOException exception) {
    throw new CascadingException("unable to pack input formats", exception);
  }

  if (isLocal)
    toJob.set("mapred.job.tracker", "local");
}
From source file:cascading.tap.hadoop.MultiInputFormat.java
License:Open Source License
static JobConf mergeConf(JobConf job, Map<String, String> config, boolean directly) {
  JobConf currentConf = directly ? job : new JobConf(job);

  for (String key : config.keySet()) {
    if (LOG.isDebugEnabled())
      LOG.debug("merging key: " + key + " value: " + config.get(key));

    currentConf.set(key, config.get(key));
  }

  return currentConf;
}
From source file:cascading.tap.Hfs.java
License:Open Source License
private void makeLocal(JobConf conf, Path qualifiedPath, String infoMessage) {
  if (!conf.get("mapred.job.tracker", "").equalsIgnoreCase("local")
      && qualifiedPath.toUri().getScheme().equalsIgnoreCase("file")) {
    if (LOG.isInfoEnabled())
      LOG.info(infoMessage + toString());

    conf.set("mapred.job.tracker", "local"); // force job to run locally
  }
}
From source file:cascading.tuple.hadoop.HadoopSerializationTest.java
License:Open Source License
public void testInputOutputSerialization() throws IOException {
  long time = System.currentTimeMillis();

  JobConf jobConf = new JobConf();

  jobConf.set("io.serializations",
      TestSerialization.class.getName() + "," + WritableSerialization.class.getName()); // disable/replace WritableSerialization class
  jobConf.set("cascading.serialization.tokens",
      "1000=" + BooleanWritable.class.getName() + ",10001=" + Text.class.getName()); // not using Text, just testing parsing

  TupleSerialization tupleSerialization = new TupleSerialization(jobConf);

  File file = new File(outputPath);

  file.mkdirs();

  file = new File(file, "/test.bytes");

  TupleOutputStream output = new TupleOutputStream(new FileOutputStream(file, false),
      tupleSerialization.getElementWriter());

  for (int i = 0; i < 501; i++) // 501 is arbitrary
  {
    String aString = "string number " + i;
    double random = Math.random();

    output.writeTuple(new Tuple(i, aString, random, new TestText(aString),
        new Tuple("inner tuple", new BytesWritable("some string".getBytes())),
        new BytesWritable(Integer.toString(i).getBytes("UTF-8")), new BooleanWritable(false)));
  }

  output.close();

  assertEquals("wrong size", 89967L, file.length()); // just makes sure the file size doesn't change from expected

  TupleInputStream input = new TupleInputStream(new FileInputStream(file),
      tupleSerialization.getElementReader());

  int k = -1;

  for (int i = 0; i < 501; i++) {
    Tuple tuple = input.readTuple();

    int value = tuple.getInteger(0);

    assertTrue("wrong diff", value - k == 1);
    assertTrue("wrong type", tuple.get(3) instanceof TestText);
    assertTrue("wrong type", tuple.get(4) instanceof Tuple);
    assertTrue("wrong type", tuple.get(5) instanceof BytesWritable);

    byte[] bytes = ((BytesWritable) tuple.get(5)).getBytes();
    String string = new String(bytes, 0, bytes.length > 1 ? bytes.length - 1 : bytes.length, "UTF-8");

    assertEquals("wrong value", Integer.parseInt(string), i);

    assertTrue("wrong type", tuple.get(6) instanceof BooleanWritable);

    k = value;
  }

  input.close();

  System.out.println("time = " + (System.currentTimeMillis() - time));
}
From source file:cascading.tuple.hadoop.TupleSerialization.java
License:Open Source License
/**
 * Adds this class as a Hadoop Serialization class. This method is safe to call redundantly.
 *
 * @param jobConf of type JobConf
 */
public static void setSerializations(JobConf jobConf) {
  String serializations = getSerializations(jobConf);

  if (serializations.contains(TupleSerialization.class.getName()))
    return;

  jobConf.set("io.serializations",
      Util.join(",", Util.removeNulls(serializations, TupleSerialization.class.getName())));
}
From source file:cascalog.TupleMemoryInputFormat.java
License:Open Source License
public static void setObject(JobConf conf, String key, Object o) {
  conf.set(key, StringUtils.byteToHexString(serialize(o)));
}
From source file:co.nubetech.apache.hadoop.mapred.DBQueryInputFormat.java
License:Apache License
/**
 * setInput() takes a custom query and a separate "bounding query" to use
 * instead of the custom "count query" used by DBInputFormat.
 */
public static void setInput(JobConf job, String inputQuery, String inputBoundingQuery, ArrayList params)
    throws IOException {
  DBInputFormat.setInput(job, GenericDBWritable.class, inputQuery, "");

  if (inputBoundingQuery != null) {
    job.set(DBConfiguration.INPUT_BOUNDING_QUERY, inputBoundingQuery);
  }

  if (params != null) {
    DefaultStringifier<ArrayList> stringifier = new DefaultStringifier<ArrayList>(job, ArrayList.class);
    job.set(HIHOConf.QUERY_PARAMS, stringifier.toString(params));
    logger.debug("Converted params and saved them into config");
  }

  job.setInputFormat(DBQueryInputFormat.class);
}
From source file:com.acme.extensions.data.SeedingHadoopAdapter.java
License:Apache License
@Override
protected void set(JobConf conf) throws Exception {
  super.set(conf);

  // set the input format to this adapter
  conf.setInputFormat(this.getClass());
  conf.set("seed", String.valueOf(seed));
}