List of usage examples for org.apache.hadoop.mapred JobConf getStrings
public String[] getStrings(String name)
Gets the comma-delimited values of the name property as an array of Strings.
From source file:com.cloudera.science.avro.streaming.AvroAsJSONInputFormat.java
License:Open Source License
private void loadSchemas(JobConf job) throws IOException { this.schemas = Lists.newArrayList(); SchemaLoader loader = new SchemaLoader(job); String schemaLiteral = job.get(SCHEMA_LITERAL); if (schemaLiteral != null) { schemas.add(loader.loadLiteral(schemaLiteral)); return;/* ww w .j a v a2s .c om*/ } else { String[] schemaUrls = job.getStrings(SCHEMA_URL); String[] typeNames = job.getStrings(SCHEMA_TYPE_NAME); if (schemaUrls != null) { for (String schemaUrl : schemaUrls) { schemas.add(loader.loadFromUrl(schemaUrl)); } } else if (typeNames != null) { for (String typeName : typeNames) { schemas.add(loader.loadFromTypeName(typeName)); } } else { throw new IllegalArgumentException("No schema information provided"); } if (schemas.size() > 1) { // Need to track input paths Path[] inputs = FileInputFormat.getInputPaths(job); if (inputs.length != schemas.size()) { throw new IllegalArgumentException(String.format( "Number of input paths (%d) does not match number of schemas specified (%d)", inputs.length, schemas.size())); } this.inputPaths = new String[inputs.length]; for (int i = 0; i < inputs.length; i++) { inputPaths[i] = inputs[i].toString(); } } } }
From source file:com.ebay.erl.mobius.core.mapred.MultiInputsHelpersRepository.java
License:Apache License
/** * constructor/*from ww w .j av a 2s.c om*/ */ private MultiInputsHelpersRepository(JobConf conf) { this.mapping = new TreeMap<Class<? extends InputFormat>, MultiInputsHelper>(new ClassComparator()); this.register(FileInputFormat.class, FileInputFormatHelper.class); if (!conf.get("mobius.multi.inputs.helpers", "").isEmpty()) { // mobius.multi.inputs.helpers in the format of> InputFormatClassName:HelperClassName(,InputFormatClassName:HelperClassName)? String[] helpers = conf.getStrings("mobius.multi.inputs.helpers"); for (String aHeler : helpers) { String[] data = aHeler.split(":"); String inputFormatClassName = data[0]; String helperClassName = data[1]; Class<? extends InputFormat> inputFormat = (Class<? extends InputFormat>) Util .getClass(inputFormatClassName); Class<? extends MultiInputsHelper> helperClass = (Class<? extends MultiInputsHelper>) Util .getClass(helperClassName); this.register(inputFormat, helperClass); } } }
From source file:com.ema.hadoop.bestclient.BCMapper.java
@Override public void map(LongWritable key, Text value, Mapper.Context context) throws IOException, InterruptedException { DateFormat formatter = new SimpleDateFormat("dd/MM/yyyy"); try {// ww w . j a v a 2 s .c o m String line = value.toString(); String[] lineTab = line.split(";"); String client = lineTab[0]; int somme = Integer.parseInt(lineTab[2]); Date date = formatter.parse(lineTab[1]); JobConf jobConf = (JobConf) context.getConfiguration(); String[] dateTable = jobConf.getStrings("dates"); Date dateStart = formatter.parse(dateTable[0]); Date dateEnd = formatter.parse(dateTable[1]); if (date.after(dateStart) && date.before(dateEnd)) { context.write(new Text(client), new IntWritable(somme)); } else { Logger.getLogger(BCMapper.class.getName()).log(Level.INFO, "ELSE ddddddddddddddddddddd"); Logger.getLogger(BCMapper.class.getName()).log(Level.INFO, "param start " + dateTable[0]); Logger.getLogger(BCMapper.class.getName()).log(Level.INFO, "Date start " + dateStart.toString()); Logger.getLogger(BCMapper.class.getName()).log(Level.INFO, "param fin " + dateTable[1]); Logger.getLogger(BCMapper.class.getName()).log(Level.INFO, "Date fin " + dateEnd.toString()); } } catch (ParseException e) { Logger.getLogger(BCMapper.class.getName()).log(Level.INFO, "Parse exception"); } }
From source file:com.ibm.bi.dml.runtime.matrix.mapred.CSVAssignRowIDMapper.java
License:Open Source License
@Override @SuppressWarnings("deprecation") public void configure(JobConf job) { byte thisIndex; try {/*w w w . ja v a 2 s . c o m*/ //it doesn't make sense to have repeated file names in the input, since this is for reblock thisIndex = MRJobConfiguration.getInputMatrixIndexesInMapper(job).get(0); outKey.set(thisIndex); FileSystem fs = FileSystem.get(job); Path thisPath = new Path(job.get("map.input.file")).makeQualified(fs); filename = thisPath.toString(); String[] strs = job.getStrings(CSVReblockMR.SMALLEST_FILE_NAME_PER_INPUT); Path headerPath = new Path(strs[thisIndex]).makeQualified(fs); if (headerPath.toString().equals(filename)) headerFile = true; } catch (IOException e) { throw new RuntimeException(e); } try { CSVReblockInstruction[] reblockInstructions = MRJobConfiguration.getCSVReblockInstructions(job); for (CSVReblockInstruction ins : reblockInstructions) { if (ins.input == thisIndex) { delim = Pattern.quote(ins.delim); ignoreFirstLine = ins.hasHeader; break; } } } catch (DMLUnsupportedOperationException e) { throw new RuntimeException(e); } catch (DMLRuntimeException e) { throw new RuntimeException(e); } // load properties relevant to transform try { boolean omit = job.getBoolean(MRJobConfiguration.TF_TRANSFORM, false); if (omit) _agents = new TfUtils(job, true); } catch (IOException e) { throw new RuntimeException(e); } catch (JSONException e) { throw new RuntimeException(e); } }
From source file:com.ibm.bi.dml.runtime.matrix.mapred.CSVReblockMapper.java
License:Open Source License
@Override @SuppressWarnings("deprecation") public void configure(JobConf job) { super.configure(job); //get the number colums per block //load the offset mapping byte matrixIndex = representativeMatrixes.get(0); try {/*from w w w . ja va 2s .c o m*/ FileSystem fs = FileSystem.get(job); Path thisPath = new Path(job.get("map.input.file")).makeQualified(fs); String filename = thisPath.toString(); Path headerPath = new Path(job.getStrings(CSVReblockMR.SMALLEST_FILE_NAME_PER_INPUT)[matrixIndex]) .makeQualified(fs); if (headerPath.toString().equals(filename)) headerFile = true; ByteWritable key = new ByteWritable(); OffsetCount value = new OffsetCount(); Path p = new Path(job.get(CSVReblockMR.ROWID_FILE_NAME)); SequenceFile.Reader reader = new SequenceFile.Reader(fs, p, job); while (reader.next(key, value)) { if (key.get() == matrixIndex && filename.equals(value.filename)) offsetMap.put(value.fileOffset, value.count); } reader.close(); } catch (IOException e) { throw new RuntimeException(e); } CSVReblockInstruction ins = csv_reblock_instructions.get(0).get(0); _delim = ins.delim; ignoreFirstLine = ins.hasHeader; idxRow = new IndexedBlockRow(); int maxBclen = 0; for (ArrayList<CSVReblockInstruction> insv : csv_reblock_instructions) for (CSVReblockInstruction in : insv) { if (maxBclen < in.bclen) maxBclen = in.bclen; } //always dense since common csv usecase idxRow.getRow().data.reset(1, maxBclen, false); }
From source file:com.ibm.bi.dml.runtime.matrix.mapred.MRJobConfiguration.java
License:Open Source License
public static ArrayList<Byte> getInputMatrixIndexesInMapper(JobConf job) throws IOException { String[] matrices = job.getStrings(INPUT_MATRICIES_DIRS_CONFIG); String str = job.get(MAPFUNC_INPUT_MATRICIES_INDEXES_CONFIG); byte[] indexes; if (str == null || str.isEmpty()) { indexes = new byte[matrices.length]; for (int i = 0; i < indexes.length; i++) indexes[i] = (byte) i; } else {// w w w .j a v a2s.co m String[] strs = str.split(Instruction.INSTRUCTION_DELIM); indexes = new byte[strs.length]; for (int i = 0; i < strs.length; i++) indexes[i] = Byte.parseByte(strs[i]); } int numMatrices = matrices.length; if (numMatrices > Byte.MAX_VALUE) throw new RuntimeException("number of matrices is too large > " + Byte.MAX_VALUE); for (int i = 0; i < matrices.length; i++) matrices[i] = new Path(matrices[i]).toString(); FileSystem fs = FileSystem.get(job); Path thisFile = new Path(job.get("map.input.file")).makeQualified(fs); //Path p=new Path(thisFileName); Path thisDir = thisFile.getParent().makeQualified(fs); ArrayList<Byte> representativeMatrixes = new ArrayList<Byte>(); for (int i = 0; i < matrices.length; i++) { Path p = new Path(matrices[i]).makeQualified(fs); if (thisFile.toUri().compareTo(p.toUri()) == 0 || thisDir.toUri().compareTo(p.toUri()) == 0) representativeMatrixes.add(indexes[i]); } return representativeMatrixes; }
From source file:com.ibm.bi.dml.runtime.matrix.mapred.MRJobConfiguration.java
License:Open Source License
/**
 * Reads the output matrix directories from the job configuration.
 *
 * @param job the job configuration
 * @return the values of the output-directories property, exactly as returned
 *         by {@code JobConf.getStrings}
 */
public static String[] getOutputs(JobConf job) {
    String[] outputDirs = job.getStrings(OUTPUT_MATRICES_DIRS_CONFIG);
    return outputDirs;
}
From source file:com.ibm.bi.dml.runtime.matrix.mapred.MRJobConfiguration.java
License:Open Source License
/**
 * Reads the input matrix directories from the job configuration.
 *
 * @param job the job configuration
 * @return the values of the input-directories property, exactly as returned
 *         by {@code JobConf.getStrings}
 */
public static String[] getInputPaths(JobConf job) {
    String[] inputDirs = job.getStrings(INPUT_MATRICIES_DIRS_CONFIG);
    return inputDirs;
}
From source file:FormatStorage.Head.java
License:Open Source License
public void fromJobConf(JobConf job) throws Exception { byte var = (byte) job.getInt(ConstVar.HD_var, 0); byte compress = (byte) job.getInt(ConstVar.HD_compress, 0); byte compressStyle = (byte) job.getInt(ConstVar.HD_compressStyle, 0); short primaryIndex = (short) job.getInt(ConstVar.HD_primaryIndex, -1); byte encode = (byte) job.getInt(ConstVar.HD_encode, 0); byte encodeStyle = (byte) job.getInt(ConstVar.HD_encodeStyle, 0); String keyString = job.get(ConstVar.HD_key); String[] fieldStrings = job.getStrings(ConstVar.HD_fieldMap); LOG.info("in fromJobConf, compressed:" + compress + ",compressStyle:" + compressStyle); setVar(var);//from ww w. j a v a 2 s . co m setCompress(compress); setCompressStyle(compressStyle); setEncode(encode); setEncodeStyle(encodeStyle); if (keyString != null && keyString.length() != 0) { setKey(keyString); } short fieldNum = 0; if (fieldStrings != null) { fieldNum = (short) fieldStrings.length; } FieldMap fieldMap = new FieldMap(); for (short i = 0; i < fieldNum; i++) { String[] def = fieldStrings[i].split(ConstVar.RecordSplit); byte type = Byte.valueOf(def[0]); int len = Integer.valueOf(def[1]); short index = Short.valueOf(def[2]); fieldMap.addField(new Field(type, len, index)); } setFieldMap(fieldMap); setPrimaryIndex(primaryIndex); }
From source file:hydrograph.engine.hadoop.recordreader.DelimitedAndFixedWidthRecordReader.java
License:Apache License
/**
 * Creates a record reader over one file split.
 *
 * <p>Reads the {@code lengthsAndDelimiters}, {@code lengthsAndDelimitersType},
 * {@code quote} and {@code charsetName} properties from the configuration,
 * opens the split's file, seeks to the split start, and wraps the stream in a
 * reader using the configured charset.
 *
 * @param conf  the job configuration
 * @param split the file split to read
 * @throws IOException if the file cannot be opened or positioned, or the
 *         charset is not supported
 */
public DelimitedAndFixedWidthRecordReader(JobConf conf, FileSplit split) throws IOException {
    // Parsing settings.
    this.lengthsAndDelimiters = DelimitedAndFixedWidthHelper.modifyIdentifier(
            DelimitedAndFixedWidthHelper.stringToArray(conf.get("lengthsAndDelimiters")));
    this.lengthsAndDelimitersType = conf.getStrings("lengthsAndDelimitersType");
    this.quote = conf.get("quote");
    this.charsetName = conf.get("charsetName");

    // Split boundaries; reading begins at the split start.
    this.start = split.getStart();
    this.pos = this.start;
    this.end = this.start + split.getLength();

    // Reusable buffers.
    this.singleChar = new char[1];
    this.stringBuilder = new StringBuilder();

    // Open the file and position the stream at the split start.
    this.file = split.getPath();
    this.fs = this.file.getFileSystem(conf);
    this.fileIn = this.fs.open(this.file);
    this.fileIn.seek(this.start);
    this.inputStreamReader = new InputStreamReader(this.fileIn, this.charsetName);

    this.isQuotePresent = isQuotePresent(this.quote);
}