List of usage examples for org.apache.hadoop.mapred.JobConf.getStrings
public String[] getStrings(String name)
Returns the comma-delimited values of the name property as an array of Strings, or null if the property is not set.
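Before the source-file examples, a minimal self-contained sketch of the call itself. The property name my.input.columns and its value are made up for illustration:

import org.apache.hadoop.mapred.JobConf;

public class GetStringsDemo {
    public static void main(String[] args) {
        JobConf conf = new JobConf();
        // Hypothetical property: getStrings splits the raw value on commas.
        conf.set("my.input.columns", "id,name,score");

        String[] cols = conf.getStrings("my.input.columns");
        System.out.println(String.join(" | ", cols)); // id | name | score

        // An unset property yields null, so callers should guard against it.
        String[] missing = conf.getStrings("no.such.property");
        System.out.println(missing == null); // true
    }
}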
From source file:IndexService.IColumnInputFormat.java
License:Open Source License
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
    Path tmpPath = null;
    FileSystem fs = FileSystem.get(job);
    List<IColumnInputSplit> splits = new ArrayList<IColumnInputSplit>();
    HashMap<String, FileStatus> files = new HashMap<String, FileStatus>();

    // "mapred.input.dir" holds the comma-delimited input paths; getStrings splits them.
    String[] inputfiles = job.getStrings("mapred.input.dir");
    for (String file : inputfiles) {
        // For each input path, keep the largest matching index file.
        FileStatus[] fss = fs.globStatus(new Path(file + "_idx*"));
        FileStatus status = null;
        long length = 0;
        for (FileStatus ss : fss) {
            if (ss.getLen() > length) {
                length = ss.getLen();
                status = ss;
            }
        }
        files.put(file, status);
    }

    for (String filekey : files.keySet()) {
        FileStatus file = files.get(filekey);
        Path path = file.getPath();
        Path keypath = new Path(filekey);
        long length = file.getLen();
        tmpPath = keypath;
        BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length);
        if (blkLocations.length <= 1) {
            IColumnInputSplit split = new IColumnInputSplit(keypath, length, blkLocations[0].getHosts());
            splits.add(split);
        } else {
            // Multi-block file: create one split per segment recorded in the index.
            String filename = path.toString();
            IFormatDataFile ifd = new IFormatDataFile(job);
            ifd.open(filename);
            ISegmentIndex segmentIndex = ifd.segIndex();
            for (int i = 0; i < segmentIndex.getSegnum(); i++) {
                IColumnInputSplit split = new IColumnInputSplit(keypath, segmentIndex.getseglen(i),
                        segmentIndex.getILineIndex(i).beginline(),
                        segmentIndex.getILineIndex(i).endline() - segmentIndex.getILineIndex(i).beginline() + 1,
                        blkLocations[i].getHosts());
                splits.add(split);
            }
            ifd.close();
        }
    }

    if (splits.size() == 0) {
        splits.add(new IColumnInputSplit(tmpPath, 0, 0, 0, new String[0]));
    }
    System.out.println("Total # of splits: " + splits.size());
    return splits.toArray(new IColumnInputSplit[splits.size()]);
}
From source file:IndexService.IndexIFormatOutputWriter.java
License:Open Source License
public IndexIFormatOutputWriter(String fileName, JobConf job) throws IOException {
    this.conf = job;
    ifdf = new IFormatDataFile(job);
    ihead = new IHead();

    // Each field-map entry encodes a type/index pair, delimited by ConstVar.RecordSplit;
    // getStrings splits the comma-delimited outer list.
    String[] fieldStrings = job.getStrings(ConstVar.HD_fieldMap);
    IFieldMap fieldMap = new IFieldMap();
    for (int i = 0; i < fieldStrings.length; i++) {
        String[] def = fieldStrings[i].split(ConstVar.RecordSplit);
        byte type = Byte.valueOf(def[0]);
        int index = Short.valueOf(def[1]);
        fieldMap.addFieldType(new IRecord.IFType(type, index));
    }
    ihead.setFieldMap(fieldMap);

    // Record the index-to-file mapping in the user-defined header info.
    String[] files = job.getStrings(ConstVar.HD_index_filemap);
    IUserDefinedHeadInfo iudhi = new IUserDefinedHeadInfo();
    iudhi.addInfo(123456, job.get("datafiletype"));
    for (int i = 0; i < files.length; i++) {
        iudhi.addInfo(i, files[i]);
    }
    ihead.setUdi(iudhi);
    ihead.setPrimaryIndex(0);
    ifdf.create(fileName, ihead);
    record = ifdf.getIRecordObj();
}
From source file:IndexService.IndexMergeIFormatWriter.java
License:Open Source License
public IndexMergeIFormatWriter(String fileName, JobConf job) throws IOException {
    this.conf = job;
    ifdf = new IFormatDataFile(job);
    ihead = new IHead();

    String[] fieldStrings = job.getStrings(ConstVar.HD_fieldMap);
    IFieldMap fieldMap = new IFieldMap();
    for (int i = 0; i < fieldStrings.length; i++) {
        String[] def = fieldStrings[i].split(ConstVar.RecordSplit);
        byte type = Byte.valueOf(def[0]);
        int index = Short.valueOf(def[1]);
        fieldMap.addFieldType(new IRecord.IFType(type, index));
    }
    ihead.setFieldMap(fieldMap);

    String[] files = job.getStrings(ConstVar.HD_index_filemap);
    IUserDefinedHeadInfo iudhi = new IUserDefinedHeadInfo();
    iudhi.addInfo(123456, job.get("datafiletype"));
    for (int i = 0; i < files.length; i++) {
        iudhi.addInfo(i, files[i]);
    }
    ihead.setUdi(iudhi);
    ihead.setPrimaryIndex(0);
    ifdf.create(fileName, ihead);
    record = ifdf.getIRecordObj();
}
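The two writers above share a pattern worth noting: getStrings supplies the comma-delimited outer list, and each entry is then split again on a secondary delimiter (ConstVar.RecordSplit) into a type/index pair. A minimal sketch of that round-trip, with the property name hd.field.map and the ":" delimiter standing in for the project-specific ConstVar constants, which are not shown in the source:

import org.apache.hadoop.mapred.JobConf;

public class FieldMapSketch {
    public static void main(String[] args) {
        JobConf job = new JobConf();
        // Assumed encoding: each entry is "<type>:<index>"; ":" is a stand-in
        // for ConstVar.RecordSplit, whose real value is project-specific.
        job.setStrings("hd.field.map", "1:0", "3:1", "5:2");

        for (String def : job.getStrings("hd.field.map")) {
            String[] parts = def.split(":");
            byte type = Byte.parseByte(parts[0]);
            int index = Integer.parseInt(parts[1]);
            System.out.println("field type=" + type + " index=" + index);
        }
    }
}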
From source file:org.apache.sysml.runtime.matrix.mapred.CSVAssignRowIDMapper.java
License:Apache License
@Override
@SuppressWarnings("deprecation")
public void configure(JobConf job) {
    byte thisIndex;
    try {
        // It doesn't make sense to have repeated file names in the input, since this is for reblock.
        thisIndex = MRJobConfiguration.getInputMatrixIndexesInMapper(job).get(0);
        outKey.set(thisIndex);
        Path thisPath = new Path(job.get(MRConfigurationNames.MR_MAP_INPUT_FILE));
        FileSystem fs = IOUtilFunctions.getFileSystem(thisPath, job);
        thisPath = thisPath.makeQualified(fs);
        filename = thisPath.toString();

        // One smallest-file name per input, stored as a comma-delimited list.
        String[] strs = job.getStrings(CSVReblockMR.SMALLEST_FILE_NAME_PER_INPUT);
        Path headerPath = new Path(strs[thisIndex]).makeQualified(fs);
        headerFile = headerPath.toString().equals(filename);

        CSVReblockInstruction[] reblockInstructions = MRJobConfiguration.getCSVReblockInstructions(job);
        for (CSVReblockInstruction ins : reblockInstructions)
            if (ins.input == thisIndex) {
                delim = Pattern.quote(ins.delim);
                ignoreFirstLine = ins.hasHeader;
                break;
            }
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
From source file:org.apache.sysml.runtime.matrix.mapred.CSVReblockMapper.java
License:Apache License
@Override
@SuppressWarnings("deprecation")
public void configure(JobConf job) {
    super.configure(job);
    // Get the number of columns per block and load the offset mapping.
    byte matrixIndex = representativeMatrixes.get(0);
    try {
        Path thisPath = new Path(job.get(MRConfigurationNames.MR_MAP_INPUT_FILE));
        FileSystem fs = IOUtilFunctions.getFileSystem(thisPath, job);
        thisPath = thisPath.makeQualified(fs);
        String filename = thisPath.toString();
        Path headerPath = new Path(job.getStrings(CSVReblockMR.SMALLEST_FILE_NAME_PER_INPUT)[matrixIndex])
                .makeQualified(fs);
        if (headerPath.toString().equals(filename))
            headerFile = true;

        ByteWritable key = new ByteWritable();
        OffsetCount value = new OffsetCount();
        Path p = new Path(job.get(CSVReblockMR.ROWID_FILE_NAME));
        SequenceFile.Reader reader = null;
        try {
            reader = new SequenceFile.Reader(fs, p, job);
            while (reader.next(key, value)) {
                if (key.get() == matrixIndex && filename.equals(value.filename))
                    offsetMap.put(value.fileOffset, value.count);
            }
        } finally {
            IOUtilFunctions.closeSilently(reader);
        }
    } catch (IOException e) {
        throw new RuntimeException(e);
    }

    CSVReblockInstruction ins = csv_reblock_instructions.get(0).get(0);
    _delim = ins.delim;
    ignoreFirstLine = ins.hasHeader;

    idxRow = new IndexedBlockRow();
    int maxBclen = 0;
    for (ArrayList<CSVReblockInstruction> insv : csv_reblock_instructions)
        for (CSVReblockInstruction in : insv) {
            if (maxBclen < in.bclen)
                maxBclen = in.bclen;
        }
    // Always dense, since this is the common CSV use case.
    idxRow.getRow().data.reset(1, maxBclen, false);
}
From source file:org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration.java
License:Apache License
public static ArrayList<Byte> getInputMatrixIndexesInMapper(JobConf job) throws IOException {
    String[] matrices = job.getStrings(INPUT_MATRICIES_DIRS_CONFIG);
    String str = job.get(MAPFUNC_INPUT_MATRICIES_INDEXES_CONFIG);
    byte[] indexes;
    if (str == null || str.isEmpty()) {
        indexes = new byte[matrices.length];
        for (int i = 0; i < indexes.length; i++)
            indexes[i] = (byte) i;
    } else {
        String[] strs = str.split(Instruction.INSTRUCTION_DELIM);
        indexes = new byte[strs.length];
        for (int i = 0; i < strs.length; i++)
            indexes[i] = Byte.parseByte(strs[i]);
    }

    int numMatrices = matrices.length;
    if (numMatrices > Byte.MAX_VALUE)
        throw new RuntimeException("number of matrices is too large > " + Byte.MAX_VALUE);
    for (int i = 0; i < matrices.length; i++)
        matrices[i] = new Path(matrices[i]).toString();

    // Match the current input file (or its parent directory) against the configured matrix dirs.
    Path thisFile = new Path(job.get(MRConfigurationNames.MR_MAP_INPUT_FILE));
    FileSystem fs = IOUtilFunctions.getFileSystem(thisFile, job);
    thisFile = thisFile.makeQualified(fs);
    Path thisDir = thisFile.getParent().makeQualified(fs);
    ArrayList<Byte> representativeMatrixes = new ArrayList<>();
    for (int i = 0; i < matrices.length; i++) {
        Path p = new Path(matrices[i]).makeQualified(fs);
        if (thisFile.toUri().equals(p.toUri()) || thisDir.toUri().equals(p.toUri()))
            representativeMatrixes.add(indexes[i]);
    }
    return representativeMatrixes;
}
From source file:org.saarus.service.hadoop.util.JsonOutputFormat.java
License:Apache License
@Override
public RecordWriter<K, V> getRecordWriter(FileSystem ignored, JobConf job, String name, Progressable progress)
        throws IOException {
    boolean isCompressed = getCompressOutput(job);
    String keyValueSeparator = job.get("mapred.textoutputformat.separator", "\t");
    // "column.headers" is a comma-delimited list of output column names.
    String[] headers = job.getStrings("column.headers");
    if (!isCompressed) {
        Path file = FileOutputFormat.getTaskOutputPath(job, name);
        FileSystem fs = file.getFileSystem(job);
        FSDataOutputStream fileOut = fs.create(file, progress);
        return new JsonRecordWriter<K, V>(fileOut, keyValueSeparator, headers);
    } else {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class);
        // Create the named codec.
        CompressionCodec codec = ReflectionUtils.newInstance(codecClass, job);
        // Build the filename, including the codec's extension.
        Path file = FileOutputFormat.getTaskOutputPath(job, name + codec.getDefaultExtension());
        FileSystem fs = file.getFileSystem(job);
        FSDataOutputStream fileOut = fs.create(file, progress);
        return new JsonRecordWriter<K, V>(new DataOutputStream(codec.createOutputStream(fileOut)),
                keyValueSeparator, headers);
    }
}
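For completeness, a hedged sketch of the producer side: the readers above consume comma-delimited lists that a job driver would typically write with Configuration.setStrings, which joins its varargs with commas. The driver class and the use of "column.headers" as a free-form property are illustrative, mirroring the JsonOutputFormat example above:

import org.apache.hadoop.mapred.JobConf;

// Hypothetical driver snippet: setStrings joins the values with commas,
// so JsonOutputFormat's job.getStrings("column.headers") recovers them.
public class JsonJobDriver {
    public static void main(String[] args) {
        JobConf job = new JobConf();
        job.setStrings("column.headers", "id", "name", "score");
        // Stored as "id,name,score"; getStrings("column.headers") returns {"id", "name", "score"}.
        System.out.println(job.get("column.headers"));
    }
}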