Example usage for org.apache.hadoop.mapred JobConf getStrings

Introduction

On this page you can find example usage for org.apache.hadoop.mapred JobConf.getStrings.

Prototype

public String[] getStrings(String name) 

Document

Get the comma-delimited values of the name property as an array of Strings. If no such property is set, null is returned.
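
For a quick orientation, here is a minimal round-trip sketch (the property name demo.hosts is invented for illustration):

JobConf conf = new JobConf();
// setStrings stores the values as one comma-delimited property value.
conf.setStrings("demo.hosts", "host1", "host2", "host3");
// getStrings splits it back into an array: ["host1", "host2", "host3"].
// If the property were unset, getStrings would return null.
String[] hosts = conf.getStrings("demo.hosts");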

Usage

From source file: IndexService.IColumnInputFormat.java

License: Open Source License

public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
    Path tmpPath = null;
    FileSystem fs = FileSystem.get(job);
    List<IColumnInputSplit> splits = new ArrayList<IColumnInputSplit>();
    HashMap<String, FileStatus> files = new HashMap<String, FileStatus>();
    // mapred.input.dir holds the job's input paths as one comma-delimited list
    String[] inputfiles = job.getStrings("mapred.input.dir");

    // for each input file, keep only the largest matching "_idx*" index file
    for (String file : inputfiles) {
        FileStatus[] fss = fs.globStatus(new Path(file + "_idx*"));
        FileStatus status = null;
        long length = 0;
        for (FileStatus ss : fss) {
            if (ss.getLen() > length) {
                length = ss.getLen();
                status = ss;
            }
        }
        files.put(file, status);
    }

    for (String filekey : files.keySet()) {
        FileStatus file = files.get(filekey);
        Path path = file.getPath();
        Path keypath = new Path(filekey);
        long length = file.getLen();

        tmpPath = keypath;

        BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length);

        if (blkLocations.length <= 1) {
            // single-block file: one split covers the whole file
            // (zero-length files report no block locations, hence the guard)
            String[] hosts = (blkLocations.length == 0) ? new String[0] : blkLocations[0].getHosts();
            IColumnInputSplit split = new IColumnInputSplit(keypath, length, hosts);
            splits.add(split);
        } else {

            String filename = path.toString();
            IFormatDataFile ifd = new IFormatDataFile(job);
            ifd.open(filename);

            ISegmentIndex segmentIndex = ifd.segIndex();

            for (int i = 0; i < segmentIndex.getSegnum(); i++) {
                IColumnInputSplit split = new IColumnInputSplit(keypath, segmentIndex.getseglen(i),
                        segmentIndex.getILineIndex(i).beginline(),
                        segmentIndex.getILineIndex(i).endline() - segmentIndex.getILineIndex(i).beginline() + 1,
                        blkLocations[i].getHosts());
                splits.add(split);
            }

            ifd.close();
        }
    }

    if (splits.size() == 0) {
        splits.add(new IColumnInputSplit(tmpPath, 0, 0, 0, new String[0]));
    }

    System.out.println("Total # of splits: " + splits.size());
    return splits.toArray(new IColumnInputSplit[splits.size()]);

}
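
For context, mapred.input.dir, read via getStrings above, is the property the old mapred API populates when input paths are registered. A driver-side sketch (the paths are invented):

JobConf job = new JobConf();
// FileInputFormat joins the paths into a single comma-delimited
// value under mapred.input.dir, which getSplits above recovers
// with job.getStrings("mapred.input.dir").
FileInputFormat.setInputPaths(job, new Path("/data/part1"), new Path("/data/part2"));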

From source file: IndexService.IndexIFormatOutputWriter.java

License: Open Source License

public IndexIFormatOutputWriter(String fileName, JobConf job) throws IOException {
    this.conf = job;
    ifdf = new IFormatDataFile(job);
    ihead = new IHead();
    String[] fieldStrings = job.getStrings(ConstVar.HD_fieldMap);
    IFieldMap fieldMap = new IFieldMap();
    for (int i = 0; i < fieldStrings.length; i++) {
        // each entry encodes a field as "type" + ConstVar.RecordSplit + "index"
        String[] def = fieldStrings[i].split(ConstVar.RecordSplit);
        byte type = Byte.parseByte(def[0]);
        int index = Short.parseShort(def[1]);
        fieldMap.addFieldType(new IRecord.IFType(type, index));
    }
    ihead.setFieldMap(fieldMap);

    String[] files = job.getStrings(ConstVar.HD_index_filemap);
    IUserDefinedHeadInfo iudhi = new IUserDefinedHeadInfo();
    // record the data file type under a fixed key, then map each
    // index position to its source file name
    iudhi.addInfo(123456, job.get("datafiletype"));
    for (int i = 0; i < files.length; i++) {
        iudhi.addInfo(i, files[i]);
    }
    ihead.setUdi(iudhi);
    ihead.setPrimaryIndex(0);
    ifdf.create(fileName, ihead);
    record = ifdf.getIRecordObj();
}
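
The constructor above expects ConstVar.HD_fieldMap and ConstVar.HD_index_filemap to be set on the job beforehand. A hypothetical driver-side sketch, assuming ConstVar.RecordSplit is the separator used when parsing each field entry (the literal values are invented):

JobConf job = new JobConf();
String sep = ConstVar.RecordSplit;
// Each entry encodes a field as "type" + separator + "index",
// mirroring the parsing loop in the constructor above.
job.setStrings(ConstVar.HD_fieldMap, "1" + sep + "0", "3" + sep + "1");
// One entry per data file participating in the index.
job.setStrings(ConstVar.HD_index_filemap, "/idx/file0", "/idx/file1");
job.set("datafiletype", "iformat");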

From source file: IndexService.IndexMergeIFormatWriter.java

License: Open Source License

public IndexMergeIFormatWriter(String fileName, JobConf job) throws IOException {
    this.conf = job;
    ifdf = new IFormatDataFile(job);
    ihead = new IHead();
    String[] fieldStrings = job.getStrings(ConstVar.HD_fieldMap);
    IFieldMap fieldMap = new IFieldMap();
    for (int i = 0; i < fieldStrings.length; i++) {
        // same "type" + ConstVar.RecordSplit + "index" encoding as above
        String[] def = fieldStrings[i].split(ConstVar.RecordSplit);
        byte type = Byte.parseByte(def[0]);
        int index = Short.parseShort(def[1]);
        fieldMap.addFieldType(new IRecord.IFType(type, index));
    }
    ihead.setFieldMap(fieldMap);

    String[] files = job.getStrings(ConstVar.HD_index_filemap);
    IUserDefinedHeadInfo iudhi = new IUserDefinedHeadInfo();
    iudhi.addInfo(123456, job.get("datafiletype"));
    for (int i = 0; i < files.length; i++) {
        iudhi.addInfo(i, files[i]);
    }

    ihead.setUdi(iudhi);
    ihead.setPrimaryIndex(0);
    ifdf.create(fileName, ihead);
    record = ifdf.getIRecordObj();
}

From source file: org.apache.sysml.runtime.matrix.mapred.CSVAssignRowIDMapper.java

License: Apache License

@Override
@SuppressWarnings("deprecation")
public void configure(JobConf job) {
    byte thisIndex;
    try {
        //it doesn't make sense to have repeated file names in the input, since this is for reblock
        thisIndex = MRJobConfiguration.getInputMatrixIndexesInMapper(job).get(0);
        outKey.set(thisIndex);
        Path thisPath = new Path(job.get(MRConfigurationNames.MR_MAP_INPUT_FILE));
        FileSystem fs = IOUtilFunctions.getFileSystem(thisPath, job);
        thisPath = thisPath.makeQualified(fs);
        filename = thisPath.toString();
        String[] strs = job.getStrings(CSVReblockMR.SMALLEST_FILE_NAME_PER_INPUT);
        Path headerPath = new Path(strs[thisIndex]).makeQualified(fs);
        headerFile = headerPath.toString().equals(filename);

        CSVReblockInstruction[] reblockInstructions = MRJobConfiguration.getCSVReblockInstructions(job);
        for (CSVReblockInstruction ins : reblockInstructions)
            if (ins.input == thisIndex) {
                delim = Pattern.quote(ins.delim);
                ignoreFirstLine = ins.hasHeader;
                break;
            }
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}

From source file: org.apache.sysml.runtime.matrix.mapred.CSVReblockMapper.java

License: Apache License

@Override
@SuppressWarnings("deprecation")
public void configure(JobConf job) {
    super.configure(job);
    //get the number of columns per block

    //load the offset mapping
    byte matrixIndex = representativeMatrixes.get(0);
    try {
        Path thisPath = new Path(job.get(MRConfigurationNames.MR_MAP_INPUT_FILE));
        FileSystem fs = IOUtilFunctions.getFileSystem(thisPath, job);
        thisPath = thisPath.makeQualified(fs);
        String filename = thisPath.toString();
        Path headerPath = new Path(job.getStrings(CSVReblockMR.SMALLEST_FILE_NAME_PER_INPUT)[matrixIndex])
                .makeQualified(fs);
        if (headerPath.toString().equals(filename))
            headerFile = true;

        ByteWritable key = new ByteWritable();
        OffsetCount value = new OffsetCount();
        Path p = new Path(job.get(CSVReblockMR.ROWID_FILE_NAME));
        SequenceFile.Reader reader = null;
        try {
            reader = new SequenceFile.Reader(fs, p, job);
            while (reader.next(key, value)) {
                if (key.get() == matrixIndex && filename.equals(value.filename))
                    offsetMap.put(value.fileOffset, value.count);
            }
        } finally {
            IOUtilFunctions.closeSilently(reader);
        }
    } catch (IOException e) {
        throw new RuntimeException(e);
    }

    CSVReblockInstruction ins = csv_reblock_instructions.get(0).get(0);
    _delim = ins.delim;
    ignoreFirstLine = ins.hasHeader;

    idxRow = new IndexedBlockRow();
    int maxBclen = 0;

    for (ArrayList<CSVReblockInstruction> insv : csv_reblock_instructions)
        for (CSVReblockInstruction in : insv) {
            if (maxBclen < in.bclen)
                maxBclen = in.bclen;
        }

    //always dense since common csv usecase
    idxRow.getRow().data.reset(1, maxBclen, false);
}

From source file: org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration.java

License: Apache License

public static ArrayList<Byte> getInputMatrixIndexesInMapper(JobConf job) throws IOException {
    String[] matrices = job.getStrings(INPUT_MATRICIES_DIRS_CONFIG);
    String str = job.get(MAPFUNC_INPUT_MATRICIES_INDEXES_CONFIG);
    byte[] indexes;
    if (str == null || str.isEmpty()) {
        indexes = new byte[matrices.length];
        for (int i = 0; i < indexes.length; i++)
            indexes[i] = (byte) i;
    } else {
        String[] strs = str.split(Instruction.INSTRUCTION_DELIM);
        indexes = new byte[strs.length];
        for (int i = 0; i < strs.length; i++)
            indexes[i] = Byte.parseByte(strs[i]);
    }

    int numMatrices = matrices.length;
    if (numMatrices > Byte.MAX_VALUE)
        throw new RuntimeException("number of matrices is too large > " + Byte.MAX_VALUE);
    for (int i = 0; i < matrices.length; i++)
        matrices[i] = new Path(matrices[i]).toString();

    Path thisFile = new Path(job.get(MRConfigurationNames.MR_MAP_INPUT_FILE));
    FileSystem fs = IOUtilFunctions.getFileSystem(thisFile, job);
    thisFile = thisFile.makeQualified(fs);

    Path thisDir = thisFile.getParent().makeQualified(fs);
    ArrayList<Byte> representativeMatrixes = new ArrayList<>();
    for (int i = 0; i < matrices.length; i++) {
        Path p = new Path(matrices[i]).makeQualified(fs);
        if (thisFile.toUri().equals(p.toUri()) || thisDir.toUri().equals(p.toUri()))
            representativeMatrixes.add(indexes[i]);
    }
    return representativeMatrixes;
}

From source file: org.saarus.service.hadoop.util.JsonOutputFormat.java

License: Apache License

@Override
public RecordWriter<K, V> getRecordWriter(FileSystem ignored, JobConf job, String name, Progressable progress)
        throws IOException {
    boolean isCompressed = getCompressOutput(job);
    String keyValueSeparator = job.get("mapred.textoutputformat.separator", "\t");
    String[] headers = job.getStrings("column.headers");
    if (!isCompressed) {
        Path file = FileOutputFormat.getTaskOutputPath(job, name);
        FileSystem fs = file.getFileSystem(job);
        FSDataOutputStream fileOut = fs.create(file, progress);
        return new JsonRecordWriter<K, V>(fileOut, keyValueSeparator, headers);
    } else {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class);
        // create the named codec
        CompressionCodec codec = ReflectionUtils.newInstance(codecClass, job);
        // build the filename including the extension
        Path file = FileOutputFormat.getTaskOutputPath(job, name + codec.getDefaultExtension());
        FileSystem fs = file.getFileSystem(job);
        FSDataOutputStream fileOut = fs.create(file, progress);
        return new JsonRecordWriter<K, V>(new DataOutputStream(codec.createOutputStream(fileOut)),
                keyValueSeparator, headers);
    }
}
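
Note that getStrings(name) returns null when the property is absent, so a job using this output format is expected to set column.headers up front, for example (the header names are invented):

job.setStrings("column.headers", "id", "name", "age");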