List of usage examples for org.apache.hadoop.mapred.JobConf.getStrings
public String[] getStrings(String name)
Returns the comma-delimited values of the name property as an array of Strings, or null if the property is not set.
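Before the source-file examples, a minimal self-contained sketch of the call itself. The property name my.input.columns and its value are made up for illustration:

import org.apache.hadoop.mapred.JobConf;

public class GetStringsDemo {
    public static void main(String[] args) {
        JobConf conf = new JobConf();
        // Hypothetical property: getStrings splits the raw value on commas.
        conf.set("my.input.columns", "id,name,score");

        String[] cols = conf.getStrings("my.input.columns");
        System.out.println(String.join(" | ", cols)); // id | name | score

        // An unset property yields null, so callers should guard against it.
        String[] missing = conf.getStrings("no.such.property");
        System.out.println(missing == null); // true
    }
}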
From source file:IndexService.IColumnInputFormat.java
License:Open Source License
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
    Path tmpPath = null;
    FileSystem fs = FileSystem.get(job);
    List<IColumnInputSplit> splits = new ArrayList<IColumnInputSplit>();
    HashMap<String, FileStatus> files = new HashMap<String, FileStatus>();

    // "mapred.input.dir" holds the comma-delimited input paths; getStrings splits them.
    String[] inputfiles = job.getStrings("mapred.input.dir");
    for (String file : inputfiles) {
        // For each input path, keep the largest matching index file.
        FileStatus[] fss = fs.globStatus(new Path(file + "_idx*"));
        FileStatus status = null;
        long length = 0;
        for (FileStatus ss : fss) {
            if (ss.getLen() > length) {
                length = ss.getLen();
                status = ss;
            }
        }
        files.put(file, status);
    }

    for (String filekey : files.keySet()) {
        FileStatus file = files.get(filekey);
        Path path = file.getPath();
        Path keypath = new Path(filekey);
        long length = file.getLen();
        tmpPath = keypath;
        BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length);
        if (blkLocations.length <= 1) {
            IColumnInputSplit split = new IColumnInputSplit(keypath, length, blkLocations[0].getHosts());
            splits.add(split);
        } else {
            // Multi-block file: create one split per segment recorded in the index.
            String filename = path.toString();
            IFormatDataFile ifd = new IFormatDataFile(job);
            ifd.open(filename);
            ISegmentIndex segmentIndex = ifd.segIndex();
            for (int i = 0; i < segmentIndex.getSegnum(); i++) {
                IColumnInputSplit split = new IColumnInputSplit(keypath, segmentIndex.getseglen(i),
                        segmentIndex.getILineIndex(i).beginline(),
                        segmentIndex.getILineIndex(i).endline() - segmentIndex.getILineIndex(i).beginline() + 1,
                        blkLocations[i].getHosts());
                splits.add(split);
            }
            ifd.close();
        }
    }

    if (splits.size() == 0) {
        splits.add(new IColumnInputSplit(tmpPath, 0, 0, 0, new String[0]));
    }
    System.out.println("Total # of splits: " + splits.size());
    return splits.toArray(new IColumnInputSplit[splits.size()]);
}
From source file:IndexService.IndexIFormatOutputWriter.java
License:Open Source License
public IndexIFormatOutputWriter(String fileName, JobConf job) throws IOException {
    this.conf = job;
    ifdf = new IFormatDataFile(job);
    ihead = new IHead();

    // Each field-map entry encodes a type/index pair, delimited by ConstVar.RecordSplit;
    // getStrings splits the comma-delimited outer list.
    String[] fieldStrings = job.getStrings(ConstVar.HD_fieldMap);
    IFieldMap fieldMap = new IFieldMap();
    for (int i = 0; i < fieldStrings.length; i++) {
        String[] def = fieldStrings[i].split(ConstVar.RecordSplit);
        byte type = Byte.valueOf(def[0]);
        int index = Short.valueOf(def[1]);
        fieldMap.addFieldType(new IRecord.IFType(type, index));
    }
    ihead.setFieldMap(fieldMap);

    // Record the index-to-file mapping in the user-defined header info.
    String[] files = job.getStrings(ConstVar.HD_index_filemap);
    IUserDefinedHeadInfo iudhi = new IUserDefinedHeadInfo();
    iudhi.addInfo(123456, job.get("datafiletype"));
    for (int i = 0; i < files.length; i++) {
        iudhi.addInfo(i, files[i]);
    }
    ihead.setUdi(iudhi);
    ihead.setPrimaryIndex(0);
    ifdf.create(fileName, ihead);
    record = ifdf.getIRecordObj();
}
From source file:IndexService.IndexMergeIFormatWriter.java
License:Open Source License
public IndexMergeIFormatWriter(String fileName, JobConf job) throws IOException {
    this.conf = job;
    ifdf = new IFormatDataFile(job);
    ihead = new IHead();

    String[] fieldStrings = job.getStrings(ConstVar.HD_fieldMap);
    IFieldMap fieldMap = new IFieldMap();
    for (int i = 0; i < fieldStrings.length; i++) {
        String[] def = fieldStrings[i].split(ConstVar.RecordSplit);
        byte type = Byte.valueOf(def[0]);
        int index = Short.valueOf(def[1]);
        fieldMap.addFieldType(new IRecord.IFType(type, index));
    }
    ihead.setFieldMap(fieldMap);

    String[] files = job.getStrings(ConstVar.HD_index_filemap);
    IUserDefinedHeadInfo iudhi = new IUserDefinedHeadInfo();
    iudhi.addInfo(123456, job.get("datafiletype"));
    for (int i = 0; i < files.length; i++) {
        iudhi.addInfo(i, files[i]);
    }
    ihead.setUdi(iudhi);
    ihead.setPrimaryIndex(0);
    ifdf.create(fileName, ihead);
    record = ifdf.getIRecordObj();
}
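The two writers above share a pattern worth noting: getStrings supplies the comma-delimited outer list, and each entry is then split again on a secondary delimiter (ConstVar.RecordSplit) into a type/index pair. A minimal sketch of that round-trip, with the property name hd.field.map and the ":" delimiter standing in for the project-specific ConstVar constants, which are not shown in the source:

import org.apache.hadoop.mapred.JobConf;

public class FieldMapSketch {
    public static void main(String[] args) {
        JobConf job = new JobConf();
        // Assumed encoding: each entry is "<type>:<index>"; ":" is a stand-in
        // for ConstVar.RecordSplit, whose real value is project-specific.
        job.setStrings("hd.field.map", "1:0", "3:1", "5:2");

        for (String def : job.getStrings("hd.field.map")) {
            String[] parts = def.split(":");
            byte type = Byte.parseByte(parts[0]);
            int index = Integer.parseInt(parts[1]);
            System.out.println("field type=" + type + " index=" + index);
        }
    }
}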
From source file:org.apache.sysml.runtime.matrix.mapred.CSVAssignRowIDMapper.java
License:Apache License
@Override
@SuppressWarnings("deprecation")
public void configure(JobConf job) {
    byte thisIndex;
    try {
        // It doesn't make sense to have repeated file names in the input, since this is for reblock.
        thisIndex = MRJobConfiguration.getInputMatrixIndexesInMapper(job).get(0);
        outKey.set(thisIndex);
        Path thisPath = new Path(job.get(MRConfigurationNames.MR_MAP_INPUT_FILE));
        FileSystem fs = IOUtilFunctions.getFileSystem(thisPath, job);
        thisPath = thisPath.makeQualified(fs);
        filename = thisPath.toString();

        // One smallest-file name per input, stored as a comma-delimited list.
        String[] strs = job.getStrings(CSVReblockMR.SMALLEST_FILE_NAME_PER_INPUT);
        Path headerPath = new Path(strs[thisIndex]).makeQualified(fs);
        headerFile = headerPath.toString().equals(filename);

        CSVReblockInstruction[] reblockInstructions = MRJobConfiguration.getCSVReblockInstructions(job);
        for (CSVReblockInstruction ins : reblockInstructions)
            if (ins.input == thisIndex) {
                delim = Pattern.quote(ins.delim);
                ignoreFirstLine = ins.hasHeader;
                break;
            }
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
From source file:org.apache.sysml.runtime.matrix.mapred.CSVReblockMapper.java
License:Apache License
@Override
@SuppressWarnings("deprecation")
public void configure(JobConf job) {
    super.configure(job);
    // Get the number of columns per block and load the offset mapping.
    byte matrixIndex = representativeMatrixes.get(0);
    try {
        Path thisPath = new Path(job.get(MRConfigurationNames.MR_MAP_INPUT_FILE));
        FileSystem fs = IOUtilFunctions.getFileSystem(thisPath, job);
        thisPath = thisPath.makeQualified(fs);
        String filename = thisPath.toString();
        Path headerPath = new Path(job.getStrings(CSVReblockMR.SMALLEST_FILE_NAME_PER_INPUT)[matrixIndex])
                .makeQualified(fs);
        if (headerPath.toString().equals(filename))
            headerFile = true;

        ByteWritable key = new ByteWritable();
        OffsetCount value = new OffsetCount();
        Path p = new Path(job.get(CSVReblockMR.ROWID_FILE_NAME));
        SequenceFile.Reader reader = null;
        try {
            reader = new SequenceFile.Reader(fs, p, job);
            while (reader.next(key, value)) {
                if (key.get() == matrixIndex && filename.equals(value.filename))
                    offsetMap.put(value.fileOffset, value.count);
            }
        } finally {
            IOUtilFunctions.closeSilently(reader);
        }
    } catch (IOException e) {
        throw new RuntimeException(e);
    }

    CSVReblockInstruction ins = csv_reblock_instructions.get(0).get(0);
    _delim = ins.delim;
    ignoreFirstLine = ins.hasHeader;

    idxRow = new IndexedBlockRow();
    int maxBclen = 0;
    for (ArrayList<CSVReblockInstruction> insv : csv_reblock_instructions)
        for (CSVReblockInstruction in : insv) {
            if (maxBclen < in.bclen)
                maxBclen = in.bclen;
        }
    // Always dense, since this is the common CSV use case.
    idxRow.getRow().data.reset(1, maxBclen, false);
}
From source file:org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration.java
License:Apache License
public static ArrayList<Byte> getInputMatrixIndexesInMapper(JobConf job) throws IOException {
    String[] matrices = job.getStrings(INPUT_MATRICIES_DIRS_CONFIG);
    String str = job.get(MAPFUNC_INPUT_MATRICIES_INDEXES_CONFIG);
    byte[] indexes;
    if (str == null || str.isEmpty()) {
        indexes = new byte[matrices.length];
        for (int i = 0; i < indexes.length; i++)
            indexes[i] = (byte) i;
    } else {
        String[] strs = str.split(Instruction.INSTRUCTION_DELIM);
        indexes = new byte[strs.length];
        for (int i = 0; i < strs.length; i++)
            indexes[i] = Byte.parseByte(strs[i]);
    }

    int numMatrices = matrices.length;
    if (numMatrices > Byte.MAX_VALUE)
        throw new RuntimeException("number of matrices is too large > " + Byte.MAX_VALUE);
    for (int i = 0; i < matrices.length; i++)
        matrices[i] = new Path(matrices[i]).toString();

    // Match the current input file (or its parent directory) against the configured matrix dirs.
    Path thisFile = new Path(job.get(MRConfigurationNames.MR_MAP_INPUT_FILE));
    FileSystem fs = IOUtilFunctions.getFileSystem(thisFile, job);
    thisFile = thisFile.makeQualified(fs);
    Path thisDir = thisFile.getParent().makeQualified(fs);
    ArrayList<Byte> representativeMatrixes = new ArrayList<>();
    for (int i = 0; i < matrices.length; i++) {
        Path p = new Path(matrices[i]).makeQualified(fs);
        if (thisFile.toUri().equals(p.toUri()) || thisDir.toUri().equals(p.toUri()))
            representativeMatrixes.add(indexes[i]);
    }
    return representativeMatrixes;
}
From source file:org.saarus.service.hadoop.util.JsonOutputFormat.java
License:Apache License
@Override
public RecordWriter<K, V> getRecordWriter(FileSystem ignored, JobConf job, String name, Progressable progress)
        throws IOException {
    boolean isCompressed = getCompressOutput(job);
    String keyValueSeparator = job.get("mapred.textoutputformat.separator", "\t");
    // "column.headers" is a comma-delimited list of output column names.
    String[] headers = job.getStrings("column.headers");
    if (!isCompressed) {
        Path file = FileOutputFormat.getTaskOutputPath(job, name);
        FileSystem fs = file.getFileSystem(job);
        FSDataOutputStream fileOut = fs.create(file, progress);
        return new JsonRecordWriter<K, V>(fileOut, keyValueSeparator, headers);
    } else {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class);
        // Create the named codec.
        CompressionCodec codec = ReflectionUtils.newInstance(codecClass, job);
        // Build the filename, including the codec's extension.
        Path file = FileOutputFormat.getTaskOutputPath(job, name + codec.getDefaultExtension());
        FileSystem fs = file.getFileSystem(job);
        FSDataOutputStream fileOut = fs.create(file, progress);
        return new JsonRecordWriter<K, V>(new DataOutputStream(codec.createOutputStream(fileOut)),
                keyValueSeparator, headers);
    }
}
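For completeness, a hedged sketch of the producer side: the readers above consume comma-delimited lists that a job driver would typically write with Configuration.setStrings, which joins its varargs with commas. The driver class and the use of "column.headers" as a free-form property are illustrative, mirroring the JsonOutputFormat example above:

import org.apache.hadoop.mapred.JobConf;

// Hypothetical driver snippet: setStrings joins the values with commas,
// so JsonOutputFormat's job.getStrings("column.headers") recovers them.
public class JsonJobDriver {
    public static void main(String[] args) {
        JobConf job = new JobConf();
        job.setStrings("column.headers", "id", "name", "score");
        // Stored as "id,name,score"; getStrings("column.headers") returns {"id", "name", "score"}.
        System.out.println(job.get("column.headers"));
    }
}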