Example usage for org.apache.hadoop.mapred JobConf getStrings

List of usage examples for org.apache.hadoop.mapred JobConf getStrings

Introduction

On this page you can find example usage of org.apache.hadoop.mapred JobConf getStrings.

Prototype

public String[] getStrings(String name) 

Document

Get the comma delimited values of the name property as an array of Strings.
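
The following is a minimal, self-contained sketch of the round trip between setStrings and getStrings. The property names used here ("example.hosts", "example.missing") are made up for illustration only.

import org.apache.hadoop.mapred.JobConf;

public class GetStringsExample {
    public static void main(String[] args) {
        JobConf conf = new JobConf();

        // setStrings stores the values as a single comma-delimited property.
        conf.setStrings("example.hosts", "node1", "node2", "node3");
        // Equivalent: conf.set("example.hosts", "node1,node2,node3");

        // getStrings splits the comma-delimited value back into an array.
        String[] hosts = conf.getStrings("example.hosts");
        for (String host : hosts) {
            System.out.println(host);
        }

        // If the property is not set, getStrings(name) returns null.
        String[] missing = conf.getStrings("example.missing");
        System.out.println(missing == null);
    }
}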

Usage

From source file:com.cloudera.science.avro.streaming.AvroAsJSONInputFormat.java

License:Open Source License

private void loadSchemas(JobConf job) throws IOException {
    this.schemas = Lists.newArrayList();
    SchemaLoader loader = new SchemaLoader(job);
    String schemaLiteral = job.get(SCHEMA_LITERAL);
    if (schemaLiteral != null) {
        schemas.add(loader.loadLiteral(schemaLiteral));
        return;
    } else {
        String[] schemaUrls = job.getStrings(SCHEMA_URL);
        String[] typeNames = job.getStrings(SCHEMA_TYPE_NAME);
        if (schemaUrls != null) {
            for (String schemaUrl : schemaUrls) {
                schemas.add(loader.loadFromUrl(schemaUrl));
            }
        } else if (typeNames != null) {
            for (String typeName : typeNames) {
                schemas.add(loader.loadFromTypeName(typeName));
            }
        } else {
            throw new IllegalArgumentException("No schema information provided");
        }

        if (schemas.size() > 1) {
            // Need to track input paths
            Path[] inputs = FileInputFormat.getInputPaths(job);
            if (inputs.length != schemas.size()) {
                throw new IllegalArgumentException(String.format(
                        "Number of input paths (%d) does not match number of schemas specified (%d)",
                        inputs.length, schemas.size()));
            }
            this.inputPaths = new String[inputs.length];
            for (int i = 0; i < inputs.length; i++) {
                inputPaths[i] = inputs[i].toString();
            }
        }
    }
}
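
For context, the driver submitting this job would populate SCHEMA_LITERAL, SCHEMA_URL, or SCHEMA_TYPE_NAME before loadSchemas runs. A hedged sketch follows; the schema URLs are placeholders, and only the use of the same constants comes from the source above.

// Hypothetical driver-side setup for AvroAsJSONInputFormat.
JobConf job = new JobConf();
// One schema per input path; setStrings joins the URLs with commas,
// and loadSchemas reads them back via job.getStrings(SCHEMA_URL).
job.setStrings(SCHEMA_URL,
        "hdfs:///schemas/first.avsc",
        "hdfs:///schemas/second.avsc");
// Alternatively, a single inline schema:
// job.set(SCHEMA_LITERAL, schemaJsonString);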

From source file:com.ebay.erl.mobius.core.mapred.MultiInputsHelpersRepository.java

License:Apache License

/**
 * constructor
 */
private MultiInputsHelpersRepository(JobConf conf) {
    this.mapping = new TreeMap<Class<? extends InputFormat>, MultiInputsHelper>(new ClassComparator());

    this.register(FileInputFormat.class, FileInputFormatHelper.class);

    if (!conf.get("mobius.multi.inputs.helpers", "").isEmpty()) {
        // mobius.multi.inputs.helpers is in the format: InputFormatClassName:HelperClassName(,InputFormatClassName:HelperClassName)?
        String[] helpers = conf.getStrings("mobius.multi.inputs.helpers");
        for (String aHelper : helpers) {
            String[] data = aHelper.split(":");
            String inputFormatClassName = data[0];
            String helperClassName = data[1];

            Class<? extends InputFormat> inputFormat = (Class<? extends InputFormat>) Util
                    .getClass(inputFormatClassName);
            Class<? extends MultiInputsHelper> helperClass = (Class<? extends MultiInputsHelper>) Util
                    .getClass(helperClassName);

            this.register(inputFormat, helperClass);
        }
    }
}
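
The comment in the constructor documents the expected value format. Below is a hedged illustration of setting the property from a driver; the helper class name is invented, and only the InputFormatClassName:HelperClassName pattern comes from the source.

// Hypothetical: register an additional InputFormat-to-helper mapping.
JobConf conf = new JobConf();
conf.set("mobius.multi.inputs.helpers",
        "org.apache.hadoop.mapred.TextInputFormat:com.example.TextInputFormatHelper");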

From source file:com.ema.hadoop.bestclient.BCMapper.java

@Override
public void map(LongWritable key, Text value, Mapper.Context context) throws IOException, InterruptedException {
    DateFormat formatter = new SimpleDateFormat("dd/MM/yyyy");

    try {
        String line = value.toString();
        String[] lineTab = line.split(";");

        String client = lineTab[0];
        int somme = Integer.parseInt(lineTab[2]);
        Date date = formatter.parse(lineTab[1]);

        JobConf jobConf = (JobConf) context.getConfiguration();
        String[] dateTable = jobConf.getStrings("dates");

        Date dateStart = formatter.parse(dateTable[0]);
        Date dateEnd = formatter.parse(dateTable[1]);

        if (date.after(dateStart) && date.before(dateEnd)) {
            context.write(new Text(client), new IntWritable(somme));
        } else {
            Logger.getLogger(BCMapper.class.getName()).log(Level.INFO, "ELSE ddddddddddddddddddddd");
            Logger.getLogger(BCMapper.class.getName()).log(Level.INFO, "param start " + dateTable[0]);
            Logger.getLogger(BCMapper.class.getName()).log(Level.INFO, "Date start " + dateStart.toString());
            Logger.getLogger(BCMapper.class.getName()).log(Level.INFO, "param fin " + dateTable[1]);
            Logger.getLogger(BCMapper.class.getName()).log(Level.INFO, "Date fin " + dateEnd.toString());
        }

    } catch (ParseException e) {
        Logger.getLogger(BCMapper.class.getName()).log(Level.INFO, "Parse exception");
    }

}
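
The mapper above expects a "dates" property whose first entry is the start date and second entry is the end date, both in dd/MM/yyyy format. A sketch of how the driver might supply it; the concrete dates are placeholders.

// Hypothetical driver-side setup matching BCMapper's expectations.
JobConf jobConf = new JobConf();
jobConf.setStrings("dates", "01/01/2015", "31/12/2015");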

From source file:com.ibm.bi.dml.runtime.matrix.mapred.CSVAssignRowIDMapper.java

License:Open Source License

@Override
@SuppressWarnings("deprecation")
public void configure(JobConf job) {
    byte thisIndex;
    try {
        //it doesn't make sense to have repeated file names in the input, since this is for reblock
        thisIndex = MRJobConfiguration.getInputMatrixIndexesInMapper(job).get(0);
        outKey.set(thisIndex);
        FileSystem fs = FileSystem.get(job);
        Path thisPath = new Path(job.get("map.input.file")).makeQualified(fs);
        filename = thisPath.toString();
        String[] strs = job.getStrings(CSVReblockMR.SMALLEST_FILE_NAME_PER_INPUT);
        Path headerPath = new Path(strs[thisIndex]).makeQualified(fs);
        if (headerPath.toString().equals(filename))
            headerFile = true;
    } catch (IOException e) {
        throw new RuntimeException(e);
    }

    try {
        CSVReblockInstruction[] reblockInstructions = MRJobConfiguration.getCSVReblockInstructions(job);
        for (CSVReblockInstruction ins : reblockInstructions) {
            if (ins.input == thisIndex) {
                delim = Pattern.quote(ins.delim);
                ignoreFirstLine = ins.hasHeader;
                break;
            }
        }
    } catch (DMLUnsupportedOperationException e) {
        throw new RuntimeException(e);
    } catch (DMLRuntimeException e) {
        throw new RuntimeException(e);
    }

    // load properties relevant to transform
    try {
        boolean omit = job.getBoolean(MRJobConfiguration.TF_TRANSFORM, false);
        if (omit)
            _agents = new TfUtils(job, true);
    } catch (IOException e) {
        throw new RuntimeException(e);
    } catch (JSONException e) {
        throw new RuntimeException(e);
    }
}

From source file:com.ibm.bi.dml.runtime.matrix.mapred.CSVReblockMapper.java

License:Open Source License

@Override
@SuppressWarnings("deprecation")
public void configure(JobConf job) {
    super.configure(job);
    //get the number of columns per block

    //load the offset mapping
    byte matrixIndex = representativeMatrixes.get(0);
    try {
        FileSystem fs = FileSystem.get(job);
        Path thisPath = new Path(job.get("map.input.file")).makeQualified(fs);
        String filename = thisPath.toString();
        Path headerPath = new Path(job.getStrings(CSVReblockMR.SMALLEST_FILE_NAME_PER_INPUT)[matrixIndex])
                .makeQualified(fs);
        if (headerPath.toString().equals(filename))
            headerFile = true;

        ByteWritable key = new ByteWritable();
        OffsetCount value = new OffsetCount();
        Path p = new Path(job.get(CSVReblockMR.ROWID_FILE_NAME));
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, p, job);
        while (reader.next(key, value)) {
            if (key.get() == matrixIndex && filename.equals(value.filename))
                offsetMap.put(value.fileOffset, value.count);
        }
        reader.close();
    } catch (IOException e) {
        throw new RuntimeException(e);
    }

    CSVReblockInstruction ins = csv_reblock_instructions.get(0).get(0);
    _delim = ins.delim;
    ignoreFirstLine = ins.hasHeader;

    idxRow = new IndexedBlockRow();
    int maxBclen = 0;

    for (ArrayList<CSVReblockInstruction> insv : csv_reblock_instructions)
        for (CSVReblockInstruction in : insv) {
            if (maxBclen < in.bclen)
                maxBclen = in.bclen;
        }

    //always dense since this is the common CSV use case
    idxRow.getRow().data.reset(1, maxBclen, false);
}

From source file:com.ibm.bi.dml.runtime.matrix.mapred.MRJobConfiguration.java

License:Open Source License

public static ArrayList<Byte> getInputMatrixIndexesInMapper(JobConf job) throws IOException {
    String[] matrices = job.getStrings(INPUT_MATRICIES_DIRS_CONFIG);
    String str = job.get(MAPFUNC_INPUT_MATRICIES_INDEXES_CONFIG);
    byte[] indexes;
    if (str == null || str.isEmpty()) {
        indexes = new byte[matrices.length];
        for (int i = 0; i < indexes.length; i++)
            indexes[i] = (byte) i;
    } else {
        String[] strs = str.split(Instruction.INSTRUCTION_DELIM);
        indexes = new byte[strs.length];
        for (int i = 0; i < strs.length; i++)
            indexes[i] = Byte.parseByte(strs[i]);
    }

    int numMatrices = matrices.length;
    if (numMatrices > Byte.MAX_VALUE)
        throw new RuntimeException("number of matrices is too large > " + Byte.MAX_VALUE);
    for (int i = 0; i < matrices.length; i++)
        matrices[i] = new Path(matrices[i]).toString();

    FileSystem fs = FileSystem.get(job);
    Path thisFile = new Path(job.get("map.input.file")).makeQualified(fs);

    //Path p=new Path(thisFileName);

    Path thisDir = thisFile.getParent().makeQualified(fs);
    ArrayList<Byte> representativeMatrixes = new ArrayList<Byte>();
    for (int i = 0; i < matrices.length; i++) {
        Path p = new Path(matrices[i]).makeQualified(fs);
        if (thisFile.toUri().compareTo(p.toUri()) == 0 || thisDir.toUri().compareTo(p.toUri()) == 0)
            representativeMatrixes.add(indexes[i]);
    }
    return representativeMatrixes;
}

From source file:com.ibm.bi.dml.runtime.matrix.mapred.MRJobConfiguration.java

License:Open Source License

public static String[] getOutputs(JobConf job) {
    return job.getStrings(OUTPUT_MATRICES_DIRS_CONFIG);
}

From source file:com.ibm.bi.dml.runtime.matrix.mapred.MRJobConfiguration.java

License:Open Source License

public static String[] getInputPaths(JobConf job) {
    return job.getStrings(INPUT_MATRICIES_DIRS_CONFIG);
}

From source file:FormatStorage.Head.java

License:Open Source License

public void fromJobConf(JobConf job) throws Exception {
    byte var = (byte) job.getInt(ConstVar.HD_var, 0);
    byte compress = (byte) job.getInt(ConstVar.HD_compress, 0);
    byte compressStyle = (byte) job.getInt(ConstVar.HD_compressStyle, 0);
    short primaryIndex = (short) job.getInt(ConstVar.HD_primaryIndex, -1);
    byte encode = (byte) job.getInt(ConstVar.HD_encode, 0);
    byte encodeStyle = (byte) job.getInt(ConstVar.HD_encodeStyle, 0);
    String keyString = job.get(ConstVar.HD_key);
    String[] fieldStrings = job.getStrings(ConstVar.HD_fieldMap);

    LOG.info("in fromJobConf, compressed:" + compress + ",compressStyle:" + compressStyle);

    setVar(var);
    setCompress(compress);
    setCompressStyle(compressStyle);

    setEncode(encode);
    setEncodeStyle(encodeStyle);
    if (keyString != null && keyString.length() != 0) {
        setKey(keyString);
    }

    short fieldNum = 0;
    if (fieldStrings != null) {
        fieldNum = (short) fieldStrings.length;
    }

    FieldMap fieldMap = new FieldMap();
    for (short i = 0; i < fieldNum; i++) {
        String[] def = fieldStrings[i].split(ConstVar.RecordSplit);
        byte type = Byte.valueOf(def[0]);
        int len = Integer.valueOf(def[1]);
        short index = Short.valueOf(def[2]);

        fieldMap.addField(new Field(type, len, index));
    }

    setFieldMap(fieldMap);
    setPrimaryIndex(primaryIndex);
}

From source file:hydrograph.engine.hadoop.recordreader.DelimitedAndFixedWidthRecordReader.java

License:Apache License

public DelimitedAndFixedWidthRecordReader(JobConf conf, FileSplit split) throws IOException {
    lengthsAndDelimiters = DelimitedAndFixedWidthHelper
            .modifyIdentifier(DelimitedAndFixedWidthHelper.stringToArray(conf.get("lengthsAndDelimiters")));
    lengthsAndDelimitersType = conf.getStrings("lengthsAndDelimitersType");
    quote = conf.get("quote");
    charsetName = conf.get("charsetName");
    start = split.getStart();
    pos = start;
    end = start + split.getLength();
    file = split.getPath();
    fs = file.getFileSystem(conf);
    fileIn = fs.open(split.getPath());
    fileIn.seek(start);
    inputStreamReader = new InputStreamReader(fileIn, charsetName);
    singleChar = new char[1];
    stringBuilder = new StringBuilder();
    isQuotePresent = isQuotePresent(quote);
}
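
The record reader above pulls its layout from four job properties. Below is a hedged sketch of the driver-side configuration; the property names come from the constructor, but the values are placeholders, and the exact layout string expected by DelimitedAndFixedWidthHelper.stringToArray is not shown here.

// Hypothetical configuration for DelimitedAndFixedWidthRecordReader.
JobConf conf = new JobConf();
conf.set("lengthsAndDelimiters", "<layout string as expected by stringToArray>"); // placeholder
conf.setStrings("lengthsAndDelimitersType", "java.lang.String", "java.lang.Integer"); // one type entry per field (placeholder)
conf.set("quote", "\"");
conf.set("charsetName", "UTF-8");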