Example usage for org.apache.hadoop.mapred JobConf getStrings

List of usage examples for org.apache.hadoop.mapred JobConf getStrings

Introduction

On this page you can find example usage of org.apache.hadoop.mapred JobConf getStrings.

Prototype

public String[] getStrings(String name) 

Document

Get the comma delimited values of the name property as an array of Strings.
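
The following is a minimal, self-contained sketch of the round trip between setStrings and getStrings. The property names used here ("example.hosts", "example.missing") are made up for illustration only.

import org.apache.hadoop.mapred.JobConf;

public class GetStringsExample {
    public static void main(String[] args) {
        JobConf conf = new JobConf();

        // setStrings stores the values as a single comma-delimited property.
        conf.setStrings("example.hosts", "node1", "node2", "node3");
        // Equivalent: conf.set("example.hosts", "node1,node2,node3");

        // getStrings splits the comma-delimited value back into an array.
        String[] hosts = conf.getStrings("example.hosts");
        for (String host : hosts) {
            System.out.println(host);
        }

        // If the property is not set, getStrings(name) returns null.
        String[] missing = conf.getStrings("example.missing");
        System.out.println(missing == null);
    }
}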

Usage

From source file:com.cloudera.science.avro.streaming.AvroAsJSONInputFormat.java

License:Open Source License

private void loadSchemas(JobConf job) throws IOException {
    this.schemas = Lists.newArrayList();
    SchemaLoader loader = new SchemaLoader(job);
    String schemaLiteral = job.get(SCHEMA_LITERAL);
    if (schemaLiteral != null) {
        schemas.add(loader.loadLiteral(schemaLiteral));
        return;
    } else {
        String[] schemaUrls = job.getStrings(SCHEMA_URL);
        String[] typeNames = job.getStrings(SCHEMA_TYPE_NAME);
        if (schemaUrls != null) {
            for (String schemaUrl : schemaUrls) {
                schemas.add(loader.loadFromUrl(schemaUrl));
            }
        } else if (typeNames != null) {
            for (String typeName : typeNames) {
                schemas.add(loader.loadFromTypeName(typeName));
            }
        } else {
            throw new IllegalArgumentException("No schema information provided");
        }

        if (schemas.size() > 1) {
            // Need to track input paths
            Path[] inputs = FileInputFormat.getInputPaths(job);
            if (inputs.length != schemas.size()) {
                throw new IllegalArgumentException(String.format(
                        "Number of input paths (%d) does not match number of schemas specified (%d)",
                        inputs.length, schemas.size()));
            }
            this.inputPaths = new String[inputs.length];
            for (int i = 0; i < inputs.length; i++) {
                inputPaths[i] = inputs[i].toString();
            }
        }
    }
}
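
For context, the driver submitting this job would populate SCHEMA_LITERAL, SCHEMA_URL, or SCHEMA_TYPE_NAME before loadSchemas runs. A hedged sketch follows; the schema URLs are placeholders, and only the use of the same constants comes from the source above.

// Hypothetical driver-side setup for AvroAsJSONInputFormat.
JobConf job = new JobConf();
// One schema per input path; setStrings joins the URLs with commas,
// and loadSchemas reads them back via job.getStrings(SCHEMA_URL).
job.setStrings(SCHEMA_URL,
        "hdfs:///schemas/first.avsc",
        "hdfs:///schemas/second.avsc");
// Alternatively, a single inline schema:
// job.set(SCHEMA_LITERAL, schemaJsonString);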

From source file:com.ebay.erl.mobius.core.mapred.MultiInputsHelpersRepository.java

License:Apache License

/**
 * constructor
 */
private MultiInputsHelpersRepository(JobConf conf) {
    this.mapping = new TreeMap<Class<? extends InputFormat>, MultiInputsHelper>(new ClassComparator());

    this.register(FileInputFormat.class, FileInputFormatHelper.class);

    if (!conf.get("mobius.multi.inputs.helpers", "").isEmpty()) {
        // mobius.multi.inputs.helpers is in the format: InputFormatClassName:HelperClassName(,InputFormatClassName:HelperClassName)?
        String[] helpers = conf.getStrings("mobius.multi.inputs.helpers");
        for (String aHelper : helpers) {
            String[] data = aHelper.split(":");
            String inputFormatClassName = data[0];
            String helperClassName = data[1];

            Class<? extends InputFormat> inputFormat = (Class<? extends InputFormat>) Util
                    .getClass(inputFormatClassName);
            Class<? extends MultiInputsHelper> helperClass = (Class<? extends MultiInputsHelper>) Util
                    .getClass(helperClassName);

            this.register(inputFormat, helperClass);
        }
    }
}
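
The comment in the constructor documents the expected value format. Below is a hedged illustration of setting the property from a driver; the helper class name is invented, and only the InputFormatClassName:HelperClassName pattern comes from the source.

// Hypothetical: register an additional InputFormat-to-helper mapping.
JobConf conf = new JobConf();
conf.set("mobius.multi.inputs.helpers",
        "org.apache.hadoop.mapred.TextInputFormat:com.example.TextInputFormatHelper");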

From source file:com.ema.hadoop.bestclient.BCMapper.java

@Override
public void map(LongWritable key, Text value, Mapper.Context context) throws IOException, InterruptedException {
    DateFormat formatter = new SimpleDateFormat("dd/MM/yyyy");

    try {
        String line = value.toString();
        String[] lineTab = line.split(";");

        String client = lineTab[0];
        int somme = Integer.parseInt(lineTab[2]);
        Date date = formatter.parse(lineTab[1]);

        JobConf jobConf = (JobConf) context.getConfiguration();
        String[] dateTable = jobConf.getStrings("dates");

        Date dateStart = formatter.parse(dateTable[0]);
        Date dateEnd = formatter.parse(dateTable[1]);

        if (date.after(dateStart) && date.before(dateEnd)) {
            context.write(new Text(client), new IntWritable(somme));
        } else {
            Logger.getLogger(BCMapper.class.getName()).log(Level.INFO, "ELSE ddddddddddddddddddddd");
            Logger.getLogger(BCMapper.class.getName()).log(Level.INFO, "param start " + dateTable[0]);
            Logger.getLogger(BCMapper.class.getName()).log(Level.INFO, "Date start " + dateStart.toString());
            Logger.getLogger(BCMapper.class.getName()).log(Level.INFO, "param fin " + dateTable[1]);
            Logger.getLogger(BCMapper.class.getName()).log(Level.INFO, "Date fin " + dateEnd.toString());
        }

    } catch (ParseException e) {
        Logger.getLogger(BCMapper.class.getName()).log(Level.INFO, "Parse exception");
    }

}
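
The mapper above expects a "dates" property whose first entry is the start date and second entry is the end date, both in dd/MM/yyyy format. A sketch of how the driver might supply it; the concrete dates are placeholders.

// Hypothetical driver-side setup matching BCMapper's expectations.
JobConf jobConf = new JobConf();
jobConf.setStrings("dates", "01/01/2015", "31/12/2015");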

From source file:com.ibm.bi.dml.runtime.matrix.mapred.CSVAssignRowIDMapper.java

License:Open Source License

@Override
@SuppressWarnings("deprecation")
public void configure(JobConf job) {
    byte thisIndex;
    try {
        //it doesn't make sense to have repeated file names in the input, since this is for reblock
        thisIndex = MRJobConfiguration.getInputMatrixIndexesInMapper(job).get(0);
        outKey.set(thisIndex);
        FileSystem fs = FileSystem.get(job);
        Path thisPath = new Path(job.get("map.input.file")).makeQualified(fs);
        filename = thisPath.toString();
        String[] strs = job.getStrings(CSVReblockMR.SMALLEST_FILE_NAME_PER_INPUT);
        Path headerPath = new Path(strs[thisIndex]).makeQualified(fs);
        if (headerPath.toString().equals(filename))
            headerFile = true;
    } catch (IOException e) {
        throw new RuntimeException(e);
    }

    try {
        CSVReblockInstruction[] reblockInstructions = MRJobConfiguration.getCSVReblockInstructions(job);
        for (CSVReblockInstruction ins : reblockInstructions) {
            if (ins.input == thisIndex) {
                delim = Pattern.quote(ins.delim);
                ignoreFirstLine = ins.hasHeader;
                break;
            }
        }
    } catch (DMLUnsupportedOperationException e) {
        throw new RuntimeException(e);
    } catch (DMLRuntimeException e) {
        throw new RuntimeException(e);
    }

    // load properties relevant to transform
    try {
        boolean omit = job.getBoolean(MRJobConfiguration.TF_TRANSFORM, false);
        if (omit)
            _agents = new TfUtils(job, true);
    } catch (IOException e) {
        throw new RuntimeException(e);
    } catch (JSONException e) {
        throw new RuntimeException(e);
    }
}

From source file:com.ibm.bi.dml.runtime.matrix.mapred.CSVReblockMapper.java

License:Open Source License

@Override
@SuppressWarnings("deprecation")
public void configure(JobConf job) {
    super.configure(job);
    //get the number of columns per block

    //load the offset mapping
    byte matrixIndex = representativeMatrixes.get(0);
    try {
        FileSystem fs = FileSystem.get(job);
        Path thisPath = new Path(job.get("map.input.file")).makeQualified(fs);
        String filename = thisPath.toString();
        Path headerPath = new Path(job.getStrings(CSVReblockMR.SMALLEST_FILE_NAME_PER_INPUT)[matrixIndex])
                .makeQualified(fs);
        if (headerPath.toString().equals(filename))
            headerFile = true;

        ByteWritable key = new ByteWritable();
        OffsetCount value = new OffsetCount();
        Path p = new Path(job.get(CSVReblockMR.ROWID_FILE_NAME));
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, p, job);
        while (reader.next(key, value)) {
            if (key.get() == matrixIndex && filename.equals(value.filename))
                offsetMap.put(value.fileOffset, value.count);
        }
        reader.close();
    } catch (IOException e) {
        throw new RuntimeException(e);
    }

    CSVReblockInstruction ins = csv_reblock_instructions.get(0).get(0);
    _delim = ins.delim;
    ignoreFirstLine = ins.hasHeader;

    idxRow = new IndexedBlockRow();
    int maxBclen = 0;

    for (ArrayList<CSVReblockInstruction> insv : csv_reblock_instructions)
        for (CSVReblockInstruction in : insv) {
            if (maxBclen < in.bclen)
                maxBclen = in.bclen;
        }

    //always dense since this is the common CSV use case
    idxRow.getRow().data.reset(1, maxBclen, false);
}

From source file:com.ibm.bi.dml.runtime.matrix.mapred.MRJobConfiguration.java

License:Open Source License

public static ArrayList<Byte> getInputMatrixIndexesInMapper(JobConf job) throws IOException {
    String[] matrices = job.getStrings(INPUT_MATRICIES_DIRS_CONFIG);
    String str = job.get(MAPFUNC_INPUT_MATRICIES_INDEXES_CONFIG);
    byte[] indexes;
    if (str == null || str.isEmpty()) {
        indexes = new byte[matrices.length];
        for (int i = 0; i < indexes.length; i++)
            indexes[i] = (byte) i;
    } else {
        String[] strs = str.split(Instruction.INSTRUCTION_DELIM);
        indexes = new byte[strs.length];
        for (int i = 0; i < strs.length; i++)
            indexes[i] = Byte.parseByte(strs[i]);
    }

    int numMatrices = matrices.length;
    if (numMatrices > Byte.MAX_VALUE)
        throw new RuntimeException("number of matrices is too large > " + Byte.MAX_VALUE);
    for (int i = 0; i < matrices.length; i++)
        matrices[i] = new Path(matrices[i]).toString();

    FileSystem fs = FileSystem.get(job);
    Path thisFile = new Path(job.get("map.input.file")).makeQualified(fs);

    //Path p=new Path(thisFileName);

    Path thisDir = thisFile.getParent().makeQualified(fs);
    ArrayList<Byte> representativeMatrixes = new ArrayList<Byte>();
    for (int i = 0; i < matrices.length; i++) {
        Path p = new Path(matrices[i]).makeQualified(fs);
        if (thisFile.toUri().compareTo(p.toUri()) == 0 || thisDir.toUri().compareTo(p.toUri()) == 0)
            representativeMatrixes.add(indexes[i]);
    }
    return representativeMatrixes;
}

From source file:com.ibm.bi.dml.runtime.matrix.mapred.MRJobConfiguration.java

License:Open Source License

public static String[] getOutputs(JobConf job) {
    return job.getStrings(OUTPUT_MATRICES_DIRS_CONFIG);
}

From source file:com.ibm.bi.dml.runtime.matrix.mapred.MRJobConfiguration.java

License:Open Source License

public static String[] getInputPaths(JobConf job) {
    return job.getStrings(INPUT_MATRICIES_DIRS_CONFIG);
}

From source file:FormatStorage.Head.java

License:Open Source License

public void fromJobConf(JobConf job) throws Exception {
    byte var = (byte) job.getInt(ConstVar.HD_var, 0);
    byte compress = (byte) job.getInt(ConstVar.HD_compress, 0);
    byte compressStyle = (byte) job.getInt(ConstVar.HD_compressStyle, 0);
    short primaryIndex = (short) job.getInt(ConstVar.HD_primaryIndex, -1);
    byte encode = (byte) job.getInt(ConstVar.HD_encode, 0);
    byte encodeStyle = (byte) job.getInt(ConstVar.HD_encodeStyle, 0);
    String keyString = job.get(ConstVar.HD_key);
    String[] fieldStrings = job.getStrings(ConstVar.HD_fieldMap);

    LOG.info("in fromJobConf, compressed:" + compress + ",compressStyle:" + compressStyle);

    setVar(var);
    setCompress(compress);
    setCompressStyle(compressStyle);

    setEncode(encode);
    setEncodeStyle(encodeStyle);
    if (keyString != null && keyString.length() != 0) {
        setKey(keyString);
    }

    short fieldNum = 0;
    if (fieldStrings != null) {
        fieldNum = (short) fieldStrings.length;
    }

    FieldMap fieldMap = new FieldMap();
    for (short i = 0; i < fieldNum; i++) {
        String[] def = fieldStrings[i].split(ConstVar.RecordSplit);
        byte type = Byte.valueOf(def[0]);
        int len = Integer.valueOf(def[1]);
        short index = Short.valueOf(def[2]);

        fieldMap.addField(new Field(type, len, index));
    }

    setFieldMap(fieldMap);
    setPrimaryIndex(primaryIndex);
}

From source file:hydrograph.engine.hadoop.recordreader.DelimitedAndFixedWidthRecordReader.java

License:Apache License

public DelimitedAndFixedWidthRecordReader(JobConf conf, FileSplit split) throws IOException {
    lengthsAndDelimiters = DelimitedAndFixedWidthHelper
            .modifyIdentifier(DelimitedAndFixedWidthHelper.stringToArray(conf.get("lengthsAndDelimiters")));
    lengthsAndDelimitersType = conf.getStrings("lengthsAndDelimitersType");
    quote = conf.get("quote");
    charsetName = conf.get("charsetName");
    start = split.getStart();
    pos = start;
    end = start + split.getLength();
    file = split.getPath();
    fs = file.getFileSystem(conf);
    fileIn = fs.open(split.getPath());
    fileIn.seek(start);
    inputStreamReader = new InputStreamReader(fileIn, charsetName);
    singleChar = new char[1];
    stringBuilder = new StringBuilder();
    isQuotePresent = isQuotePresent(quote);
}
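
The record reader above pulls its layout from four job properties. Below is a hedged sketch of the driver-side configuration; the property names come from the constructor, but the values are placeholders, and the exact layout string expected by DelimitedAndFixedWidthHelper.stringToArray is not shown here.

// Hypothetical configuration for DelimitedAndFixedWidthRecordReader.
JobConf conf = new JobConf();
conf.set("lengthsAndDelimiters", "<layout string as expected by stringToArray>"); // placeholder
conf.setStrings("lengthsAndDelimitersType", "java.lang.String", "java.lang.Integer"); // one type entry per field (placeholder)
conf.set("quote", "\"");
conf.set("charsetName", "UTF-8");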