List of usage examples for org.apache.hadoop.io.LongWritable.get()
public long get()
From source file:com.ibm.bi.dml.runtime.controlprogram.parfor.RemoteDPParForMR.java
License:Open Source License
/** * Result file contains hierarchy of workerID-resultvar(incl filename). We deduplicate * on the workerID. Without JVM reuse each task refers to a unique workerID, so we * will not find any duplicates. With JVM reuse, however, each slot refers to a workerID, * and there are duplicate filenames due to partial aggregation and overwrite of fname * (the RemoteParWorkerMapper ensures uniqueness of those files independent of the * runtime implementation). //from w ww .j a v a2 s. c o m * * @param job * @param fname * @return * @throws DMLRuntimeException */ @SuppressWarnings("deprecation") public static LocalVariableMap[] readResultFile(JobConf job, String fname) throws DMLRuntimeException, IOException { HashMap<Long, LocalVariableMap> tmp = new HashMap<Long, LocalVariableMap>(); FileSystem fs = FileSystem.get(job); Path path = new Path(fname); LongWritable key = new LongWritable(); //workerID Text value = new Text(); //serialized var header (incl filename) int countAll = 0; for (Path lpath : MatrixReader.getSequenceFilePaths(fs, path)) { SequenceFile.Reader reader = new SequenceFile.Reader(FileSystem.get(job), lpath, job); try { while (reader.next(key, value)) { //System.out.println("key="+key.get()+", value="+value.toString()); if (!tmp.containsKey(key.get())) tmp.put(key.get(), new LocalVariableMap()); Object[] dat = ProgramConverter.parseDataObject(value.toString()); tmp.get(key.get()).put((String) dat[0], (Data) dat[1]); countAll++; } } finally { if (reader != null) reader.close(); } } LOG.debug("Num remote worker results (before deduplication): " + countAll); LOG.debug("Num remote worker results: " + tmp.size()); //create return array return tmp.values().toArray(new LocalVariableMap[0]); }
From source file:com.ibm.bi.dml.runtime.controlprogram.parfor.RemoteDPParWorkerReducer.java
License:Open Source License
@Override public void reduce(LongWritable key, Iterator<Writable> valueList, OutputCollector<Writable, Writable> out, Reporter reporter) throws IOException { //cache collector/reporter (for write in close) _out = out;/* w w w. j a v a 2s. c o m*/ _report = reporter; //collect input partition if (_info == OutputInfo.BinaryBlockOutputInfo) _partition = collectBinaryBlock(valueList); else _partition = collectBinaryCellInput(valueList); //update in-memory matrix partition MatrixObject mo = (MatrixObject) _ec.getVariable(_inputVar); mo.setInMemoryPartition(_partition); //execute program LOG.trace("execute RemoteDPParWorkerReducer " + _stringID + " (" + _workerID + ")"); try { //create tasks for input data Task lTask = new Task(TaskType.SET); lTask.addIteration(new IntObject(_iterVar, key.get())); //execute program executeTask(lTask); } catch (Exception ex) { throw new IOException("ParFOR: Failed to execute task.", ex); } //statistic maintenance (after final export) RemoteParForUtils.incrementParForMRCounters(_report, 1, 1); }
From source file:com.ibm.bi.dml.runtime.matrix.mapred.CSVAssignRowIDMapper.java
License:Open Source License
@Override public void map(LongWritable key, Text value, OutputCollector<ByteWritable, OffsetCount> out, Reporter report) throws IOException { if (first) {//from w w w . j a va2 s.co m first = false; fileOffset = key.get(); outCache = out; } if (key.get() == 0 && headerFile)//getting the number of colums { if (!ignoreFirstLine) { report.incrCounter(CSVReblockMR.NUM_COLS_IN_MATRIX, outKey.toString(), value.toString().split(delim, -1).length); if (!omit(value.toString())) num++; } else realFirstLine = true; } else { if (realFirstLine) { report.incrCounter(CSVReblockMR.NUM_COLS_IN_MATRIX, outKey.toString(), value.toString().split(delim, -1).length); realFirstLine = false; } if (!omit(value.toString())) num++; } }
From source file:com.ibm.bi.dml.runtime.matrix.mapred.CSVReblockMapper.java
License:Open Source License
@Override public void map(LongWritable key, Text value, OutputCollector<TaggedFirstSecondIndexes, BlockRow> out, Reporter reporter) throws IOException { if (first) {//w ww. ja v a 2s . co m rowOffset = offsetMap.get(key.get()); first = false; } if (key.get() == 0 && headerFile && ignoreFirstLine) return; String[] cells = IOUtilFunctions.split(value.toString(), _delim); for (int i = 0; i < representativeMatrixes.size(); i++) for (CSVReblockInstruction ins : csv_reblock_instructions.get(i)) { idxRow = processRow(idxRow, cells, rowOffset, num, ins.output, ins.brlen, ins.bclen, ins.fill, ins.fillValue, out); } num++; }
From source file:com.ibm.bi.dml.runtime.transform.ApplyTfBBMapper.java
License:Open Source License
@Override public void map(LongWritable rawKey, Text rawValue, OutputCollector<TaggedFirstSecondIndexes, CSVReblockMR.BlockRow> out, Reporter reporter) throws IOException { if (_first) { rowOffset = offsetMap.get(rawKey.get()); _reporter = reporter;//from w ww . jav a 2 s. c o m _first = false; } // output the header line if (rawKey.get() == 0 && _partFileWithHeader) { tfmapper.processHeaderLine(); if (tfmapper.hasHeader()) return; } // parse the input line and apply transformation String[] words = tfmapper.getWords(rawValue); if (!tfmapper.omit(words)) { words = tfmapper.apply(words); try { tfmapper.check(words); // Perform CSV Reblock CSVReblockInstruction ins = csv_reblock_instructions.get(0).get(0); idxRow = CSVReblockMapper.processRow(idxRow, words, rowOffset, num, ins.output, ins.brlen, ins.bclen, ins.fill, ins.fillValue, out); } catch (DMLRuntimeException e) { throw new RuntimeException(e.getMessage() + ":" + rawValue.toString()); } num++; } }
From source file:com.ibm.bi.dml.runtime.transform.ApplyTfCSVMapper.java
License:Open Source License
@Override public void map(LongWritable rawKey, Text rawValue, OutputCollector<NullWritable, Text> out, Reporter reporter) throws IOException { if (_firstRecordInSplit) { _firstRecordInSplit = false;// w w w .ja v a 2s . c o m _reporter = reporter; // generate custom output paths so that order of rows in the // output (across part files) matches w/ that from input data set String partFileSuffix = tfmapper.getPartFileID(_rJob, rawKey.get()); Path mapOutputPath = new Path(tfmapper.getOutputPath() + "/transform-part-" + partFileSuffix); // setup the writer for mapper's output // the default part-..... files will be deleted later once the job finishes br = new BufferedWriter(new OutputStreamWriter(FileSystem.get(_rJob).create(mapOutputPath, true))); } // output the header line if (rawKey.get() == 0 && _partFileWithHeader) { _reporter = reporter; tfmapper.processHeaderLine(); if (tfmapper.hasHeader()) return; } // parse the input line and apply transformation String[] words = tfmapper.getWords(rawValue); if (!tfmapper.omit(words)) { try { words = tfmapper.apply(words); String outStr = tfmapper.checkAndPrepOutputString(words); //out.collect(NullWritable.get(), new Text(outStr)); br.write(outStr + "\n"); } catch (DMLRuntimeException e) { throw new RuntimeException(e.getMessage() + ": " + rawValue.toString()); } } }
From source file:com.ibm.bi.dml.runtime.transform.GTFMTDMapper.java
License:Open Source License
public void map(LongWritable rawKey, Text rawValue, OutputCollector<IntWritable, DistinctValue> out, Reporter reporter) throws IOException { if (_firstRecordInSplit) { _firstRecordInSplit = false;/*from www . java2s. c om*/ _collector = out; _offsetInPartFile = rawKey.get(); } // ignore header if (_agents.hasHeader() && rawKey.get() == 0 && _partFileWithHeader) return; _agents.prepareTfMtd(rawValue.toString()); }
From source file:com.ibm.jaql.io.hadoop.converter.FromDelConverter.java
License:Apache License
/**
 * Converts the given line into a JSON value by delegating to the overload
 * that takes the key as a long and the text as a byte buffer plus length.
 *
 * @param key    key of the line
 * @param value  text of the line
 * @param target value handed through to the delegate
 * @return the value produced by the delegate
 */
@Override
public JsonValue convert(LongWritable key, Text value, JsonValue target) {
    final long position = key.get();
    final byte[] buffer = value.getBytes();
    final int length = value.getLength();
    return convert(position, buffer, length, target);
}
From source file:com.ibm.jaql.io.hadoop.converter.LongWritableToJson.java
License:Apache License
/**
 * Converts a {@code LongWritable} into a JSON long, reusing {@code target}
 * when it is already a {@code MutableJsonLong}.
 *
 * @param src    source value
 * @param target candidate for reuse; any other type (or null) leads to a new instance
 * @return the reused or newly created JSON long holding the value of {@code src}
 */
@Override
public JsonLong convert(LongWritable src, JsonValue target) {
    MutableJsonLong result;
    if (target instanceof MutableJsonLong) {
        result = (MutableJsonLong) target;
    } else {
        result = new MutableJsonLong();
    }
    result.set(src.get());
    return result;
}
From source file:com.impetus.code.examples.hadoop.mapred.earthquake.EarthQuakeMapper.java
License:Apache License
/**
 * Emits {@code (date, 1)} for every record whose first comma-separated field
 * converts to a date. The record with key 0 is skipped (presumably the header
 * line - verify against the input format).
 *
 * @param key     key of the record; records with a key of 0 or less are ignored
 * @param value   comma-separated record
 * @param context context receiving the output pair
 * @throws IOException          if writing to the context fails
 * @throws InterruptedException if writing to the context is interrupted
 */
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    if (key.get() <= 0) {
        return;
    }

    final String[] fields = value.toString().split(",");
    final String date = DateCoverter.convertDate(fields[0]);
    if (date == null) {
        // the first field did not yield a date; nothing to emit
        return;
    }

    context.write(new Text(date), new IntWritable(1));
}