List of usage examples for org.apache.hadoop.io.Text.getLength()
@Override public int getLength()
From source file:com.kylinolap.job.hadoop.cube.NewBaseCuboidMapperTest.java
License:Apache License
@Test @Ignore//from w w w .ja v a2 s . c o m public void testMapperWithHeader() throws Exception { String cubeName = "test_kylin_cube_with_slr_ready"; mapDriver.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName); // mapDriver.getConfiguration().set(BatchConstants.CFG_METADATA_URL, // metadata); mapDriver.withInput(new Text("key"), new Text("0,2013-05-05,Auction,80053,0,5,41.204172263562,0,10000638")); List<Pair<Text, Text>> result = mapDriver.run(); CubeManager cubeMgr = CubeManager.getInstance(this.getTestConfig()); CubeInstance cube = cubeMgr.getCube(cubeName); assertEquals(1, result.size()); Text rowkey = result.get(0).getFirst(); byte[] key = rowkey.getBytes(); byte[] header = Bytes.head(key, 26); byte[] sellerId = Bytes.tail(header, 18); byte[] cuboidId = Bytes.head(header, 8); byte[] restKey = Bytes.tail(key, rowkey.getLength() - 26); RowKeyDecoder decoder = new RowKeyDecoder(cube.getFirstSegment()); decoder.decode(key); assertEquals( "[10000638, 2013-05-05, Computers/Tablets & Networking, MonitorProjectors & Accs, Monitors, Auction, 0, 5]", decoder.getValues().toString()); assertTrue(Bytes.toString(sellerId).startsWith("10000638")); assertEquals(255, Bytes.toLong(cuboidId)); assertEquals(21, restKey.length); verifyMeasures(cube.getDescriptor().getMeasures(), result.get(0).getSecond(), "41.204172263562", "41.204172263562", "41.204172263562", 1); }
From source file:com.kylinolap.job.hadoop.cube.RangeKeyDistributionMapper.java
License:Apache License
@Override public void map(Text key, Text value, Context context) throws IOException, InterruptedException { lastKey = key;//ww w. j ava 2 s . c o m int bytesLength = key.getLength() + value.getLength(); bytesRead += bytesLength; if (bytesRead >= ONE_MEGA_BYTES) { outputValue.set(bytesRead); context.write(key, outputValue); // reset bytesRead bytesRead = 0; } }
From source file:com.kylinolap.job.hadoop.cube.RowKeyDistributionCheckerMapper.java
License:Apache License
/**
 * Finds the first entry of {@code keyList} that compares greater than the incoming key
 * and adds the byte length of the key/value pair to that entry's total in
 * {@code resultMap}. Records whose key is not below any entry are ignored.
 *
 * @param key     row key of the current record
 * @param value   value of the current record
 * @param context mapper context (not written to here)
 */
@Override
public void map(Text key, Text value, Context context) throws IOException, InterruptedException {
    for (Text boundary : keyList) {
        if (key.compareTo(boundary) >= 0) {
            continue;
        }

        Long total = resultMap.get(boundary);
        long pairLength = key.getLength() + value.getLength();
        total += pairLength;
        resultMap.put(boundary, total);

        // Only the first matching boundary is credited.
        break;
    }
}
From source file:com.kylinolap.job.hadoop.invertedindex.IIDistinctColumnsMapper.java
License:Apache License
@Override public void map(KEYIN key, Text value, Context context) throws IOException, InterruptedException { if (delim == -1) { delim = splitter.detectDelim(value, columns.length); }//ww w. j av a2s . c o m int nParts = splitter.split(value.getBytes(), value.getLength(), (byte) delim); SplittedBytes[] parts = splitter.getSplitBuffers(); if (nParts != columns.length) { throw new RuntimeException("Got " + parts.length + " from -- " + value.toString() + " -- but only " + columns.length + " expected"); } for (short i = 0; i < nParts; i++) { outputKey.set(i); outputValue.set(parts[i].value, 0, parts[i].length); context.write(outputKey, outputValue); } }
From source file:com.kylinolap.job.hadoop.invertedindex.IIDistinctColumnsReducer.java
License:Apache License
@Override public void reduce(ShortWritable key, Iterable<Text> values, Context context) throws IOException, InterruptedException { String columnName = columns[key.get()]; HashSet<ByteArray> set = new HashSet<ByteArray>(); for (Text textValue : values) { ByteArray value = new ByteArray(Bytes.copy(textValue.getBytes(), 0, textValue.getLength())); set.add(value);//from w ww . ja va 2 s . c o m } Configuration conf = context.getConfiguration(); FileSystem fs = FileSystem.get(conf); String outputPath = conf.get(BatchConstants.OUTPUT_PATH); FSDataOutputStream out = fs.create(new Path(outputPath, columnName)); try { for (ByteArray value : set) { out.write(value.data); out.write('\n'); } } finally { out.close(); } }
From source file:com.kylinolap.job.hadoop.invertedindex.InvertedIndexMapper.java
License:Apache License
@Override public void map(KEYIN key, Text value, Context context) throws IOException, InterruptedException { if (delim == -1) { delim = splitter.detectDelim(value, info.getColumnCount()); }// www . j a va 2s.c o m int nParts = splitter.split(value.getBytes(), value.getLength(), (byte) delim); SplittedBytes[] parts = splitter.getSplitBuffers(); if (nParts != info.getColumnCount()) { throw new RuntimeException("Got " + parts.length + " from -- " + value.toString() + " -- but only " + info.getColumnCount() + " expected"); } rec.reset(); for (int i = 0; i < nParts; i++) { rec.setValueString(i, Bytes.toString(parts[i].value, 0, parts[i].length)); } outputKey.set(rec.getTimestamp()); // outputValue's backing bytes array is the same as rec context.write(outputKey, outputValue); }
From source file:com.linkedin.json.JsonSequenceFileInputFormat.java
License:Apache License
/**
 * Creates a record reader that deserializes keys and values using the JSON schemas
 * stored in the sequence file's metadata under {@code key.schema} and
 * {@code value.schema}.
 *
 * <p>The sequence file is opened only to read its metadata and is closed again before
 * this method returns; the actual records are read by the reader obtained from
 * {@code baseInputFormat}.
 *
 * @param split   the split to read; must be a {@link FileSplit}
 * @param context task context supplying the configuration
 * @return a reader wrapping the base format's reader with the two schema serializers
 * @throws IOException if the file cannot be opened, if either schema is missing or
 *                     empty, or if the underlying reader cannot be created; the
 *                     original failure is attached as the cause
 */
@Override
public RecordReader<Object, Object> createRecordReader(final InputSplit split, final TaskAttemptContext context)
        throws IOException {
    Configuration conf = context.getConfiguration();
    String inputPathString = ((FileSplit) split).getPath().toUri().getPath();
    log.info("Input file path:" + inputPathString);
    Path inputPath = new Path(inputPathString);

    SequenceFile.Reader reader = new SequenceFile.Reader(inputPath.getFileSystem(conf), inputPath, conf);
    try {
        SequenceFile.Metadata meta = reader.getMetadata();
        final Text keySchema = meta.get(new Text("key.schema"));
        final Text valueSchema = meta.get(new Text("value.schema"));

        // A missing metadata entry is treated the same as an empty one instead of
        // surfacing as an unexplained NullPointerException.
        if (keySchema == null || valueSchema == null
                || 0 == keySchema.getLength() || 0 == valueSchema.getLength()) {
            throw new IOException(String.format(
                    "Cannot have a missing or 0 length schema. keySchema[%s], valueSchema[%s]",
                    keySchema, valueSchema));
        }

        return new JsonObjectRecordReader(new JsonTypeSerializer(keySchema.toString()),
                new JsonTypeSerializer(valueSchema.toString()),
                baseInputFormat.createRecordReader(split, context));
    } catch (Exception e) {
        if (e instanceof InterruptedException) {
            // This method cannot propagate InterruptedException; keep the flag set.
            Thread.currentThread().interrupt();
        }
        // Keep the cause so the real reason for the failure is not lost.
        throw new IOException("Failed to Load Schema from file:" + inputPathString + "\n", e);
    } finally {
        // The reader was only needed for its metadata.
        reader.close();
    }
}
From source file:com.lovelysystems.hive.udf.ESHashUDF.java
License:Apache License
/**
 * Computes a DJB-style hash of the given text: starting from 5381, each step
 * multiplies the running hash by 33 and adds {@code value.charAt(i)} for every
 * position {@code i} from 0 up to {@code value.getLength() - 1}.
 *
 * @param value the text to hash
 * @return the resulting hash as a {@code long} (overflow wraps silently)
 */
private static long DJB_HASH(Text value) {
    final int length = value.getLength();
    long hash = 5381;
    int position = 0;
    while (position < length) {
        // hash * 33 is the same as (hash << 5) + hash in wrapping long arithmetic.
        hash = hash * 33 + value.charAt(position);
        position++;
    }
    return hash;
}
From source file:com.marklogic.contentpump.SingleDocumentWriter.java
License:Apache License
@Override public void write(DocumentURI uri, MarkLogicDocument content) throws IOException, InterruptedException { OutputStream os = null;/* www .j a v a2 s. com*/ try { String childPath = URIUtil.getPathFromURI(uri); Path path; if (childPath.charAt(0) == '/') { // concatenate outputPath with path to form the path path = new Path(dir.toString() + childPath); } else { path = new Path(dir, childPath); } FileSystem fs = path.getFileSystem(conf); if (fs instanceof DistributedFileSystem) { os = fs.create(path, false); } else { File f = new File(path.toUri().getPath()); if (!f.exists()) { f.getParentFile().mkdirs(); f.createNewFile(); } os = new FileOutputStream(f, false); } ContentType type = content.getContentType(); if (ContentType.BINARY.equals(type)) { if (content.isStreamable()) { InputStream is = null; try { is = content.getContentAsByteStream(); long size = content.getContentSize(); long bufSize = Math.min(size, 512 << 10); byte[] buf = new byte[(int) bufSize]; for (long toRead = size, read = 0; toRead > 0; toRead -= read) { read = is.read(buf, 0, (int) bufSize); if (read > 0) { os.write(buf, 0, (int) read); } else { LOG.error("Premature EOF: uri=" + uri + ",toRead=" + toRead); break; } } } finally { if (is != null) { is.close(); } } } else { os.write(content.getContentAsByteArray()); } } else if (ContentType.TEXT.equals(type) || ContentType.XML.equals(type) || ContentType.JSON.equals(type)) { if (encoding.equals("UTF-8")) { Text t = content.getContentAsText(); os.write(t.getBytes(), 0, t.getLength()); } else { String t = content.getContentAsString(); os.write(t.getBytes(encoding)); } if (LOG.isTraceEnabled()) { Text t = content.getContentAsText(); LOG.trace(t); byte[] bytes = content.getContentAsByteArray(); StringBuilder sb = new StringBuilder(); for (int i = 0; i < bytes.length; i++) { sb.append(Byte.toString(bytes[i])); sb.append(" "); } LOG.trace(sb); } } else { LOG.error("Skipping " + uri + ". 
Unsupported content type: " + type.name()); } } catch (Exception e) { LOG.error("Error saving: " + uri, e); } finally { if (os != null) { os.close(); } } }
From source file:com.mycustomloader.vsamloader.VSAMLoader.java
License:Apache License
@Override public Tuple getNext() throws IOException { mProtoTuple = new ArrayList<Object>(); boolean inField = false; boolean inQuotedField = false; boolean evenQuotesSeen = true; if (!mRequiredColumnsInitialized) { if (signature != null) { Properties p = UDFContext.getUDFContext().getUDFProperties(this.getClass()); mRequiredColumns = (boolean[]) ObjectSerializer.deserialize(p.getProperty(signature)); }//w w w. j a v a 2s . co m mRequiredColumnsInitialized = true; } try { if (!in.nextKeyValue()) { return null; } Text value = (Text) in.getCurrentValue(); byte[] buf = value.getBytes(); int len = value.getLength(); int fieldID = 0; ByteBuffer fieldBuffer = ByteBuffer.allocate(len); for (int i = 0; i < len; i++) { byte b = buf[i]; inField = true; if (inQuotedField) { if (b == DOUBLE_QUOTE) { evenQuotesSeen = !evenQuotesSeen; if (evenQuotesSeen) { fieldBuffer.put(DOUBLE_QUOTE); } } else if (!evenQuotesSeen && (b == FIELD_DEL || b == RECORD_DEL)) { inQuotedField = false; inField = false; readField(fieldBuffer, fieldID++); } else { fieldBuffer.put(b); } } else if (b == DOUBLE_QUOTE) { inQuotedField = true; evenQuotesSeen = true; } else if (b == FIELD_DEL) { inField = false; readField(fieldBuffer, fieldID++); // end of the field } else { evenQuotesSeen = true; fieldBuffer.put(b); } } if (inField) readField(fieldBuffer, fieldID++); } catch (InterruptedException e) { int errCode = 6018; String errMsg = "Error while reading input"; throw new ExecException(errMsg, errCode, PigException.REMOTE_ENVIRONMENT, e); } Tuple t = mTupleFactory.newTupleNoCopy(mProtoTuple); return t; }