Example usage for org.apache.hadoop.io Text getLength

List of usage examples for org.apache.hadoop.io Text getLength

Introduction

On this page you can find example usages of org.apache.hadoop.io Text getLength.

Prototype

@Override
public int getLength() 

Source Link

Document

Returns the number of bytes in the byte array

Usage

From source file:com.kylinolap.job.hadoop.cube.NewBaseCuboidMapperTest.java

License:Apache License

/**
 * Feeds one comma-separated fact row through the mapper and verifies the emitted row key
 * and measures.
 *
 * <p>The test treats the first 26 bytes of the emitted key as a header: the leading 8 bytes
 * are read as the cuboid id and the trailing 18 bytes as the seller id. Everything after the
 * header is expected to be 21 bytes long.
 *
 * @throws Exception if the map driver or the row key decoder fails
 */
@Test
@Ignore
public void testMapperWithHeader() throws Exception {
    String cubeName = "test_kylin_cube_with_slr_ready";
    mapDriver.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
    // mapDriver.getConfiguration().set(BatchConstants.CFG_METADATA_URL,
    // metadata);
    mapDriver.withInput(new Text("key"), new Text("0,2013-05-05,Auction,80053,0,5,41.204172263562,0,10000638"));

    List<Pair<Text, Text>> result = mapDriver.run();

    CubeManager cubeMgr = CubeManager.getInstance(this.getTestConfig());
    CubeInstance cube = cubeMgr.getCube(cubeName);

    assertEquals(1, result.size());
    Text rowkey = result.get(0).getFirst();
    // Text.getBytes() returns the backing array, which is only valid up to getLength() and
    // may be longer. Trim it so that tail-based slicing and decoding never see stale padding.
    byte[] key = java.util.Arrays.copyOf(rowkey.getBytes(), rowkey.getLength());
    byte[] header = Bytes.head(key, 26);
    byte[] sellerId = Bytes.tail(header, 18);
    byte[] cuboidId = Bytes.head(header, 8);
    byte[] restKey = Bytes.tail(key, key.length - 26);

    RowKeyDecoder decoder = new RowKeyDecoder(cube.getFirstSegment());
    decoder.decode(key);
    assertEquals(
            "[10000638, 2013-05-05, Computers/Tablets & Networking, MonitorProjectors & Accs, Monitors, Auction, 0, 5]",
            decoder.getValues().toString());

    assertTrue(Bytes.toString(sellerId).startsWith("10000638"));
    assertEquals(255, Bytes.toLong(cuboidId));
    assertEquals(21, restKey.length);

    verifyMeasures(cube.getDescriptor().getMeasures(), result.get(0).getSecond(), "41.204172263562",
            "41.204172263562", "41.204172263562", 1);
}

From source file:com.kylinolap.job.hadoop.cube.RangeKeyDistributionMapper.java

License:Apache License

/**
 * Adds the byte length of the incoming key and value to the running total and, once the
 * total reaches {@code ONE_MEGA_BYTES}, emits the current key with that total and starts
 * counting again from zero. The most recent key is always remembered in {@code lastKey}.
 *
 * @param key     current input key
 * @param value   current input value
 * @param context task context used to emit output
 * @throws IOException          if writing the output fails
 * @throws InterruptedException if the write is interrupted
 */
@Override
public void map(Text key, Text value, Context context) throws IOException, InterruptedException {
    lastKey = key;

    bytesRead += key.getLength() + value.getLength();
    if (bytesRead < ONE_MEGA_BYTES) {
        // threshold not reached yet; keep accumulating
        return;
    }

    outputValue.set(bytesRead);
    context.write(key, outputValue);

    // start a fresh accumulation window
    bytesRead = 0;
}

From source file:com.kylinolap.job.hadoop.cube.RowKeyDistributionCheckerMapper.java

License:Apache License

/**
 * Finds the first entry of {@code keyList} that the incoming key compares below and adds the
 * byte length of the key/value pair to that entry's total in {@code resultMap}. Only the
 * first matching entry is updated; if none matches, nothing is recorded.
 *
 * @param key     current input key
 * @param value   current input value
 * @param context task context (not used by this method)
 * @throws IOException          declared by the mapper contract
 * @throws InterruptedException declared by the mapper contract
 */
@Override
public void map(Text key, Text value, Context context) throws IOException, InterruptedException {
    for (Text boundary : keyList) {
        if (key.compareTo(boundary) >= 0) {
            continue;
        }
        Long total = resultMap.get(boundary);
        long pairSize = key.getLength() + value.getLength();
        total += pairSize;
        resultMap.put(boundary, total);
        return;
    }
}

From source file:com.kylinolap.job.hadoop.invertedindex.IIDistinctColumnsMapper.java

License:Apache License

/**
 * Splits one input line into columns and emits each column value keyed by its column index.
 *
 * <p>The delimiter is detected from the first record seen (while {@code delim} is still
 * {@code -1}) and reused afterwards.
 *
 * @param key     input key (not used by this method)
 * @param value   the raw line; only its first {@code getLength()} bytes are split
 * @param context task context used to emit output
 * @throws IOException          if writing the output fails
 * @throws InterruptedException if the write is interrupted
 * @throws RuntimeException     if the number of parts found differs from {@code columns.length}
 */
@Override
public void map(KEYIN key, Text value, Context context) throws IOException, InterruptedException {
    if (delim == -1) {
        delim = splitter.detectDelim(value, columns.length);
    }

    int nParts = splitter.split(value.getBytes(), value.getLength(), (byte) delim);
    SplittedBytes[] parts = splitter.getSplitBuffers();

    if (nParts != columns.length) {
        // Report the number of parts actually found, not the size of the splitter's buffer array.
        throw new RuntimeException("Got " + nParts + " from -- " + value.toString() + " -- but only "
                + columns.length + " expected");
    }

    for (short i = 0; i < nParts; i++) {
        outputKey.set(i);
        outputValue.set(parts[i].value, 0, parts[i].length);
        context.write(outputKey, outputValue);
    }
}

From source file:com.kylinolap.job.hadoop.invertedindex.IIDistinctColumnsReducer.java

License:Apache License

/**
 * Collects the distinct values received for one column and writes them, one per line, to a
 * file named after the column under the directory configured by
 * {@code BatchConstants.OUTPUT_PATH}.
 *
 * @param key     index into {@code columns} identifying the column
 * @param values  all values observed for that column
 * @param context task context supplying the job configuration
 * @throws IOException          if the output file cannot be created or written
 * @throws InterruptedException declared by the reducer contract
 */
@Override
public void reduce(ShortWritable key, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {
    final String column = columns[key.get()];

    // Copy only the valid bytes of each Text so that equal values deduplicate in the set.
    final HashSet<ByteArray> distinct = new HashSet<ByteArray>();
    for (Text text : values) {
        distinct.add(new ByteArray(Bytes.copy(text.getBytes(), 0, text.getLength())));
    }

    final Configuration jobConf = context.getConfiguration();
    final FileSystem fileSystem = FileSystem.get(jobConf);
    final String outputDir = jobConf.get(BatchConstants.OUTPUT_PATH);
    final FSDataOutputStream stream = fileSystem.create(new Path(outputDir, column));

    try {
        for (ByteArray entry : distinct) {
            stream.write(entry.data);
            stream.write('\n');
        }
    } finally {
        stream.close();
    }
}

From source file:com.kylinolap.job.hadoop.invertedindex.InvertedIndexMapper.java

License:Apache License

/**
 * Splits one input line into columns, loads them into the reusable record {@code rec}, and
 * emits the record keyed by its timestamp.
 *
 * <p>The delimiter is detected from the first record seen (while {@code delim} is still
 * {@code -1}) and reused afterwards.
 *
 * @param key     input key (not used by this method)
 * @param value   the raw line; only its first {@code getLength()} bytes are split
 * @param context task context used to emit output
 * @throws IOException          if writing the output fails
 * @throws InterruptedException if the write is interrupted
 * @throws RuntimeException     if the number of parts found differs from
 *                              {@code info.getColumnCount()}
 */
@Override
public void map(KEYIN key, Text value, Context context) throws IOException, InterruptedException {
    if (delim == -1) {
        delim = splitter.detectDelim(value, info.getColumnCount());
    }

    int nParts = splitter.split(value.getBytes(), value.getLength(), (byte) delim);
    SplittedBytes[] parts = splitter.getSplitBuffers();

    if (nParts != info.getColumnCount()) {
        // Report the number of parts actually found, not the size of the splitter's buffer array.
        throw new RuntimeException("Got " + nParts + " from -- " + value.toString() + " -- but only "
                + info.getColumnCount() + " expected");
    }

    rec.reset();
    for (int i = 0; i < nParts; i++) {
        rec.setValueString(i, Bytes.toString(parts[i].value, 0, parts[i].length));
    }

    outputKey.set(rec.getTimestamp());
    // outputValue's backing bytes array is the same as rec

    context.write(outputKey, outputValue);
}

From source file:com.linkedin.json.JsonSequenceFileInputFormat.java

License:Apache License

/**
 * Creates a record reader that decodes keys and values using the JSON schemas stored in the
 * sequence file's metadata under {@code key.schema} and {@code value.schema}.
 *
 * <p>The sequence file is opened only to read its metadata and is closed again before this
 * method returns; the actual records are read by the reader obtained from
 * {@code baseInputFormat}.
 *
 * @param split   the split to read; must be a {@code FileSplit}
 * @param context task context supplying the job configuration
 * @return a reader that wraps the base reader with the key and value schema serializers
 * @throws IOException if the file cannot be opened, or if either schema is missing or empty,
 *                     or if the underlying reader cannot be created (the original failure is
 *                     attached as the cause)
 */
@Override
public RecordReader<Object, Object> createRecordReader(final InputSplit split, final TaskAttemptContext context)
        throws IOException {
    Configuration conf = context.getConfiguration();

    String inputPathString = ((FileSplit) split).getPath().toUri().getPath();
    log.info("Input file path:" + inputPathString);
    Path inputPath = new Path(inputPathString);

    SequenceFile.Reader reader = new SequenceFile.Reader(inputPath.getFileSystem(conf), inputPath, conf);

    try {
        SequenceFile.Metadata meta = reader.getMetadata();
        final Text keySchema = meta.get(new Text("key.schema"));
        final Text valueSchema = meta.get(new Text("value.schema"));

        // A missing metadata entry is treated the same way as an empty one.
        if (keySchema == null || valueSchema == null || 0 == keySchema.getLength()
                || 0 == valueSchema.getLength()) {
            throw new IOException(String.format("Cannot have a 0 length schema. keySchema[%s], valueSchema[%s]",
                    keySchema, valueSchema));
        }

        return new JsonObjectRecordReader(new JsonTypeSerializer(keySchema.toString()),
                new JsonTypeSerializer(valueSchema.toString()),
                baseInputFormat.createRecordReader(split, context));
    } catch (Exception e) {
        // Keep the original failure as the cause so the root problem is not lost.
        throw new IOException("Failed to Load Schema from file:" + inputPathString + "\n", e);
    } finally {
        // The reader was needed only for its metadata; release the file handle.
        reader.close();
    }
}

From source file:com.lovelysystems.hive.udf.ESHashUDF.java

License:Apache License

/**
 * Computes a DJB-style hash: starting from 5381, each step multiplies the running hash by 33
 * and adds {@code value.charAt(i)} for every position {@code i} below
 * {@code value.getLength()}.
 *
 * <p>NOTE(review): {@code getLength()} is a byte count and positions are passed to
 * {@code Text.charAt} as-is, so for non-ASCII content the result may differ from a hash
 * computed over the characters of the equivalent {@code String} — confirm this is intended.
 *
 * @param value the text to hash
 * @return the accumulated hash (wraps on overflow)
 */
private static long DJB_HASH(Text value) {
    final int length = value.getLength();
    long hash = 5381;

    int position = 0;
    while (position < length) {
        // hash * 33 is the same as (hash << 5) + hash
        hash = hash * 33 + value.charAt(position);
        position++;
    }
    return hash;
}

From source file:com.marklogic.contentpump.SingleDocumentWriter.java

License:Apache License

/**
 * Writes one document to a file whose path is derived from the document URI and the output
 * directory {@code dir}.
 *
 * <p>Binary content is copied as bytes (streamed in chunks when the content is streamable).
 * Text, XML and JSON content is written either directly from its {@code Text} bytes when the
 * configured encoding is UTF-8, or re-encoded from its string form otherwise. Any other
 * content type is logged and skipped.
 *
 * <p>Failures while resolving the path or writing are caught and logged, not rethrown; only
 * a failure while closing the output stream can propagate from this method.
 *
 * @param uri     URI of the document; determines the output path
 * @param content the document to write
 * @throws IOException          if closing the output stream fails
 * @throws InterruptedException declared by the writer contract
 */
@Override
public void write(DocumentURI uri, MarkLogicDocument content) throws IOException, InterruptedException {
    OutputStream os = null;
    try {
        String childPath = URIUtil.getPathFromURI(uri);
        Path path;
        if (childPath.charAt(0) == '/') {
            // concatenate outputPath with path to form the path
            path = new Path(dir.toString() + childPath);
        } else {
            path = new Path(dir, childPath);
        }
        FileSystem fs = path.getFileSystem(conf);
        if (fs instanceof DistributedFileSystem) {
            // second argument is false: presumably "do not overwrite" — confirm against the FileSystem API
            os = fs.create(path, false);
        } else {
            // Non-distributed file system: go through java.io.File, creating parent directories first.
            File f = new File(path.toUri().getPath());
            if (!f.exists()) {
                f.getParentFile().mkdirs();
                f.createNewFile();
            }
            // append=false: existing file content is replaced
            os = new FileOutputStream(f, false);
        }

        ContentType type = content.getContentType();
        if (ContentType.BINARY.equals(type)) {
            if (content.isStreamable()) {
                InputStream is = null;
                try {
                    is = content.getContentAsByteStream();
                    long size = content.getContentSize();
                    // Buffer is capped at 512 << 10 bytes (512 KiB) or the content size, whichever is smaller.
                    long bufSize = Math.min(size, 512 << 10);
                    byte[] buf = new byte[(int) bufSize];
                    // Copy until the declared content size has been consumed.
                    for (long toRead = size, read = 0; toRead > 0; toRead -= read) {
                        read = is.read(buf, 0, (int) bufSize);
                        if (read > 0) {
                            os.write(buf, 0, (int) read);
                        } else {
                            // The stream ended before the declared size was reached.
                            LOG.error("Premature EOF: uri=" + uri + ",toRead=" + toRead);
                            break;
                        }
                    }
                } finally {
                    if (is != null) {
                        is.close();
                    }
                }
            } else {
                os.write(content.getContentAsByteArray());
            }
        } else if (ContentType.TEXT.equals(type) || ContentType.XML.equals(type)
                || ContentType.JSON.equals(type)) {
            if (encoding.equals("UTF-8")) {
                Text t = content.getContentAsText();
                // Write only the first getLength() bytes; the backing array may be longer than the valid data.
                os.write(t.getBytes(), 0, t.getLength());
            } else {
                String t = content.getContentAsString();
                os.write(t.getBytes(encoding));
            }
            if (LOG.isTraceEnabled()) {
                // Trace output: the text itself, then every content byte as a decimal number.
                Text t = content.getContentAsText();
                LOG.trace(t);
                byte[] bytes = content.getContentAsByteArray();
                StringBuilder sb = new StringBuilder();
                for (int i = 0; i < bytes.length; i++) {
                    sb.append(Byte.toString(bytes[i]));
                    sb.append(" ");
                }
                LOG.trace(sb);
            }
        } else {
            LOG.error("Skipping " + uri + ".  Unsupported content type: " + type.name());
        }
    } catch (Exception e) {
        // Failures are logged and swallowed, so the caller is not notified of a failed write.
        LOG.error("Error saving: " + uri, e);
    } finally {
        if (os != null) {
            os.close();
        }
    }
}

From source file:com.mycustomloader.vsamloader.VSAMLoader.java

License:Apache License

/**
 * Reads the next record from the underlying reader and parses it byte by byte into fields,
 * honouring double-quoted fields in which a doubled quote stands for one literal quote.
 *
 * <p>On the first call, the required-column mask is loaded from the UDF properties stored
 * under {@code signature} (when a signature is set).
 *
 * @return a tuple built from {@code mProtoTuple}, or {@code null} when the reader has no
 *         more records
 * @throws IOException if reading fails; an interruption while reading is rethrown as an
 *                     {@code ExecException} with error code 6018
 */
@Override
public Tuple getNext() throws IOException {
    mProtoTuple = new ArrayList<Object>();

    // Parser state for the current record.
    boolean inField = false;
    boolean inQuotedField = false;
    boolean evenQuotesSeen = true;

    if (!mRequiredColumnsInitialized) {
        if (signature != null) {
            Properties p = UDFContext.getUDFContext().getUDFProperties(this.getClass());
            mRequiredColumns = (boolean[]) ObjectSerializer.deserialize(p.getProperty(signature));
        }
        mRequiredColumnsInitialized = true;
    }
    try {
        if (!in.nextKeyValue()) {
            return null;
        }
        Text value = (Text) in.getCurrentValue();
        byte[] buf = value.getBytes();
        // Only the first getLength() bytes of the backing array are scanned.
        int len = value.getLength();
        int fieldID = 0;

        // Sized to the record length, so a single field can never overflow it.
        ByteBuffer fieldBuffer = ByteBuffer.allocate(len);

        for (int i = 0; i < len; i++) {
            byte b = buf[i];
            inField = true;
            if (inQuotedField) {
                if (b == DOUBLE_QUOTE) {
                    // Each quote inside a quoted field flips the parity; flipping back to "even"
                    // means two quotes in a row, which yields one literal quote.
                    evenQuotesSeen = !evenQuotesSeen;
                    if (evenQuotesSeen) {
                        fieldBuffer.put(DOUBLE_QUOTE);
                    }
                } else if (!evenQuotesSeen && (b == FIELD_DEL || b == RECORD_DEL)) {
                    // An odd number of quotes followed by a delimiter closes the quoted field.
                    inQuotedField = false;
                    inField = false;
                    // presumably readField stores the buffered bytes as field fieldID and resets the buffer — confirm
                    readField(fieldBuffer, fieldID++);
                } else {
                    fieldBuffer.put(b);
                }
            } else if (b == DOUBLE_QUOTE) {
                // Opening quote: the quote itself is not added to the field.
                inQuotedField = true;
                evenQuotesSeen = true;
            } else if (b == FIELD_DEL) {
                inField = false;
                readField(fieldBuffer, fieldID++); // end of the field
            } else {
                evenQuotesSeen = true;
                fieldBuffer.put(b);
            }
        }
        // Flush the last field when the record does not end with a delimiter.
        if (inField)
            readField(fieldBuffer, fieldID++);
    } catch (InterruptedException e) {
        int errCode = 6018;
        String errMsg = "Error while reading input";
        throw new ExecException(errMsg, errCode, PigException.REMOTE_ENVIRONMENT, e);
    }

    Tuple t = mTupleFactory.newTupleNoCopy(mProtoTuple);
    return t;
}