Example usage for org.apache.hadoop.io BytesWritable getBytes

List of usage examples for org.apache.hadoop.io BytesWritable getBytes

Introduction

On this page you can find example usage for org.apache.hadoop.io BytesWritable getBytes.

Prototype

@Override
public byte[] getBytes() 

Source Link

Document

Get the data backing the BytesWritable.

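Note that the array returned by getBytes() is the backing buffer and may be longer than the valid data, so callers typically pair it with getLength(), or call copyBytes() (available in recent Hadoop versions) when an exact-size array is needed. Below is a minimal, self-contained sketch of that distinction; the class name BytesWritableGetBytesExample is illustrative and not taken from any of the projects quoted below.

import java.nio.charset.StandardCharsets;
import java.util.Arrays;

import org.apache.hadoop.io.BytesWritable;

// Minimal sketch: getBytes() returns the backing buffer, which may be longer
// than the valid data, so pair it with getLength() or use copyBytes().
public class BytesWritableGetBytesExample {
    public static void main(String[] args) {
        BytesWritable writable = new BytesWritable("hadoop".getBytes(StandardCharsets.UTF_8));
        writable.setSize(3); // shrink the valid region; the backing buffer keeps its capacity

        byte[] backing = writable.getBytes();                        // backing buffer, length >= 3
        byte[] valid = Arrays.copyOf(backing, writable.getLength()); // exactly the valid bytes
        byte[] copy = writable.copyBytes();                          // exact-size copy

        System.out.println(backing.length >= writable.getLength()); // true
        System.out.println(Arrays.equals(valid, copy));             // true
    }
}
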
Usage

From source file:org.apache.kylin.job.tools.ColumnCardinalityMapperTest.java

License:Apache License

@SuppressWarnings({ "unchecked" })
@Test
@Ignore
public void testMapperOn177() throws IOException {
    mapDriver.clearInput();
    File file = new File("src/test/resources/data/test_cal_dt/part-r-00000");
    FileReader reader = new FileReader(file);
    BufferedReader breader = new BufferedReader(reader);
    String s = breader.readLine();
    int i = 0;
    while (s != null) {
        LongWritable inputKey = new LongWritable(i++);
        mapDriver.addInput(inputKey, new Text(s));
        s = breader.readLine();
    }
    // breader.close();
    List<Pair<IntWritable, BytesWritable>> result = mapDriver.run();
    breader.close();
    assertEquals(9, result.size());

    int key1 = result.get(0).getFirst().get();
    BytesWritable value1 = result.get(0).getSecond();
    byte[] bytes = value1.getBytes();
    HyperLogLogPlusCounter hllc = new HyperLogLogPlusCounter();
    hllc.readRegisters(ByteBuffer.wrap(bytes));
    assertTrue(key1 > 0);
    assertEquals(8, hllc.getCountEstimate());
}

From source file:org.apache.kylin.source.hive.cardinality.ColumnCardinalityReducer.java

License:Apache License

@Override
public void doReduce(IntWritable key, Iterable<BytesWritable> values, Context context)
        throws IOException, InterruptedException {
    int skey = key.get();
    for (BytesWritable v : values) {
        ByteBuffer buffer = ByteBuffer.wrap(v.getBytes());
        HLLCounter hll = new HLLCounter();
        hll.readRegisters(buffer);
        getHllc(skey).merge(hll);
        hll.clear();
    }
}

From source file:org.apache.kylin.source.kafka.hadoop.KafkaFlatTableMapper.java

License:Apache License

@Override
public void doMap(LongWritable key, BytesWritable value, Context context)
        throws IOException, InterruptedException {
    outKey.set(Bytes.toBytes(key.get()));
    outValue.set(value.getBytes(), 0, value.getLength());
    context.write(outKey, outValue);
}

From source file:org.apache.mahout.text.SequenceFilesFromDirectoryMapper.java

License:Apache License

public void map(IntWritable key, BytesWritable value, Context context)
        throws IOException, InterruptedException {

    Configuration configuration = context.getConfiguration();
    Path filePath = ((CombineFileSplit) context.getInputSplit()).getPath(key.get());
    String relativeFilePath = HadoopUtil.calcRelativeFilePath(configuration, filePath);

    String filename = this.keyPrefix.length() > 0 ? this.keyPrefix + Path.SEPARATOR + relativeFilePath
            : Path.SEPARATOR + relativeFilePath;

    fileValue.set(value.getBytes(), 0, value.getBytes().length);
    context.write(new Text(filename), fileValue);
}

From source file:org.apache.mahout.text.SequenceFilesFromMailArchivesMapper.java

License:Apache License

public void map(IntWritable key, BytesWritable value, Context context)
        throws IOException, InterruptedException {
    Configuration configuration = context.getConfiguration();
    Path filePath = ((CombineFileSplit) context.getInputSplit()).getPath(key.get());
    String relativeFilePath = HadoopUtil.calcRelativeFilePath(configuration, filePath);
    ByteArrayInputStream is = new ByteArrayInputStream(value.getBytes());
    parseMailboxLineByLine(relativeFilePath, is, context);
}

From source file:org.apache.nutch.tools.arc.ArcSegmentCreator.java

License:Apache License

/**
 * <p>Runs the Map job to translate an arc record into output for Nutch 
 * segments.</p>
 * 
 * @param key The arc record header.
 * @param bytes The arc record raw content bytes.
 * @param output The output collector.
 * @param reporter The progress reporter.
 */
public void map(Text key, BytesWritable bytes, OutputCollector<Text, NutchWritable> output, Reporter reporter)
        throws IOException {

    String[] headers = key.toString().split("\\s+");
    String urlStr = headers[0];
    String version = headers[2];
    String contentType = headers[3];

    // arcs start with a file description.  for now we ignore this as it is not
    // a content record
    if (urlStr.startsWith("filedesc://")) {
        LOG.info("Ignoring file header: " + urlStr);
        return;
    }
    LOG.info("Processing: " + urlStr);

    // get the raw  bytes from the arc file, create a new crawldatum
    Text url = new Text();
    CrawlDatum datum = new CrawlDatum(CrawlDatum.STATUS_DB_FETCHED, interval, 1.0f);
    String segmentName = getConf().get(Nutch.SEGMENT_NAME_KEY);

    // normalize and filter the urls
    try {
        urlStr = normalizers.normalize(urlStr, URLNormalizers.SCOPE_FETCHER);
        urlStr = urlFilters.filter(urlStr); // filter the url
    } catch (Exception e) {
        if (LOG.isWarnEnabled()) {
            LOG.warn("Skipping " + url + ":" + e);
        }
        urlStr = null;
    }

    // if still a good url then process
    if (urlStr != null) {

        url.set(urlStr);
        try {

            // set the protocol status to success and the crawl status to success
            // create the content from the normalized url and the raw bytes from
            // the arc file,  TODO: currently this doesn't handle text of error
            // pages (i.e. 404, etc.). We assume we won't get those.
            ProtocolStatus status = ProtocolStatus.STATUS_SUCCESS;
            Content content = new Content(urlStr, urlStr, bytes.getBytes(), contentType, new Metadata(),
                    getConf());

            // set the url version into the metadata
            content.getMetadata().set(URL_VERSION, version);
            ParseStatus pstatus = null;
            pstatus = output(output, segmentName, url, datum, content, status, CrawlDatum.STATUS_FETCH_SUCCESS);
            reporter.progress();
        } catch (Throwable t) { // unexpected exception
            logError(url, t);
            output(output, segmentName, url, datum, null, null, CrawlDatum.STATUS_FETCH_RETRY);
        }
    }
}

From source file:org.apache.orc.TestColumnStatistics.java

License:Apache License

private static BytesWritable bytes(int... items) {
    BytesWritable result = new BytesWritable();
    result.setSize(items.length);
    for (int i = 0; i < items.length; ++i) {
        result.getBytes()[i] = (byte) items[i];
    }
    return result;
}

From source file:org.apache.orc.TestColumnStatistics.java

License:Apache License

void appendRow(VectorizedRowBatch batch, BytesWritable bytes, String str) {
    int row = batch.size++;
    if (bytes == null) {
        batch.cols[0].noNulls = false;
        batch.cols[0].isNull[row] = true;
    } else {
        ((BytesColumnVector) batch.cols[0]).setVal(row, bytes.getBytes(), 0, bytes.getLength());
    }
    if (str == null) {
        batch.cols[1].noNulls = false;
        batch.cols[1].isNull[row] = true;
    } else {
        ((BytesColumnVector) batch.cols[1]).setVal(row, str.getBytes());
    }
}

From source file:org.apache.orc.TestVectorOrcFile.java

License:Apache License

private static void setBigRow(VectorizedRowBatch batch, int rowId, Boolean b1, Byte b2, Short s1, Integer i1,
        Long l1, Float f1, Double d1, BytesWritable b3, String s2, MiddleStruct m1, List<InnerStruct> l2,
        Map<String, InnerStruct> m2) {
    ((LongColumnVector) batch.cols[0]).vector[rowId] = b1 ? 1 : 0;
    ((LongColumnVector) batch.cols[1]).vector[rowId] = b2;
    ((LongColumnVector) batch.cols[2]).vector[rowId] = s1;
    ((LongColumnVector) batch.cols[3]).vector[rowId] = i1;
    ((LongColumnVector) batch.cols[4]).vector[rowId] = l1;
    ((DoubleColumnVector) batch.cols[5]).vector[rowId] = f1;
    ((DoubleColumnVector) batch.cols[6]).vector[rowId] = d1;
    if (b3 != null) {
        ((BytesColumnVector) batch.cols[7]).setVal(rowId, b3.getBytes(), 0, b3.getLength());
    } else {
        batch.cols[7].isNull[rowId] = true;
        batch.cols[7].noNulls = false;
    }
    if (s2 != null) {
        ((BytesColumnVector) batch.cols[8]).setVal(rowId, s2.getBytes());
    } else {
        batch.cols[8].isNull[rowId] = true;
        batch.cols[8].noNulls = false;
    }
    setMiddleStruct((StructColumnVector) batch.cols[9], rowId, m1);
    setInnerList((ListColumnVector) batch.cols[10], rowId, l2);
    setInnerMap((MapColumnVector) batch.cols[11], rowId, m2);
}

From source file:org.apache.orc.TestVectorOrcFile.java

License:Apache License

private static void checkBigRow(VectorizedRowBatch batch, int rowInBatch, int rowId, boolean b1, byte b2,
        short s1, int i1, long l1, float f1, double d1, BytesWritable b3, String s2, MiddleStruct m1,
        List<InnerStruct> l2, Map<String, InnerStruct> m2) {
    assertEquals("row " + rowId, b1, getBoolean(batch, rowInBatch));
    assertEquals("row " + rowId, b2, getByte(batch, rowInBatch));
    assertEquals("row " + rowId, s1, getShort(batch, rowInBatch));
    assertEquals("row " + rowId, i1, getInt(batch, rowInBatch));
    assertEquals("row " + rowId, l1, getLong(batch, rowInBatch));
    assertEquals("row " + rowId, f1, getFloat(batch, rowInBatch), 0.0001);
    assertEquals("row " + rowId, d1, getDouble(batch, rowInBatch), 0.0001);
    if (b3 != null) {
        BytesColumnVector bytes = (BytesColumnVector) batch.cols[7];
        assertEquals("row " + rowId, b3.getLength(), bytes.length[rowInBatch]);
        for (int i = 0; i < b3.getLength(); ++i) {
            assertEquals("row " + rowId + " byte " + i, b3.getBytes()[i],
                    bytes.vector[rowInBatch][bytes.start[rowInBatch] + i]);
        }
    } else {
        assertEquals("row " + rowId, true, batch.cols[7].isNull[rowInBatch]);
        assertEquals("row " + rowId, false, batch.cols[7].noNulls);
    }
    if (s2 != null) {
        assertEquals("row " + rowId, s2, getText(batch, rowInBatch).toString());
    } else {
        assertEquals("row " + rowId, true, batch.cols[8].isNull[rowInBatch]);
        assertEquals("row " + rowId, false, batch.cols[8].noNulls);
    }
    checkMiddleStruct((StructColumnVector) batch.cols[9], rowId, rowInBatch, m1);
    checkInnerList((ListColumnVector) batch.cols[10], rowId, rowInBatch, l2);
    checkInnerMap((MapColumnVector) batch.cols[11], rowId, rowInBatch, m2);
}