Example usage for org.apache.hadoop.io Text getBytes

List of usage examples for org.apache.hadoop.io Text getBytes

Introduction

On this page you can find example usage of org.apache.hadoop.io.Text#getBytes().

Prototype

@Override
public byte[] getBytes() 

Document

Returns the raw bytes; however, only data up to #getLength() is valid.
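
The returned array is the Text instance's internal buffer, so it can be longer than the encoded data; pairing getBytes() with getLength() avoids reading stale bytes. A minimal sketch of the safe pattern (illustrative only, not taken from the examples below):

import java.nio.charset.StandardCharsets;
import java.util.Arrays;

import org.apache.hadoop.io.Text;

public class TextGetBytesExample {
    public static void main(String[] args) {
        Text text = new Text("hello world");
        // Shrink the logical contents; the backing buffer keeps its old capacity.
        text.set("hello world".getBytes(StandardCharsets.UTF_8), 0, 5);

        byte[] raw = text.getBytes(); // backing buffer; raw.length >= text.getLength()
        int len = text.getLength();   // number of valid bytes (5 here)

        // Decode only the valid range; Text always stores UTF-8.
        String decoded = new String(raw, 0, len, StandardCharsets.UTF_8); // "hello"

        // Copy only the valid range when the bytes must outlive the Text.
        byte[] copy = Arrays.copyOf(raw, len);

        System.out.println(decoded + ": " + copy.length + " of " + raw.length + " bytes are valid");
    }
}

Newer Hadoop releases also provide Text#copyBytes(), which returns a copy trimmed to exactly getLength() bytes.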

Usage

From source file:org.openx.data.jsonserde.JsonSerDe.java

License:Open Source License

/**
 * Deserializes the object. Reads a Writable and uses JSONObject to
 * parse its text
 * 
 * @param w the text to parse
 * @return a JSONObject
 * @throws SerDeException 
 */
@Override
public Object deserialize(Writable w) throws SerDeException {
    Text rowText = (Text) w;
    deserializedDataSize = rowText.getBytes().length;

    // Try parsing row into JSON object
    Object jObj = null;

    try {
        String txt = rowText.toString().trim();

        if (txt.startsWith("{")) {
            jObj = new JSONObject(txt);
        } else if (txt.startsWith("[")) {
            jObj = new JSONArray(txt);
        }
    } catch (JSONException e) {
        // If row is not a JSON object, make the whole row NULL
        onMalformedJson("Row is not a valid JSON Object - JSONException: " + e.getMessage());
        try {
            jObj = new JSONObject("{}");
        } catch (JSONException ex) {
            onMalformedJson("Error parsing empty row. This should never happen.");
        }
    }

    return jObj;
}
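
Note: rowText.getBytes().length measures the backing buffer, which for a reused Text can be larger than the row that was just read; rowText.getLength() reports the number of valid bytes and is the more accurate size metric here.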

From source file:org.openx.data.jsonserde.JsonSerDe.java

License:Open Source License

/**
 * Hive will call this to serialize an object. Returns a writable object
 * of the same class returned by <a href="#getSerializedClass">getSerializedClass</a>
 *
 * @param obj The object to serialize
 * @param objInspector The ObjectInspector that knows about the object's structure
 * @return a serialized object in form of a Writable. Must be the 
 *         same type returned by <a href="#getSerializedClass">getSerializedClass</a>
 * @throws SerDeException 
 */
@Override
public Writable serialize(Object obj, ObjectInspector objInspector) throws SerDeException {
    // make sure it is a struct record
    if (objInspector.getCategory() != Category.STRUCT) {
        throw new SerDeException(getClass().toString() + " can only serialize struct types, but we got: "
                + objInspector.getTypeName());
    }

    JSONObject serializer = serializeStruct(obj, (StructObjectInspector) objInspector, columnNames);

    Text t = new Text(serializer.toString());

    serializedDataSize = t.getBytes().length;
    return t;
}
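
The Text here is freshly built from the JSON string, but getBytes().length may still exceed the encoded length because Text keeps the encoder's whole backing array; t.getLength() gives the exact serialized size.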

From source file:org.platform.modules.hadoop.format.output.CustomOutputFormat.java

License:Apache License

/**
 * Create the final output file and write it row by row. After each row is
 * appended, the configured row separator is appended.
 *
 * @param jc
 *          the job configuration file
 * @param outPath
 *          the final output file to be created
 * @param valueClass
 *          the value class used for create
 * @param isCompressed
 *          whether the content is compressed or not
 * @param tableProperties
 *          the tableProperties of this file's corresponding table
 * @param progress
 *          progress used for status report
 * @return the RecordWriter
 */
@Override
public RecordWriter getHiveRecordWriter(JobConf jc, Path outPath, Class<? extends Writable> valueClass,
        boolean isCompressed, Properties tableProperties, Progressable progress) throws IOException {
    int rowSeparator = 0;
    String rowSeparatorString = tableProperties.getProperty(serdeConstants.LINE_DELIM, "\n");
    try {
        rowSeparator = Byte.parseByte(rowSeparatorString);
    } catch (NumberFormatException e) {
        rowSeparator = rowSeparatorString.charAt(0);
    }

    final int finalRowSeparator = rowSeparator;
    FileSystem fs = outPath.getFileSystem(jc);
    final OutputStream outStream = Utilities.createCompressedStream(jc, fs.create(outPath), isCompressed);
    return new RecordWriter() {
        @SuppressWarnings("deprecation")
        public void write(Writable r) throws IOException {
            if (r instanceof Text) {
                Text tr = (Text) r;
                String strReplace = tr.toString().toLowerCase().replace(":", "::");
                Text txtReplace = new Text();
                txtReplace.set(strReplace);
                outStream.write(txtReplace.getBytes(), 0, txtReplace.getLength());
                //          outStream.write(tr.getBytes(), 0, tr.getLength());
                outStream.write(finalRowSeparator);
            } else {
                // DynamicSerDe always writes out BytesWritable
                BytesWritable bw = (BytesWritable) r;
                outStream.write(bw.get(), 0, bw.getSize());
                outStream.write(finalRowSeparator);
            }
        }

        public void close(boolean abort) throws IOException {
            outStream.close();
        }
    };
}
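
This writer pairs getBytes() with getLength(), writing only the valid prefix of the buffer, which is exactly what the #getLength() caveat above calls for. The next example is a nearly identical writer that keeps the original case instead of lower-casing each row.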

From source file:org.platform.utils.bigdata.hive.CustomOutputFormat.java

License:Apache License

/**
 * Create the final output file and write it row by row. After each row is
 * appended, the configured row separator is appended.
 *
 * @param jc
 *            the job configuration file
 * @param outPath
 *            the final output file to be created
 * @param valueClass
 *            the value class used for create
 * @param isCompressed
 *            whether the content is compressed or not
 * @param tableProperties
 *            the tableProperties of this file's corresponding table
 * @param progress
 *            progress used for status report
 * @return the RecordWriter
 */
@Override
public RecordWriter getHiveRecordWriter(JobConf jc, Path outPath, Class<? extends Writable> valueClass,
        boolean isCompressed, Properties tableProperties, Progressable progress) throws IOException {
    int rowSeparator = 0;
    String rowSeparatorString = tableProperties.getProperty(serdeConstants.LINE_DELIM, "\n");
    try {
        rowSeparator = Byte.parseByte(rowSeparatorString);
    } catch (NumberFormatException e) {
        rowSeparator = rowSeparatorString.charAt(0);
    }

    final int finalRowSeparator = rowSeparator;
    FileSystem fs = outPath.getFileSystem(jc);
    final OutputStream outStream = Utilities.createCompressedStream(jc, fs.create(outPath), isCompressed);
    return new RecordWriter() {
        @SuppressWarnings("deprecation")
        @Override
        public void write(Writable r) throws IOException {
            if (r instanceof Text) {
                Text tr = (Text) r;
                String strReplace = tr.toString().replace(":", "::");
                Text txtReplace = new Text();
                txtReplace.set(strReplace);
                outStream.write(txtReplace.getBytes(), 0, txtReplace.getLength());
                // outStream.write(tr.getBytes(), 0, tr.getLength());
                outStream.write(finalRowSeparator);
            } else {
                // DynamicSerDe always writes out BytesWritable
                BytesWritable bw = (BytesWritable) r;
                outStream.write(bw.get(), 0, bw.getSize());
                outStream.write(finalRowSeparator);
            }
        }

        @Override
        public void close(boolean abort) throws IOException {
            outStream.close();
        }
    };
}

From source file:org.springframework.data.hadoop.store.input.TextFileReader.java

License:Apache License

@Override
public String read() throws IOException {
    if (streamsHolder == null) {
        streamsHolder = getInput(getPath());
        lineReader = new LineReader(streamsHolder.getStream(), delimiter);
    }
    Text text = new Text();
    lineReader.readLine(text);
    byte[] value = text.getBytes();
    return value != null && value.length > 0 ? new String(value) : null;
}
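
Since value is the raw backing array, new String(value) may include stale bytes past getLength() and decodes with the platform default charset; text.toString(), or new String(value, 0, text.getLength(), StandardCharsets.UTF_8), would decode only the valid UTF-8 data.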

From source file:org.springframework.data.hadoop.store.input.TextSequenceFileReader.java

License:Apache License

@Override
public String read() throws IOException {
    if (reader == null) {
        reader = getInput();
    }

    Text k = new Text();
    Text v = new Text();
    reader.next(k, v);
    byte[] value = v.getBytes();
    return value != null && value.length > 0 ? new String(value) : null;
}

From source file:org.springframework.data.hadoop.store.text.DelimitedTextStorage.java

License:Apache License

@Override
public synchronized StorageReader getStorageReader(Path path) throws IOException {
    if (lineReader == null) {
        lineReader = new LineReader(getInput(path).getStream(), getConfiguration());
    }
    return new StorageReader() {

        @Override
        public byte[] read() throws IOException {
            Text text = new Text();
            lineReader.readLine(text);
            return text.getBytes();
        }
    };
}
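
Here the whole backing buffer is returned to the caller. A hedged variant of read() (assuming the same lineReader field) that trims the result to the valid length with java.util.Arrays:

        @Override
        public byte[] read() throws IOException {
            Text text = new Text();
            lineReader.readLine(text);
            // Copy only the bytes that belong to the line just read.
            return Arrays.copyOf(text.getBytes(), text.getLength());
        }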

From source file:org.springframework.data.hadoop.store.text.DelimitedTextStorage.java

License:Apache License

@Override
public synchronized StorageReader getStorageReader(final InputSplit inputSplit) throws IOException {
    StorageReader splitStorageReader = splitStorageReaders.get(inputSplit);

    if (splitStorageReader == null) {

        final StreamsHolder<InputStream> holder = getInput(inputSplit);
        final LineReader splitReader = new LineReader(holder.getStream(), getConfiguration());
        splitLineReaders.put(inputSplit, splitReader);

        final long startx;
        final long endx;
        if (holder.getStream() instanceof SplitCompressionInputStream) {
            startx = ((SplitCompressionInputStream) holder.getStream()).getAdjustedStart();
            endx = ((SplitCompressionInputStream) holder.getStream()).getAdjustedEnd();
        } else {
            startx = inputSplit.getStart();
            endx = startx + inputSplit.getLength();
        }

        if (log.isDebugEnabled()) {
            log.debug("Split start=" + startx + " end=" + endx);
        }

        splitStorageReader = new StorageReader() {

            Seekable seekable = (Seekable) holder.getStream();

            long start = startx;

            long end = endx;

            long pos = start;

            @Override
            public byte[] read() throws IOException {
                long position = getFilePosition();
                if (position <= end) {
                    Text text = new Text();
                    int newSize = splitReader.readLine(text);
                    pos += newSize;
                    return text.getBytes();
                } else {
                    return null;
                }
            }

            private long getFilePosition() throws IOException {
                long retVal;
                if (getCodec() != null && seekable != null) {
                    retVal = seekable.getPos();
                } else {
                    retVal = pos;
                }
                return retVal;
            }

        };
        splitStorageReaders.put(inputSplit, splitStorageReader);
    }

    return splitStorageReader;
}
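
In this split-aware variant, pos is advanced by the number of bytes readLine consumed so that getFilePosition() can bound reads for uncompressed input, while splittable compressed streams are tracked through their own Seekable position; the returned array is again the raw buffer, valid only up to text.getLength().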

From source file:org.teiid.translator.accumulo.AccumuloQueryExecution.java

License:Open Source License

@Override
public List<?> next() throws TranslatorException, DataNotAvailableException {
    SortedMap<Key, Value> rowItems = readNextRow();
    boolean rowIdAdded = false;
    LinkedHashMap<String, byte[]> values = new LinkedHashMap<String, byte[]>();

    for (Key key : rowItems.keySet()) {
        Text cf = key.getColumnFamily();
        Text cq = key.getColumnQualifier();
        Text rowid = key.getRow();
        Value value = rowItems.get(key);

        Column match = findMatchingColumn(cf, cq);
        if (!rowIdAdded) {
            values.put(AccumuloMetadataProcessor.ROWID, rowid.getBytes());
            rowIdAdded = true;
        }

        if (match != null) {
            String valueIn = match.getProperty(AccumuloMetadataProcessor.VALUE_IN, false);
            // An isolated scanner is not used, but this check achieves the same effect:
            // only the most recent (top) value for the column is kept
            if (values.get(match.getName()) == null) {
                values.put(match.getName(), buildValue(valueIn, cq, value));
            }
        }
    }
    return nextRow(values);
}

From source file:org.teiid.translator.accumulo.AccumuloQueryExecution.java

License:Open Source License

private Column findMatchingColumn(Text rowCF, Text rowCQ) {
    String CF = new String(rowCF.getBytes());
    String CQ = new String(rowCQ.getBytes());
    Column column = this.visitor.lookupColumn(CF + "/" + CQ); //$NON-NLS-1$
    if (column == null) {
        // this means CQ is not defined; In this pattern CQ is used for value
        column = this.visitor.lookupColumn(CF);
    }
    return column;
}
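
new String(rowCF.getBytes()) decodes the full backing array with the platform default charset; rowCF.toString(), which decodes just the valid bytes as UTF-8, would be the safer way to build the lookup key.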