Example usage for org.apache.hadoop.io Text getBytes

Introduction

On this page you can find example usage for org.apache.hadoop.io Text getBytes.

Prototype

@Override
public byte[] getBytes() 

Document

Returns the raw bytes; however, only data up to #getLength() is valid.
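
Note that a Text instance reuses its backing byte array across records, so the bytes past getLength() can be stale data from an earlier, longer value. The following minimal, self-contained sketch (illustrative only, not taken from the examples below) contrasts the safe and unsafe ways to consume getBytes():

import java.nio.charset.StandardCharsets;

import org.apache.hadoop.io.Text;

public class TextGetBytesDemo {
    public static void main(String[] args) {
        Text t = new Text();
        byte[] first = "a longer first value".getBytes(StandardCharsets.UTF_8);
        t.set(first, 0, first.length);

        byte[] second = "short".getBytes(StandardCharsets.UTF_8);
        t.set(second, 0, second.length); // reuses the larger backing array

        // Safe: decode only the region that is valid per getLength().
        String ok = new String(t.getBytes(), 0, t.getLength(), StandardCharsets.UTF_8);
        System.out.println(ok); // prints "short"

        // Unsafe: decodes stale bytes left over from the first value;
        // typically prints "shortger first value".
        String bad = new String(t.getBytes(), StandardCharsets.UTF_8);
        System.out.println(bad);
    }
}

Most of the examples below follow the safe pattern, either by passing an explicit length (for example os.write(t.getBytes(), 0, t.getLength())) or by using Text.decode(bytes, start, length).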

Usage

From source file:com.marklogic.contentpump.SingleDocumentWriter.java

License:Apache License

@Override
public void write(DocumentURI uri, MarkLogicDocument content) throws IOException, InterruptedException {
    OutputStream os = null;
    try {
        String childPath = URIUtil.getPathFromURI(uri);
        Path path;
        if (childPath.charAt(0) == '/') {
            // concatenate outputPath with path to form the path
            path = new Path(dir.toString() + childPath);
        } else {
            path = new Path(dir, childPath);
        }
        FileSystem fs = path.getFileSystem(conf);
        if (fs instanceof DistributedFileSystem) {
            os = fs.create(path, false);
        } else {
            File f = new File(path.toUri().getPath());
            if (!f.exists()) {
                f.getParentFile().mkdirs();
                f.createNewFile();
            }
            os = new FileOutputStream(f, false);
        }

        ContentType type = content.getContentType();
        if (ContentType.BINARY.equals(type)) {
            if (content.isStreamable()) {
                InputStream is = null;
                try {
                    is = content.getContentAsByteStream();
                    long size = content.getContentSize();
                    long bufSize = Math.min(size, 512 << 10);
                    byte[] buf = new byte[(int) bufSize];
                    for (long toRead = size, read = 0; toRead > 0; toRead -= read) {
                        // Request no more than the bytes still expected.
                        read = is.read(buf, 0, (int) Math.min(bufSize, toRead));
                        if (read > 0) {
                            os.write(buf, 0, (int) read);
                        } else {
                            LOG.error("Premature EOF: uri=" + uri + ",toRead=" + toRead);
                            break;
                        }
                    }
                } finally {
                    if (is != null) {
                        is.close();
                    }
                }
            } else {
                os.write(content.getContentAsByteArray());
            }
        } else if (ContentType.TEXT.equals(type) || ContentType.XML.equals(type)
                || ContentType.JSON.equals(type)) {
            if (encoding.equals("UTF-8")) {
                Text t = content.getContentAsText();
                os.write(t.getBytes(), 0, t.getLength());
            } else {
                String t = content.getContentAsString();
                os.write(t.getBytes(encoding));
            }
            if (LOG.isTraceEnabled()) {
                Text t = content.getContentAsText();
                LOG.trace(t);
                byte[] bytes = content.getContentAsByteArray();
                StringBuilder sb = new StringBuilder();
                for (int i = 0; i < bytes.length; i++) {
                    sb.append(Byte.toString(bytes[i]));
                    sb.append(" ");
                }
                LOG.trace(sb);
            }
        } else {
            LOG.error("Skipping " + uri + ".  Unsupported content type: " + type.name());
        }
    } catch (Exception e) {
        LOG.error("Error saving: " + uri, e);
    } finally {
        if (os != null) {
            os.close();
        }
    }
}

From source file:com.marklogic.mapreduce.MarkLogicInputSplit.java

License:Apache License

@Override
public void readFields(DataInput in) throws IOException {
    start = in.readLong();
    length = in.readLong();
    Text forestIdText = new Text();
    forestIdText.readFields(in);
    // Copy only the valid region: getBytes() may return a backing array
    // longer than getLength() (uses java.util.Arrays).
    forestId = new BigInteger(Arrays.copyOf(forestIdText.getBytes(), forestIdText.getLength()));
    hostName = new String[1];
    hostName[0] = Text.readString(in);
    isLastSplit = in.readBoolean();
}

From source file:com.mortardata.pig.JsonLoader.java

License:Apache License

@Override
public Tuple getNext() throws IOException {
    Text val = null;
    try {
        if (!reader.nextKeyValue())
            return null;
        val = (Text) reader.getCurrentValue();
    } catch (Exception e) {
        throw new IOException(e);
    }

    // Create a parser specific for this input line.
    // This may not be the most efficient approach.
    // Bound the stream by getLength(): the Text backing array may be longer.
    ByteArrayInputStream bais = new ByteArrayInputStream(val.getBytes(), 0, val.getLength());
    JsonParser p = jsonFactory.createJsonParser(bais);

    Tuple t;

    // schema provided
    if (!useDefaultSchema) {
        // Create a map of field names to ResourceFieldSchema's,
        // and create a map of field names to positions in the tuple.
        // These are used during parsing to handle extra, missing, and/or out-of-order
        // fields properly.

        Map<String, ResourceFieldSchema> schemaMap = new HashMap<String, ResourceFieldSchema>();
        Map<String, Integer> schemaPositionMap = new HashMap<String, Integer>();

        if (requiredFields != null) {
            int count = 0;
            for (int i = 0; i < fields.length; i++) {
                if (requiredFields[i]) {
                    schemaMap.put(fields[i].getName(), fields[i]);
                    schemaPositionMap.put(fields[i].getName(), count);
                    count++;
                }
            }
            t = tupleFactory.newTuple(count);
        } else {
            for (int i = 0; i < fields.length; i++) {
                schemaMap.put(fields[i].getName(), fields[i]);
                schemaPositionMap.put(fields[i].getName(), i);
            }
            t = tupleFactory.newTuple(fields.length);
        }

        try {
            p.nextToken(); // move to start of object
            parseObjectIntoTuple(val.toString(), p, schemaMap, schemaPositionMap, t);
        } catch (JsonParseException jpe) {
            // If the line doesn't parse as a valid JSON object, log an error and move on
            log.error("Error parsing record: " + val + ": " + jpe.toString());
        }
    } else {
        // schema not provided: load whole document as a map
        t = tupleFactory.newTuple(1);
        try {
            p.nextToken(); // move to start of object
            t.set(0, readField(val.toString(), p, schema.getFields()[0]));
        } catch (JsonParseException jpe) {
            log.error("Error parsing record: " + val + ": " + jpe.toString());
        }
    }

    p.close();
    return t;
}

From source file:com.mycustomloader.vsamloader.VSAMLoader.java

License:Apache License

@Override
public Tuple getNext() throws IOException {
    mProtoTuple = new ArrayList<Object>();

    boolean inField = false;
    boolean inQuotedField = false;
    boolean evenQuotesSeen = true;

    if (!mRequiredColumnsInitialized) {
        if (signature != null) {
            Properties p = UDFContext.getUDFContext().getUDFProperties(this.getClass());
            mRequiredColumns = (boolean[]) ObjectSerializer.deserialize(p.getProperty(signature));
        }
        mRequiredColumnsInitialized = true;
    }
    try {
        if (!in.nextKeyValue()) {
            return null;
        }
        Text value = (Text) in.getCurrentValue();
        byte[] buf = value.getBytes();
        int len = value.getLength();
        int fieldID = 0;

        ByteBuffer fieldBuffer = ByteBuffer.allocate(len);

        for (int i = 0; i < len; i++) {
            byte b = buf[i];
            inField = true;
            if (inQuotedField) {
                if (b == DOUBLE_QUOTE) {
                    evenQuotesSeen = !evenQuotesSeen;
                    if (evenQuotesSeen) {
                        fieldBuffer.put(DOUBLE_QUOTE);
                    }
                } else if (!evenQuotesSeen && (b == FIELD_DEL || b == RECORD_DEL)) {
                    inQuotedField = false;
                    inField = false;
                    readField(fieldBuffer, fieldID++);
                } else {
                    fieldBuffer.put(b);
                }
            } else if (b == DOUBLE_QUOTE) {
                inQuotedField = true;
                evenQuotesSeen = true;
            } else if (b == FIELD_DEL) {
                inField = false;
                readField(fieldBuffer, fieldID++); // end of the field
            } else {
                evenQuotesSeen = true;
                fieldBuffer.put(b);
            }
        }
        if (inField)
            readField(fieldBuffer, fieldID++);
    } catch (InterruptedException e) {
        int errCode = 6018;
        String errMsg = "Error while reading input";
        throw new ExecException(errMsg, errCode, PigException.REMOTE_ENVIRONMENT, e);
    }

    Tuple t = mTupleFactory.newTupleNoCopy(mProtoTuple);
    return t;
}

From source file:com.naver.nelo2analyzer.udf.TextLoaderPlusTime.java

License:Apache License

@Override
public Tuple getNext() throws IOException {
    mProtoTuple = new ArrayList<Object>();
    try {
        boolean notDone = in.nextKeyValue();
        if (!notDone) {
            return null;
        }
        Text value = (Text) in.getCurrentValue();
        timebytes = timeGenerator(sourcePath.toString()).getBytes();
        ba = value.getBytes();

        outputStream = new ByteArrayOutputStream();
        outputStream.write(timebytes);
        // Write only data up to getLength(); the backing array may be longer.
        outputStream.write(ba, 0, value.getLength());

        c = outputStream.toByteArray();

        // return mTupleFactory.newTupleNoCopy(mProtoTuple);
        System.err.println("? ? ");
        System.err.println("ba = " + new String(ba));
        System.err.println("c = " + new String(c));
        System.err.println("? ? ??");
        forReturn = mTupleFactory.newTuple(new DataByteArray(c, 0, c.length));
        return forReturn;
    } catch (InterruptedException e) {
        throw new IOException("Error getting input", e);
    } finally {
        if (outputStream != null) {
            outputStream.close();
        }
        outputStream = null;
        c = null;
        ba = null;
        timebytes = null;
        System.gc();
    }
}

From source file:com.pagerankcalculator.calculation.PageRankCalculationMapper.java

@Override
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

    int tabIdx1 = value.find("\t");
    int tabIdx2 = value.find("\t", tabIdx1 + 1);

    String userID = Text.decode(value.getBytes(), 0, tabIdx1);
    String pageRank = Text.decode(value.getBytes(), tabIdx1 + 1, tabIdx2 - (tabIdx1 + 1));
    String CSVFollowingIDs = Text.decode(value.getBytes(), tabIdx2 + 1, value.getLength() - (tabIdx2 + 1));

    //        System.out.print(userID);
    //        System.out.print("\t");
    //        System.out.print(pageRank);
    //        System.out.print("\t");
    //        System.out.println(CSVFollowingIDs);

    String[] followingIDs = CSVFollowingIDs.split(TwitterPageRank.FOLLOWING_LIST_DELIMETER);
    Integer totalFollowingIDs = followingIDs.length;
    for (String followingID : followingIDs) {
        String pageRankWithTotalFollowing = pageRank + "\t" + totalFollowingIDs.toString();

        context.write(new Text(followingID), new Text(pageRankWithTotalFollowing));
    }

    context.write(new Text(userID), new Text(TwitterPageRank.FOLLOWING_LIST_TAG + CSVFollowingIDs));
}

From source file:com.pagerankcalculator.graphparsing.GraphParsingMapper.java

@Override
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

    int tabIndex = value.find("\t");

    userID = Text.decode(value.getBytes(), 0, tabIndex);
    followerID = Text.decode(value.getBytes(), tabIndex + 1, value.getLength() - (tabIndex + 1));
    context.write(new Text(followerID), new Text(userID));
}

From source file:com.pagerankcalculator.ordering.PageRankSortingMapper.java

@Override
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    int tabIdx1 = value.find("\t");
    int tabIdx2 = value.find("\t", tabIdx1 + 1);

    String username = Text.decode(value.getBytes(), 0, tabIdx1);

    double pageRank = Double.parseDouble(
            Text.decode(value.getBytes(), tabIdx1 + 1, tabIdx2 - (tabIdx1 + 1)));

    context.write(new DoubleWritable(pageRank), new Text(username));
}

From source file:com.qq.pig.udf.CustomJsonLoader.java

License:Apache License

public Tuple parseTuple(Text val) throws IOException {
    // Create a parser specific for this input line.  This may not be the
    // most efficient approach.

    // Copy only the valid region: getBytes() returns the backing array,
    // which may be longer than getLength() when the Text object is reused.
    byte[] newBytes = new byte[val.getLength()];
    System.arraycopy(val.getBytes(), 0, newBytes, 0, val.getLength());

    ByteArrayInputStream bais = new ByteArrayInputStream(newBytes);
    JsonParser p = jsonFactory.createJsonParser(bais);

    // Create the tuple we will be returning.  We create it with the right
    // number of fields, as the Tuple object is optimized for this case.
    ResourceFieldSchema[] fields = schema.getFields();
    Tuple t = tupleFactory.newTuple(fields.length);

    // Read the start object marker.  Throughout this file if the parsing
    // isn't what we expect we return a tuple with null fields rather than
    // throwing an exception.  That way a few mangled lines don't fail the
    // job.
    if (p.nextToken() != JsonToken.START_OBJECT) {
        warn("Bad record, could not find start of record " + val.toString(), PigWarning.UDF_WARNING_1);
        return t;
    }
    readFields(p, t);
    p.close();
    return t;
}

From source file:com.redsqirl.workflow.server.connect.HDFSInterface.java

License:Open Source License

/**
 * Read a Sequence File
 * 
 * @param path
 * @param delimiter
 * @param maxToRead
 * @param fields
 * @return List of read rows from the path
 * @throws RemoteException
 */
public List<String> selectSeq(String path, String delimiter, int maxToRead, FieldList fields)
        throws RemoteException {

    Path p = new Path(path);
    List<String> ans = null;
    HdfsFileChecker fCh = new HdfsFileChecker(p);
    try {
        FileSystem fs = NameNodeVar.getFS();
        if (fCh.isDirectory()) {
            FileStatus[] fsA = fs.listStatus(p);
            int listSize = Math.min(maxToRead, fsA.length);
            ans = new ArrayList<String>(listSize);
            for (int i = 0; i < listSize; ++i) {
                ans.add(fsA[i].getPath().toString());
            }
        } else if (fCh.isFile()) {
            FSDataInputStream in = fs.open(p);
            LineReader reader = new LineReader(in);
            ans = new ArrayList<String>(maxToRead);
            Text line = new Text();
            // Each line is read once in the loop condition below.
            int lineNb = 0;
            maxToRead *= fields.getSize();
            int i = 0;
            String toWrite = "";
            logger.debug("delim : " + delimiter);
            while (reader.readLine(line) != 0 && lineNb < maxToRead) {
                logger.debug("line : " + line);
                ++lineNb;

                FieldType type = fields.getFieldType(fields.getFieldNames().get(i));
                if (type == FieldType.BOOLEAN) {
                    toWrite += BytesWritable.Comparator.readInt(line.getBytes(), 0);
                } else if (type == FieldType.INT) {
                    toWrite += BytesWritable.Comparator.readInt(line.getBytes(), 0);
                } else if (type == FieldType.FLOAT) {
                    toWrite += BytesWritable.Comparator.readFloat(line.getBytes(), 0);
                } else if (type == FieldType.DOUBLE) {
                    toWrite += BytesWritable.Comparator.readDouble(line.getBytes(), 0);
                } else if (type == FieldType.LONG) {
                    toWrite += BytesWritable.Comparator.readLong(line.getBytes(), 0);
                } else if (type == FieldType.STRING) {
                    // byte[].toString() would print an array reference;
                    // decode the valid region instead.
                    toWrite += line.toString();
                }
                if ((i + 1) % fields.getSize() == 0) {
                    ans.add(toWrite);
                    toWrite = "";
                } else {
                    toWrite += '\001';
                }
                ++i;
                if (i >= fields.getSize()) {
                    i = 0;
                }

            }
        }
        // fs.close();
    } catch (IOException e) {
        logger.error("Cannot select the file or directory: " + p);
        logger.error(e.getMessage());
    }
    // fCh.close();

    return ans;
}