Example usage for org.apache.hadoop.io Text getLength

List of usage examples for org.apache.hadoop.io Text getLength

Introduction

On this page you can find example usage of org.apache.hadoop.io.Text.getLength().

Prototype

@Override
public int getLength() 

Document

Returns the number of bytes in the byte array.
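Note that this is the length in bytes of the UTF-8 encoded contents, not the number of characters. A minimal sketch illustrating the difference (the class name and sample values are illustrative only):

import org.apache.hadoop.io.Text;

public class TextLengthDemo {
    public static void main(String[] args) {
        // ASCII content: byte count and character count agree.
        Text ascii = new Text("hadoop");
        System.out.println(ascii.getLength());            // 6
        System.out.println(ascii.toString().length());    // 6

        // Non-ASCII content: 'é' takes two bytes in UTF-8, so the byte count
        // reported by getLength() exceeds String.length().
        Text accented = new Text("café");
        System.out.println(accented.getLength());         // 5
        System.out.println(accented.toString().length()); // 4
    }
}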

Usage

From source file:org.apache.pig.builtin.JsonLoader.java

License:Apache License

public Tuple getNext() throws IOException {
    Text val = null;
    try {
        // Read the next key value pair from the record reader.  If it's
        // finished, return null
        if (!reader.nextKeyValue())
            return null;

        // Get the current value.  We don't use the key.
        val = (Text) reader.getCurrentValue();
    } catch (InterruptedException ie) {
        throw new IOException(ie);
    }

    // Create a parser specific for this input line.  This may not be the
    // most efficient approach.
    byte[] newBytes = new byte[val.getLength()];
    System.arraycopy(val.getBytes(), 0, newBytes, 0, val.getLength());
    ByteArrayInputStream bais = new ByteArrayInputStream(newBytes);
    JsonParser p = jsonFactory.createJsonParser(bais);

    // Create the tuple we will be returning.  We create it with the right
    // number of fields, as the Tuple object is optimized for this case.
    ResourceFieldSchema[] fields = schema.getFields();
    Tuple t = tupleFactory.newTuple(fields.length);

    // Read the start object marker.  Throughout this file if the parsing
    // isn't what we expect we return a tuple with null fields rather than
    // throwing an exception.  That way a few mangled lines don't fail the
    // job.
    if (p.nextToken() != JsonToken.START_OBJECT) {
        warn("Bad record, could not find start of record " + val.toString(), PigWarning.UDF_WARNING_1);
        return t;
    }

    // Read each field in the record
    for (int i = 0; i < fields.length; i++) {
        t.set(i, readField(p, fields[i], i));
    }

    if (p.nextToken() != JsonToken.END_OBJECT) {
        warn("Bad record, could not find end of record " + val.toString(), PigWarning.UDF_WARNING_1);
        return t;
    }
    p.close();
    return t;
}

From source file:org.apache.pig.builtin.PigStorage.java

License:Apache License

@Override
public Tuple getNext() throws IOException {
    mProtoTuple = new ArrayList<Object>();
    if (!mRequiredColumnsInitialized) {
        if (signature != null) {
            Properties p = UDFContext.getUDFContext().getUDFProperties(this.getClass());
            mRequiredColumns = (boolean[]) ObjectSerializer.deserialize(p.getProperty(signature));
        }
        mRequiredColumnsInitialized = true;
    }
    //Prepend input source path if source tagging is enabled
    if (tagFile) {
        mProtoTuple.add(new DataByteArray(sourcePath.getName()));
    } else if (tagPath) {
        mProtoTuple.add(new DataByteArray(sourcePath.toString()));
    }

    try {
        boolean notDone = in.nextKeyValue();
        if (!notDone) {
            return null;
        }
        Text value = (Text) in.getCurrentValue();
        byte[] buf = value.getBytes();
        int len = value.getLength();
        int start = 0;
        int fieldID = 0;
        for (int i = 0; i < len; i++) {
            if (buf[i] == fieldDel) {
                if (mRequiredColumns == null
                        || (mRequiredColumns.length > fieldID && mRequiredColumns[fieldID]))
                    addTupleValue(mProtoTuple, buf, start, i);
                start = i + 1;
                fieldID++;
            }
        }
        // pick up the last field
        if (start <= len && (mRequiredColumns == null
                || (mRequiredColumns.length > fieldID && mRequiredColumns[fieldID]))) {
            addTupleValue(mProtoTuple, buf, start, len);
        }
        Tuple t = mTupleFactory.newTupleNoCopy(mProtoTuple);

        return dontLoadSchema ? t : applySchema(t);
    } catch (InterruptedException e) {
        int errCode = 6018;
        String errMsg = "Error while reading input";
        throw new ExecException(errMsg, errCode, PigException.REMOTE_ENVIRONMENT, e);
    }
}

From source file:org.apache.pig.builtin.TextLoader.java

License:Apache License

@Override
public Tuple getNext() throws IOException {
    try {
        boolean notDone = in.nextKeyValue();
        if (!notDone) {
            return null;
        }
        Text value = (Text) in.getCurrentValue();
        byte[] ba = value.getBytes();
        // make a copy of the bytes representing the input since
        // TextInputFormat will reuse the byte array
        return mTupleFactory.newTuple(new DataByteArray(ba, 0, value.getLength()));
    } catch (InterruptedException e) {
        throw new IOException("Error getting input");
    }
}
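
The comment in TextLoader above captures why getLength() is so often paired with getBytes(): getBytes() returns the Text's internal buffer, which may be longer than the valid data and is typically reused for the next record. A standalone sketch of that copy idiom, with a hypothetical helper class for illustration:

import org.apache.hadoop.io.Text;

// Hypothetical helper: copy only the valid region of a Text's backing buffer.
public final class TextBytes {
    private TextBytes() {
    }

    public static byte[] copyValidBytes(Text value) {
        // getBytes() may return an array longer than getLength(); the bytes past
        // getLength() are stale or undefined, so copy exactly that many.
        byte[] copy = new byte[value.getLength()];
        System.arraycopy(value.getBytes(), 0, copy, 0, value.getLength());
        return copy;
    }
}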

From source file:org.apache.pig.impl.util.StorageUtil.java

License:Apache License

/**
 * Transform a line of <code>Text</code> to a <code>Tuple</code>
 *
 * @param val a line of text
 * @param fieldDel the field delimiter
 * @return tuple constructed from the text
 */
public static Tuple textToTuple(Text val, byte fieldDel) {
    return bytesToTuple(val.getBytes(), 0, val.getLength(), fieldDel);
}
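
A hypothetical call site for the helper above, splitting a tab-delimited line (the sample data and class name are made up):

import org.apache.hadoop.io.Text;
import org.apache.pig.data.Tuple;
import org.apache.pig.impl.util.StorageUtil;

public class TextToTupleDemo {
    public static void main(String[] args) throws Exception {
        Text line = new Text("alice\t42");
        // textToTuple() reads getBytes() and getLength() itself, so no defensive copy is needed here.
        Tuple t = StorageUtil.textToTuple(line, (byte) '\t');
        System.out.println(t); // e.g. (alice,42)
    }
}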

From source file:org.apache.pig.piggybank.storage.CSVExcelStorage.java

License:Apache License

@Override
public Tuple getNext() throws IOException {
    // If SKIP_INPUT_HEADER and this is the first input split, skip header record
    // We store its value as a string though, so we can compare
    // further records to it. If they are the same (this would 
    // happen if multiple small files each with a header were combined
    // into one split), we know to skip the duplicate header record as well.
    if (loadingFirstRecord && headerTreatment == Headers.SKIP_INPUT_HEADER
            && (splitIndex == 0 || splitIndex == -1)) {
        try {
            if (!in.nextKeyValue())
                return null;
            header = ((Text) in.getCurrentValue()).toString();
        } catch (InterruptedException e) {
            int errCode = 6018;
            String errMsg = "Error while reading input";
            throw new ExecException(errMsg, errCode, PigException.REMOTE_ENVIRONMENT, e);
        }
    }
    loadingFirstRecord = false;

    mProtoTuple = new ArrayList<Object>();

    getNextInQuotedField = false;
    boolean evenQuotesSeen = true;
    boolean sawEmbeddedRecordDelimiter = false;
    byte[] buf = null;

    if (!mRequiredColumnsInitialized) {
        if (udfContextSignature != null) {
            Properties p = UDFContext.getUDFContext().getUDFProperties(this.getClass());
            mRequiredColumns = (boolean[]) ObjectSerializer.deserialize(p.getProperty(udfContextSignature));
        }
        mRequiredColumnsInitialized = true;
    }
    // Note: we cannot factor out the check for nextKeyValue() returning false,
    // because that call overwrites buf with the new line, which is
    // bad if we have a field with a newline.

    try {
        int recordLen = 0;
        getNextFieldID = 0;

        while (sawEmbeddedRecordDelimiter || getNextFieldID == 0) {
            Text value = null;
            if (sawEmbeddedRecordDelimiter) {

                // Deal with pulling more records from the input, because
                // a double quoted embedded newline was encountered in a field.
                // Save the length of the record so far, plus one byte for the 
                // record delimiter (usually newline) that's embedded in the field 
                // we were working on before falling into this branch:
                int prevLineLen = recordLen + 1;

                // Save previous line (the one with the field that has the newline) in a new array.
                // The last byte will be random; we'll fill in the embedded
                // record delimiter (usually newline) below:
                byte[] prevLineSaved = Arrays.copyOf(buf, prevLineLen);
                prevLineSaved[prevLineLen - 1] = RECORD_DEL;

                // Read the continuation of the record, unless EOF:
                if (!in.nextKeyValue()) {
                    return null;
                }
                value = (Text) in.getCurrentValue();
                recordLen = value.getLength();
                // Grab the continuation's bytes:
                buf = value.getBytes();

                // Combine the previous line and the continuation into a new array.
                // The following copyOf() does half the job: it allocates all the
                // space, and also copies the previous line into that space:
                byte[] prevLineAndContinuation = Arrays.copyOf(prevLineSaved, prevLineLen + recordLen);

                // Now append the continuation. Params: fromBuf, fromStartPos, toBuf, toStartPos, lengthToCopy:
                System.arraycopy(buf, 0, prevLineAndContinuation, prevLineLen, recordLen);

                // We'll work with the combination now:
                buf = prevLineAndContinuation;

                // Do the whole record over from the start:
                mProtoTuple.clear();
                getNextInQuotedField = false;
                evenQuotesSeen = true;
                getNextFieldID = 0;
                recordLen = prevLineAndContinuation.length;

            } else {
                // Previous record finished cleanly: start with the next record,
                // unless EOF:
                if (!in.nextKeyValue()) {
                    return null;
                }
                value = (Text) in.getCurrentValue();

                // if the line is a duplicate header and 'SKIP_INPUT_HEADER' is set, ignore it
                // (this might happen if multiple files each with a header are combined into a single split)
                if (headerTreatment == Headers.SKIP_INPUT_HEADER && value.toString().equals(header)) {
                    if (!in.nextKeyValue())
                        return null;
                    value = (Text) in.getCurrentValue();
                }

                buf = value.getBytes();
                getNextFieldID = 0;
                recordLen = value.getLength();
            }

            nextTupleSkipChar = false;

            ByteBuffer fieldBuffer = ByteBuffer.allocate(recordLen);

            sawEmbeddedRecordDelimiter = processOneInRecord(evenQuotesSeen, buf, recordLen, fieldBuffer);

            // The last field is never delimited by a FIELD_DEL, but by
            // the end of the record. So we need to add that last field.
            // The '!sawEmbeddedRecordDelimiter' handles the case of
            // embedded newlines; we are amidst a field, not at
            // the final record:
            if (!sawEmbeddedRecordDelimiter)
                readField(fieldBuffer, getNextFieldID++);
        } // end while

    } catch (InterruptedException e) {
        int errCode = 6018;
        String errMsg = "Error while reading input";
        throw new ExecException(errMsg, errCode, PigException.REMOTE_ENVIRONMENT, e);
    }

    Tuple t = mTupleFactory.newTupleNoCopy(mProtoTuple);
    return t;
}

From source file:org.apache.rya.accumulo.pig.AccumuloStorage.java

License:Apache License

@Override
public void putNext(final Tuple t) throws ExecException, IOException {
    final Mutation mut = new Mutation(objToText(t.get(0)));
    final Text cf = objToText(t.get(1));
    final Text cq = objToText(t.get(2));

    if (t.size() > 4) {
        final Text cv = objToText(t.get(3));
        final Value val = new Value(objToBytes(t.get(4)));
        if (cv.getLength() == 0) {
            mut.put(cf, cq, val);
        } else {
            mut.put(cf, cq, new ColumnVisibility(cv), val);
        }
    } else {
        final Value val = new Value(objToBytes(t.get(3)));
        mut.put(cf, cq, val);
    }

    try {
        writer.write(tableName, mut);
    } catch (final InterruptedException e) {
        throw new IOException(e);
    }
}
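
As the ColumnVisibility branch above shows, getLength() == 0 is also the cheapest way to test a Text for emptiness without decoding it into a String. For illustration (the class name is invented):

import org.apache.hadoop.io.Text;

public final class EmptyTextCheck {
    private EmptyTextCheck() {
    }

    // Equivalent to value.toString().isEmpty(), but without materializing a String.
    public static boolean isEmpty(Text value) {
        return value.getLength() == 0;
    }
}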

From source file:org.archive.access.nutch.indexer.WaxIndexingFilter.java

License:LGPL

public Document filter(Document doc, Parse parse, Text url, CrawlDatum datum, Inlinks inlinks) {
    if (url == null || url.getLength() <= 0) {
        LOGGER.error(doc.toString() + " has no url");

        return doc;
    }

    String urlStr = url.toString();

    // Stored, indexed and un-tokenized. Date is already GMT so don't
    // mess w/ timezones. Date is stored as seconds since epoch to
    // facilitate sorting (The Lucene Sort interprets the IA 14-char
    // date string as a float; rounding of float values equates floats
    // that shouldn't equate: e.g:
    // float f = Float.parseFloat("20050524133833");
    // float g = Float.parseFloat("20050524133834");
    // float h = Float.parseFloat("20050524133835");
    // System.out.println(f == g);
    // System.out.println(f == h);
    // ...prints true twice.
    // So, have seconds since epoch for the date we index.
    long seconds = datum.getFetchTime() / 1000;

    if (seconds > Integer.MAX_VALUE) {
        LOGGER.warn("Fetch time " + Long.toString(seconds) + " is > Integer.MAX_VALUE. Setting to zero");

        seconds = 0;
    }

    doc.add(new Field(DATE_KEY, ArchiveUtils.zeroPadInteger((int) seconds), Field.Store.YES,
            Field.Index.UN_TOKENIZED));

    // Add as stored, unindexed, and untokenized. Don't warn if absent.
    // It's not a tragedy.
    add(urlStr, doc, "encoding", parse.getData().getMeta(ENCODING_KEY), false, true, true, false, false);

    // Get metadatas.
    MapWritable mw = datum.getMetaData();
    ParseData pd = parse.getData();

    // Add as stored, indexed, and untokenized but not lowercased.
    add(urlStr, doc, ARCCOLLECTION_KEY, getMetadataValue(ARCCOLLECTION_KEY, pd, mw), false, true, true, false);

    // Add as stored, indexed, and untokenized. Preserve case for
    // arcname since eventually it will be used to find an arc on
    // filesystem.
    add(urlStr, doc, ARCFILENAME_KEY, getMetadataValue(ARCFILENAME_KEY, pd, mw), false, true, true, false);

    add(urlStr, doc, ARCFILEOFFSET_KEY, getMetadataValue(ARCFILEOFFSET_KEY, pd, mw), false, true, false, false);

    // This is a nutch 'more' field.
    add(urlStr, doc, "contentLength", parse.getData().getMeta("contentLength"), false, true, false, false);

    // Mimetype. The ARC2Segment tool stores the content-type into
    // metadata with a key of 'content-type'.
    String mimetype = parse.getData().getMeta(CONTENT_TYPE_KEY);

    if (mimetype == null || mimetype.length() == 0) {
        MimeType mt = (MIME.getMimeType(urlStr));

        if (mt != null) {
            mimetype = mt.getName();
        }
    }

    try {
        // Test that the mimetype makes some sense. If not, don't add it.
        mimetype = (new MimeType(mimetype)).getName();
    } catch (MimeTypeException e) {
        LOGGER.error(urlStr + ", mimetype " + mimetype + ": " + e.toString());

        // Clear the mimetype because it caused an exception.
        mimetype = null;
    }

    if (mimetype != null) {
        // Wera wants the sub and primary types in the index, so they are
        // stored but not searchable. Nutch adds the primary and sub types,
        // as well as the complete type, all to one 'type' field.
        final String type = "type";
        add(urlStr, doc, type, mimetype, true, false, true, false);
        int index = mimetype.indexOf('/');

        if (index > 0) {
            String tmp = mimetype.substring(0, index);
            add(urlStr, doc, "primaryType", tmp, true, true, false, false);
            add(urlStr, doc, type, tmp, true, false, true, false);

            if (index + 1 < mimetype.length()) {
                tmp = mimetype.substring(index + 1);
                add(urlStr, doc, "subType", tmp, true, true, false, false);
                add(urlStr, doc, type, tmp, true, false, true, false);
            }
        }
    }

    // Add as not lowercased, not stored, indexed, and not tokenized.
    add(urlStr, doc, EXACTURL_KEY, escapeUrl(url.toString()), false, false, true, false);

    // TODO MC - for site search
    try {
        java.net.URL netUrl = new java.net.URL(urlStr);
        String reverseDomain = (new StringBuffer(netUrl.getHost())).reverse().toString();
        add(urlStr, doc, DOMAIN_KEY, reverseDomain, false, true, true, false);
    } catch (Exception e) {
        LOGGER.error("Malformed url " + urlStr + ".");
    }
    // TODO MC - for site search

    return doc;
}

From source file:org.archive.nutchwax.ImporterToHdfs.java

License:Apache License

/**
 * Import an ARCRecord.
 *
 * @param record
 * @param segmentName
 * @param collectionName
 * @param output
 * @return whether record was imported or not (i.e. filtered out due to URL
 *         filtering rules, etc.)
 */
private boolean importRecord(ARCRecord record, String segmentName, String collectionName,
        OutputCollector output, Writer writer) {
    ARCRecordMetaData meta = record.getMetaData();

    if (LOG.isInfoEnabled()) {
        LOG.info("Consider URL: " + meta.getUrl() + " (" + meta.getMimetype() + ") [" + meta.getLength() + "]");
    }

    if (!this.httpStatusCodeFilter.isAllowed(record.getStatusCode())) {
        if (LOG.isInfoEnabled()) {
            LOG.info("Skip     URL: " + meta.getUrl() + " HTTP status:" + record.getStatusCode());
        }

        return false;
    }

    try {
        // Skip the HTTP headers in the response body, so that the
        // parsers are parsing the response body and not the HTTP
        // headers.
        record.skipHttpHeader();

        // We use record.available() rather than meta.getLength()
        // because the latter includes the size of the HTTP header,
        // which we just skipped.
        byte[] bytes = readBytes(record, record.available());

        // If there is no digest, then we assume we're reading an
        // ARCRecord not a WARCRecord. In that case, we close the
        // record, which updates the digest string. Then we tweak the
        // digest string so we have the same format for both ARC and WARC
        // records.
        if (meta.getDigest() == null) {
            record.close();

            // This is a bit hacky, but ARC and WARC records produce
            // two slightly different digest formats. WARC record
            // digests have the algorithm name as a prefix, such as
            // "sha1:PD3SS4WWZVFWTDC63RU2MWX7BVC2Y2VA" but the
            // ArcRecord.getDigestStr() does not. Since we want the
            // formats to match, we prepend the "sha1:" prefix to ARC
            // record digest.
            meta.setDigest("sha1:" + record.getDigestStr());
        }

        // Normalize and filter
        String url = this.normalizeAndFilterUrl(meta.getUrl(), meta.getDigest(), meta.getDate());

        if (url == null) {
            if (LOG.isInfoEnabled()) {
                LOG.info("Skip     URL: " + meta.getUrl());
            }
            return false;
        }

        // We create a key which combines the URL and digest values.
        // This is necessary because Nutch stores all the data in
        // MapFiles, which are basically just {key,value} pairs.
        //
        // If we use just the URL as the key (which is the way Nutch
        // usually works) then we have problems with multiple,
        // different copies of the same URL. If we try and store two
        // different copies of the same URL (each having a different
        // digest) and only use the URL as the key, when the MapFile
        // is written, only *one* copy of the page will be stored.
        //
        // Think about it, we're basically doing:
        // MapFile.put( url, value1 );
        // MapFile.put( url, value2 );
        // Only one of those url,value mappings will be kept; the other
        // is over-written.
        //
        // So, by using the url+digest as the key, we can have all the
        // data stored. The only problem is all over in Nutch where
        // the key==url is assumed :(
        String key = url + " " + meta.getDigest();

        Metadata contentMetadata = new Metadata();
        // Set the segment name, just as is done by standard Nutch fetching.
        // Then, add the NutchWAX-specific metadata fields.
        contentMetadata.set(Nutch.SEGMENT_NAME_KEY, segmentName);

        // We store both the normal URL and the URL+digest key for
        // later retrieval by the indexing plugin(s).
        contentMetadata.set(NutchWax.URL_KEY, url);
        // contentMetadata.set( NutchWax.ORIG_KEY, key );

        contentMetadata.set(NutchWax.FILENAME_KEY, meta.getArcFile().getName());
        contentMetadata.set(NutchWax.FILEOFFSET_KEY, String.valueOf(record.getHeader().getOffset()));

        contentMetadata.set(NutchWax.COLLECTION_KEY, collectionName);
        contentMetadata.set(NutchWax.DATE_KEY, meta.getDate());
        contentMetadata.set(NutchWax.DIGEST_KEY, meta.getDigest());
        contentMetadata.set(NutchWax.CONTENT_TYPE_KEY, meta.getMimetype());
        contentMetadata.set(NutchWax.CONTENT_LENGTH_KEY, String.valueOf(meta.getLength()));
        contentMetadata.set(NutchWax.HTTP_RESPONSE_KEY, String.valueOf(record.getStatusCode()));

        Content content = new Content(url, url, bytes, meta.getMimetype(), contentMetadata, getConf());

        // -----------------
        // write to a SequenceFile

        byte[] contentInOctets = content.getContent();
        String htmlraw = new String();

        // meta only contains char encodings
        // LOG.info("Metadata count: " + contentMetadata.names().length);
        // for (String name : contentMetadata.names()){
        // LOG.info("meta " + name + " : " + contentMetadata.get(name));
        // }
        // try getting content encoding
        try {
            htmlraw = new String(contentInOctets, contentMetadata.get("OriginalCharEncoding"));
        } catch (Exception e) {
            LOG.warn("could not get content with OriginalCharEncoding");
        }
        // if unable, try utf-8
        if (htmlraw.length() == 0) {
            try {
                htmlraw = new String(contentInOctets, "UTF-8");
            } catch (UnsupportedEncodingException e) {
                LOG.error("unable to convert content into string");
            }
        }

        URL url_h = null;
        try {
            url_h = new URL(content.getUrl());
        } catch (MalformedURLException e1) {
            LOG.error("Malformed URL Exception: " + e1.getMessage());
        }
        String protocol = url_h.getProtocol();
        String hostname = url_h.getHost();
        String urlpath = url_h.getPath();
        String param = url_h.getQuery();
        //LOG.info("HOST:" + hostname);
        //LOG.info("PATH:" + urlpath);
        //LOG.info("PROTOCOL:" + protocol);
        //LOG.info("PARAM: " + param);

        String date = meta.getDate();
        // LOG.info("meta date: " + date);
        Text key_h = new Text(protocol + "::" + hostname + "::" + urlpath + "::" + param + "::" + date);
        Text value = new Text(htmlraw);
        try {
            LOG.info("len: " + writer.getLength() + ", key: " + key_h + ", value len: " + value.getLength());
            writer.append(key_h, value);
        } catch (IOException e) {
            LOG.error("SequenceFile IOException: " + e.getMessage());
        }

        // -----------------

        output(output, new Text(key), content);

        return true;
    } catch (Throwable t) {
        LOG.error("Import fail : " + meta.getUrl(), t);
    }

    return false;
}

From source file:org.bdgenomics.adam.io.FastqRecordReader.java

License:Apache License

/**
 * Position the input stream at the start of the first record.
 *
 * @param stream The stream to reposition.
 */
protected final int positionAtFirstRecord(final FSDataInputStream stream, final CompressionCodec codec)
        throws IOException {
    Text buffer = new Text();
    long originalStart = start;

    LineReader reader;
    if (codec == null) {
        // Advance to the start of the first record that ends with /1
        // We use a temporary LineReader to read lines until we find the
        // position of the right one.  We then seek the file to that position.
        stream.seek(start);
        reader = new LineReader(stream);
    } else {
        // Unlike the codec == null case, we don't seek before creating the
        // reader, SplittableCompressionCodec.createInputStream places the
        // stream at the start of the first compression block after our
        // split start
        //
        // as noted above, we need to be at pos 0 in the stream before
        // calling this
        reader = new LineReader(((SplittableCompressionCodec) codec).createInputStream(stream, null, start, end,
                SplittableCompressionCodec.READ_MODE.BYBLOCK));
    }

    int bytesRead = 0;
    do {
        bytesRead = reader.readLine(buffer, (int) Math.min(maxLineLength, end - start));
        int bufferLength = buffer.getLength();
        if (bytesRead > 0 && !checkBuffer(bufferLength, buffer)) {
            start += bytesRead;
        } else {

            // line starts with @.  Read two more and verify that it starts
            // with a +:
            //
            // @<readname>
            // <sequence>
            // +[readname]
            //
            // if the second line we read starts with a @, we know that
            // we've read:
            //
            // <qualities> <-- @ is a valid ASCII phred encoding
            // @<readname>
            //
            // and thus, the second read is the delimiter and we can break
            long trackForwardPosition = start + bytesRead;

            bytesRead = reader.readLine(buffer, (int) Math.min(maxLineLength, end - start));
            if (buffer.getLength() > 0 && buffer.getBytes()[0] == '@') {
                start = trackForwardPosition;
                break;
            } else {
                trackForwardPosition += bytesRead;
            }

            bytesRead = reader.readLine(buffer, (int) Math.min(maxLineLength, end - start));
            trackForwardPosition += bytesRead;
            if (bytesRead > 0 && buffer.getLength() > 0 && buffer.getBytes()[0] == '+') {
                break; // all good!
            } else {
                start = trackForwardPosition;
            }
        }
    } while (bytesRead > 0);

    pos = start;
    start = originalStart;
    stream.seek(start);
    return (int) (pos - originalStart);
}
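
The reader above repeatedly inspects the first byte of each line via buffer.getBytes()[0], guarded by a length check. An isolated, illustrative sketch of that guard (the class and method names are invented):

import org.apache.hadoop.io.Text;

public final class FastqLineChecks {
    private FastqLineChecks() {
    }

    // True if the line's first byte is '@'. The getLength() guard matters because
    // getBytes() can return a reused buffer that still holds bytes from a previous,
    // longer line even when the current contents are empty.
    public static boolean startsWithAt(Text line) {
        return line.getLength() > 0 && line.getBytes()[0] == '@';
    }
}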

From source file:org.bdgenomics.adam.io.FastqRecordReader.java

License:Apache License

/**
 * Parses a read from an interleaved FASTQ file.
 *
 * Only reads a single record.
 *
 * @param readName Text record containing read name. Output parameter.
 * @param value Text record containing full record. Output parameter.
 * @return Returns true if read was successful (did not hit EOF).
 *
 * @throws RuntimeException Throws exception if FASTQ record doesn't
 *   have proper formatting (e.g., record doesn't start with @).
 */
protected final boolean lowLevelFastqRead(final Text readName, final Text value) throws IOException {

    if (endOfCompressedSplit) {
        return false;
    }

    // ID line
    readName.clear();
    long skipped = appendLineInto(readName, true);
    if (skipped == 0) {
        return false; // EOF
    }

    if (readName.getBytes()[0] != '@') {
        throw new RuntimeException("unexpected fastq record didn't start with '@' at " + makePositionMessage()
                + ". Line: " + readName + ". \n");
    }
    value.append(readName.getBytes(), 0, readName.getLength());

    // sequence
    appendLineInto(value, false);

    // separator line
    appendLineInto(value, false);

    // quality
    appendLineInto(value, false);

    return true;
}