Example usage of org.apache.hadoop.fs.FSDataInputStream.seek

Introduction

This page collects example usages of the seek method of org.apache.hadoop.fs.FSDataInputStream.

Prototype

@Override
public void seek(long desired) throws IOException

Source Link

Document

Seek to the given offset.

Usage

From source file:SingleFileReader.java

License:Apache License

/**
 * Performs a random-read pass over the file: seeks to a random offset and
 * reads one buffer's worth of data, repeated {@code fileSize / bufferSize}
 * times. The loop is bracketed by {@code t.start(2)} / {@code t.end(2)}.
 *
 * <p>The buffer is cleared before every read so that each iteration has the
 * full {@code bufferSize} capacity available. Flipping the buffer after a read
 * instead would shrink its limit to the size of the previous read, so a single
 * short (or end-of-file) read would silently reduce all following reads.
 *
 * @throws Exception if opening, seeking or reading the file fails
 */
private void randRead() throws Exception {
    FSDataInputStream is = fs.open(hdfsFilePath);
    try {
        ByteBuffer buf = ByteBuffer.allocate(bufferSize);
        // Largest offset from which a whole buffer can still be read.
        double offsetMax = fileSize - bufferSize - 1;
        long numIters = (long) (fileSize / bufferSize);
        t.start(2);
        for (long iter = 0; iter < numIters; iter++) {
            long offset = (long) (Math.random() * offsetMax);
            is.seek(offset);
            // Reset position and limit; the bytes read are intentionally discarded.
            buf.clear();
            is.read(buf);
        }
        t.end(2);
    } finally {
        // Release the stream even when a seek or read fails part-way through.
        is.close();
    }
}

From source file:SeekableInputStream.java

License:Apache License

/**
 * Opens {@code path} on {@code fs}, positions the stream at {@code start} and
 * wraps it in a {@link SeekableInputStream}.
 *
 * <p>If positioning or wrapping fails, the freshly opened stream is closed
 * before the failure is propagated, so no file handle is leaked.
 *
 * @param path              file to open
 * @param start             byte offset to seek to before wrapping
 * @param end               not used by this method
 * @param fs                file system that owns {@code path}
 * @param compressionCodecs not used by this method
 * @return a stream positioned at {@code start}
 * @throws IOException if the file cannot be opened or the seek fails
 */
public static SeekableInputStream getInstance(Path path, long start, long end, FileSystem fs,
        CompressionCodecFactory compressionCodecs) throws IOException {
    FSDataInputStream din = fs.open(path);
    try {
        din.seek(start);
        return new SeekableInputStream(din);
    } catch (IOException | RuntimeException e) {
        // Ownership was never handed to the caller, so clean up here.
        try {
            din.close();
        } catch (IOException closeFailure) {
            e.addSuppressed(closeFailure);
        }
        throw e;
    }
}

From source file:audr.text.utils.FileUtils.java

License:Open Source License

/**
 * Rewinds the given stream to offset 0 and reads it into a byte array.
 * Reading stops at the first {@code read} call that returns a value that is
 * not positive. The stream is not closed.
 *
 * @param image stream to read; it is repositioned to the beginning first
 * @return all bytes read from the stream
 * @throws IOException if seeking or reading fails
 */
public static byte[] HDFSFile2ByteArray(FSDataInputStream image) throws IOException {
    image.seek(0);

    final ByteArrayOutputStream collected = new ByteArrayOutputStream(1024);
    final byte[] chunk = new byte[1024];

    // Copy chunk by chunk until the stream reports no more data.
    for (int n = image.read(chunk); n > 0; n = image.read(chunk)) {
        collected.write(chunk, 0, n);
    }

    return collected.toByteArray();
}

From source file:authordetect.input.SingleBookReader.java

/**
 * @param inputSplit/* w  w  w .  j a v  a 2s .com*/
 * @param context    the information about the task
 * @throws java.io.IOException
 * @throws InterruptedException
 */
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {

    FileSplit split = (FileSplit) inputSplit;
    Configuration configuration = context.getConfiguration();

    // get the option from configuration:
    // 0 for group by author, 1 for group by book
    int option = configuration.getInt("GROUP_OPTION", 0);

    Path path = split.getPath();
    filename = path.getName();
    FileSystem fileSystem = path.getFileSystem(configuration);
    FSDataInputStream inputStream = fileSystem.open(path);
    lineReader = new LineReader(inputStream, configuration);

    //initial start point and end point
    start = split.getStart();
    end = start + split.getLength();

    inputStream.seek(start);
    if (start != 0) {
        start += lineReader.readLine(new Text(), 0, (int) Math.min(Integer.MAX_VALUE, end - start));
    }

    start += lineReader.readLine(currentLine);

    prepareToScanBook(option);
}

From source file:brush.FastqRecordReader.java

License:Apache License

/**
 * Position the input stream at the start of the first record.
 *
 * <p>Scans forward line by line from {@code start}, advancing {@code start}
 * past every line rejected by {@code checkBuffer}. For a candidate line, two
 * further lines are read and the second of them must begin with {@code '+'};
 * otherwise scanning resumes just after the candidate line. On return the
 * stream has been sought to {@code start} and {@code pos} equals {@code start}.
 *
 * @param stream The stream to reposition.
 * @throws IOException if seeking or reading the stream fails
 */
protected void positionAtFirstRecord(FSDataInputStream stream) throws IOException {
    Text buffer = new Text();

    if (true) { // (start > 0) // use start>0 to assume that files start with valid data
        // Advance to the start of the first record that ends with /1
        // We use a temporary LineReader to read lines until we find the
        // position of the right one.  We then seek the file to that position.
        stream.seek(start);
        LineReader reader = new LineReader(stream);

        int bytesRead = 0;
        do {
            // Read one line, capped at MAX_LINE_LENGTH or the bytes left before end.
            bytesRead = reader.readLine(buffer, (int) Math.min(MAX_LINE_LENGTH, end - start));
            int bufferLength = buffer.getLength();
            // NOTE(review): checkBuffer is defined elsewhere; presumably it reports
            // whether the line begins with '@' -- confirm against its definition.
            if (bytesRead > 0 && !checkBuffer(bufferLength, buffer)) {
                // Not a candidate record header: skip over this line.
                start += bytesRead;
            } else {
                // line starts with @.  Read two more and verify that it starts with a +
                // (this branch is also entered when bytesRead is 0, i.e. nothing was read)
                //
                // If this isn't the start of a record, we want to backtrack to its end
                long backtrackPosition = start + bytesRead;

                // The result of the first of these two reads is overwritten by the
                // second; only the second line read is inspected below.
                bytesRead = reader.readLine(buffer, (int) Math.min(MAX_LINE_LENGTH, end - start));
                bytesRead = reader.readLine(buffer, (int) Math.min(MAX_LINE_LENGTH, end - start));
                if (bytesRead > 0 && buffer.getLength() > 0 && buffer.getBytes()[0] == '+') {
                    break; // all good!
                } else {
                    // backtrack to the end of the record we thought was the start.
                    // A fresh LineReader is created after the seek so that reading
                    // continues from the new stream position.
                    start = backtrackPosition;
                    stream.seek(start);
                    reader = new LineReader(stream);
                }
            }
        } while (bytesRead > 0);

        // Leave the stream at the position that was settled on.
        stream.seek(start);
    }

    pos = start;
}

From source file:cc.solr.lucene.store.hdfs.HdfsFileReader.java

License:Apache License

/**
 * Reads the length recorded in the last 12 bytes of the file.
 *
 * <p>The method seeks to 12 bytes before the end of the file as reported by
 * the file status, then reads a {@code long} (the returned length) followed by
 * an {@code int} version that must equal {@code VERSION}.
 *
 * @param fileSystem file system that holds the file
 * @param path       file to inspect
 * @return the length value stored in the file
 * @throws IOException      if the file cannot be opened, positioned or read
 * @throws RuntimeException if the stored version differs from {@code VERSION}
 */
public static long getLength(FileSystem fileSystem, Path path) throws IOException {
    final FileStatus status = fileSystem.getFileStatus(path);
    final FSDataInputStream trailer = fileSystem.open(path);
    try {
        // 12 = 8 bytes for the long plus 4 bytes for the int read below.
        trailer.seek(status.getLen() - 12);
        final long storedLength = trailer.readLong();
        final int storedVersion = trailer.readInt();
        if (storedVersion == VERSION) {
            return storedLength;
        }
        throw new RuntimeException(
                "Version of file [" + storedVersion + "] does not match reader [" + VERSION + "]");
    } finally {
        trailer.close();
    }
}

From source file:cn.lhfei.hadoop.ch03.FileSystemDoubleCat.java

License:Apache License

/**
 * Prints the file named by {@code args[0]} to standard output twice, seeking
 * back to the start of the stream between the two copies.
 *
 * <p>I/O failures are reported via a stack trace. The input stream is always
 * closed before the method returns.
 *
 * @param args command-line arguments; {@code args[0]} is the URI of the file
 */
public static void main(String[] args) {

    String uri = args[0];
    FSDataInputStream in = null;
    Configuration conf = new Configuration();

    try {
        FileSystem fs = FileSystem.get(URI.create(uri), conf);
        in = fs.open(new Path(uri));

        // copyBytes is told not to close the streams (last argument false),
        // so the same input stream can be rewound and copied a second time.
        IOUtils.copyBytes(in, System.out, 4096, false);
        in.seek(0L); // go back to the start of the file

        IOUtils.copyBytes(in, System.out, 4096, false);

    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // Nothing else closes the stream; closeStream accepts null if open failed.
        IOUtils.closeStream(in);
    }
}

From source file:co.cask.tigon.logging.LogFileReader.java

License:Apache License

/**
 * Recursive method to tail the log. Reads from the current log file
 * instance (i), and if that does not have sufficient size, first recurses to
 * the next older instance (i+1) for the missing bytes. If the caller knows the
 * size of the current file (i), it can pass it via the sizeHint parameter.
 * Empty lines are not added to the result.
 *
 * @param lines A list of log lines to append read lines to
 * @param i The current log file instance to start reading from
 * @param size number of bytes to read at most
 * @param sizeHint if known, the caller should pass in the length of the
 *                 current log file instance. This helps to seek to the end
 *                 of a file that has not been closed yet (and hence file
 *                 status does not reflect its correct size). Only needed
 *                 at instance 0. Otherwise (for recursive calls) this is
 *                 -1, and the file size will be obtained via file status.
 * @return The list of lines read (the same list that was passed in)
 * @throws java.io.IOException if reading goes badly wrong
 */
private List<String> tail(ArrayList<String> lines, int i, long size, long sizeHint) throws IOException {

    // path of the current log file instance (xxx.log[.i])
    final Path logPath = new Path(config.getLogFilePath(), makeFileName(i));

    // a missing path contributes nothing
    if (!fileSystem.exists(logPath)) {
        return lines;
    }
    final FileStatus logStatus = fileSystem.getFileStatus(logPath);
    // neither does anything that is not a regular file
    if (!logStatus.isFile()) {
        return lines;
    }

    // pick the file size: explicit hint, file status, or a measured size
    final long fileSize;
    if (sizeHint >= 0) {
        fileSize = sizeHint;
    } else if (i > 0) {
        fileSize = logStatus.getLen();
    } else {
        fileSize = determineTrueFileSize(logPath, logStatus);
    }

    final long seekPos;
    final long bytesToRead;
    if (fileSize < size) {
        // this file alone is too small: collect the remainder from the
        // previous file first, then read this file from its beginning
        tail(lines, i + 1, size - fileSize, -1);
        seekPos = 0;
        bytesToRead = fileSize;
    } else {
        // this file is large enough: start size bytes before its end
        seekPos = fileSize - size;
        bytesToRead = size;
    }

    // load the selected byte range of the current file
    final byte[] bytes = new byte[(int) bytesToRead];
    final FSDataInputStream input = fileSystem.open(logPath);
    try {
        if (seekPos > 0) {
            input.seek(seekPos);
        }
        input.readFully(bytes);
    } finally {
        input.close();
    }

    int cursor = 0;
    if (seekPos > 0) {
        // having seeked into the file we are likely in the middle of a line,
        // so discard everything up to and including the first new line
        for (; cursor < bytesToRead && bytes[cursor] != '\n'; cursor++) {
            // scanning only
        }
        cursor++;
    }

    // cut the rest of the buffer into lines
    while (cursor < bytesToRead) {
        final int lineStart = cursor;
        for (; cursor < bytesToRead && bytes[cursor] != '\n'; cursor++) {
            // scanning only
        }
        // cursor is now at a new line or at the end of the buffer
        final int lineLength = cursor - lineStart;
        if (lineLength > 0) { // ignore empty lines
            lines.add(new String(bytes, lineStart, lineLength, LogFileWriter.CHARSET_UTF8));
        }
        cursor++; // step over the new line character
    }
    return lines;
}

From source file:co.cask.tigon.logging.LogFileReader.java

License:Apache License

/**
 * Measures the size of a file by seeking to the length reported in
 * {@code status}, reading until the stream signals end of file, and returning
 * the stream position reached.
 *
 * @param path   file to measure
 * @param status file status whose length is used as the starting offset
 * @return the stream position after reading to the end of the file
 * @throws IOException if the file cannot be opened, positioned or read
 */
private long determineTrueFileSize(Path path, FileStatus status) throws IOException {
    final FSDataInputStream in = fileSystem.open(path);
    try {
        in.seek(status.getLen());
        final byte[] scratch = new byte[1024 * 1024];
        // keep reading (and discarding) until read reports end of file
        int n;
        do {
            n = in.read(scratch, 0, scratch.length);
        } while (n >= 0);
        return in.getPos();
    } finally {
        in.close();
    }
}

From source file:co.nubetech.hiho.dedup.DelimitedLineRecordReader.java

License:Apache License

/**
 * Reads the reader settings from the job configuration, opens the file behind
 * the split and creates the line reader.
 *
 * <p>The delimiter, the column (default 0) and the maximum line length
 * (key {@code mapred.linerecordreader.maxlength}, default
 * {@code Integer.MAX_VALUE}) come from the configuration. When a compression
 * codec is found for the file, the reader wraps the codec's input stream and
 * {@code end} is set to {@code Long.MAX_VALUE}. Otherwise, when the split does
 * not begin at offset 0, the stream is positioned one byte before the split
 * start and one line is read and discarded, with {@code start} advanced by the
 * bytes consumed. Finally {@code pos} is set to {@code start}.
 *
 * @param genericSplit the split to read; cast to {@link FileSplit}
 * @param context      task context supplying the configuration
 * @throws IOException if the file cannot be opened, positioned or read
 */

@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    final FileSplit fileSplit = (FileSplit) genericSplit;
    final Configuration job = context.getConfiguration();

    this.delimiter = job.get(DelimitedTextInputFormat.DELIMITER_CONF);
    this.column = job.getInt(DelimitedTextInputFormat.COLUMN_CONF, 0);
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);

    start = fileSplit.getStart();
    end = start + fileSplit.getLength();

    final Path file = fileSplit.getPath();
    compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = compressionCodecs.getCodec(file);

    // open the file behind the split
    final FSDataInputStream fileIn = file.getFileSystem(job).open(fileSplit.getPath());

    if (codec != null) {
        // codec found: read through the codec's stream
        in = new LineReader(codec.createInputStream(fileIn), job);
        end = Long.MAX_VALUE;
    } else if (start == 0) {
        // no codec, split begins at offset 0: read from the beginning as-is
        in = new LineReader(fileIn, job);
    } else {
        // no codec, split begins mid-file: back up one byte, then skip the
        // first line and re-establish "start"
        --start;
        fileIn.seek(start);
        in = new LineReader(fileIn, job);
        start += in.readLine(new Text(), 0, (int) Math.min((long) Integer.MAX_VALUE, end - start));
    }
    this.pos = start;
}