Example usage for org.apache.hadoop.fs FSDataInputStream seek

Introduction

This page collects usage examples for org.apache.hadoop.fs.FSDataInputStream.seek.

Prototype

public void seek(long desired) throws IOException;

Document

Seek to the given offset.
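
Before the project examples below, here is a minimal, self-contained sketch of the basic seek-then-read pattern. The path /tmp/sample.bin, the 128-byte offset, and the class name SeekExample are illustrative assumptions, not taken from any of the sources on this page.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class SeekExample {
    public static void main(String[] args) throws IOException {
        // Illustrative path and offset; adjust to a file that exists on your file system.
        Path path = new Path("/tmp/sample.bin");
        FileSystem fs = path.getFileSystem(new Configuration());

        byte[] buffer = new byte[64];
        try (FSDataInputStream in = fs.open(path)) {
            in.seek(128L);            // move to absolute byte offset 128
            in.readFully(buffer);     // read exactly buffer.length bytes from there
        }
    }
}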

Usage

From source file:org.apache.carbondata.core.datastore.impl.DFSFileHolderImpl.java

License:Apache License

/**
 * This method will be used to read from file based on number of bytes to be read and position
 *
 * @param channel file channel
 * @param size    number of bytes
 * @param offset  position
 * @return byte buffer
 */
private byte[] read(FSDataInputStream channel, int size, long offset) throws IOException {
    byte[] byteBuffer = new byte[size];
    channel.seek(offset);
    channel.readFully(byteBuffer);
    return byteBuffer;
}

From source file:org.apache.carbondata.core.datastore.impl.DFSFileHolderImpl.java

License:Apache License

@Override
public int readInt(String filePath, long offset) throws IOException {
    FSDataInputStream fileChannel = updateCache(filePath);
    fileChannel.seek(offset);
    return fileChannel.readInt();
}

From source file:org.apache.carbondata.core.datastore.impl.DFSFileHolderImpl.java

License:Apache License

@Override
public long readDouble(String filePath, long offset) throws IOException {
    // Reads the raw 8 bytes at the offset as a long; the caller presumably
    // converts them to a double (e.g. via Double.longBitsToDouble) when needed.
    FSDataInputStream fileChannel = updateCache(filePath);
    fileChannel.seek(offset);
    return fileChannel.readLong();
}

From source file:org.apache.carbondata.core.datastore.impl.DFSFileHolderImpl.java

License:Apache License

@Override
public long readLong(String filePath, long offset) throws IOException {
    FSDataInputStream fileChannel = updateCache(filePath);
    fileChannel.seek(offset);
    return fileChannel.readLong();
}

From source file:org.apache.carbondata.core.datastore.impl.DFSFileReaderImpl.java

License:Apache License

/**
 * This method will be used to read from file based on number of bytes to be read and position
 *
 * @param channel file channel
 * @param size    number of bytes
 * @param offset  position
 * @return byte buffer
 */
private byte[] read(FSDataInputStream channel, int size, long offset) throws IOException {
    byte[] byteBuffer = new byte[size];
    channel.seek(offset);
    channel.readFully(byteBuffer);
    return byteBuffer;
}

From source file:org.apache.carbondata.core.datastore.impl.FileFactory.java

License:Apache License

/**
 * Returns a DataInputStream positioned at the given offset of the file.
 *
 * @param path
 * @param fileType
 * @param bufferSize
 * @param offset
 * @return DataInputStream
 * @throws IOException
 */
public static DataInputStream getDataInputStream(String path, FileType fileType, int bufferSize, long offset)
        throws IOException {
    path = path.replace("\\", "/");
    switch (fileType) {
    case HDFS:
    case ALLUXIO:
    case VIEWFS:
        Path pt = new Path(path);
        FileSystem fs = pt.getFileSystem(configuration);
        FSDataInputStream stream = fs.open(pt, bufferSize);
        stream.seek(offset);
        return new DataInputStream(new BufferedInputStream(stream));
    default:
        path = getUpdatedFilePath(path, fileType);
        FileInputStream fis = new FileInputStream(path);
        long actualSkipSize = 0;
        long skipSize = offset;
        while (actualSkipSize != offset) {
            actualSkipSize += fis.skip(skipSize);
            // skip() may skip fewer bytes than requested, so recompute the
            // remaining distance from the original offset
            skipSize = offset - actualSkipSize;
        }
        return new DataInputStream(new BufferedInputStream(fis));
    }
}
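
A hypothetical call site for this helper might look like the sketch below. The path, buffer size, and offset are illustrative assumptions, and the FileType value is assumed to be the enum nested in CarbonData's FileFactory.

// Illustrative only: open a file and start reading at byte offset 1024.
DataInputStream in = FileFactory.getDataInputStream(
        "hdfs://namenode:8020/user/carbon/part-0.bin",  // assumed path
        FileFactory.FileType.HDFS,                      // assumed file type
        4096,                                           // read buffer size
        1024L);                                         // offset to position at
try {
    int firstValue = in.readInt();                      // read from the seeked position
} finally {
    in.close();
}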

From source file:org.apache.carbondata.hadoop.stream.StreamRecordReader.java

License:Apache License

protected void initializeAtFirstRow() throws IOException {
    filterValues = new Object[carbonTable.getDimensionOrdinalMax() + measureCount];
    filterRow = new RowImpl();
    filterRow.setValues(filterValues);

    outputValues = new Object[projection.length];

    Path file = fileSplit.getPath();

    byte[] syncMarker = getSyncMarker(file.toString());

    FileSystem fs = file.getFileSystem(hadoopConf);

    int bufferSize = Integer.parseInt(hadoopConf.get(CarbonStreamInputFormat.READ_BUFFER_SIZE,
            CarbonStreamInputFormat.READ_BUFFER_SIZE_DEFAULT));

    FSDataInputStream fileIn = fs.open(file, bufferSize);
    fileIn.seek(fileSplit.getStart());
    input = new StreamBlockletReader(syncMarker, fileIn, fileSplit.getLength(), fileSplit.getStart() == 0,
            compressorName);

    cacheProvider = CacheProvider.getInstance();
    cache = cacheProvider.createCache(CacheType.FORWARD_DICTIONARY);
    queryTypes = CarbonStreamInputFormat.getComplexDimensions(carbonTable, storageColumns, cache);
}

From source file:org.apache.carbondata.streaming.CarbonStreamRecordReader.java

License:Apache License

private void initializeAtFirstRow() throws IOException {
    filterValues = new Object[carbonTable.getDimensionOrdinalMax() + measureCount];
    filterRow = new RowImpl();
    filterRow.setValues(filterValues);

    outputValues = new Object[projection.length];
    outputRow = new GenericInternalRow(outputValues);

    Path file = fileSplit.getPath();

    byte[] syncMarker = getSyncMarker(file.toString());

    FileSystem fs = file.getFileSystem(hadoopConf);

    int bufferSize = Integer.parseInt(hadoopConf.get(CarbonStreamInputFormat.READ_BUFFER_SIZE,
            CarbonStreamInputFormat.READ_BUFFER_SIZE_DEFAULT));

    FSDataInputStream fileIn = fs.open(file, bufferSize);
    fileIn.seek(fileSplit.getStart());
    input = new StreamBlockletReader(syncMarker, fileIn, fileSplit.getLength(), fileSplit.getStart() == 0);

    cacheProvider = CacheProvider.getInstance();
    cache = cacheProvider.createCache(CacheType.FORWARD_DICTIONARY);
    queryTypes = CarbonStreamInputFormat.getComplexDimensions(carbonTable, storageColumns, cache);

    outputSchema = new StructType(
            (StructField[]) DataTypeUtil.getDataTypeConverter().convertCarbonSchemaToSparkSchema(projection));
}

From source file:org.apache.crunch.io.text.csv.CSVInputFormat.java

License:Apache License

/**
 * In summary, this method will start at the beginning of the file, seek to
 * the position corresponding to the desired split size, seek to the end of
 * the line that contains that position, then attempt to seek until the
 * CSVLineReader indicates that the current position is no longer within a CSV
 * record. Then, it will mark that position for a split and repeat its
 * logic.
 */
@VisibleForTesting
protected List<FileSplit> getSplitsForFile(final long splitSize, final long fileSize, final Path fileName,
        final FSDataInputStream inputStream) throws IOException {
    final List<FileSplit> splitsList = new ArrayList<FileSplit>();

    long splitStart;
    long currentPosition = 0;

    boolean endOfFile = false;
    while (!endOfFile) {
        // Set the start of this split to the furthest read point in the file
        splitStart = currentPosition;

        // Skip a number of bytes equal to the desired split size to avoid parsing
        // every csv line, which greatly increases the run time
        currentPosition = splitStart + splitSize;

        // The input stream will freak out if we try to seek past the EOF
        if (currentPosition >= fileSize) {
            currentPosition = fileSize;
            endOfFile = true;
            final FileSplit fileSplit = new FileSplit(fileName, splitStart, currentPosition - splitStart,
                    new String[] {});
            splitsList.add(fileSplit);
            break;
        }

        // Every time we seek to the new approximate split point,
        // we need to create a new CSVLineReader around the stream.
        inputStream.seek(currentPosition);
        final CSVLineReader csvLineReader = new CSVLineReader(inputStream, this.bufferSize,
                this.inputFileEncoding, this.openQuoteChar, this.closeQuoteChar, this.escapeChar,
                this.maximumRecordSize);

        // This line is potentially garbage because we most likely just sought to
        // the middle of a line. Read the rest of the line and leave it for the
        // previous split. Then reset the multi-line CSV record boolean, because
        // the partial line will have a very high chance of falsely triggering the
        // class-wide multi-line logic.
        currentPosition += csvLineReader.readFileLine(new Text());
        csvLineReader.resetMultiLine();

        // Now, we may still be in the middle of a multi-line CSV record.
        currentPosition += csvLineReader.readFileLine(new Text());

        // If we are, read until we are not.
        while (csvLineReader.isInMultiLine()) {
            final int bytesRead = csvLineReader.readFileLine(new Text());
            // End of file
            if (bytesRead <= 0) {
                break;
            }
            currentPosition += bytesRead;
        }

        // We're out of the multi-line CSV record, so it's safe to end the
        // previous split.
        splitsList.add(new FileSplit(fileName, splitStart, currentPosition - splitStart, new String[] {}));
    }

    return splitsList;
}

From source file:org.apache.drill.exec.store.parquet.columnreaders.PageReader.java

License:Apache License

private void loadDictionaryIfExists(final ColumnReader<?> parentStatus,
        final ColumnChunkMetaData columnChunkMetaData, final FSDataInputStream f) throws IOException {
    Stopwatch timer = Stopwatch.createUnstarted();
    if (columnChunkMetaData.getDictionaryPageOffset() > 0) {
        f.seek(columnChunkMetaData.getDictionaryPageOffset());
        long start = f.getPos();
        timer.start();
        final PageHeader pageHeader = Util.readPageHeader(f);
        long timeToRead = timer.elapsed(TimeUnit.MICROSECONDS);
        long pageHeaderBytes = f.getPos() - start;
        this.updateStats(pageHeader, "Page Header", start, timeToRead, pageHeaderBytes, pageHeaderBytes);
        assert pageHeader.type == PageType.DICTIONARY_PAGE;
        readDictionaryPage(pageHeader, parentStatus);
    }
}
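
The seek/getPos pairing in this snippet is a common way to measure how many bytes a read consumed. A stripped-down sketch of that pattern using only the Hadoop API is shown below; the method name measureRead is made up for illustration, and the fixed 64-byte buffer stands in for a variable-length decoder such as Util.readPageHeader.

/**
 * Minimal illustration of the seek/getPos pattern: position the stream at an
 * absolute offset, record the position, read, and compute the bytes consumed.
 */
private static long measureRead(FileSystem fs, Path path, long offset) throws IOException {
    try (FSDataInputStream in = fs.open(path)) {
        in.seek(offset);                  // jump to the absolute offset
        long start = in.getPos();         // position before the read
        byte[] header = new byte[64];     // fixed-size stand-in; a real header decoder
        in.readFully(header);             // would consume a variable number of bytes
        return in.getPos() - start;       // bytes consumed (trivially 64 here)
    }
}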