List of usage examples for org.apache.hadoop.fs FSDataInputStream seek
@Override public void seek(long desired) throws IOException
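Before the collected examples, a minimal self-contained sketch of seek followed by a read (the path, offset, and buffer size are illustrative, not taken from any example below):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class SeekExample {
  public static void main(String[] args) throws IOException {
    Path path = new Path("/tmp/example.bin"); // illustrative path
    FileSystem fs = path.getFileSystem(new Configuration());
    try (FSDataInputStream in = fs.open(path)) {
      in.seek(128);                  // jump to absolute byte offset 128
      byte[] buffer = new byte[64];
      in.readFully(buffer);          // read exactly 64 bytes starting there
    }
  }
}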
From source file:org.apache.carbondata.core.datastore.impl.DFSFileHolderImpl.java
License:Apache License
/**
 * This method will be used to read from file based on number of bytes
 * to be read and position.
 *
 * @param channel file channel
 * @param size number of bytes
 * @param offset position
 * @return byte buffer
 */
private byte[] read(FSDataInputStream channel, int size, long offset) throws IOException {
  byte[] byteBuffer = new byte[size];
  channel.seek(offset);
  channel.readFully(byteBuffer);
  return byteBuffer;
}
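A variant worth knowing: FSDataInputStream also implements PositionedReadable, so the same read can be written without moving the stream's current offset, which matters when several callers share one stream. A minimal sketch (the method name readAt is ours):

private byte[] readAt(FSDataInputStream channel, int size, long offset) throws IOException {
  byte[] byteBuffer = new byte[size];
  // positional read: fills the buffer from the given offset without
  // changing the stream's current position
  channel.readFully(offset, byteBuffer);
  return byteBuffer;
}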
From source file:org.apache.carbondata.core.datastore.impl.DFSFileHolderImpl.java
License:Apache License
@Override public int readInt(String filePath, long offset) throws IOException {
  FSDataInputStream fileChannel = updateCache(filePath);
  fileChannel.seek(offset);
  return fileChannel.readInt();
}
From source file:org.apache.carbondata.core.datastore.impl.DFSFileHolderImpl.java
License:Apache License
@Override public double readDouble(String filePath, long offset) throws IOException {
  FSDataInputStream fileChannel = updateCache(filePath);
  fileChannel.seek(offset);
  return fileChannel.readDouble();
}
From source file:org.apache.carbondata.core.datastore.impl.DFSFileHolderImpl.java
License:Apache License
@Override public long readLong(String filePath, long offset) throws IOException {
  FSDataInputStream fileChannel = updateCache(filePath);
  fileChannel.seek(offset);
  return fileChannel.readLong();
}
From source file:org.apache.carbondata.core.datastore.impl.DFSFileReaderImpl.java
License:Apache License
/**
 * This method will be used to read from file based on number of bytes
 * to be read and position.
 *
 * @param channel file channel
 * @param size number of bytes
 * @param offset position
 * @return byte buffer
 */
private byte[] read(FSDataInputStream channel, int size, long offset) throws IOException {
  byte[] byteBuffer = new byte[size];
  channel.seek(offset);
  channel.readFully(byteBuffer);
  return byteBuffer;
}
From source file:org.apache.carbondata.core.datastore.impl.FileFactory.java
License:Apache License
/**
 * Return a DataInputStream which is seeked to the given offset of the file.
 *
 * @param path file path
 * @param fileType file type (HDFS, ALLUXIO, VIEWFS, local, ...)
 * @param bufferSize read buffer size
 * @param offset position to seek to
 * @return DataInputStream
 * @throws IOException
 */
public static DataInputStream getDataInputStream(String path, FileType fileType, int bufferSize,
    long offset) throws IOException {
  path = path.replace("\\", "/");
  switch (fileType) {
    case HDFS:
    case ALLUXIO:
    case VIEWFS:
      Path pt = new Path(path);
      FileSystem fs = pt.getFileSystem(configuration);
      FSDataInputStream stream = fs.open(pt, bufferSize);
      stream.seek(offset);
      return new DataInputStream(new BufferedInputStream(stream));
    default:
      path = getUpdatedFilePath(path, fileType);
      FileInputStream fis = new FileInputStream(path);
      // FileInputStream has no seek, so skip forward until the requested
      // offset is reached; skip() may skip fewer bytes than asked, so the
      // remaining distance must be recomputed each iteration.
      long actualSkipSize = 0;
      while (actualSkipSize != offset) {
        actualSkipSize += fis.skip(offset - actualSkipSize);
      }
      return new DataInputStream(new BufferedInputStream(fis));
  }
}
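A hypothetical call site for the method above; the path, buffer size, and offset are illustrative, and the FileFactory.FileType qualification assumes the enum is nested in FileFactory:

DataInputStream in = FileFactory.getDataInputStream(
    "hdfs://namenode:8020/data/part-00000", FileFactory.FileType.HDFS, 4096, 1024L);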
From source file:org.apache.carbondata.hadoop.stream.StreamRecordReader.java
License:Apache License
protected void initializeAtFirstRow() throws IOException {
  filterValues = new Object[carbonTable.getDimensionOrdinalMax() + measureCount];
  filterRow = new RowImpl();
  filterRow.setValues(filterValues);
  outputValues = new Object[projection.length];
  Path file = fileSplit.getPath();
  byte[] syncMarker = getSyncMarker(file.toString());
  FileSystem fs = file.getFileSystem(hadoopConf);
  int bufferSize = Integer.parseInt(hadoopConf.get(CarbonStreamInputFormat.READ_BUFFER_SIZE,
      CarbonStreamInputFormat.READ_BUFFER_SIZE_DEFAULT));
  FSDataInputStream fileIn = fs.open(file, bufferSize);
  fileIn.seek(fileSplit.getStart());
  input = new StreamBlockletReader(syncMarker, fileIn, fileSplit.getLength(),
      fileSplit.getStart() == 0, compressorName);
  cacheProvider = CacheProvider.getInstance();
  cache = cacheProvider.createCache(CacheType.FORWARD_DICTIONARY);
  queryTypes = CarbonStreamInputFormat.getComplexDimensions(carbonTable, storageColumns, cache);
}
From source file:org.apache.carbondata.streaming.CarbonStreamRecordReader.java
License:Apache License
private void initializeAtFirstRow() throws IOException {
  filterValues = new Object[carbonTable.getDimensionOrdinalMax() + measureCount];
  filterRow = new RowImpl();
  filterRow.setValues(filterValues);
  outputValues = new Object[projection.length];
  outputRow = new GenericInternalRow(outputValues);
  Path file = fileSplit.getPath();
  byte[] syncMarker = getSyncMarker(file.toString());
  FileSystem fs = file.getFileSystem(hadoopConf);
  int bufferSize = Integer.parseInt(hadoopConf.get(CarbonStreamInputFormat.READ_BUFFER_SIZE,
      CarbonStreamInputFormat.READ_BUFFER_SIZE_DEFAULT));
  FSDataInputStream fileIn = fs.open(file, bufferSize);
  fileIn.seek(fileSplit.getStart());
  input = new StreamBlockletReader(syncMarker, fileIn, fileSplit.getLength(),
      fileSplit.getStart() == 0);
  cacheProvider = CacheProvider.getInstance();
  cache = cacheProvider.createCache(CacheType.FORWARD_DICTIONARY);
  queryTypes = CarbonStreamInputFormat.getComplexDimensions(carbonTable, storageColumns, cache);
  outputSchema = new StructType((StructField[]) DataTypeUtil.getDataTypeConverter()
      .convertCarbonSchemaToSparkSchema(projection));
}
From source file:org.apache.crunch.io.text.csv.CSVInputFormat.java
License:Apache License
/**
 * In summary, this method will start at the beginning of the file, seek to
 * the position corresponding to the desired split size, seek to the end of
 * the line that contains that position, then attempt to read until the
 * CSVLineReader indicates that the current position is no longer within a
 * CSV record. Then, it will mark that position for a split and repeat its
 * logic.
 */
@VisibleForTesting
protected List<FileSplit> getSplitsForFile(final long splitSize, final long fileSize,
    final Path fileName, final FSDataInputStream inputStream) throws IOException {
  final List<FileSplit> splitsList = new ArrayList<FileSplit>();
  long splitStart;
  long currentPosition = 0;
  boolean endOfFile = false;
  while (!endOfFile) {
    // Set the start of this split to the furthest read point in the file
    splitStart = currentPosition;
    // Skip a number of bytes equal to the desired split size to avoid parsing
    // every csv line, which greatly increases the run time
    currentPosition = splitStart + splitSize;
    // The input stream will freak out if we try to seek past the EOF
    if (currentPosition >= fileSize) {
      currentPosition = fileSize;
      endOfFile = true;
      final FileSplit fileSplit = new FileSplit(fileName, splitStart,
          currentPosition - splitStart, new String[] {});
      splitsList.add(fileSplit);
      break;
    }
    // Every time we seek to the new approximate split point,
    // we need to create a new CSVLineReader around the stream.
    inputStream.seek(currentPosition);
    final CSVLineReader csvLineReader = new CSVLineReader(inputStream, this.bufferSize,
        this.inputFileEncoding, this.openQuoteChar, this.closeQuoteChar, this.escapeChar,
        this.maximumRecordSize);
    // This line is potentially garbage because we most likely just sought to
    // the middle of a line. Read the rest of the line and leave it for the
    // previous split. Then reset the multi-line CSV record boolean, because
    // the partial line will have a very high chance of falsely triggering the
    // class-wide multi-line logic.
    currentPosition += csvLineReader.readFileLine(new Text());
    csvLineReader.resetMultiLine();
    // Now, we may still be in the middle of a multi-line CSV record.
    currentPosition += csvLineReader.readFileLine(new Text());
    // If we are, read until we are not.
    while (csvLineReader.isInMultiLine()) {
      final int bytesRead = csvLineReader.readFileLine(new Text());
      // End of file
      if (bytesRead <= 0) {
        break;
      }
      currentPosition += bytesRead;
    }
    // We're out of the multi-line CSV record, so it's safe to end the
    // previous split.
    splitsList.add(new FileSplit(fileName, splitStart, currentPosition - splitStart,
        new String[] {}));
  }
  return splitsList;
}
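The core trick above, seeking to an approximate offset and then reading off the partial line to land on a record boundary, can be shown in isolation. A sketch for plain line-oriented files using org.apache.hadoop.util.LineReader (no multi-line CSV handling; fileSize and inputStream are assumed to be in scope):

long approxSplit = fileSize / 2; // some desired split point
inputStream.seek(approxSplit);
LineReader lines = new LineReader(inputStream);
// The first read almost certainly returns a partial line; its byte length
// tells us how far the true record boundary is past the approximate point.
long boundary = approxSplit + lines.readLine(new Text());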
From source file:org.apache.drill.exec.store.parquet.columnreaders.PageReader.java
License:Apache License
private void loadDictionaryIfExists(final ColumnReader<?> parentStatus,
    final ColumnChunkMetaData columnChunkMetaData, final FSDataInputStream f) throws IOException {
  Stopwatch timer = Stopwatch.createUnstarted();
  if (columnChunkMetaData.getDictionaryPageOffset() > 0) {
    f.seek(columnChunkMetaData.getDictionaryPageOffset());
    long start = f.getPos();
    timer.start();
    final PageHeader pageHeader = Util.readPageHeader(f);
    long timeToRead = timer.elapsed(TimeUnit.MICROSECONDS);
    long pageHeaderBytes = f.getPos() - start;
    this.updateStats(pageHeader, "Page Header", start, timeToRead, pageHeaderBytes,
        pageHeaderBytes);
    assert pageHeader.type == PageType.DICTIONARY_PAGE;
    readDictionaryPage(pageHeader, parentStatus);
  }
}