List of usage examples for org.apache.hadoop.fs.FSDataInputStream.seek()
@Override public void seek(long desired) throws IOException
From source file:SingleFileReader.java
License:Apache License
private void randRead() throws Exception { //bufferSize = 4; /*Tachyon reads one int a time*/ FSDataInputStream is = fs.open(hdfsFilePath); // byte[] bbuf = new byte[bufferSize]; ByteBuffer buf = ByteBuffer.allocate(bufferSize); double offsetMax = fileSize - bufferSize - 1; long offset = (long) (Math.random() * offsetMax); long numIters = (long) (fileSize / bufferSize); t.start(2);/*from w w w . j ava 2 s.c o m*/ while (numIters != 0) { /* if (numIters % 500 == 0) { System.out.println(offset); } */ is.seek(offset); int bytesRead = is.read(buf); buf.flip(); offset = (long) (Math.random() * offsetMax); numIters = numIters - 1; } t.end(2); is.close(); }
From source file:SeekableInputStream.java
License:Apache License
/**
 * Opens {@code path} on {@code fs}, seeks the stream to {@code start} and wraps it in a
 * {@link SeekableInputStream}.
 *
 * @param path file to open
 * @param start byte offset to seek to before wrapping
 * @param end not used by this method
 * @param fs file system used to open {@code path}
 * @param compressionCodecs not used by this method
 * @return a new wrapper around the opened, positioned stream
 * @throws IOException if the file cannot be opened or the seek fails
 */
public static SeekableInputStream getInstance(Path path, long start, long end, FileSystem fs,
        CompressionCodecFactory compressionCodecs) throws IOException {
    FSDataInputStream din = fs.open(path);
    boolean handedOff = false;
    try {
        din.seek(start);
        SeekableInputStream wrapper = new SeekableInputStream(din);
        handedOff = true;
        return wrapper;
    } finally {
        if (!handedOff) {
            // Seek or wrapping failed: nobody else owns the stream, so close it here.
            try {
                din.close();
            } catch (IOException ignored) {
                // Deliberately ignored so the original failure is the one that propagates.
            }
        }
    }
}
From source file:audr.text.utils.FileUtils.java
License:Open Source License
public static byte[] HDFSFile2ByteArray(FSDataInputStream image) throws IOException { image.seek(0); // BufferedInputStream in = new BufferedInputStream(image); ByteArrayOutputStream out = new ByteArrayOutputStream(1024); // System.out.println("Available bytes:" + in.available()); byte[] temp = new byte[1024]; int size = 0; while ((size = image.read(temp)) > 0) { out.write(temp, 0, size);// w w w .j a va2s . co m } byte[] content = out.toByteArray(); // System.out.println("Readed bytes count:" + new String(content)); return content; }
From source file:authordetect.input.SingleBookReader.java
/** * @param inputSplit/* w w w . j a v a 2s .com*/ * @param context the information about the task * @throws java.io.IOException * @throws InterruptedException */ @Override public void initialize(InputSplit inputSplit, TaskAttemptContext context) throws IOException, InterruptedException { FileSplit split = (FileSplit) inputSplit; Configuration configuration = context.getConfiguration(); // get the option from configuration: // 0 for group by author, 1 for group by book int option = configuration.getInt("GROUP_OPTION", 0); Path path = split.getPath(); filename = path.getName(); FileSystem fileSystem = path.getFileSystem(configuration); FSDataInputStream inputStream = fileSystem.open(path); lineReader = new LineReader(inputStream, configuration); //initial start point and end point start = split.getStart(); end = start + split.getLength(); inputStream.seek(start); if (start != 0) { start += lineReader.readLine(new Text(), 0, (int) Math.min(Integer.MAX_VALUE, end - start)); } start += lineReader.readLine(currentLine); prepareToScanBook(option); }
From source file:brush.FastqRecordReader.java
License:Apache License
/** * Position the input stream at the start of the first record. * * @param stream The stream to reposition. */// w w w . j av a 2 s. c om protected void positionAtFirstRecord(FSDataInputStream stream) throws IOException { Text buffer = new Text(); if (true) { // (start > 0) // use start>0 to assume that files start with valid data // Advance to the start of the first record that ends with /1 // We use a temporary LineReader to read lines until we find the // position of the right one. We then seek the file to that position. stream.seek(start); LineReader reader = new LineReader(stream); int bytesRead = 0; do { bytesRead = reader.readLine(buffer, (int) Math.min(MAX_LINE_LENGTH, end - start)); int bufferLength = buffer.getLength(); if (bytesRead > 0 && !checkBuffer(bufferLength, buffer)) { start += bytesRead; } else { // line starts with @. Read two more and verify that it starts with a + // // If this isn't the start of a record, we want to backtrack to its end long backtrackPosition = start + bytesRead; bytesRead = reader.readLine(buffer, (int) Math.min(MAX_LINE_LENGTH, end - start)); bytesRead = reader.readLine(buffer, (int) Math.min(MAX_LINE_LENGTH, end - start)); if (bytesRead > 0 && buffer.getLength() > 0 && buffer.getBytes()[0] == '+') { break; // all good! } else { // backtrack to the end of the record we thought was the start. start = backtrackPosition; stream.seek(start); reader = new LineReader(stream); } } } while (bytesRead > 0); stream.seek(start); } pos = start; }
From source file:cc.solr.lucene.store.hdfs.HdfsFileReader.java
License:Apache License
/**
 * Reads the length value stored in the last 12 bytes of the file: a {@code long} followed
 * by an {@code int} version that must equal {@code VERSION}.
 *
 * @param fileSystem file system holding the file
 * @param path file to inspect
 * @return the {@code long} read from the trailer
 * @throws IOException if the file status, open, seek or read fails
 * @throws RuntimeException if the stored version differs from {@code VERSION}
 */
public static long getLength(FileSystem fileSystem, Path path) throws IOException {
    FSDataInputStream trailerIn = null;
    try {
        final long physicalLength = fileSystem.getFileStatus(path).getLen();
        trailerIn = fileSystem.open(path);

        // The trailer occupies the final 12 bytes: 8 for the long, 4 for the int.
        trailerIn.seek(physicalLength - 12);
        final long storedLength = trailerIn.readLong();
        final int storedVersion = trailerIn.readInt();

        if (storedVersion == VERSION) {
            return storedLength;
        }
        throw new RuntimeException(
                "Version of file [" + storedVersion + "] does not match reader [" + VERSION + "]");
    } finally {
        if (trailerIn != null) {
            trailerIn.close();
        }
    }
}
From source file:cn.lhfei.hadoop.ch03.FileSystemDoubleCat.java
License:Apache License
public static void main(String[] args) { String uri = args[0];//w w w . j a v a 2 s .com FSDataInputStream in = null; FileSystem fs = null; Configuration conf = new Configuration(); try { fs = FileSystem.get(URI.create(uri), conf); in = fs.open(new Path(uri)); IOUtils.copyBytes(in, System.out, 4096, false); in.seek(0l); // go back to the start of the file IOUtils.copyBytes(in, System.out, 4096, false); } catch (IOException e) { e.printStackTrace(); } }
From source file:co.cask.tigon.logging.LogFileReader.java
License:Apache License
/** * Recursive method to tail the log. Reads from the current log file * instance (i), and if that does not have sufficient size, recurses to the * next older instance (i+1). If the caller knows the size of the current * file (i), he can pass it via the fileSize parameter. * @param lines A list of log lines to append read lines to * @param i The current log file instance to start reading from * @param size number of bytes to read at most * @param sizeHint if known, the caller should pass in the length of the * current log file instance. This helps to seek to the end * of a file that has not been closed yet (and hence file * status does not reflect its correct size). Only needed * at instance 0. Otherwise (for recursive calls) this is * -1, and the file size will be obatained via file status. * @return The list of lines read//from ww w.j a v a 2 s .c o m * @throws java.io.IOException if reading goes badly wrong */ private List<String> tail(ArrayList<String> lines, int i, long size, long sizeHint) throws IOException { // get the path of the current log file instance (xxx.log[.i]) Path path = new Path(config.getLogFilePath(), makeFileName(i)); // check for its existence, if it does not exist, return empty list if (!fileSystem.exists(path)) { return lines; } FileStatus status = fileSystem.getFileStatus(path); if (!status.isFile()) { return lines; } long fileSize; if (sizeHint >= 0) { fileSize = sizeHint; } else if (i > 0) { fileSize = status.getLen(); } else { fileSize = determineTrueFileSize(path, status); } long seekPos = 0; long bytesToRead = size; if (fileSize >= size) { // if size of currentFile is sufficient, we need to seek to the // position that is size bytes from the end of the file. 
seekPos = fileSize - size; } else { // if size of current file is less than limit, make a recursive // call to tail for previous file tail(lines, i + 1, size - fileSize, -1); bytesToRead = fileSize; } // open current file for reading byte[] bytes = new byte[(int) bytesToRead]; FSDataInputStream input = fileSystem.open(path); try { // seek into latest file if (seekPos > 0) { input.seek(seekPos); } // read to the end of current file input.readFully(bytes); } finally { input.close(); } int pos = 0; if (seekPos > 0) { // if we seeked into the file, then we are likely in the middle of the // line, and we want to skip up to the first new line while (pos < bytesToRead && bytes[pos] != '\n') { pos++; } pos++; // now we are just after the first new line } // read lines until the end of the buffer while (pos < bytesToRead) { int start = pos; while (pos < bytesToRead && bytes[pos] != '\n') { pos++; } // now we are at end of file or at the new line if (pos != start) { // ignore empty lines String line = new String(bytes, start, pos - start, LogFileWriter.CHARSET_UTF8); lines.add(line); } pos++; // skip the new line character } return lines; }
From source file:co.cask.tigon.logging.LogFileReader.java
License:Apache License
private long determineTrueFileSize(Path path, FileStatus status) throws IOException { FSDataInputStream stream = fileSystem.open(path); try {// w w w. jav a 2 s .co m stream.seek(status.getLen()); // we need to read repeatedly until we reach the end of the file byte[] buffer = new byte[1024 * 1024]; while (stream.read(buffer, 0, buffer.length) >= 0) { // empty body. } long trueSize = stream.getPos(); return trueSize; } finally { stream.close(); } }
From source file:co.nubetech.hiho.dedup.DelimitedLineRecordReader.java
License:Apache License
/** * // w ww.j ava2 s . co m * @param delimiter * @param column * * */ @Override public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException { FileSplit split = (FileSplit) genericSplit; Configuration job = context.getConfiguration(); this.delimiter = job.get(DelimitedTextInputFormat.DELIMITER_CONF); this.column = job.getInt(DelimitedTextInputFormat.COLUMN_CONF, 0); this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE); start = split.getStart(); end = start + split.getLength(); final Path file = split.getPath(); compressionCodecs = new CompressionCodecFactory(job); final CompressionCodec codec = compressionCodecs.getCodec(file); // open the file and seek to the start of the split FileSystem fs = file.getFileSystem(job); FSDataInputStream fileIn = fs.open(split.getPath()); boolean skipFirstLine = false; if (codec != null) { in = new LineReader(codec.createInputStream(fileIn), job); end = Long.MAX_VALUE; } else { if (start != 0) { skipFirstLine = true; --start; fileIn.seek(start); } in = new LineReader(fileIn, job); } if (skipFirstLine) { // skip first line and re-establish "start". start += in.readLine(new Text(), 0, (int) Math.min((long) Integer.MAX_VALUE, end - start)); } this.pos = start; }