List of usage examples for org.apache.hadoop.fs.FSDataInputStream#seek
@Override public void seek(long desired) throws IOException
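seek(long desired) repositions the stream at the given absolute byte offset, so the next read continues from that position. Before the collected examples, here is a minimal self-contained sketch of the basic pattern; the class name, file path, and offset are illustrative assumptions, not taken from any of the source files below.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class SeekSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("/tmp/example.dat"); // hypothetical file
        try (FSDataInputStream in = fs.open(path)) {
            in.seek(128L);      // jump to absolute byte offset 128
            int b = in.read();  // reading resumes at the new position
            System.out.println("pos=" + in.getPos() + ", byte=" + b);
        }
    }
}

Note that on most filesystem implementations, seeking past the end of the file raises an EOFException, so callers typically bound the offset by the file length first.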
From source file: co.nubetech.hiho.mapred.MySQLLoadDataMapper.java
License: Apache License
@Override
public void map(Text key, FSDataInputStream val,
        OutputCollector<NullWritable, NullWritable> collector, Reporter reporter) throws IOException {
    conn = getConnection();
    com.mysql.jdbc.Statement stmt = null;
    String query;
    String[] columnNames = null;
    if (hasHeaderLine) {
        // Read the header row to get the column names, then seek the stream
        // past the header so LOAD DATA only sees the data rows.
        BufferedReader headerReader = new BufferedReader(new InputStreamReader(val));
        String header = headerReader.readLine();
        if (header == null)
            return;
        columnNames = header.split(",");
        val.seek(header.getBytes(utf8).length + newline.length);
    }
    try {
        stmt = (com.mysql.jdbc.Statement) conn.createStatement(ResultSet.TYPE_SCROLL_SENSITIVE,
                ResultSet.CONCUR_UPDATABLE);
        String tablename = (keyIsTableName ? keyToTablename(key) : "");
        if (disableKeys && !tablename.equals("")) {
            reporter.setStatus("Disabling keys on " + tablename);
            stmt.execute("ALTER TABLE " + tablename + " DISABLE KEYS");
        }
        stmt.setLocalInfileInputStream(val);
        query = "load data local infile 'abc.txt' into table " + tablename + " ";
        query += querySuffix;
        if (hasHeaderLine)
            query += " (" + StringUtils.join(columnNames, ",") + ")";
        reporter.setStatus("Inserting into " + tablename);
        logger.debug("stmt: " + query);
        int rows = stmt.executeUpdate(query);
        logger.debug(rows + " rows updated");
        if (disableKeys && !tablename.equals("")) {
            reporter.setStatus("Re-enabling keys on " + tablename);
            stmt.execute("ALTER TABLE " + tablename + " ENABLE KEYS");
        }
        if (!tablename.equals(""))
            reporter.getCounter("MySQLLoadCounters", "ROWS_INSERTED_TABLE_" + tablename).increment(rows);
        reporter.getCounter("MySQLLoadCounters", "ROWS_INSERTED_TOTAL").increment(rows);
    } catch (Exception e) {
        e.printStackTrace();
        throw new IOException(e);
    } finally {
        try {
            if (stmt != null) {
                stmt.close();
            }
        } catch (SQLException s) {
            s.printStackTrace();
        }
    }
}
From source file: co.nubetech.hiho.mapreduce.MySQLLoadDataMapper.java
License: Apache License
@Override
public void map(Text key, FSDataInputStream val, Context context) throws IOException, InterruptedException {
    conn = getConnection();
    com.mysql.jdbc.Statement stmt = null;
    String query;
    String querySuffix = context.getConfiguration().get(HIHOConf.LOAD_QUERY_SUFFIX);
    boolean hasHeaderLine = context.getConfiguration().getBoolean(HIHOConf.LOAD_HAS_HEADER, false);
    boolean keyIsTableName = context.getConfiguration().getBoolean(HIHOConf.LOAD_KEY_IS_TABLENAME, false);
    String[] columnNames = null;
    if (hasHeaderLine) {
        // Read the header row for the column names, then seek past the
        // header so only data rows are loaded.
        BufferedReader headerReader = new BufferedReader(new InputStreamReader(val));
        String header = headerReader.readLine();
        if (header == null)
            return;
        columnNames = header.split(",");
        val.seek(header.getBytes(utf8).length + newline.length);
    }
    try {
        stmt = (com.mysql.jdbc.Statement) conn.createStatement(ResultSet.TYPE_SCROLL_SENSITIVE,
                ResultSet.CONCUR_UPDATABLE);
        stmt.setLocalInfileInputStream(val);
        String tablename = (keyIsTableName ? keyToTablename(key) : "");
        query = "load data local infile 'abc.txt' into table " + tablename + " ";
        query += querySuffix;
        if (hasHeaderLine)
            query += " (" + StringUtils.join(columnNames, ",") + ")";
        logger.debug("stmt: " + query);
        int rows = stmt.executeUpdate(query);
        logger.debug(rows + " rows updated");
        if (!tablename.equals(""))
            context.getCounter("MySQLLoadCounters", "ROWS_INSERTED_TABLE_" + tablename).increment(rows);
        context.getCounter("MySQLLoadCounters", "ROWS_INSERTED_TOTAL").increment(rows);
    } catch (Exception e) {
        e.printStackTrace();
        throw new IOException(e);
    } finally {
        try {
            if (stmt != null) {
                stmt.close();
            }
        } catch (SQLException s) {
            s.printStackTrace();
        }
    }
}
From source file: coldstorage.io.Reader.java
License: Apache License
@SuppressWarnings("unchecked")
private static List<IndexKey> getList(final FSDataInputStream reader, long length) {
    final int keyWidth = 8 * 2;
    final int size = (int) (length / keyWidth);
    InvocationHandler handler = new InvocationHandler() {
        @Override
        public Object invoke(Object proxy, Method method, Object[] args) throws Throwable {
            if (method.getName().equals("size")) {
                return size;
            } else if (method.getName().equals("get")) {
                int index = (Integer) args[0];
                long seek = index * (long) keyWidth;
                reader.seek(seek);
                IndexKey indexKey = new IndexKey();
                indexKey.readFields(reader);
                return indexKey;
            } else {
                throw new RuntimeException("Not supported.");
            }
        }
    };
    return (List<IndexKey>) Proxy.newProxyInstance(List.class.getClassLoader(),
            new Class[] { List.class, RandomAccess.class }, handler);
}
From source file: com.aliyun.fs.oss.contract.TestAliyunOSSContractSeek.java
License: Apache License
@Test
public void testSeekBeyondDownloadSize() throws Throwable {
    describe("seek and read beyond download size.");
    Path byteFile = path("byte_file.txt");
    // 'fs.oss.multipart.download.size' = 100 * 1024
    byte[] block = dataset(100 * 1024 + 10, 0, 255);
    FileSystem fs = getFileSystem();
    createFile(fs, byteFile, true, block);
    FSDataInputStream instream = getFileSystem().open(byteFile);
    instream.seek(100 * 1024 - 1);
    assertEquals(100 * 1024 - 1, instream.getPos());
    assertEquals(144, instream.read());
    instream.seek(100 * 1024 + 1);
    assertEquals(100 * 1024 + 1, instream.getPos());
    assertEquals(146, instream.read());
}
From source file: com.aliyun.fs.oss.TestAliyunOSSInputStream.java
License: Apache License
@Test
public void testSeekFile() throws Exception {
    Path smallSeekFile = setPath("/test/smallSeekFile.txt");
    long size = 5 * 1024 * 1024;
    ContractTestUtils.generateTestFile(this.fs, smallSeekFile, size, 256, 255);
    LOG.info("5MB file created: smallSeekFile.txt");
    FSDataInputStream instream = this.fs.open(smallSeekFile);
    int seekTimes = 5;
    LOG.info("multiple fold position seeking test...:");
    for (int i = 0; i < seekTimes; i++) {
        long pos = size / (seekTimes - i) - 1;
        LOG.info("begin seeking for pos: " + pos);
        instream.seek(pos);
        assertTrue("expected position at:" + pos + ", but got:" + instream.getPos(),
                instream.getPos() == pos);
        LOG.info("completed seeking at pos: " + instream.getPos());
    }
    LOG.info("random position seeking test...:");
    Random rand = new Random();
    for (int i = 0; i < seekTimes; i++) {
        long pos = Math.abs(rand.nextLong()) % size;
        LOG.info("begin seeking for pos: " + pos);
        instream.seek(pos);
        assertTrue("expected position at:" + pos + ", but got:" + instream.getPos(),
                instream.getPos() == pos);
        LOG.info("completed seeking at pos: " + instream.getPos());
    }
    IOUtils.closeStream(instream);
}
From source file: com.asakusafw.runtime.directio.hadoop.HadoopFileFormatAdapter.java
License: Apache License
@Override
public ModelInput<T> createInput(Class<? extends T> dataType, FileSystem fileSystem, final Path path,
        final long offset, final long fragmentSize, Counter counter) throws IOException, InterruptedException {
    FSDataInputStream stream = fileSystem.open(path);
    boolean succeed = false;
    try {
        if (LOG.isDebugEnabled()) {
            LOG.debug(MessageFormat.format(
                    "Process opening input [stream opened] (path={0}, offset={1}, size={2})", //$NON-NLS-1$
                    path, offset, fragmentSize));
        }
        if (offset != 0) {
            stream.seek(offset);
            if (LOG.isDebugEnabled()) {
                LOG.debug(MessageFormat.format(
                        "Process opening input [sought to offset] (path={0}, offset={1}, size={2})", //$NON-NLS-1$
                        path, offset, fragmentSize));
            }
        }
        CountInputStream cstream;
        if (LOG.isDebugEnabled()) {
            cstream = new CountInputStream(stream, counter) {
                @Override
                public void close() throws IOException {
                    LOG.debug(MessageFormat.format("Start closing input (path={0}, offset={1}, size={2})", //$NON-NLS-1$
                            path, offset, fragmentSize));
                    super.close();
                    LOG.debug(MessageFormat.format("Finish closing input (path={0}, offset={1}, size={2})", //$NON-NLS-1$
                            path, offset, fragmentSize));
                }
            };
        } else {
            cstream = new CountInputStream(stream, counter);
        }
        ModelInput<T> input = streamFormat.createInput(dataType, path.toString(), cstream, offset, fragmentSize);
        succeed = true;
        return input;
    } finally {
        if (succeed == false) {
            try {
                stream.close();
            } catch (IOException e) {
                LOG.warn(MessageFormat.format("Failed to close input (path={0}, offset={1}, size={2})",
                        path, offset, fragmentSize), e);
            }
        }
    }
}
From source file: com.blackberry.logdriver.mapred.avro.AvroBlockRecordReader.java
License: Apache License
private void advanceToSyncMarker(FSDataInputStream in, byte[] syncMarker) throws IOException {
    byte b = 0;
    int bytesRead = 0;
    byte[] sync = header.getSyncMarker();
    Iterator<Byte> iterator = null;
    boolean match = true;
    Deque<Byte> deque = new ArrayDeque<Byte>(DataFileConstants.SYNC_SIZE);
    while (true) {
        b = in.readByte();
        deque.add(b);
        bytesRead++;
        match = true;
        if (deque.size() == DataFileConstants.SYNC_SIZE) {
            // Compare the sliding window of the last SYNC_SIZE bytes
            // against the sync marker.
            match = true;
            iterator = deque.iterator();
            for (int i = 0; i < DataFileConstants.SYNC_SIZE; i++) {
                if (sync[i] != iterator.next()) {
                    match = false;
                    break;
                }
            }
            if (match) {
                break;
            }
            deque.remove();
        }
    }
    pos = start + bytesRead;
    LOG.info("Found sync marker at {}", pos - DataFileConstants.SYNC_SIZE);
    in.seek(pos);
}
From source file: com.blm.orc.ReaderImpl.java
License: Apache License
/**
 * Ensure this is an ORC file to prevent users from trying to read text
 * files or RC files as ORC files.
 * @param in the file being read
 * @param path the filename for error messages
 * @param psLen the postscript length
 * @param buffer the tail of the file
 * @throws IOException
 */
static void ensureOrcFooter(FSDataInputStream in, Path path, int psLen, ByteBuffer buffer) throws IOException {
    int len = OrcFile.MAGIC.length();
    if (psLen < len + 1) {
        throw new IOException("Malformed ORC file " + path + ". Invalid postscript length " + psLen);
    }
    int offset = buffer.arrayOffset() + buffer.position() + buffer.limit() - 1 - len;
    byte[] array = buffer.array();
    // now look for the magic string at the end of the postscript.
    if (!Text.decode(array, offset, len).equals(OrcFile.MAGIC)) {
        // If it isn't there, this may be the 0.11.0 version of ORC.
        // Read the first 3 bytes of the file to check for the header
        in.seek(0);
        byte[] header = new byte[len];
        in.readFully(header, 0, len);
        // if it isn't there, this isn't an ORC file
        if (!Text.decode(header, 0, len).equals(OrcFile.MAGIC)) {
            throw new IOException("Malformed ORC file " + path + ". Invalid postscript.");
        }
    }
}
From source file: com.blm.orc.ReaderImpl.java
License: Apache License
private static FileMetaInfo extractMetaInfoFromFooter(FileSystem fs, Path path, long maxFileLength)
        throws IOException {
    FSDataInputStream file = fs.open(path);

    // figure out the size of the file using the option or filesystem
    long size;
    if (maxFileLength == Long.MAX_VALUE) {
        size = fs.getFileStatus(path).getLen();
    } else {
        size = maxFileLength;
    }

    // read last bytes into buffer to get PostScript
    int readSize = (int) Math.min(size, DIRECTORY_SIZE_GUESS);
    file.seek(size - readSize);
    ByteBuffer buffer = ByteBuffer.allocate(readSize);
    file.readFully(buffer.array(), buffer.arrayOffset() + buffer.position(), buffer.remaining());

    // read the PostScript: get length of PostScript
    int psLen = buffer.get(readSize - 1) & 0xff;
    ensureOrcFooter(file, path, psLen, buffer);
    int psOffset = readSize - 1 - psLen;
    CodedInputStream in = CodedInputStream.newInstance(buffer.array(), buffer.arrayOffset() + psOffset, psLen);
    OrcProto.PostScript ps = OrcProto.PostScript.parseFrom(in);
    checkOrcVersion(LOG, path, ps.getVersionList());

    int footerSize = (int) ps.getFooterLength();
    int metadataSize = (int) ps.getMetadataLength();
    OrcFile.WriterVersion writerVersion;
    if (ps.hasWriterVersion()) {
        writerVersion = getWriterVersion(ps.getWriterVersion());
    } else {
        writerVersion = OrcFile.WriterVersion.ORIGINAL;
    }

    // check compression codec
    switch (ps.getCompression()) {
    case NONE:
        break;
    case ZLIB:
        break;
    case SNAPPY:
        break;
    case LZO:
        break;
    default:
        throw new IllegalArgumentException("Unknown compression");
    }

    // check if extra bytes need to be read
    int extra = Math.max(0, psLen + 1 + footerSize + metadataSize - readSize);
    if (extra > 0) {
        // more bytes need to be read, seek back to the right place and read extra bytes
        file.seek(size - readSize - extra);
        ByteBuffer extraBuf = ByteBuffer.allocate(extra + readSize);
        file.readFully(extraBuf.array(), extraBuf.arrayOffset() + extraBuf.position(), extra);
        extraBuf.position(extra);
        // append with already read bytes
        extraBuf.put(buffer);
        buffer = extraBuf;
        buffer.position(0);
        buffer.limit(footerSize + metadataSize);
    } else {
        // footer is already in the bytes in buffer, just adjust position, length
        buffer.position(psOffset - footerSize - metadataSize);
        buffer.limit(psOffset);
    }

    // remember position for later
    buffer.mark();

    file.close();
    return new FileMetaInfo(ps.getCompression().toString(), (int) ps.getCompressionBlockSize(),
            (int) ps.getMetadataLength(), buffer, ps.getVersionList(), writerVersion);
}
From source file: com.blm.orc.RecordReaderImpl.java
License: Apache License
/**
 * Read the list of ranges from the file.
 * @param file the file to read
 * @param base the base of the stripe
 * @param ranges the disk ranges within the stripe to read
 * @return the bytes read for each disk range, which is the same length as
 *   ranges
 * @throws IOException
 */
List<BufferChunk> readDiskRanges(FSDataInputStream file, long base, List<DiskRange> ranges) throws IOException {
    ArrayList<BufferChunk> result = new ArrayList<RecordReaderImpl.BufferChunk>(ranges.size());
    for (DiskRange range : ranges) {
        int len = (int) (range.end - range.offset);
        long off = range.offset;
        file.seek(base + off);
        if (zcr != null) {
            while (len > 0) {
                ByteBuffer partial = zcr.readBuffer(len, false);
                result.add(new BufferChunk(partial, off));
                int read = partial.remaining();
                len -= read;
                off += read;
            }
        } else {
            byte[] buffer = new byte[len];
            file.readFully(buffer, 0, buffer.length);
            result.add(new BufferChunk(ByteBuffer.wrap(buffer), range.offset));
        }
    }
    return result;
}