Example usage for org.apache.hadoop.fs FSDataInputStream seek

List of usage examples for org.apache.hadoop.fs FSDataInputStream seek

Introduction

On this page you can find example usage for org.apache.hadoop.fs FSDataInputStream seek.

Prototype

@Override
public void seek(long desired) throws IOException 

Document

Seek to the given offset.
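
For orientation, here is a minimal, self-contained sketch of the call (not taken from any of the source files below; the path and the offset 1024 are illustrative): open a file, seek to an absolute byte offset, then read from the new position.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class SeekSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("/tmp/example.bin"); // hypothetical path
        try (FSDataInputStream in = fs.open(path)) {
            in.seek(1024L);            // move to absolute byte offset 1024
            int b = in.read();         // reading resumes at the new position
            System.out.println("pos=" + in.getPos() + ", byte=" + b);
        }
    }
}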

Usage

From source file:co.nubetech.hiho.mapred.MySQLLoadDataMapper.java

License:Apache License

@Override
public void map(Text key, FSDataInputStream val, OutputCollector<NullWritable, NullWritable> collector,
        Reporter reporter) throws IOException {

    conn = getConnection();
    com.mysql.jdbc.Statement stmt = null;
    String query;

    String[] columnNames = null;
    if (hasHeaderLine) {
        BufferedReader headerReader = new BufferedReader(new InputStreamReader(val));
        String header = headerReader.readLine();
        if (header == null)
            return;
        columnNames = header.split(",");
        val.seek(header.getBytes(utf8).length + newline.length);
    }
    try {

        stmt = (com.mysql.jdbc.Statement) conn.createStatement(ResultSet.TYPE_SCROLL_SENSITIVE,
                ResultSet.CONCUR_UPDATABLE);
        String tablename = (keyIsTableName ? keyToTablename(key) : "");
        if (disableKeys && !tablename.equals("")) {
            reporter.setStatus("Disabling keys on " + tablename);
            stmt.execute("ALTER TABLE " + tablename + " DISABLE KEYS");
        }
        stmt.setLocalInfileInputStream(val);
        query = "load data local infile 'abc.txt' into table " + tablename + " ";
        query += querySuffix;
        if (hasHeaderLine)
            query += " (" + StringUtils.join(columnNames, ",") + ")";
        reporter.setStatus("Inserting into " + tablename);
        logger.debug("stmt: " + query);
        int rows = stmt.executeUpdate(query);
        logger.debug(rows + " rows updated");
        if (disableKeys && !tablename.equals("")) {
            reporter.setStatus("Re-enabling keys on " + tablename);
            stmt.execute("ALTER TABLE " + tablename + " ENABLE KEYS");
        }
        if (!tablename.equals(""))
            reporter.getCounter("MySQLLoadCounters", "ROWS_INSERTED_TABLE_" + tablename).increment(rows);
        reporter.getCounter("MySQLLoadCounters", "ROWS_INSERTED_TOTAL").increment(rows);

    } catch (Exception e) {
        e.printStackTrace();
        stmt = null;
        throw new IOException(e);
    } finally {
        try {
            if (stmt != null) {
                stmt.close();
            }
        } catch (SQLException s) {
            s.printStackTrace();
        }
    }
}

From source file:co.nubetech.hiho.mapreduce.MySQLLoadDataMapper.java

License:Apache License

@Override
public void map(Text key, FSDataInputStream val, Context context) throws IOException, InterruptedException {

    conn = getConnection();
    com.mysql.jdbc.Statement stmt = null;
    String query;
    String querySuffix = context.getConfiguration().get(HIHOConf.LOAD_QUERY_SUFFIX);
    boolean hasHeaderLine = context.getConfiguration().getBoolean(HIHOConf.LOAD_HAS_HEADER, false);
    boolean keyIsTableName = context.getConfiguration().getBoolean(HIHOConf.LOAD_KEY_IS_TABLENAME, false);

    String[] columnNames = null;
    if (hasHeaderLine) {
        BufferedReader headerReader = new BufferedReader(new InputStreamReader(val));
        String header = headerReader.readLine();
        if (header == null)
            return;
        columnNames = header.split(",");
        val.seek(header.getBytes(utf8).length + newline.length);
    }
    try {

        stmt = (com.mysql.jdbc.Statement) conn.createStatement(ResultSet.TYPE_SCROLL_SENSITIVE,
                ResultSet.CONCUR_UPDATABLE);
        stmt.setLocalInfileInputStream(val);
        String tablename = (keyIsTableName ? keyToTablename(key) : "");
        query = "load data local infile 'abc.txt' into table " + tablename + " ";
        query += querySuffix;
        if (hasHeaderLine)
            query += " (" + StringUtils.join(columnNames, ",") + ")";
        logger.debug("stmt: " + query);
        int rows = stmt.executeUpdate(query);
        logger.debug(rows + " rows updated");
        if (!tablename.equals(""))
            context.getCounter("MySQLLoadCounters", "ROWS_INSERTED_TABLE_" + tablename).increment(rows);
        context.getCounter("MySQLLoadCounters", "ROWS_INSERTED_TOTAL").increment(rows);

    } catch (Exception e) {
        e.printStackTrace();
        stmt = null;
        throw new IOException(e);
    } finally {
        try {
            if (stmt != null) {
                stmt.close();
            }
        } catch (SQLException s) {
            s.printStackTrace();
        }
    }
}

From source file:coldstorage.io.Reader.java

License:Apache License

@SuppressWarnings("unchecked")
private static List<IndexKey> getList(final FSDataInputStream reader, long length) {
    final int keyWidth = 8 * 2;
    final int size = (int) (length / keyWidth);
    InvocationHandler handler = new InvocationHandler() {
        @Override
        public Object invoke(Object proxy, Method method, Object[] args) throws Throwable {
            if (method.getName().equals("size")) {
                return size;
            } else if (method.getName().equals("get")) {
                int index = (Integer) args[0];
                long seek = index * (long) keyWidth;
                reader.seek(seek);
                IndexKey indexKey = new IndexKey();
                indexKey.readFields(reader);
                return indexKey;
            } else {
                throw new RuntimeException("Not supported.");
            }
        }
    };
    return (List<IndexKey>) Proxy.newProxyInstance(List.class.getClassLoader(),
            new Class[] { List.class, RandomAccess.class }, handler);
}
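
The list returned above performs one seek per get(), so it can be consumed lazily. As a hypothetical follow-up (not in the source; it assumes the keys stored in the file are sorted and that IndexKey implements Comparable<IndexKey>), such a RandomAccess view could be binary-searched without loading the whole index into memory:

// Hypothetical usage; 'reader', 'fileLength' and 'target' stand in for real values.
List<IndexKey> keys = getList(reader, fileLength);
int idx = Collections.binarySearch(keys, target); // each get() seeks and reads one key
if (idx >= 0) {
    IndexKey hit = keys.get(idx); // key found at the computed offset
}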

From source file:com.aliyun.fs.oss.contract.TestAliyunOSSContractSeek.java

License:Apache License

@Test
public void testSeekBeyondDownloadSize() throws Throwable {
    describe("seek and read beyond download size.");

    Path byteFile = path("byte_file.txt");
    // 'fs.oss.multipart.download.size' = 100 * 1024
    byte[] block = dataset(100 * 1024 + 10, 0, 255);
    FileSystem fs = getFileSystem();
    createFile(fs, byteFile, true, block);

    FSDataInputStream instream = getFileSystem().open(byteFile);
    instream.seek(100 * 1024 - 1);
    assertEquals(100 * 1024 - 1, instream.getPos());
    assertEquals(144, instream.read());
    instream.seek(100 * 1024 + 1);
    assertEquals(100 * 1024 + 1, instream.getPos());
    assertEquals(146, instream.read());
}

From source file:com.aliyun.fs.oss.TestAliyunOSSInputStream.java

License:Apache License

@Test
public void testSeekFile() throws Exception {
    Path smallSeekFile = setPath("/test/smallSeekFile.txt");
    long size = 5 * 1024 * 1024;

    ContractTestUtils.generateTestFile(this.fs, smallSeekFile, size, 256, 255);
    LOG.info("5MB file created: smallSeekFile.txt");

    FSDataInputStream instream = this.fs.open(smallSeekFile);
    int seekTimes = 5;
    LOG.info("multiple fold position seeking test...:");
    for (int i = 0; i < seekTimes; i++) {
        long pos = size / (seekTimes - i) - 1;
        LOG.info("begin seeking for pos: " + pos);
        instream.seek(pos);
        assertTrue("expected position at:" + pos + ", but got:" + instream.getPos(), instream.getPos() == pos);
        LOG.info("completed seeking at pos: " + instream.getPos());
    }
    LOG.info("random position seeking test...:");
    Random rand = new Random();
    for (int i = 0; i < seekTimes; i++) {
        long pos = Math.abs(rand.nextLong()) % size;
        LOG.info("begin seeking for pos: " + pos);
        instream.seek(pos);
        assertTrue("expected position at:" + pos + ", but got:" + instream.getPos(), instream.getPos() == pos);
        LOG.info("completed seeking at pos: " + instream.getPos());
    }
    IOUtils.closeStream(instream);
}

From source file:com.asakusafw.runtime.directio.hadoop.HadoopFileFormatAdapter.java

License:Apache License

@Override
public ModelInput<T> createInput(Class<? extends T> dataType, FileSystem fileSystem, final Path path,
        final long offset, final long fragmentSize, Counter counter) throws IOException, InterruptedException {
    FSDataInputStream stream = fileSystem.open(path);
    boolean succeed = false;
    try {
        if (LOG.isDebugEnabled()) {
            LOG.debug(MessageFormat.format(
                    "Process opening input [stream opened] (path={0}, offset={1}, size={2})", //$NON-NLS-1$
                    path, offset, fragmentSize));
        }
        if (offset != 0) {
            stream.seek(offset);
            if (LOG.isDebugEnabled()) {
                LOG.debug(MessageFormat.format(
                        "Process opening input [sought to offset] (path={0}, offset={1}, size={2})", //$NON-NLS-1$
                        path, offset, fragmentSize));
            }
        }
        CountInputStream cstream;
        if (LOG.isDebugEnabled()) {
            cstream = new CountInputStream(stream, counter) {
                @Override
                public void close() throws IOException {
                    LOG.debug(MessageFormat.format("Start closing input (path={0}, offset={1}, size={2})", //$NON-NLS-1$
                            path, offset, fragmentSize));
                    super.close();
                    LOG.debug(MessageFormat.format("Finish closing input (path={0}, offset={1}, size={2})", //$NON-NLS-1$
                            path, offset, fragmentSize));
                }
            };
        } else {
            cstream = new CountInputStream(stream, counter);
        }
        ModelInput<T> input = streamFormat.createInput(dataType, path.toString(), cstream, offset,
                fragmentSize);
        succeed = true;
        return input;
    } finally {
        if (succeed == false) {
            try {
                stream.close();
            } catch (IOException e) {
                LOG.warn(MessageFormat.format("Failed to close input (path={0}, offset={1}, size={2})", path,
                        offset, fragmentSize), e);
            }
        }
    }
}

From source file:com.blackberry.logdriver.mapred.avro.AvroBlockRecordReader.java

License:Apache License

private void advanceToSyncMarker(FSDataInputStream in, byte[] syncMarker) throws IOException {
    byte b = 0;
    int bytesRead = 0;
    byte[] sync = header.getSyncMarker();
    Iterator<Byte> iterator = null;
    boolean match = true;

    Deque<Byte> deque = new ArrayDeque<Byte>(DataFileConstants.SYNC_SIZE);
    while (true) {
        b = in.readByte();
        deque.add(b);
        bytesRead++;

        match = true;
        if (deque.size() == DataFileConstants.SYNC_SIZE) {
            match = true;
            iterator = deque.iterator();
            for (int i = 0; i < DataFileConstants.SYNC_SIZE; i++) {
                if (sync[i] != iterator.next()) {
                    match = false;
                    break;
                }
            }

            if (match) {
                break;
            }

            deque.remove();
        }
    }

    pos = start + bytesRead;
    LOG.info("Found sync marker at {}", pos - 16);
    in.seek(pos);
}

From source file:com.blm.orc.ReaderImpl.java

License:Apache License

/**
 * Ensure this is an ORC file to prevent users from trying to read text
 * files or RC files as ORC files.
 * @param in the file being read
 * @param path the filename for error messages
 * @param psLen the postscript length
 * @param buffer the tail of the file
 * @throws IOException
 */
static void ensureOrcFooter(FSDataInputStream in, Path path, int psLen, ByteBuffer buffer) throws IOException {
    int len = OrcFile.MAGIC.length();
    if (psLen < len + 1) {
        throw new IOException("Malformed ORC file " + path + ". Invalid postscript length " + psLen);
    }
    int offset = buffer.arrayOffset() + buffer.position() + buffer.limit() - 1 - len;
    byte[] array = buffer.array();
    // now look for the magic string at the end of the postscript.
    if (!Text.decode(array, offset, len).equals(OrcFile.MAGIC)) {
        // If it isn't there, this may be the 0.11.0 version of ORC.
        // Read the first 3 bytes of the file to check for the header
        in.seek(0);
        byte[] header = new byte[len];
        in.readFully(header, 0, len);
        // if it isn't there, this isn't an ORC file
        if (!Text.decode(header, 0, len).equals(OrcFile.MAGIC)) {
            throw new IOException("Malformed ORC file " + path + ". Invalid postscript.");
        }
    }
}

From source file:com.blm.orc.ReaderImpl.java

License:Apache License

private static FileMetaInfo extractMetaInfoFromFooter(FileSystem fs, Path path, long maxFileLength)
        throws IOException {
    FSDataInputStream file = fs.open(path);

    // figure out the size of the file using the option or filesystem
    long size;
    if (maxFileLength == Long.MAX_VALUE) {
        size = fs.getFileStatus(path).getLen();
    } else {
        size = maxFileLength;
    }

    //read last bytes into buffer to get PostScript
    int readSize = (int) Math.min(size, DIRECTORY_SIZE_GUESS);
    file.seek(size - readSize);
    ByteBuffer buffer = ByteBuffer.allocate(readSize);
    file.readFully(buffer.array(), buffer.arrayOffset() + buffer.position(), buffer.remaining());

    //read the PostScript
    //get length of PostScript
    int psLen = buffer.get(readSize - 1) & 0xff;
    ensureOrcFooter(file, path, psLen, buffer);
    int psOffset = readSize - 1 - psLen;
    CodedInputStream in = CodedInputStream.newInstance(buffer.array(), buffer.arrayOffset() + psOffset, psLen);
    OrcProto.PostScript ps = OrcProto.PostScript.parseFrom(in);

    checkOrcVersion(LOG, path, ps.getVersionList());

    int footerSize = (int) ps.getFooterLength();
    int metadataSize = (int) ps.getMetadataLength();
    OrcFile.WriterVersion writerVersion;
    if (ps.hasWriterVersion()) {
        writerVersion = getWriterVersion(ps.getWriterVersion());
    } else {
        writerVersion = OrcFile.WriterVersion.ORIGINAL;
    }

    //check compression codec
    switch (ps.getCompression()) {
    case NONE:
        break;
    case ZLIB:
        break;
    case SNAPPY:
        break;
    case LZO:
        break;
    default:
        throw new IllegalArgumentException("Unknown compression");
    }

    //check if extra bytes need to be read
    int extra = Math.max(0, psLen + 1 + footerSize + metadataSize - readSize);
    if (extra > 0) {
        //more bytes need to be read, seek back to the right place and read extra bytes
        file.seek(size - readSize - extra);
        ByteBuffer extraBuf = ByteBuffer.allocate(extra + readSize);
        file.readFully(extraBuf.array(), extraBuf.arrayOffset() + extraBuf.position(), extra);
        extraBuf.position(extra);
        //append with already read bytes
        extraBuf.put(buffer);
        buffer = extraBuf;
        buffer.position(0);
        buffer.limit(footerSize + metadataSize);
    } else {
        //footer is already in the bytes in buffer, just adjust position, length
        buffer.position(psOffset - footerSize - metadataSize);
        buffer.limit(psOffset);
    }

    // remember position for later
    buffer.mark();

    file.close();

    return new FileMetaInfo(ps.getCompression().toString(), (int) ps.getCompressionBlockSize(),
            (int) ps.getMetadataLength(), buffer, ps.getVersionList(), writerVersion);
}

From source file:com.blm.orc.RecordReaderImpl.java

License:Apache License

/**
 * Read the list of ranges from the file.
 * @param file the file to read
 * @param base the base of the stripe
 * @param ranges the disk ranges within the stripe to read
 * @return the bytes read for each disk range, which is the same length as
 *    ranges
 * @throws IOException
 */
List<BufferChunk> readDiskRanges(FSDataInputStream file, long base, List<DiskRange> ranges) throws IOException {
    ArrayList<BufferChunk> result = new ArrayList<RecordReaderImpl.BufferChunk>(ranges.size());
    for (DiskRange range : ranges) {
        int len = (int) (range.end - range.offset);
        long off = range.offset;
        file.seek(base + off);
        if (zcr != null) {
            while (len > 0) {
                ByteBuffer partial = zcr.readBuffer(len, false);
                result.add(new BufferChunk(partial, off));
                int read = partial.remaining();
                len -= read;
                off += read;
            }
        } else {
            byte[] buffer = new byte[len];
            file.readFully(buffer, 0, buffer.length);
            result.add(new BufferChunk(ByteBuffer.wrap(buffer), range.offset));
        }
    }
    return result;
}