Example usage for org.apache.hadoop.fs FSDataInputStream read

List of usage examples for org.apache.hadoop.fs FSDataInputStream read

Introduction

On this page you can find example usages of org.apache.hadoop.fs.FSDataInputStream.read.

Prototype

@Override
public int read(ByteBuffer buf) throws IOException
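
This ByteBuffer overload reads up to buf.remaining() bytes into the buffer, advances the buffer's position by the number of bytes read, and returns that count, or -1 at end of stream; depending on the underlying stream it may also throw UnsupportedOperationException. A minimal sketch of a call, assuming Hadoop 2+ and a placeholder path /tmp/example:

import java.nio.ByteBuffer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ByteBufferReadSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // Placeholder path; point this at an existing file.
        Path path = new Path("/tmp/example");

        try (FSDataInputStream in = fs.open(path)) {
            ByteBuffer buf = ByteBuffer.allocate(4096);
            int n = in.read(buf); // may read fewer than remaining(); -1 at EOF
            buf.flip();           // switch the buffer to draining mode
            System.out.println("Read " + n + " bytes");
        }
    }
}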


Usage

From source file:org.apache.ignite.igfs.HadoopIgfs20FileSystemAbstractSelfTest.java

License:Apache License

/**
 * Test concurrent reads within the file.
 *
 * @throws Exception If failed.
 */
public void testMultithreadedOpen() throws Exception {
    final byte[] dataChunk = new byte[256];

    for (int i = 0; i < dataChunk.length; i++)
        dataChunk[i] = (byte) i;

    Path dir = new Path(new Path(primaryFsUri), "/dir");

    fs.mkdir(dir, FsPermission.getDefault(), true);

    final Path file = new Path(dir, "file");

    FSDataOutputStream os = fs.create(file, EnumSet.noneOf(CreateFlag.class),
            Options.CreateOpts.perms(FsPermission.getDefault()));

    // Write 256 * 2048 bytes = 512 KB of data.
    for (int i = 0; i < 2048; i++)
        os.write(dataChunk);

    os.close();

    final AtomicBoolean err = new AtomicBoolean();

    multithreaded(new Runnable() {
        @Override
        public void run() {
            FSDataInputStream is = null;

            try {
                int pos = ThreadLocalRandom8.current().nextInt(2048);

                try {
                    is = fs.open(file);
                } finally {
                    U.awaitQuiet(barrier);
                }

                is.seek(256 * pos);

                byte[] buf = new byte[256];

                for (int i = pos; i < 2048; i++) {
                    // First perform normal read.
                    int read = is.read(buf);

                    assert read == 256;

                    assert Arrays.equals(dataChunk, buf);
                }

                int res = is.read(buf);

                assert res == -1;
            } catch (IOException ignore) {
                err.set(true);
            } finally {
                U.closeQuiet(is);
            }
        }
    }, THREAD_CNT);

    assert !err.get();
}

From source file:org.apache.ignite.igfs.IgfsHadoopFileSystemAbstractSelfTest.java

License:Apache License

/**
 * Ensure that when running in multithreaded mode only one create() operation succeeds.
 *
 * @throws Exception If failed.
 */
public void testMultithreadedCreate() throws Exception {
    Path dir = new Path(new Path(PRIMARY_URI), "/dir");

    assert fs.mkdirs(dir);

    final Path file = new Path(dir, "file");

    fs.create(file).close();

    final AtomicInteger cnt = new AtomicInteger();

    final Collection<Integer> errs = new GridConcurrentHashSet<>(THREAD_CNT, 1.0f, THREAD_CNT);

    final AtomicBoolean err = new AtomicBoolean();

    multithreaded(new Runnable() {
        @Override
        public void run() {
            int idx = cnt.getAndIncrement();

            byte[] data = new byte[256];

            Arrays.fill(data, (byte) idx);

            FSDataOutputStream os = null;

            try {
                os = fs.create(file, true);
            } catch (IOException ignore) {
                errs.add(idx);
            }

            U.awaitQuiet(barrier);

            try {
                if (os != null)
                    os.write(data);
            } catch (IOException ignore) {
                err.set(true);
            } finally {
                U.closeQuiet(os);
            }
        }
    }, THREAD_CNT);

    assert !err.get();

    // Only one thread could obtain write lock on the file.
    assert errs.size() == THREAD_CNT - 1;

    int idx = -1;

    for (int i = 0; i < THREAD_CNT; i++) {
        if (!errs.remove(i)) {
            idx = i;

            break;
        }
    }

    byte[] expData = new byte[256];

    Arrays.fill(expData, (byte) idx);

    FSDataInputStream is = fs.open(file);

    byte[] data = new byte[256];

    is.read(data);

    is.close();

    assert Arrays.equals(expData, data) : "Expected=" + Arrays.toString(expData) + ", actual="
            + Arrays.toString(data);
}

From source file:org.apache.ignite.igfs.IgfsHadoopFileSystemAbstractSelfTest.java

License:Apache License

/**
 * Ensure that when running in multithreaded mode only one append() operation succeeds.
 *
 * @throws Exception If failed.
 */
public void testMultithreadedAppend() throws Exception {
    Path dir = new Path(new Path(PRIMARY_URI), "/dir");

    assert fs.mkdirs(dir);

    final Path file = new Path(dir, "file");

    fs.create(file).close();

    final AtomicInteger cnt = new AtomicInteger();

    final Collection<Integer> errs = new GridConcurrentHashSet<>(THREAD_CNT, 1.0f, THREAD_CNT);

    final AtomicBoolean err = new AtomicBoolean();

    multithreaded(new Runnable() {
        @Override
        public void run() {
            int idx = cnt.getAndIncrement();

            byte[] data = new byte[256];

            Arrays.fill(data, (byte) idx);

            U.awaitQuiet(barrier);

            FSDataOutputStream os = null;

            try {
                os = fs.append(file);
            } catch (IOException ignore) {
                errs.add(idx);
            }

            U.awaitQuiet(barrier);

            try {
                if (os != null)
                    os.write(data);
            } catch (IOException ignore) {
                err.set(true);
            } finally {
                U.closeQuiet(os);
            }
        }
    }, THREAD_CNT);

    assert !err.get();

    // Only one thread could obtain write lock on the file.
    assert errs.size() == THREAD_CNT - 1;

    int idx = -1;

    for (int i = 0; i < THREAD_CNT; i++) {
        if (!errs.remove(i)) {
            idx = i;

            break;
        }
    }

    byte[] expData = new byte[256];

    Arrays.fill(expData, (byte) idx);

    FSDataInputStream is = fs.open(file);

    byte[] data = new byte[256];

    is.read(data);

    is.close();

    assert Arrays.equals(expData, data);
}

From source file:org.apache.ignite.igfs.IgfsHadoopFileSystemAbstractSelfTest.java

License:Apache License

/**
 * Test concurrent reads within the file.
 *
 * @throws Exception If failed.
 */
public void testMultithreadedOpen() throws Exception {
    final byte[] dataChunk = new byte[256];

    for (int i = 0; i < dataChunk.length; i++)
        dataChunk[i] = (byte) i;

    Path dir = new Path(new Path(PRIMARY_URI), "/dir");

    assert fs.mkdirs(dir);

    final Path file = new Path(dir, "file");

    FSDataOutputStream os = fs.create(file);

    // Write 256 * 2048 bytes = 512 KB of data.
    for (int i = 0; i < 2048; i++)
        os.write(dataChunk);

    os.close();

    final AtomicBoolean err = new AtomicBoolean();

    multithreaded(new Runnable() {
        @Override
        public void run() {
            FSDataInputStream is = null;

            try {
                int pos = ThreadLocalRandom8.current().nextInt(2048);

                try {
                    is = fs.open(file);
                } finally {
                    U.awaitQuiet(barrier);
                }

                is.seek(256 * pos);

                byte[] buf = new byte[256];

                for (int i = pos; i < 2048; i++) {
                    // First perform normal read.
                    int read = is.read(buf);

                    assert read == 256;

                    assert Arrays.equals(dataChunk, buf);
                }

                int res = is.read(buf);

                assert res == -1;
            } catch (IOException ignore) {
                err.set(true);
            } finally {
                U.closeQuiet(is);
            }
        }
    }, THREAD_CNT);

    assert !err.get();
}

From source file:org.apache.mahout.h2obindings.H2OHdfs.java

License:Apache License

/**
 * Predicate to check if a given filename is a SequenceFile.
 *
 * Inspect the first three bytes to determine the format of the file.
 *
 * @param filename Name of the file to check.
 * @return True if file is of SequenceFile format.
 */
public static boolean isSeqfile(String filename) {
    try {
        String uri = filename;
        Configuration conf = new Configuration();
        Path path = new Path(uri);
        FileSystem fs = FileSystem.get(URI.create(uri), conf);
        FSDataInputStream fin = fs.open(path);
        byte[] seq = new byte[3];

        // Check the return value: read() may return fewer than 3 bytes.
        int n = fin.read(seq);
        fin.close();

        return n == seq.length && seq[0] == 'S' && seq[1] == 'E' && seq[2] == 'Q';
    } catch (java.io.IOException e) {
        return false;
    }
}
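
Note that if fs.open succeeds but the read throws, the stream above is never closed. Below is a hedged variant of the same check, assuming the same imports as the original (Configuration, Path, FileSystem, URI) plus try-with-resources; it uses readFully, which either fills the array or throws EOFException. The name isSeqfileStrict is illustrative, not part of the Mahout source:

public static boolean isSeqfileStrict(String filename) {
    Configuration conf = new Configuration();
    Path path = new Path(filename);
    byte[] seq = new byte[3];

    // try-with-resources closes the stream even if the read throws.
    try (FSDataInputStream fin = FileSystem.get(URI.create(filename), conf).open(path)) {
        fin.readFully(seq); // throws EOFException if fewer than 3 bytes remain
    } catch (java.io.IOException e) {
        return false;
    }

    return seq[0] == 'S' && seq[1] == 'E' && seq[2] == 'Q';
}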

From source file:org.apache.parquet.hadoop.util.CompatibilityUtil.java

License:Apache License

public static int getBuf(FSDataInputStream f, ByteBuffer readBuf, int maxSize) throws IOException {
    int res;
    if (useV21) {
        try {
            res = (Integer) fileAPI.PROVIDE_BUF_READ_METHOD.invoke(f, readBuf);
        } catch (InvocationTargetException e) {
            if (e.getCause() instanceof UnsupportedOperationException) {
                // The FSDataInputStream docs say that implementations may
                // choose to throw UnsupportedOperationException, so this is a
                // reasonable check for the case where the interface is present
                // but not implemented, and we should fall back.
                useV21 = false;
                return getBuf(f, readBuf, maxSize);
            } else if (e.getCause() instanceof IOException) {
                throw (IOException) e.getCause();
            } else {
                // Handle any case where a runtime exception occurs and provide
                // additional context. A stack trace alone would only give a
                // line number; this at least records that we were using the
                // ByteBuffer-based version of the read method.
                throw new IOException("Error reading out of an FSDataInputStream "
                        + "using the Hadoop 2 ByteBuffer based read method.", e.getCause());
            }
        } catch (IllegalAccessException e) {
            // This method is public because it is defined in an interface,
            // there should be no problems accessing it
            throw new ShouldNeverHappenException(e);
        }
    } else {
        byte[] buf = new byte[maxSize];
        res = f.read(buf);
        if (res > 0) { // read() returns -1 at EOF; guard the copy
            readBuf.put(buf, 0, res);
        }
    }
    return res;
}
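
Where the Hadoop 2 API is known to be on the classpath, the same fallback can be written without reflection by catching UnsupportedOperationException directly. A minimal sketch under that assumption (readIntoBuffer is an illustrative name, not the Parquet API):

public static int readIntoBuffer(FSDataInputStream f, ByteBuffer readBuf, int maxSize) throws IOException {
    try {
        // Preferred path: the ByteBuffer-based read.
        return f.read(readBuf);
    } catch (UnsupportedOperationException e) {
        // Fallback: read into a heap array and copy into the buffer.
        byte[] buf = new byte[maxSize];
        int res = f.read(buf);
        if (res > 0) {
            readBuf.put(buf, 0, res);
        }
        return res;
    }
}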

From source file:org.apache.pig.piggybank.storage.allloader.LoadFuncHelper.java

License:Apache License

/**
 * Tries to identify the extension, and thereby the loader, from the content
 * type.
 * 
 * @param path file whose leading magic bytes are inspected
 * @return the FuncSpec of the matching loader
 * @throws IOException
 */
private FuncSpec getFuncSpecFromContent(Path path) throws IOException {
    // get the first three bytes from the file.
    FSDataInputStream dataIn = null;
    byte[] magic = new byte[3];
    int read = -1;

    try {
        dataIn = fileSystem.open(path, 3);
        read = dataIn.read(magic);
    } finally {
        if (dataIn != null)
            dataIn.close();
    }

    FuncSpec funcSpec = null;
    String extensionMapping = magicNumberExtensionMap.get(new MagicNumber(magic));

    if (read < magic.length || extensionMapping == null) {
        // assume plain text
        funcSpec = new FuncSpec("PigStorage()");
    } else {
        // an extension mapping was found. i.e. this is a GZ, BZ2, LZO or
        // SEQ file

        String applicableTag = getApplicableTag(extensionMapping, path);
        String loadFuncDefinition = null;

        if (extensionMapping.equals("seq")) {
            // if this is a sequence file we load the key class also
            loadFuncDefinition = loadFunctionExtensionTagMap
                    .get(extensionMapping + ":" + applicableTag + ":" + getSequenceFileKeyClass(path));

        }

        // We do this for sequence files as well, because a sequence file
        // might or might not have a sequenceFileKey associated in the
        // extension mapping. In both cases, if the key class was not found
        // in the mapping above, the default sequence file loader needs to
        // be used as per the extension mapping.
        if (loadFuncDefinition == null) {
            // use only extension and tag filtering
            loadFuncDefinition = loadFunctionExtensionTagMap.get(extensionMapping + ":" + applicableTag);

        }

        if (loadFuncDefinition == null) {
            // if still null, throw an error
            throw new RuntimeException(
                    "Cannot find loader for " + path + " extension mapping " + extensionMapping);
        }

        funcSpec = new FuncSpec(loadFuncDefinition);
    }

    return funcSpec;
}

From source file:org.apache.slider.core.persist.JsonSerDeser.java

License:Apache License

/**
 * Load from a Hadoop filesystem
 * @param fs filesystem
 * @param path path
 * @return a loaded CD
 * @throws IOException IO problems
 * @throws JsonParseException parse problems
 * @throws JsonMappingException O/J mapping problems
 */
public T load(FileSystem fs, Path path) throws IOException, JsonParseException, JsonMappingException {
    FileStatus status = fs.getFileStatus(path);
    long len = status.getLen();
    byte[] b = new byte[(int) len];
    // Close the stream even if the read or the length check fails.
    try (FSDataInputStream dataInputStream = fs.open(path)) {
        int count = dataInputStream.read(b);
        if (count != len) {
            throw new EOFException("Read finished prematurely");
        }
    }
    return fromBytes(b);
}
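
A single read(byte[]) call may legally return fewer bytes than requested even before end of stream, so the count != len check above can fail on a short read of a healthy file. readFully loops internally until the array is filled; a hedged variant of the same load:

public T load(FileSystem fs, Path path) throws IOException {
    FileStatus status = fs.getFileStatus(path);
    byte[] b = new byte[(int) status.getLen()];

    // readFully fills b completely or throws EOFException.
    try (FSDataInputStream dataInputStream = fs.open(path)) {
        dataInputStream.readFully(b);
    }
    return fromBytes(b);
}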

From source file:org.apache.storm.hdfs.bolt.AvroGenericRecordBoltTest.java

License:Apache License

private void fileIsGoodAvro(Path path) throws IOException {
    DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
    FSDataInputStream in = fs.open(path, 0);
    FileOutputStream out = new FileOutputStream("target/FOO.avro");

    byte[] buffer = new byte[100];
    int bytesRead;
    while ((bytesRead = in.read(buffer)) > 0) {
        out.write(buffer, 0, bytesRead);
    }
    in.close();
    out.close();

    java.io.File file = new File("target/FOO.avro");

    DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(file, datumReader);
    GenericRecord user = null;
    while (dataFileReader.hasNext()) {
        user = dataFileReader.next(user);
    }

    dataFileReader.close();
    file.delete();
}
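
The manual copy loop can also be expressed with Hadoop's IOUtils.copyBytes helper; a sketch of the equivalent copy, with both streams managed by try-with-resources:

// Assumes: import org.apache.hadoop.io.IOUtils;
try (FSDataInputStream in = fs.open(path);
     FileOutputStream out = new FileOutputStream("target/FOO.avro")) {
    // copyBytes drains the input into the output with a 4 KB buffer.
    IOUtils.copyBytes(in, out, 4096);
}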

From source file:org.commoncrawl.service.listcrawler.DataTransferAgent.java

License:Open Source License

static int uploadSingeFile(CCBridgeServerMapping mapping, FileSystem fs, Configuration conf, Path hdfsFilePath,
        String uploadName, EventLoop eventLoop) throws IOException {

    final FileStatus fileStatus = fs.getFileStatus(hdfsFilePath);
    LOG.info("Uploading:" + uploadName + " size:" + fileStatus.getLen() + " to:" + mapping._internalName);

    {
        // construct url 
        URL fePostURL = new URL("http://" + mapping._externalName + ":8090/");
        LOG.info("POST URL IS:" + fePostURL.toString());

        // open input stream 
        final FSDataInputStream is = fs.open(hdfsFilePath);
        final Semaphore blockingSemaphore = new Semaphore(0);
        NIOHttpConnection connection = null;
        try {
            // create connection 
            connection = new NIOHttpConnection(fePostURL, eventLoop.getSelector(), eventLoop.getResolver(),
                    null);
            // set listener 
            connection.setListener(new Listener() {

                @Override
                public void HttpConnectionStateChanged(NIOHttpConnection theConnection, State oldState,
                        State state) {
                    LOG.info("Connection State Changed to:" + state.toString());
                    if (state == State.DONE || state == State.ERROR) {
                        //LOG.info("Connection Transition to Done or Error");
                        //LOG.info("Response Headers:" + theConnection.getResponseHeaders().toString());
                        blockingSemaphore.release();
                    }
                }

                @Override
                public void HttpContentAvailable(NIOHttpConnection theConnection, NIOBufferList contentBuffer) {
                    // TODO Auto-generated method stub

                }
            });
            // set headers 
            connection.getRequestHeaders().reset();
            connection.getRequestHeaders().prepend("PUT /put?src=" + uploadName + " HTTP/1.1", null);
            connection.getRequestHeaders().set("Host", mapping._internalName + ":8090");
            connection.getRequestHeaders().set("Content-Length", Long.toString(fileStatus.getLen()));
            connection.getRequestHeaders().set("Connection", "keep-alive");
            connection.setPopulateDefaultHeaderItems(false);

            final LinkedBlockingDeque<BufferStruct> _loaderQueue = new LinkedBlockingDeque<BufferStruct>(20);
            final AtomicBoolean eof = new AtomicBoolean();
            final ByteBuffer sentinel = ByteBuffer.allocate(4096);
            sentinel.position(sentinel.position());
            final Thread loaderThread = new Thread(new Runnable() {

                int _id = 0;

                @Override
                public void run() {
                    int bytesRead;
                    byte incomingBuffer[] = new byte[4096 * 10];
                    try {
                        while ((bytesRead = is.read(incomingBuffer)) != -1) {
                            ByteBuffer buffer = ByteBuffer.wrap(incomingBuffer, 0, bytesRead);
                            buffer.position(bytesRead);

                            //LOG.info("Loader Thread Read:"+ bytesRead + " Buffer:" + ++_id);
                            try {
                                _loaderQueue.put(new BufferStruct(buffer, _id));
                            } catch (InterruptedException e) {
                                LOG.error(CCStringUtils.stringifyException(e));
                                break;
                            }
                            incomingBuffer = new byte[4096 * 10];
                        }
                        try {
                            _loaderQueue.put(new BufferStruct(sentinel, ++_id));
                        } catch (InterruptedException e) {
                        }
                    } catch (IOException e) {
                        LOG.error(CCStringUtils.stringifyException(e));
                        return;
                    }
                }

            });

            loaderThread.start();

            // set data source ... 
            connection.setDataSource(new DataSource() {

                int bytesTransferred = 0;

                @Override
                public boolean read(NIOBufferList dataBuffer) throws IOException {
                    if (eof.get())
                        return true;
                    //LOG.info("Connect read callback triggered");
                    BufferStruct buffer = _loaderQueue.poll();
                    if (buffer != null) {
                        if (buffer._buffer != sentinel) {
                            //LOG.info("Got Buffer:"+ buffer._id);
                            if (buffer._id == 1) {
                                //LOG.info("Inital Buffer Bytes:" + new String(buffer._buffer.array(),0,10).toString());
                            }
                            dataBuffer.write(buffer._buffer);
                            bytesTransferred += buffer._buffer.limit();
                            //LOG.info("Read:" + buffer._buffer.limit() + " Transfered:" + bytesTransferred);
                            return false;
                        } else {
                            //LOG.info("EOF Condition");
                            dataBuffer.write(sentinel);
                            eof.set(true);
                            return true;
                        }
                    }
                    return false;
                }
            });

            // open connection 
            connection.open();
            // wait for connection to complete ... 
            blockingSemaphore.acquireUninterruptibly();
            // kill loader thread 
            loaderThread.interrupt();
            try {
                LOG.info("Waiting for Loader Thread");
                loaderThread.join();
                LOG.info("Done Waiting for Loader Thread");
            } catch (InterruptedException e) {
            }
        } finally {
            is.close();
            if (connection != null) {
                connection.close();
                LOG.info("Response Code for File:" + uploadName + "to Host: " + mapping._internalName + " is:"
                        + connection.getResponseHeaders().getHttpResponseCode());
                return connection.getResponseHeaders().getHttpResponseCode();
                /*
                if (connection.getResponseHeaders().getHttpResponseCode() != 200) { 
                  throw new IOException("Failed to upload file:" + dataFile.getName() + " responseCode:" + connection.getResponseHeaders().getHttpResponseCode());
                }
                */
            }
        }
    }
    // something went wrong ??? 
    LOG.error("Failed to upload file:" + uploadName + " unknown response code");
    return 500;
}