List of usage examples for org.apache.hadoop.io.IOUtils.closeStream
public static void closeStream(java.io.Closeable stream)
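IOUtils.closeStream closes the given stream and swallows any IOException, and it is null-safe, which is what makes it the standard backstop in a finally block. A minimal sketch of the canonical idiom (the path and configuration are placeholders, not taken from any example below):

import java.io.InputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

public class CloseStreamSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        InputStream in = null;
        try {
            in = fs.open(new Path("/tmp/example.txt")); // hypothetical path
            // ... read from the stream ...
        } finally {
            // Null-safe and exception-swallowing: closing here never masks
            // an exception already propagating out of the try block.
            IOUtils.closeStream(in);
        }
    }
}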
From source file:com.knewton.mapreduce.SSTableRecordReader.java
License:Apache License
/**
 * Copies a remote path to the local filesystem, while updating hadoop that we're making
 * progress. Doesn't support directories.
 */
@VisibleForTesting
void copyToLocalFile(FileSystem remoteFS, FileSystem localFS, Path remote, Path local)
        throws IOException {
    // don't support transferring from remote directories
    FileStatus remoteStat = remoteFS.getFileStatus(remote);
    Preconditions.checkArgument(!remoteStat.isDirectory(),
            String.format("Path %s is directory!", remote));
    // if local is a dir, copy to inside that dir, like 'cp /path/file /tmp/' would do
    if (localFS.exists(local)) {
        FileStatus localStat = localFS.getFileStatus(local);
        if (localStat.isDirectory()) {
            local = new Path(local, remote.getName());
        }
    }
    long remoteFileSize = remoteStat.getLen();
    // do actual copy
    InputStream in = null;
    OutputStream out = null;
    try {
        long startTime = System.currentTimeMillis();
        long lastLogTime = 0;
        long bytesCopied = 0;
        in = remoteFS.open(remote);
        out = localFS.create(local, true);
        int buffSize = this.conf.getInt(CommonConfigurationKeys.IO_FILE_BUFFER_SIZE_KEY,
                CommonConfigurationKeys.IO_FILE_BUFFER_SIZE_DEFAULT);
        byte[] buf = new byte[buffSize];
        int bytesRead = in.read(buf);
        while (bytesRead >= 0) {
            long now = System.currentTimeMillis();
            // log transfer rate once per min, starting 1 min after transfer began
            if (now - lastLogTime > 60000L && now - startTime > 60000L) {
                double elapsedSec = (now - startTime) / 1000D;
                double bytesPerSec = bytesCopied / elapsedSec;
                LOG.info("Transferred {} of {} bytes at {} bytes per second",
                        bytesCopied, remoteFileSize, bytesPerSec);
                lastLogTime = now;
            }
            this.ctx.progress();
            out.write(buf, 0, bytesRead);
            bytesCopied += bytesRead;
            bytesRead = in.read(buf);
        }
        // try to close these outside of finally so we receive exception on failure
        out.close();
        out = null;
        in.close();
        in = null;
    } finally {
        // make sure everything's closed
        IOUtils.closeStream(out);
        IOUtils.closeStream(in);
    }
}
From source file:com.kylinolap.job.hadoop.cube.RowKeyDistributionCheckerMapper.java
License:Apache License
@SuppressWarnings("deprecation") public byte[][] getSplits(Configuration conf, Path path) { List<byte[]> rowkeyList = new ArrayList<byte[]>(); SequenceFile.Reader reader = null; try {//from w w w .ja v a 2s. co m reader = new SequenceFile.Reader(path.getFileSystem(conf), path, conf); Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf); Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf); while (reader.next(key, value)) { byte[] tmp = ((Text) key).copyBytes(); if (rowkeyList.contains(tmp) == false) { rowkeyList.add(tmp); } } } catch (Exception e) { e.printStackTrace(); } finally { IOUtils.closeStream(reader); } byte[][] retValue = rowkeyList.toArray(new byte[rowkeyList.size()][]); return retValue; }
From source file:com.kylinolap.job.hadoop.hbase.CreateHTableJob.java
License:Apache License
@SuppressWarnings("deprecation") public byte[][] getSplits(Configuration conf, Path path) throws Exception { FileSystem fs = path.getFileSystem(conf); if (fs.exists(path) == false) { System.err.println("Path " + path + " not found, no region split, HTable will be one region"); return null; }/*from www . j av a2 s . c o m*/ List<byte[]> rowkeyList = new ArrayList<byte[]>(); SequenceFile.Reader reader = null; try { reader = new SequenceFile.Reader(fs, path, conf); Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf); Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf); while (reader.next(key, value)) { rowkeyList.add(((Text) key).copyBytes()); } } catch (Exception e) { e.printStackTrace(); throw e; } finally { IOUtils.closeStream(reader); } System.out.println((rowkeyList.size() + 1) + " regions"); System.out.println(rowkeyList.size() + " splits"); for (byte[] split : rowkeyList) { System.out.println(StringUtils.byteToHexString(split)); } byte[][] retValue = rowkeyList.toArray(new byte[rowkeyList.size()][]); return retValue.length == 0 ? null : retValue; }
From source file:com.marcolotz.lung.io.inputFormat.MultipleFilesRecordReader.java
License:Creative Commons License
/**
 * <p>
 * If the file has not already been read, this reads it into memory, so that
 * a call to getCurrentValue() will return the entire contents of this file
 * as Text, and getCurrentKey() will return the qualified path to this file
 * as Text. Then, returns true. If it has already been read, then returns
 * false without updating any internal state.
 * </p>
 *
 * @return Whether the file was read or not.
 * @throws IOException if there is an error reading the file.
 * @throws InterruptedException if there is an error.
 */
@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
    if (!isProcessed) {
        if (mFileLength > (long) Integer.MAX_VALUE) {
            throw new IOException("File is longer than Integer.MAX_VALUE.");
        }
        byte[] contents = new byte[(int) mFileLength];
        FileSystem fs = mFileToRead.getFileSystem(mConf);
        FSDataInputStream in = null;
        try {
            // Set the contents of this file.
            in = fs.open(mFileToRead);
            IOUtils.readFully(in, contents, 0, contents.length);
            fileContent.set(contents, 0, contents.length);
        } finally {
            IOUtils.closeStream(in);
        }
        isProcessed = true;
        return true;
    }
    return false;
}
From source file:com.marcolotz.lung.io.inputFormat.WholeFileRecordReader.java
License:Creative Commons License
@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
    /* if the record reader for this input split has not been called yet */
    if (!processed) {
        byte[] contents = new byte[(int) fileSplit.getLength()];
        Path file = fileSplit.getPath();
        // Reads from the conf file what the desired file system is.
        FileSystem fs = file.getFileSystem(conf);
        FSDataInputStream in = null;
        try {
            in = fs.open(file);
            IOUtils.readFully(in, contents, 0, contents.length);
            value.set(contents, 0, contents.length);
        } finally {
            IOUtils.closeStream(in);
        }
        processed = true;
        return true;
    }
    return false;
}
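Whole-file record readers like the two above only work if each input split covers an entire file. A sketch of the matching InputFormat side (the class name and key/value type parameters are assumptions; the snippets above do not show them):

// Hypothetical companion InputFormat for a whole-file record reader.
public class WholeFileInputFormat extends FileInputFormat<NullWritable, BytesWritable> {
    @Override
    protected boolean isSplitable(JobContext context, Path file) {
        return false; // one split per file, so nextKeyValue() can read it whole
    }

    @Override
    public RecordReader<NullWritable, BytesWritable> createRecordReader(InputSplit split,
            TaskAttemptContext context) {
        return new WholeFileRecordReader(); // a reader like the ones shown above
    }
}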
From source file:com.marklogic.contentpump.examples.SimpleSequenceFileCreator.java
License:Apache License
public static void main(String args[]) throws IOException {
    System.out.println("Sequence File Creator");
    String uri = args[0]; // output sequence file name
    String filePath = args[1]; // text file to read from; odd lines are keys,
                               // even lines are values
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(URI.create(uri), conf);
    Path path = new Path(uri);
    SequenceFile.Writer writer = null;
    SimpleSequenceFileKey key = new SimpleSequenceFileKey();
    BufferedReader buffer = new BufferedReader(new FileReader(filePath));
    String line = null;
    SimpleSequenceFileValue<Text> value = new SimpleSequenceFileValue<Text>();
    try {
        writer = SequenceFile.createWriter(fs, conf, path, key.getClass(), value.getClass());
        while ((line = buffer.readLine()) != null) {
            key.setDocumentURI(new DocumentURI(line));
            if ((line = buffer.readLine()) == null) {
                break;
            }
            value.setValue(new Text(line));
            writer.append(key, value);
        }
    } finally {
        IOUtils.closeStream(writer);
        buffer.close(); // note: unlike the writer, a failure closing the reader propagates
    }
}
From source file:com.marklogic.contentpump.examples.SimpleSequenceFileReader.java
License:Apache License
public static void main(String args[]) throws Exception {
    System.out.println("Sequence File Reader");
    String uri = args[0]; // input should be a sequence file
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(URI.create(uri), conf);
    Path path = new Path(uri);
    SequenceFile.Reader reader = null;
    try {
        reader = new SequenceFile.Reader(fs, path, conf);
        Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
        Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
        long position = reader.getPosition();
        while (reader.next(key, value)) {
            String syncSeen = reader.syncSeen() ? "*" : "";
            System.out.printf("[%s%s]\t%s\t%s\n", position, syncSeen,
                    ((SimpleSequenceFileKey) key).getDocumentURI().getUri(),
                    ((SimpleSequenceFileValue) value).getValue());
            position = reader.getPosition();
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        IOUtils.closeStream(reader);
    }
}
From source file:com.marklogic.contentpump.Versions.java
License:Apache License
protected Versions(String component) {
    info = new Properties();
    String versionInfoFile = component + "-version-info.properties";
    InputStream is = null;
    try {
        is = Thread.currentThread().getContextClassLoader().getResourceAsStream(versionInfoFile);
        if (is == null) {
            throw new IOException("Resource not found");
        }
        info.load(is);
    } catch (IOException ex) {
        LogFactory.getLog(getClass()).warn(
                "Could not read '" + versionInfoFile + "', " + ex.toString(), ex);
    } finally {
        IOUtils.closeStream(is);
    }
}
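On Java 7 and later, the same pattern can be written with try-with-resources, which makes the explicit closeStream unnecessary. A sketch with the same behavior (close() is skipped automatically when the stream is null):

String versionInfoFile = component + "-version-info.properties";
try (InputStream is = Thread.currentThread().getContextClassLoader()
        .getResourceAsStream(versionInfoFile)) {
    if (is == null) {
        throw new IOException("Resource not found");
    }
    info.load(is);
} catch (IOException ex) {
    // a close() failure is also an IOException, so it lands here too
    LogFactory.getLog(getClass()).warn("Could not read '" + versionInfoFile + "', " + ex.toString(), ex);
}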
From source file:com.mellanox.r4h.DFSClient.java
License:Apache License
/**
 * Get the checksum of the whole file or a range of the file. Note that the
 * range always starts from the beginning of the file.
 *
 * @param src
 *          The file path
 * @param length
 *          the length of the range, i.e., the range is [0, length]
 * @return The checksum
 * @see DistributedFileSystem#getFileChecksum(Path)
 */
public MD5MD5CRC32FileChecksum getFileChecksum(String src, long length) throws IOException {
    checkOpen();
    Preconditions.checkArgument(length >= 0);
    // get block locations for the file range
    LocatedBlocks blockLocations = callGetBlockLocations(namenode, src, 0, length);
    if (null == blockLocations) {
        throw new FileNotFoundException("File does not exist: " + src);
    }
    List<LocatedBlock> locatedblocks = blockLocations.getLocatedBlocks();
    final DataOutputBuffer md5out = new DataOutputBuffer();
    int bytesPerCRC = -1;
    DataChecksum.Type crcType = DataChecksum.Type.DEFAULT;
    long crcPerBlock = 0;
    boolean refetchBlocks = false;
    int lastRetriedIndex = -1;

    // get block checksum for each block
    long remaining = length;
    if (src.contains(HdfsConstants.SEPARATOR_DOT_SNAPSHOT_DIR_SEPARATOR)) {
        remaining = Math.min(length, blockLocations.getFileLength());
    }
    for (int i = 0; i < locatedblocks.size() && remaining > 0; i++) {
        if (refetchBlocks) { // refetch to get fresh tokens
            blockLocations = callGetBlockLocations(namenode, src, 0, length);
            if (null == blockLocations) {
                throw new FileNotFoundException("File does not exist: " + src);
            }
            locatedblocks = blockLocations.getLocatedBlocks();
            refetchBlocks = false;
        }
        LocatedBlock lb = locatedblocks.get(i);
        final ExtendedBlock block = lb.getBlock();
        if (remaining < block.getNumBytes()) {
            block.setNumBytes(remaining);
        }
        remaining -= block.getNumBytes();
        final DatanodeInfo[] datanodes = lb.getLocations();

        // try each datanode location of the block
        final int timeout = 3000 * datanodes.length + dfsClientConf.socketTimeout();
        boolean done = false;
        for (int j = 0; !done && j < datanodes.length; j++) {
            DataOutputStream out = null;
            DataInputStream in = null;
            try {
                // connect to a datanode
                IOStreamPair pair = connectToDN(datanodes[j], timeout, lb);
                out = new DataOutputStream(
                        new BufferedOutputStream(pair.out, HdfsConstants.SMALL_BUFFER_SIZE));
                in = new DataInputStream(pair.in);

                if (LOG.isDebugEnabled()) {
                    LOG.debug("write to " + datanodes[j] + ": " + Op.BLOCK_CHECKSUM + ", block=" + block);
                }
                // get block MD5
                new Sender(out).blockChecksum(block, lb.getBlockToken());
                final BlockOpResponseProto reply = BlockOpResponseProto.parseFrom(PBHelper.vintPrefixed(in));
                String logInfo = "for block " + block + " from datanode " + datanodes[j];
                DataTransferProtoUtil.checkBlockOpStatus(reply, logInfo);
                OpBlockChecksumResponseProto checksumData = reply.getChecksumResponse();

                // read byte-per-checksum
                final int bpc = checksumData.getBytesPerCrc();
                if (i == 0) { // first block
                    bytesPerCRC = bpc;
                } else if (bpc != bytesPerCRC) {
                    throw new IOException(
                            "Byte-per-checksum not matched: bpc=" + bpc + " but bytesPerCRC=" + bytesPerCRC);
                }

                // read crc-per-block
                final long cpb = checksumData.getCrcPerBlock();
                if (locatedblocks.size() > 1 && i == 0) {
                    crcPerBlock = cpb;
                }

                // read md5
                final MD5Hash md5 = new MD5Hash(checksumData.getMd5().toByteArray());
                md5.write(md5out);

                // read crc-type
                final DataChecksum.Type ct;
                if (checksumData.hasCrcType()) {
                    ct = PBHelper.convert(checksumData.getCrcType());
                } else {
                    LOG.debug("Retrieving checksum from an earlier-version DataNode: "
                            + "inferring checksum by reading first byte");
                    ct = inferChecksumTypeByReading(lb, datanodes[j]);
                }

                if (i == 0) { // first block
                    crcType = ct;
                } else if (crcType != DataChecksum.Type.MIXED && crcType != ct) {
                    // if crc types are mixed in a file
                    crcType = DataChecksum.Type.MIXED;
                }

                done = true;

                if (LOG.isDebugEnabled()) {
                    if (i == 0) {
                        LOG.debug("set bytesPerCRC=" + bytesPerCRC + ", crcPerBlock=" + crcPerBlock);
                    }
                    LOG.debug("got reply from " + datanodes[j] + ": md5=" + md5);
                }
            } catch (InvalidBlockTokenException ibte) {
                if (i > lastRetriedIndex) {
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Got access token error in response to OP_BLOCK_CHECKSUM "
                                + "for file " + src + " for block " + block + " from datanode "
                                + datanodes[j] + ". Will retry the block once.");
                    }
                    lastRetriedIndex = i;
                    done = true; // actually it's not done; but we'll retry
                    i--; // repeat at i-th block
                    refetchBlocks = true;
                    break;
                }
            } catch (IOException ie) {
                LOG.warn("src=" + src + ", datanodes[" + j + "]=" + datanodes[j], ie);
            } finally {
                IOUtils.closeStream(in);
                IOUtils.closeStream(out);
            }
        }

        if (!done) {
            throw new IOException("Fail to get block MD5 for " + block);
        }
    }

    // compute file MD5
    final MD5Hash fileMD5 = MD5Hash.digest(md5out.getData());
    switch (crcType) {
    case CRC32:
        return new MD5MD5CRC32GzipFileChecksum(bytesPerCRC, crcPerBlock, fileMD5);
    case CRC32C:
        return new MD5MD5CRC32CastagnoliFileChecksum(bytesPerCRC, crcPerBlock, fileMD5);
    default:
        // If there is no block allocated for the file,
        // return one with the magic entry that matches what previous
        // hdfs versions return.
        if (locatedblocks.size() == 0) {
            return new MD5MD5CRC32GzipFileChecksum(0, 0, fileMD5);
        }
        // we should never get here since the validity was checked
        // when getCrcType() was called above.
        return null;
    }
}
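A brief usage sketch (the client variable and path are hypothetical; only getFileChecksum itself comes from the code above): checksum the first megabyte of a file and print the result's algorithm name.

// dfsClient is an already-open DFSClient instance (hypothetical).
MD5MD5CRC32FileChecksum checksum = dfsClient.getFileChecksum("/data/events.log", 1024L * 1024L);
System.out.println("algorithm: " + checksum.getAlgorithmName());
System.out.println("checksum length in bytes: " + checksum.getLength());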
From source file:com.moz.fiji.mapreduce.input.impl.WholeFileRecordReader.java
License:Apache License
/**
 * <p>If the file has not already been read, this reads it into memory, so that a call
 * to getCurrentValue() will return the entire contents of this file as Text,
 * and getCurrentKey() will return the qualified path to this file as Text. Then, returns
 * true. If it has already been read, then returns false without updating any internal state.</p>
 *
 * @return Whether the file was read or not.
 * @throws IOException if there is an error reading the file.
 * @throws InterruptedException if there is an error.
 */
@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
    if (!mProcessed) {
        if (mFileLength > (long) Integer.MAX_VALUE) {
            throw new IOException("File is longer than Integer.MAX_VALUE.");
        }
        byte[] contents = new byte[(int) mFileLength];
        FileSystem fs = mFileToRead.getFileSystem(mConf);
        FSDataInputStream in = null;
        try {
            // Set the contents of this file.
            in = fs.open(mFileToRead);
            IOUtils.readFully(in, contents, 0, contents.length);
            mFileText.set(contents, 0, contents.length);

            // Set the name of this file.
            String fileName = mFileToRead.makeQualified(fs).toString();
            mFileName.set(fileName);
        } finally {
            IOUtils.closeStream(in);
        }
        mProcessed = true;
        return true;
    }
    return false;
}