Example usage for org.apache.hadoop.fs FileSystem open

Introduction

On this page you can find example usages of org.apache.hadoop.fs.FileSystem.open.

Prototype

public FSDataInputStream open(Path f) throws IOException

Source Link

Document

Opens an FSDataInputStream at the indicated Path.
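
Below is a minimal sketch of the pattern used throughout the examples on this page: get a FileSystem, open a Path, read from the returned FSDataInputStream, and close it. The path used here is hypothetical.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class OpenExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path file = new Path("/tmp/example.txt"); // hypothetical path
        FSDataInputStream in = fs.open(file);
        try {
            byte[] buf = new byte[4096];
            int read = in.read(buf); // returns -1 at end of file
            System.out.println("read " + read + " bytes");
        } finally {
            in.close();
        }
    }
}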

Usage

From source file:com.hadoop.compression.fourmc.FourMzInputStream.java

License:BSD License

/**
 * Reads the block index at the tail of the file.
 *
 * @param fs   filesystem
 * @param file path to 4mc file
 * @return block index
 * @throws IOException
 */
public static FourMzBlockIndex readIndex(FileSystem fs, Path file) throws IOException {

    long fileSize = fs.getFileStatus(file).getLen();
    if (fileSize < (12 + 20)) { // file too small
        return new FourMzBlockIndex();
    }

    FSDataInputStream indexIn = fs.open(file);

    /*
     * Jump to the file tail and read ahead the last 4KB, which should be enough in most cases.
     * Improvement: we could estimate a best-case compression factor of 10% and compute a forecast
     *              based on file size and block size, to decide whether to read ahead more.
     */

    int readTailSize = 4 * 1024;
    if (readTailSize > (fileSize - 12))
        readTailSize = (int) (fileSize - 12);

    indexIn.seek(fileSize - readTailSize);
    byte[] buf = new byte[readTailSize];
    readFully(indexIn, buf, 0, buf.length);
    int footerSize = getInt(buf, buf.length - 12);
    int magic = getInt(buf, buf.length - 8);
    int checksum = getInt(buf, buf.length - 4);

    if (magic != FourMzCodec.FOURMZ_MAGIC) {
        throw new IOException("Invalid 4mc footer magic");
    }
    if (footerSize >= (fileSize - 12)) {
        throw new IOException("Invalid 4mc footer checksum");
    }

    // very rare case: read head was not enough! seek back and read it all
    if (footerSize > readTailSize) {
        readTailSize = footerSize;
        indexIn.seek(fileSize - readTailSize);
        buf = new byte[readTailSize];
        readFully(indexIn, buf, 0, buf.length);
    }
    indexIn.close();

    int startFooterOffset = readTailSize - footerSize;

    if (getInt(buf, startFooterOffset) != footerSize) { // size again
        throw new IOException("Invalid 4mc footer size");
    }

    if (getInt(buf, startFooterOffset + 4) != FourMzCodec.FOURMZ_VERSION) { // version
        throw new IOException("Invalid 4mc footer version (" + getInt(buf, startFooterOffset + 4) + ")");
    }

    if (checksum != ZstdDecompressor.xxhash32(buf, startFooterOffset, footerSize - 4, 0)) {
        throw new IOException("Invalid 4mc footer checksum");
    }

    int totalBlocks = (footerSize - 20) / 4;
    FourMzBlockIndex index = new FourMzBlockIndex(totalBlocks);
    long curOffset = 0;
    for (int i = 0; i < totalBlocks; ++i) {
        curOffset += getInt(buf, startFooterOffset + 8 + (i * 4));
        index.set(i, curOffset);
    }

    return index;
}
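
A possible call site for the readIndex helper above, assuming the relevant 4mc and Hadoop imports; the file path is hypothetical:

FileSystem fs = FileSystem.get(new Configuration());
Path fourMcFile = new Path("/data/events.4mc"); // hypothetical path
FourMzBlockIndex index = FourMzInputStream.readIndex(fs, fourMcFile);
// the returned index maps block numbers to byte offsets, which is what lets an input format split the file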

From source file:com.hadoop.compression.lzo.LzoIndex.java

License:Open Source License

/**
 * Read the index of the lzo file.
 *
 * @param fs The index file is on this file system.
 * @param lzoFile the file whose index we are reading -- NOT the index file itself.  That is,
 * pass in filename.lzo, not filename.lzo.index, for this parameter.
 * @throws IOException
 */
public static LzoIndex readIndex(FileSystem fs, Path lzoFile) throws IOException {
    FSDataInputStream indexIn = null;
    Path indexFile = lzoFile.suffix(LZO_INDEX_SUFFIX);

    try {
        indexIn = fs.open(indexFile);
    } catch (IOException fileNotFound) {
        // return empty index, fall back to the unsplittable mode
        return new LzoIndex();
    }

    int capacity = 16 * 1024 * 8; //size for a 4GB file (with 256KB lzo blocks)
    DataOutputBuffer bytes = new DataOutputBuffer(capacity);

    // copy indexIn and close it
    IOUtils.copyBytes(indexIn, bytes, 4 * 1024, true);

    ByteBuffer bytesIn = ByteBuffer.wrap(bytes.getData(), 0, bytes.getLength());
    int blocks = bytesIn.remaining() / 8;
    LzoIndex index = new LzoIndex(blocks);

    for (int i = 0; i < blocks; i++) {
        index.set(i, bytesIn.getLong());
    }

    return index;
}
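
A hedged example of calling this helper, again assuming the usual Hadoop imports; the .lzo path is hypothetical:

FileSystem fs = FileSystem.get(new Configuration());
LzoIndex index = LzoIndex.readIndex(fs, new Path("/data/logs.lzo")); // hypothetical path
// an empty index is returned when no filename.lzo.index file exists,
// in which case the file is treated as unsplittable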

From source file:com.hadoop.compression.lzo.LzoIndex.java

License:Open Source License

/**
 * Index an lzo file to allow the input format to split it into separate map
 * jobs.
 *
 * @param fs File system that contains the file.
 * @param lzoFile the lzo file to index.  For filename.lzo, the created index file will be
 * filename.lzo.index.
 * @throws IOException
 */
public static void createIndex(FileSystem fs, Path lzoFile) throws IOException {

    Configuration conf = fs.getConf();
    CompressionCodecFactory factory = new CompressionCodecFactory(conf);
    CompressionCodec codec = factory.getCodec(lzoFile);
    if (null == codec) {
        throw new IOException("Could not find codec for file " + lzoFile
                + " - you may need to add the LZO codec to your io.compression.codecs "
                + "configuration in core-site.xml");
    }
    ((Configurable) codec).setConf(conf);

    FSDataInputStream is = null;
    FSDataOutputStream os = null;
    Path outputFile = lzoFile.suffix(LZO_INDEX_SUFFIX);
    Path tmpOutputFile = lzoFile.suffix(LZO_TMP_INDEX_SUFFIX);

    // Track whether an exception was thrown or not, so we know to either
    // delete the tmp index file on failure, or rename it to the new index file on success.
    boolean indexingSucceeded = false;
    try {
        is = fs.open(lzoFile);
        os = fs.create(tmpOutputFile);
        LzopDecompressor decompressor = (LzopDecompressor) codec.createDecompressor();
        // Solely for reading the header
        codec.createInputStream(is, decompressor);
        int numCompressedChecksums = decompressor.getCompressedChecksumsCount();
        int numDecompressedChecksums = decompressor.getDecompressedChecksumsCount();

        while (true) {
            // read and ignore, we just want to get to the next int
            int uncompressedBlockSize = is.readInt();
            if (uncompressedBlockSize == 0) {
                break;
            } else if (uncompressedBlockSize < 0) {
                throw new EOFException();
            }

            int compressedBlockSize = is.readInt();
            if (compressedBlockSize <= 0) {
                throw new IOException("Could not read compressed block size");
            }

            // See LzopInputStream.getCompressedData
            boolean isUncompressedBlock = (uncompressedBlockSize == compressedBlockSize);
            int numChecksumsToSkip = isUncompressedBlock ? numDecompressedChecksums
                    : numDecompressedChecksums + numCompressedChecksums;
            long pos = is.getPos();
            // write the pos of the block start
            os.writeLong(pos - 8);
            // seek to the start of the next block, skip any checksums
            is.seek(pos + compressedBlockSize + (4 * numChecksumsToSkip));
        }
        // If we're here, indexing was successful.
        indexingSucceeded = true;
    } finally {
        // Close any open streams.
        if (is != null) {
            is.close();
        }

        if (os != null) {
            os.close();
        }

        if (!indexingSucceeded) {
            // If indexing didn't succeed (i.e. an exception was thrown), clean up after ourselves.
            fs.delete(tmpOutputFile, false);
        } else {
            // Otherwise, rename filename.lzo.index.tmp to filename.lzo.index.
            fs.rename(tmpOutputFile, outputFile);
        }
    }
}
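
A sketch of how the indexer above might be invoked; the path is hypothetical, and io.compression.codecs must already include the LZO codec so that getCodec() can resolve it:

Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(conf);
Path lzoFile = new Path("/data/logs.lzo"); // hypothetical path
LzoIndex.createIndex(fs, lzoFile);         // writes /data/logs.lzo.index next to the file
LzoIndex index = LzoIndex.readIndex(fs, lzoFile);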

From source file:com.hadoop.mapred.DeprecatedLzoLineRecordReader.java

License:Open Source License

DeprecatedLzoLineRecordReader(Configuration conf, FileSplit split) throws IOException {
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();

    FileSystem fs = file.getFileSystem(conf);
    codecFactory = new CompressionCodecFactory(conf);
    final CompressionCodec codec = codecFactory.getCodec(file);
    if (codec == null) {
        throw new IOException("No LZO codec found, cannot run.");
    }

    // Open the file and seek to the start of the split.
    fileIn = fs.open(file);
    // Create input stream and read the file header.
    in = new LineReader(codec.createInputStream(fileIn), conf);
    if (start != 0) {
        fileIn.seek(start);

        // Read and ignore the first line.
        in.readLine(new Text());
        start = fileIn.getPos();
    }

    pos = start;
}

From source file:com.hadoop.mapreduce.FourMcLineRecordReader.java

License:BSD License

@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    FileSplit split = (FileSplit) genericSplit;
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    Configuration job = HadoopUtils.getConfiguration(context);
    maxLineLen = job.getInt(MAX_LINE_LEN_CONF, Integer.MAX_VALUE);

    FileSystem fs = file.getFileSystem(job);
    CompressionCodecFactory compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = compressionCodecs.getCodec(file);
    if (codec == null) {
        throw new IOException("Codec for file " + file + " not found, cannot run");
    }

    // open the file and seek to the start of the split
    fileIn = fs.open(split.getPath());

    // creates input stream and also reads the file header
    in = new LineReader(codec.createInputStream(fileIn), job);

    if (start != 0) {
        fileIn.seek(start);

        // read and ignore the first line
        in.readLine(new Text());
        start = fileIn.getPos();
    }

    this.pos = start;
}

From source file:com.hadoop.mapreduce.LzoLineRecordReader.java

License:Open Source License

@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    FileSplit split = (FileSplit) genericSplit;
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    Configuration job = context.getConfiguration();

    FileSystem fs = file.getFileSystem(job);
    CompressionCodecFactory compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = compressionCodecs.getCodec(file);
    if (codec == null) {
        throw new IOException("No codec for file " + file + " not found, cannot run");
    }

    // open the file and seek to the start of the split
    fileIn = fs.open(split.getPath());

    // creates input stream and also reads the file header
    in = new LineReader(codec.createInputStream(fileIn), job);

    if (start != 0) {
        fileIn.seek(start);

        // read and ignore the first line
        in.readLine(new Text());
        start = fileIn.getPos();
    }

    this.pos = start;
}
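
For context, a minimal sketch (assuming a new-API job and the usual org.apache.hadoop.mapreduce imports) of wiring LzoTextInputFormat into a job so that this record reader is used; the job name and input path are hypothetical:

Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "lzo example");                // hypothetical job name
job.setInputFormatClass(LzoTextInputFormat.class);
FileInputFormat.addInputPath(job, new Path("/data/logs.lzo")); // hypothetical path
// LzoTextInputFormat consults filename.lzo.index (if present) to create multiple splits per file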

From source file:com.hadoop.mapreduce.LzoTextInputFormat.java

License:Open Source License

/**
 * Read the index of the lzo file.
 *
 * @param file
 *          the file whose index should be read.
 * @param fs
 *          The index file is on this file system.
 * @throws IOException
 */
private LzoIndex readIndex(Path file, FileSystem fs) throws IOException {
    FSDataInputStream indexIn = null;
    try {
        Path indexFile = new Path(file.toString() + LZO_INDEX_SUFFIX);
        if (!fs.exists(indexFile)) {
            // return empty index, fall back to the unsplittable mode
            return new LzoIndex();
        }

        long indexLen = fs.getFileStatus(indexFile).getLen();
        int blocks = (int) (indexLen / 8);
        LzoIndex index = new LzoIndex(blocks);
        indexIn = fs.open(indexFile);
        for (int i = 0; i < blocks; i++) {
            index.set(i, indexIn.readLong());
        }
        return index;
    } finally {
        if (indexIn != null) {
            indexIn.close();
        }
    }
}

From source file:com.hadoop.mapreduce.LzoTextInputFormat.java

License:Open Source License

/**
 * Index an lzo file to allow the input format to split it into separate map
 * jobs.
 * 
 * @param fs
 *          File system that contains the file.
 * @param lzoFile
 *          the lzo file to index.
 * @throws IOException
 */
public static void createIndex(FileSystem fs, Path lzoFile) throws IOException {

    Configuration conf = fs.getConf();
    CompressionCodecFactory factory = new CompressionCodecFactory(fs.getConf());
    CompressionCodec codec = factory.getCodec(lzoFile);
    ((Configurable) codec).setConf(conf);

    InputStream lzoIs = null;
    FSDataOutputStream os = null;
    Path outputFile = new Path(lzoFile.toString() + LzoTextInputFormat.LZO_INDEX_SUFFIX);
    Path tmpOutputFile = outputFile.suffix(".tmp");

    try {
        FSDataInputStream is = fs.open(lzoFile);
        os = fs.create(tmpOutputFile);
        LzopDecompressor decompressor = (LzopDecompressor) codec.createDecompressor();
        // for reading the header
        lzoIs = codec.createInputStream(is, decompressor);

        int numChecksums = decompressor.getChecksumsCount();

        while (true) {
            // read and ignore, we just want to get to the next int
            int uncompressedBlockSize = is.readInt();
            if (uncompressedBlockSize == 0) {
                break;
            } else if (uncompressedBlockSize < 0) {
                throw new EOFException();
            }

            int compressedBlockSize = is.readInt();
            if (compressedBlockSize <= 0) {
                throw new IOException("Could not read compressed block size");
            }

            long pos = is.getPos();
            // write the pos of the block start
            os.writeLong(pos - 8);
            // seek to the start of the next block, skip any checksums
            is.seek(pos + compressedBlockSize + (4 * numChecksums));
        }
    } finally {
        if (lzoIs != null) {
            lzoIs.close();
        }

        if (os != null) {
            os.close();
        }
    }

    fs.rename(tmpOutputFile, outputFile);
}

From source file:com.hdfs.concat.crush.CountersInputFormat.java

License:Apache License

@Override
public RecordReader<Counters, NullWritable> getRecordReader(InputSplit inputSplit, JobConf jobconf,
        Reporter reporter) throws IOException {

    if (!(inputSplit instanceof FileSplit)) {
        throw new AssertionError();
    }

    FileSplit fSplit = (FileSplit) inputSplit;

    Path path = fSplit.getPath();
    long length = fSplit.getLength();

    FileSystem fs = FileSystem.get(jobconf);

    FSDataInputStream is = fs.open(path);

    return new CountersReader(is, length);
}

From source file:com.hdfstoftp.main.HdfsToFtp.java

/**
 * Copies files from HDFS to an FTP server according to the given configuration.
 *
 * @param config transfer configuration: source and destination directories, filters,
 *               overwrite/rename options, thread count and FTP connection settings
 * @return boolean true when the transfer has finished
 * @throws Exception
 */
private static boolean copyFromHDFSToFTP(Config config) throws Exception {
    // get the source HDFS file system
    Configuration conf = new Configuration();
    FileSystem srcFS = FileSystem.get(conf);
    long start = System.currentTimeMillis();
    boolean isRename = config.isRenameUploaded();
    int retryTimes = config.getRetryTimes();
    // source and destination paths
    String dstPath = config.getDestDir();
    Path src = new Path(config.getSouceDir());
    FileStatus fileStatus = srcFS.getFileStatus(src);
    String subDir = null;
    if (fileStatus.isDirectory()) { // the source is a directory
        if (isRename) { // move uploaded files into a rename subdirectory
            subDir = Config.RENAME_DIR;
            srcFS.mkdirs(new Path(fileStatus.getPath(), subDir));
        }
        int threadNum = config.getThreadNum();
        // thread pool for parallel uploads
        ExecutorService threadPool = Executors.newFixedThreadPool(threadNum);
        // pool of FTP client connections
        FTPClientPool ftpPool = new FTPClientPool(threadNum, new FtpClientFactory(config.getFTPClientConfig()));
        FTPClient ftpClient = ftpPool.borrowObject();
        // create the destination directory on the FTP server
        ftpClient.makeDirectory(dstPath);
        ftpPool.returnObject(ftpClient);
        // list the source files
        FileStatus contents[] = srcFS.listStatus(src);
        long beginFilter = 0;
        long endFileter = 0;

        if (config.getCommandLine().hasOption("d") || config.getCommandLine().hasOption("h")
                || config.getCommandLine().hasOption("t")) {// ?"["
            beginFilter = System.currentTimeMillis();
            Long[] timeRange = parseTimeRange(config.getCommandLine());
            contents = getNewContents(timeRange, contents);
            endFileter = System.currentTimeMillis();
        }
        // filter file names by the -r pattern
        if (config.getCommandLine().hasOption("r")) {
            beginFilter = System.currentTimeMillis();
            contents = getFilterContents(config.getCommandLine().getOptionValue("r").trim(), contents);
            endFileter = System.currentTimeMillis();
        }
        logger.info("total file count:" + contents.length);
        Map<String, String> fileNameMap = null;
        long beginSkip = 0;
        long endSkip = 0;
        boolean overwrite = true;
        if (config.getCommandLine().hasOption("o")) {
            overwrite = "true".equals(config.getCommandLine().getOptionValue("o").trim());
        }
        if (!overwrite) { // collect existing remote file names so they can be skipped
            beginSkip = System.currentTimeMillis();
            fileNameMap = getFileNameMap(dstPath, ftpPool);
            endSkip = System.currentTimeMillis();
        }
        int skiped = 0;

        List<Future<?>> futureList = new ArrayList<Future<?>>();
        for (int i = 0; i < contents.length; i++) {
            if (!overwrite && fileNameMap.containsKey(contents[i].getPath().getName())) {
                // skip files that already exist on the FTP server
                skiped++;
                Log.info("skipped filename:" + contents[i].getPath().getName());
                continue;
            }
            if (contents[i].isDirectory()) {
                continue;
            }
            // submit an upload task for this file
            Future<?> future = threadPool.submit(new UploadFileTask(srcFS, contents[i].getPath(),
                    new Path(dstPath, contents[i].getPath().getName()), ftpPool, false, isRename, subDir,
                    retryTimes));
            futureList.add(future);
        }
        int transfered = 0;
        int failed = 0;
        for (Future<?> future : futureList) {
            Boolean computeResult = (Boolean) future.get();
            if (computeResult) {
                transfered++;
                if (transfered % 50 == 0 || transfered == contents.length) {
                    logger.info("have transfered:" + transfered + " files");
                }
            } else {
                failed++;
                logger.error("failed transter:" + failed + " files");
            }
        }
        // shut down the thread pool
        threadPool.shutdown();
        // close the FTP client pool
        ftpPool.close();
        // summary logging
        logger.info("filter time:" + (endFileter - beginFilter) + " ms");
        if (!overwrite) {
            logger.info("skip time:" + (endSkip - beginSkip) + " ms");
        }
        logger.info("total file count:" + contents.length);
        logger.info("total transtered: " + transfered + ",total failed:" + failed + ",total skiped:" + skiped);

    } else { // the source is a single file whose lines list the HDFS paths to upload

        BufferedReader reader = null;
        FtpClientFactory facotry = new FtpClientFactory(config.getFTPClientConfig());
        FTPClient ftpClient = null;
        InputStream in = null;
        try {
            Path path = fileStatus.getPath();
            if (!path.getName().contains("log")) {

            }
            reader = new BufferedReader(new FileReader(new File(path.toUri().getPath())));
            String str = null;

            ftpClient = facotry.makeObject();

            while ((str = reader.readLine()) != null) {
                String[] feilds = str.split("&");
                Path filePath = null;
                if (feilds.length == 2 && feilds[1] != "") {
                    filePath = new Path(feilds[1]);
                    in = srcFS.open(filePath);
                    boolean result = ftpClient.storeFile(dstPath, in);
                    System.out.println(ftpClient.getReplyCode());
                    if (result) {
                        logger.info(filePath.toString());
                    } else {
                        logger_failed.info(filePath.toString());
                    }
                } else {
                    continue;
                }

            }
        } catch (Exception e) {
            e.printStackTrace();

        } finally {
            if (in != null) in.close();
            if (reader != null) reader.close();
            if (ftpClient != null) facotry.destroyObject(ftpClient);
        }

    }
    long end = System.currentTimeMillis();
    logger.info("finished transfer,total time:" + (end - start) / 1000 + "s");
    return true;
}