Example usage for org.apache.hadoop.fs Path suffix

List of usage examples for org.apache.hadoop.fs Path suffix

Introduction

On this page you can find usage examples for org.apache.hadoop.fs.Path.suffix(String).

Prototype

public Path suffix(String suffix) 

Document

Adds a suffix to the final name in the path.
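
A minimal sketch (with hypothetical paths, not taken from the examples below) of what suffix(...) returns:

import org.apache.hadoop.fs.Path;

Path lzoFile = new Path("/data/logs/part-00000.lzo");
Path indexFile = lzoFile.suffix(".index");
// indexFile -> /data/logs/part-00000.lzo.index
// The suffix is appended to the final name component of the path. Passing a
// string that starts with "/" (e.g. suffix("/" + name)) effectively yields a
// child path, as the HBaseTable example below does.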

Usage

From source file: com.aliyun.fs.oss.TestAliyunOSSFileSystemStore.java

License: Apache License

protected void writeRenameReadCompare(Path path, long len) throws IOException, NoSuchAlgorithmException {
    // If len > fs.oss.multipart.upload.threshold,
    // we'll use a multipart upload copy
    MessageDigest digest = MessageDigest.getInstance("MD5");
    OutputStream out = new BufferedOutputStream(new DigestOutputStream(fs.create(path, false), digest));
    for (long i = 0; i < len; i++) {
        out.write('Q');
    }
    out.flush();
    out.close();

    assertTrue("Exists", fs.exists(path));

    Path copyPath = path.suffix(".copy");
    fs.rename(path, copyPath);

    assertTrue("Copy exists", fs.exists(copyPath));

    // Download file from Aliyun OSS and compare the digest against the original
    MessageDigest digest2 = MessageDigest.getInstance("MD5");
    InputStream in = new BufferedInputStream(new DigestInputStream(fs.open(copyPath), digest2));
    long copyLen = 0;
    while (in.read() != -1) {
        copyLen++;
    }
    in.close();

    assertEquals("Copy length matches original", len, copyLen);
    assertArrayEquals("Digests match", digest.digest(), digest2.digest());
}

From source file: com.btoddb.chronicle.apps.AvroTools.java

License: Open Source License

private void testFileAndFix(Path inFile) throws IOException {
    FileContext context = FileContext.getFileContext(hdfsConfig);
    AvroFSInput input = new AvroFSInput(context, inFile);

    ReflectDatumReader<Object> reader = new ReflectDatumReader<>();
    FileReader<Object> fileReader = DataFileReader.openReader(input, reader);

    Path outFile = inFile.suffix(".fixing");
    FSDataOutputStream output = FileSystem.create(outFile.getFileSystem(hdfsConfig), outFile,
            FsPermission.getDefault());
    DataFileWriter<Object> writer = new DataFileWriter<>(new GenericDatumWriter<>());
    writer.setCodec(CodecFactory.snappyCodec());

    boolean corrupted = false;
    long count = 0;

    try {
        Schema schema = fileReader.getSchema();
        writer.create(schema, output);

        for (;;) {
            try {
                if (fileReader.hasNext()) {
                    Object obj = fileReader.next();
                    count++;
                    writer.append(obj);
                } else {
                    break;
                }
            } catch (AvroRuntimeException e) {
                corrupted = true;
                System.out.println("  - file pointer = " + input.tell());
                if (e.getCause() instanceof EOFException) {
                    System.out.println("  - EOF occurred so we're done : " + e.getMessage());
                    break;
                } else if (e.getCause() instanceof IOException) {
                    System.out.println("  - will try to 'next' past the error : " + e.getMessage());
                    try {
                        fileReader.next();
                        System.out.println("  - 'next' worked - didn't really expect it to, but great!");
                    } catch (Exception e2) {
                        System.out.println("  - 'next' did not work - will continue on and see what happens : "
                                + e2.getMessage());
                    }
                    continue;
                }
                break;
            } catch (Exception e) {
                corrupted = true;
                System.out.println("  - file pointer = " + input.tell());
                e.printStackTrace();
                break;
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        System.out.println(("  - processed " + count + " records"));
        if (null != fileReader) {
            try {
                fileReader.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
        if (null != writer) {
            try {
                writer.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

    if (!corrupted) {
        outFile.getFileSystem(hdfsConfig).delete(outFile, false);
    } else {
        outFile.getFileSystem(hdfsConfig).rename(outFile, inFile.suffix(".fixed"));
    }
}

From source file: com.cloudera.impala.catalog.HBaseTable.java

License: Apache License

/**
 * Returns the Hdfs size of the given region in bytes. NULL can be
 * passed as a parameter to retrieve the size of the complete table.
 */
public long getHdfsSize(HRegionInfo info) throws IOException {
    Path tableDir = HTableDescriptor.getTableDir(FSUtils.getRootDir(hbaseConf_),
            Bytes.toBytes(hbaseTableName_));
    FileSystem fs = tableDir.getFileSystem(hbaseConf_);
    if (info != null) {
        Path regionDir = tableDir.suffix("/" + info.getEncodedName());
        return fs.getContentSummary(regionDir).getLength();
    } else {
        return fs.getContentSummary(tableDir).getLength();
    }
}

From source file: com.fullcontact.sstable.index.SSTableIndexIndex.java

License: Apache License

/**
 * Read an existing index. Reads and returns the index index, which is a list of chunks defined by the Cassandra
 * Index.db file along with the configured split size.
 *
 * @param fileSystem Hadoop file system.
 * @param sstablePath SSTable Index.db.
 * @return Index of chunks.
 * @throws IOException
 */
public static SSTableIndexIndex readIndex(final FileSystem fileSystem, final Path sstablePath)
        throws IOException {
    final Closer closer = Closer.create();
    final Path indexPath = sstablePath.suffix(SSTABLE_INDEX_SUFFIX);

    // Detonate if we don't have an index.
    final FSDataInputStream inputStream = closer.register(fileSystem.open(indexPath));

    final SSTableIndexIndex indexIndex = new SSTableIndexIndex();
    try {
        while (inputStream.available() != 0) {
            indexIndex.add(inputStream.readLong(), inputStream.readLong());
        }
    } finally {
        closer.close();
    }

    return indexIndex;
}

From source file: com.fullcontact.sstable.index.SSTableIndexIndex.java

License: Apache License

/**
 * Create and write an index index based on the input Cassandra Index.db file. Read the Index.db and generate chunks
 * (splits) based on the configured chunk size.
 *
 * @param fileSystem Hadoop file system.
 * @param sstablePath SSTable Index.db.
 * @throws IOException
 */
public static void writeIndex(final FileSystem fileSystem, final Path sstablePath) throws IOException {

    final Configuration configuration = fileSystem.getConf();

    final long splitSize = configuration.getLong(HadoopSSTableConstants.HADOOP_SSTABLE_SPLIT_MB,
            HadoopSSTableConstants.DEFAULT_SPLIT_MB) * 1024 * 1024;

    final Closer closer = Closer.create();

    final Path outputPath = sstablePath.suffix(SSTABLE_INDEX_SUFFIX);
    final Path inProgressOutputPath = sstablePath.suffix(SSTABLE_INDEX_IN_PROGRESS_SUFFIX);

    boolean success = false;
    try {
        final FSDataOutputStream os = closer.register(fileSystem.create(inProgressOutputPath));

        final TLongArrayList splitOffsets = new TLongArrayList();
        long currentStart = 0;
        long currentEnd = 0;
        final IndexOffsetScanner index = new IndexOffsetScanner(sstablePath, fileSystem);

        while (index.hasNext()) {
            // NOTE: This does not give an exact size of this split in bytes but a rough estimate.
            // This should be good enough since it's only used for sorting splits by size in hadoop land.
            while (currentEnd - currentStart < splitSize && index.hasNext()) {
                currentEnd = index.next();
                splitOffsets.add(currentEnd);
            }

            // Record the split
            final long[] offsets = splitOffsets.toArray();
            os.writeLong(offsets[0]); // Start
            os.writeLong(offsets[offsets.length - 1]); // End

            // Clear the offsets
            splitOffsets.clear();

            if (index.hasNext()) {
                currentStart = index.next();
                currentEnd = currentStart;
                splitOffsets.add(currentStart);
            }
        }

        success = true;
    } finally {
        closer.close();

        if (!success) {
            fileSystem.delete(inProgressOutputPath, false);
        } else {
            fileSystem.rename(inProgressOutputPath, outputPath);
        }
    }
}

From source file: com.hadoop.compression.lzo.LzoIndex.java

License: Open Source License

/**
 * Read the index of the lzo file.
 *
 * @param fs The index file is on this file system.
 * @param lzoFile the file whose index we are reading -- NOT the index file itself.  That is,
 * pass in filename.lzo, not filename.lzo.index, for this parameter.
 * @throws IOException
 */
public static LzoIndex readIndex(FileSystem fs, Path lzoFile) throws IOException {
    FSDataInputStream indexIn = null;
    Path indexFile = lzoFile.suffix(LZO_INDEX_SUFFIX);

    try {
        indexIn = fs.open(indexFile);
    } catch (IOException fileNotFound) {
        // return empty index, fall back to the unsplittable mode
        return new LzoIndex();
    }

    int capacity = 16 * 1024 * 8; //size for a 4GB file (with 256KB lzo blocks)
    DataOutputBuffer bytes = new DataOutputBuffer(capacity);

    // copy indexIn and close it
    IOUtils.copyBytes(indexIn, bytes, 4 * 1024, true);

    ByteBuffer bytesIn = ByteBuffer.wrap(bytes.getData(), 0, bytes.getLength());
    int blocks = bytesIn.remaining() / 8;
    LzoIndex index = new LzoIndex(blocks);

    for (int i = 0; i < blocks; i++) {
        index.set(i, bytesIn.getLong());
    }

    return index;
}

From source file: com.hadoop.compression.lzo.LzoIndex.java

License: Open Source License

/**
 * Index an lzo file to allow the input format to split them into separate map
 * jobs.
 *
 * @param fs File system that contains the file.
 * @param lzoFile the lzo file to index.  For filename.lzo, the created index file will be
 * filename.lzo.index.
 * @throws IOException
 */
public static void createIndex(FileSystem fs, Path lzoFile) throws IOException {

    Configuration conf = fs.getConf();
    CompressionCodecFactory factory = new CompressionCodecFactory(conf);
    CompressionCodec codec = factory.getCodec(lzoFile);
    if (null == codec) {
        throw new IOException("Could not find codec for file " + lzoFile
                + " - you may need to add the LZO codec to your io.compression.codecs "
                + "configuration in core-site.xml");
    }
    ((Configurable) codec).setConf(conf);

    FSDataInputStream is = null;
    FSDataOutputStream os = null;
    Path outputFile = lzoFile.suffix(LZO_INDEX_SUFFIX);
    Path tmpOutputFile = lzoFile.suffix(LZO_TMP_INDEX_SUFFIX);

    // Track whether an exception was thrown or not, so we know to either
    // delete the tmp index file on failure, or rename it to the new index file on success.
    boolean indexingSucceeded = false;
    try {
        is = fs.open(lzoFile);
        os = fs.create(tmpOutputFile);
        LzopDecompressor decompressor = (LzopDecompressor) codec.createDecompressor();
        // Solely for reading the header
        codec.createInputStream(is, decompressor);
        int numCompressedChecksums = decompressor.getCompressedChecksumsCount();
        int numDecompressedChecksums = decompressor.getDecompressedChecksumsCount();

        while (true) {
            // read and ignore, we just want to get to the next int
            int uncompressedBlockSize = is.readInt();
            if (uncompressedBlockSize == 0) {
                break;
            } else if (uncompressedBlockSize < 0) {
                throw new EOFException();
            }

            int compressedBlockSize = is.readInt();
            if (compressedBlockSize <= 0) {
                throw new IOException("Could not read compressed block size");
            }

            // See LzopInputStream.getCompressedData
            boolean isUncompressedBlock = (uncompressedBlockSize == compressedBlockSize);
            int numChecksumsToSkip = isUncompressedBlock ? numDecompressedChecksums
                    : numDecompressedChecksums + numCompressedChecksums;
            long pos = is.getPos();
            // write the pos of the block start
            os.writeLong(pos - 8);
            // seek to the start of the next block, skip any checksums
            is.seek(pos + compressedBlockSize + (4 * numChecksumsToSkip));
        }
        // If we're here, indexing was successful.
        indexingSucceeded = true;
    } finally {
        // Close any open streams.
        if (is != null) {
            is.close();
        }

        if (os != null) {
            os.close();
        }

        if (!indexingSucceeded) {
            // If indexing didn't succeed (i.e. an exception was thrown), clean up after ourselves.
            fs.delete(tmpOutputFile, false);
        } else {
            // Otherwise, rename filename.lzo.index.tmp to filename.lzo.index.
            fs.rename(tmpOutputFile, outputFile);
        }
    }
}

From source file: com.hadoop.mapreduce.LzoTextInputFormat.java

License: Open Source License

/**
 * Index an lzo file to allow the input format to split them into separate map
 * jobs.
 * 
 * @param fs
 *          File system that contains the file.
 * @param lzoFile
 *          the lzo file to index.
 * @throws IOException
 */
public static void createIndex(FileSystem fs, Path lzoFile) throws IOException {

    Configuration conf = fs.getConf();
    CompressionCodecFactory factory = new CompressionCodecFactory(fs.getConf());
    CompressionCodec codec = factory.getCodec(lzoFile);
    ((Configurable) codec).setConf(conf);

    InputStream lzoIs = null;
    FSDataOutputStream os = null;
    Path outputFile = new Path(lzoFile.toString() + LzoTextInputFormat.LZO_INDEX_SUFFIX);
    Path tmpOutputFile = outputFile.suffix(".tmp");

    try {
        FSDataInputStream is = fs.open(lzoFile);
        os = fs.create(tmpOutputFile);
        LzopDecompressor decompressor = (LzopDecompressor) codec.createDecompressor();
        // for reading the header
        lzoIs = codec.createInputStream(is, decompressor);

        int numChecksums = decompressor.getChecksumsCount();

        while (true) {
            // read and ignore, we just want to get to the next int
            int uncompressedBlockSize = is.readInt();
            if (uncompressedBlockSize == 0) {
                break;
            } else if (uncompressedBlockSize < 0) {
                throw new EOFException();
            }

            int compressedBlockSize = is.readInt();
            if (compressedBlockSize <= 0) {
                throw new IOException("Could not read compressed block size");
            }

            long pos = is.getPos();
            // write the pos of the block start
            os.writeLong(pos - 8);
            // seek to the start of the next block, skip any checksums
            is.seek(pos + compressedBlockSize + (4 * numChecksums));
        }
    } finally {
        if (lzoIs != null) {
            lzoIs.close();
        }

        if (os != null) {
            os.close();
        }
    }

    fs.rename(tmpOutputFile, outputFile);
}

From source file: com.liferay.hadoop.search.HadoopDLIndexerPostProcessor.java

License: Open Source License

public void postProcessDocument(Document document, Object obj) throws Exception {

    DLFileEntry dlFileEntry = (DLFileEntry) obj;

    long companyId = dlFileEntry.getCompanyId();
    long repositoryId = dlFileEntry.getRepositoryId();

    String stringObject = document.toString();

    // remove JSON chars

    stringObject = StringUtil.replace(stringObject, new String[] { "\"", ",", ":", "{", "}", "[", "]" },
            new String[] { StringPool.SPACE, StringPool.SPACE, StringPool.SPACE, StringPool.SPACE,
                    StringPool.SPACE, StringPool.SPACE, StringPool.SPACE });

    Path fullDirPath = HadoopManager.getFullDirPath(companyId, repositoryId, null);

    fullDirPath = new Path("/index".concat(fullDirPath.toString()));

    FSDataOutputStream outputStream = null;

    try {
        FileSystem fileSystem = HadoopManager.getFileSystem();

        String suffix = StringPool.SLASH.concat(document.getUID());

        outputStream = fileSystem.create(fullDirPath.suffix(suffix));

        PrintWriter pw = new PrintWriter(outputStream);

        pw.write(stringObject);

        pw.flush();
        pw.close();
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        StreamUtil.cleanUp(outputStream);
    }
}

From source file: com.nagarro.nteg.utils.HDFSFileDataBufferedReader.java

License: Apache License

/**
 * @param path
 * @param batchSize
 * @throws IOException
 */
public HDFSFileDataBufferedReader(final Path path, final int batchSize) throws IOException {
    this.batchSize = batchSize;
    this.currentFile = path;
    this.buffer = new ArrayDeque<String>(batchSize);
    internalRenamedFile = path.suffix(IN_PROGRESS_FILE_SUFFIX);

    fileSystem = path.getFileSystem(new Configuration());

    fileSystem.rename(path, internalRenamedFile);

    scanner = new Scanner(fileSystem.open(internalRenamedFile));
}