List of usage examples for org.apache.hadoop.fs.Path.suffix
public Path suffix(String suffix)
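For reference, suffix appends the given string to the final component of this path and returns a new Path; it never touches the file system. Below is a minimal sketch of the semantics using hypothetical paths (note the second call: a suffix beginning with "/" resolves to a child path, a trick several examples on this page rely on):

import org.apache.hadoop.fs.Path;

public class PathSuffixDemo {
    public static void main(String[] args) {
        Path lzoFile = new Path("/data/logs/events.lzo"); // hypothetical path

        // The common "derived file" pattern: append onto the last path component
        System.out.println(lzoFile.suffix(".index"));  // /data/logs/events.lzo.index

        // A suffix starting with "/" resolves to a child of the original path
        System.out.println(lzoFile.suffix("/part-0")); // /data/logs/events.lzo/part-0
    }
}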
From source file: com.aliyun.fs.oss.TestAliyunOSSFileSystemStore.java
License: Apache License

protected void writeRenameReadCompare(Path path, long len) throws IOException, NoSuchAlgorithmException {
    // If len > fs.oss.multipart.upload.threshold,
    // we'll use a multipart upload copy
    MessageDigest digest = MessageDigest.getInstance("MD5");
    OutputStream out = new BufferedOutputStream(new DigestOutputStream(fs.create(path, false), digest));
    for (long i = 0; i < len; i++) {
        out.write('Q');
    }
    out.flush();
    out.close();

    assertTrue("Exists", fs.exists(path));

    Path copyPath = path.suffix(".copy");
    fs.rename(path, copyPath);

    assertTrue("Copy exists", fs.exists(copyPath));

    // Download file from Aliyun OSS and compare the digest against the original
    MessageDigest digest2 = MessageDigest.getInstance("MD5");
    InputStream in = new BufferedInputStream(new DigestInputStream(fs.open(copyPath), digest2));
    long copyLen = 0;
    while (in.read() != -1) {
        copyLen++;
    }
    in.close();

    assertEquals("Copy length matches original", len, copyLen);
    assertArrayEquals("Digests match", digest.digest(), digest2.digest());
}
From source file: com.btoddb.chronicle.apps.AvroTools.java
License: Open Source License

private void testFileAndFix(Path inFile) throws IOException {
    FileContext context = FileContext.getFileContext(hdfsConfig);
    AvroFSInput input = new AvroFSInput(context, inFile);
    ReflectDatumReader<Object> reader = new ReflectDatumReader<>();
    FileReader<Object> fileReader = DataFileReader.openReader(input, reader);

    Path outFile = inFile.suffix(".fixing");
    FSDataOutputStream output = FileSystem.create(outFile.getFileSystem(hdfsConfig), outFile,
            FsPermission.getDefault());
    DataFileWriter<Object> writer = new DataFileWriter<>(new GenericDatumWriter<>());
    writer.setCodec(CodecFactory.snappyCodec());

    boolean corrupted = false;
    long count = 0;
    try {
        Schema schema = fileReader.getSchema();
        writer.create(schema, output);
        for (;;) {
            try {
                if (fileReader.hasNext()) {
                    Object obj = fileReader.next();
                    count++;
                    writer.append(obj);
                } else {
                    break;
                }
            } catch (AvroRuntimeException e) {
                corrupted = true;
                System.out.println(" - file pointer = " + input.tell());
                if (e.getCause() instanceof EOFException) {
                    System.out.println(" - EOF occurred so we're done : " + e.getMessage());
                    break;
                } else if (e.getCause() instanceof IOException) {
                    System.out.println(" - will try to 'next' past the error : " + e.getMessage());
                    try {
                        fileReader.next();
                        System.out.println(" - 'next' worked - didn't really expect it to, but great!");
                    } catch (Exception e2) {
                        System.out.println(" - 'next' did not work - will continue on and see what happens : "
                                + e2.getMessage());
                    }
                    continue;
                }
                break;
            } catch (Exception e) {
                corrupted = true;
                System.out.println(" - file pointer = " + input.tell());
                e.printStackTrace();
                break;
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        System.out.println(" - processed " + count + " records");
        if (null != fileReader) {
            try {
                fileReader.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
        if (null != writer) {
            try {
                writer.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

    if (!corrupted) {
        outFile.getFileSystem(hdfsConfig).delete(outFile, false);
    } else {
        outFile.getFileSystem(hdfsConfig).rename(outFile, inFile.suffix(".fixed"));
    }
}
From source file: com.cloudera.impala.catalog.HBaseTable.java
License: Apache License

/**
 * Returns the Hdfs size of the given region in bytes. NULL can be
 * passed as a parameter to retrieve the size of the complete table.
 */
public long getHdfsSize(HRegionInfo info) throws IOException {
    Path tableDir = HTableDescriptor.getTableDir(FSUtils.getRootDir(hbaseConf_),
            Bytes.toBytes(hbaseTableName_));
    FileSystem fs = tableDir.getFileSystem(hbaseConf_);
    if (info != null) {
        Path regionDir = tableDir.suffix("/" + info.getEncodedName());
        return fs.getContentSummary(regionDir).getLength();
    } else {
        return fs.getContentSummary(tableDir).getLength();
    }
}
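Because suffix simply concatenates onto the last path component, the leading "/" in the region-directory suffix above makes the result a child of tableDir rather than a longer file name (the same behavior shown in the sketch at the top of this page).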
From source file: com.fullcontact.sstable.index.SSTableIndexIndex.java
License: Apache License

/**
 * Read an existing index. Reads and returns the index index, which is a list of chunks defined by the Cassandra
 * Index.db file along with the configured split size.
 *
 * @param fileSystem Hadoop file system.
 * @param sstablePath SSTable Index.db.
 * @return Index of chunks.
 * @throws IOException
 */
public static SSTableIndexIndex readIndex(final FileSystem fileSystem, final Path sstablePath)
        throws IOException {
    final Closer closer = Closer.create();
    final Path indexPath = sstablePath.suffix(SSTABLE_INDEX_SUFFIX);

    // Detonate if we don't have an index.
    final FSDataInputStream inputStream = closer.register(fileSystem.open(indexPath));

    final SSTableIndexIndex indexIndex = new SSTableIndexIndex();
    try {
        while (inputStream.available() != 0) {
            indexIndex.add(inputStream.readLong(), inputStream.readLong());
        }
    } finally {
        closer.close();
    }

    return indexIndex;
}
From source file: com.fullcontact.sstable.index.SSTableIndexIndex.java
License: Apache License

/**
 * Create and write an index index based on the input Cassandra Index.db file. Read the Index.db and generate
 * chunks (splits) based on the configured chunk size.
 *
 * @param fileSystem Hadoop file system.
 * @param sstablePath SSTable Index.db.
 * @throws IOException
 */
public static void writeIndex(final FileSystem fileSystem, final Path sstablePath) throws IOException {
    final Configuration configuration = fileSystem.getConf();
    final long splitSize = configuration.getLong(HadoopSSTableConstants.HADOOP_SSTABLE_SPLIT_MB,
            HadoopSSTableConstants.DEFAULT_SPLIT_MB) * 1024 * 1024;

    final Closer closer = Closer.create();

    final Path outputPath = sstablePath.suffix(SSTABLE_INDEX_SUFFIX);
    final Path inProgressOutputPath = sstablePath.suffix(SSTABLE_INDEX_IN_PROGRESS_SUFFIX);

    boolean success = false;

    try {
        final FSDataOutputStream os = closer.register(fileSystem.create(inProgressOutputPath));

        final TLongArrayList splitOffsets = new TLongArrayList();
        long currentStart = 0;
        long currentEnd = 0;
        final IndexOffsetScanner index = new IndexOffsetScanner(sstablePath, fileSystem);

        while (index.hasNext()) {
            // NOTE: This does not give an exact size of this split in bytes but a rough estimate.
            // This should be good enough since it's only used for sorting splits by size in hadoop land.
            while (currentEnd - currentStart < splitSize && index.hasNext()) {
                currentEnd = index.next();
                splitOffsets.add(currentEnd);
            }

            // Record the split
            final long[] offsets = splitOffsets.toArray();
            os.writeLong(offsets[0]); // Start
            os.writeLong(offsets[offsets.length - 1]); // End

            // Clear the offsets
            splitOffsets.clear();

            if (index.hasNext()) {
                currentStart = index.next();
                currentEnd = currentStart;
                splitOffsets.add(currentStart);
            }
        }

        success = true;
    } finally {
        closer.close();

        if (!success) {
            fileSystem.delete(inProgressOutputPath, false);
        } else {
            fileSystem.rename(inProgressOutputPath, outputPath);
        }
    }
}
From source file: com.hadoop.compression.lzo.LzoIndex.java
License: Open Source License

/**
 * Read the index of the lzo file.
 *
 * @param fs The index file is on this file system.
 * @param lzoFile the file whose index we are reading -- NOT the index file itself. That is,
 * pass in filename.lzo, not filename.lzo.index, for this parameter.
 * @throws IOException
 */
public static LzoIndex readIndex(FileSystem fs, Path lzoFile) throws IOException {
    FSDataInputStream indexIn = null;
    Path indexFile = lzoFile.suffix(LZO_INDEX_SUFFIX);

    try {
        indexIn = fs.open(indexFile);
    } catch (IOException fileNotFound) {
        // return empty index, fall back to the unsplittable mode
        return new LzoIndex();
    }

    int capacity = 16 * 1024 * 8; // size for a 4GB file (with 256KB lzo blocks)
    DataOutputBuffer bytes = new DataOutputBuffer(capacity);

    // copy indexIn and close it
    IOUtils.copyBytes(indexIn, bytes, 4 * 1024, true);

    ByteBuffer bytesIn = ByteBuffer.wrap(bytes.getData(), 0, bytes.getLength());
    int blocks = bytesIn.remaining() / 8;
    LzoIndex index = new LzoIndex(blocks);
    for (int i = 0; i < blocks; i++) {
        index.set(i, bytesIn.getLong());
    }

    return index;
}
From source file: com.hadoop.compression.lzo.LzoIndex.java
License: Open Source License

/**
 * Index an lzo file to allow the input format to split them into separate map
 * jobs.
 *
 * @param fs File system that contains the file.
 * @param lzoFile the lzo file to index. For filename.lzo, the created index file will be
 * filename.lzo.index.
 * @throws IOException
 */
public static void createIndex(FileSystem fs, Path lzoFile) throws IOException {
    Configuration conf = fs.getConf();
    CompressionCodecFactory factory = new CompressionCodecFactory(conf);
    CompressionCodec codec = factory.getCodec(lzoFile);
    if (null == codec) {
        throw new IOException("Could not find codec for file " + lzoFile
                + " - you may need to add the LZO codec to your io.compression.codecs "
                + "configuration in core-site.xml");
    }
    ((Configurable) codec).setConf(conf);

    FSDataInputStream is = null;
    FSDataOutputStream os = null;
    Path outputFile = lzoFile.suffix(LZO_INDEX_SUFFIX);
    Path tmpOutputFile = lzoFile.suffix(LZO_TMP_INDEX_SUFFIX);

    // Track whether an exception was thrown or not, so we know to either
    // delete the tmp index file on failure, or rename it to the new index file on success.
    boolean indexingSucceeded = false;
    try {
        is = fs.open(lzoFile);
        os = fs.create(tmpOutputFile);
        LzopDecompressor decompressor = (LzopDecompressor) codec.createDecompressor();
        // Solely for reading the header
        codec.createInputStream(is, decompressor);
        int numCompressedChecksums = decompressor.getCompressedChecksumsCount();
        int numDecompressedChecksums = decompressor.getDecompressedChecksumsCount();

        while (true) {
            // read and ignore, we just want to get to the next int
            int uncompressedBlockSize = is.readInt();
            if (uncompressedBlockSize == 0) {
                break;
            } else if (uncompressedBlockSize < 0) {
                throw new EOFException();
            }

            int compressedBlockSize = is.readInt();
            if (compressedBlockSize <= 0) {
                throw new IOException("Could not read compressed block size");
            }

            // See LzopInputStream.getCompressedData
            boolean isUncompressedBlock = (uncompressedBlockSize == compressedBlockSize);
            int numChecksumsToSkip = isUncompressedBlock
                    ? numDecompressedChecksums
                    : numDecompressedChecksums + numCompressedChecksums;
            long pos = is.getPos();
            // write the pos of the block start
            os.writeLong(pos - 8);
            // seek to the start of the next block, skip any checksums
            is.seek(pos + compressedBlockSize + (4 * numChecksumsToSkip));
        }
        // If we're here, indexing was successful.
        indexingSucceeded = true;
    } finally {
        // Close any open streams.
        if (is != null) {
            is.close();
        }
        if (os != null) {
            os.close();
        }

        if (!indexingSucceeded) {
            // If indexing didn't succeed (i.e. an exception was thrown), clean up after ourselves.
            fs.delete(tmpOutputFile, false);
        } else {
            // Otherwise, rename filename.lzo.index.tmp to filename.lzo.index.
            fs.rename(tmpOutputFile, outputFile);
        }
    }
}
From source file: com.hadoop.mapreduce.LzoTextInputFormat.java
License: Open Source License

/**
 * Index an lzo file to allow the input format to split them into separate map
 * jobs.
 *
 * @param fs File system that contains the file.
 * @param lzoFile the lzo file to index.
 * @throws IOException
 */
public static void createIndex(FileSystem fs, Path lzoFile) throws IOException {
    Configuration conf = fs.getConf();
    CompressionCodecFactory factory = new CompressionCodecFactory(fs.getConf());
    CompressionCodec codec = factory.getCodec(lzoFile);
    ((Configurable) codec).setConf(conf);

    InputStream lzoIs = null;
    FSDataOutputStream os = null;
    Path outputFile = new Path(lzoFile.toString() + LzoTextInputFormat.LZO_INDEX_SUFFIX);
    Path tmpOutputFile = outputFile.suffix(".tmp");

    try {
        FSDataInputStream is = fs.open(lzoFile);
        os = fs.create(tmpOutputFile);
        LzopDecompressor decompressor = (LzopDecompressor) codec.createDecompressor();
        // for reading the header
        lzoIs = codec.createInputStream(is, decompressor);

        int numChecksums = decompressor.getChecksumsCount();

        while (true) {
            // read and ignore, we just want to get to the next int
            int uncompressedBlockSize = is.readInt();
            if (uncompressedBlockSize == 0) {
                break;
            } else if (uncompressedBlockSize < 0) {
                throw new EOFException();
            }

            int compressedBlockSize = is.readInt();
            if (compressedBlockSize <= 0) {
                throw new IOException("Could not read compressed block size");
            }

            long pos = is.getPos();
            // write the pos of the block start
            os.writeLong(pos - 8);
            // seek to the start of the next block, skip any checksums
            is.seek(pos + compressedBlockSize + (4 * numChecksums));
        }
    } finally {
        if (lzoIs != null) {
            lzoIs.close();
        }
        if (os != null) {
            os.close();
        }
    }

    fs.rename(tmpOutputFile, outputFile);
}
From source file: com.liferay.hadoop.search.HadoopDLIndexerPostProcessor.java
License: Open Source License

public void postProcessDocument(Document document, Object obj) throws Exception {
    DLFileEntry dlFileEntry = (DLFileEntry) obj;

    long companyId = dlFileEntry.getCompanyId();
    long repositoryId = dlFileEntry.getRepositoryId();

    String stringObject = document.toString();

    // remove JSON chars
    stringObject = StringUtil.replace(stringObject,
            new String[] { "\"", ",", ":", "{", "}", "[", "]" },
            new String[] { StringPool.SPACE, StringPool.SPACE, StringPool.SPACE, StringPool.SPACE,
                    StringPool.SPACE, StringPool.SPACE, StringPool.SPACE });

    Path fullDirPath = HadoopManager.getFullDirPath(companyId, repositoryId, null);

    fullDirPath = new Path("/index".concat(fullDirPath.toString()));

    FSDataOutputStream outputStream = null;

    try {
        FileSystem fileSystem = HadoopManager.getFileSystem();

        String suffix = StringPool.SLASH.concat(document.getUID());

        outputStream = fileSystem.create(fullDirPath.suffix(suffix));

        PrintWriter pw = new PrintWriter(outputStream);
        pw.write(stringObject);
        pw.flush();
        pw.close();
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        StreamUtil.cleanUp(outputStream);
    }
}
From source file: com.nagarro.nteg.utils.HDFSFileDataBufferedReader.java
License: Apache License

/**
 * @param path
 * @param batchSize
 * @throws IOException
 */
public HDFSFileDataBufferedReader(final Path path, final int batchSize) throws IOException {
    this.batchSize = batchSize;
    this.currentFile = path;
    this.buffer = new ArrayDeque<String>(batchSize);

    internalRenamedFile = path.suffix(IN_PROGRESS_FILE_SUFFIX);
    fileSystem = path.getFileSystem(new Configuration());
    fileSystem.rename(path, internalRenamedFile);

    scanner = new Scanner(fileSystem.open(internalRenamedFile));
}