List of usage examples for org.apache.hadoop.fs FSDataInputStream read
@Override public int read(ByteBuffer buf) throws IOException
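Before the project-specific examples below, here is a minimal sketch (not taken from any of the listed sources; the class name ReadExample and helper readAll are illustrative only) of the short-read-safe pattern that several of the examples rely on: FSDataInputStream.read may return fewer bytes than requested, so callers loop until the buffer is full or end of stream.

import java.io.IOException;

import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ReadExample {
  /** Reads the whole file at 'path' into a byte array, looping over partial reads. */
  public static byte[] readAll(FileSystem fs, Path path) throws IOException {
    int len = (int) fs.getFileStatus(path).getLen();
    byte[] bytes = new byte[len];
    try (FSDataInputStream in = fs.open(path)) {
      int off = 0;
      while (off < len) {
        int n = in.read(bytes, off, len - off); // may be a short read
        if (n < 0) {
          throw new IOException("Unexpected end of stream at offset " + off);
        }
        off += n;
      }
    }
    return bytes;
  }
}

The same effect can also be obtained with FSDataInputStream.readFully(position, buffer) from the PositionedReadable contract, which throws EOFException if the file is shorter than the buffer.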
From source file:com.aliyun.fs.oss.blk.TestOssFileSystem.java
License:Apache License
public void testAppendWrite() throws IOException {
  String base = "test/oss";
  Path path = path(base);
  createEmptyFile(path);
  Long fileLen = fs.getFileStatus(path).getLen();
  assert (fileLen == 5);
  FSDataOutputStream fsDataOutputStream = fs.append(path);
  fsDataOutputStream.write(" world!".getBytes());
  fsDataOutputStream.flush();
  fsDataOutputStream.close();
  fileLen = fs.getFileStatus(path).getLen();
  assert (fileLen == 12);
  FSDataInputStream fsDataInputStream = fs.open(path);
  byte[] bytes = new byte[12];
  int numBytes = fsDataInputStream.read(bytes);
  while (numBytes < bytes.length) {
    // read() may return fewer bytes than requested; fill the remainder of the buffer.
    numBytes += fsDataInputStream.read(bytes, numBytes, bytes.length - numBytes);
  }
  String content = new String(bytes);
  assert (content.equals("Hello world!"));
}
From source file:com.aliyun.fs.oss.nat.TestNativeOssFileSystem.java
License:Apache License
public void testAppendWrite() throws IOException {
  String base = "test/oss";
  Path path = path(base);
  FSDataOutputStream fsDataOutputStream = fs.append(path);
  fsDataOutputStream.write("Hello".getBytes());
  fsDataOutputStream.flush();
  fsDataOutputStream.close();
  Long fileLen = fs.getFileStatus(path).getLen();
  assert (fileLen == 5);
  fsDataOutputStream = fs.append(path);
  fsDataOutputStream.write(" world!".getBytes());
  fsDataOutputStream.flush();
  fsDataOutputStream.close();
  fileLen = fs.getFileStatus(path).getLen();
  assert (fileLen == 12);
  FSDataInputStream fsDataInputStream = fs.open(path);
  byte[] bytes = new byte[12];
  fsDataInputStream.read(bytes);
  String content = new String(bytes);
  assert (content.equals("Hello world!"));
}
From source file:com.cip.crane.agent.utils.TaskHelper.java
License:Open Source License
@SuppressWarnings("unused") private void readFileFromHdfs(String srcFile, String destFile) throws IOException, FileNotFoundException { File file = new File(destFile); if (file.exists()) { file.delete();//from w ww . j a v a 2s.co m } byte[] buf = new byte[BUFFER_SIZE]; FileOutputStream fos = new FileOutputStream(file); FileSystem fs; FSDataInputStream hdfsInput; try { fs = FileSystem.get(URI.create(srcFile), conf); hdfsInput = fs.open(new Path(srcFile)); int num = hdfsInput.read(buf); while (num != (-1)) {// ? fos.write(buf, 0, num);// ? fos.flush();// ? num = hdfsInput.read(buf);// ?? } hdfsInput.close(); fos.close(); fs.close(); } catch (IOException e) { if (file.exists()) { file.delete(); } throw e; } }
From source file:com.cloudera.cdk.tools.JobClasspathHelper.java
License:Apache License
/**
 * @param conf
 *            Configuration object for the Job. Used to get the FileSystem associated with it.
 * @param libDir
 *            Destination directory in the FileSystem (usually HDFS) where to upload and look for the libs.
 * @param classesToInclude
 *            Classes that are needed by the job. JarFinder will look for the jar containing these classes.
 * @throws Exception
 */
public void prepareClasspath(final Configuration conf, final Path libDir, Class<?>... classesToInclude)
    throws Exception {
  FileSystem fs = null;
  List<Class<?>> classList = new ArrayList<Class<?>>(Arrays.asList(classesToInclude));
  fs = FileSystem.get(conf);
  Map<String, String> jarMd5Map = new TreeMap<String, String>();
  // For each class we use JarFinder to locate the jar in the local classpath.
  for (Class<?> clz : classList) {
    if (clz != null) {
      String localJarPath = JarFinder.getJar(clz);
      // We don't want to upload the same jar twice.
      if (!jarMd5Map.containsKey(localJarPath)) {
        // We should not push core Hadoop classes with this tool.
        // Should it be the responsibility of the developer, or do we keep this fence here?
        if (!clz.getName().startsWith("org.apache.hadoop.")) {
          // We compute the MD5 sum of the local jar.
          InputStream in = new FileInputStream(localJarPath);
          boolean threw = true;
          try {
            String md5sum = DigestUtils.md5Hex(in);
            jarMd5Map.put(localJarPath, md5sum);
            threw = false;
          } finally {
            Closeables.close(in, threw);
          }
        } else {
          logger.info("Ignoring {}, since it looks like it's from Hadoop's core libs", localJarPath);
        }
      }
    }
  }
  for (Entry<String, String> entry : jarMd5Map.entrySet()) {
    Path localJarPath = new Path(entry.getKey());
    String jarFilename = localJarPath.getName();
    String localMd5sum = entry.getValue();
    logger.info("Jar {}. MD5 : [{}]", localJarPath, localMd5sum);
    Path remoteJarPath = new Path(libDir, jarFilename);
    Path remoteMd5Path = new Path(libDir, jarFilename + ".md5");
    // If the jar file or its MD5 file does not exist in HDFS, we force the upload of the jar.
    if (!fs.exists(remoteJarPath) || !fs.exists(remoteMd5Path)) {
      copyJarToHDFS(fs, localJarPath, localMd5sum, remoteJarPath, remoteMd5Path);
    } else {
      // If the jar exists, we validate the MD5 file.
      // If the MD5 sum is different, we upload the jar.
      FSDataInputStream md5FileStream = null;
      String remoteMd5sum = "";
      try {
        md5FileStream = fs.open(remoteMd5Path);
        byte[] md5bytes = new byte[32];
        if (32 == md5FileStream.read(md5bytes)) {
          remoteMd5sum = new String(md5bytes, Charsets.UTF_8);
        }
      } finally {
        if (md5FileStream != null) {
          md5FileStream.close();
        }
      }
      if (localMd5sum.equals(remoteMd5sum)) {
        logger.info("Jar {} already exists [{}] and md5sum are equals", jarFilename,
            remoteJarPath.toUri().toASCIIString());
      } else {
        logger.info("Jar {} already exists [{}] and md5sum are different!", jarFilename,
            remoteJarPath.toUri().toASCIIString());
        copyJarToHDFS(fs, localJarPath, localMd5sum, remoteJarPath, remoteMd5Path);
      }
    }
    // In all cases we want to add the jar to the DistributedCache's classpath.
    DistributedCache.addFileToClassPath(remoteJarPath, conf, fs);
  }
  // And we create the symlink (was necessary in earlier versions of Hadoop).
  DistributedCache.createSymlink(conf);
}
From source file:com.cloudera.hadoop.hdfs.nfs.nfs4.handlers.READHandler.java
License:Apache License
@Override
protected READResponse doHandle(NFS4Handler server, Session session, READRequest request)
    throws NFS4Exception, IOException {
  if (session.getCurrentFileHandle() == null) {
    throw new NFS4Exception(NFS4ERR_NOFILEHANDLE);
  }
  int size = Math.min(request.getCount(), NFS4_MAX_RWSIZE);
  if (size < 0) {
    throw new NFS4Exception(NFS4ERR_INVAL);
  }
  FileHandle fileHandle = session.getCurrentFileHandle();
  Path path = server.getPath(fileHandle);
  FileSystem fs = session.getFileSystem();
  FSDataInputStream inputStream = server.forRead(request.getStateID(), fs, fileHandle);
  synchronized (inputStream) {
    if (inputStream.getPos() != request.getOffset()) {
      try {
        inputStream.seek(request.getOffset());
      } catch (IOException e) {
        throw new IOException(e.getMessage() + ": " + inputStream.getPos() + ", " + request.getOffset(), e);
      }
      server.incrementMetric("NFS_RANDOM_READS", 1);
    }
    READResponse response = createResponse();
    byte[] data = new byte[size];
    int count = inputStream.read(data);
    long fileLength = -1;
    if (count > 0 && count != data.length
        && (request.getOffset() + count) < (fileLength = fs.getFileStatus(path).getLen())) {
      LOGGER.info("Short read " + path + " at pos = " + request.getOffset() + ", wanted " + data.length
          + " and read " + count + ", fileLength = " + fileLength);
      server.incrementMetric("NFS_SHORT_READS", 1);
    }
    boolean eof = count < 0;
    if (eof) {
      data = new byte[0];
      count = 0;
    }
    server.incrementMetric("HDFS_BYTES_READ", count);
    response.setData(data, 0, count);
    response.setEOF(eof);
    response.setStatus(NFS4_OK);
    return response;
  }
}
From source file:com.cloudera.recordservice.mapreduce.testapps.RecordCount.java
License:Apache License
public static long countRecords(String path) throws IOException {
  String output = TestUtil.getTempDirectory();
  Path inputPath = new Path(path);
  Path outputPath = new Path(output);

  JobConf conf = new JobConf(RecordCount.class);
  conf.setJobName("recordcount");
  conf.setOutputKeyClass(NullWritable.class);
  conf.setOutputValueClass(LongWritable.class);
  conf.setInt("mapreduce.job.reduces", 1);
  conf.setMapperClass(Map.class);
  conf.setCombinerClass(Reduce.class);
  conf.setReducerClass(Reduce.class);
  conf.setInputFormat(com.cloudera.recordservice.mapred.TextInputFormat.class);
  conf.setOutputFormat(TextOutputFormat.class);
  FileInputFormat.setInputPaths(conf, inputPath);
  FileOutputFormat.setOutputPath(conf, outputPath);
  JobClient.runJob(conf);

  // Read the result and return it. Since we set the number of reducers to 1,
  // there is always just one file containing the value.
  FileSystem fs = outputPath.getFileSystem(conf);
  FSDataInputStream resultStream = fs.open(new Path(output + "/part-00000"));
  byte[] bytes = new byte[16];
  int length = resultStream.read(bytes);
  String result = new String(bytes, 0, length).trim();
  return Long.parseLong(result);
}
From source file:com.datatorrent.lib.io.fs.AbstractFileOutputOperator.java
License:Open Source License
@Override
public void setup(Context.OperatorContext context) {
  LOG.debug("setup initiated");
  rollingFile = maxLength < Long.MAX_VALUE;

  //Getting required file system instance.
  try {
    fs = getFSInstance();
  } catch (IOException ex) {
    throw new RuntimeException(ex);
  }

  if (replication <= 0) {
    replication = fs.getDefaultReplication(new Path(filePath));
  }

  LOG.debug("FS class {}", fs.getClass());

  //When an entry is removed from the cache, removal listener is notified and it closes the output stream.
  RemovalListener<String, FSDataOutputStream> removalListener = new RemovalListener<String, FSDataOutputStream>() {
    @Override
    public void onRemoval(RemovalNotification<String, FSDataOutputStream> notification) {
      FSDataOutputStream value = notification.getValue();
      if (value != null) {
        try {
          LOG.debug("closing {}", notification.getKey());
          value.close();
        } catch (IOException e) {
          throw new RuntimeException(e);
        }
      }
    }
  };

  //Define cache
  CacheLoader<String, FSDataOutputStream> loader = new CacheLoader<String, FSDataOutputStream>() {
    @Override
    public FSDataOutputStream load(String filename) {
      String partFileName = getPartFileNamePri(filename);
      Path lfilepath = new Path(filePath + Path.SEPARATOR + partFileName);
      FSDataOutputStream fsOutput;
      boolean sawThisFileBefore = endOffsets.containsKey(filename);

      try {
        if (fs.exists(lfilepath)) {
          if (sawThisFileBefore) {
            FileStatus fileStatus = fs.getFileStatus(lfilepath);
            MutableLong endOffset = endOffsets.get(filename);
            if (endOffset != null) {
              endOffset.setValue(fileStatus.getLen());
            } else {
              endOffsets.put(filename, new MutableLong(fileStatus.getLen()));
            }
            fsOutput = fs.append(lfilepath);
            LOG.debug("appending to {}", lfilepath);
          }
          //We never saw this file before and we don't want to append
          else {
            //If the file is rolling we need to delete all its parts.
            if (rollingFile) {
              int part = 0;
              while (true) {
                Path seenPartFilePath = new Path(filePath + Path.SEPARATOR + getPartFileName(filename, part));
                if (!fs.exists(seenPartFilePath)) {
                  break;
                }
                fs.delete(seenPartFilePath, true);
                part = part + 1;
              }
              fsOutput = fs.create(lfilepath, (short) replication);
            }
            //Not rolling is easy, just delete the file and create it again.
            else {
              fs.delete(lfilepath, true);
              fsOutput = fs.create(lfilepath, (short) replication);
            }
          }
        } else {
          fsOutput = fs.create(lfilepath, (short) replication);
        }

        //Get the end offset of the file.
        LOG.info("opened: {}", fs.getFileStatus(lfilepath).getPath());
        return fsOutput;
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
    }
  };

  streamsCache = CacheBuilder.newBuilder().maximumSize(maxOpenFiles).removalListener(removalListener)
      .build(loader);

  try {
    LOG.debug("File system class: {}", fs.getClass());
    LOG.debug("end-offsets {}", endOffsets);

    //Restore the files in case they were corrupted and the operator
    Path writerPath = new Path(filePath);
    if (fs.exists(writerPath)) {
      for (String seenFileName : endOffsets.keySet()) {
        String seenFileNamePart = getPartFileNamePri(seenFileName);
        LOG.debug("seenFileNamePart: {}", seenFileNamePart);
        Path seenPartFilePath = new Path(filePath + Path.SEPARATOR + seenFileNamePart);
        if (fs.exists(seenPartFilePath)) {
          LOG.debug("file exists {}", seenFileNamePart);
          long offset = endOffsets.get(seenFileName).longValue();
          FSDataInputStream inputStream = fs.open(seenPartFilePath);
          FileStatus status = fs.getFileStatus(seenPartFilePath);

          if (status.getLen() != offset) {
            LOG.info("file corrupted {} {} {}", seenFileNamePart, offset, status.getLen());
            byte[] buffer = new byte[COPY_BUFFER_SIZE];
            Path tmpFilePath = new Path(filePath + Path.SEPARATOR + seenFileNamePart + TMP_EXTENSION);
            FSDataOutputStream fsOutput = fs.create(tmpFilePath, (short) replication);
            while (inputStream.getPos() < offset) {
              long remainingBytes = offset - inputStream.getPos();
              int bytesToWrite = remainingBytes < COPY_BUFFER_SIZE ? (int) remainingBytes : COPY_BUFFER_SIZE;
              inputStream.read(buffer);
              fsOutput.write(buffer, 0, bytesToWrite);
            }
            flush(fsOutput);
            fsOutput.close();
            inputStream.close();

            FileContext fileContext = FileContext.getFileContext(fs.getUri());
            LOG.debug("temp file path {}, rolling file path {}", tmpFilePath.toString(),
                status.getPath().toString());
            fileContext.rename(tmpFilePath, status.getPath(), Options.Rename.OVERWRITE);
          } else {
            inputStream.close();
          }
        }
      }
    }

    //delete the left over future rolling files produced from the previous crashed instance
    //of this operator.
    if (rollingFile) {
      for (String seenFileName : endOffsets.keySet()) {
        try {
          Integer part = openPart.get(seenFileName).getValue() + 1;
          while (true) {
            Path seenPartFilePath = new Path(filePath + Path.SEPARATOR + getPartFileName(seenFileName, part));
            if (!fs.exists(seenPartFilePath)) {
              break;
            }
            fs.delete(seenPartFilePath, true);
            part = part + 1;
          }
          Path seenPartFilePath = new Path(filePath + Path.SEPARATOR
              + getPartFileName(seenFileName, openPart.get(seenFileName).intValue()));
          //Handle the case when restoring to a checkpoint where the current rolling file
          //already has a length greater than max length.
          if (fs.getFileStatus(seenPartFilePath).getLen() > maxLength) {
            LOG.debug("rotating file at setup.");
            rotate(seenFileName);
          }
        } catch (IOException e) {
          throw new RuntimeException(e);
        } catch (ExecutionException e) {
          throw new RuntimeException(e);
        }
      }
    }

    LOG.debug("setup completed");
    LOG.debug("end-offsets {}", endOffsets);
  } catch (IOException e) {
    throw new RuntimeException(e);
  }

  this.context = context;
  lastTimeStamp = System.currentTimeMillis();

  fileCounters.setCounter(Counters.TOTAL_BYTES_WRITTEN, new MutableLong());
  fileCounters.setCounter(Counters.TOTAL_TIME_ELAPSED, new MutableLong());
}
From source file:com.datatorrent.lib.io.fs.AbstractFSWriter.java
License:Open Source License
@Override
public void setup(Context.OperatorContext context) {
  rollingFile = maxLength < Long.MAX_VALUE;

  //Getting required file system instance.
  try {
    fs = getFSInstance();
  } catch (IOException ex) {
    throw new RuntimeException(ex);
  }

  LOG.debug("FS class {}", fs.getClass());

  //Setting listener for debugging
  LOG.debug("setup initiated");

  RemovalListener<String, FSDataOutputStream> removalListener = new RemovalListener<String, FSDataOutputStream>() {
    @Override
    public void onRemoval(RemovalNotification<String, FSDataOutputStream> notification) {
      FSDataOutputStream value = notification.getValue();
      if (value != null) {
        try {
          LOG.debug("closing {}", notification.getKey());
          value.close();
        } catch (IOException e) {
          throw new RuntimeException(e);
        }
      }
    }
  };

  //Define cache
  CacheLoader<String, FSDataOutputStream> loader = new CacheLoader<String, FSDataOutputStream>() {
    @Override
    public FSDataOutputStream load(String filename) {
      String partFileName = getPartFileNamePri(filename);
      Path lfilepath = new Path(filePath + File.separator + partFileName);
      FSDataOutputStream fsOutput;

      if (replication <= 0) {
        replication = fs.getDefaultReplication(lfilepath);
      }

      boolean sawThisFileBefore = endOffsets.containsKey(filename);

      try {
        if (fs.exists(lfilepath)) {
          if (sawThisFileBefore || append) {
            FileStatus fileStatus = fs.getFileStatus(lfilepath);
            MutableLong endOffset = endOffsets.get(filename);
            if (endOffset != null) {
              endOffset.setValue(fileStatus.getLen());
            } else {
              endOffsets.put(filename, new MutableLong(fileStatus.getLen()));
            }
            fsOutput = fs.append(lfilepath);
            LOG.debug("appending to {}", lfilepath);
          }
          //We never saw this file before and we don't want to append
          else {
            //If the file is rolling we need to delete all its parts.
            if (rollingFile) {
              int part = 0;
              while (true) {
                Path seenPartFilePath = new Path(filePath + "/" + getPartFileName(filename, part));
                if (!fs.exists(seenPartFilePath)) {
                  break;
                }
                fs.delete(seenPartFilePath, true);
                part = part + 1;
              }
              fsOutput = fs.create(lfilepath, (short) replication);
            }
            //Not rolling is easy, just delete the file and create it again.
            else {
              fs.delete(lfilepath, true);
              fsOutput = fs.create(lfilepath, (short) replication);
            }
          }
        } else {
          fsOutput = fs.create(lfilepath, (short) replication);
        }

        //Get the end offset of the file.
        LOG.debug("full path: {}", fs.getFileStatus(lfilepath).getPath());
        return fsOutput;
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
    }
  };

  streamsCache = CacheBuilder.newBuilder().maximumSize(maxOpenFiles).removalListener(removalListener)
      .build(loader);

  try {
    LOG.debug("File system class: {}", fs.getClass());
    LOG.debug("end-offsets {}", endOffsets);

    //Restore the files in case they were corrupted and the operator
    Path writerPath = new Path(filePath);
    if (fs.exists(writerPath)) {
      for (String seenFileName : endOffsets.keySet()) {
        String seenFileNamePart = getPartFileNamePri(seenFileName);
        LOG.debug("seenFileNamePart: {}", seenFileNamePart);
        Path seenPartFilePath = new Path(filePath + "/" + seenFileNamePart);
        if (fs.exists(seenPartFilePath)) {
          LOG.debug("file exists {}", seenFileNamePart);
          long offset = endOffsets.get(seenFileName).longValue();
          FSDataInputStream inputStream = fs.open(seenPartFilePath);
          FileStatus status = fs.getFileStatus(seenPartFilePath);

          if (status.getLen() != offset) {
            LOG.info("file corrupted {} {} {}", seenFileNamePart, offset, status.getLen());
            byte[] buffer = new byte[COPY_BUFFER_SIZE];
            String tmpFileName = seenFileNamePart + TMP_EXTENSION;
            FSDataOutputStream fsOutput = streamsCache.get(tmpFileName);
            while (inputStream.getPos() < offset) {
              long remainingBytes = offset - inputStream.getPos();
              int bytesToWrite = remainingBytes < COPY_BUFFER_SIZE ? (int) remainingBytes : COPY_BUFFER_SIZE;
              inputStream.read(buffer);
              fsOutput.write(buffer, 0, bytesToWrite);
            }
            flush(fsOutput);

            FileContext fileContext = FileContext.getFileContext(fs.getUri());
            String tempTmpFilePath = getPartFileNamePri(filePath + File.separator + tmpFileName);
            Path tmpFilePath = new Path(tempTmpFilePath);
            tmpFilePath = fs.getFileStatus(tmpFilePath).getPath();
            LOG.debug("temp file path {}, rolling file path {}", tmpFilePath.toString(),
                status.getPath().toString());
            fileContext.rename(tmpFilePath, status.getPath(), Options.Rename.OVERWRITE);
          }
        }
      }
    }

    //delete the left over future rolling files produced from the previous crashed instance
    //of this operator.
    if (rollingFile) {
      for (String seenFileName : endOffsets.keySet()) {
        try {
          Integer part = openPart.get(seenFileName).getValue() + 1;
          while (true) {
            Path seenPartFilePath = new Path(filePath + "/" + getPartFileName(seenFileName, part));
            if (!fs.exists(seenPartFilePath)) {
              break;
            }
            fs.delete(seenPartFilePath, true);
            part = part + 1;
          }
          Path seenPartFilePath = new Path(filePath + "/"
              + getPartFileName(seenFileName, openPart.get(seenFileName).intValue()));
          //Handle the case when restoring to a checkpoint where the current rolling file
          //already has a length greater than max length.
          if (fs.getFileStatus(seenPartFilePath).getLen() > maxLength) {
            LOG.debug("rotating file at setup.");
            rotate(seenFileName);
          }
        } catch (IOException e) {
          throw new RuntimeException(e);
        } catch (ExecutionException e) {
          throw new RuntimeException(e);
        }
      }
    }

    LOG.debug("setup completed");
    LOG.debug("end-offsets {}", endOffsets);
  } catch (IOException e) {
    throw new RuntimeException(e);
  } catch (ExecutionException e) {
    throw new RuntimeException(e);
  }

  this.context = context;
  lastTimeStamp = System.currentTimeMillis();

  fileCounters.setCounter(Counters.TOTAL_BYTES_WRITTEN, new MutableLong());
  fileCounters.setCounter(Counters.TOTAL_TIME_ELAPSED, new MutableLong());
}
From source file:com.google.cloud.hadoop.fs.gcs.GoogleHadoopSyncableOutputStreamTest.java
License:Open Source License
@Test
public void testEndToEndHsync() throws IOException {
  Path objectPath = new Path(ghfs.getFileSystemRoot(), "dir/object.txt");
  FSDataOutputStream fout = ghfs.create(objectPath);

  byte[] data1 = new byte[] { 0x0f, 0x0e, 0x0e, 0x0d };
  byte[] data2 = new byte[] { 0x0b, 0x0e, 0x0e, 0x0f };
  byte[] data3 = new byte[] { 0x04, 0x02 };
  byte[] data1Read = new byte[4];
  byte[] data2Read = new byte[4];
  byte[] data3Read = new byte[2];

  fout.write(data1, 0, data1.length);
  fout.sync();
  Assert.assertEquals(4, ghfs.getFileStatus(objectPath).getLen());

  FSDataInputStream fin = ghfs.open(objectPath);
  fin.read(data1Read);
  fin.close();
  Assert.assertArrayEquals(data1, data1Read);

  fout.write(data2, 0, data2.length);
  fout.sync();
  Assert.assertEquals(8, ghfs.getFileStatus(objectPath).getLen());

  fin = ghfs.open(objectPath);
  fin.read(data1Read);
  fin.read(data2Read);
  fin.close();
  Assert.assertArrayEquals(data1, data1Read);
  Assert.assertArrayEquals(data2, data2Read);

  fout.write(data3, 0, data3.length);
  fout.close();
  Assert.assertEquals(10, ghfs.getFileStatus(objectPath).getLen());

  fin = ghfs.open(objectPath);
  fin.read(data1Read);
  fin.read(data2Read);
  fin.read(data3Read);
  fin.close();
  Assert.assertArrayEquals(data1, data1Read);
  Assert.assertArrayEquals(data2, data2Read);
  Assert.assertArrayEquals(data3, data3Read);
}
From source file:com.google.cloud.hadoop.fs.gcs.GoogleHadoopSyncableOutputStreamTest.java
License:Open Source License
@Test
public void testSyncCompositeLimitException() throws IOException {
  Path objectPath = new Path(ghfs.getFileSystemRoot(), "dir/object.txt");
  FSDataOutputStream fout = ghfs.create(objectPath);

  byte[] expected = new byte[GoogleHadoopSyncableOutputStream.MAX_COMPOSITE_COMPONENTS + 1];
  byte[] buf = new byte[1];
  for (int i = 0; i < GoogleHadoopSyncableOutputStream.MAX_COMPOSITE_COMPONENTS - 1; ++i) {
    buf[0] = (byte) i;
    expected[i] = buf[0];
    fout.write(buf, 0, 1);
    fout.sync();
  }

  // If the limit is N, then the Nth attempt to call sync() will fail, since it means the
  // base object already has N - 1 components, and we have 1 temporary object in-progress,
  // and a call to close() at this point brings the base object up to the limit of N.
  try {
    // Despite the exception we're expecting, the data here should still be safe.
    fout.write(new byte[] { 0x42 });
    expected[GoogleHadoopSyncableOutputStream.MAX_COMPOSITE_COMPONENTS - 1] = 0x42;
    fout.sync();
    Assert.fail("Expected CompositeLimitExceededException");
  } catch (CompositeLimitExceededException clee) {
    // Expected.
  }

  // Despite having thrown an exception, the stream is still safe to use and even write more data.
  fout.write(new byte[] { 0x11 });
  expected[GoogleHadoopSyncableOutputStream.MAX_COMPOSITE_COMPONENTS] = 0x11;
  fout.close();

  byte[] actual = new byte[expected.length];
  FSDataInputStream fin = ghfs.open(objectPath);
  fin.read(actual);
  fin.close();
  Assert.assertArrayEquals(expected, actual);
}