List of usage examples for org.apache.hadoop.fs.FSDataInputStream.read
@Override public int read(ByteBuffer buf) throws IOException
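None of the examples below call this ByteBuffer overload directly; they all use read(byte[]). A minimal sketch of the ByteBuffer variant follows, assuming Hadoop 2.x or later and an underlying stream that implements ByteBufferReadable (otherwise the call throws UnsupportedOperationException). The class name and the path /tmp/example.dat are hypothetical.

import java.nio.ByteBuffer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ByteBufferReadSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // Hypothetical path; replace with a real file on the cluster.
        try (FSDataInputStream in = fs.open(new Path("/tmp/example.dat"))) {
            ByteBuffer buf = ByteBuffer.allocate(8192);
            int bytesRead;
            // read(ByteBuffer) fills up to buf.remaining() bytes and returns -1 at end of stream.
            while ((bytesRead = in.read(buf)) != -1) {
                buf.flip();
                // ... consume bytesRead bytes from buf ...
                buf.clear();
            }
        }
    }
}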
From source file: org.elasticsearch.common.blobstore.hdfs.AbstractHdfsBlobContainer.java
License: Apache License
@Override
public void readBlob(final String blobName, final ReadBlobListener listener) {
    blobStore.executor().execute(new Runnable() {
        @Override
        public void run() {
            byte[] buffer = new byte[blobStore.bufferSizeInBytes()];
            FSDataInputStream fileStream;
            try {
                fileStream = blobStore.fileSystem().open(new Path(path, blobName));
            } catch (IOException e) {
                listener.onFailure(e);
                return;
            }
            try {
                int bytesRead;
                while ((bytesRead = fileStream.read(buffer)) != -1) {
                    listener.onPartial(buffer, 0, bytesRead);
                }
                listener.onCompleted();
            } catch (Exception e) {
                try {
                    fileStream.close();
                } catch (IOException e1) {
                    // ignore
                }
                listener.onFailure(e);
            }
        }
    });
}
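The loop above feeds a listener callback chunk by chunk. When the destination is simply another stream rather than a callback, Hadoop's org.apache.hadoop.io.IOUtils.copyBytes can replace the manual loop; this is not part of the Elasticsearch code above, just a sketch reusing blobStore, path and blobName from it and assuming some java.io.OutputStream out. The 4096 buffer size is arbitrary.

FSDataInputStream in = blobStore.fileSystem().open(new Path(path, blobName));
try {
    // Copies until end of stream; the 'false' flag leaves closing the streams to the caller.
    IOUtils.copyBytes(in, out, 4096, false);
} finally {
    in.close();
}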
From source file: org.elasticsearch.hadoop.hdfs.blobstore.AbstractHdfsBlobContainer.java
License: Apache License
@Override
public void readBlob(final String blobName, final ReadBlobListener listener) {
    blobStore.executor().execute(new Runnable() {
        @Override
        public void run() {
            byte[] buffer = new byte[blobStore.bufferSizeInBytes()];
            FSDataInputStream fileStream;
            try {
                fileStream = blobStore.fileSystem().open(new Path(path, blobName));
            } catch (Throwable th) {
                listener.onFailure(th);
                return;
            }
            try {
                int bytesRead;
                while ((bytesRead = fileStream.read(buffer)) != -1) {
                    listener.onPartial(buffer, 0, bytesRead);
                }
                listener.onCompleted();
            } catch (Throwable th) {
                try {
                    fileStream.close();
                } catch (Throwable t) {
                    // ignore
                }
                listener.onFailure(th);
            }
        }
    });
}
From source file: org.kitesdk.tools.JobClasspathHelper.java
License: Apache License
/**
 * @param conf
 *            Configuration object for the Job. Used to get the FileSystem associated with it.
 * @param libDir
 *            Destination directory in the FileSystem (usually HDFS) where to upload and look for the libs.
 * @param classesToInclude
 *            Classes that are needed by the job. JarFinder will look for the jar containing these classes.
 * @throws Exception
 */
public void prepareClasspath(final Configuration conf, final Path libDir, Class<?>... classesToInclude)
        throws Exception {
    FileSystem fs = null;
    List<Class<?>> classList = new ArrayList<Class<?>>(Arrays.asList(classesToInclude));
    fs = FileSystem.get(conf);
    Map<String, String> jarMd5Map = new TreeMap<String, String>();
    // For each class we use JarFinder to locate the jar in the local classpath.
    for (Class<?> clz : classList) {
        if (clz != null) {
            String localJarPath = JarFinder.getJar(clz);
            // We don't want to upload the same jar twice.
            if (!jarMd5Map.containsKey(localJarPath)) {
                // We should not push core Hadoop classes with this tool.
                // Should it be the responsibility of the developer, or do we keep this fence here?
                if (!clz.getName().startsWith("org.apache.hadoop.")) {
                    // We compute the MD5 sum of the local jar.
                    InputStream in = new FileInputStream(localJarPath);
                    boolean threw = true;
                    try {
                        String md5sum = DigestUtils.md5Hex(in);
                        jarMd5Map.put(localJarPath, md5sum);
                        threw = false;
                    } finally {
                        Closeables.close(in, threw);
                    }
                } else {
                    LOG.info("Ignoring {}, since it looks like it's from Hadoop's core libs", localJarPath);
                }
            }
        }
    }
    for (Entry<String, String> entry : jarMd5Map.entrySet()) {
        Path localJarPath = new Path(entry.getKey());
        String jarFilename = localJarPath.getName();
        String localMd5sum = entry.getValue();
        LOG.info("Jar {}. MD5 : [{}]", localJarPath, localMd5sum);
        Path remoteJarPath = new Path(libDir, jarFilename);
        Path remoteMd5Path = new Path(libDir, jarFilename + ".md5");
        // If the jar file or its MD5 file does not exist in HDFS, we force the upload of the jar.
        if (!fs.exists(remoteJarPath) || !fs.exists(remoteMd5Path)) {
            copyJarToHDFS(fs, localJarPath, localMd5sum, remoteJarPath, remoteMd5Path);
        } else {
            // If the jar exists, we validate the MD5 file.
            // If the MD5 sums differ, we upload the jar.
            FSDataInputStream md5FileStream = null;
            String remoteMd5sum = "";
            try {
                md5FileStream = fs.open(remoteMd5Path);
                byte[] md5bytes = new byte[32];
                if (32 == md5FileStream.read(md5bytes)) {
                    remoteMd5sum = new String(md5bytes, Charsets.UTF_8);
                }
            } finally {
                if (md5FileStream != null) {
                    md5FileStream.close();
                }
            }
            if (localMd5sum.equals(remoteMd5sum)) {
                LOG.info("Jar {} already exists [{}] and md5sum are equals", jarFilename,
                        remoteJarPath.toUri().toASCIIString());
            } else {
                LOG.info("Jar {} already exists [{}] and md5sum are different!", jarFilename,
                        remoteJarPath.toUri().toASCIIString());
                copyJarToHDFS(fs, localJarPath, localMd5sum, remoteJarPath, remoteMd5Path);
            }
        }
        // In all cases we want to add the jar to the DistributedCache's classpath.
        DistributedCache.addFileToClassPath(remoteJarPath, conf, fs);
    }
    // And we create the symlink (was necessary in earlier versions of Hadoop).
    DistributedCache.createSymlink(conf);
}
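One caveat in the MD5 check above: a single read(md5bytes) call is not guaranteed to fill the 32-byte buffer, in which case remoteMd5sum stays empty and the jar is re-uploaded unnecessarily. FSDataInputStream inherits readFully from DataInputStream, which blocks until the buffer is full or throws EOFException. A minimal sketch of that variant, reusing fs and remoteMd5Path from the example above:

byte[] md5bytes = new byte[32];
String remoteMd5sum;
FSDataInputStream md5FileStream = fs.open(remoteMd5Path);
try {
    // Blocks until all 32 bytes are read; throws EOFException if the file is shorter.
    md5FileStream.readFully(md5bytes);
    remoteMd5sum = new String(md5bytes, Charsets.UTF_8);
} finally {
    md5FileStream.close();
}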
From source file: org.mrgeo.test.MapOpTestVectorUtils.java
License: Apache License
public void compareVectors(Configuration conf, String testName) throws IOException {
    Path output = new Path(outputHdfs, testName);
    FileSystem fs = HadoopFileUtils.getFileSystem(conf, output);
    Path[] srcFiles;
    if (fs.isDirectory(output)) {
        FileStatus[] files = fs.listStatus(output);
        if (files == null || files.length == 0) {
            Assert.fail("No files founds: " + output.toString());
        }
        srcFiles = new Path[files.length];
        int cnt = 0;
        for (FileStatus file : files) {
            srcFiles[cnt++] = file.getPath();
        }
    } else {
        srcFiles = new Path[] { output };
    }
    for (Path file : srcFiles) {
        // read in the output file
        final long l = fs.getFileStatus(file).getLen();
        final byte[] testBuffer = new byte[(int) l];
        final FSDataInputStream fdis = fs.open(file);
        fdis.read(testBuffer);
        fdis.close();
        File baselineVector = new File(inputLocal + testName + "/" + file.getName());
        if (!baselineVector.exists()) {
            Assert.fail("Golden test file missing: " + baselineVector.toString());
        }
        // read in the baseline
        final byte[] baselineBuffer = new byte[(int) baselineVector.length()];
        final FileInputStream fis = new FileInputStream(baselineVector);
        fis.read(baselineBuffer);
        fis.close();
        Assert.assertEquals("Output is different!", new String(baselineBuffer), new String(testBuffer));
    }
}
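The single fdis.read(testBuffer) call above works for small test outputs but is not guaranteed to fill the buffer in one pass for larger files. A hedged alternative is the positional readFully on FSDataInputStream, which reads the full length or throws EOFException; a minimal sketch reusing fs, file and l from the test above:

final byte[] testBuffer = new byte[(int) l];
final FSDataInputStream fdis = fs.open(file);
try {
    // Reads exactly testBuffer.length bytes starting at offset 0.
    fdis.readFully(0, testBuffer);
} finally {
    fdis.close();
}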
From source file: org.oclc.firefly.hadoop.backup.BackupUtils.java
License: Apache License
/**
 * Copy the file at inputPath to the destination cluster using the same directory structure
 * @param srcFs The source file system
 * @param dstFs The destination file system
 * @param srcPath The source path
 * @param dstPath The destination path
 * @param buffer The buffer to use
 * @param username The user name. Used when checking in .Trash for deleted files
 * @param context The mapper context object
 * @param replication The replication factor
 * @throws InterruptedException hdfs exception
 * @throws IOException hdfs exception
 */
public static void copy(FileSystem srcFs, Path srcPath, FileSystem dstFs, Path dstPath, byte[] buffer,
        String username, short replication, Context context) throws InterruptedException, IOException {
    Path src = srcPath;
    int bytesRead = 0;
    long totalBytesRead = 0;
    if (!srcFs.exists(src)) {
        // File no longer exists. Attempt to get from .Trash
        // NOTE: It appears that deleted regions are not sent to trash, even with move to trash enabled,
        // so this file will never be found unless this aspect of hbase changes.
        src = BackupUtils.getPathInTrash(src, username, srcFs);
        if (src == null) {
            throw new FileNotFoundException("Could not recover deleted file from trash: " + srcPath);
        }
        LOG.warn("File has been deleted, but found it in trash: " + src);
    }
    long bytesToCopy = srcFs.getFileStatus(src).getLen();
    FSDataOutputStream out = dstFs.create(dstPath, replication);
    FSDataInputStream in = srcFs.open(src);
    int numWrites = 0;
    while ((bytesRead = in.read(buffer)) >= 0) {
        out.write(buffer, 0, bytesRead);
        totalBytesRead += bytesRead;
        if (context != null && numWrites % ONE_MB_OVER_64KB == 0) {
            context.setStatus("Copied " + totalBytesRead + " of " + bytesToCopy + " bytes for " + src);
        }
        numWrites++;
    }
    in.close();
    out.close();
}
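When no per-chunk progress reporting, explicit replication factor or .Trash fallback is needed, a cross-filesystem copy like the one above can be done in a single call with org.apache.hadoop.fs.FileUtil. This is a sketch, not the project's method; it reuses srcFs, srcPath, dstFs and dstPath from the signature above and assumes a Configuration conf is available (the original method does not take one):

// Copies src to dst across file systems; 'false' keeps the source file in place.
FileUtil.copy(srcFs, srcPath, dstFs, dstPath, false /* deleteSource */, conf);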
From source file: org.opencloudengine.garuda.backend.hdfs.HdfsServiceImpl.java
License: Open Source License
@Override
public void downloadFile(String path, HttpServletResponse response) throws Exception {
    this.mustExists(path);
    FileSystem fs = fileSystemFactory.getFileSystem();
    Path fsPath = new Path(path);

    FileStatus fileStatus = fs.getFileStatus(fsPath);
    if (!fileStatus.isFile()) {
        this.notFileException(fsPath.toString());
    }

    HdfsFileInfo fileInfo = new HdfsFileInfo(fileStatus, fs.getContentSummary(fsPath));

    FSDataInputStream in = fs.open(fsPath);
    String filename = fileInfo.getFilename();
    response.setHeader("Content-Length", "" + fileInfo.getLength());
    response.setHeader("Content-Transfer-Encoding", "binary");
    response.setHeader("Content-Type", "application/force-download");
    response.setHeader("Content-Disposition",
            MessageFormatter.format("attachment; fullyQualifiedPath={}; filename={};",
                    URLEncoder.encode(fileInfo.getFullyQualifiedPath(), "UTF-8"), filename).getMessage());
    response.setStatus(200);

    ServletOutputStream out = response.getOutputStream();
    byte[] b = new byte[1024];
    int numBytes = 0;
    while ((numBytes = in.read(b)) > 0) {
        out.write(b, 0, numBytes);
    }
    in.close();
    out.close();
    fs.close();
}
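The copy loop above leaks the HDFS stream if a read or write throws mid-transfer, and it loops on > 0 rather than the documented -1 end-of-stream sentinel. A minimal try-with-resources sketch of just the copy portion (Java 7+), reusing fs, fsPath and response from the method above:

try (FSDataInputStream in = fs.open(fsPath);
        ServletOutputStream out = response.getOutputStream()) {
    byte[] b = new byte[1024];
    int numBytes;
    // read(byte[]) returns -1 at end of stream.
    while ((numBytes = in.read(b)) != -1) {
        out.write(b, 0, numBytes);
    }
}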
From source file: org.pig.oink.operation.impl.StreamingBinOutputImpl.java
License: Apache License
@Override
public void write(OutputStream output) throws IOException, WebApplicationException {
    FileSystem fileSystem = null;
    try {
        fileSystem = FileSystem.get(conf);
    } catch (Exception e) {
        logger.error(e.getMessage(), e);
        throw new WebApplicationException(Response.status(500)
                .entity("Unable to connect to Hadoop FileSytem. Please try again later").build());
    }
    Path filePath = new Path(filePathStr);
    if (isAvailable()) {
        FSDataInputStream in = null;
        try {
            in = fileSystem.open(filePath);
            byte[] b = new byte[1024];
            int numBytes = 0;
            while ((numBytes = in.read(b)) > 0) {
                output.write(b, 0, numBytes);
            }
            in.close();
            output.close();
        } catch (Exception e) {
            logger.error(e.getMessage(), e);
            throw new WebApplicationException(
                    Response.status(500).entity("Unable to retrieve file from Hadoop FileSystem").build());
        }
    }
}
From source file: org.sleuthkit.hadoop.clustering.ClusterJSONBuilder.java
License: Open Source License
private static void writeFileToStream(Path path, FSDataOutputStream stream) throws IOException {
    FileSystem fs = FileSystem.get(new Configuration());
    FSDataInputStream in = fs.open(path);
    byte[] bytes = new byte[1024];
    int i = in.read(bytes);
    while (i != -1) {
        stream.write(bytes, 0, i);
        i = in.read(bytes);
    }
}
From source file: org.sleuthkit.hadoop.GrepJSONBuilder.java
License: Open Source License
public static void writeFileToStream(Path path, FSDataOutputStream stream) throws IOException {
    FileSystem fs = FileSystem.get(new Configuration());
    FSDataInputStream in = fs.open(path);
    byte[] bytes = new byte[1024];
    int i = in.read(bytes);
    while (i != -1) {
        stream.write(bytes, 0, i);
        i = in.read(bytes);
    }
}
From source file: org.sleuthkit.hadoop.GrepReportGenerator.java
License: Open Source License
public static void runPipeline(String regexFile, String deviceID, String friendlyName, String imgDir) {
    // STEP 1: Generate 'a' value file which maps regexes to numbers.
    try {
        // Read in the regex file.
        FileSystem fs = FileSystem.get(new Configuration());
        Path inFile = new Path(regexFile);
        FSDataInputStream in = fs.open(inFile);

        byte[] bytes = new byte[1024];
        StringBuilder b = new StringBuilder();
        int i = in.read(bytes);
        while (i != -1) {
            b.append(new String(bytes).substring(0, i));
            i = in.read(bytes);
        }
        // Stringified version of the regex file.
        String regexes = b.toString();

        Job job = SKJobFactory.createJob(deviceID, friendlyName, JobNames.GREP_COUNT_MATCHED_EXPRS_JSON);
        job.setJarByClass(GrepCountMapper.class);
        job.setMapperClass(GrepCountMapper.class);
        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(LongWritable.class);
        job.getConfiguration().set("mapred.mapper.regex", regexes);
        // We must have precisely one reducer.
        job.setNumReduceTasks(1);
        job.setReducerClass(GrepCountReducer.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(Text.class);
        job.setInputFormatClass(FsEntryHBaseInputFormat.class);
        FsEntryHBaseInputFormat.setupJob(job, deviceID);
        job.setOutputFormatClass(TextOutputFormat.class);
        TextOutputFormat.setOutputPath(job, new Path(imgDir + "/grep/count"));
        job.waitForCompletion(true);

        ///////////////////////////////////////////////////////////////////

        job = SKJobFactory.createJob(deviceID, friendlyName, JobNames.GREP_MATCHED_EXPRS_JSON);
        job.setJarByClass(GrepMatchMapper.class);
        job.setMapperClass(GrepMatchMapper.class);
        job.setMapOutputKeyClass(NullWritable.class);
        job.setMapOutputValueClass(Text.class);
        job.getConfiguration().set("mapred.mapper.regex", regexes);
        // We must have precisely one reducer.
        job.setNumReduceTasks(1);
        job.setReducerClass(GrepMatchReducer.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(Text.class);
        job.setInputFormatClass(FsEntryHBaseInputFormat.class);
        FsEntryHBaseInputFormat.setupJob(job, deviceID);
        job.setOutputFormatClass(TextOutputFormat.class);
        TextOutputFormat.setOutputPath(job, new Path(imgDir + "/grep/matchinfo"));
        job.waitForCompletion(true);

        ///////////////////////////////////////////////////////////////////

        // Finally, write the output.
        GrepJSONBuilder.buildReport(new Path(imgDir + "/grep/count/part-r-00000"),
                new Path(imgDir + "/grep/matchinfo/part-r-00000"),
                new Path(imgDir + "/reports/data/searchhits.js"));
    } catch (Exception ex) {
        LOG.error("Exception while attempting to output grep.", ex);
    }
}
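Note that b.append(new String(bytes).substring(0, i)) decodes the full 1024-byte buffer with the platform default charset and then substrings by character count, which can garble multi-byte characters that straddle buffer boundaries. A hedged alternative is to accumulate the raw bytes and decode once at the end, assuming the regex file is UTF-8; a minimal sketch reusing fs and inFile from the code above (ByteArrayOutputStream is java.io, StandardCharsets is java.nio.charset):

FSDataInputStream in = fs.open(inFile);
ByteArrayOutputStream bos = new ByteArrayOutputStream();
try {
    byte[] bytes = new byte[1024];
    int i;
    while ((i = in.read(bytes)) != -1) {
        bos.write(bytes, 0, i);
    }
} finally {
    in.close();
}
// Decode the whole file in one pass.
String regexes = new String(bos.toByteArray(), StandardCharsets.UTF_8);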