Example usage for org.apache.hadoop.fs FSDataInputStream read

List of usage examples for org.apache.hadoop.fs FSDataInputStream read

Introduction

On this page you can find example usages of org.apache.hadoop.fs.FSDataInputStream.read.

Prototype

@Override
public int read(ByteBuffer buf) throws IOException
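
The prototype above is the ByteBuffer overload; most of the examples below use the InputStream-style read(byte[]) overload that FSDataInputStream inherits instead. As a minimal, hypothetical sketch (the path is a placeholder, and read(ByteBuffer) only works when the underlying stream implements ByteBufferReadable, as HDFS streams do), a read loop with this overload might look like:

import java.nio.ByteBuffer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ByteBufferReadExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // Placeholder path; substitute a file that exists in your cluster.
        Path path = new Path("/tmp/example.bin");

        ByteBuffer buf = ByteBuffer.allocate(4096);
        try (FSDataInputStream in = fs.open(path)) {
            int bytesRead;
            // read(ByteBuffer) returns the number of bytes read, or -1 at end of
            // stream; it may throw UnsupportedOperationException if the underlying
            // stream does not implement ByteBufferReadable.
            while ((bytesRead = in.read(buf)) != -1) {
                buf.flip();
                // ... consume bytesRead bytes from buf here ...
                buf.clear();
            }
        }
    }
}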

Usage

From source file: org.elasticsearch.common.blobstore.hdfs.AbstractHdfsBlobContainer.java

License: Apache License

@Override
public void readBlob(final String blobName, final ReadBlobListener listener) {
    blobStore.executor().execute(new Runnable() {
        @Override
        public void run() {
            byte[] buffer = new byte[blobStore.bufferSizeInBytes()];

            FSDataInputStream fileStream;
            try {
                fileStream = blobStore.fileSystem().open(new Path(path, blobName));
            } catch (IOException e) {
                listener.onFailure(e);
                return;
            }
            try {
                int bytesRead;
                while ((bytesRead = fileStream.read(buffer)) != -1) {
                    listener.onPartial(buffer, 0, bytesRead);
                }
                listener.onCompleted();
            } catch (Exception e) {
                try {
                    fileStream.close();
                } catch (IOException e1) {
                    // ignore
                }
                listener.onFailure(e);
            }
        }
    });
}

From source file: org.elasticsearch.hadoop.hdfs.blobstore.AbstractHdfsBlobContainer.java

License: Apache License

@Override
public void readBlob(final String blobName, final ReadBlobListener listener) {
    blobStore.executor().execute(new Runnable() {
        @Override
        public void run() {
            byte[] buffer = new byte[blobStore.bufferSizeInBytes()];

            FSDataInputStream fileStream;
            try {
                fileStream = blobStore.fileSystem().open(new Path(path, blobName));
            } catch (Throwable th) {
                listener.onFailure(th);
                return;
            }
            try {
                int bytesRead;
                while ((bytesRead = fileStream.read(buffer)) != -1) {
                    listener.onPartial(buffer, 0, bytesRead);
                }
                listener.onCompleted();
            } catch (Throwable th) {
                try {
                    fileStream.close();
                } catch (Throwable t) {
                    // ignore
                }
                listener.onFailure(th);
            }
        }
    });
}

From source file: org.kitesdk.tools.JobClasspathHelper.java

License: Apache License

/**
 * Uploads the jars containing the given classes to the given HDFS directory (if not
 * already present with a matching MD5) and adds them to the job's classpath.
 * @param conf
 *            Configuration object for the Job. Used to get the FileSystem associated with it.
 * @param libDir
 *            Destination directory in the FileSystem (Usually HDFS) where to upload and look for the libs.
 * @param classesToInclude
 *            Classes that are needed by the job. JarFinder will look for the jar containing these classes.
 * @throws Exception
 */
public void prepareClasspath(final Configuration conf, final Path libDir, Class<?>... classesToInclude)
        throws Exception {
    FileSystem fs = null;
    List<Class<?>> classList = new ArrayList<Class<?>>(Arrays.asList(classesToInclude));
    fs = FileSystem.get(conf);
    Map<String, String> jarMd5Map = new TreeMap<String, String>();
    // For each class, we use JarFinder to locate the jar in the local classpath.
    for (Class<?> clz : classList) {
        if (clz != null) {
            String localJarPath = JarFinder.getJar(clz);
            // we don't want to upload the same jar twice
            if (!jarMd5Map.containsKey(localJarPath)) {
                // We should not push core Hadoop classes with this tool.
                // Should this be the developer's responsibility, or should we
                // keep this check here?
                if (!clz.getName().startsWith("org.apache.hadoop.")) {
                    // we compute the MD5 sum of the local jar
                    InputStream in = new FileInputStream(localJarPath);
                    boolean threw = true;
                    try {
                        String md5sum = DigestUtils.md5Hex(in);
                        jarMd5Map.put(localJarPath, md5sum);
                        threw = false;
                    } finally {
                        Closeables.close(in, threw);
                    }
                } else {
                    LOG.info("Ignoring {}, since it looks like it's from Hadoop's core libs", localJarPath);
                }
            }
        }
    }

    for (Entry<String, String> entry : jarMd5Map.entrySet()) {
        Path localJarPath = new Path(entry.getKey());
        String jarFilename = localJarPath.getName();
        String localMd5sum = entry.getValue();
        LOG.info("Jar {}. MD5 : [{}]", localJarPath, localMd5sum);

        Path remoteJarPath = new Path(libDir, jarFilename);
        Path remoteMd5Path = new Path(libDir, jarFilename + ".md5");

        // If the jar file does not exist in HDFS or if the MD5 file does not exist in HDFS,
        // we force the upload of the jar.
        if (!fs.exists(remoteJarPath) || !fs.exists(remoteMd5Path)) {
            copyJarToHDFS(fs, localJarPath, localMd5sum, remoteJarPath, remoteMd5Path);
        } else {
            // If the jar exists, we validate the MD5 file.
            // If the MD5 sums differ, we upload the jar again.
            FSDataInputStream md5FileStream = null;

            String remoteMd5sum = "";
            try {
                md5FileStream = fs.open(remoteMd5Path);
                byte[] md5bytes = new byte[32];
                if (32 == md5FileStream.read(md5bytes)) {
                    remoteMd5sum = new String(md5bytes, Charsets.UTF_8);
                }
            } finally {
                if (md5FileStream != null) {
                    md5FileStream.close();
                }
            }

            if (localMd5sum.equals(remoteMd5sum)) {
                LOG.info("Jar {} already exists [{}] and md5sum are equals", jarFilename,
                        remoteJarPath.toUri().toASCIIString());
            } else {
                LOG.info("Jar {} already exists [{}] and md5sum are different!", jarFilename,
                        remoteJarPath.toUri().toASCIIString());
                copyJarToHDFS(fs, localJarPath, localMd5sum, remoteJarPath, remoteMd5Path);
            }

        }
        // In all cases we want to add the jar to the DistributedCache's classpath.
        DistributedCache.addFileToClassPath(remoteJarPath, conf, fs);
    }
    // and we create the symlink (was necessary in earlier versions of Hadoop)
    DistributedCache.createSymlink(conf);
}
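
A hypothetical call site for this helper might look like the sketch below; MyMapper and MyReducer are placeholder classes, the lib directory is arbitrary, and the no-argument JobClasspathHelper constructor is an assumption.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.kitesdk.tools.JobClasspathHelper;

public class ClasspathSetupExample {
    public static void setup(Configuration conf) throws Exception {
        // Upload the jars containing the job's classes to a shared lib directory
        // and register them on the distributed classpath.
        // MyMapper and MyReducer are placeholders for the job's own classes.
        Path libDir = new Path("/tmp/job-libs");
        new JobClasspathHelper().prepareClasspath(conf, libDir, MyMapper.class, MyReducer.class);
    }
}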

From source file: org.mrgeo.test.MapOpTestVectorUtils.java

License: Apache License

public void compareVectors(Configuration conf, String testName) throws IOException {

    Path output = new Path(outputHdfs, testName);
    FileSystem fs = HadoopFileUtils.getFileSystem(conf, output);

    Path[] srcFiles;
    if (fs.isDirectory(output)) {
        FileStatus[] files = fs.listStatus(output);
        if (files == null || files.length == 0) {
            Assert.fail("No files founds: " + output.toString());
        }
        srcFiles = new Path[files.length];

        int cnt = 0;
        for (FileStatus file : files) {
            srcFiles[cnt++] = file.getPath();
        }
    } else {
        srcFiles = new Path[] { output };
    }

    for (Path file : srcFiles) {
        // read in the output file
        final long l = fs.getFileStatus(file).getLen();
        final byte[] testBuffer = new byte[(int) l];
        final FSDataInputStream fdis = fs.open(file);
        fdis.read(testBuffer);
        fdis.close();

        File baselineVector = new File(inputLocal + testName + "/" + file.getName());

        if (!baselineVector.exists()) {
            Assert.fail("Golden test file missing: " + baselineVector.toString());
        }

        // read in the baseline
        final byte[] baselineBuffer = new byte[(int) baselineVector.length()];

        final FileInputStream fis = new FileInputStream(baselineVector);
        fis.read(baselineBuffer);
        fis.close();

        Assert.assertEquals("Output is different!", new String(baselineBuffer), new String(testBuffer));
    }

}

From source file: org.oclc.firefly.hadoop.backup.BackupUtils.java

License: Apache License

/**
 * Copy the file at inputPath to the destination cluster using the same directory structure
 * @param srcFs The source file system
 * @param dstFs The destination file system
 * @param srcPath The source path
 * @param dstPath The destination path
 * @param buffer The buffer to use
 * @param username The user name. Used when checking in .Trash for deleted files
 * @param context The mapper context object
 * @param replication The replication factor
 * @throws InterruptedException hdfs exception
 * @throws IOException hdfs exception
 */
public static void copy(FileSystem srcFs, Path srcPath, FileSystem dstFs, Path dstPath, byte[] buffer,
        String username, short replication, Context context) throws InterruptedException, IOException {
    Path src = srcPath;
    int bytesRead = 0;
    long totalBytesRead = 0;

    if (!srcFs.exists(src)) {
        // File no longer exists. Attempt to get from .Trash
        // NOTE: It appears that deleted regions are not sent to trash, even with move to trash enabled,
        // so this file will never be found unless this aspect of hbase changes.
        src = BackupUtils.getPathInTrash(src, username, srcFs);

        if (src == null) {
            throw new FileNotFoundException("Could not recover deleted file from trash: " + srcPath);
        }

        LOG.warn("File has been deleted, but found it in trash: " + src);
    }

    long bytesToCopy = srcFs.getFileStatus(src).getLen();
    FSDataOutputStream out = dstFs.create(dstPath, replication);
    FSDataInputStream in = srcFs.open(src);

    int numWrites = 0;
    while ((bytesRead = in.read(buffer)) >= 0) {
        out.write(buffer, 0, bytesRead);
        totalBytesRead += bytesRead;

        if (context != null && numWrites % ONE_MB_OVER_64KB == 0) {
            context.setStatus("Copied " + totalBytesRead + " of " + bytesToCopy + " bytes for " + src);
        }

        numWrites++;
    }

    in.close();
    out.close();
}
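
A hypothetical call of this utility might look like the following sketch; the paths, user name, buffer size, and replication factor are placeholders, and passing a null Context is allowed by the null check in the method above.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.oclc.firefly.hadoop.backup.BackupUtils;

public class CopyExample {
    public static void copyOneFile(Configuration srcConf, Configuration dstConf) throws Exception {
        FileSystem srcFs = FileSystem.get(srcConf);
        FileSystem dstFs = FileSystem.get(dstConf);
        byte[] buffer = new byte[64 * 1024]; // reused for every read/write cycle

        // Placeholder paths; the source path is mirrored into the backup cluster.
        Path src = new Path("/hbase/data/table1/region1/cf/storefile");
        Path dst = new Path("/backup/hbase/data/table1/region1/cf/storefile");

        // No mapper context here, so progress reporting inside copy() is skipped.
        BackupUtils.copy(srcFs, src, dstFs, dst, buffer, "hbase", (short) 3, null);
    }
}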

From source file: org.opencloudengine.garuda.backend.hdfs.HdfsServiceImpl.java

License: Open Source License

@Override
public void downloadFile(String path, HttpServletResponse response) throws Exception {
    this.mustExists(path);
    FileSystem fs = fileSystemFactory.getFileSystem();
    Path fsPath = new Path(path);

    FileStatus fileStatus = fs.getFileStatus(fsPath);
    if (!fileStatus.isFile()) {
        this.notFileException(fsPath.toString());
    }
    HdfsFileInfo fileInfo = new HdfsFileInfo(fileStatus, fs.getContentSummary(fsPath));

    FSDataInputStream in = fs.open(fsPath);
    String filename = fileInfo.getFilename();
    response.setHeader("Content-Length", "" + fileInfo.getLength());
    response.setHeader("Content-Transfer-Encoding", "binary");
    response.setHeader("Content-Type", "application/force-download");
    response.setHeader("Content-Disposition",
            MessageFormatter
                    .format("attachment; fullyQualifiedPath={}; filename={};",
                            URLEncoder.encode(fileInfo.getFullyQualifiedPath(), "UTF-8"), filename)
                    .getMessage());
    response.setStatus(200);

    ServletOutputStream out = response.getOutputStream();

    byte[] b = new byte[1024];
    int numBytes = 0;
    while ((numBytes = in.read(b)) > 0) {
        out.write(b, 0, numBytes);
    }

    in.close();
    out.close();
    fs.close();
}

From source file: org.pig.oink.operation.impl.StreamingBinOutputImpl.java

License: Apache License

@Override
public void write(OutputStream output) throws IOException, WebApplicationException {
    FileSystem fileSystem = null;
    try {
        fileSystem = FileSystem.get(conf);
    } catch (Exception e) {
        logger.error(e.getMessage(), e);
        throw new WebApplicationException(Response.status(500)
                .entity("Unable to connect to Hadoop FileSytem. Please try again later").build());
    }
    Path filePath = new Path(filePathStr);

    if (isAvailable()) {
        FSDataInputStream in = null;
        try {
            in = fileSystem.open(filePath);
            byte[] b = new byte[1024];
            int numBytes = 0;
            while ((numBytes = in.read(b)) > 0) {
                output.write(b, 0, numBytes);
            }
            in.close();
            output.close();
        } catch (Exception e) {
            logger.error(e.getMessage(), e);
            throw new WebApplicationException(
                    Response.status(500).entity("Unable to retrieve file from Hadoop FileSystem").build());
        }
    }
}

From source file: org.sleuthkit.hadoop.clustering.ClusterJSONBuilder.java

License: Open Source License

private static void writeFileToStream(Path path, FSDataOutputStream stream) throws IOException {
    FileSystem fs = FileSystem.get(new Configuration());

    FSDataInputStream in = fs.open(path);

    byte[] bytes = new byte[1024];
    int i = in.read(bytes);
    while (i != -1) {
        stream.write(bytes, 0, i);
        i = in.read(bytes);
    }
}

From source file: org.sleuthkit.hadoop.GrepJSONBuilder.java

License: Open Source License

public static void writeFileToStream(Path path, FSDataOutputStream stream) throws IOException {

    FileSystem fs = FileSystem.get(new Configuration());
    FSDataInputStream in = fs.open(path);

    byte[] bytes = new byte[1024];
    int i = in.read(bytes);
    while (i != -1) {
        stream.write(bytes, 0, i);
        i = in.read(bytes);
    }
}

From source file: org.sleuthkit.hadoop.GrepReportGenerator.java

License: Open Source License

public static void runPipeline(String regexFile, String deviceID, String friendlyName, String imgDir) {
    // STEP 1: Generate a value file which maps regexes to numbers.

    try {
        // Read in the regex file.
        FileSystem fs = FileSystem.get(new Configuration());
        Path inFile = new Path(regexFile);
        FSDataInputStream in = fs.open(inFile);

        byte[] bytes = new byte[1024];

        StringBuilder b = new StringBuilder();
        int i = in.read(bytes);
        while (i != -1) {
            b.append(new String(bytes).substring(0, i));
            i = in.read(bytes);
        }

        // Stringified version of the regex file.
        String regexes = b.toString();

        Job job = SKJobFactory.createJob(deviceID, friendlyName, JobNames.GREP_COUNT_MATCHED_EXPRS_JSON);
        job.setJarByClass(GrepCountMapper.class);
        job.setMapperClass(GrepCountMapper.class);
        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(LongWritable.class);

        job.getConfiguration().set("mapred.mapper.regex", regexes);

        // we must have precisely one reducer.
        job.setNumReduceTasks(1);
        job.setReducerClass(GrepCountReducer.class);

        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(Text.class);

        job.setInputFormatClass(FsEntryHBaseInputFormat.class);
        FsEntryHBaseInputFormat.setupJob(job, deviceID);
        job.setOutputFormatClass(TextOutputFormat.class);
        TextOutputFormat.setOutputPath(job, new Path(imgDir + "/grep/count"));

        job.waitForCompletion(true);
        ///////////////////////////////////////////////////////////////////

        job = SKJobFactory.createJob(deviceID, friendlyName, JobNames.GREP_MATCHED_EXPRS_JSON);
        job.setJarByClass(GrepMatchMapper.class);
        job.setMapperClass(GrepMatchMapper.class);
        job.setMapOutputKeyClass(NullWritable.class);
        job.setMapOutputValueClass(Text.class);

        job.getConfiguration().set("mapred.mapper.regex", regexes);

        // we must have precisely one reducer.
        job.setNumReduceTasks(1);
        job.setReducerClass(GrepMatchReducer.class);

        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(Text.class);

        job.setInputFormatClass(FsEntryHBaseInputFormat.class);
        FsEntryHBaseInputFormat.setupJob(job, deviceID);
        job.setOutputFormatClass(TextOutputFormat.class);

        TextOutputFormat.setOutputPath(job, new Path(imgDir + "/grep/matchinfo"));

        job.waitForCompletion(true);

        ///////////////////////////////////////////////////////////////////
        // Finally, write the output.

        GrepJSONBuilder.buildReport(new Path(imgDir + "/grep/count/part-r-00000"),
                new Path(imgDir + "/grep/matchinfo/part-r-00000"),
                new Path(imgDir + "/reports/data/searchhits.js"));

    } catch (Exception ex) {
        LOG.error("Exception while attempting to output grep.", ex);
    }
}