List of usage examples for org.apache.hadoop.fs FSDataInputStream read
@Override public int read(ByteBuffer buf) throws IOException
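None of the examples below exercises this ByteBuffer overload directly; they all use the byte-array reads. A minimal, self-contained sketch of the ByteBuffer signature (the input path is hypothetical) might look like this:

    import java.nio.ByteBuffer;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FSDataInputStream;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class ByteBufferReadExample {
        public static void main(String[] args) throws Exception {
            FileSystem fs = FileSystem.get(new Configuration());
            Path path = new Path("/tmp/example.txt"); // hypothetical input file
            ByteBuffer buf = ByteBuffer.allocate(4096);
            try (FSDataInputStream in = fs.open(path)) {
                // Returns the number of bytes read, or -1 at end of stream.
                // Like read(byte[]), it may fill less than the buffer's remaining
                // space, and the wrapped stream must support ByteBuffer reads
                // (otherwise an UnsupportedOperationException is thrown).
                int n = in.read(buf);
                System.out.println("Read " + n + " bytes");
            }
        }
    }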
From source file:org.sleuthkit.hadoop.GrepSearchJob.java
License:Open Source License
public static int runPipeline(String table, String deviceID, String regexFile, String friendlyName) {
    try {
        Job job = SKJobFactory.createJob(deviceID, friendlyName, JobNames.GREP_SEARCH);
        job.setJarByClass(GrepSearchJob.class);
        FileSystem fs = FileSystem.get(job.getConfiguration());
        //fs.delete(new Path(outputdir), true);
        Path inFile = new Path(regexFile);
        FSDataInputStream in = fs.open(inFile);

        // Read the regex file and set a property on the configuration object
        // to store the regexes in a place accessible by all of the child jobs.
        byte[] bytes = new byte[1024];
        StringBuilder b = new StringBuilder();
        int i = in.read(bytes);
        while (i != -1) {
            // Decode only the bytes actually read in this pass
            b.append(new String(bytes, 0, i));
            i = in.read(bytes);
        }
        System.out.println("regexes are: " + b.toString());
        String regexes = b.toString();
        job.getConfiguration().set("mapred.mapper.regex", regexes);

        job.setMapperClass(GrepMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(FsEntry.class);

        // We are not reducing.
        job.setNumReduceTasks(0);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(FsEntry.class);

        job.setInputFormatClass(FsEntryHBaseInputFormat.class);
        FsEntryHBaseInputFormat.setupJob(job, deviceID);
        job.setOutputFormatClass(FsEntryHBaseOutputFormat.class);

        System.out.println("About to run the job...");
        return job.waitForCompletion(true) ? 0 : 1;
    } catch (Exception ex) {
        ex.printStackTrace();
        return 2;
    }
}
From source file:org.trommel.trommel.mapreduce.TrommelDriver.java
License:Apache License
private static int processScript(Level logLevel, int numOfReducers, String trommelScriptFilePath) throws Exception {
    int exitCode = 0;
    FrontEndInterpreter frontEndInterpreter = null;
    FileSystem fileSystem = null;
    Path cachedScript = null;

    logger.setLevel(logLevel);

    try {
        logger.info(String.format("Loading and parsing TrommelScript file %1$s ...", trommelScriptFilePath));
        Lexer lexer = new Lexer(new PushbackReader(new BufferedReader(new FileReader(trommelScriptFilePath)), 4096));
        Parser parser = new Parser(lexer);
        Start ast = parser.parse();
        ValidationInterpreter validationInterpreter = new ValidationInterpreter();

        logger.info("Validating TrommelScript...");
        ast.apply(validationInterpreter);

        if (validationInterpreter.getSemanticErrors().size() != 0) {
            // Validation of script failed
            logger.info("TrommelScript failed validation with the following errors:");
            for (String errorMessage : validationInterpreter.getSemanticErrors()) {
                logger.info(errorMessage);
            }
            return exitCode;
        }

        logger.info("Interpreting TrommelScript...");
        frontEndInterpreter = new FrontEndInterpreter(logger, DEFAULT_HDFS_PATH);
        ast.apply(frontEndInterpreter);

        logger.debug("Creating Job object");
        Job job = new Job();
        job.setJarByClass(TrommelDriver.class);

        // Copy the TrommelScript file from the local file system to HDFS and add it to the distributed cache
        fileSystem = FileSystem.get(job.getConfiguration());
        Path src = new Path(trommelScriptFilePath);
        cachedScript = new Path(String.format("/tmp/%1$s_%2$s", src.getName(), UUID.randomUUID().toString()));
        fileSystem.copyFromLocalFile(src, cachedScript);
        logger.debug(String.format("Moved TrommelScript file to HDFS as %1$s.", cachedScript.toString()));

        logger.debug("Adding TrommelScript file to DistributedCache.");
        DistributedCache.addCacheFile(new URI(cachedScript.toString()), job.getConfiguration());

        logger.debug(String.format("Setting LOGGING_LEVEL_CONFIG_PROP to %1$s", logLevel.toString()));
        job.getConfiguration().set(LOGGING_LEVEL_CONFIG_PROP, logLevel.toString());

        // Specify HDFS input/output locations
        logger.debug(String.format("Calling FileInputFormat.addInputPath() with %1$s.",
                frontEndInterpreter.getHdfsInputFilePath()));
        FileInputFormat.addInputPath(job, new Path(frontEndInterpreter.getHdfsInputFilePath()));
        logger.debug(String.format("Calling FileOutputFormat.setOutputPath() with %1$s.",
                frontEndInterpreter.getHdfsOutputFilePath()));
        FileOutputFormat.setOutputPath(job, new Path(frontEndInterpreter.getHdfsOutputFilePath()));

        // Hadoop setup
        job.setMapperClass(TrommelMapper.class);
        if (frontEndInterpreter.samplingData()) {
            logger.debug("Trommel is sampling data, 0 Reducers set.");
            job.setNumReduceTasks(0);
        } else {
            logger.debug(String.format("Setting number of Reducers to %1$s.", numOfReducers));
            job.setReducerClass(TrommelReducer.class);
            job.setNumReduceTasks(numOfReducers);
        }
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        logger.debug("Running job");
        if (!job.waitForCompletion(true)) {
            exitCode = 1;
        } else if (frontEndInterpreter.getLocalFilePath() != null) {
            // User would like data exported to the local file system
            logger.debug(String.format("Exporting Trommel output from %1$s to %2$s.",
                    frontEndInterpreter.getHdfsOutputFilePath(), frontEndInterpreter.getLocalFilePath()));
            Path mergeFilePath = new Path(String.format("/tmp/%1$s", UUID.randomUUID()));
            FSDataOutputStream mergeFileStream = fileSystem.create(mergeFilePath);
            Path localFilePath = new Path(frontEndInterpreter.getLocalFilePath());
            FileStatus[] outputFileStatuses = fileSystem
                    .listStatus(new Path(frontEndInterpreter.getHdfsOutputFilePath()));
            FSDataInputStream outputFileStream = null;
            String fileNameFilter = (frontEndInterpreter.samplingData() ? "part-m" : "part-r");

            try {
                // Loop through the output, merging any reducer output file for export to the local file system
                for (FileStatus outputFileStatus : outputFileStatuses) {
                    if (!outputFileStatus.isDir() && outputFileStatus.getPath().getName().contains(fileNameFilter)) {
                        logger.debug(String.format("Merging file %1$s into local file system output.",
                                outputFileStatus.getPath().toString()));
                        outputFileStream = fileSystem.open(outputFileStatus.getPath());
                        byte[] buffer = new byte[(int) outputFileStatus.getLen()];
                        // Note: a single read() is not guaranteed to fill the buffer;
                        // readFully() would be the safer choice here.
                        outputFileStream.read(buffer);
                        mergeFileStream.write(buffer);
                        outputFileStream.close();
                    }
                }
            } finally {
                if (mergeFileStream != null) {
                    mergeFileStream.close();
                    fileSystem.copyToLocalFile(mergeFilePath, localFilePath);
                    fileSystem.delete(mergeFilePath, true);
                }
            }
        }
    } finally {
        try {
            if (fileSystem != null) {
                // Clean up any temp files if needed
                if (frontEndInterpreter.getHdfsOutputFilePath().equals(DEFAULT_HDFS_PATH)) {
                    logger.debug("Deleting temp files from /tmp/Trommel");
                    fileSystem.delete(new Path(DEFAULT_HDFS_PATH), true);
                }
                // Clean up the cached file
                logger.debug(String.format("Deleting cached TrommelScript file %1$s", cachedScript.toString()));
                fileSystem.delete(cachedScript, true);
            }
        } catch (IOException ioe) {
            // Couldn't delete file for some reason, alert user
            logger.error(String.format(
                    "Exception encountered deleting cached TrommelScript file %1$s. Error message: %2$s",
                    cachedScript.toString(), ioe.getMessage()));
        }
    }

    return exitCode;
}
From source file:ph.fingra.hadoop.mapred.common.CopyToLocalFile.java
License:Apache License
public void dirToFile(String srcdir, String dstfile) throws IOException {
    FileSystem fs = FileSystem.get(URI.create(srcdir), getConf());
    FileSystem local = FileSystem.getLocal(getConf());
    Path srcPath = new Path(srcdir);
    Path dstPath = new Path(dstfile);

    // Delete the destination local file if it already exists
    if (local.exists(dstPath)) {
        local.delete(dstPath, true);
    }

    // Get the HDFS file list
    PathFilter resultFileFilter = new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().startsWith(ConstantVars.RESULT_FILE_PREFIX);
        }
    };
    FileStatus[] status = fs.listStatus(srcPath, resultFileFilter);
    Path[] listedPaths = FileUtil.stat2Paths(status);

    if (listedPaths.length > 0) {
        // Create the local output stream
        FSDataOutputStream out = local.create(dstPath);
        for (int i = 0; i < listedPaths.length; i++) {
            // Open an HDFS input stream and copy it in 256-byte chunks
            FSDataInputStream in = fs.open(listedPaths[i]);
            byte[] buffer = new byte[256];
            int bytesRead = 0;
            while ((bytesRead = in.read(buffer)) > 0) {
                out.write(buffer, 0, bytesRead);
            }
            in.close();
        }
        out.close();
    }
    return;
}
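As an aside, the manual 256-byte copy loop above can also be expressed with Hadoop's org.apache.hadoop.io.IOUtils helper. A minimal sketch of just the inner copy, keeping the surrounding setup unchanged:

    import org.apache.hadoop.io.IOUtils;

    // Inside the for loop, instead of the manual read/write loop:
    FSDataInputStream in = fs.open(listedPaths[i]);
    // copyBytes() streams in -> out with the given buffer size; the final
    // 'false' leaves 'out' open so the remaining inputs can still be appended
    IOUtils.copyBytes(in, out, 4096, false);
    in.close();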
From source file:ph.fingra.hadoop.mapred.common.CopyWithinHdfsFile.java
License:Apache License
public void dirToFile(String srcdir, String dstfile) throws IOException {
    FileSystem shfs = FileSystem.get(URI.create(srcdir), getConf());
    FileSystem thfs = FileSystem.get(URI.create(dstfile), getConf());
    Path srcPath = new Path(srcdir);
    Path dstPath = new Path(dstfile);

    // Delete the destination file if it already exists
    if (thfs.exists(dstPath)) {
        thfs.delete(dstPath, true);
    }

    // Get the HDFS file list
    PathFilter resultFileFilter = new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().startsWith(ConstantVars.RESULT_FILE_PREFIX);
        }
    };
    FileStatus[] status = shfs.listStatus(srcPath, resultFileFilter);
    Path[] listedPaths = FileUtil.stat2Paths(status);

    if (listedPaths.length > 0) {
        // Create the HDFS output stream
        FSDataOutputStream out = thfs.create(dstPath);
        for (int i = 0; i < listedPaths.length; i++) {
            // Open an HDFS input stream and copy it in 256-byte chunks
            FSDataInputStream in = shfs.open(listedPaths[i]);
            byte[] buffer = new byte[256];
            int bytesRead = 0;
            while ((bytesRead = in.read(buffer)) > 0) {
                out.write(buffer, 0, bytesRead);
            }
            in.close();
        }
        out.close();
    }
    return;
}
From source file:pp.TestUtils.java
License:Open Source License
public static String readPath(Path p) throws FileNotFoundException, IOException {
    FileSystem fs = p.getFileSystem(new org.apache.hadoop.conf.Configuration());
    FSDataInputStream fdis = fs.open(p);
    FileStatus stat = fs.getFileStatus(p);
    byte[] baselineBuffer = new byte[(int) stat.getLen()];
    // Note: a single read() may return fewer bytes than the file length
    fdis.read(baselineBuffer);
    return new String(baselineBuffer);
}
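Because a single read() call may return before filling the buffer, a file read this way can be silently truncated. A sketch of a safer variant (a hypothetical helper, assuming the same imports as readPath above) using readFully(), which FSDataInputStream inherits from java.io.DataInputStream:

    public static String readPathFully(Path p) throws IOException {
        FileSystem fs = p.getFileSystem(new org.apache.hadoop.conf.Configuration());
        FileStatus stat = fs.getFileStatus(p);
        byte[] buffer = new byte[(int) stat.getLen()];
        try (FSDataInputStream in = fs.open(p)) {
            // readFully() loops internally until the buffer is filled,
            // so short reads cannot truncate the result
            in.readFully(buffer);
        }
        return new String(buffer);
    }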
From source file:putmerge_bycategory.PutMerge_byCategory.java
/**
 * @param args the command line arguments
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    long startTime = System.currentTimeMillis();
    Configuration conf = new Configuration();
    Path inputDir = new Path(args[0]);
    Path hdfsFile = new Path(args[1]);
    FileSystem hdfs = FileSystem.get(conf);
    FileSystem local = FileSystem.getLocal(conf);

    try {
        FileStatus[] inputFiles = local.listStatus(inputDir);
        FSDataOutputStream out = hdfs.create(hdfsFile);
        for (int i = 0; i < inputFiles.length; i++) {
            System.out.println(inputFiles[i].getPath().getName());
            FSDataInputStream in = local.open(inputFiles[i].getPath());
            byte[] buffer = new byte[256];
            int bytesRead = 0;
            while ((bytesRead = in.read(buffer)) > 0) {
                out.write(buffer, 0, bytesRead);
            }
            in.close();
        }
        out.close();
    } catch (IOException e) {
        e.printStackTrace();
    }

    Job job = Job.getInstance(conf, "average stock price");
    job.setJarByClass(PutMerge_byCategory.class);
    job.setMapperClass(NYSEMapper.class);
    job.setReducerClass(NYSEReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);
    FileInputFormat.addInputPath(job, hdfsFile);
    String timeStamp = new SimpleDateFormat("yyyy.MM.dd.HH.mm.ss", Locale.US)
            .format(new Timestamp(System.currentTimeMillis()));
    FileOutputFormat.setOutputPath(job, new Path(args[2] + timeStamp));

    long endTime = System.currentTimeMillis();
    long totalTime = endTime - startTime;
    System.out.println(totalTime);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:ras.test.hadoop.fs.InMemoryFileSystemUnitTest.java
License:Apache License
@Test
public void testInputStreamSeek() throws IOException {
    Path path = new Path("message.txt");
    writeMessage(path);

    byte[] bytesOut = message.getBytes();
    long seekPosition = 6;
    FSDataInputStream in = inMemoryFileSystem.open(path);
    byte[] bytesIn = new byte[bytesOut.length - (int) seekPosition];
    in.seek(seekPosition);
    assertThat("Wrong position after seek", in.getPos(), is(equalTo(seekPosition)));
    in.read(bytesIn);
    for (int i = 6; i < bytesOut.length; i++) {
        assertThat("Wrong byte at index " + i, bytesIn[i - 6], is(equalTo(bytesOut[i])));
    }
}
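For random access, FSDataInputStream also offers positioned reads via the PositionedReadable interface, which read at an offset without moving the stream's seek pointer. A sketch of the same check using a positioned read, assuming the wrapped stream implements PositionedReadable (HDFS streams do; whether this in-memory test FileSystem does is an assumption):

    FSDataInputStream in = inMemoryFileSystem.open(path);
    byte[] bytesIn = new byte[bytesOut.length - 6];
    // Reads starting at offset 6 without changing getPos();
    // may return fewer bytes than requested, so check the return value
    int n = in.read(6L, bytesIn, 0, bytesIn.length);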
From source file:ras.test.hadoop.fs.InMemoryFileSystemUnitTest.java
License:Apache License
@Test
public void testRenameFile() throws IOException {
    Path source = new Path("/source.txt");
    Path destination = new Path("/destination.txt");
    FSDataOutputStream out = inMemoryFileSystem.create(source);
    String message = "Hello World!";
    out.writeBytes(message);
    out.close();

    inMemoryFileSystem.rename(source, destination);

    FSDataInputStream in = inMemoryFileSystem.open(destination);
    byte[] bytes = new byte[message.getBytes().length];
    in.read(bytes);
    assertThat("Wrong message", new String(bytes), is(equalTo(message)));

    expectIOException("'" + source + "' not found!");
    inMemoryFileSystem.getFileStatus(source);
}
From source file:tap.core.MapperBridge.java
License:Apache License
/**
 * Read the first SNIFF_HEADER_SIZE bytes from a file.
 * @param inputStream the stream to read from; closed before returning
 * @return byte buffer containing the first SNIFF_HEADER_SIZE bytes
 * @throws FileNotFoundException
 * @throws IOException
 */
private byte[] readHeader(FSDataInputStream inputStream) throws FileNotFoundException, IOException {
    byte[] header = new byte[SNIFF_HEADER_SIZE];
    // Note: a single read() may return fewer than SNIFF_HEADER_SIZE bytes
    inputStream.read(header);
    inputStream.close();
    return header;
}
From source file:tap.Pipe.java
License:Apache License
private Formats sniffFileFormat(Path path) throws IOException, FileNotFoundException {
    byte[] header;
    FileSystem fs = path.getFileSystem(this.getConf());
    FSDataInputStream in = null;
    try {
        in = fs.open(path);
        header = new byte[1000];
        in.read(header);
    } finally {
        // The finally block is the single close point; the original also
        // closed the stream inside the try, which was redundant.
        if (in != null)
            in.close();
    }
    return determineFileFormat(header);
}