List of usage examples for org.apache.hadoop.fs FSDataInputStream read
@Override public int read(ByteBuffer buf) throws IOException
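None of the examples below exercises this ByteBuffer overload directly; they all use the byte-array reads. A minimal, self-contained sketch of the ByteBuffer signature (the input path is hypothetical) might look like this:

    import java.nio.ByteBuffer;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FSDataInputStream;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class ByteBufferReadExample {
        public static void main(String[] args) throws Exception {
            FileSystem fs = FileSystem.get(new Configuration());
            Path path = new Path("/tmp/example.txt"); // hypothetical input file
            ByteBuffer buf = ByteBuffer.allocate(4096);
            try (FSDataInputStream in = fs.open(path)) {
                // Returns the number of bytes read, or -1 at end of stream.
                // Like read(byte[]), it may fill less than the buffer's remaining
                // space, and the wrapped stream must support ByteBuffer reads
                // (otherwise an UnsupportedOperationException is thrown).
                int n = in.read(buf);
                System.out.println("Read " + n + " bytes");
            }
        }
    }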
From source file:org.sleuthkit.hadoop.GrepSearchJob.java
License:Open Source License
public static int runPipeline(String table, String deviceID, String regexFile, String friendlyName) {
    try {
        Job job = SKJobFactory.createJob(deviceID, friendlyName, JobNames.GREP_SEARCH);
        job.setJarByClass(GrepSearchJob.class);
        FileSystem fs = FileSystem.get(job.getConfiguration());
        //fs.delete(new Path(outputdir), true);
        Path inFile = new Path(regexFile);
        FSDataInputStream in = fs.open(inFile);

        // Read the regex file and set a property on the configuration object
        // to store the regexes in a place accessible by all of the child jobs.
        byte[] bytes = new byte[1024];
        StringBuilder b = new StringBuilder();
        int i = in.read(bytes);
        while (i != -1) {
            // Decode only the bytes actually read in this pass
            b.append(new String(bytes, 0, i));
            i = in.read(bytes);
        }
        System.out.println("regexes are: " + b.toString());
        String regexes = b.toString();
        job.getConfiguration().set("mapred.mapper.regex", regexes);

        job.setMapperClass(GrepMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(FsEntry.class);

        // We are not reducing.
        job.setNumReduceTasks(0);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(FsEntry.class);

        job.setInputFormatClass(FsEntryHBaseInputFormat.class);
        FsEntryHBaseInputFormat.setupJob(job, deviceID);
        job.setOutputFormatClass(FsEntryHBaseOutputFormat.class);

        System.out.println("About to run the job...");
        return job.waitForCompletion(true) ? 0 : 1;
    } catch (Exception ex) {
        ex.printStackTrace();
        return 2;
    }
}
From source file:org.trommel.trommel.mapreduce.TrommelDriver.java
License:Apache License
private static int processScript(Level logLevel, int numOfReducers, String trommelScriptFilePath) throws Exception {
    int exitCode = 0;
    FrontEndInterpreter frontEndInterpreter = null;
    FileSystem fileSystem = null;
    Path cachedScript = null;

    logger.setLevel(logLevel);

    try {
        logger.info(String.format("Loading and parsing TrommelScript file %1$s ...", trommelScriptFilePath));
        Lexer lexer = new Lexer(new PushbackReader(new BufferedReader(new FileReader(trommelScriptFilePath)), 4096));
        Parser parser = new Parser(lexer);
        Start ast = parser.parse();
        ValidationInterpreter validationInterpreter = new ValidationInterpreter();

        logger.info("Validating TrommelScript...");
        ast.apply(validationInterpreter);

        if (validationInterpreter.getSemanticErrors().size() != 0) {
            // Validation of script failed
            logger.info("TrommelScript failed validation with the following errors:");
            for (String errorMessage : validationInterpreter.getSemanticErrors()) {
                logger.info(errorMessage);
            }
            return exitCode;
        }

        logger.info("Interpreting TrommelScript...");
        frontEndInterpreter = new FrontEndInterpreter(logger, DEFAULT_HDFS_PATH);
        ast.apply(frontEndInterpreter);

        logger.debug("Creating Job object");
        Job job = new Job();
        job.setJarByClass(TrommelDriver.class);

        // Copy the TrommelScript file from the local file system to HDFS and add it to the distributed cache
        fileSystem = FileSystem.get(job.getConfiguration());
        Path src = new Path(trommelScriptFilePath);
        cachedScript = new Path(String.format("/tmp/%1$s_%2$s", src.getName(), UUID.randomUUID().toString()));
        fileSystem.copyFromLocalFile(src, cachedScript);
        logger.debug(String.format("Moved TrommelScript file to HDFS as %1$s.", cachedScript.toString()));

        logger.debug("Adding TrommelScript file to DistributedCache.");
        DistributedCache.addCacheFile(new URI(cachedScript.toString()), job.getConfiguration());

        logger.debug(String.format("Setting LOGGING_LEVEL_CONFIG_PROP to %1$s", logLevel.toString()));
        job.getConfiguration().set(LOGGING_LEVEL_CONFIG_PROP, logLevel.toString());

        // Specify HDFS input/output locations
        logger.debug(String.format("Calling FileInputFormat.addInputPath() with %1$s.",
                frontEndInterpreter.getHdfsInputFilePath()));
        FileInputFormat.addInputPath(job, new Path(frontEndInterpreter.getHdfsInputFilePath()));
        logger.debug(String.format("Calling FileOutputFormat.setOutputPath() with %1$s.",
                frontEndInterpreter.getHdfsOutputFilePath()));
        FileOutputFormat.setOutputPath(job, new Path(frontEndInterpreter.getHdfsOutputFilePath()));

        // Hadoop setup
        job.setMapperClass(TrommelMapper.class);
        if (frontEndInterpreter.samplingData()) {
            logger.debug("Trommel is sampling data, 0 Reducers set.");
            job.setNumReduceTasks(0);
        } else {
            logger.debug(String.format("Setting number of Reducers to %1$s.", numOfReducers));
            job.setReducerClass(TrommelReducer.class);
            job.setNumReduceTasks(numOfReducers);
        }
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        logger.debug("Running job");
        if (!job.waitForCompletion(true)) {
            exitCode = 1;
        } else if (frontEndInterpreter.getLocalFilePath() != null) {
            // User would like data exported to the local file system
            logger.debug(String.format("Exporting Trommel output from %1$s to %2$s.",
                    frontEndInterpreter.getHdfsOutputFilePath(), frontEndInterpreter.getLocalFilePath()));
            Path mergeFilePath = new Path(String.format("/tmp/%1$s", UUID.randomUUID()));
            FSDataOutputStream mergeFileStream = fileSystem.create(mergeFilePath);
            Path localFilePath = new Path(frontEndInterpreter.getLocalFilePath());
            FileStatus[] outputFileStatuses = fileSystem
                    .listStatus(new Path(frontEndInterpreter.getHdfsOutputFilePath()));
            FSDataInputStream outputFileStream = null;
            String fileNameFilter = (frontEndInterpreter.samplingData() ? "part-m" : "part-r");

            try {
                // Loop through the output, merging any reducer output file for export to the local file system
                for (FileStatus outputFileStatus : outputFileStatuses) {
                    if (!outputFileStatus.isDir() && outputFileStatus.getPath().getName().contains(fileNameFilter)) {
                        logger.debug(String.format("Merging file %1$s into local file system output.",
                                outputFileStatus.getPath().toString()));
                        outputFileStream = fileSystem.open(outputFileStatus.getPath());
                        byte[] buffer = new byte[(int) outputFileStatus.getLen()];
                        // Note: a single read() is not guaranteed to fill the buffer;
                        // readFully() would be the safer choice here.
                        outputFileStream.read(buffer);
                        mergeFileStream.write(buffer);
                        outputFileStream.close();
                    }
                }
            } finally {
                if (mergeFileStream != null) {
                    mergeFileStream.close();
                    fileSystem.copyToLocalFile(mergeFilePath, localFilePath);
                    fileSystem.delete(mergeFilePath, true);
                }
            }
        }
    } finally {
        try {
            if (fileSystem != null) {
                // Clean up any temp files if needed
                if (frontEndInterpreter.getHdfsOutputFilePath().equals(DEFAULT_HDFS_PATH)) {
                    logger.debug("Deleting temp files from /tmp/Trommel");
                    fileSystem.delete(new Path(DEFAULT_HDFS_PATH), true);
                }
                // Clean up the cached file
                logger.debug(String.format("Deleting cached TrommelScript file %1$s", cachedScript.toString()));
                fileSystem.delete(cachedScript, true);
            }
        } catch (IOException ioe) {
            // Couldn't delete file for some reason, alert user
            logger.error(String.format(
                    "Exception encountered deleting cached TrommelScript file %1$s. Error message: %2$s",
                    cachedScript.toString(), ioe.getMessage()));
        }
    }

    return exitCode;
}
From source file:ph.fingra.hadoop.mapred.common.CopyToLocalFile.java
License:Apache License
public void dirToFile(String srcdir, String dstfile) throws IOException {
    FileSystem fs = FileSystem.get(URI.create(srcdir), getConf());
    FileSystem local = FileSystem.getLocal(getConf());
    Path srcPath = new Path(srcdir);
    Path dstPath = new Path(dstfile);

    // Delete the destination local file if it already exists
    if (local.exists(dstPath)) {
        local.delete(dstPath, true);
    }

    // Get the HDFS file list
    PathFilter resultFileFilter = new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().startsWith(ConstantVars.RESULT_FILE_PREFIX);
        }
    };
    FileStatus[] status = fs.listStatus(srcPath, resultFileFilter);
    Path[] listedPaths = FileUtil.stat2Paths(status);

    if (listedPaths.length > 0) {
        // Create the local output stream
        FSDataOutputStream out = local.create(dstPath);
        for (int i = 0; i < listedPaths.length; i++) {
            // Open an HDFS input stream and copy it in 256-byte chunks
            FSDataInputStream in = fs.open(listedPaths[i]);
            byte[] buffer = new byte[256];
            int bytesRead = 0;
            while ((bytesRead = in.read(buffer)) > 0) {
                out.write(buffer, 0, bytesRead);
            }
            in.close();
        }
        out.close();
    }
    return;
}
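As an aside, the manual 256-byte copy loop above can also be expressed with Hadoop's org.apache.hadoop.io.IOUtils helper. A minimal sketch of just the inner copy, keeping the surrounding setup unchanged:

    import org.apache.hadoop.io.IOUtils;

    // Inside the for loop, instead of the manual read/write loop:
    FSDataInputStream in = fs.open(listedPaths[i]);
    // copyBytes() streams in -> out with the given buffer size; the final
    // 'false' leaves 'out' open so the remaining inputs can still be appended
    IOUtils.copyBytes(in, out, 4096, false);
    in.close();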
From source file:ph.fingra.hadoop.mapred.common.CopyWithinHdfsFile.java
License:Apache License
public void dirToFile(String srcdir, String dstfile) throws IOException {
    FileSystem shfs = FileSystem.get(URI.create(srcdir), getConf());
    FileSystem thfs = FileSystem.get(URI.create(dstfile), getConf());
    Path srcPath = new Path(srcdir);
    Path dstPath = new Path(dstfile);

    // Delete the destination file if it already exists
    if (thfs.exists(dstPath)) {
        thfs.delete(dstPath, true);
    }

    // Get the HDFS file list
    PathFilter resultFileFilter = new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().startsWith(ConstantVars.RESULT_FILE_PREFIX);
        }
    };
    FileStatus[] status = shfs.listStatus(srcPath, resultFileFilter);
    Path[] listedPaths = FileUtil.stat2Paths(status);

    if (listedPaths.length > 0) {
        // Create the HDFS output stream
        FSDataOutputStream out = thfs.create(dstPath);
        for (int i = 0; i < listedPaths.length; i++) {
            // Open an HDFS input stream and copy it in 256-byte chunks
            FSDataInputStream in = shfs.open(listedPaths[i]);
            byte[] buffer = new byte[256];
            int bytesRead = 0;
            while ((bytesRead = in.read(buffer)) > 0) {
                out.write(buffer, 0, bytesRead);
            }
            in.close();
        }
        out.close();
    }
    return;
}
From source file:pp.TestUtils.java
License:Open Source License
public static String readPath(Path p) throws FileNotFoundException, IOException {
    FileSystem fs = p.getFileSystem(new org.apache.hadoop.conf.Configuration());
    FSDataInputStream fdis = fs.open(p);
    FileStatus stat = fs.getFileStatus(p);
    byte[] baselineBuffer = new byte[(int) stat.getLen()];
    // Note: a single read() may return fewer bytes than the file length
    fdis.read(baselineBuffer);
    return new String(baselineBuffer);
}
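Because a single read() call may return before filling the buffer, a file read this way can be silently truncated. A sketch of a safer variant (a hypothetical helper, assuming the same imports as readPath above) using readFully(), which FSDataInputStream inherits from java.io.DataInputStream:

    public static String readPathFully(Path p) throws IOException {
        FileSystem fs = p.getFileSystem(new org.apache.hadoop.conf.Configuration());
        FileStatus stat = fs.getFileStatus(p);
        byte[] buffer = new byte[(int) stat.getLen()];
        try (FSDataInputStream in = fs.open(p)) {
            // readFully() loops internally until the buffer is filled,
            // so short reads cannot truncate the result
            in.readFully(buffer);
        }
        return new String(buffer);
    }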
From source file:putmerge_bycategory.PutMerge_byCategory.java
/**
 * @param args the command line arguments
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    long startTime = System.currentTimeMillis();
    Configuration conf = new Configuration();
    Path inputDir = new Path(args[0]);
    Path hdfsFile = new Path(args[1]);
    FileSystem hdfs = FileSystem.get(conf);
    FileSystem local = FileSystem.getLocal(conf);

    try {
        FileStatus[] inputFiles = local.listStatus(inputDir);
        FSDataOutputStream out = hdfs.create(hdfsFile);
        for (int i = 0; i < inputFiles.length; i++) {
            System.out.println(inputFiles[i].getPath().getName());
            FSDataInputStream in = local.open(inputFiles[i].getPath());
            byte[] buffer = new byte[256];
            int bytesRead = 0;
            while ((bytesRead = in.read(buffer)) > 0) {
                out.write(buffer, 0, bytesRead);
            }
            in.close();
        }
        out.close();
    } catch (IOException e) {
        e.printStackTrace();
    }

    Job job = Job.getInstance(conf, "average stock price");
    job.setJarByClass(PutMerge_byCategory.class);
    job.setMapperClass(NYSEMapper.class);
    job.setReducerClass(NYSEReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);
    FileInputFormat.addInputPath(job, hdfsFile);
    String timeStamp = new SimpleDateFormat("yyyy.MM.dd.HH.mm.ss", Locale.US)
            .format(new Timestamp(System.currentTimeMillis()));
    FileOutputFormat.setOutputPath(job, new Path(args[2] + timeStamp));

    long endTime = System.currentTimeMillis();
    long totalTime = endTime - startTime;
    System.out.println(totalTime);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:ras.test.hadoop.fs.InMemoryFileSystemUnitTest.java
License:Apache License
@Test
public void testInputStreamSeek() throws IOException {
    Path path = new Path("message.txt");
    writeMessage(path);

    byte[] bytesOut = message.getBytes();
    long seekPosition = 6;
    FSDataInputStream in = inMemoryFileSystem.open(path);
    byte[] bytesIn = new byte[bytesOut.length - (int) seekPosition];
    in.seek(seekPosition);
    assertThat("Wrong position after seek", in.getPos(), is(equalTo(seekPosition)));
    in.read(bytesIn);
    for (int i = 6; i < bytesOut.length; i++) {
        assertThat("Wrong byte at index " + i, bytesIn[i - 6], is(equalTo(bytesOut[i])));
    }
}
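For random access, FSDataInputStream also offers positioned reads via the PositionedReadable interface, which read at an offset without moving the stream's seek pointer. A sketch of the same check using a positioned read, assuming the wrapped stream implements PositionedReadable (HDFS streams do; whether this in-memory test FileSystem does is an assumption):

    FSDataInputStream in = inMemoryFileSystem.open(path);
    byte[] bytesIn = new byte[bytesOut.length - 6];
    // Reads starting at offset 6 without changing getPos();
    // may return fewer bytes than requested, so check the return value
    int n = in.read(6L, bytesIn, 0, bytesIn.length);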
From source file:ras.test.hadoop.fs.InMemoryFileSystemUnitTest.java
License:Apache License
@Test
public void testRenameFile() throws IOException {
    Path source = new Path("/source.txt");
    Path destination = new Path("/destination.txt");
    FSDataOutputStream out = inMemoryFileSystem.create(source);
    String message = "Hello World!";
    out.writeBytes(message);
    out.close();

    inMemoryFileSystem.rename(source, destination);

    FSDataInputStream in = inMemoryFileSystem.open(destination);
    byte[] bytes = new byte[message.getBytes().length];
    in.read(bytes);
    assertThat("Wrong message", new String(bytes), is(equalTo(message)));

    expectIOException("'" + source + "' not found!");
    inMemoryFileSystem.getFileStatus(source);
}
From source file:tap.core.MapperBridge.java
License:Apache License
/**
 * Read the first SNIFF_HEADER_SIZE bytes from a file.
 * @param inputStream the stream to read from; closed before returning
 * @return byte buffer containing the first SNIFF_HEADER_SIZE bytes
 * @throws FileNotFoundException
 * @throws IOException
 */
private byte[] readHeader(FSDataInputStream inputStream) throws FileNotFoundException, IOException {
    byte[] header = new byte[SNIFF_HEADER_SIZE];
    // Note: a single read() may return fewer than SNIFF_HEADER_SIZE bytes
    inputStream.read(header);
    inputStream.close();
    return header;
}
From source file:tap.Pipe.java
License:Apache License
private Formats sniffFileFormat(Path path) throws IOException, FileNotFoundException {
    byte[] header;
    FileSystem fs = path.getFileSystem(this.getConf());
    FSDataInputStream in = null;
    try {
        in = fs.open(path);
        header = new byte[1000];
        in.read(header);
    } finally {
        // The finally block is the single close point; the original also
        // closed the stream inside the try, which was redundant.
        if (in != null)
            in.close();
    }
    return determineFileFormat(header);
}