Usage examples for the exists method of org.apache.hadoop.fs.FileSystem
public boolean exists(Path f) throws IOException
From source file:com.marklogic.contentpump.OutputArchive.java
License:Apache License
private void newOutputStream() throws IOException { // use the constructor filename for the first zip, // then add filecount to subsequent archives, if any. int count = fileCount.getAndIncrement(); currPath = newPackagePath(basePath, count, 6); if (outputStream != null) { if (LOG.isDebugEnabled()) { LOG.debug("closing output archive: " + currPath); }/*from w ww . j ava 2 s .c o m*/ outputStream.flush(); outputStream.close(); } currentFileBytes = 0; currentEntries = 0; Path zpath = new Path(currPath); FileSystem fs = zpath.getFileSystem(conf); if (fs.exists(zpath)) { throw new IOException(zpath + " already exists."); } if (LOG.isDebugEnabled()) { LOG.debug("Creating output archive: " + zpath); LOG.debug("Default charset: " + Charset.defaultCharset()); } // if fs instanceof DistributedFileSystem, use hadoop api; otherwise, // use java api if (fs instanceof DistributedFileSystem) { FSDataOutputStream fsout = fs.create(zpath, false); outputStream = new ZipOutputStream(fsout); } else { File f = new File(zpath.toUri().getPath()); if (!f.exists()) { f.getParentFile().mkdirs(); f.createNewFile(); } FileOutputStream fos = new FileOutputStream(f, false); outputStream = new ZipOutputStream(fos); } }
From source file:com.marklogic.mapreduce.LargeBinaryDocument.java
License:Apache License
/**
 * Reads {@code len} bytes of this document's file, starting at byte {@code offset}.
 *
 * @param offset zero-based position in the file at which reading starts
 * @param len number of bytes to read
 * @return a newly allocated array holding exactly {@code len} bytes
 * @throws IllegalArgumentException if {@code offset} or {@code len} is negative
 * @throws RuntimeException if the file does not exist, if the requested range runs past the
 *         end of the file, or if the file cannot be read
 */
public byte[] getContentAsByteArray(int offset, int len) {
    if (offset < 0 || len < 0) {
        throw new IllegalArgumentException(
                "offset and len must be non-negative: offset=" + offset + ", len=" + len);
    }
    FSDataInputStream is = null;
    try {
        FileSystem fs = path.getFileSystem(conf);
        if (!fs.exists(path)) {
            throw new RuntimeException("File not found: " + path);
        }
        FileStatus status = fs.getFileStatus(path);
        if (status.getLen() < offset) {
            throw new RuntimeException("Reached end of file: " + path);
        }
        byte[] buf = new byte[len];
        is = fs.open(path);
        // Position the stream explicitly; the previous skip loop never executed,
        // so every read silently started at byte 0.
        is.seek(offset);
        int bytesRead = 0;
        while (bytesRead < len) {
            int n = is.read(buf, bytesRead, len - bytesRead);
            if (n < 0) {
                // read() signals end of stream with -1; adding it to the counter
                // would corrupt the loop instead of terminating it.
                throw new RuntimeException("Reached end of file: " + path);
            }
            bytesRead += n;
        }
        return buf;
    } catch (IOException e) {
        throw new RuntimeException("Error accessing file: " + path, e);
    } finally {
        if (is != null) {
            try {
                is.close();
            } catch (IOException ignored) {
                // Best effort: a failure to close must not mask the result or
                // the exception already on its way out.
            }
        }
    }
}
From source file:com.marklogic.mapreduce.LargeBinaryDocument.java
License:Apache License
@Override public InputStream getContentAsByteStream() { FileSystem fs; FSDataInputStream is = null;//from w w w . j a v a 2 s . co m try { fs = path.getFileSystem(conf); if (!fs.exists(path)) { throw new RuntimeException("File not found: " + path); } is = fs.open(path); return is; } catch (IOException e) { throw new RuntimeException("Error accessing file: " + path, e); } }
From source file:com.metamx.druid.indexer.DeterminePartitionsJob.java
License:Open Source License
public boolean run() { try {//from www.ja v a2s. co m /* * Group by (timestamp, dimensions) so we can correctly count dimension values as they would appear * in the final segment. */ if (!config.getPartitionsSpec().isAssumeGrouped()) { final Job groupByJob = new Job(new Configuration(), String.format( "%s-determine_partitions_groupby-%s", config.getDataSource(), config.getIntervals())); injectSystemProperties(groupByJob); groupByJob.setInputFormatClass(TextInputFormat.class); groupByJob.setMapperClass(DeterminePartitionsGroupByMapper.class); groupByJob.setMapOutputKeyClass(BytesWritable.class); groupByJob.setMapOutputValueClass(NullWritable.class); groupByJob.setCombinerClass(DeterminePartitionsGroupByReducer.class); groupByJob.setReducerClass(DeterminePartitionsGroupByReducer.class); groupByJob.setOutputKeyClass(BytesWritable.class); groupByJob.setOutputValueClass(NullWritable.class); groupByJob.setOutputFormatClass(SequenceFileOutputFormat.class); groupByJob.setJarByClass(DeterminePartitionsJob.class); config.addInputPaths(groupByJob); config.intoConfiguration(groupByJob); FileOutputFormat.setOutputPath(groupByJob, config.makeGroupedDataDir()); groupByJob.submit(); log.info("Job %s submitted, status available at: %s", groupByJob.getJobName(), groupByJob.getTrackingURL()); if (!groupByJob.waitForCompletion(true)) { log.error("Job failed: %s", groupByJob.getJobID()); return false; } } else { log.info("Skipping group-by job."); } /* * Read grouped data and determine appropriate partitions. */ final Job dimSelectionJob = new Job(new Configuration(), String.format( "%s-determine_partitions_dimselection-%s", config.getDataSource(), config.getIntervals())); dimSelectionJob.getConfiguration().set("io.sort.record.percent", "0.19"); injectSystemProperties(dimSelectionJob); if (!config.getPartitionsSpec().isAssumeGrouped()) { // Read grouped data from the groupByJob. 
dimSelectionJob.setMapperClass(DeterminePartitionsDimSelectionPostGroupByMapper.class); dimSelectionJob.setInputFormatClass(SequenceFileInputFormat.class); FileInputFormat.addInputPath(dimSelectionJob, config.makeGroupedDataDir()); } else { // Directly read the source data, since we assume it's already grouped. dimSelectionJob.setMapperClass(DeterminePartitionsDimSelectionAssumeGroupedMapper.class); dimSelectionJob.setInputFormatClass(TextInputFormat.class); config.addInputPaths(dimSelectionJob); } SortableBytes.useSortableBytesAsMapOutputKey(dimSelectionJob); dimSelectionJob.setMapOutputValueClass(Text.class); dimSelectionJob.setCombinerClass(DeterminePartitionsDimSelectionCombiner.class); dimSelectionJob.setReducerClass(DeterminePartitionsDimSelectionReducer.class); dimSelectionJob.setOutputKeyClass(BytesWritable.class); dimSelectionJob.setOutputValueClass(Text.class); dimSelectionJob.setOutputFormatClass(DeterminePartitionsDimSelectionOutputFormat.class); dimSelectionJob.setJarByClass(DeterminePartitionsJob.class); config.intoConfiguration(dimSelectionJob); FileOutputFormat.setOutputPath(dimSelectionJob, config.makeIntermediatePath()); dimSelectionJob.submit(); log.info("Job %s submitted, status available at: %s", dimSelectionJob.getJobName(), dimSelectionJob.getTrackingURL()); if (!dimSelectionJob.waitForCompletion(true)) { log.error("Job failed: %s", dimSelectionJob.getJobID().toString()); return false; } /* * Load partitions determined by the previous job. 
*/ log.info("Job completed, loading up partitions for intervals[%s].", config.getSegmentGranularIntervals()); FileSystem fileSystem = null; Map<DateTime, List<HadoopyShardSpec>> shardSpecs = Maps.newTreeMap(DateTimeComparator.getInstance()); int shardCount = 0; for (Interval segmentGranularity : config.getSegmentGranularIntervals()) { DateTime bucket = segmentGranularity.getStart(); final Path partitionInfoPath = config.makeSegmentPartitionInfoPath(new Bucket(0, bucket, 0)); if (fileSystem == null) { fileSystem = partitionInfoPath.getFileSystem(dimSelectionJob.getConfiguration()); } if (fileSystem.exists(partitionInfoPath)) { List<ShardSpec> specs = config.jsonMapper.readValue( Utils.openInputStream(dimSelectionJob, partitionInfoPath), new TypeReference<List<ShardSpec>>() { }); List<HadoopyShardSpec> actualSpecs = Lists.newArrayListWithExpectedSize(specs.size()); for (int i = 0; i < specs.size(); ++i) { actualSpecs.add(new HadoopyShardSpec(specs.get(i), shardCount++)); log.info("DateTime[%s], partition[%d], spec[%s]", bucket, i, actualSpecs.get(i)); } shardSpecs.put(bucket, actualSpecs); } else { log.info("Path[%s] didn't exist!?", partitionInfoPath); } } config.setShardSpecs(shardSpecs); return true; } catch (Exception e) { throw Throwables.propagate(e); } }
From source file:com.metamx.druid.indexer.Utils.java
License:Open Source License
/**
 * Opens an output stream for {@code outputPath}, compressing it when the job has output
 * compression enabled.
 *
 * <p>With compression enabled the file is written to {@code outputPath} plus the codec's
 * default extension. Both the plain path and that actual target get the same
 * exists-then-delete-or-fail treatment.
 *
 * @param job job whose configuration supplies the file system and compression settings
 * @param outputPath path to write; the codec extension is appended when compressing
 * @param deleteExisting whether an existing file is deleted ({@code true}) or treated as an
 *        error ({@code false})
 * @return a stream writing to the (possibly compressed) target file
 * @throws IOException if the file system cannot be accessed or the file cannot be created
 * @throws ISE if a target already exists and {@code deleteExisting} is {@code false}
 */
public static OutputStream makePathAndOutputStream(JobContext job, Path outputPath, boolean deleteExisting)
    throws IOException {
  FileSystem fs = outputPath.getFileSystem(job.getConfiguration());
  deleteOrRejectExisting(fs, outputPath, deleteExisting);

  if (!FileOutputFormat.getCompressOutput(job)) {
    return fs.create(outputPath, false);
  }

  Class<? extends CompressionCodec> codecClass =
      FileOutputFormat.getOutputCompressorClass(job, GzipCodec.class);
  CompressionCodec codec = ReflectionUtils.newInstance(codecClass, job.getConfiguration());
  Path compressedPath = new Path(outputPath.toString() + codec.getDefaultExtension());
  // The file that is really created carries the codec extension; without this check a
  // leftover compressed file makes create(..., false) fail even when deleteExisting is true.
  deleteOrRejectExisting(fs, compressedPath, deleteExisting);
  return codec.createOutputStream(fs.create(compressedPath, false));
}

/** Deletes {@code path} if it exists and deletion is allowed; otherwise fails when it exists. */
private static void deleteOrRejectExisting(FileSystem fs, Path path, boolean deleteExisting)
    throws IOException {
  if (fs.exists(path)) {
    if (deleteExisting) {
      fs.delete(path, false);
    } else {
      throw new ISE("outputPath[%s] must not exist.", path);
    }
  }
}
From source file:com.metamx.milano.pig.MilanoLoadFunc.java
License:Apache License
/** * This builds a Pig ResourceSchema from the input file(s). This relies on the existence of TypeMetadata. * This is the method by which we pass the schema types and names directly to pig without having to specify them directly. * * @param location As passed to relativeToAbsolutePath * @param job The job./*w w w. ja v a2s . c o m*/ * * @return Returns a ResourceSchema representing the incoming file(s) or null if TypeMetadata does not exist. * * @throws IOException Not thrown directly, but thrown from getMessageSchema where it indicates an unsupported type. */ @Override public ResourceSchema getSchema(String location, Job job) throws IOException { Configuration conf = job.getConfiguration(); Properties props = ConfigurationUtil.toProperties(conf); // HACK: Here we open the file directly to read the TypeMetadata. // HACK: There may be a better more direct way to do this, but it works for now. Path path = new Path(location); FileSystem fileSystem = path.getFileSystem(conf); FileStatus fileStatus = fileSystem.getFileStatus(path); if (fileStatus.isDir()) { log.debug(String.format("Path is a directory.")); path = getFilePath(path, fileSystem); if (path == null) { return null; } } else if (!fileSystem.exists(path)) { return null; } MilanoProtoFile.Reader reader = MilanoProtoFile.createReader(fileSystem.open(path)); typeMetadata = reader.getMetadata(); reader.close(); if (typeMetadata == null) { return null; } descriptor = MilanoTool.with(typeMetadata).getDescriptor(); return new ResourceSchema(getMessageSchema(descriptor)); }
From source file:com.michaeljones.hellohadoopworldmaven.HelloMapReduceTest.java
/** * Test of RunJobAsync method, of class HelloMapReduce. * @throws java.lang.Exception// ww w . j a va 2 s. c o m */ @Test public void testRunJobAsync() throws Exception { System.out.println("RunJobAsync"); LOGGER.info("RunJobAsync"); FileSystem hdfs = FileSystem.get(hadoopConfig); Path outputPath = new Path(wcOutputPathDir); // We need to remove the output directory before running the map reduce job. if (hdfs.exists(outputPath)) { // remove the directory recursively. hdfs.delete(outputPath, true); } Path inputPath = new Path(wcInputPathDir); Job result = HelloMapReduce.RunJobAsync(inputPath, outputPath, hadoopConfig); boolean ok = result.waitForCompletion(true); assertTrue(ok); }
From source file:com.michaeljones.hellohadoopworldmaven.HelloMapReduceTest.java
/**
 * Exercises {@code HelloMapReduce.RunJobAnalysisAsync}: clears the analysis output directory,
 * starts the job and expects it to complete successfully.
 *
 * @throws java.lang.Exception if file system access or the job itself fails
 */
@Test
public void testRunJobAnalysisAsync() throws Exception {
    System.out.println("RunJobAnalysisAsync");
    LOGGER.info("RunJobAnalysisAsync");

    final FileSystem fileSystem = FileSystem.get(hadoopConfig);
    final Path outputDir = new Path(wcOutputAnalysisPathDir);

    // Remove any previous output, recursively, before the job runs.
    final boolean outputPresent = fileSystem.exists(outputDir);
    if (outputPresent) {
        fileSystem.delete(outputDir, true);
    }

    final Job job = HelloMapReduce.RunJobAnalysisAsync(new Path(wcInputPathDir), outputDir, hadoopConfig);
    assertTrue(job.waitForCompletion(true));
}
From source file:com.michaeljones.hellohadoopworldmaven.HelloMapReduceTest.java
/** * Test of main method, of class HelloMapReduce. * @throws java.lang.Exception/* ww w .j a va2 s . c o m*/ */ @Test public void testMain() throws Exception { System.out.println("main"); LOGGER.info("testMain"); FileSystem hdfs = FileSystem.get(hadoopConfig); Path outputPath = new Path(wcOutputMainPathDir); if (hdfs.exists(outputPath)) { hdfs.delete(outputPath, true); } String[] args = { wcInputPathDir, wcOutputMainPathDir }; HelloMapReduce.main(args); // Assume it is true. assertTrue(true); }
From source file:com.ml.ira.algos.LogisticModelParameters.java
License:Apache License
/**
 * Loads model parameters previously written to {@code path}.
 *
 * @param path location of the serialized parameters
 * @return a new {@code LogisticModelParameters} populated via {@code readFields}
 * @throws IOException if {@code path} does not exist or cannot be read
 */
public static LogisticModelParameters loadFrom(Path path) throws IOException {
    FileSystem ofs = path.getFileSystem(new Configuration());
    if (!ofs.exists(path)) {
        throw new IOException(path.toString() + " does not exists. ");
    }
    LogisticModelParameters result = new LogisticModelParameters();
    FSDataInputStream in = ofs.open(path);
    try {
        result.readFields(in);
    } finally {
        // Close the stream that was opened here. The FileSystem is deliberately left open:
        // Hadoop normally hands out cached, shared instances, so closing it could break
        // other users of the same file system.
        in.close();
    }
    return result;
}