List of usage examples for org.apache.hadoop.fs.Path.getFileSystem
public FileSystem getFileSystem(Configuration conf) throws IOException
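getFileSystem returns the FileSystem that owns this Path, resolving the Path's scheme and authority against the given Configuration (and falling back to the configured default filesystem when the Path has no scheme). This is why it is generally preferred over FileSystem.get(conf) when a job may touch paths on more than one filesystem. Before the examples from real projects below, here is a minimal sketch of the basic pattern; the path string and written text are placeholders, not taken from any of the source projects:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetFileSystemExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Placeholder path; an explicit scheme such as hdfs:// would
        // resolve to that filesystem instead of the configured default.
        Path path = new Path("/tmp/example.txt");

        // Resolve the FileSystem that owns this Path.
        FileSystem fs = path.getFileSystem(conf);

        // Use the handle as usual, e.g. create and write a file.
        try (FSDataOutputStream out = fs.create(path, true)) {
            out.writeUTF("hello");
        }
    }
}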
From source file: com.bonc.mr_roamRecognition_hjpt.comm.FileCountTextOutputFormat.java
License: Apache License
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext job) throws IOException, InterruptedException {
    Configuration conf = job.getConfiguration();
    boolean isCompressed = getCompressOutput(job);
    String keyValueSeparator = conf.get(SEPERATOR, "\t");
    CompressionCodec codec = null;
    String extension = "";
    if (isCompressed) {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
        extension = codec.getDefaultExtension();
    }
    Path file = getDefaultWorkFile(job, extension);
    FileSystem fs = file.getFileSystem(conf);
    if (!isCompressed) {
        FSDataOutputStream fileOut = fs.create(file, false);
        return new LineRecordWriter<K, V>(fileOut, keyValueSeparator);
    } else {
        FSDataOutputStream fileOut = fs.create(file, false);
        return new LineRecordWriter<K, V>(new DataOutputStream(codec.createOutputStream(fileOut)),
                keyValueSeparator);
    }
}
From source file: com.bonc.mr_roamRecognition_hjpt.comm.NewFileOutputFormat.java
License: Apache License
/**
 * Set the {@link Path} of the output directory for the map-reduce job.
 *
 * @param job
 *          The job to modify
 * @param outputDir
 *          the {@link Path} of the output directory for the map-reduce job.
 */
public static void setOutputPath(Job job, Path outputDir) {
    try {
        outputDir = outputDir.getFileSystem(job.getConfiguration()).makeQualified(outputDir);
    } catch (IOException e) {
        // Throw the IOException as a RuntimeException to be compatible with MR1
        throw new RuntimeException(e);
    }
    job.getConfiguration().set(FileOutputFormat.OUTDIR, outputDir.toString());
}
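A hypothetical call site for the helper above; the job instance and output directory are placeholders, not taken from the source project:

    Job job = Job.getInstance(new Configuration());
    NewFileOutputFormat.setOutputPath(job, new Path("/user/example/output"));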
From source file: com.bonc.mr_roamRecognition_hjpt.comm.PathRecordReader.java
License: Apache License
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt(MAX_LINE_LENGTH, Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    path = split.getPath().toString();

    // open the file and seek to the start of the split
    final FileSystem fs = file.getFileSystem(job);
    fileIn = fs.open(file);

    CompressionCodec codec = new CompressionCodecFactory(job).getCodec(file);
    if (null != codec) {
        isCompressedInput = true;
        decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(
                    fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
            in = new CompressedSplitLineReader(cIn, job, this.recordDelimiterBytes);
            start = cIn.getAdjustedStart();
            end = cIn.getAdjustedEnd();
            filePosition = cIn;
        } else {
            in = new SplitLineReader(codec.createInputStream(fileIn, decompressor), job,
                    this.recordDelimiterBytes);
            filePosition = fileIn;
        }
    } else {
        fileIn.seek(start);
        in = new SplitLineReader(fileIn, job, this.recordDelimiterBytes);
        filePosition = fileIn;
    }
    // If this is not the first split, we always throw away the first record,
    // because we always (except for the last split) read one extra line in
    // the next() method.
    if (start != 0) {
        start += in.readLine(new Text(), 0, maxBytesToConsume(start));
    }
    this.pos = start;
}
From source file: com.btoddb.chronicle.apps.AvroTools.java
License: Open Source License
private void testFileAndFix(Path inFile) throws IOException {
    FileContext context = FileContext.getFileContext(hdfsConfig);
    AvroFSInput input = new AvroFSInput(context, inFile);

    ReflectDatumReader<Object> reader = new ReflectDatumReader<>();
    FileReader<Object> fileReader = DataFileReader.openReader(input, reader);

    Path outFile = inFile.suffix(".fixing");
    FSDataOutputStream output = FileSystem.create(outFile.getFileSystem(hdfsConfig), outFile,
            FsPermission.getDefault());

    DataFileWriter<Object> writer = new DataFileWriter<>(new GenericDatumWriter<>());
    writer.setCodec(CodecFactory.snappyCodec());

    boolean corrupted = false;
    long count = 0;
    try {
        Schema schema = fileReader.getSchema();
        writer.create(schema, output);
        for (;;) {
            try {
                if (fileReader.hasNext()) {
                    Object obj = fileReader.next();
                    count++;
                    writer.append(obj);
                } else {
                    break;
                }
            } catch (AvroRuntimeException e) {
                corrupted = true;
                System.out.println(" - file pointer = " + input.tell());
                if (e.getCause() instanceof EOFException) {
                    System.out.println(" - EOF occurred so we're done : " + e.getMessage());
                    break;
                } else if (e.getCause() instanceof IOException) {
                    System.out.println(" - will try to 'next' past the error : " + e.getMessage());
                    try {
                        fileReader.next();
                        System.out.println(" - 'next' worked - didn't really expect it to, but great!");
                    } catch (Exception e2) {
                        System.out.println(" - 'next' did not work - will continue on and see what happens : "
                                + e2.getMessage());
                    }
                    continue;
                }
                break;
            } catch (Exception e) {
                corrupted = true;
                System.out.println(" - file pointer = " + input.tell());
                e.printStackTrace();
                break;
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        System.out.println(" - processed " + count + " records");
        if (null != fileReader) {
            try {
                fileReader.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
        if (null != writer) {
            try {
                writer.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

    if (!corrupted) {
        outFile.getFileSystem(hdfsConfig).delete(outFile, false);
    } else {
        outFile.getFileSystem(hdfsConfig).rename(outFile, inFile.suffix(".fixed"));
    }
}
From source file: com.btoddb.chronicle.plunkers.hdfs.HdfsFileBaseImpl.java
License: Open Source License
@Override
public void init(String permFilename, String openFilename) throws IOException {
    this.permFilename = permFilename;
    this.openFilename = openFilename;

    Configuration conf = new Configuration();
    Path path = new Path(this.openFilename);
    fileSystem = path.getFileSystem(conf);
    outputStream = fileSystem.create(path);
}
From source file: com.chinamobile.bcbsp.client.BSPJobClient.java
License: Apache License
/**
 * Get a fileSystem handle. We need this to prepare jobs for submission to
 * the BSP system.
 *
 * @return the fileSystem handle.
 */
public synchronized FileSystem getFs() throws IOException {
    if (this.fs == null) {
        Path systemDir = getSystemDir();
        this.fs = systemDir.getFileSystem(getConf());
    }
    return fs;
}
From source file: com.chinamobile.bcbsp.client.BSPJobClient.java
License: Apache License
/**
 * Write the splits file header.
 *
 * @param conf Configuration
 * @param filename path of the file
 * @param length size of split
 * @return DataOutputStream
 * @throws IOException
 */
private DataOutputStream writeSplitsFileHeader(Configuration conf, Path filename, int length)
        throws IOException {
    // write the splits to a file for the job tracker
    FileSystem files = filename.getFileSystem(conf);
    BSPFSDataOutputStream bspout = new BSPFSDataOutputStreamImpl(files, filename,
            new BSPFspermissionImpl(0).getFp());
    bspout.write(SPLIT_FILE_HEADER);
    WritableUtils.writeVInt(bspout.getOut(), CURRENT_SPLIT_FILE_VERSION);
    WritableUtils.writeVInt(bspout.getOut(), length);
    return bspout.getOut();
}
From source file: com.chinamobile.bcbsp.io.BSPFileInputFormat.java
License: Apache License
/**
 * Generate the list of files and make them into FileSplits.
 *
 * @param job
 *          The current BSPJob job
 * @return input splits
 */
@Override
public List<InputSplit> getSplits(BSPJob job) throws IOException {
    List<InputSplit> splits = new ArrayList<InputSplit>();
    for (FileStatus file : listStatus(job)) {
        Path path = file.getPath();
        FileSystem fs = path.getFileSystem(job.getConf());
        long length = file.getLen();
        BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length);
        if ((length != 0) && isSplitable(job, path)) {
            long blockSize = file.getBlockSize();
            long splitSize = 0L;
            if (job.getInt(Constants.USER_BC_BSP_JOB_SPLIT_FACTOR, 1) == 1) {
                if (job.getSplitSize() == 0L) {
                    splitSize = blockSize;
                } else {
                    splitSize = job.getSplitSize();
                }
            } else {
                if (job.getSplitSize() == 0L) {
                    splitSize = blockSize * job.getInt(Constants.USER_BC_BSP_JOB_SPLIT_FACTOR, 1);
                } else {
                    splitSize = job.getSplitSize() * job.getInt(Constants.USER_BC_BSP_JOB_SPLIT_FACTOR, 1);
                }
            }
            LOG.info("[Split Size] " + (splitSize / (1024 * 1024)) + " MB");
            long bytesRemaining = length;
            while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) {
                int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
                splits.add(new FileSplit(path, length - bytesRemaining, splitSize,
                        blkLocations[blkIndex].getHosts()));
                bytesRemaining -= splitSize;
            }
            if (bytesRemaining != 0) {
                splits.add(new FileSplit(path, length - bytesRemaining, bytesRemaining,
                        blkLocations[blkLocations.length - 1].getHosts()));
            }
        } else if (length != 0) {
            splits.add(new FileSplit(path, 0, length, blkLocations[0].getHosts()));
        } else {
            // Create an empty hosts array for zero-length files
            splits.add(new FileSplit(path, 0, length, new String[0]));
        }
    }
    LOG.info("[Split Number] " + splits.size());
    return splits;
}
From source file: com.chinamobile.bcbsp.io.BSPFileInputFormat.java
License: Apache License
/**
 * List input directories. Subclasses may override to, e.g., select only
 * files matching a regular expression.
 *
 * @param job
 *          the job to list input paths for
 * @return array of FileStatus objects
 * @throws IOException
 *           if there are zero input paths or none match.
 */
protected List<FileStatus> listStatus(BSPJob job) throws IOException {
    List<FileStatus> result = new ArrayList<FileStatus>();
    Path[] dirs = getInputPaths(job);
    if (dirs.length == 0) {
        throw new IOException("No input paths specified in job");
    }
    List<IOException> errors = new ArrayList<IOException>();
    // creates a MultiPathFilter with the hiddenFileFilter and the
    // user-provided one (if any).
    List<PathFilter> filters = new ArrayList<PathFilter>();
    filters.add(HIDDEN_FILE_FILTER);
    PathFilter inputFilter = new MultiPathFilter(filters);
    for (int i = 0; i < dirs.length; ++i) {
        Path p = dirs[i];
        FileSystem fs = p.getFileSystem(job.getConf());
        FileStatus[] matches = fs.globStatus(p, inputFilter);
        if (matches == null) {
            errors.add(new IOException("Input path does not exist: " + p));
        } else if (matches.length == 0) {
            errors.add(new IOException("Input Pattern " + p + " matches 0 files"));
        } else {
            for (FileStatus globStat : matches) {
                if (globStat.isDir()) {
                    for (FileStatus stat : fs.listStatus(globStat.getPath(), inputFilter)) {
                        result.add(stat);
                    }
                } else {
                    result.add(globStat);
                }
            }
        }
    }
    if (!errors.isEmpty()) {
        throw new InvalidInputException(errors);
    }
    LOG.info("Total input paths to process : " + result.size());
    return result;
}
From source file: com.ci.backports.avro.mapreduce.AvroOutputFormat.java
License: Apache License
@Override
public RecordWriter<AvroWrapper<T>, NullWritable> getRecordWriter(TaskAttemptContext context)
        throws IOException {
    Schema schema = AvroJob.getOutputSchema(context.getConfiguration());
    if (schema == null) {
        throw new RuntimeException("AvroOutputFormat requires an output schema.");
    }

    final DataFileWriter<T> writer = new DataFileWriter<T>(new SpecificDatumWriter<T>());
    if (FileOutputFormat.getCompressOutput(context)) {
        int level = context.getConfiguration().getInt(
                org.apache.avro.mapred.AvroOutputFormat.DEFLATE_LEVEL_KEY,
                org.apache.avro.mapred.AvroOutputFormat.DEFAULT_DEFLATE_LEVEL);
        writer.setCodec(CodecFactory.deflateCodec(level));
    }

    Path path = getDefaultWorkFile(context, org.apache.avro.mapred.AvroOutputFormat.EXT);
    writer.create(schema, path.getFileSystem(context.getConfiguration()).create(path));

    return new RecordWriter<AvroWrapper<T>, NullWritable>() {
        public void write(AvroWrapper<T> record, NullWritable ignore) throws IOException {
            writer.append(record.datum());
        }

        public void close(TaskAttemptContext context) throws IOException {
            writer.close();
        }
    };
}