List of usage examples for org.apache.hadoop.fs.Path.getFileSystem
public FileSystem getFileSystem(Configuration conf) throws IOException
From source file: com.hadoop.mapreduce.FourMcInputFormat.java
License: BSD License
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    Configuration conf = HadoopUtils.getConfiguration(job);
    List<InputSplit> defaultSplits = super.getSplits(job);
    List<InputSplit> result = new ArrayList<InputSplit>();
    Path prevFile = null;
    FourMcBlockIndex prevIndex = null;

    for (InputSplit genericSplit : defaultSplits) {
        // Load the index.
        FileSplit fileSplit = (FileSplit) genericSplit;
        Path file = fileSplit.getPath();
        FileSystem fs = file.getFileSystem(conf);

        FourMcBlockIndex index;
        if (file.equals(prevFile)) {
            index = prevIndex;
        } else {
            index = FourMcBlockIndex.readIndex(fs, file);
            prevFile = file;
            prevIndex = index;
        }

        if (index == null) {
            throw new IOException("BlockIndex unreadable for " + file);
        }

        if (index.isEmpty()) { // leave the default split for empty block index
            result.add(fileSplit);
            continue;
        }

        long start = fileSplit.getStart();
        long end = start + fileSplit.getLength();

        long fourMcStart = index.alignSliceStartToIndex(start, end);
        long fourMcEnd = index.alignSliceEndToIndex(end, fs.getFileStatus(file).getLen());

        if (fourMcStart != FourMcBlockIndex.NOT_FOUND && fourMcEnd != FourMcBlockIndex.NOT_FOUND) {
            result.add(new FileSplit(file, fourMcStart, fourMcEnd - fourMcStart, fileSplit.getLocations()));
            LOG.debug("Added 4mc split for " + file + "[start=" + fourMcStart + ", length="
                    + (fourMcEnd - fourMcStart) + "]");
        }
    }
    return result;
}
From source file: com.hadoop.mapreduce.FourMcLineRecordReader.java
License: BSD License
@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    FileSplit split = (FileSplit) genericSplit;
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    Configuration job = HadoopUtils.getConfiguration(context);
    maxLineLen = job.getInt(MAX_LINE_LEN_CONF, Integer.MAX_VALUE);

    FileSystem fs = file.getFileSystem(job);
    CompressionCodecFactory compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = compressionCodecs.getCodec(file);
    if (codec == null) {
        throw new IOException("Codec for file " + file + " not found, cannot run");
    }

    // open the file and seek to the start of the split
    fileIn = fs.open(split.getPath());

    // creates input stream and also reads the file header
    in = new LineReader(codec.createInputStream(fileIn), job);

    if (start != 0) {
        fileIn.seek(start);
        // read and ignore the first line
        in.readLine(new Text());
        start = fileIn.getPos();
    }
    this.pos = start;
}
From source file: com.hadoop.mapreduce.FourMzInputFormat.java
License: BSD License
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    Configuration conf = HadoopUtils.getConfiguration(job);
    List<InputSplit> defaultSplits = super.getSplits(job);
    List<InputSplit> result = new ArrayList<InputSplit>();
    Path prevFile = null;
    FourMzBlockIndex prevIndex = null;

    for (InputSplit genericSplit : defaultSplits) {
        // Load the index.
        FileSplit fileSplit = (FileSplit) genericSplit;
        Path file = fileSplit.getPath();
        FileSystem fs = file.getFileSystem(conf);

        FourMzBlockIndex index;
        if (file.equals(prevFile)) {
            index = prevIndex;
        } else {
            index = FourMzBlockIndex.readIndex(fs, file);
            prevFile = file;
            prevIndex = index;
        }

        if (index == null) {
            throw new IOException("BlockIndex unreadable for " + file);
        }

        if (index.isEmpty()) { // leave the default split for empty block index
            result.add(fileSplit);
            continue;
        }

        long start = fileSplit.getStart();
        long end = start + fileSplit.getLength();

        long fourMcStart = index.alignSliceStartToIndex(start, end);
        long fourMcEnd = index.alignSliceEndToIndex(end, fs.getFileStatus(file).getLen());

        if (fourMcStart != FourMzBlockIndex.NOT_FOUND && fourMcEnd != FourMzBlockIndex.NOT_FOUND) {
            result.add(new FileSplit(file, fourMcStart, fourMcEnd - fourMcStart, fileSplit.getLocations()));
            LOG.debug("Added 4mz split for " + file + "[start=" + fourMcStart + ", length="
                    + (fourMcEnd - fourMcStart) + "]");
        }
    }
    return result;
}
From source file: com.hadoop.mapreduce.LzoLineRecordReader.java
License: Open Source License
@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    FileSplit split = (FileSplit) genericSplit;
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    Configuration job = context.getConfiguration();

    FileSystem fs = file.getFileSystem(job);
    CompressionCodecFactory compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = compressionCodecs.getCodec(file);
    if (codec == null) {
        throw new IOException("No codec found for file " + file + ", cannot run");
    }

    // open the file and seek to the start of the split
    fileIn = fs.open(split.getPath());

    // creates input stream and also reads the file header
    in = new LineReader(codec.createInputStream(fileIn), job);

    if (start != 0) {
        fileIn.seek(start);
        // read and ignore the first line
        in.readLine(new Text());
        start = fileIn.getPos();
    }
    this.pos = start;
}
From source file: com.hortonworks.pso.data.generator.mapreduce.DataGenTool.java
License: Apache License
@Override
public int run(String[] args) throws Exception {
    Job job = Job.getInstance(getConf());
    // new Job(conf, this.getClass().getCanonicalName());
    // Configuration conf = getConf();

    int mappers = 2;
    String output = null;
    String config = null;
    long count = 100;

    List<String> otherArgs = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-mappers".equals(args[i])) {
                mappers = Integer.parseInt(args[++i]);
                otherArgs.add("-Dmapreduce.job.maps=" + Integer.toString(mappers));
            } else if ("-output".equals(args[i])) {
                output = args[++i];
            } else if ("-json.cfg".equals(args[i])) {
                config = args[++i];
            } else if ("-count".equals(args[i])) {
                count = Long.parseLong(args[++i]);
            } else {
                otherArgs.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage(); // exits
        }
    }

    job.getConfiguration().set("json.cfg", config);

    String[] altArgs = new String[otherArgs.size()];
    otherArgs.toArray(altArgs);
    GenericOptionsParser gop = new GenericOptionsParser(job.getConfiguration(), altArgs);

    DataGenInputFormat.setNumberOfRows(job, count);

    job.setJarByClass(DataGenTool.class);

    Path output_path = new Path(output);
    if (output_path.getFileSystem(getConf()).exists(output_path)) {
        throw new IOException("Output directory " + output_path + " already exists.");
    }
    FileOutputFormat.setOutputPath(job, output_path);

    job.setMapperClass(DataGenMapper.class);
    // Map Only Job
    job.setNumReduceTasks(0);
    // job.setReducerClass(RerateReducer.class);

    job.setInputFormatClass(DataGenInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(Text.class);
    // job.setOutputKeyClass(Text.class);
    // job.setOutputValueClass(Text.class);

    return job.waitForCompletion(true) ? 0 : 1;
}
From source file: com.hotels.corc.test.OrcReader.java
License: Apache License
public OrcReader(Configuration conf, Path path) throws IOException {
    Reader reader = OrcFile.createReader(path.getFileSystem(conf), path);
    inspector = reader.getObjectInspector();
    rows = reader.rows();
}
From source file: com.hp.hpit.cs.MyTextOutputFormat.java
License: Apache License
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext job) throws IOException, InterruptedException {
    Configuration conf = job.getConfiguration();
    boolean isCompressed = getCompressOutput(job);
    String keyValueSeparator = conf.get("mapred.textoutputformat.separator", "\t");
    CompressionCodec codec = null;
    String extension = "";
    if (isCompressed) {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
        extension = codec.getDefaultExtension();
    }
    Path file = getDefaultWorkFile(job, extension);
    FileSystem fs = file.getFileSystem(conf);
    if (!isCompressed) {
        FSDataOutputStream fileOut = fs.create(file, false);
        return new LineRecordWriter<K, V>(fileOut, keyValueSeparator);
    } else {
        FSDataOutputStream fileOut = fs.create(file, false);
        return new LineRecordWriter<K, V>(new DataOutputStream(codec.createOutputStream(fileOut)),
                keyValueSeparator);
    }
}
From source file: com.huayu.metis.flume.sink.hdfs.HDFSSequenceFile.java
License: Apache License
@Override
public void open(String filePath) throws IOException {
    Configuration conf = new Configuration();
    Path dstPath = new Path(filePath);
    FileSystem fileSystem = dstPath.getFileSystem(conf);
    // On Hadoop 2.2, appending requires dfs.append.support to be enabled
    if (fileSystem.exists(dstPath) && fileSystem.isFile(dstPath)) {
        outStream = fileSystem.append(dstPath);
    } else {
        outStream = fileSystem.create(dstPath);
    }
    writer = SequenceFile.createWriter(conf, SequenceFile.Writer.stream(outStream),
            SequenceFile.Writer.keyClass(serializer.getKeyClass()),
            SequenceFile.Writer.valueClass(serializer.getValueClass()));
    registerCurrentStream(outStream, fileSystem, dstPath);
}
From source file: com.ibm.bi.dml.runtime.matrix.CSVReblockMR.java
License: Open Source License
/**
 * Method to find the (part)file with the lexicographically smallest path among all
 * (part)files in <code>inputPath</code>, scanning them in the order given by
 * <code>fs.listStatus()</code>.
 *
 * @param job
 * @param inputPath
 * @return
 * @throws IOException
 * @throws FileNotFoundException
 */
public static String findSmallestFile(JobConf job, String inputPath) throws FileNotFoundException, IOException {
    String smallestFile = null;

    Path p = new Path(inputPath);
    FileSystem fs = p.getFileSystem(job);
    if (!fs.isDirectory(p))
        smallestFile = p.makeQualified(fs).toString();
    else {
        FileStatus[] stats = fs.listStatus(p, hiddenFileFilter);
        if (stats.length == 0)
            smallestFile = "";
        else {
            smallestFile = stats[0].getPath().toString();
            for (int j = 1; j < stats.length; j++) {
                String f = stats[j].getPath().toString();
                if (f.compareTo(smallestFile) < 0)
                    smallestFile = f;
            }
        }
    }
    return smallestFile;
}
From source file: com.ibm.bi.dml.runtime.matrix.CSVReblockMR.java
License: Open Source License
public static JobReturn runJob(MRJobInstruction inst, String[] inputs, InputInfo[] inputInfos, long[] rlens,
        long[] clens, int[] brlens, int[] bclens, String reblockInstructions,
        String otherInstructionsInReducer, int numReducers, int replication, byte[] resultIndexes,
        String[] outputs, OutputInfo[] outputInfos) throws Exception {
    String[] smallestFiles = new String[inputs.length];
    JobConf job = new JobConf();
    for (int i = 0; i < inputs.length; i++) {
        smallestFiles[i] = findSmallestFile(job, inputs[i]);
    }

    for (int i = 0; i < inputs.length; i++) {
        // recompute the smallest file per input (same logic as findSmallestFile)
        Path p = new Path(inputs[i]);
        FileSystem fs = p.getFileSystem(job);
        if (!fs.isDirectory(p))
            smallestFiles[i] = p.makeQualified(fs).toString();
        else {
            FileStatus[] stats = fs.listStatus(p, hiddenFileFilter);
            if (stats.length == 0)
                smallestFiles[i] = "";
            else {
                smallestFiles[i] = stats[0].getPath().toString();
                for (int j = 1; j < stats.length; j++) {
                    String f = stats[j].getPath().toString();
                    if (f.compareTo(smallestFiles[i]) < 0)
                        smallestFiles[i] = f;
                }
            }
        }
    }

    AssignRowIDMRReturn ret1 = CSVReblockMR.runAssignRowIDMRJob(inputs, inputInfos, brlens, bclens,
            reblockInstructions, replication, smallestFiles);
    for (int i = 0; i < rlens.length; i++)
        if ((rlens[i] > 0 && rlens[i] != ret1.rlens[i]) || (clens[i] > 0 && clens[i] != ret1.clens[i]))
            throw new RuntimeException("Dimension doesn't match for input matrix " + i + ", expected ("
                    + rlens[i] + ", " + clens[i] + ") but got (" + ret1.rlens[i] + ", " + ret1.clens[i] + ")");

    JobReturn ret = CSVReblockMR.runCSVReblockJob(null, inputs, inputInfos, ret1.rlens, ret1.clens, brlens,
            bclens, reblockInstructions, otherInstructionsInReducer, numReducers, replication, resultIndexes,
            outputs, outputInfos, ret1.counterFile, smallestFiles);
    return ret;
}