List of usage examples for org.apache.hadoop.fs.Path.getFileSystem
public FileSystem getFileSystem(Configuration conf) throws IOException
From source file: com.hadoop.mapreduce.FourMcInputFormat.java
License: BSD License
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    Configuration conf = HadoopUtils.getConfiguration(job);
    List<InputSplit> defaultSplits = super.getSplits(job);
    List<InputSplit> result = new ArrayList<InputSplit>();
    Path prevFile = null;
    FourMcBlockIndex prevIndex = null;

    for (InputSplit genericSplit : defaultSplits) {
        // Load the index.
        FileSplit fileSplit = (FileSplit) genericSplit;
        Path file = fileSplit.getPath();
        FileSystem fs = file.getFileSystem(conf);

        FourMcBlockIndex index;
        if (file.equals(prevFile)) {
            index = prevIndex;
        } else {
            index = FourMcBlockIndex.readIndex(fs, file);
            prevFile = file;
            prevIndex = index;
        }

        if (index == null) {
            throw new IOException("BlockIndex unreadable for " + file);
        }

        if (index.isEmpty()) { // leave the default split for empty block index
            result.add(fileSplit);
            continue;
        }

        long start = fileSplit.getStart();
        long end = start + fileSplit.getLength();

        long fourMcStart = index.alignSliceStartToIndex(start, end);
        long fourMcEnd = index.alignSliceEndToIndex(end, fs.getFileStatus(file).getLen());

        if (fourMcStart != FourMcBlockIndex.NOT_FOUND && fourMcEnd != FourMcBlockIndex.NOT_FOUND) {
            result.add(new FileSplit(file, fourMcStart, fourMcEnd - fourMcStart, fileSplit.getLocations()));
            LOG.debug("Added 4mc split for " + file + "[start=" + fourMcStart + ", length="
                    + (fourMcEnd - fourMcStart) + "]");
        }
    }
    return result;
}
From source file: com.hadoop.mapreduce.FourMcLineRecordReader.java
License: BSD License
@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    FileSplit split = (FileSplit) genericSplit;
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    Configuration job = HadoopUtils.getConfiguration(context);
    maxLineLen = job.getInt(MAX_LINE_LEN_CONF, Integer.MAX_VALUE);

    FileSystem fs = file.getFileSystem(job);
    CompressionCodecFactory compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = compressionCodecs.getCodec(file);
    if (codec == null) {
        throw new IOException("Codec for file " + file + " not found, cannot run");
    }

    // open the file and seek to the start of the split
    fileIn = fs.open(split.getPath());

    // creates input stream and also reads the file header
    in = new LineReader(codec.createInputStream(fileIn), job);

    if (start != 0) {
        fileIn.seek(start);
        // read and ignore the first line
        in.readLine(new Text());
        start = fileIn.getPos();
    }
    this.pos = start;
}
From source file: com.hadoop.mapreduce.FourMzInputFormat.java
License: BSD License
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    Configuration conf = HadoopUtils.getConfiguration(job);
    List<InputSplit> defaultSplits = super.getSplits(job);
    List<InputSplit> result = new ArrayList<InputSplit>();
    Path prevFile = null;
    FourMzBlockIndex prevIndex = null;

    for (InputSplit genericSplit : defaultSplits) {
        // Load the index.
        FileSplit fileSplit = (FileSplit) genericSplit;
        Path file = fileSplit.getPath();
        FileSystem fs = file.getFileSystem(conf);

        FourMzBlockIndex index;
        if (file.equals(prevFile)) {
            index = prevIndex;
        } else {
            index = FourMzBlockIndex.readIndex(fs, file);
            prevFile = file;
            prevIndex = index;
        }

        if (index == null) {
            throw new IOException("BlockIndex unreadable for " + file);
        }

        if (index.isEmpty()) { // leave the default split for empty block index
            result.add(fileSplit);
            continue;
        }

        long start = fileSplit.getStart();
        long end = start + fileSplit.getLength();

        long fourMcStart = index.alignSliceStartToIndex(start, end);
        long fourMcEnd = index.alignSliceEndToIndex(end, fs.getFileStatus(file).getLen());

        if (fourMcStart != FourMzBlockIndex.NOT_FOUND && fourMcEnd != FourMzBlockIndex.NOT_FOUND) {
            result.add(new FileSplit(file, fourMcStart, fourMcEnd - fourMcStart, fileSplit.getLocations()));
            LOG.debug("Added 4mz split for " + file + "[start=" + fourMcStart + ", length="
                    + (fourMcEnd - fourMcStart) + "]");
        }
    }
    return result;
}
From source file: com.hadoop.mapreduce.LzoLineRecordReader.java
License: Open Source License
@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    FileSplit split = (FileSplit) genericSplit;
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    Configuration job = context.getConfiguration();

    FileSystem fs = file.getFileSystem(job);
    CompressionCodecFactory compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = compressionCodecs.getCodec(file);
    if (codec == null) {
        throw new IOException("No codec found for file " + file + ", cannot run");
    }

    // open the file and seek to the start of the split
    fileIn = fs.open(split.getPath());

    // creates input stream and also reads the file header
    in = new LineReader(codec.createInputStream(fileIn), job);

    if (start != 0) {
        fileIn.seek(start);
        // read and ignore the first line
        in.readLine(new Text());
        start = fileIn.getPos();
    }
    this.pos = start;
}
From source file: com.hortonworks.pso.data.generator.mapreduce.DataGenTool.java
License: Apache License
@Override
public int run(String[] args) throws Exception {
    Job job = Job.getInstance(getConf());
    // new Job(conf, this.getClass().getCanonicalName());
    // Configuration conf = getConf();

    int mappers = 2;
    String output = null;
    String config = null;
    long count = 100;

    List<String> otherArgs = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-mappers".equals(args[i])) {
                mappers = Integer.parseInt(args[++i]);
                otherArgs.add("-Dmapreduce.job.maps=" + Integer.toString(mappers));
            } else if ("-output".equals(args[i])) {
                output = args[++i];
            } else if ("-json.cfg".equals(args[i])) {
                config = args[++i];
            } else if ("-count".equals(args[i])) {
                count = Long.parseLong(args[++i]);
            } else {
                otherArgs.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage(); // exits
        }
    }

    job.getConfiguration().set("json.cfg", config);

    String[] altArgs = new String[otherArgs.size()];
    otherArgs.toArray(altArgs);
    GenericOptionsParser gop = new GenericOptionsParser(job.getConfiguration(), altArgs);

    DataGenInputFormat.setNumberOfRows(job, count);

    job.setJarByClass(DataGenTool.class);

    Path output_path = new Path(output);
    if (output_path.getFileSystem(getConf()).exists(output_path)) {
        throw new IOException("Output directory " + output_path + " already exists.");
    }
    FileOutputFormat.setOutputPath(job, output_path);

    job.setMapperClass(DataGenMapper.class);
    // Map Only Job
    job.setNumReduceTasks(0);
    // job.setReducerClass(RerateReducer.class);

    job.setInputFormatClass(DataGenInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(Text.class);
    // job.setOutputKeyClass(Text.class);
    // job.setOutputValueClass(Text.class);

    return job.waitForCompletion(true) ? 0 : 1;
}
From source file: com.hotels.corc.test.OrcReader.java
License: Apache License
public OrcReader(Configuration conf, Path path) throws IOException {
    Reader reader = OrcFile.createReader(path.getFileSystem(conf), path);
    inspector = reader.getObjectInspector();
    rows = reader.rows();
}
From source file: com.hp.hpit.cs.MyTextOutputFormat.java
License: Apache License
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext job) throws IOException, InterruptedException {
    Configuration conf = job.getConfiguration();
    boolean isCompressed = getCompressOutput(job);
    String keyValueSeparator = conf.get("mapred.textoutputformat.separator", "\t");
    CompressionCodec codec = null;
    String extension = "";
    if (isCompressed) {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
        extension = codec.getDefaultExtension();
    }
    Path file = getDefaultWorkFile(job, extension);
    FileSystem fs = file.getFileSystem(conf);
    if (!isCompressed) {
        FSDataOutputStream fileOut = fs.create(file, false);
        return new LineRecordWriter<K, V>(fileOut, keyValueSeparator);
    } else {
        FSDataOutputStream fileOut = fs.create(file, false);
        return new LineRecordWriter<K, V>(new DataOutputStream(codec.createOutputStream(fileOut)),
                keyValueSeparator);
    }
}
From source file: com.huayu.metis.flume.sink.hdfs.HDFSSequenceFile.java
License: Apache License
@Override
public void open(String filePath) throws IOException {
    Configuration conf = new Configuration();
    Path dstPath = new Path(filePath);
    FileSystem fileSystem = dstPath.getFileSystem(conf);
    // On Hadoop 2.2, appending requires dfs.append.support to be enabled
    if (fileSystem.exists(dstPath) && fileSystem.isFile(dstPath)) {
        outStream = fileSystem.append(dstPath);
    } else {
        outStream = fileSystem.create(dstPath);
    }
    writer = SequenceFile.createWriter(conf, SequenceFile.Writer.stream(outStream),
            SequenceFile.Writer.keyClass(serializer.getKeyClass()),
            SequenceFile.Writer.valueClass(serializer.getValueClass()));
    registerCurrentStream(outStream, fileSystem, dstPath);
}
From source file: com.ibm.bi.dml.runtime.matrix.CSVReblockMR.java
License: Open Source License
/**
 * Method to find the (part)file with the lexicographically smallest path among all
 * (part)files in <code>inputPath</code>, scanning them in the order given by
 * <code>fs.listStatus()</code>.
 *
 * @param job
 * @param inputPath
 * @return
 * @throws IOException
 * @throws FileNotFoundException
 */
public static String findSmallestFile(JobConf job, String inputPath) throws FileNotFoundException, IOException {
    String smallestFile = null;

    Path p = new Path(inputPath);
    FileSystem fs = p.getFileSystem(job);
    if (!fs.isDirectory(p))
        smallestFile = p.makeQualified(fs).toString();
    else {
        FileStatus[] stats = fs.listStatus(p, hiddenFileFilter);
        if (stats.length == 0)
            smallestFile = "";
        else {
            smallestFile = stats[0].getPath().toString();
            for (int j = 1; j < stats.length; j++) {
                String f = stats[j].getPath().toString();
                if (f.compareTo(smallestFile) < 0)
                    smallestFile = f;
            }
        }
    }
    return smallestFile;
}
From source file: com.ibm.bi.dml.runtime.matrix.CSVReblockMR.java
License: Open Source License
public static JobReturn runJob(MRJobInstruction inst, String[] inputs, InputInfo[] inputInfos, long[] rlens,
        long[] clens, int[] brlens, int[] bclens, String reblockInstructions,
        String otherInstructionsInReducer, int numReducers, int replication, byte[] resultIndexes,
        String[] outputs, OutputInfo[] outputInfos) throws Exception {
    String[] smallestFiles = new String[inputs.length];
    JobConf job = new JobConf();
    for (int i = 0; i < inputs.length; i++) {
        smallestFiles[i] = findSmallestFile(job, inputs[i]);
    }

    for (int i = 0; i < inputs.length; i++) {
        // recompute the smallest file per input (same logic as findSmallestFile)
        Path p = new Path(inputs[i]);
        FileSystem fs = p.getFileSystem(job);
        if (!fs.isDirectory(p))
            smallestFiles[i] = p.makeQualified(fs).toString();
        else {
            FileStatus[] stats = fs.listStatus(p, hiddenFileFilter);
            if (stats.length == 0)
                smallestFiles[i] = "";
            else {
                smallestFiles[i] = stats[0].getPath().toString();
                for (int j = 1; j < stats.length; j++) {
                    String f = stats[j].getPath().toString();
                    if (f.compareTo(smallestFiles[i]) < 0)
                        smallestFiles[i] = f;
                }
            }
        }
    }

    AssignRowIDMRReturn ret1 = CSVReblockMR.runAssignRowIDMRJob(inputs, inputInfos, brlens, bclens,
            reblockInstructions, replication, smallestFiles);
    for (int i = 0; i < rlens.length; i++)
        if ((rlens[i] > 0 && rlens[i] != ret1.rlens[i]) || (clens[i] > 0 && clens[i] != ret1.clens[i]))
            throw new RuntimeException("Dimension doesn't match for input matrix " + i + ", expected ("
                    + rlens[i] + ", " + clens[i] + ") but got (" + ret1.rlens[i] + ", " + ret1.clens[i] + ")");

    JobReturn ret = CSVReblockMR.runCSVReblockJob(null, inputs, inputInfos, ret1.rlens, ret1.clens, brlens,
            bclens, reblockInstructions, otherInstructionsInReducer, numReducers, replication, resultIndexes,
            outputs, outputInfos, ret1.counterFile, smallestFiles);
    return ret;
}