List of usage examples for org.apache.hadoop.fs.Path.getFileSystem
public FileSystem getFileSystem(Configuration conf) throws IOException
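getFileSystem returns the FileSystem that owns this Path, resolving the Path's scheme and authority against the given Configuration (and falling back to the configured default filesystem when the Path has no scheme). This is why it is generally preferred over FileSystem.get(conf) when a job may touch paths on more than one filesystem. Before the examples from real projects below, here is a minimal sketch of the basic pattern; the path string and written text are placeholders, not taken from any of the source projects:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetFileSystemExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Placeholder path; an explicit scheme such as hdfs:// would
        // resolve to that filesystem instead of the configured default.
        Path path = new Path("/tmp/example.txt");

        // Resolve the FileSystem that owns this Path.
        FileSystem fs = path.getFileSystem(conf);

        // Use the handle as usual, e.g. create and write a file.
        try (FSDataOutputStream out = fs.create(path, true)) {
            out.writeUTF("hello");
        }
    }
}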
From source file: com.bonc.mr_roamRecognition_hjpt.comm.FileCountTextOutputFormat.java
License: Apache License
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext job) throws IOException, InterruptedException {
    Configuration conf = job.getConfiguration();
    boolean isCompressed = getCompressOutput(job);
    String keyValueSeparator = conf.get(SEPERATOR, "\t");
    CompressionCodec codec = null;
    String extension = "";
    if (isCompressed) {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
        extension = codec.getDefaultExtension();
    }
    Path file = getDefaultWorkFile(job, extension);
    FileSystem fs = file.getFileSystem(conf);
    if (!isCompressed) {
        FSDataOutputStream fileOut = fs.create(file, false);
        return new LineRecordWriter<K, V>(fileOut, keyValueSeparator);
    } else {
        FSDataOutputStream fileOut = fs.create(file, false);
        return new LineRecordWriter<K, V>(new DataOutputStream(codec.createOutputStream(fileOut)),
                keyValueSeparator);
    }
}
From source file: com.bonc.mr_roamRecognition_hjpt.comm.NewFileOutputFormat.java
License: Apache License
/**
 * Set the {@link Path} of the output directory for the map-reduce job.
 *
 * @param job
 *          The job to modify
 * @param outputDir
 *          the {@link Path} of the output directory for the map-reduce job.
 */
public static void setOutputPath(Job job, Path outputDir) {
    try {
        outputDir = outputDir.getFileSystem(job.getConfiguration()).makeQualified(outputDir);
    } catch (IOException e) {
        // Throw the IOException as a RuntimeException to be compatible with MR1
        throw new RuntimeException(e);
    }
    job.getConfiguration().set(FileOutputFormat.OUTDIR, outputDir.toString());
}
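A hypothetical call site for the helper above; the job instance and output directory are placeholders, not taken from the source project:

    Job job = Job.getInstance(new Configuration());
    NewFileOutputFormat.setOutputPath(job, new Path("/user/example/output"));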
From source file: com.bonc.mr_roamRecognition_hjpt.comm.PathRecordReader.java
License: Apache License
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt(MAX_LINE_LENGTH, Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    path = split.getPath().toString();

    // open the file and seek to the start of the split
    final FileSystem fs = file.getFileSystem(job);
    fileIn = fs.open(file);

    CompressionCodec codec = new CompressionCodecFactory(job).getCodec(file);
    if (null != codec) {
        isCompressedInput = true;
        decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(
                    fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
            in = new CompressedSplitLineReader(cIn, job, this.recordDelimiterBytes);
            start = cIn.getAdjustedStart();
            end = cIn.getAdjustedEnd();
            filePosition = cIn;
        } else {
            in = new SplitLineReader(codec.createInputStream(fileIn, decompressor), job,
                    this.recordDelimiterBytes);
            filePosition = fileIn;
        }
    } else {
        fileIn.seek(start);
        in = new SplitLineReader(fileIn, job, this.recordDelimiterBytes);
        filePosition = fileIn;
    }
    // If this is not the first split, we always throw away the first record,
    // because we always (except for the last split) read one extra line in
    // the next() method.
    if (start != 0) {
        start += in.readLine(new Text(), 0, maxBytesToConsume(start));
    }
    this.pos = start;
}
From source file: com.btoddb.chronicle.apps.AvroTools.java
License: Open Source License
private void testFileAndFix(Path inFile) throws IOException {
    FileContext context = FileContext.getFileContext(hdfsConfig);
    AvroFSInput input = new AvroFSInput(context, inFile);

    ReflectDatumReader<Object> reader = new ReflectDatumReader<>();
    FileReader<Object> fileReader = DataFileReader.openReader(input, reader);

    Path outFile = inFile.suffix(".fixing");
    FSDataOutputStream output = FileSystem.create(outFile.getFileSystem(hdfsConfig), outFile,
            FsPermission.getDefault());

    DataFileWriter<Object> writer = new DataFileWriter<>(new GenericDatumWriter<>());
    writer.setCodec(CodecFactory.snappyCodec());

    boolean corrupted = false;
    long count = 0;
    try {
        Schema schema = fileReader.getSchema();
        writer.create(schema, output);
        for (;;) {
            try {
                if (fileReader.hasNext()) {
                    Object obj = fileReader.next();
                    count++;
                    writer.append(obj);
                } else {
                    break;
                }
            } catch (AvroRuntimeException e) {
                corrupted = true;
                System.out.println(" - file pointer = " + input.tell());
                if (e.getCause() instanceof EOFException) {
                    System.out.println(" - EOF occurred so we're done : " + e.getMessage());
                    break;
                } else if (e.getCause() instanceof IOException) {
                    System.out.println(" - will try to 'next' past the error : " + e.getMessage());
                    try {
                        fileReader.next();
                        System.out.println(" - 'next' worked - didn't really expect it to, but great!");
                    } catch (Exception e2) {
                        System.out.println(" - 'next' did not work - will continue on and see what happens : "
                                + e2.getMessage());
                    }
                    continue;
                }
                break;
            } catch (Exception e) {
                corrupted = true;
                System.out.println(" - file pointer = " + input.tell());
                e.printStackTrace();
                break;
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        System.out.println(" - processed " + count + " records");
        if (null != fileReader) {
            try {
                fileReader.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
        if (null != writer) {
            try {
                writer.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

    if (!corrupted) {
        outFile.getFileSystem(hdfsConfig).delete(outFile, false);
    } else {
        outFile.getFileSystem(hdfsConfig).rename(outFile, inFile.suffix(".fixed"));
    }
}
From source file: com.btoddb.chronicle.plunkers.hdfs.HdfsFileBaseImpl.java
License: Open Source License
@Override
public void init(String permFilename, String openFilename) throws IOException {
    this.permFilename = permFilename;
    this.openFilename = openFilename;

    Configuration conf = new Configuration();
    Path path = new Path(this.openFilename);
    fileSystem = path.getFileSystem(conf);
    outputStream = fileSystem.create(path);
}
From source file: com.chinamobile.bcbsp.client.BSPJobClient.java
License: Apache License
/**
 * Get a fileSystem handle. We need this to prepare jobs for submission to
 * the BSP system.
 *
 * @return the fileSystem handle.
 */
public synchronized FileSystem getFs() throws IOException {
    if (this.fs == null) {
        Path systemDir = getSystemDir();
        this.fs = systemDir.getFileSystem(getConf());
    }
    return fs;
}
From source file: com.chinamobile.bcbsp.client.BSPJobClient.java
License: Apache License
/**
 * Write the splits file header.
 *
 * @param conf Configuration
 * @param filename path of the file
 * @param length size of split
 * @return DataOutputStream
 * @throws IOException
 */
private DataOutputStream writeSplitsFileHeader(Configuration conf, Path filename, int length)
        throws IOException {
    // write the splits to a file for the job tracker
    FileSystem files = filename.getFileSystem(conf);
    BSPFSDataOutputStream bspout = new BSPFSDataOutputStreamImpl(files, filename,
            new BSPFspermissionImpl(0).getFp());
    bspout.write(SPLIT_FILE_HEADER);
    WritableUtils.writeVInt(bspout.getOut(), CURRENT_SPLIT_FILE_VERSION);
    WritableUtils.writeVInt(bspout.getOut(), length);
    return bspout.getOut();
}
From source file: com.chinamobile.bcbsp.io.BSPFileInputFormat.java
License: Apache License
/**
 * Generate the list of files and make them into FileSplits.
 *
 * @param job
 *          The current BSPJob job
 * @return input splits
 */
@Override
public List<InputSplit> getSplits(BSPJob job) throws IOException {
    List<InputSplit> splits = new ArrayList<InputSplit>();
    for (FileStatus file : listStatus(job)) {
        Path path = file.getPath();
        FileSystem fs = path.getFileSystem(job.getConf());
        long length = file.getLen();
        BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length);
        if ((length != 0) && isSplitable(job, path)) {
            long blockSize = file.getBlockSize();
            long splitSize = 0L;
            if (job.getInt(Constants.USER_BC_BSP_JOB_SPLIT_FACTOR, 1) == 1) {
                if (job.getSplitSize() == 0L) {
                    splitSize = blockSize;
                } else {
                    splitSize = job.getSplitSize();
                }
            } else {
                if (job.getSplitSize() == 0L) {
                    splitSize = blockSize * job.getInt(Constants.USER_BC_BSP_JOB_SPLIT_FACTOR, 1);
                } else {
                    splitSize = job.getSplitSize() * job.getInt(Constants.USER_BC_BSP_JOB_SPLIT_FACTOR, 1);
                }
            }
            LOG.info("[Split Size] " + (splitSize / (1024 * 1024)) + " MB");
            long bytesRemaining = length;
            while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) {
                int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
                splits.add(new FileSplit(path, length - bytesRemaining, splitSize,
                        blkLocations[blkIndex].getHosts()));
                bytesRemaining -= splitSize;
            }
            if (bytesRemaining != 0) {
                splits.add(new FileSplit(path, length - bytesRemaining, bytesRemaining,
                        blkLocations[blkLocations.length - 1].getHosts()));
            }
        } else if (length != 0) {
            splits.add(new FileSplit(path, 0, length, blkLocations[0].getHosts()));
        } else {
            // Create an empty hosts array for zero-length files
            splits.add(new FileSplit(path, 0, length, new String[0]));
        }
    }
    LOG.info("[Split Number] " + splits.size());
    return splits;
}
From source file: com.chinamobile.bcbsp.io.BSPFileInputFormat.java
License: Apache License
/**
 * List input directories. Subclasses may override to, e.g., select only
 * files matching a regular expression.
 *
 * @param job
 *          the job to list input paths for
 * @return array of FileStatus objects
 * @throws IOException
 *           if there are zero input paths or none match.
 */
protected List<FileStatus> listStatus(BSPJob job) throws IOException {
    List<FileStatus> result = new ArrayList<FileStatus>();
    Path[] dirs = getInputPaths(job);
    if (dirs.length == 0) {
        throw new IOException("No input paths specified in job");
    }
    List<IOException> errors = new ArrayList<IOException>();
    // creates a MultiPathFilter with the hiddenFileFilter and the
    // user-provided one (if any).
    List<PathFilter> filters = new ArrayList<PathFilter>();
    filters.add(HIDDEN_FILE_FILTER);
    PathFilter inputFilter = new MultiPathFilter(filters);
    for (int i = 0; i < dirs.length; ++i) {
        Path p = dirs[i];
        FileSystem fs = p.getFileSystem(job.getConf());
        FileStatus[] matches = fs.globStatus(p, inputFilter);
        if (matches == null) {
            errors.add(new IOException("Input path does not exist: " + p));
        } else if (matches.length == 0) {
            errors.add(new IOException("Input Pattern " + p + " matches 0 files"));
        } else {
            for (FileStatus globStat : matches) {
                if (globStat.isDir()) {
                    for (FileStatus stat : fs.listStatus(globStat.getPath(), inputFilter)) {
                        result.add(stat);
                    }
                } else {
                    result.add(globStat);
                }
            }
        }
    }
    if (!errors.isEmpty()) {
        throw new InvalidInputException(errors);
    }
    LOG.info("Total input paths to process : " + result.size());
    return result;
}
From source file: com.ci.backports.avro.mapreduce.AvroOutputFormat.java
License: Apache License
@Override
public RecordWriter<AvroWrapper<T>, NullWritable> getRecordWriter(TaskAttemptContext context)
        throws IOException {
    Schema schema = AvroJob.getOutputSchema(context.getConfiguration());
    if (schema == null) {
        throw new RuntimeException("AvroOutputFormat requires an output schema.");
    }

    final DataFileWriter<T> writer = new DataFileWriter<T>(new SpecificDatumWriter<T>());
    if (FileOutputFormat.getCompressOutput(context)) {
        int level = context.getConfiguration().getInt(
                org.apache.avro.mapred.AvroOutputFormat.DEFLATE_LEVEL_KEY,
                org.apache.avro.mapred.AvroOutputFormat.DEFAULT_DEFLATE_LEVEL);
        writer.setCodec(CodecFactory.deflateCodec(level));
    }

    Path path = getDefaultWorkFile(context, org.apache.avro.mapred.AvroOutputFormat.EXT);
    writer.create(schema, path.getFileSystem(context.getConfiguration()).create(path));

    return new RecordWriter<AvroWrapper<T>, NullWritable>() {
        public void write(AvroWrapper<T> record, NullWritable ignore) throws IOException {
            writer.append(record.datum());
        }

        public void close(TaskAttemptContext context) throws IOException {
            writer.close();
        }
    };
}