Example usage for org.apache.hadoop.fs Path getFileSystem

List of usage examples for org.apache.hadoop.fs Path getFileSystem

Introduction

This page collects usage examples for org.apache.hadoop.fs.Path.getFileSystem.

Prototype

public FileSystem getFileSystem(Configuration conf) throws IOException 

Document

Return the FileSystem that owns this Path.
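
Before the full examples below, here is a minimal, self-contained sketch of the typical call pattern. The HDFS URI and file path are placeholders, not taken from the examples that follow: the scheme and authority of the Path decide which FileSystem implementation getFileSystem(conf) resolves, and that FileSystem is then used to operate on the path.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetFileSystemExample {
    public static void main(String[] args) throws IOException {
        // The scheme/authority of the Path (hdfs://..., file:///, etc.)
        // determines which FileSystem implementation is returned.
        Path path = new Path("hdfs://localhost:9000/tmp/getfilesystem-example.txt");

        Configuration conf = new Configuration();
        FileSystem fs = path.getFileSystem(conf);

        // Use the resolved FileSystem to work with the path.
        FSDataOutputStream out = fs.create(path, true); // overwrite if present
        try {
            out.writeUTF("hello");
        } finally {
            out.close();
        }
        System.out.println("exists: " + fs.exists(path));
    }
}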

Usage

From source file:co.nubetech.hiho.mapreduce.lib.output.NoKeyOnlyValueOutputFormat.java

License:Apache License

public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context) throws IOException {
    boolean isCompressed = getCompressOutput(context);
    Configuration conf = context.getConfiguration();
    String ext = "";
    CompressionCodec codec = null;

    if (isCompressed) {
        // create the named codec
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(context, GzipCodec.class);
        codec = ReflectionUtils.newInstance(codecClass, conf);

        ext = codec.getDefaultExtension();
    }

    Path file = getDefaultWorkFile(context, ext);
    FileSystem fs = file.getFileSystem(conf);
    FSDataOutputStream fileOut = fs.create(file, false);
    DataOutputStream ostream = fileOut;

    if (isCompressed) {
        ostream = new DataOutputStream(codec.createOutputStream(fileOut));
    }

    return new NoKeyRecordWriter<K, V>(ostream);
}

From source file:coldstorage.io.Reader.java

License:Apache License

public static void main(String[] args) throws IOException {

    List<Long> idsToFind = new ArrayList<Long>();
    int maxId = 100000000;
    Random random = new Random(1);
    for (int i = 0; i < 1000; i++) {
        long id = (long) random.nextInt(maxId);
        //      System.out.println(id);
        idsToFind.add(id);
    }

    // idsToFind.clear();
    // idsToFind.add(58998000L);

    //    Path pathData = new Path("./out/data.avro");
    //    Path pathIndex = new Path("./out/data.index");

    Path pathData = new Path("hdfs://localhost:9000/avro/out/data.avro");
    Path pathIndex = new Path("hdfs://localhost:9000/avro/out/data.index");

    Configuration configuration = new Configuration();
    FileSystem fileSystem = pathData.getFileSystem(configuration);
    FileStatus indexFileStatus = fileSystem.getFileStatus(pathIndex);
    FileStatus dataFileStatus = fileSystem.getFileStatus(pathData);
    FSDataInputStream indexInputStream = fileSystem.open(pathIndex);
    FSDataInputStream dataInputStream = fileSystem.open(pathData);

    AvroFSInput fsInput = new AvroFSInput(dataInputStream, dataFileStatus.getLen());
    GenericDatumReader<GenericRecord> gdr = new GenericDatumReader<GenericRecord>();
    DataFileReader<GenericRecord> reader = new DataFileReader<GenericRecord>(fsInput, gdr);

    List<IndexKey> list = getList(indexInputStream, indexFileStatus.getLen());

    for (Long idToFind : idsToFind) {
        long t1 = System.nanoTime();
        GenericRecord lookupRecord = lookupRecord(reader, list, idToFind);
        long t2 = System.nanoTime();
        System.out.println("Found [" + idToFind + "] in [" + (t2 - t1) / 1000000.0 + " ms]:" + lookupRecord);
    }
}

From source file:coldstorage.io.Writer.java

License:Apache License

public static void main(String[] args) throws IOException {
    Schema.Parser parser = new Schema.Parser();
    Schema schema = parser.parse("{" + "\"namespace\": \"example.avro\", " + "\"type\": \"record\", "
            + "\"name\": \"User\", " + "\"fields\": [" + "     {\"name\": \"id\", \"type\": \"long\"},"
            + "     {\"name\": \"data\", \"type\": \"string\"}" + " ]}");

    GenericDatumWriter<GenericRecord> gdw = new GenericDatumWriter<GenericRecord>(schema);
    DataFileWriter<GenericRecord> dfw = new DataFileWriter<GenericRecord>(gdw);
    // Path pathData = new Path("./out/data.avro");
    // Path pathIndex = new Path("./out/data.index");

    Path pathData = new Path("hdfs://localhost:9000/avro/out/data.avro");
    Path pathIndex = new Path("hdfs://localhost:9000/avro/out/data.index");

    Configuration configuration = new Configuration();
    FileSystem fileSystem = pathData.getFileSystem(configuration);

    FSDataOutputStream indexOutputStream = fileSystem.create(pathIndex);
    FSDataOutputStream outputStream = fileSystem.create(pathData);
    dfw.create(schema, outputStream);
    GenericRecordBuilder genericRecordBuilder = new GenericRecordBuilder(schema);
    Random random = new Random(1);
    final int syncPoint = 1000;
    int count = 0;

    for (int i = 0; i < 100000000; i++) {
        genericRecordBuilder.set("id", (long) i);
        genericRecordBuilder.set("data", Long.toString(random.nextLong()));
        Record record = genericRecordBuilder.build();
        dfw.append(record);
        if (count >= syncPoint) {
            long sync = dfw.sync();
            Object object = record.get("id");
            writeIndex(indexOutputStream, sync, object);
            count = 0;
        }
        count++;
    }
    indexOutputStream.close();
    dfw.close();
}

From source file:colossal.pipe.ColFile.java

License:Apache License

public boolean exists(Configuration conf) {
    Path dfsPath = new Path(path);
    try {
        FileSystem fs = dfsPath.getFileSystem(conf);
        return fs.exists(dfsPath);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}

From source file:colossal.pipe.ColFile.java

License:Apache License

public boolean isObsolete(Configuration conf) {
    Path dfsPath = new Path(path);
    try {
        FileSystem fs = dfsPath.getFileSystem(conf);
        // this needs to be smart - we should encode in the file metadata the dependents and their dates used
        // so we can verify that any existing antecedent is not newer and declare victory...
        if (fs.exists(dfsPath)) {
            FileStatus[] statuses = fs.listStatus(dfsPath);
            for (FileStatus status : statuses) {
                if (!status.isDir()) {
                    if (format != Formats.AVRO_FORMAT || status.getPath().toString().endsWith(".avro")) {
                        return false; // may check for extension for other types
                    }
                } else {
                    if (!status.getPath().toString().endsWith("/_logs")
                            && !status.getPath().toString().endsWith("/_temporary")) {
                        return false;
                    }
                }
            }
        }
        return true; // needs more work!
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}

From source file:colossal.pipe.ColFile.java

License:Apache License

public void clearAndPrepareOutput(Configuration conf) {
    try {
        Path dfsPath = new Path(path);
        FileSystem fs = dfsPath.getFileSystem(conf);
        if (fs.exists(dfsPath)) {
            FileStatus[] statuses = fs.listStatus(dfsPath);
            for (FileStatus status : statuses) {
                if (status.isDir()) {
                    if (!status.getPath().toString().endsWith("/_logs")
                            && !status.getPath().toString().endsWith("/_temporary")) {
                        throw new IllegalArgumentException(
                                "Trying to overwrite directory with child directories: " + path);
                    }
                }
            }
        } else {
            fs.mkdirs(dfsPath);
        }
        fs.delete(dfsPath, true);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}

From source file:colossal.pipe.ColFile.java

License:Apache License

public long getTimestamp(JobConf conf) {
    try {
        Path dfsPath = new Path(path);
        FileSystem fs = dfsPath.getFileSystem(conf);
        return fs.getFileStatus(dfsPath).getModificationTime();
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}

From source file:com.ailk.oci.ocnosql.tools.load.single.SingleColumnImportTsv.java

License:Apache License

/**
 * Write out a SequenceFile that can be read by TotalOrderPartitioner
 * that contains the split points in startKeys.
 * @param partitionsPath output path for SequenceFile
 * @param startKeys the region start keys
 */
private static void writePartitions(Configuration conf, Path partitionsPath,
        List<ImmutableBytesWritable> startKeys) throws IOException {
    if (startKeys.isEmpty()) {
        throw new IllegalArgumentException("No regions passed");
    }

    // We're generating a list of split points, and we don't ever
    // have keys < the first region (which has an empty start key)
    // so we need to remove it. Otherwise we would end up with an
    // empty reducer with index 0
    TreeSet<ImmutableBytesWritable> sorted = new TreeSet<ImmutableBytesWritable>(startKeys);

    ImmutableBytesWritable first = sorted.first();
    if (!first.equals(HConstants.EMPTY_BYTE_ARRAY)) {
        throw new IllegalArgumentException("First region of table should have empty start key. Instead has: "
                + Bytes.toStringBinary(first.get()));
    }
    sorted.remove(first);

    // Write the actual file
    FileSystem fs = partitionsPath.getFileSystem(conf);
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, partitionsPath,
            ImmutableBytesWritable.class, NullWritable.class);

    try {
        for (ImmutableBytesWritable startKey : sorted) {
            writer.append(startKey, NullWritable.get());
        }
    } finally {
        writer.close();
    }
}

From source file:com.ailk.oci.ocnosql.tools.load.single.SingleColumnImportTsv.java

License:Apache License

/**
 * Configure a MapReduce Job to perform an incremental load into the given
 * table. This
 * <ul>
 *   <li>Inspects the table to configure a total order partitioner</li>
 *   <li>Uploads the partitions file to the cluster and adds it to the DistributedCache</li>
 *   <li>Sets the number of reduce tasks to match the current number of regions</li>
 *   <li>Sets the output key/value class to match HFileOutputFormat's requirements</li>
 *   <li>Sets the reducer up to perform the appropriate sorting (either KeyValueSortReducer or
 *     PutSortReducer)</li>
 * </ul>
 * The user should be sure to set the map output value class to either KeyValue or Put before
 * running this function.
 */
public static void configureIncrementalLoad(Job job, HTable table) throws IOException {
    Configuration conf = job.getConfiguration();
    Class<? extends Partitioner> topClass;
    try {
        topClass = getTotalOrderPartitionerClass();
    } catch (ClassNotFoundException e) {
        throw new IOException("Failed getting TotalOrderPartitioner", e);
    }
    //partition
    job.setPartitionerClass(topClass);
    //Set the key class for the job output data
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    //Set the value class for job outputs
    job.setOutputValueClass(KeyValue.class);
    // the output format is HFile
    job.setOutputFormatClass(HFileOutputFormat2.class);

    // Based on the configured map output class, set the correct reducer to properly
    // sort the incoming values.
    // TODO it would be nice to pick one or the other of these formats.
    if (KeyValue.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(KeyValueSortReducer.class);
    } else if (Put.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(SingleColumnReducer.class);
    } else {
        LOG.warn("Unknown map output value type:" + job.getMapOutputValueClass());
    }

    LOG.info("Looking up current regions for table " + table);
    // look up the start key of each region
    List<ImmutableBytesWritable> startKeys = getRegionStartKeys(table);
    LOG.info("Configuring " + startKeys.size() + " reduce partitions " + "to match current region count");

    // one reduce task per region
    job.setNumReduceTasks(startKeys.size());

    Path partitionsPath = new Path(job.getWorkingDirectory(), "partitions_" + UUID.randomUUID());
    LOG.info("Writing partition information to " + partitionsPath);

    FileSystem fs = partitionsPath.getFileSystem(conf);
    writePartitions(conf, partitionsPath, startKeys);
    partitionsPath.makeQualified(fs);

    URI cacheUri;
    try {
        // Below we make explicit reference to the bundled TOP. It's cheating:
        // we assume the definition in the HBase-bundled TOP is the same as in
        // Hadoop (whether 0.20 or 0.22, etc.)
        /*
          cacheUri = new URI(partitionsPath.toString() + "#" +
            org.apache.hadoop.hbase.mapreduce.hadoopbackport.TotalOrderPartitioner.DEFAULT_PATH);
            */
        cacheUri = new URI(partitionsPath.toString() + "#" + TotalOrderPartitioner.DEFAULT_PATH);
    } catch (URISyntaxException e) {
        throw new IOException(e);
    }
    DistributedCache.addCacheFile(cacheUri, conf);
    DistributedCache.createSymlink(conf);

    // Set compression algorithms based on column families
    configureCompression(table, conf);

    TableMapReduceUtil.addDependencyJars(job);
    LOG.info("Incremental table output configured.");
}

From source file:com.alectenharmsel.research.LcCounters.java

License:Apache License

public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: LineCounter <input> <output>");
        System.exit(-1);
    }

    Job job = new Job(getConf(), "LineCount");
    job.setJarByClass(LineCount.class);

    job.setInputFormatClass(WholeBlockInputFormat.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(LineCountMapper.class);
    job.setReducerClass(LineCountReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    Configuration check = job.getConfiguration();
    boolean success = job.waitForCompletion(true);

    //Get the counter here, output to a file called total in the dir
    Counters counters = job.getCounters();

    //Throw it in the file
    Path outPath = new Path(args[1]);
    FileSystem fs = outPath.getFileSystem(check);
    OutputStream out = fs.create(new Path(outPath, "total"));
    String total = counters.findCounter(LcCounters.NUM_LINES).getValue() + "\n";
    out.write(total.getBytes());
    out.close();
    return success ? 0 : 1;
}