List of usage examples for org.apache.hadoop.fs.Path.getFileSystem
public FileSystem getFileSystem(Configuration conf) throws IOException
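getFileSystem resolves the FileSystem that owns the path from the path's URI scheme and authority, falling back to fs.defaultFS from the supplied Configuration when the path has no scheme. Before the full examples below, here is a minimal self-contained sketch of the call; the HDFS address and file name are hypothetical placeholders, not taken from any of the source files listed here.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetFileSystemExample {
    public static void main(String[] args) throws IOException {
        // Hypothetical location; the hdfs:// scheme selects the HDFS FileSystem implementation.
        Path path = new Path("hdfs://localhost:9000/tmp/example.txt");
        Configuration conf = new Configuration();
        // Resolve the FileSystem instance that owns this path.
        FileSystem fs = path.getFileSystem(conf);
        // Use the resolved filesystem, e.g. write a small file and read its status back.
        try (FSDataOutputStream out = fs.create(path, true)) {
            out.writeUTF("hello");
        }
        System.out.println(fs.getFileStatus(path).getLen() + " bytes written");
    }
}

With a scheme-less path and fs.defaultFS left at its default, the same call returns the local filesystem instead, which is why the examples below work unchanged against either local paths or hdfs:// URIs.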
From source file: co.nubetech.hiho.mapreduce.lib.output.NoKeyOnlyValueOutputFormat.java
License: Apache License
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context) throws IOException {
    boolean isCompressed = getCompressOutput(context);
    Configuration conf = context.getConfiguration();
    String ext = "";
    CompressionCodec codec = null;
    if (isCompressed) {
        // create the named codec
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(context, GzipCodec.class);
        codec = ReflectionUtils.newInstance(codecClass, conf);
        ext = codec.getDefaultExtension();
    }
    Path file = getDefaultWorkFile(context, ext);
    FileSystem fs = file.getFileSystem(conf);
    FSDataOutputStream fileOut = fs.create(file, false);
    DataOutputStream ostream = fileOut;
    if (isCompressed) {
        ostream = new DataOutputStream(codec.createOutputStream(fileOut));
    }
    return new NoKeyRecordWriter<K, V>(ostream);
}
From source file: coldstorage.io.Reader.java
License: Apache License
public static void main(String[] args) throws IOException {
    List<Long> idsToFind = new ArrayList<Long>();
    int maxId = 100000000;
    Random random = new Random(1);
    for (int i = 0; i < 1000; i++) {
        long id = (long) random.nextInt(maxId);
        idsToFind.add(id);
    }
    // idsToFind.clear();
    // idsToFind.add(58998000L);
    // Path pathData = new Path("./out/data.avro");
    // Path pathIndex = new Path("./out/data.index");
    Path pathData = new Path("hdfs://localhost:9000/avro/out/data.avro");
    Path pathIndex = new Path("hdfs://localhost:9000/avro/out/data.index");
    Configuration configuration = new Configuration();
    FileSystem fileSystem = pathData.getFileSystem(configuration);
    FileStatus indexFileStatus = fileSystem.getFileStatus(pathIndex);
    FileStatus dataFileStatus = fileSystem.getFileStatus(pathData);
    FSDataInputStream indexInputStream = fileSystem.open(pathIndex);
    FSDataInputStream dataInputStream = fileSystem.open(pathData);
    AvroFSInput fsInput = new AvroFSInput(dataInputStream, dataFileStatus.getLen());
    GenericDatumReader<GenericRecord> gdr = new GenericDatumReader<GenericRecord>();
    DataFileReader<GenericRecord> reader = new DataFileReader<GenericRecord>(fsInput, gdr);
    List<IndexKey> list = getList(indexInputStream, indexFileStatus.getLen());
    for (Long idToFind : idsToFind) {
        long t1 = System.nanoTime();
        GenericRecord lookupRecord = lookupRecord(reader, list, idToFind);
        long t2 = System.nanoTime();
        System.out.println("Found [" + idToFind + "] in [" + (t2 - t1) / 1000000.0 + " ms]:" + lookupRecord);
    }
}
From source file: coldstorage.io.Writer.java
License: Apache License
public static void main(String[] args) throws IOException {
    Schema.Parser parser = new Schema.Parser();
    Schema schema = parser.parse("{" + "\"namespace\": \"example.avro\", " + "\"type\": \"record\", "
            + "\"name\": \"User\", " + "\"fields\": ["
            + " {\"name\": \"id\", \"type\": \"long\"},"
            + " {\"name\": \"data\", \"type\": \"string\"}" + " ]}");
    GenericDatumWriter<GenericRecord> gdw = new GenericDatumWriter<GenericRecord>(schema);
    DataFileWriter<GenericRecord> dfw = new DataFileWriter<GenericRecord>(gdw);
    // Path pathData = new Path("./out/data.avro");
    // Path pathIndex = new Path("./out/data.index");
    Path pathData = new Path("hdfs://localhost:9000/avro/out/data.avro");
    Path pathIndex = new Path("hdfs://localhost:9000/avro/out/data.index");
    Configuration configuration = new Configuration();
    FileSystem fileSystem = pathData.getFileSystem(configuration);
    FSDataOutputStream indexOutputStream = fileSystem.create(pathIndex);
    FSDataOutputStream outputStream = fileSystem.create(pathData);
    dfw.create(schema, outputStream);
    GenericRecordBuilder genericRecordBuilder = new GenericRecordBuilder(schema);
    Random random = new Random(1);
    final int syncPoint = 1000;
    int count = 0;
    for (int i = 0; i < 100000000; i++) {
        genericRecordBuilder.set("id", (long) i);
        genericRecordBuilder.set("data", Long.toString(random.nextLong()));
        Record record = genericRecordBuilder.build();
        dfw.append(record);
        if (count >= syncPoint) {
            long sync = dfw.sync();
            Object object = record.get("id");
            writeIndex(indexOutputStream, sync, object);
            count = 0;
        }
        count++;
    }
    indexOutputStream.close();
    dfw.close();
}
From source file: colossal.pipe.ColFile.java
License: Apache License
public boolean exists(Configuration conf) {
    Path dfsPath = new Path(path);
    try {
        FileSystem fs = dfsPath.getFileSystem(conf);
        return fs.exists(dfsPath);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
From source file: colossal.pipe.ColFile.java
License: Apache License
public boolean isObsolete(Configuration conf) {
    Path dfsPath = new Path(path);
    try {
        FileSystem fs = dfsPath.getFileSystem(conf);
        // this needs to be smart - we should encode in the file metadata the dependents and their dates used
        // so we can verify that any existing antecedent is not newer and declare victory...
        if (fs.exists(dfsPath)) {
            FileStatus[] statuses = fs.listStatus(dfsPath);
            for (FileStatus status : statuses) {
                if (!status.isDir()) {
                    if (format != Formats.AVRO_FORMAT || status.getPath().toString().endsWith(".avro")) {
                        return false; // may check for extension for other types
                    }
                } else {
                    if (!status.getPath().toString().endsWith("/_logs")
                            && !status.getPath().toString().endsWith("/_temporary")) {
                        return false;
                    }
                }
            }
        }
        return true; // needs more work!
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
From source file: colossal.pipe.ColFile.java
License: Apache License
public void clearAndPrepareOutput(Configuration conf) {
    try {
        Path dfsPath = new Path(path);
        FileSystem fs = dfsPath.getFileSystem(conf);
        if (fs.exists(dfsPath)) {
            FileStatus[] statuses = fs.listStatus(dfsPath);
            for (FileStatus status : statuses) {
                if (status.isDir()) {
                    if (!status.getPath().toString().endsWith("/_logs")
                            && !status.getPath().toString().endsWith("/_temporary")) {
                        throw new IllegalArgumentException(
                                "Trying to overwrite directory with child directories: " + path);
                    }
                }
            }
        } else {
            fs.mkdirs(dfsPath);
        }
        fs.delete(dfsPath, true);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
From source file: colossal.pipe.ColFile.java
License: Apache License
public long getTimestamp(JobConf conf) {
    try {
        Path dfsPath = new Path(path);
        FileSystem fs = dfsPath.getFileSystem(conf);
        return fs.getFileStatus(dfsPath).getModificationTime();
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
From source file: com.ailk.oci.ocnosql.tools.load.single.SingleColumnImportTsv.java
License: Apache License
/**
 * Write out a SequenceFile that can be read by TotalOrderPartitioner
 * that contains the split points in startKeys.
 * @param partitionsPath output path for SequenceFile
 * @param startKeys the region start keys
 */
private static void writePartitions(Configuration conf, Path partitionsPath,
        List<ImmutableBytesWritable> startKeys) throws IOException {
    if (startKeys.isEmpty()) {
        throw new IllegalArgumentException("No regions passed");
    }
    // We're generating a list of split points, and we don't ever
    // have keys < the first region (which has an empty start key)
    // so we need to remove it. Otherwise we would end up with an
    // empty reducer with index 0
    TreeSet<ImmutableBytesWritable> sorted = new TreeSet<ImmutableBytesWritable>(startKeys);
    ImmutableBytesWritable first = sorted.first();
    if (!first.equals(HConstants.EMPTY_BYTE_ARRAY)) {
        throw new IllegalArgumentException("First region of table should have empty start key. Instead has: "
                + Bytes.toStringBinary(first.get()));
    }
    sorted.remove(first);
    // Write the actual file
    FileSystem fs = partitionsPath.getFileSystem(conf);
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, partitionsPath,
            ImmutableBytesWritable.class, NullWritable.class);
    try {
        for (ImmutableBytesWritable startKey : sorted) {
            writer.append(startKey, NullWritable.get());
        }
    } finally {
        writer.close();
    }
}
From source file: com.ailk.oci.ocnosql.tools.load.single.SingleColumnImportTsv.java
License: Apache License
/**
 * Configure a MapReduce Job to perform an incremental load into the given
 * table. This
 * <ul>
 * <li>Inspects the table to configure a total order partitioner</li>
 * <li>Uploads the partitions file to the cluster and adds it to the DistributedCache</li>
 * <li>Sets the number of reduce tasks to match the current number of regions</li>
 * <li>Sets the output key/value class to match HFileOutputFormat's requirements</li>
 * <li>Sets the reducer up to perform the appropriate sorting (either KeyValueSortReducer or
 * PutSortReducer)</li>
 * </ul>
 * The user should be sure to set the map output value class to either KeyValue or Put before
 * running this function.
 */
public static void configureIncrementalLoad(Job job, HTable table) throws IOException {
    Configuration conf = job.getConfiguration();
    Class<? extends Partitioner> topClass;
    try {
        topClass = getTotalOrderPartitionerClass();
    } catch (ClassNotFoundException e) {
        throw new IOException("Failed getting TotalOrderPartitioner", e);
    }
    // set the total order partitioner
    job.setPartitionerClass(topClass);
    // Set the key class for the job output data
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    // Set the value class for job outputs
    job.setOutputValueClass(KeyValue.class);
    // output format writes HFiles
    job.setOutputFormatClass(HFileOutputFormat2.class);

    // Based on the configured map output class, set the correct reducer to properly
    // sort the incoming values.
    // TODO it would be nice to pick one or the other of these formats.
    if (KeyValue.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(KeyValueSortReducer.class);
    } else if (Put.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(SingleColumnReducer.class);
    } else {
        LOG.warn("Unknown map output value type:" + job.getMapOutputValueClass());
    }

    LOG.info("Looking up current regions for table " + table);
    // look up the start key of each region
    List<ImmutableBytesWritable> startKeys = getRegionStartKeys(table);
    LOG.info("Configuring " + startKeys.size() + " reduce partitions " + "to match current region count");
    // one reduce task per region
    job.setNumReduceTasks(startKeys.size());

    Path partitionsPath = new Path(job.getWorkingDirectory(), "partitions_" + UUID.randomUUID());
    LOG.info("Writing partition information to " + partitionsPath);
    FileSystem fs = partitionsPath.getFileSystem(conf);
    writePartitions(conf, partitionsPath, startKeys);
    partitionsPath.makeQualified(fs);
    URI cacheUri;
    try {
        // Below we make explicit reference to the bundled TOP. It's cheating.
        // We are assuming the define in the hbase bundled TOP is as it is in
        // hadoop (whether 0.20 or 0.22, etc.)
        /*
        cacheUri = new URI(partitionsPath.toString() + "#"
                + org.apache.hadoop.hbase.mapreduce.hadoopbackport.TotalOrderPartitioner.DEFAULT_PATH);
        */
        cacheUri = new URI(partitionsPath.toString() + "#" + TotalOrderPartitioner.DEFAULT_PATH);
    } catch (URISyntaxException e) {
        throw new IOException(e);
    }
    DistributedCache.addCacheFile(cacheUri, conf);
    DistributedCache.createSymlink(conf);

    // Set compression algorithms based on column families
    configureCompression(table, conf);

    TableMapReduceUtil.addDependencyJars(job);
    LOG.info("Incremental table output configured.");
}
From source file: com.alectenharmsel.research.LcCounters.java
License: Apache License
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: LineCounter <input> <output>");
        System.exit(-1);
    }

    Job job = new Job(getConf(), "LineCount");
    job.setJarByClass(LineCount.class);

    job.setInputFormatClass(WholeBlockInputFormat.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(LineCountMapper.class);
    job.setReducerClass(LineCountReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    Configuration check = job.getConfiguration();
    boolean success = job.waitForCompletion(true);

    // Get the counter here, output to a file called total in the dir
    Counters counters = job.getCounters();

    // Throw it in the file
    Path outPath = new Path(args[1]);
    FileSystem fs = outPath.getFileSystem(check);
    OutputStream out = fs.create(new Path(outPath, "total"));
    String total = counters.findCounter(LcCounters.NUM_LINES).getValue() + "\n";
    out.write(total.getBytes());
    out.close();

    return success ? 0 : 1;
}