List of usage examples for org.apache.hadoop.fs FileSystem get
public static FileSystem get(Configuration conf) throws IOException
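Every example below obtains its filesystem handle through this static factory, which resolves the filesystem named by fs.defaultFS in the supplied Configuration. As a minimal, self-contained sketch (the class name, path, and existence check here are illustrative assumptions, not taken from any of the sources below):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileSystemGetExample {
    public static void main(String[] args) throws IOException {
        // Picks up core-site.xml / hdfs-site.xml if they are on the classpath
        Configuration conf = new Configuration();
        // Returns the FileSystem for fs.defaultFS (HDFS, local, etc.)
        FileSystem fs = FileSystem.get(conf);
        Path dir = new Path("/tmp/example"); // hypothetical path
        System.out.println(dir + " exists: " + fs.exists(dir));
    }
}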
From source file:com.ailk.oci.ocnosql.tools.load.csvbulkload.CsvBulkLoadTool.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    HBaseConfiguration.addHbaseResources(getConf());
    Configuration conf = getConf();
    String quorum = conf.get("hbase.zookeeper.quorum");
    String clientPort = conf.get("hbase.zookeeper.property.clientPort");
    LOG.info("hbase.zookeeper.quorum=" + quorum);
    LOG.info("hbase.zookeeper.property.clientPort=" + clientPort);
    LOG.info("phoenix.query.dateFormat=" + conf.get("phoenix.query.dateFormat"));

    CommandLine cmdLine = null;
    try {
        cmdLine = parseOptions(args);
        LOG.info("JdbcUrl=" + getJdbcUrl(quorum + ":" + clientPort));
    } catch (IllegalStateException e) {
        printHelpAndExit(e.getMessage(), getOptions());
    }
    Class.forName(DriverManager.class.getName());
    Connection conn = DriverManager.getConnection(getJdbcUrl(quorum + ":" + clientPort));
    String tableName = cmdLine.getOptionValue(TABLE_NAME_OPT.getOpt());
    String schemaName = cmdLine.getOptionValue(SCHEMA_NAME_OPT.getOpt());
    String qualifiedTableName = getQualifiedTableName(schemaName, tableName);
    List<ColumnInfo> importColumns = buildImportColumns(conn, cmdLine, qualifiedTableName);
    LOG.info("tableName=" + tableName);
    LOG.info("schemaName=" + schemaName);
    LOG.info("qualifiedTableName=" + qualifiedTableName);
    configureOptions(cmdLine, importColumns, getConf());

    try {
        validateTable(conn, schemaName, tableName);
    } finally {
        conn.close();
    }

    Path inputPath = new Path(cmdLine.getOptionValue(INPUT_PATH_OPT.getOpt()));
    Path outputPath = null;
    if (cmdLine.hasOption(OUTPUT_PATH_OPT.getOpt())) {
        outputPath = new Path(cmdLine.getOptionValue(OUTPUT_PATH_OPT.getOpt()));
    } else {
        outputPath = new Path("/tmp/" + UUID.randomUUID());
    }
    LOG.info("Configuring HFile output path to {}", outputPath);

    Job job = new Job(getConf(),
            "Phoenix MapReduce import for " + getConf().get(PhoenixCsvToKeyValueMapper.TABLE_NAME_CONFKEY));

    // Allow overriding the job jar setting by using a -D system property at startup
    if (job.getJar() == null) {
        job.setJarByClass(PhoenixCsvToKeyValueMapper.class);
    }
    job.setInputFormatClass(TextInputFormat.class);
    FileInputFormat.addInputPath(job, inputPath);
    FileSystem.get(getConf());
    FileOutputFormat.setOutputPath(job, outputPath);

    job.setMapperClass(PhoenixCsvToKeyValueMapper.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(KeyValue.class);

    HTable htable = new HTable(getConf(), qualifiedTableName);

    // Auto configure partitioner and reducer according to the Main Data table
    HFileOutputFormat.configureIncrementalLoad(job, htable);

    LOG.info("Running MapReduce import job from {} to {}", inputPath, outputPath);
    boolean success = job.waitForCompletion(true);
    if (!success) {
        LOG.error("Import job failed, check JobTracker for details");
        return 1;
    }

    LOG.info("Loading HFiles from {}", outputPath);
    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(getConf());
    loader.doBulkLoad(outputPath, htable);
    htable.close();

    LOG.info("Incremental load complete");
    LOG.info("Removing output directory {}", outputPath);
    if (!FileSystem.get(getConf()).delete(outputPath, true)) {
        LOG.error("Removing output directory {} failed", outputPath);
    }
    return 0;
}
From source file:com.alexholmes.hadooputils.combine.avro.AvroFileGenerator.java
License:Apache License
public int run(final String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println(String.format("Usage: %s: <file path> <number of records>",
                AvroFileGenerator.class.getName()));
        return 1;
    }

    Path file = new Path(args[0]);
    int numRecords = Integer.valueOf(args[1]);

    FileSystem fs = FileSystem.get(super.getConf());
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, super.getConf(), file, Text.class, Text.class,
            SequenceFile.CompressionType.BLOCK, new DefaultCodec());
    try {
        for (int i = 0; i < numRecords; i++) {
            writer.append(new Text("k" + i), new Text("v" + i));
        }
    } finally {
        writer.close();
    }

    return 0;
}
From source file:com.alexholmes.hadooputils.combine.avro.mapred.CombineAvroInputFormatTest.java
License:Apache License
@SuppressWarnings("deprecation") public void testProjection() throws Exception { JobConf job = new JobConf(); Integer defaultRank = new Integer(-1); String jsonSchema = "{\"type\":\"record\"," + "\"name\":\"org.apache.avro.mapred.Pair\"," + "\"fields\": [ " + "{\"name\":\"rank\", \"type\":\"int\", \"default\": -1}," + "{\"name\":\"value\", \"type\":\"long\"}" + "]}"; Schema readerSchema = Schema.parse(jsonSchema); AvroJob.setInputSchema(job, readerSchema); String dir = System.getProperty("test.dir", ".") + "/mapred"; Path inputPath = new Path(dir + "/out" + "/part-00000" + AvroOutputFormat.EXT); FileStatus fileStatus = FileSystem.get(job).getFileStatus(inputPath); FileSplit fileSplit = new FileSplit(inputPath, 0, fileStatus.getLen(), job); AvroRecordReader<Pair<Integer, Long>> recordReader = new AvroRecordReader<Pair<Integer, Long>>(job, fileSplit);/* w ww . ja v a 2s .co m*/ AvroWrapper<Pair<Integer, Long>> inputPair = new AvroWrapper<Pair<Integer, Long>>(null); NullWritable ignore = NullWritable.get(); long sumOfCounts = 0; long numOfCounts = 0; while (recordReader.next(inputPair, ignore)) { Assert.assertEquals((Integer) inputPair.datum().get(0), defaultRank); sumOfCounts += (Long) inputPair.datum().get(1); numOfCounts++; } Assert.assertEquals(numOfCounts, WordCountUtil.COUNTS.size()); long actualSumOfCounts = 0; for (Long count : WordCountUtil.COUNTS.values()) { actualSumOfCounts += count; } Assert.assertEquals(sumOfCounts, actualSumOfCounts); }
From source file:com.alexholmes.hadooputils.combine.seqfile.mapred.CombineSequenceFileTest.java
License:Apache License
public void writeSequenceFile(Path path) throws IOException {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, path, Text.class, Text.class,
            SequenceFile.CompressionType.BLOCK, new DefaultCodec());
    try {
        writer.append(key, value);
    } finally {
        writer.close();
    }
}
From source file:com.alexholmes.hadooputils.combine.seqfile.SequenceFileGenerator.java
License:Apache License
public int run(final String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println(String.format("Usage: %s: <file path> <number of records>",
                SequenceFileGenerator.class.getName()));
        return 1;
    }

    Path file = new Path(args[0]);
    int numRecords = Integer.valueOf(args[1]);

    FileSystem fs = FileSystem.get(super.getConf());
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, super.getConf(), file, Text.class, Text.class,
            SequenceFile.CompressionType.BLOCK, new DefaultCodec());
    try {
        for (int i = 0; i < numRecords; i++) {
            writer.append(new Text("k" + i), new Text("v" + i));
        }
    } finally {
        writer.close();
    }

    return 0;
}
From source file:com.alexholmes.hadooputils.sort.Sort.java
License:Apache License
/**
 * The driver for the sort MapReduce job.
 *
 * @param jobConf           sort configuration
 * @param numMapTasks       number of map tasks
 * @param numReduceTasks    number of reduce tasks
 * @param sampler           sampler, if required
 * @param codecClass        the compression codec for compressing final outputs
 * @param mapCodecClass     the compression codec for compressing intermediary map outputs
 * @param createLzopIndexes whether or not a MR job should be launched to create LZOP indexes
 *                          for the job output files
 * @param inputDirAsString  input directory in CSV-form
 * @param outputDirAsString output directory
 * @return true if the job completed successfully
 * @throws IOException        if something went wrong
 * @throws URISyntaxException if a URI wasn't correctly formed
 */
public boolean runJob(final JobConf jobConf, final Integer numMapTasks, final Integer numReduceTasks,
        final InputSampler.Sampler<K, V> sampler, final Class<? extends CompressionCodec> codecClass,
        final Class<? extends CompressionCodec> mapCodecClass, final boolean createLzopIndexes,
        final String inputDirAsString, final String outputDirAsString) throws IOException, URISyntaxException {

    jobConf.setJarByClass(Sort.class);
    jobConf.setJobName("sorter");

    JobClient client = new JobClient(jobConf);
    ClusterStatus cluster = client.getClusterStatus();

    if (numMapTasks != null) {
        jobConf.setNumMapTasks(numMapTasks);
    }
    if (numReduceTasks != null) {
        jobConf.setNumReduceTasks(numReduceTasks);
    } else {
        int numReduces = (int) (cluster.getMaxReduceTasks() * 0.9);
        String sortReduces = jobConf.get("test.sort.reduces_per_host");
        if (sortReduces != null) {
            numReduces = cluster.getTaskTrackers() * Integer.parseInt(sortReduces);
        }
        // Set user-supplied (possibly default) job configs
        jobConf.setNumReduceTasks(numReduces);
    }

    jobConf.setMapperClass(IdentityMapper.class);
    jobConf.setReducerClass(SortReduce.class);

    jobConf.setInputFormat(SortInputFormat.class);

    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(Text.class);
    jobConf.setOutputKeyClass(Text.class);
    jobConf.setOutputValueClass(Text.class);

    if (mapCodecClass != null) {
        jobConf.setMapOutputCompressorClass(mapCodecClass);
    }

    if (codecClass != null) {
        jobConf.setBoolean("mapred.output.compress", true);
        jobConf.setClass("mapred.output.compression.codec", codecClass, CompressionCodec.class);
    }

    FileInputFormat.setInputPaths(jobConf, inputDirAsString);
    FileOutputFormat.setOutputPath(jobConf, new Path(outputDirAsString));

    if (sampler != null) {
        System.out.println("Sampling input to effect total-order sort...");
        jobConf.setPartitionerClass(TotalOrderPartitioner.class);
        Path inputDir = FileInputFormat.getInputPaths(jobConf)[0];

        FileSystem fileSystem = FileSystem.get(jobConf);

        if (fileSystem.exists(inputDir) && fileSystem.isFile(inputDir)) {
            inputDir = inputDir.getParent();
        }
        inputDir = inputDir.makeQualified(inputDir.getFileSystem(jobConf));
        Path partitionFile = new Path(inputDir, "_sortPartitioning");
        TotalOrderPartitioner.setPartitionFile(jobConf, partitionFile);
        InputSampler.writePartitionFile(jobConf, sampler);
        URI partitionUri = new URI(partitionFile.toString() + "#" + "_sortPartitioning");
        DistributedCache.addCacheFile(partitionUri, jobConf);
        DistributedCache.createSymlink(jobConf);
    }

    System.out.println("Running on " + cluster.getTaskTrackers() + " nodes to sort from "
            + FileInputFormat.getInputPaths(jobConf)[0] + " into " + FileOutputFormat.getOutputPath(jobConf)
            + " with " + jobConf.getNumReduceTasks() + " reduces.");
    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    jobResult = JobClient.runJob(jobConf);
    Date endTime = new Date();
    System.out.println("Job ended: " + endTime);
    System.out.println("The job took "
            + TimeUnit.MILLISECONDS.toSeconds(endTime.getTime() - startTime.getTime()) + " seconds.");

    if (jobResult.isSuccessful()) {
        if (createLzopIndexes && codecClass != null && LzopCodec.class.equals(codecClass)) {
            new LzoIndexer(jobConf).index(new Path(outputDirAsString));
        }
        return true;
    }
    return false;
}
From source file:com.alexholmes.hadooputils.test.TextIOJobBuilder.java
License:Apache License
/**
 * Constructor which instantiates input/output paths.
 *
 * @param config     the Hadoop configuration
 * @param inputPath  the input directory where input files will be created
 * @param outputPath the output directory that the MapReduce job will write to
 * @throws IOException if something goes wrong
 */
public TextIOJobBuilder(final Configuration config, final Path inputPath, final Path outputPath)
        throws IOException {
    this(FileSystem.get(config), inputPath, outputPath);
}
From source file:com.alexholmes.hadooputils.test.TextIOJobBuilder.java
License:Apache License
/**
 * Constructor which instantiates input/output paths.
 *
 * @param config the Hadoop configuration
 * @throws IOException if something goes wrong
 */
public TextIOJobBuilder(final Configuration config) throws IOException {
    this(FileSystem.get(config), new Path("/input"), new Path("/output"));
}
From source file:com.alexholmes.json.mapreduce.ExampleJob.java
License:Apache License
/**
 * Writes the contents of {@link #JSON} into a file in the job input directory in HDFS.
 *
 * @param conf     the Hadoop config
 * @param inputDir the HDFS input directory where we'll write a file
 * @throws IOException if something goes wrong
 */
public static void writeInput(Configuration conf, Path inputDir) throws IOException {
    FileSystem fs = FileSystem.get(conf);

    if (fs.exists(inputDir)) {
        throw new IOException(
                String.format("Input directory '%s' exists - please remove and rerun this example", inputDir));
    }

    OutputStreamWriter writer = new OutputStreamWriter(fs.create(new Path(inputDir, "input.txt")));
    writer.write(JSON);
    IOUtils.closeStream(writer);
}
From source file:com.alibaba.jstorm.hdfs.HdfsCache.java
License:Apache License
public HdfsCache(Map conf) {
    this.hadoopConf = new Configuration();
    String hdfsHostName = (String) conf.get(Config.BLOBSTORE_HDFS_HOSTNAME);
    Integer hdfsPort = JStormUtils.parseInt(conf.get(Config.BLOBSTORE_HDFS_PORT));
    LOG.info("hdfs address hdfs://{}:{}", hdfsHostName, hdfsPort);
    hadoopConf.set("fs.defaultFS", String.format("hdfs://%s:%d", hdfsHostName, hdfsPort));
    hadoopConf.setBoolean("fs.hdfs.impl.disable.cache", true);
    try {
        fs = FileSystem.get(hadoopConf);
        String configBaseDir = (String) conf.get("hdfs.base.dir");
        if (configBaseDir != null) {
            this.baseDir = configBaseDir;
        } else {
            String clusterName = ConfigExtension.getClusterName(conf) != null
                    ? ConfigExtension.getClusterName(conf)
                    : "default";
            baseDir = baseDir + "/" + clusterName;
            if (!exist(baseDir))
                mkdir(baseDir);
        }
    } catch (IOException e) {
        LOG.error("Failed to instance hdfs cache", e);
        throw new RuntimeException(e.getMessage());
    }
}