List of usage examples for the org.apache.hadoop.fs.Path constructor
public Path(Path parent, Path child)
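Before the full examples, a minimal, self-contained sketch of what this constructor does (the directory names here are invented for illustration): it resolves the child path against the parent, which is how every example below builds file and directory names under a base directory.

import org.apache.hadoop.fs.Path;

public class PathJoinExample {
    public static void main(String[] args) {
        Path base = new Path("/tmp/pi");    // parent directory
        Path in = new Path(base, "in");     // -> /tmp/pi/in
        Path part = new Path(in, "part0");  // -> /tmp/pi/in/part0
        System.out.println(part);           // prints /tmp/pi/in/part0
    }
}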
From source file:PiEstimator.java
License:Apache License
/**
 * Run a map/reduce job for estimating Pi.
 *
 * @return the estimated value of Pi
 */
public static BigDecimal estimate(int numMaps, long numPoints, JobConf jobConf) throws IOException {
    // setup job conf
    jobConf.setJobName(PiEstimator.class.getSimpleName());
    jobConf.setInputFormat(SequenceFileInputFormat.class);
    jobConf.setOutputKeyClass(BooleanWritable.class);
    jobConf.setOutputValueClass(LongWritable.class);
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);
    jobConf.setMapperClass(PiMapper.class);
    jobConf.setNumMapTasks(numMaps);
    jobConf.setReducerClass(PiReducer.class);
    jobConf.setNumReduceTasks(1);

    // turn off speculative execution, because DFS doesn't handle
    // multiple writers to the same file.
    jobConf.setSpeculativeExecution(false);

    // setup input/output directories
    final Path inDir = new Path(TMP_DIR, "in");
    final Path outDir = new Path(TMP_DIR, "out");
    FileInputFormat.setInputPaths(jobConf, inDir);
    FileOutputFormat.setOutputPath(jobConf, outDir);

    final FileSystem fs = FileSystem.get(jobConf);
    if (fs.exists(TMP_DIR)) {
        throw new IOException("Tmp directory " + fs.makeQualified(TMP_DIR)
            + " already exists. Please remove it first.");
    }
    if (!fs.mkdirs(inDir)) {
        throw new IOException("Cannot create input directory " + inDir);
    }

    try {
        // generate an input file for each map task
        for (int i = 0; i < numMaps; ++i) {
            final Path file = new Path(inDir, "part" + i);
            final LongWritable offset = new LongWritable(i * numPoints);
            final LongWritable size = new LongWritable(numPoints);
            final SequenceFile.Writer writer = SequenceFile.createWriter(fs, jobConf, file,
                LongWritable.class, LongWritable.class, CompressionType.NONE);
            try {
                writer.append(offset, size);
            } finally {
                writer.close();
            }
            System.out.println("Wrote input for Map #" + i);
        }

        // start a map/reduce job
        System.out.println("Starting Job");
        final long startTime = System.currentTimeMillis();
        JobClient.runJob(jobConf);
        final double duration = (System.currentTimeMillis() - startTime) / 1000.0;
        System.out.println("Job Finished in " + duration + " seconds");

        // read outputs
        Path inFile = new Path(outDir, "reduce-out");
        LongWritable numInside = new LongWritable();
        LongWritable numOutside = new LongWritable();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, jobConf);
        try {
            reader.next(numInside, numOutside);
        } finally {
            reader.close();
        }

        // compute estimated value
        return BigDecimal.valueOf(4).setScale(20)
            .multiply(BigDecimal.valueOf(numInside.get()))
            .divide(BigDecimal.valueOf(numMaps))
            .divide(BigDecimal.valueOf(numPoints));
    } finally {
        fs.delete(TMP_DIR, true);
    }
}
From source file:PerformanceEvaluation.java
License:Apache License
private void doMapReduce(final Class<? extends Test> cmd)
        throws IOException, InterruptedException, ClassNotFoundException {
    Path inputDir = writeInputFile(this.conf);
    this.conf.set(EvaluationMapTask.CMD_KEY, cmd.getName());
    this.conf.set(EvaluationMapTask.PE_KEY, getClass().getName());
    Job job = new Job(this.conf);
    job.setJarByClass(PerformanceEvaluation.class);
    job.setJobName("HBase Performance Evaluation");
    job.setInputFormatClass(PeInputFormat.class);
    PeInputFormat.setInputPaths(job, inputDir);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(LongWritable.class);
    job.setMapperClass(EvaluationMapTask.class);
    job.setReducerClass(LongSumReducer.class);
    job.setNumReduceTasks(1);
    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, new Path(inputDir, "outputs"));
    TableMapReduceUtil.addDependencyJars(job);
    job.waitForCompletion(true);
}
From source file:PerformanceEvaluation.java
License:Apache License
private Path writeInputFile(final Configuration c) throws IOException {
    FileSystem fs = FileSystem.get(c);
    if (!fs.exists(PERF_EVAL_DIR)) {
        fs.mkdirs(PERF_EVAL_DIR);
    }
    SimpleDateFormat formatter = new SimpleDateFormat("yyyyMMddHHmmss");
    Path subdir = new Path(PERF_EVAL_DIR, formatter.format(new Date()));
    fs.mkdirs(subdir);
    Path inputFile = new Path(subdir, "input.txt");
    PrintStream out = new PrintStream(fs.create(inputFile));
    // Make input random.
    Map<Integer, String> m = new TreeMap<Integer, String>();
    Hash h = MurmurHash.getInstance();
    int perClientRows = (this.R / this.N);
    try {
        for (int i = 0; i < 10; i++) {
            for (int j = 0; j < N; j++) {
                String s = "startRow=" + ((j * perClientRows) + (i * (perClientRows / 10)))
                    + ", perClientRunRows=" + (perClientRows / 10)
                    + ", totalRows=" + this.R
                    + ", clients=" + this.N
                    + ", flushCommits=" + this.flushCommits
                    + ", writeToWAL=" + this.writeToWAL
                    + ", scanCache=" + this.S;
                int hash = h.hash(Bytes.toBytes(s));
                m.put(hash, s);
            }
        }
        for (Map.Entry<Integer, String> e : m.entrySet()) {
            out.println(e.getValue());
        }
    } finally {
        out.close();
    }
    return subdir;
}
From source file:AggregatedLogsPurger.java
License:Apache License
public boolean purge() throws IOException {
    LocalDateTime now = LocalDateTime.now();
    LocalDateTime deleteLogsOlderThanTime = now.minusDays(deleteOlderThanDays);

    // Identify which log dirs should be deleted
    FileSystem fs = rootLogDir.getFileSystem(conf);
    try {
        long totalBytes = 0;
        for (FileStatus userDir : fs.listStatus(rootLogDir)) {
            if (userDir.isDirectory()) {
                Path userDirPath = new Path(userDir.getPath(), suffix);
                System.out.println("Checking for userDir : " + userDirPath);
                for (FileStatus appDir : fs.listStatus(userDirPath)) {
                    LocalDateTime appDirDate = getAppDirDateTime(appDir.getModificationTime());
                    if (appDirDate.isBefore(deleteLogsOlderThanTime)) {
                        long size = getLengthRecursively(fs, appDir.getPath());
                        System.out.println(appDir.getPath() + ", " + appDir.getOwner() + ", "
                            + appDirDate.toString() + ", size=" + size);
                        totalBytes += size;
                        if (shouldDelete) {
                            System.out.println("Deleting " + appDir.getPath());
                            fs.delete(appDir.getPath(), true);
                        }
                    }
                }
            }
        }
        System.out.println("Savings : " + totalBytes);
    } catch (IOException e) {
        e.printStackTrace();
        return false;
    } finally {
        fs.close();
    }
    return true;
}
From source file:DisplayClustering.java
License:Apache License
/**
 * This method plots points and colors them according to their cluster
 * membership, rather than drawing ellipses.
 *
 * As of commit, this method is used only by K-means spectral clustering.
 * Since the cluster assignments are set within the eigenspace of the data, it
 * is not inherent that the original data cluster as they would in K-means:
 * that is, as symmetric gaussian mixtures.
 *
 * Since Spectral K-Means uses K-Means to cluster the eigenspace data, the raw
 * output is not directly usable. Rather, the cluster assignments from the raw
 * output need to be transferred back to the original data. As such, this
 * method will read the SequenceFile cluster results of K-means and transfer
 * the cluster assignments to the original data, coloring them appropriately.
 *
 * @param g2
 * @param data
 */
protected static void plotClusteredSampleData(Graphics2D g2, Path data) {
    double sx = (double) res / DS;
    g2.setTransform(AffineTransform.getScaleInstance(sx, sx));

    g2.setColor(Color.BLACK);
    Vector dv = new DenseVector(2).assign(SIZE / 2.0);
    plotRectangle(g2, new DenseVector(2).assign(2), dv);
    plotRectangle(g2, new DenseVector(2).assign(-2), dv);

    // plot the sample data, colored according to the cluster they belong to
    dv.assign(0.03);
    Path clusteredPointsPath = new Path(data, "clusteredPoints");
    Path inputPath = new Path(clusteredPointsPath, "part-m-00000");
    Map<Integer, Color> colors = new HashMap<Integer, Color>();
    int point = 0;
    for (Pair<IntWritable, WeightedVectorWritable> record :
            new SequenceFileIterable<IntWritable, WeightedVectorWritable>(inputPath, new Configuration())) {
        int clusterId = record.getFirst().get();
        VectorWritable v = SAMPLE_DATA.get(point++);
        Integer key = clusterId;
        if (!colors.containsKey(key)) {
            colors.put(key, COLORS[Math.min(COLORS.length - 1, colors.size())]);
        }
        plotClusteredRectangle(g2, v.get(), dv, colors.get(key));
    }
}
From source file:JaqlShell.java
License:Apache License
/**
 * @param dir
 * @param numNodes
 * @throws Exception
 */
public void init(String dir, int numNodes) throws Exception {
    String vInfo = VersionInfo.getVersion();
    System.setProperty("test.build.data", dir);
    m_conf = new Configuration();
    // setup conf according to the Hadoop version
    if (vInfo.indexOf("0.20") < 0) {
        throw new Exception("Unsupported Hadoop version: " + vInfo);
    }

    // setup the mini dfs cluster
    m_fs = new MiniDFSCluster(m_conf, numNodes, true, (String[]) null);
    FileSystem filesystem = m_fs.getFileSystem();
    m_conf.set("fs.default.name", filesystem.getUri().toString());
    Path parentdir = filesystem.getHomeDirectory();
    filesystem.mkdirs(parentdir);
    //FSUtils.setVersion(filesystem, parentdir);

    // setup hbase cluster (only if OS is not windows)
    //    if(!System.getProperty("os.name").toLowerCase().contains("win")) {
    //      m_conf.set(HConstants.HBASE_DIR, parentdir.toString());
    //      Path hdfsTestDir = filesystem.makeQualified(new Path(m_conf.get(HConstants.HBASE_DIR)));
    //
    //      // prime the hdfs for hbase information...
    //      HRegion root = HRegion.createHRegion(HRegionInfo.ROOT_REGIONINFO, hdfsTestDir, (HBaseConfiguration)m_conf);
    //      HRegion meta = HRegion.createHRegion(HRegionInfo.FIRST_META_REGIONINFO, hdfsTestDir, (HBaseConfiguration)m_conf);
    //      HRegion.addRegionToMETA(root, meta);
    //
    //      // ... and close the root and meta
    //      if (meta != null) {
    //        meta.close();
    //        meta.getLog().closeAndDelete();
    //      }
    //      if (root != null) {
    //        root.close();
    //        root.getLog().closeAndDelete();
    //      }
    //
    //      try {
    //        this.zooKeeperCluster = new MiniZooKeeperCluster();
    //        File testDir = new File(dir);
    //        int clientPort = this.zooKeeperCluster.startup(testDir);
    //        m_conf.set("hbase.zookeeper.property.clientPort", Integer.toString(clientPort));
    //      } catch(Exception e) {
    //        LOG.error("Unable to startup zookeeper");
    //        throw new IOException(e);
    //      }
    //      try {
    //        // start the mini cluster
    //        m_base = new MiniHBaseCluster((HBaseConfiguration)m_conf, numNodes);
    //      } catch(Exception e) {
    //        LOG.error("Unable to startup hbase");
    //        throw new IOException(e);
    //      }
    //      try {
    //        // opening the META table ensures that cluster is running
    //        new HTable((HBaseConfiguration)m_conf, HConstants.META_TABLE_NAME);
    //        //setupOverride(conf);
    //      } catch (Exception e) {
    //        LOG.warn("Could not verify that hbase is up", e);
    //      }
    //      setupOverride();
    //    }

    m_mr = startMRCluster(numNodes, m_fs.getFileSystem().getName(), m_conf);

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);

    // make the home directory if it does not exist
    Path hd = fs.getWorkingDirectory();
    if (!fs.exists(hd))
        fs.mkdirs(hd);

    // make the $USER/_temporary directory if it does not exist
    Path tmpPath = new Path(hd, "_temporary");
    if (!fs.exists(tmpPath))
        fs.mkdirs(tmpPath);

    //    if (m_base != null) {
    //      try {
    //        m_admin = new HBaseAdmin((HBaseConfiguration) m_conf);
    //        HTableDescriptor[] tables = m_admin.listTables();
    //        if (tables != null) {
    //          for (int i = 0; i < tables.length; i++) {
    //            m_admin.enableTable(tables[i].getName());
    //          }
    //        }
    //      } catch(Exception e) {
    //        LOG.warn("failed to enable hbase tables");
    //      }
    //    }
}
From source file:RawParascaleFileSystem.java
License:Apache License
/**
 * Converts a relative path to its absolute representation. If the path
 * already is an absolute path it will not be converted.
 *
 * @param aPath relative path
 * @return absolute path
 */
Path makeAbsolute(final Path aPath) {
    if (aPath.isAbsolute()) {
        return aPath;
    }
    return new Path(workingDirectory, aPath);
}
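To make the resolution rule above concrete, here is a hypothetical caller of the same pattern (the working directory and file names below are made up for illustration): a relative path is joined under the working directory via Path(parent, child), while an absolute path passes through unchanged.

import org.apache.hadoop.fs.Path;

public class MakeAbsoluteExample {
    public static void main(String[] args) {
        Path workingDirectory = new Path("/user/alice");
        // relative input: joined under the working directory via Path(parent, child)
        Path relative = new Path("data/input.txt");
        Path resolved = relative.isAbsolute() ? relative : new Path(workingDirectory, relative);
        System.out.println(resolved); // prints /user/alice/data/input.txt
        // absolute input: returned unchanged
        Path absolute = new Path("/etc/hosts");
        System.out.println(absolute.isAbsolute() ? absolute : new Path(workingDirectory, absolute)); // prints /etc/hosts
    }
}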
From source file:DisplayKMeans.java
License:Apache License
private static void runSequentialKMeansClassifier(Configuration conf, Path samples, Path output,
        DistanceMeasure measure, int numClusters, int maxIterations, double convergenceDelta)
        throws IOException {
    Collection<Vector> points = Lists.newArrayList();
    for (int i = 0; i < numClusters; i++) {
        points.add(SAMPLE_DATA.get(i).get());
        // System.out.println(SAMPLE_DATA.get(i).toString());
    }
    List<Cluster> initialClusters = Lists.newArrayList();
    int id = 0;
    for (Vector point : points) {
        initialClusters.add(new org.apache.mahout.clustering.kmeans.Kluster(point, id++, measure));
    }
    ClusterClassifier prior = new ClusterClassifier(initialClusters,
        new KMeansClusteringPolicy(convergenceDelta));
    Path priorPath = new Path(output, Cluster.INITIAL_CLUSTERS_DIR);
    prior.writeToSeqFiles(priorPath);

    ClusterIterator.iterateSeq(conf, samples, priorPath, output, maxIterations);
    loadClustersWritable(output);
}
From source file:DisplayKMeans.java
License:Apache License
private static void runSequentialKMeansClusterer(Configuration conf, Path samples, Path output,
        DistanceMeasure measure, int numClusters, int maxIterations, double convergenceDelta)
        throws IOException, InterruptedException, ClassNotFoundException {
    Path clustersIn = new Path(output, "random-seeds");
    RandomSeedGenerator.buildRandom(conf, samples, clustersIn, numClusters, measure);
    KMeansDriver.run(samples, clustersIn, output, convergenceDelta, maxIterations, true, 0.0, true);
    loadClustersWritable(output);
}
From source file:DisplayFuzzyKMeans.java
License:Apache License
private static void runSequentialFuzzyKClassifier(Configuration conf, Path samples, Path output,
        DistanceMeasure measure, int numClusters, int maxIterations, float m, double threshold)
        throws IOException {
    Collection<Vector> points = Lists.newArrayList();
    for (int i = 0; i < numClusters; i++) {
        points.add(SAMPLE_DATA.get(i).get());
    }
    List<Cluster> initialClusters = Lists.newArrayList();
    int id = 0;
    for (Vector point : points) {
        initialClusters.add(new SoftCluster(point, id++, measure));
    }
    ClusterClassifier prior = new ClusterClassifier(initialClusters,
        new FuzzyKMeansClusteringPolicy(m, threshold));
    Path priorPath = new Path(output, "classifier-0");
    prior.writeToSeqFiles(priorPath);

    ClusterIterator.iterateSeq(conf, samples, priorPath, output, maxIterations);
    loadClustersWritable(output);
}