List of usage examples for the org.apache.hadoop.fs.Path constructor
public Path(Path parent, Path child)
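Before the full examples, a minimal, self-contained sketch of what this constructor does (the directory names here are invented for illustration): it resolves the child path against the parent, which is how every example below builds file and directory names under a base directory.

import org.apache.hadoop.fs.Path;

public class PathJoinExample {
    public static void main(String[] args) {
        Path base = new Path("/tmp/pi");    // parent directory
        Path in = new Path(base, "in");     // -> /tmp/pi/in
        Path part = new Path(in, "part0");  // -> /tmp/pi/in/part0
        System.out.println(part);           // prints /tmp/pi/in/part0
    }
}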
From source file:PiEstimator.java
License:Apache License
/**
 * Run a map/reduce job for estimating Pi.
 *
 * @return the estimated value of Pi
 */
public static BigDecimal estimate(int numMaps, long numPoints, JobConf jobConf) throws IOException {
    // setup job conf
    jobConf.setJobName(PiEstimator.class.getSimpleName());
    jobConf.setInputFormat(SequenceFileInputFormat.class);
    jobConf.setOutputKeyClass(BooleanWritable.class);
    jobConf.setOutputValueClass(LongWritable.class);
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);
    jobConf.setMapperClass(PiMapper.class);
    jobConf.setNumMapTasks(numMaps);
    jobConf.setReducerClass(PiReducer.class);
    jobConf.setNumReduceTasks(1);

    // turn off speculative execution, because DFS doesn't handle
    // multiple writers to the same file.
    jobConf.setSpeculativeExecution(false);

    // setup input/output directories
    final Path inDir = new Path(TMP_DIR, "in");
    final Path outDir = new Path(TMP_DIR, "out");
    FileInputFormat.setInputPaths(jobConf, inDir);
    FileOutputFormat.setOutputPath(jobConf, outDir);

    final FileSystem fs = FileSystem.get(jobConf);
    if (fs.exists(TMP_DIR)) {
        throw new IOException("Tmp directory " + fs.makeQualified(TMP_DIR)
            + " already exists. Please remove it first.");
    }
    if (!fs.mkdirs(inDir)) {
        throw new IOException("Cannot create input directory " + inDir);
    }

    try {
        // generate an input file for each map task
        for (int i = 0; i < numMaps; ++i) {
            final Path file = new Path(inDir, "part" + i);
            final LongWritable offset = new LongWritable(i * numPoints);
            final LongWritable size = new LongWritable(numPoints);
            final SequenceFile.Writer writer = SequenceFile.createWriter(fs, jobConf, file,
                LongWritable.class, LongWritable.class, CompressionType.NONE);
            try {
                writer.append(offset, size);
            } finally {
                writer.close();
            }
            System.out.println("Wrote input for Map #" + i);
        }

        // start a map/reduce job
        System.out.println("Starting Job");
        final long startTime = System.currentTimeMillis();
        JobClient.runJob(jobConf);
        final double duration = (System.currentTimeMillis() - startTime) / 1000.0;
        System.out.println("Job Finished in " + duration + " seconds");

        // read outputs
        Path inFile = new Path(outDir, "reduce-out");
        LongWritable numInside = new LongWritable();
        LongWritable numOutside = new LongWritable();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, jobConf);
        try {
            reader.next(numInside, numOutside);
        } finally {
            reader.close();
        }

        // compute estimated value
        return BigDecimal.valueOf(4).setScale(20)
            .multiply(BigDecimal.valueOf(numInside.get()))
            .divide(BigDecimal.valueOf(numMaps))
            .divide(BigDecimal.valueOf(numPoints));
    } finally {
        fs.delete(TMP_DIR, true);
    }
}
From source file:PerformanceEvaluation.java
License:Apache License
private void doMapReduce(final Class<? extends Test> cmd)
        throws IOException, InterruptedException, ClassNotFoundException {
    Path inputDir = writeInputFile(this.conf);
    this.conf.set(EvaluationMapTask.CMD_KEY, cmd.getName());
    this.conf.set(EvaluationMapTask.PE_KEY, getClass().getName());
    Job job = new Job(this.conf);
    job.setJarByClass(PerformanceEvaluation.class);
    job.setJobName("HBase Performance Evaluation");
    job.setInputFormatClass(PeInputFormat.class);
    PeInputFormat.setInputPaths(job, inputDir);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(LongWritable.class);
    job.setMapperClass(EvaluationMapTask.class);
    job.setReducerClass(LongSumReducer.class);
    job.setNumReduceTasks(1);
    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, new Path(inputDir, "outputs"));
    TableMapReduceUtil.addDependencyJars(job);
    job.waitForCompletion(true);
}
From source file:PerformanceEvaluation.java
License:Apache License
private Path writeInputFile(final Configuration c) throws IOException {
    FileSystem fs = FileSystem.get(c);
    if (!fs.exists(PERF_EVAL_DIR)) {
        fs.mkdirs(PERF_EVAL_DIR);
    }
    SimpleDateFormat formatter = new SimpleDateFormat("yyyyMMddHHmmss");
    Path subdir = new Path(PERF_EVAL_DIR, formatter.format(new Date()));
    fs.mkdirs(subdir);
    Path inputFile = new Path(subdir, "input.txt");
    PrintStream out = new PrintStream(fs.create(inputFile));
    // Make input random.
    Map<Integer, String> m = new TreeMap<Integer, String>();
    Hash h = MurmurHash.getInstance();
    int perClientRows = (this.R / this.N);
    try {
        for (int i = 0; i < 10; i++) {
            for (int j = 0; j < N; j++) {
                String s = "startRow=" + ((j * perClientRows) + (i * (perClientRows / 10)))
                    + ", perClientRunRows=" + (perClientRows / 10)
                    + ", totalRows=" + this.R
                    + ", clients=" + this.N
                    + ", flushCommits=" + this.flushCommits
                    + ", writeToWAL=" + this.writeToWAL
                    + ", scanCache=" + this.S;
                int hash = h.hash(Bytes.toBytes(s));
                m.put(hash, s);
            }
        }
        for (Map.Entry<Integer, String> e : m.entrySet()) {
            out.println(e.getValue());
        }
    } finally {
        out.close();
    }
    return subdir;
}
From source file:AggregatedLogsPurger.java
License:Apache License
public boolean purge() throws IOException {
    LocalDateTime now = LocalDateTime.now();
    LocalDateTime deleteLogsOlderThanTime = now.minusDays(deleteOlderThanDays);

    // Identify which log dirs should be deleted
    FileSystem fs = rootLogDir.getFileSystem(conf);
    try {
        long totalBytes = 0;
        for (FileStatus userDir : fs.listStatus(rootLogDir)) {
            if (userDir.isDirectory()) {
                Path userDirPath = new Path(userDir.getPath(), suffix);
                System.out.println("Checking for userDir : " + userDirPath);
                for (FileStatus appDir : fs.listStatus(userDirPath)) {
                    LocalDateTime appDirDate = getAppDirDateTime(appDir.getModificationTime());
                    if (appDirDate.isBefore(deleteLogsOlderThanTime)) {
                        long size = getLengthRecursively(fs, appDir.getPath());
                        System.out.println(appDir.getPath() + ", " + appDir.getOwner() + ", "
                            + appDirDate.toString() + ", size=" + size);
                        totalBytes += size;
                        if (shouldDelete) {
                            System.out.println("Deleting " + appDir.getPath());
                            fs.delete(appDir.getPath(), true);
                        }
                    }
                }
            }
        }
        System.out.println("Savings : " + totalBytes);
    } catch (IOException e) {
        e.printStackTrace();
        return false;
    } finally {
        fs.close();
    }
    return true;
}
From source file:DisplayClustering.java
License:Apache License
/**
 * This method plots points and colors them according to their cluster
 * membership, rather than drawing ellipses.
 *
 * As of commit, this method is used only by K-means spectral clustering.
 * Since the cluster assignments are set within the eigenspace of the data, it
 * is not inherent that the original data cluster as they would in K-means:
 * that is, as symmetric gaussian mixtures.
 *
 * Since Spectral K-Means uses K-Means to cluster the eigenspace data, the raw
 * output is not directly usable. Rather, the cluster assignments from the raw
 * output need to be transferred back to the original data. As such, this
 * method will read the SequenceFile cluster results of K-means and transfer
 * the cluster assignments to the original data, coloring them appropriately.
 *
 * @param g2
 * @param data
 */
protected static void plotClusteredSampleData(Graphics2D g2, Path data) {
    double sx = (double) res / DS;
    g2.setTransform(AffineTransform.getScaleInstance(sx, sx));

    g2.setColor(Color.BLACK);
    Vector dv = new DenseVector(2).assign(SIZE / 2.0);
    plotRectangle(g2, new DenseVector(2).assign(2), dv);
    plotRectangle(g2, new DenseVector(2).assign(-2), dv);

    // plot the sample data, colored according to the cluster they belong to
    dv.assign(0.03);
    Path clusteredPointsPath = new Path(data, "clusteredPoints");
    Path inputPath = new Path(clusteredPointsPath, "part-m-00000");
    Map<Integer, Color> colors = new HashMap<Integer, Color>();
    int point = 0;
    for (Pair<IntWritable, WeightedVectorWritable> record :
            new SequenceFileIterable<IntWritable, WeightedVectorWritable>(inputPath, new Configuration())) {
        int clusterId = record.getFirst().get();
        VectorWritable v = SAMPLE_DATA.get(point++);
        Integer key = clusterId;
        if (!colors.containsKey(key)) {
            colors.put(key, COLORS[Math.min(COLORS.length - 1, colors.size())]);
        }
        plotClusteredRectangle(g2, v.get(), dv, colors.get(key));
    }
}
From source file:JaqlShell.java
License:Apache License
/**
 * @param dir
 * @param numNodes
 * @throws Exception
 */
public void init(String dir, int numNodes) throws Exception {
    String vInfo = VersionInfo.getVersion();
    System.setProperty("test.build.data", dir);
    m_conf = new Configuration();
    // setup conf according to the Hadoop version
    if (vInfo.indexOf("0.20") < 0) {
        throw new Exception("Unsupported Hadoop version: " + vInfo);
    }

    // setup the mini dfs cluster
    m_fs = new MiniDFSCluster(m_conf, numNodes, true, (String[]) null);
    FileSystem filesystem = m_fs.getFileSystem();
    m_conf.set("fs.default.name", filesystem.getUri().toString());
    Path parentdir = filesystem.getHomeDirectory();
    filesystem.mkdirs(parentdir);
    //FSUtils.setVersion(filesystem, parentdir);

    // setup hbase cluster (only if OS is not windows)
    //    if(!System.getProperty("os.name").toLowerCase().contains("win")) {
    //      m_conf.set(HConstants.HBASE_DIR, parentdir.toString());
    //      Path hdfsTestDir = filesystem.makeQualified(new Path(m_conf.get(HConstants.HBASE_DIR)));
    //
    //      // prime the hdfs for hbase information...
    //      HRegion root = HRegion.createHRegion(HRegionInfo.ROOT_REGIONINFO, hdfsTestDir, (HBaseConfiguration)m_conf);
    //      HRegion meta = HRegion.createHRegion(HRegionInfo.FIRST_META_REGIONINFO, hdfsTestDir, (HBaseConfiguration)m_conf);
    //      HRegion.addRegionToMETA(root, meta);
    //
    //      // ... and close the root and meta
    //      if (meta != null) {
    //        meta.close();
    //        meta.getLog().closeAndDelete();
    //      }
    //      if (root != null) {
    //        root.close();
    //        root.getLog().closeAndDelete();
    //      }
    //
    //      try {
    //        this.zooKeeperCluster = new MiniZooKeeperCluster();
    //        File testDir = new File(dir);
    //        int clientPort = this.zooKeeperCluster.startup(testDir);
    //        m_conf.set("hbase.zookeeper.property.clientPort", Integer.toString(clientPort));
    //      } catch(Exception e) {
    //        LOG.error("Unable to startup zookeeper");
    //        throw new IOException(e);
    //      }
    //      try {
    //        // start the mini cluster
    //        m_base = new MiniHBaseCluster((HBaseConfiguration)m_conf, numNodes);
    //      } catch(Exception e) {
    //        LOG.error("Unable to startup hbase");
    //        throw new IOException(e);
    //      }
    //      try {
    //        // opening the META table ensures that cluster is running
    //        new HTable((HBaseConfiguration)m_conf, HConstants.META_TABLE_NAME);
    //        //setupOverride(conf);
    //      } catch (Exception e) {
    //        LOG.warn("Could not verify that hbase is up", e);
    //      }
    //      setupOverride();
    //    }

    m_mr = startMRCluster(numNodes, m_fs.getFileSystem().getName(), m_conf);

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);

    // make the home directory if it does not exist
    Path hd = fs.getWorkingDirectory();
    if (!fs.exists(hd))
        fs.mkdirs(hd);

    // make the $USER/_temporary directory if it does not exist
    Path tmpPath = new Path(hd, "_temporary");
    if (!fs.exists(tmpPath))
        fs.mkdirs(tmpPath);

    //    if (m_base != null) {
    //      try {
    //        m_admin = new HBaseAdmin((HBaseConfiguration) m_conf);
    //        HTableDescriptor[] tables = m_admin.listTables();
    //        if (tables != null) {
    //          for (int i = 0; i < tables.length; i++) {
    //            m_admin.enableTable(tables[i].getName());
    //          }
    //        }
    //      } catch(Exception e) {
    //        LOG.warn("failed to enable hbase tables");
    //      }
    //    }
}
From source file:RawParascaleFileSystem.java
License:Apache License
/**
 * Converts a relative path to its absolute representation. If the path
 * already is an absolute path it will not be converted.
 *
 * @param aPath relative path
 * @return absolute path
 */
Path makeAbsolute(final Path aPath) {
    if (aPath.isAbsolute()) {
        return aPath;
    }
    return new Path(workingDirectory, aPath);
}
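To make the resolution rule above concrete, here is a hypothetical caller of the same pattern (the working directory and file names below are made up for illustration): a relative path is joined under the working directory via Path(parent, child), while an absolute path passes through unchanged.

import org.apache.hadoop.fs.Path;

public class MakeAbsoluteExample {
    public static void main(String[] args) {
        Path workingDirectory = new Path("/user/alice");
        // relative input: joined under the working directory via Path(parent, child)
        Path relative = new Path("data/input.txt");
        Path resolved = relative.isAbsolute() ? relative : new Path(workingDirectory, relative);
        System.out.println(resolved); // prints /user/alice/data/input.txt
        // absolute input: returned unchanged
        Path absolute = new Path("/etc/hosts");
        System.out.println(absolute.isAbsolute() ? absolute : new Path(workingDirectory, absolute)); // prints /etc/hosts
    }
}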
From source file:DisplayKMeans.java
License:Apache License
private static void runSequentialKMeansClassifier(Configuration conf, Path samples, Path output,
        DistanceMeasure measure, int numClusters, int maxIterations, double convergenceDelta)
        throws IOException {
    Collection<Vector> points = Lists.newArrayList();
    for (int i = 0; i < numClusters; i++) {
        points.add(SAMPLE_DATA.get(i).get());
        // System.out.println(SAMPLE_DATA.get(i).toString());
    }
    List<Cluster> initialClusters = Lists.newArrayList();
    int id = 0;
    for (Vector point : points) {
        initialClusters.add(new org.apache.mahout.clustering.kmeans.Kluster(point, id++, measure));
    }
    ClusterClassifier prior = new ClusterClassifier(initialClusters,
        new KMeansClusteringPolicy(convergenceDelta));
    Path priorPath = new Path(output, Cluster.INITIAL_CLUSTERS_DIR);
    prior.writeToSeqFiles(priorPath);

    ClusterIterator.iterateSeq(conf, samples, priorPath, output, maxIterations);
    loadClustersWritable(output);
}
From source file:DisplayKMeans.java
License:Apache License
private static void runSequentialKMeansClusterer(Configuration conf, Path samples, Path output,
        DistanceMeasure measure, int numClusters, int maxIterations, double convergenceDelta)
        throws IOException, InterruptedException, ClassNotFoundException {
    Path clustersIn = new Path(output, "random-seeds");
    RandomSeedGenerator.buildRandom(conf, samples, clustersIn, numClusters, measure);
    KMeansDriver.run(samples, clustersIn, output, convergenceDelta, maxIterations, true, 0.0, true);
    loadClustersWritable(output);
}
From source file:DisplayFuzzyKMeans.java
License:Apache License
private static void runSequentialFuzzyKClassifier(Configuration conf, Path samples, Path output,
        DistanceMeasure measure, int numClusters, int maxIterations, float m, double threshold)
        throws IOException {
    Collection<Vector> points = Lists.newArrayList();
    for (int i = 0; i < numClusters; i++) {
        points.add(SAMPLE_DATA.get(i).get());
    }
    List<Cluster> initialClusters = Lists.newArrayList();
    int id = 0;
    for (Vector point : points) {
        initialClusters.add(new SoftCluster(point, id++, measure));
    }
    ClusterClassifier prior = new ClusterClassifier(initialClusters,
        new FuzzyKMeansClusteringPolicy(m, threshold));
    Path priorPath = new Path(output, "classifier-0");
    prior.writeToSeqFiles(priorPath);

    ClusterIterator.iterateSeq(conf, samples, priorPath, output, maxIterations);
    loadClustersWritable(output);
}