Example usage for org.apache.hadoop.fs Path Path

Introduction

This page lists example usages of the org.apache.hadoop.fs.Path(Path parent, Path child) constructor.

Prototype

public Path(Path parent, Path child) 

Document

Create a new Path based on the child path resolved against the parent path.
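
As a quick illustration before the full examples below, here is a minimal sketch of the resolution behavior; the paths used are hypothetical and only serve to show how a relative child is resolved against a parent directory.

import org.apache.hadoop.fs.Path;

public class PathResolutionSketch {
    public static void main(String[] args) {
        // Hypothetical parent directory and relative child path, for illustration only.
        Path parent = new Path("/user/hadoop/tmp");
        Path child = new Path("in/part0");

        // The child is resolved against the parent, yielding /user/hadoop/tmp/in/part0.
        Path resolved = new Path(parent, child);
        System.out.println(resolved);
    }
}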

Usage

From source file:PiEstimator.java

License:Apache License

/**
 * Run a map/reduce job for estimating Pi.
 *
 * @return the estimated value of Pi
 */
public static BigDecimal estimate(int numMaps, long numPoints, JobConf jobConf) throws IOException {
    // setup job conf
    jobConf.setJobName(PiEstimator.class.getSimpleName());

    jobConf.setInputFormat(SequenceFileInputFormat.class);

    jobConf.setOutputKeyClass(BooleanWritable.class);
    jobConf.setOutputValueClass(LongWritable.class);
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);

    jobConf.setMapperClass(PiMapper.class);
    jobConf.setNumMapTasks(numMaps);

    jobConf.setReducerClass(PiReducer.class);
    jobConf.setNumReduceTasks(1);

    // turn off speculative execution, because DFS doesn't handle
    // multiple writers to the same file.
    jobConf.setSpeculativeExecution(false);

    // setup input/output directories
    final Path inDir = new Path(TMP_DIR, "in");
    final Path outDir = new Path(TMP_DIR, "out");
    FileInputFormat.setInputPaths(jobConf, inDir);
    FileOutputFormat.setOutputPath(jobConf, outDir);

    final FileSystem fs = FileSystem.get(jobConf);
    if (fs.exists(TMP_DIR)) {
        throw new IOException(
                "Tmp directory " + fs.makeQualified(TMP_DIR) + " already exists.  Please remove it first.");
    }
    if (!fs.mkdirs(inDir)) {
        throw new IOException("Cannot create input directory " + inDir);
    }

    try {
        // generate an input file for each map task
        for (int i = 0; i < numMaps; ++i) {
            final Path file = new Path(inDir, "part" + i);
            final LongWritable offset = new LongWritable(i * numPoints);
            final LongWritable size = new LongWritable(numPoints);
            final SequenceFile.Writer writer = SequenceFile.createWriter(fs, jobConf, file, LongWritable.class,
                    LongWritable.class, CompressionType.NONE);
            try {
                writer.append(offset, size);
            } finally {
                writer.close();
            }
            System.out.println("Wrote input for Map #" + i);
        }

        // start a map/reduce job
        System.out.println("Starting Job");
        final long startTime = System.currentTimeMillis();
        JobClient.runJob(jobConf);
        final double duration = (System.currentTimeMillis() - startTime) / 1000.0;
        System.out.println("Job Finished in " + duration + " seconds");

        // read outputs
        Path inFile = new Path(outDir, "reduce-out");
        LongWritable numInside = new LongWritable();
        LongWritable numOutside = new LongWritable();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, jobConf);
        try {
            reader.next(numInside, numOutside);
        } finally {
            reader.close();
        }

        // compute estimated value
        return BigDecimal.valueOf(4).setScale(20).multiply(BigDecimal.valueOf(numInside.get()))
                .divide(BigDecimal.valueOf(numMaps)).divide(BigDecimal.valueOf(numPoints));
    } finally {
        fs.delete(TMP_DIR, true);
    }
}

From source file:PerformanceEvaluation.java

License:Apache License

private void doMapReduce(final Class<? extends Test> cmd)
        throws IOException, InterruptedException, ClassNotFoundException {
    Path inputDir = writeInputFile(this.conf);
    this.conf.set(EvaluationMapTask.CMD_KEY, cmd.getName());
    this.conf.set(EvaluationMapTask.PE_KEY, getClass().getName());
    Job job = new Job(this.conf);
    job.setJarByClass(PerformanceEvaluation.class);
    job.setJobName("HBase Performance Evaluation");

    job.setInputFormatClass(PeInputFormat.class);
    PeInputFormat.setInputPaths(job, inputDir);

    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(LongWritable.class);

    job.setMapperClass(EvaluationMapTask.class);
    job.setReducerClass(LongSumReducer.class);

    job.setNumReduceTasks(1);

    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, new Path(inputDir, "outputs"));

    TableMapReduceUtil.addDependencyJars(job);
    job.waitForCompletion(true);
}

From source file:PerformanceEvaluation.java

License:Apache License

private Path writeInputFile(final Configuration c) throws IOException {
    FileSystem fs = FileSystem.get(c);
    if (!fs.exists(PERF_EVAL_DIR)) {
        fs.mkdirs(PERF_EVAL_DIR);
    }
    SimpleDateFormat formatter = new SimpleDateFormat("yyyyMMddHHmmss");
    Path subdir = new Path(PERF_EVAL_DIR, formatter.format(new Date()));
    fs.mkdirs(subdir);
    Path inputFile = new Path(subdir, "input.txt");
    PrintStream out = new PrintStream(fs.create(inputFile));
    // Make input random.
    Map<Integer, String> m = new TreeMap<Integer, String>();
    Hash h = MurmurHash.getInstance();
    int perClientRows = (this.R / this.N);
    try {
        for (int i = 0; i < 10; i++) {
            for (int j = 0; j < N; j++) {
                String s = "startRow=" + ((j * perClientRows) + (i * (perClientRows / 10)))
                        + ", perClientRunRows=" + (perClientRows / 10) + ", totalRows=" + this.R + ", clients="
                        + this.N + ", flushCommits=" + this.flushCommits + ", writeToWAL=" + this.writeToWAL
                        + ", scanCache=" + this.S;
                int hash = h.hash(Bytes.toBytes(s));
                m.put(hash, s);
            }
        }
        for (Map.Entry<Integer, String> e : m.entrySet()) {
            out.println(e.getValue());
        }
    } finally {
        out.close();
    }
    return subdir;
}

From source file:AggregatedLogsPurger.java

License:Apache License

public boolean purge() throws IOException {
    LocalDateTime now = LocalDateTime.now();
    LocalDateTime deleteLogsOlderThanTime = now.minusDays(deleteOlderThanDays);

    //Identify which log dirs should be deleted
    FileSystem fs = rootLogDir.getFileSystem(conf);
    try {

        long totalBytes = 0;
        for (FileStatus userDir : fs.listStatus(rootLogDir)) {
            if (userDir.isDirectory()) {
                Path userDirPath = new Path(userDir.getPath(), suffix);
                System.out.println("Checking for userDir : " + userDirPath);
                for (FileStatus appDir : fs.listStatus(userDirPath)) {
                    LocalDateTime appDirDate = getAppDirDateTime(appDir.getModificationTime());
                    if (appDirDate.isBefore(deleteLogsOlderThanTime)) {
                        long size = getLengthRecursively(fs, appDir.getPath());
                        System.out.println(appDir.getPath() + ", " + appDir.getOwner() + ", "
                                + appDirDate.toString() + ", size=" + size);
                        totalBytes += size;
                        if (shouldDelete) {
                            System.out.println("Deleting " + appDir.getPath());
                            fs.delete(appDir.getPath(), true);
                        }
                    }
                }
            }
        }
        System.out.println("Savings : " + totalBytes);
    } catch (IOException e) {
        e.printStackTrace();
        return false;
    } finally {
        fs.close();
    }
    return true;
}

From source file:DisplayClustering.java

License:Apache License

/**
 * This method plots points and colors them according to their cluster
 * membership, rather than drawing ellipses.
 *
 * As of commit, this method is used only by K-means spectral clustering.
 * Since the cluster assignments are set within the eigenspace of the data, it
 * is not inherent that the original data cluster as they would in K-means:
 * that is, as symmetric gaussian mixtures.
 *
 * Since Spectral K-Means uses K-Means to cluster the eigenspace data, the raw
 * output is not directly usable. Rather, the cluster assignments from the raw
 * output need to be transferred back to the original data. As such, this
 * method will read the SequenceFile cluster results of K-means and transfer
 * the cluster assignments to the original data, coloring them appropriately.
 *
 * @param g2 the graphics context to draw into
 * @param data the clustering output directory containing the clusteredPoints results
 */
protected static void plotClusteredSampleData(Graphics2D g2, Path data) {
    double sx = (double) res / DS;
    g2.setTransform(AffineTransform.getScaleInstance(sx, sx));

    g2.setColor(Color.BLACK);
    Vector dv = new DenseVector(2).assign(SIZE / 2.0);
    plotRectangle(g2, new DenseVector(2).assign(2), dv);
    plotRectangle(g2, new DenseVector(2).assign(-2), dv);

    // plot the sample data, colored according to the cluster they belong to
    dv.assign(0.03);

    Path clusteredPointsPath = new Path(data, "clusteredPoints");
    Path inputPath = new Path(clusteredPointsPath, "part-m-00000");
    Map<Integer, Color> colors = new HashMap<Integer, Color>();
    int point = 0;
    for (Pair<IntWritable, WeightedVectorWritable> record : new SequenceFileIterable<IntWritable, WeightedVectorWritable>(
            inputPath, new Configuration())) {
        int clusterId = record.getFirst().get();
        VectorWritable v = SAMPLE_DATA.get(point++);
        Integer key = clusterId;
        if (!colors.containsKey(key)) {
            colors.put(key, COLORS[Math.min(COLORS.length - 1, colors.size())]);
        }
        plotClusteredRectangle(g2, v.get(), dv, colors.get(key));
    }
}

From source file:JaqlShell.java

License:Apache License

/**
 * @param dir directory used for the mini cluster's test data
 * @param numNodes number of nodes for the mini DFS and MapReduce clusters
 * @throws Exception
 */
public void init(String dir, int numNodes) throws Exception {
    String vInfo = VersionInfo.getVersion();
    System.setProperty("test.build.data", dir);
    m_conf = new Configuration();

    // setup conf according to the Hadoop version
    if (vInfo.indexOf("0.20") < 0) {
        throw new Exception("Unsupported Hadoop version: " + vInfo);
    }

    // setup the mini dfs cluster
    m_fs = new MiniDFSCluster(m_conf, numNodes, true, (String[]) null);
    FileSystem filesystem = m_fs.getFileSystem();
    m_conf.set("fs.default.name", filesystem.getUri().toString());
    Path parentdir = filesystem.getHomeDirectory();
    filesystem.mkdirs(parentdir);
    //FSUtils.setVersion(filesystem, parentdir);

    // setup hbase cluster (only if OS is not windows)
    //    if(!System.getProperty("os.name").toLowerCase().contains("win")) {
    //      m_conf.set(HConstants.HBASE_DIR, parentdir.toString());      
    //      Path hdfsTestDir = filesystem.makeQualified(new Path(m_conf.get(HConstants.HBASE_DIR)));
    //
    //      // prime the hdfs for hbase information...
    //      HRegion root = HRegion.createHRegion(HRegionInfo.ROOT_REGIONINFO, hdfsTestDir, (HBaseConfiguration)m_conf);
    //      HRegion meta = HRegion.createHRegion(HRegionInfo.FIRST_META_REGIONINFO, hdfsTestDir, (HBaseConfiguration)m_conf);
    //      HRegion.addRegionToMETA(root, meta);
    //
    //      // ... and close the root and meta
    //      if (meta != null) {
    //        meta.close();
    //        meta.getLog().closeAndDelete();
    //      }
    //      if (root != null) {
    //        root.close();
    //        root.getLog().closeAndDelete();
    //      }
    //
    //      try
    //      {
    //        this.zooKeeperCluster = new MiniZooKeeperCluster();
    //        File testDir = new File(dir);
    //        int clientPort = this.zooKeeperCluster.startup(testDir);
    //        m_conf.set("hbase.zookeeper.property.clientPort", Integer.toString(clientPort));
    //      } catch(Exception e) {
    //        LOG.error("Unable to startup zookeeper");
    //        throw new IOException(e);
    //      }
    //      try {
    //        // start the mini cluster
    //        m_base = new MiniHBaseCluster((HBaseConfiguration)m_conf, numNodes);
    //      } catch(Exception e) {
    //        LOG.error("Unable to startup hbase");
    //        throw new IOException(e);
    //      }
    //      try {
    //        // opening the META table ensures that cluster is running
    //        new HTable((HBaseConfiguration)m_conf, HConstants.META_TABLE_NAME);        
    //
    //        //setupOverride(conf);
    //      }
    //      catch (Exception e)
    //      {
    //        LOG.warn("Could not verify that hbase is up", e);
    //      }
    //      setupOverride();
    //    }

    m_mr = startMRCluster(numNodes, m_fs.getFileSystem().getName(), m_conf);

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);

    // make the home directory if it does not exist
    Path hd = fs.getWorkingDirectory();
    if (!fs.exists(hd))
        fs.mkdirs(hd);

    // make the $USER/_temporary directory if it does not exist
    Path tmpPath = new Path(hd, "_temporary");
    if (!fs.exists(tmpPath))
        fs.mkdirs(tmpPath);

    //    if (m_base != null)
    //    {
    //      try {
    //        m_admin = new HBaseAdmin((HBaseConfiguration) m_conf);
    //        HTableDescriptor[] tables = m_admin.listTables();
    //        if (tables != null)
    //        {
    //          for (int i = 0; i < tables.length; i++)
    //          {
    //            m_admin.enableTable(tables[i].getName());
    //          }
    //        }
    //      } catch(Exception e) {
    //        LOG.warn("failed to enable hbase tables");
    //      }
    //    }
}

From source file:RawParascaleFileSystem.java

License:Apache License

/**
 * Converts a relative path to its absolute representation. If the path
 * already is an absolute path it will not be converted.
 *
 * @param aPath relative path
 *
 * @return absolute path
 */
Path makeAbsolute(final Path aPath) {
    if (aPath.isAbsolute()) {
        return aPath;
    }
    return new Path(workingDirectory, aPath);
}

From source file:DisplayKMeans.java

License:Apache License

private static void runSequentialKMeansClassifier(Configuration conf, Path samples, Path output,
        DistanceMeasure measure, int numClusters, int maxIterations, double convergenceDelta)
        throws IOException {
    Collection<Vector> points = Lists.newArrayList();
    for (int i = 0; i < numClusters; i++) {
        points.add(SAMPLE_DATA.get(i).get());
        //      System.out.println(SAMPLE_DATA.get(i).toString());
    }
    List<Cluster> initialClusters = Lists.newArrayList();
    int id = 0;
    for (Vector point : points) {
        initialClusters.add(new org.apache.mahout.clustering.kmeans.Kluster(point, id++, measure));
    }
    ClusterClassifier prior = new ClusterClassifier(initialClusters,
            new KMeansClusteringPolicy(convergenceDelta));
    Path priorPath = new Path(output, Cluster.INITIAL_CLUSTERS_DIR);
    prior.writeToSeqFiles(priorPath);

    ClusterIterator.iterateSeq(conf, samples, priorPath, output, maxIterations);
    loadClustersWritable(output);
}

From source file:DisplayKMeans.java

License:Apache License

private static void runSequentialKMeansClusterer(Configuration conf, Path samples, Path output,
        DistanceMeasure measure, int numClusters, int maxIterations, double convergenceDelta)
        throws IOException, InterruptedException, ClassNotFoundException {
    Path clustersIn = new Path(output, "random-seeds");
    RandomSeedGenerator.buildRandom(conf, samples, clustersIn, numClusters, measure);
    KMeansDriver.run(samples, clustersIn, output, convergenceDelta, maxIterations, true, 0.0, true);
    loadClustersWritable(output);
}

From source file:DisplayFuzzyKMeans.java

License:Apache License

private static void runSequentialFuzzyKClassifier(Configuration conf, Path samples, Path output,
        DistanceMeasure measure, int numClusters, int maxIterations, float m, double threshold)
        throws IOException {
    Collection<Vector> points = Lists.newArrayList();
    for (int i = 0; i < numClusters; i++) {
        points.add(SAMPLE_DATA.get(i).get());
    }
    List<Cluster> initialClusters = Lists.newArrayList();
    int id = 0;
    for (Vector point : points) {
        initialClusters.add(new SoftCluster(point, id++, measure));
    }
    ClusterClassifier prior = new ClusterClassifier(initialClusters,
            new FuzzyKMeansClusteringPolicy(m, threshold));
    Path priorPath = new Path(output, "classifier-0");
    prior.writeToSeqFiles(priorPath);

    ClusterIterator.iterateSeq(conf, samples, priorPath, output, maxIterations);
    loadClustersWritable(output);
}