Example usage for org.apache.hadoop.fs FileSystem mkdirs

List of usage examples for org.apache.hadoop.fs FileSystem mkdirs

Introduction

This page collects example usages of org.apache.hadoop.fs.FileSystem#mkdirs from open-source projects.

Prototype

public boolean mkdirs(Path f) throws IOException 

Document

Calls #mkdirs(Path, FsPermission) with the default permission.
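
For orientation, here is a minimal, self-contained sketch of calling mkdirs directly. The configuration defaults and the target path are illustrative assumptions, not values taken from the examples below.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class MkdirsExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Resolve the FileSystem configured by fs.defaultFS (local FS if unset)
        FileSystem fs = FileSystem.get(conf);

        // Hypothetical target directory; mkdirs behaves like `mkdir -p`:
        // it creates any missing parent directories and returns true if the
        // directory exists (or was created) when the call completes.
        Path dir = new Path("/tmp/mkdirs-example/output");
        boolean created = fs.mkdirs(dir);
        System.out.println("mkdirs(" + dir + ") returned " + created);
    }
}

Each example below makes this same call while setting up HDFS directories before copying data or writing job output.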

Usage

From source file:edu.uci.ics.hyracks.hdfs2.dataflow.DataflowTest.java

License:Apache License

/**
 * Start the HDFS cluster and setup the data files
 * @throws IOException
 */
private void startHDFS() throws IOException {
    conf.getConfiguration().addResource(new Path(PATH_TO_HADOOP_CONF + "/core-site.xml"));
    conf.getConfiguration().addResource(new Path(PATH_TO_HADOOP_CONF + "/mapred-site.xml"));
    conf.getConfiguration().addResource(new Path(PATH_TO_HADOOP_CONF + "/hdfs-site.xml"));

    FileSystem lfs = FileSystem.getLocal(new Configuration());
    lfs.delete(new Path("build"), true);
    System.setProperty("hadoop.log.dir", "logs");
    dfsCluster = dfsClusterFactory.getMiniDFSCluster(conf.getConfiguration(), numberOfNC);
    FileSystem dfs = FileSystem.get(conf.getConfiguration());
    Path src = new Path(DATA_PATH);
    Path dest = new Path(HDFS_INPUT_PATH);
    Path result = new Path(HDFS_OUTPUT_PATH);
    dfs.mkdirs(dest);
    dfs.mkdirs(result);
    dfs.copyFromLocalFile(src, dest);

    DataOutputStream confOutput = new DataOutputStream(new FileOutputStream(new File(HADOOP_CONF_PATH)));
    conf.getConfiguration().writeXml(confOutput);
    confOutput.flush();
    confOutput.close();
}

From source file:edu.uci.ics.pregelix.example.jobrun.RunJobTestSuite.java

License:Apache License

private void startHDFS() throws IOException {
    conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/core-site.xml"));
    conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/mapred-site.xml"));
    conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/hdfs-site.xml"));
    FileSystem lfs = FileSystem.getLocal(new Configuration());
    lfs.delete(new Path("build"), true);
    System.setProperty("hadoop.log.dir", "logs");
    dfsCluster = new MiniDFSCluster(conf, numberOfNC, true, null);
    FileSystem dfs = FileSystem.get(conf);
    Path src = new Path(DATA_PATH);
    Path dest = new Path(HDFS_PATH);
    dfs.mkdirs(dest);
    dfs.copyFromLocalFile(src, dest);

    src = new Path(DATA_PATH2);
    dest = new Path(HDFS_PATH2);
    dfs.mkdirs(dest);
    dfs.copyFromLocalFile(src, dest);

    src = new Path(DATA_PATH3);
    dest = new Path(HDFS_PATH3);
    dfs.mkdirs(dest);
    dfs.copyFromLocalFile(src, dest);

    DataOutputStream confOutput = new DataOutputStream(new FileOutputStream(new File(HADOOP_CONF_PATH)));
    conf.writeXml(confOutput);
    confOutput.flush();
    confOutput.close();
}

From source file:edu.uci.ics.pregelix.example.util.TestCluster.java

License:Apache License

private void startHDFS() throws IOException {
    conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/core-site.xml"));
    conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/mapred-site.xml"));
    conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/hdfs-site.xml"));
    FileSystem lfs = FileSystem.getLocal(new Configuration());
    lfs.delete(new Path("build"), true);
    System.setProperty("hadoop.log.dir", "logs");
    dfsCluster = new MiniDFSCluster(conf, numberOfNC, true, null);
    FileSystem dfs = FileSystem.get(conf);
    Path src = new Path(DATA_PATH);
    Path dest = new Path(HDFS_PATH);
    dfs.mkdirs(dest);
    dfs.copyFromLocalFile(src, dest);

    src = new Path(DATA_PATH2);
    dest = new Path(HDFS_PATH2);
    dfs.mkdirs(dest);
    dfs.copyFromLocalFile(src, dest);

    src = new Path(DATA_PATH3);
    dest = new Path(HDFS_PATH3);
    dfs.mkdirs(dest);
    dfs.copyFromLocalFile(src, dest);

    src = new Path(DATA_PATH4);
    dest = new Path(HDFS_PATH4);
    dfs.mkdirs(dest);
    dfs.copyFromLocalFile(src, dest);

    src = new Path(DATA_PATH5);
    dest = new Path(HDFS_PATH5);
    dfs.mkdirs(dest);
    dfs.copyFromLocalFile(src, dest);

    DataOutputStream confOutput = new DataOutputStream(new FileOutputStream(new File(HADOOP_CONF_PATH)));
    conf.writeXml(confOutput);
    confOutput.flush();
    confOutput.close();
}

From source file:edu.umn.cs.spatialHadoop.core.SpatialSite.java

License:Open Source License

/**
 * Ensures that the given class is in the class path of running jobs.
 * If the jar is not already in the class path, it is added to the
 * DistributedCache of the given job to ensure the associated job will work
 * fine.
 * @param conf
 * @param klass
 */
public static void addClassToPath(Configuration conf, Class<?> klass) {
    // Check if we need to add the containing jar to class path
    String klassJar = findContainingJar(klass);
    String shadoopJar = findContainingJar(SpatialSite.class);
    if (klassJar == null || (shadoopJar != null && klassJar.equals(shadoopJar)))
        return;
    Path containingJar = new Path(findContainingJar(klass));
    Path[] existingClassPaths = DistributedCache.getArchiveClassPaths(conf);
    if (existingClassPaths != null) {
        for (Path existingClassPath : existingClassPaths) {
            if (containingJar.getName().equals(existingClassPath.getName()))
                return;
        }
    }
    // The containing jar is a new one and needs to be copied to class path
    try {
        LOG.info("Adding JAR '" + containingJar.getName() + "' to job class path");
        FileSystem defaultFS = FileSystem.get(conf);
        Path libFolder;
        if (existingClassPaths != null && existingClassPaths.length > 0) {
            libFolder = existingClassPaths[0].getParent();
        } else {
            // First jar to be added like this. Create a new lib folder
            do {
                libFolder = new Path("lib_" + (int) (Math.random() * 100000));
            } while (defaultFS.exists(libFolder));
            defaultFS.mkdirs(libFolder);
            defaultFS.deleteOnExit(libFolder);
        }
        defaultFS.copyFromLocalFile(containingJar, libFolder);
        Path jarFullPath = new Path(libFolder, containingJar.getName()).makeQualified(defaultFS);
        DistributedCache.addArchiveToClassPath(jarFullPath, conf);
    } catch (IOException e) {
        e.printStackTrace();
    }
}

From source file:edu.umn.cs.spatialHadoop.nasa.StockQuadTree.java

License:Open Source License

/**
 * Creates a full spatio-temporal hierarchy for a source folder
 * @throws ParseException
 * @throws InterruptedException 
 */
public static void directoryIndexer(final OperationsParams params)
        throws IOException, ParseException, InterruptedException {
    Path inputDir = params.getInputPath();
    FileSystem sourceFs = inputDir.getFileSystem(params);
    final Path sourceDir = inputDir.makeQualified(sourceFs);
    Path destDir = params.getOutputPath();
    final FileSystem destFs = destDir.getFileSystem(params);

    TimeRange timeRange = params.get("time") != null ? new TimeRange(params.get("time")) : null;

    // Create daily indexes that do not exist
    final Path dailyIndexDir = new Path(destDir, "daily");
    FileStatus[] matchingDays = timeRange == null ? sourceFs.listStatus(inputDir)
            : sourceFs.listStatus(inputDir, timeRange);
    final Vector<Path> sourceFiles = new Vector<Path>();
    for (FileStatus matchingDay : matchingDays) {
        for (FileStatus matchingTile : sourceFs.listStatus(matchingDay.getPath())) {
            sourceFiles.add(matchingTile.getPath());
        }

    }
    // Shuffle the array for better load balancing across threads
    Collections.shuffle(sourceFiles);
    final String datasetName = params.get("dataset");
    Parallel.forEach(sourceFiles.size(), new RunnableRange<Object>() {
        @Override
        public Object run(int i1, int i2) {
            LOG.info("Worker [" + i1 + "," + i2 + ") started");
            for (int i = i1; i < i2; i++) {
                Path sourceFile = sourceFiles.get(i);
                try {
                    Path relativeSourceFile = makeRelative(sourceDir, sourceFile);
                    Path destFilePath = new Path(dailyIndexDir, relativeSourceFile);
                    if (!destFs.exists(destFilePath)) {
                        LOG.info("Worker [" + i1 + "," + i2 + ") indexing: " + sourceFile.getName());
                        Path tmpFile;
                        do {
                            tmpFile = new Path((int) (Math.random() * 1000000) + ".tmp");
                        } while (destFs.exists(tmpFile));
                        tmpFile = tmpFile.makeQualified(destFs);
                        if (datasetName == null)
                            throw new RuntimeException(
                                    "Please provide the name of dataset you would like to index");
                        AggregateQuadTree.build(params, sourceFile, datasetName, tmpFile);
                        synchronized (destFs) {
                            Path destDir = destFilePath.getParent();
                            if (!destFs.exists(destDir))
                                destFs.mkdirs(destDir);
                        }
                        destFs.rename(tmpFile, destFilePath);
                    }
                } catch (IOException e) {
                    throw new RuntimeException("Error building an index for " + sourceFile, e);
                }
            }
            LOG.info("Worker [" + i1 + "," + i2 + ") finished");
            return null;
        }

    });
    LOG.info("Done generating daily indexes");

    // Merge daily indexes into monthly indexes
    Path monthlyIndexDir = new Path(destDir, "monthly");
    final SimpleDateFormat dayFormat = new SimpleDateFormat("yyyy.MM.dd");
    final SimpleDateFormat monthFormat = new SimpleDateFormat("yyyy.MM");
    mergeIndexes(destFs, dailyIndexDir, monthlyIndexDir, dayFormat, monthFormat, params);
    LOG.info("Done generating monthly indexes");

    // Merge monthly indexes into yearly indexes
    Path yearlyIndexDir = new Path(destDir, "yearly");
    final SimpleDateFormat yearFormat = new SimpleDateFormat("yyyy");
    mergeIndexes(destFs, monthlyIndexDir, yearlyIndexDir, monthFormat, yearFormat, params);
    LOG.info("Done generating yearly indexes");
}

From source file:edu.umn.cs.spatialHadoop.nasa.StockQuadTree.java

License:Open Source License

/**
 * Merges a set of indexes into larger indexes
 * @param fs
 * @param srcIndexDir
 * @param dstIndexDir
 * @param srcFormat
 * @param dstFormat
 * @param params
 * @throws IOException
 * @throws ParseException
 * @throws InterruptedException
 */
private static void mergeIndexes(final FileSystem fs, Path srcIndexDir, Path dstIndexDir,
        SimpleDateFormat srcFormat, SimpleDateFormat dstFormat, final OperationsParams params)
        throws IOException, ParseException, InterruptedException {
    TimeRange timeRange = params.get("time") != null ? new TimeRange(params.get("time")) : null;
    final FileStatus[] sourceIndexes = timeRange == null ? fs.listStatus(srcIndexDir)
            : fs.listStatus(srcIndexDir, timeRange);
    Arrays.sort(sourceIndexes); // Alphabetical sort acts as sort-by-date here

    // Scan the source indexes and merge each consecutive run belonging to the
    // same unit
    int i1 = 0;
    while (i1 < sourceIndexes.length) {
        final String indexToCreate = dstFormat.format(srcFormat.parse(sourceIndexes[i1].getPath().getName()));
        int i2 = i1 + 1;
        // Keep scanning as long as the source index belongs to the same dest index
        while (i2 < sourceIndexes.length && dstFormat
                .format(srcFormat.parse(sourceIndexes[i2].getPath().getName())).equals(indexToCreate))
            i2++;

        // Merge all source indexes in the range [i1, i2) into one dest index

        // Copy i1, i2 to other variables as final to be accessible from threads
        final int firstIndex = i1;
        final int lastIndex = i2;

        final Path destIndex = new Path(dstIndexDir, indexToCreate);

        // For each tile, merge all values in all source indexes
        /*A regular expression to catch the tile identifier of a MODIS grid cell*/
        final Pattern MODISTileID = Pattern.compile("^.*(h\\d\\dv\\d\\d).*$");
        final FileStatus[] tilesInFirstDay = fs.listStatus(sourceIndexes[i1].getPath());
        // Shuffle the array for better load balancing across threads
        Random rand = new Random();
        for (int i = 0; i < tilesInFirstDay.length - 1; i++) {
            // Swap the entry at i with any following entry
            int j = i + rand.nextInt(tilesInFirstDay.length - i - 1);
            FileStatus temp = tilesInFirstDay[i];
            tilesInFirstDay[i] = tilesInFirstDay[j];
            tilesInFirstDay[j] = temp;
        }
        Parallel.forEach(tilesInFirstDay.length, new RunnableRange<Object>() {
            @Override
            public Object run(int i_file1, int i_file2) {
                for (int i_file = i_file1; i_file < i_file2; i_file++) {
                    try {
                        FileStatus tileInFirstDay = tilesInFirstDay[i_file];

                        // Extract tile ID
                        Matcher matcher = MODISTileID.matcher(tileInFirstDay.getPath().getName());
                        if (!matcher.matches()) {
                            LOG.warn("Cannot extract tile id from file " + tileInFirstDay.getPath());
                            continue;
                        }

                        final String tileID = matcher.group(1);
                        Path destIndexFile = new Path(destIndex, tileID);

                        PathFilter tileFilter = new PathFilter() {
                            @Override
                            public boolean accept(Path path) {
                                return path.getName().contains(tileID);
                            }
                        };

                        // Find matching tiles in all source indexes to merge
                        Vector<Path> filesToMerge = new Vector<Path>(lastIndex - firstIndex);
                        filesToMerge.add(tileInFirstDay.getPath());
                        for (int iDailyIndex = firstIndex + 1; iDailyIndex < lastIndex; iDailyIndex++) {
                            FileStatus[] matchedTileFile = fs.listStatus(sourceIndexes[iDailyIndex].getPath(),
                                    tileFilter);
                            if (matchedTileFile.length == 0)
                                LOG.warn("Could not find tile " + tileID + " in dir "
                                        + sourceIndexes[iDailyIndex].getPath());
                            else if (matchedTileFile.length == 1)
                                filesToMerge.add(matchedTileFile[0].getPath());
                        }

                        if (fs.exists(destIndexFile)) {
                            // Destination file already exists
                            // Check the date of the destination and source files to see
                            // whether it needs to be updated or not
                            long destTimestamp = fs.getFileStatus(destIndexFile).getModificationTime();
                            boolean needsUpdate = false;
                            for (Path fileToMerge : filesToMerge) {
                                long sourceTimestamp = fs.getFileStatus(fileToMerge).getModificationTime();
                                if (sourceTimestamp > destTimestamp) {
                                    needsUpdate = true;
                                    break;
                                }
                            }
                            if (!needsUpdate)
                                continue;
                            else
                                LOG.info("Updating file " + destIndexFile.getName());
                        }

                        // Do the merge
                        Path tmpFile;
                        do {
                            tmpFile = new Path((int) (Math.random() * 1000000) + ".tmp");
                        } while (fs.exists(tmpFile));
                        tmpFile = tmpFile.makeQualified(fs);
                        LOG.info("Merging tile " + tileID + " into file " + destIndexFile);
                        AggregateQuadTree.merge(params, filesToMerge.toArray(new Path[filesToMerge.size()]),
                                tmpFile);
                        synchronized (fs) {
                            Path destDir = destIndexFile.getParent();
                            if (!fs.exists(destDir))
                                fs.mkdirs(destDir);
                        }
                        fs.rename(tmpFile, destIndexFile);
                    } catch (IOException e) {
                        e.printStackTrace();
                    }
                }
                return null;
            }
        });
        i1 = i2;
    }
}

From source file:edu.umn.cs.spatialHadoop.nasa.DistributedAggregateSpatioTemporalIndexer.java

License:Open Source License

public static void main(String[] args) throws IOException, ParseException {

    OperationsParams params = new OperationsParams(new GenericOptionsParser(args), false);

    final Path[] paths = params.getPaths();
    if (paths.length <= 1 && !params.checkInput()) {
        printUsage();
        System.exit(1);
    }
    if (paths.length >= 2 && paths[1] == null) {
        printUsage();
        System.exit(1);
    }
    if (params.get("time") == null) {
        System.err.println("You must provide a time range");
        printUsage();
        System.exit(1);
    }

    Path datasetPath = paths[0]; // dataset path
    Path indexesPath = paths[1]; // index path
    String timeRange = params.get("time"); // time range

    TemporalIndexManager temporalIndexManager = new TemporalIndexManager(datasetPath, indexesPath);
    temporalIndexManager.prepareNeededIndexes(timeRange);

    // Indexes need to be built or re-built using AggregateQuadTreeMapReduce
    Path[] dailyIndexes = temporalIndexManager.getNeededDailyIndexes();
    LOG.info("Needs to index/re-index " + dailyIndexes.length + " days");
    for (Path dailyIndexPath : dailyIndexes) {
        FileSystem currFileSystem = dailyIndexPath.getFileSystem(params);
        Path[] dailyIndexHDFFiles = FileUtil.getFilesListInPath(dailyIndexPath);
        Path dailyIndexDictionaryPath = FileUtil.writePathsToHDFSFile(params, dailyIndexHDFFiles);
        Path dailyIndexOutputPath = new Path(temporalIndexManager.getDailyIndexesHomePath().toString() + "/"
                + NASADatasetUtil.extractDateStringFromPath(dailyIndexPath));

        if (currFileSystem.exists(dailyIndexOutputPath)) {
            currFileSystem.delete(dailyIndexOutputPath, true);
        }
        currFileSystem.mkdirs(dailyIndexOutputPath);

        DistributedAggregateSpatioTemporalIndexer.setIndexPath(dailyIndexOutputPath);
        aggregateQuadTreeMapReduce(dailyIndexDictionaryPath, params);

        currFileSystem.delete(dailyIndexDictionaryPath, false);
    }

    // Indexes need to be merged or re-merged
    Path[] monthlyIndexes = temporalIndexManager.getNeededMonthlyIndexes();
    LOG.info("Needs to index/re-index " + monthlyIndexes.length + " months");
    for (Path monthlyIndexPath : monthlyIndexes) {
        FileSystem currFileSystem = monthlyIndexPath.getFileSystem(new Configuration());
        ArrayList<Path[]> pathsArrList = NASADatasetUtil.getSortedTuplesInPath(
                temporalIndexManager.getDailyIndexesHomePath(),
                NASADatasetUtil.extractDateStringFromPath(monthlyIndexPath));

        if (currFileSystem.exists(monthlyIndexPath)) {
            currFileSystem.delete(monthlyIndexPath, true);
        }
        currFileSystem.mkdirs(monthlyIndexPath);

        for (Path[] currDailyIndexHDFFiles : pathsArrList) {
            Path currMonthlyIndexHDFFilePath = new Path(monthlyIndexPath.toString() + "/"
                    + NASADatasetUtil.getHDFfilePattern(currDailyIndexHDFFiles[0].toString()) + ".hdf");
            AggregateQuadTree.merge(new Configuration(), currDailyIndexHDFFiles, currMonthlyIndexHDFFilePath);
        }
    }

    // Indexes need to be merged or re-merged
    Path[] yearlyIndexes = temporalIndexManager.getNeededYearlyIndexes();
    LOG.info("Needs to index/re-index " + yearlyIndexes.length + " years");
    for (Path yearlyIndexPath : yearlyIndexes) {
        FileSystem currFileSystem = yearlyIndexPath.getFileSystem(new Configuration());
        ArrayList<Path[]> pathsArrList = NASADatasetUtil.getSortedTuplesInPath(
                temporalIndexManager.getMonthlyIndexesHomePath(),
                NASADatasetUtil.extractDateStringFromPath(yearlyIndexPath));

        if (currFileSystem.exists(yearlyIndexPath)) {
            currFileSystem.delete(yearlyIndexPath, true);
        }
        currFileSystem.mkdirs(yearlyIndexPath);

        for (Path[] currMonthlyIndexHDFFiles : pathsArrList) {
            Path currYearlyIndexHDFFilePath = new Path(yearlyIndexPath.toString() + "/"
                    + NASADatasetUtil.getHDFfilePattern(currMonthlyIndexHDFFiles[0].toString()) + ".hdf");
            AggregateQuadTree.merge(new Configuration(), currMonthlyIndexHDFFiles, currYearlyIndexHDFFilePath);
        }
    }

}

From source file:edu.umn.cs.spatialHadoop.RandomSpatialGenerator.java

License:Open Source License

/**
 * Generates random rectangles and writes the result to a file.
 * @param outFile - The file to write to
 * @param params - Operation parameters carrying the MBR, shape type,
 *   spatial index, and total size of the generated file
 * @throws IOException 
 */
private static void generateFileLocal(Path outFile, OperationsParams params) throws IOException {
    JobConf job = new JobConf(params, RandomSpatialGenerator.class);
    FileSystem outFS = outFile.getFileSystem(params);
    long blocksize = outFS.getDefaultBlockSize(outFile);
    String sindex = params.get("sindex");
    Rectangle mbr = params.getShape("mbr").getMBR();
    long totalSize = params.getSize("size");

    // Calculate the dimensions of each partition based on gindex type
    CellInfo[] cells;
    if (sindex == null) {
        cells = new CellInfo[] { new CellInfo(1, mbr) };
    } else if (sindex.equals("grid")) {
        int num_partitions = Repartition.calculateNumberOfPartitions(params, totalSize, outFS, outFile,
                blocksize);

        GridInfo gridInfo = new GridInfo(mbr.x1, mbr.y1, mbr.x2, mbr.y2);
        gridInfo.calculateCellDimensions(num_partitions);
        cells = gridInfo.getAllCells();
    } else {
        throw new RuntimeException("Unsupported spatial index: " + sindex);
    }

    outFS.mkdirs(outFile);

    ShapeRecordWriter<Shape> writer;
    if (sindex == null || sindex.equals("grid")) {
        writer = new GridRecordWriter<Shape>(outFile, job, null, cells);
    } else {
        throw new RuntimeException("Unupoorted spatial idnex: " + sindex);
    }

    int rectSize = params.getInt("rectsize", 100);
    long seed = params.getLong("seed", System.currentTimeMillis());
    float circleThickness = params.getFloat("thickness", 1);
    DistributionType type = SpatialSite.getDistributionType(params, "type", DistributionType.UNIFORM);
    Shape shape = params.getShape("shape");
    long t1 = System.currentTimeMillis();

    RandomShapeGenerator<Shape> generator = new RandomShapeGenerator<Shape>(totalSize, mbr, type, rectSize,
            seed, circleThickness);

    Rectangle key = generator.createKey();

    while (generator.next(key, shape)) {
        // Serialize it to text
        writer.write(NullWritable.get(), shape);
    }
    writer.close(null);
    long t2 = System.currentTimeMillis();

    System.out.println("Generation time: " + (t2 - t1) + " millis");
}

From source file:edu.umn.cs.spatialHadoop.temporal.RepartitionTemporal.java

License:Apache License

private static void bulkLoadSpatioTemporalIndexesLevel(Path indexLevelHomePath, Path[] inputPathDirs,
        String indexLevel, OperationsParams params) throws IOException, InterruptedException {
    LOG.info("Needs to index/re-index " + inputPathDirs.length + " " + indexLevel);

    for (Path inputPathDir : inputPathDirs) {
        FileSystem currFileSystem = inputPathDir.getFileSystem(params);

        if (currFileSystem.exists(inputPathDir)) {
            currFileSystem.delete(inputPathDir, true);
        }
        currFileSystem.mkdirs(inputPathDir);

        ArrayList<Path[]> pathsArrList = NASADatasetUtil.getSortedTuplesInPath(indexLevelHomePath,
                NASADatasetUtil.extractDateStringFromPath(inputPathDir));

        Path indexPath = generateIndexPath(pathsArrList.get(0)[0], inputPathDir);

        for (Path[] currInputFiles : pathsArrList) {
            repartitionMapReduce(currInputFiles, indexPath, params);
        }

    }

}

From source file:edu.umn.cs.spatialHadoop.visualization.MultilevelPlot.java

License:Open Source License

public static Job plot(Path[] inPaths, Path outPath, Class<? extends Plotter> plotterClass,
        OperationsParams params) throws IOException, InterruptedException, ClassNotFoundException {
    if (params.getBoolean("showmem", false)) {
        // Run a thread that keeps track of used memory
        Thread memThread = new Thread(new Thread() {
            @Override
            public void run() {
                Runtime runtime = Runtime.getRuntime();
                while (true) {
                    try {
                        Thread.sleep(60000);
                    } catch (InterruptedException e) {
                        e.printStackTrace();
                    }
                    runtime.gc();
                    LOG.info("Memory usage: "
                            + ((runtime.totalMemory() - runtime.freeMemory()) / (1024 * 1024 * 1024)) + "GB.");
                }
            }
        });
        memThread.setDaemon(true);
        memThread.start();
    }

    // Decide how to run it based on range of levels to generate
    String[] strLevels = params.get("levels", "7").split("\\.\\.");
    int minLevel, maxLevel;
    if (strLevels.length == 1) {
        minLevel = 0;
        maxLevel = Integer.parseInt(strLevels[0]) - 1;
    } else {
        minLevel = Integer.parseInt(strLevels[0]);
        maxLevel = Integer.parseInt(strLevels[1]);
    }
    // Create an output directory that will hold the output of the two jobs
    FileSystem outFS = outPath.getFileSystem(params);
    outFS.mkdirs(outPath);

    Job runningJob = null;
    if (OperationsParams.isLocal(params, inPaths)) {
        // Plot local
        plotLocal(inPaths, outPath, plotterClass, params);
    } else {
        int maxLevelWithFlatPartitioning = params.getInt(FlatPartitioningLevelThreshold, 4);
        if (minLevel <= maxLevelWithFlatPartitioning) {
            OperationsParams flatPartitioning = new OperationsParams(params);
            flatPartitioning.set("levels", minLevel + ".." + Math.min(maxLevelWithFlatPartitioning, maxLevel));
            flatPartitioning.set("partition", "flat");
            LOG.info("Using flat partitioning in levels " + flatPartitioning.get("levels"));
            runningJob = plotMapReduce(inPaths, new Path(outPath, "flat"), plotterClass, flatPartitioning);
        }
        if (maxLevel > maxLevelWithFlatPartitioning) {
            OperationsParams pyramidPartitioning = new OperationsParams(params);
            pyramidPartitioning.set("levels",
                    Math.max(minLevel, maxLevelWithFlatPartitioning + 1) + ".." + maxLevel);
            pyramidPartitioning.set("partition", "pyramid");
            LOG.info("Using pyramid partitioning in levels " + pyramidPartitioning.get("levels"));
            runningJob = plotMapReduce(inPaths, new Path(outPath, "pyramid"), plotterClass,
                    pyramidPartitioning);
        }
        // Write a new HTML file that displays both parts of the pyramid
        // Add an HTML file that visualizes the result using Google Maps
        LineReader templateFileReader = new LineReader(
                MultilevelPlot.class.getResourceAsStream("/zoom_view.html"));
        PrintStream htmlOut = new PrintStream(outFS.create(new Path(outPath, "index.html")));
        Text line = new Text();
        while (templateFileReader.readLine(line) > 0) {
            String lineStr = line.toString();
            lineStr = lineStr.replace("#{TILE_WIDTH}", Integer.toString(params.getInt("tilewidth", 256)));
            lineStr = lineStr.replace("#{TILE_HEIGHT}", Integer.toString(params.getInt("tileheight", 256)));
            lineStr = lineStr.replace("#{MAX_ZOOM}", Integer.toString(maxLevel));
            lineStr = lineStr.replace("#{MIN_ZOOM}", Integer.toString(minLevel));
            lineStr = lineStr.replace("#{TILE_URL}", "(zoom <= " + maxLevelWithFlatPartitioning
                    + "? 'flat' : 'pyramid')+('/tile-' + zoom + '-' + coord.x + '-' + coord.y + '.png')");

            htmlOut.println(lineStr);
        }
        templateFileReader.close();
        htmlOut.close();
    }

    return runningJob;
}