Example usage for org.apache.hadoop.fs Path getFileSystem

List of usage examples for org.apache.hadoop.fs Path getFileSystem

Introduction

This page collects usage examples for org.apache.hadoop.fs.Path.getFileSystem.

Prototype

public FileSystem getFileSystem(Configuration conf) throws IOException 

Document

Return the FileSystem that owns this Path.
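
Before the project-specific examples below, here is a minimal, self-contained sketch of the call pattern (not taken from any of the sources listed; the path string is a placeholder): resolve the owning FileSystem from a Path and a Configuration, then use it for metadata operations.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetFileSystemExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Placeholder path; the scheme (hdfs://, file://, s3a://, ...) decides which FileSystem is returned.
        Path path = new Path("/tmp/example-output");

        // Resolve the FileSystem that owns this Path.
        FileSystem fs = path.getFileSystem(conf);

        // Use the resolved FileSystem for a simple metadata query.
        System.out.println(path + " exists: " + fs.exists(path));
    }
}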

Usage

From source file: be.uantwerpen.adrem.bigfim.BigFIMDriver.java

License: Apache License

private boolean canStartMining(FIMOptions opt) throws IOException {
    Path path = new Path(opt.outputDir + separator + "pg");
    return path.getFileSystem(new Configuration()).exists(path);
}
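
This example resolves the FileSystem with a freshly constructed Configuration just to perform an existence check. When a Job or task Context is already available, the same check can reuse its configuration; a hedged sketch (the job variable is hypothetical, not part of the original source):

    // Hypothetical: 'job' is an existing org.apache.hadoop.mapreduce.Job.
    Path path = new Path(opt.outputDir + separator + "pg");
    FileSystem fs = path.getFileSystem(job.getConfiguration());
    boolean exists = fs.exists(path);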

From source file: be.uantwerpen.adrem.bigfim.BigFIMDriver.java

License: Apache License

protected int startAprioriPhase(FIMOptions opt)
        throws IOException, InterruptedException, ClassNotFoundException, URISyntaxException {
    long nrLines = -1;
    int i = 1;
    boolean run = true;
    while (run) {
        if (i == 1) {
            String outputDir = opt.outputDir + separator + "ap" + i;
            String info = "Apriori Phase " + i;
            run = runAprioriOncPhaseOnce(opt, nrLines, i, info, outputDir, null);
        } else {
            Path path = new Path(opt.outputDir + separator + "tg" + (i - 1));
            for (FileStatus status : path.getFileSystem(new Configuration()).listStatus(path)) {
                String cacheFile = status.getPath().toString();
                String trieGroupCount = cacheFile.substring(cacheFile.lastIndexOf('/'), cacheFile.length());
                trieGroupCount = trieGroupCount.split("-")[1];
                String outputDir = opt.outputDir + separator + "ap" + i + "-trieGroup" + trieGroupCount;
                System.out.println("CacheFile " + cacheFile);
                String info = "Apriori Phase " + i + ", Trie Group " + trieGroupCount;
                run = runAprioriOncPhaseOnce(opt, nrLines, i, info, outputDir, cacheFile);
            }
        }

        i++;
    }
    return i - 1;
}

From source file: be.uantwerpen.adrem.bigfim.BigFIMDriver.java

License: Apache License

private void startCreatePrefixGroups(FIMOptions opt, int phase)
        throws IOException, ClassNotFoundException, InterruptedException, URISyntaxException {
    Path path = new Path(opt.outputDir + separator + "tg" + phase);
    for (FileStatus status : path.getFileSystem(new Configuration()).listStatus(path)) {
        String cacheFile = status.getPath().toString();
        String trieGroupCount = cacheFile.substring(cacheFile.lastIndexOf('/'), cacheFile.length());
        trieGroupCount = trieGroupCount.split("-")[1];
        String outputFile = opt.outputDir + separator + "pg-trieGroup" + trieGroupCount;
        System.out.println("[CreatePrefixGroups]: input: " + opt.inputFile + ", output: " + opt.outputDir
                + ", cache: " + cacheFile);

        Job job = prepareJob(new Path(opt.inputFile), new Path(outputFile), SplitByKTextInputFormat.class,
                ComputeTidListMapper.class, Text.class, IntArrayWritable.class, ComputeTidListReducer.class,
                IntArrayWritable.class, IntMatrixWritable.class, SequenceFileOutputFormat.class);

        job.setJobName("Create Prefix Groups");
        job.setJarByClass(BigFIMDriver.class);
        job.setNumReduceTasks(1);

        Configuration conf = job.getConfiguration();
        setConfigurationValues(conf, opt);
        conf.setInt(PREFIX_LENGTH_KEY, phase);

        addCacheFile(new URI(cacheFile.replace(" ", "%20")), job.getConfiguration());

        runJob(job, "Prefix Creation");
    }
}

From source file: be.uantwerpen.adrem.bigfim.ComputeTidListReducer.java

License: Apache License

private void getPgStartIndex(Configuration conf) {
    try {
        Path path = new Path(basePGDir);
        FileSystem fs = path.getFileSystem(new Configuration());

        if (!fs.exists(path)) {
            pgStartIndex = 0;
            return;
        }

        int largestIx = 0;
        for (FileStatus file : fs.listStatus(path)) {
            String tmp = file.getPath().toString();
            if (!tmp.contains("bucket")) {
                continue;
            }
            tmp = tmp.substring(tmp.lastIndexOf('/'), tmp.length());
            int ix = Integer.parseInt(tmp.split("-")[1]);
            largestIx = Math.max(largestIx, ix);
            pgStartIndex += 1;
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}

From source file: be.uantwerpen.adrem.disteclat.PrefixComputerReducer.java

License: Apache License

private void createShortFIsFile(Context context) throws IOException {
    Path path = new Path(createPath(getJobAbsoluteOutputDir(context), OShortFIs, OShortFIs));
    FileSystem fs = path.getFileSystem(context.getConfiguration());
    shortFIsOut = new PrintStream(fs.create(path));
}

From source file: be.uantwerpen.adrem.hadoop.util.SplitByKTextInputFormat.java

License: Apache License

/**
 * Gets the file splits for the given file, based on the requested number of splits.
 *
 * @param status
 *          file status
 * @param conf
 *          hadoop configuration object
 * @param numberOfSplits
 *          number of splits to split the data in
 * @return list of file splits
 * @throws IOException
 *           thrown if the file does not exist
 */
public static List<FileSplit> getSplitsForFile(FileStatus status, Configuration conf, int numberOfSplits)
        throws IOException {
    List<FileSplit> splits = newArrayList();
    Path fileName = status.getPath();
    if (status.isDir()) {
        throw new IOException("Not a file: " + fileName);
    }
    long totalNumberOfLines = getTotalNumberOfLines(conf, fileName);
    int numLinesPerSplit = (int) Math.ceil(1.0 * totalNumberOfLines / numberOfSplits);
    LineReader lr = null;
    FSDataInputStream in = null;
    try {
        in = fileName.getFileSystem(conf).open(fileName);
        lr = new LineReader(in, conf);
        Text line = new Text();
        int numLines = 0;
        long begin = 0;
        long length = 0;
        int num = -1;
        while ((num = lr.readLine(line)) > 0) {
            numLines++;
            length += num;
            if (numLines == numLinesPerSplit) {
                splits.add(createFileSplit(fileName, begin, length));
                begin += length;
                length = 0;
                numLines = 0;
            }
        }
        if (numLines != 0) {
            splits.add(createFileSplit(fileName, begin, length));
        }
    } finally {
        if (lr != null) {
            lr.close();
        }
        if (in != null) {
            in.close();
        }
    }
    return splits;
}
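
A short usage sketch of getSplitsForFile, assuming a pre-existing inputPath and conf (both hypothetical here):

    // Hypothetical usage: split one input file into 4 line-based splits.
    FileSystem fs = inputPath.getFileSystem(conf);
    FileStatus status = fs.getFileStatus(inputPath);
    List<FileSplit> splits = SplitByKTextInputFormat.getSplitsForFile(status, conf, 4);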

From source file: be.uantwerpen.adrem.hadoop.util.SplitByKTextInputFormat.java

License: Apache License

/**
 * Gets the total number of lines from the file. If Config.NUMBER_OF_LINES_KEY is set, this value is returned.
 *
 * @param conf
 *          hadoop configuration object
 * @param fileName
 *          name of file to count
 * @return the number of lines in the file
 * @throws IOException
 */
public static long getTotalNumberOfLines(Configuration conf, Path fileName) throws IOException {
    long nrLines = conf.getLong(NUMBER_OF_LINES_KEY, -1);
    if (nrLines != -1) {
        return nrLines;
    }

    try {
        FSDataInputStream in = fileName.getFileSystem(conf).open(fileName);
        LineReader lr = new LineReader(in, conf);
        Text text = new Text();
        nrLines = 0;
        while (lr.readLine(text) > 0) {
            nrLines++;
        }
        in.close();
        return nrLines;
    } catch (IOException e) {
        e.printStackTrace();
    }
    return 0;
}
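
If the line count is already known, the counting pass can be skipped by setting the configuration key up front. A hedged sketch, assuming NUMBER_OF_LINES_KEY is a public constant of SplitByKTextInputFormat:

    // Hypothetical: provide the line count so getTotalNumberOfLines returns it without reading the file.
    conf.setLong(SplitByKTextInputFormat.NUMBER_OF_LINES_KEY, 1000000L);
    long nrLines = SplitByKTextInputFormat.getTotalNumberOfLines(conf, new Path("/data/input.txt"));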

From source file: be.uantwerpen.adrem.hadoop.util.Tools.java

License: Apache License

/**
 * Cleans the Hadoop file system by deleting the specified files if they exist.
 *
 * @param files
 *          the files to delete
 */
public static void cleanDirs(String... files) {
    System.out.println("[Cleaning]: Cleaning HDFS");
    Configuration conf = new Configuration();
    for (String filename : files) {
        System.out.println("[Cleaning]: Trying to delete " + filename);
        Path path = new Path(filename);
        try {
            FileSystem fs = path.getFileSystem(conf);
            if (fs.exists(path)) {
                if (fs.delete(path, true)) {
                    System.out.println("[Cleaning]: Deleted " + filename);
                } else {
                    System.out.println("[Cleaning]: Error while deleting " + filename);
                }
            } else {
                System.out.println("[Cleaning]: " + filename + " does not exist on HDFS");
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
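
A minimal usage sketch for cleanDirs (the directory names are placeholders, not from the original project):

    // Hypothetical output directories to remove before re-running a job.
    Tools.cleanDirs("/user/example/output/ap1", "/user/example/output/tg1");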

From source file: be.uantwerpen.adrem.hadoop.util.Tools.java

License: Apache License

public static String getJobAbsoluteOutputDir(@SuppressWarnings("rawtypes") Context context) {
    try {
        Path path = new Path(context.getConfiguration().get(OUTPUT_DIR_KEY));
        FileSystem fs = path.getFileSystem(context.getConfiguration());
        return fs.getFileStatus(path).getPath().toString();
    } catch (IOException e) {
    }
    return "";
}

From source file: be.uantwerpen.adrem.hadoop.util.Tools.java

License: Apache License

public static void cleanupSubdirsExcept(String dir, Collection<String> toKeep) {
    Path path = new Path(dir);
    try {
        for (FileStatus fs : path.getFileSystem(new Configuration()).listStatus(path)) {
            String[] sp = fs.getPath().toString().split(Path.SEPARATOR);
            String filename = sp[sp.length - 1];
            if (toKeep.contains(filename)) {
                cleanDirs(fs.getPath().toString() + Path.SEPARATOR + "_SUCCESS");
                continue;
            }
            cleanDirs(fs.getPath().toString());
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}