Example usage for org.apache.hadoop.fs FileSystem exists

List of usage examples for org.apache.hadoop.fs FileSystem exists

Introduction

In this page you can find the example usage for org.apache.hadoop.fs FileSystem exists.

Prototype

public boolean exists(Path f) throws IOException 

Source Link

Document

Check if a path exists.

Usage

From source file:br.com.lassal.nqueens.grid.job.GenerateSolutions.java

/**
 * NQueens working folder structure /nqueens/board-{x}/partial/solution_X-4
 *
 * @param queensSize//from ww w .  j a va  2 s.co  m
 * @throws IOException
 */
private void setWorkingFolder(int queensSize, Job job) throws IOException {
    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);

    if (fs.isDirectory(new Path("/nqueens/board-" + queensSize + "/final"))) {
        System.exit(0); // ja foi processado anteriormente nao processa de novo
    }

    String lastSolution = null;
    Path partialSolDir = new Path("/nqueens/board-" + queensSize + "/partial/");
    Path inputPath = null;
    Path outputPath = null;

    if (fs.exists(partialSolDir)) {
        RemoteIterator<LocatedFileStatus> dirsFound = fs.listLocatedStatus(partialSolDir);

        while (dirsFound.hasNext()) {
            LocatedFileStatus path = dirsFound.next();
            if (lastSolution == null) {
                lastSolution = path.getPath().getName();
                inputPath = path.getPath();
            } else {
                String currentDir = path.getPath().getName();
                if (lastSolution.compareToIgnoreCase(currentDir) < 0) {
                    lastSolution = currentDir;
                    inputPath = path.getPath();
                }
            }
        }
    }
    int currentSolutionSet = 0;
    if (inputPath == null) {
        inputPath = new Path("/nqueens/board-" + queensSize + "/seed");
        if (!fs.exists(inputPath)) {
            FSDataOutputStream seedFile = fs.create(inputPath, true);
            seedFile.writeBytes(queensSize + "#");
            seedFile.close();
        }
    }
    // Input
    FileInputFormat.addInputPath(job, inputPath);
    job.setInputFormatClass(TextInputFormat.class);

    if (lastSolution != null) {
        String[] solution = lastSolution.split("-");
        if (solution[0].equalsIgnoreCase("solution_" + queensSize)) {
            currentSolutionSet = Integer.parseInt(solution[1]) + 4;

            if (currentSolutionSet >= queensSize) {
                outputPath = new Path("/nqueens/board-" + queensSize + "/final");
            } else {
                outputPath = new Path("/nqueens/board-" + queensSize + "/partial/solution_" + queensSize + "-"
                        + currentSolutionSet);
            }
        }
    } else {
        outputPath = new Path("/nqueens/board-" + queensSize + "/partial/solution_" + queensSize + "-4");
    }

    // Output
    FileOutputFormat.setOutputPath(job, outputPath);
    job.setOutputFormatClass(TextOutputFormat.class);

}

From source file:br.com.lassal.nqueens.grid.job.NQueenCounter.java

private Path setWorkingFolder(int queensSize, String workingFolder, boolean isFinal, Job job)
        throws IOException {
    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);
    Path returnPath = null;//from   w w w.ja va 2  s.  co  m

    if (workingFolder == null) {
        workingFolder = "";
    }

    Path partialSolDir = new Path(workingFolder + "/nqueens/board-" + queensSize + "/partial/");
    Path inputPath = null;
    Path outputPath = null;
    String nextRunPath = "run_1";

    if (fs.exists(partialSolDir)) {
        RemoteIterator<LocatedFileStatus> dirsFound = fs.listLocatedStatus(partialSolDir);
        String lastRunPath = null;
        Path lastPath = null;

        while (dirsFound.hasNext()) {
            LocatedFileStatus dir = dirsFound.next();

            if (dir.isDirectory()) {
                if (lastRunPath == null || dir.getPath().getName().compareTo(lastRunPath) > 0) {
                    lastPath = dir.getPath();
                    lastRunPath = lastPath.getName();
                }
            }
        }
        if (lastRunPath != null) {
            String[] runParts = lastRunPath.split("_");
            int lastRun = Integer.parseInt(runParts[1]);
            nextRunPath = runParts[0] + "_" + (++lastRun);
            inputPath = lastPath;
        }

    }
    if (inputPath == null) {
        inputPath = new Path(workingFolder + "/nqueens/board-" + queensSize + "/seed");
        if (!fs.exists(inputPath)) {
            FSDataOutputStream seedFile = fs.create(inputPath, true);
            seedFile.writeBytes(queensSize + ":");
            seedFile.close();
        }
    } else {
        returnPath = inputPath;
    }
    // Input
    FileInputFormat.addInputPath(job, inputPath);
    job.setInputFormatClass(TextInputFormat.class);

    if (isFinal) {
        outputPath = new Path(workingFolder + "/nqueens/board-" + queensSize + "/final");
    } else {
        outputPath = new Path(workingFolder + "/nqueens/board-" + queensSize + "/partial/" + nextRunPath);
    }

    // Output
    FileOutputFormat.setOutputPath(job, outputPath);
    job.setOutputFormatClass(TextOutputFormat.class);

    return returnPath;
}

From source file:br.ufpr.inf.hpath.HPath.java

License:Apache License

/**
 * Execute the XPath query as a Hadoop job
 * @param xpath_query XPath query submitted by the user via cli.
 * @param inputFile XML file which has all data.
 * @param outputFile Query's result is stored in this file. 
 * @throws Exception//from  ww  w. j a  va  2  s.  co m
 */
public static void main(String[] args) throws Exception {

    if (args.length < 1) {
        System.out.println("USAGE: hpath [xpath_query] [input_file] [<output_dir>]");
        System.exit(-1);
    }

    System.out.println("***************");
    System.out.println(" Query  -> " + args[2]);
    System.out.println(" Input  -> " + args[0]);
    System.out.println(" Output -> " + args[1]);
    System.out.println("***************");

    String xpath_query = args[2];
    String inputFile = args[0];
    String outputFile = args[1];
    String tag = "";

    // tag = getFisrtQueryTag(xpath_query);
    tag = getLastQueryTag(xpath_query);
    Configuration conf = new Configuration();
    conf.set("xmlinput.start", "<" + tag);
    conf.set("xmlinput.end", "</" + tag + ">");
    conf.set("xpath.query", xpath_query);

    @SuppressWarnings("deprecation")
    Job job = new Job(conf, "HPath");
    FileSystem fs = FileSystem.get(conf);
    Path inFile = new Path(inputFile);
    Path outFile = new Path(outputFile);

    if (!fs.exists(inFile)) {
        System.out.println("error: Input file not found.");
        System.exit(-1);
    }
    if (!fs.isFile(inFile)) {
        System.out.println("error: Input should be a file.");
        System.exit(-1);
    }
    if (fs.exists(outFile)) {
        System.out.println("error: Output already exists.");
        System.exit(-1);
    }

    job.setJarByClass(HPath.class);

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormatClass(XmlItemInputFormat.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.addInputPath(job, inFile);
    FileOutputFormat.setOutputPath(job, outFile);
    job.waitForCompletion(true);
}

From source file:br.ufrj.nce.recureco.distributedindex.search.controller.DocumentViewerServlet.java

License:Open Source License

protected void doGet(javax.servlet.http.HttpServletRequest request,
        javax.servlet.http.HttpServletResponse response) throws javax.servlet.ServletException, IOException {

    String doc = request.getParameter("doc");

    if (doc != null && doc.trim().length() > 0) {

        try {/*from  w ww.ja  v a2s.c o m*/

            String filePath = DIR_DOWNLOAD + doc;

            Configuration conf = new Configuration();

            conf.addResource(new Path(DIR_HADOOP_CONF + "core-site.xml"));
            conf.addResource(new Path(DIR_HADOOP_CONF + "hdfs-site.xml"));
            conf.addResource(new Path(DIR_HADOOP_CONF + "mapred-site.xml"));

            FileSystem fileSystem = FileSystem.get(conf);

            Path path = new Path(filePath);
            if (!fileSystem.exists(path)) {
                response.getWriter().print("File not found.");
                return;
            }

            FSDataInputStream in = fileSystem.open(path);

            response.setContentType("text/plain");

            int read = 0;
            byte[] bytes = new byte[BYTES_DOWNLOAD];
            OutputStream os = response.getOutputStream();

            while ((read = in.read(bytes)) != -1) {
                os.write(bytes, 0, read);
            }
            os.flush();
            os.close();
        } catch (FileNotFoundException e) {
            response.getWriter().print("File not found.");
        }

    } else {
        //print invalid document
        response.getWriter().print("File not informed.");
    }

}

From source file:bulkload.ImportTsv.java

License:Apache License

/**
 * Sets up the actual job.//from www. j av  a2 s.co  m
 * 
 * @param conf
 *            The current configuration.
 * @param args
 *            The command line parameters.
 * @return The newly created job.
 * @throws IOException
 *             When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException {

    Job job = null;
    try (Connection connection = ConnectionFactory.createConnection(conf)) {
        try (Admin admin = connection.getAdmin()) {
            // Support non-XML supported characters
            // by re-encoding the passed separator as a Base64 string.
            String actualSeparator = conf.get(SEPARATOR_CONF_KEY);
            if (actualSeparator != null) {
                conf.set(SEPARATOR_CONF_KEY, Base64.encodeBytes(actualSeparator.getBytes()));
            }
            TableName tableName = TableName.valueOf(args[0]);
            if (!admin.tableExists(tableName)) {
                String errorMsg = format("Table '%s' does not exist.", tableName);
                LOG.error(errorMsg);
                throw new TableNotFoundException(errorMsg);
            }
            Path inputDir = new Path(args[1]);
            String jobName = conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName.getNameAsString());
            job = Job.getInstance(conf, jobName);
            job.setJarByClass(TsvImporter.class);
            FileInputFormat.setInputPaths(job, inputDir);
            job.setInputFormatClass(TextInputFormat.class);
            job.setMapperClass(TsvImporter.class);

            String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);
            if (hfileOutPath != null) {
                try (HTable table = (HTable) connection.getTable(tableName)) {
                    Path outputDir = new Path(hfileOutPath);
                    FileSystem fs = FileSystem.get(conf);
                    if (fs.exists(outputDir)) {
                        if (!fs.delete(outputDir, true)) {
                            throw new IllegalStateException("delete path:" + outputDir + " failed");
                        }
                    }
                    FileOutputFormat.setOutputPath(job, outputDir);
                    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
                    job.setMapOutputValueClass(Put.class);
                    job.setReducerClass(PutSortReducer.class);
                    HFileOutputFormat2.configureIncrementalLoad(job, table, table);
                }
            } else {
                // No reducers. Just write straight to table. Call
                // initTableReducerJob
                // to set up the TableOutputFormat.
                TableMapReduceUtil.initTableReducerJob(tableName.getNameAsString(), null, job);
                job.setNumReduceTasks(0);

                //               TableMapReduceUtil.addDependencyJars(job);
                //               TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
                //                     com.google.common.base.Function.class /* Guava used by TsvParser */);
            }

            // Workaround to remove unnecessary hadoop dependencies
            String[] jars = job.getConfiguration().get("tmpjars").split(",", -1);
            StringBuilder filteredJars = new StringBuilder();
            for (String j : jars) {
                String[] parts = j.split("/", -1);
                String fileName = parts[parts.length - 1];
                if (fileName.indexOf("hadoop-") != 0) {
                    filteredJars.append(j);
                    filteredJars.append(",");
                }
            }
            job.getConfiguration().set("tmpjars", filteredJars.toString());
        }
    }

    return job;
}

From source file:byte_import.HexastoreBulkImport.java

License:Open Source License

public Job createSubmittableJob(String[] args) {
    TABLE_NAME = args[1];/* www.j ava 2 s  . c o m*/
    Job job = null;
    try {
        job = new Job(new Configuration(), NAME);
        job.setJarByClass(HexastoreBulkImport.class);
        job.setMapperClass(sampler.TotalOrderPrep.Map.class);
        job.setReducerClass(Reduce.class);
        job.setCombinerClass(Combiner.class);
        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(ImmutableBytesWritable.class);
        job.setPartitionerClass(TotalOrderPartitioner.class);
        //TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), new Path("/user/npapa/"+regions+"partitions/part-r-00000"));
        TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), new Path("partitions/part-r-00000"));
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(HFileOutputFormat.class);
        Path out = new Path("out");
        FileOutputFormat.setOutputPath(job, out);
        Configuration conf = new Configuration();
        FileSystem fs;
        try {
            fs = FileSystem.get(conf);
            if (fs.exists(out)) {
                fs.delete(out, true);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }

        HBaseAdmin hadmin = new HBaseAdmin(conf);
        HTableDescriptor desc = new HTableDescriptor(TABLE_NAME + "_stats");
        HColumnDescriptor family = new HColumnDescriptor("size");
        desc.addFamily(family);
        conf.setInt("zookeeper.session.timeout", 600000);
        if (hadmin.tableExists(TABLE_NAME + "_stats")) {
            //hadmin.disableTable(TABLE_NAME+"_stats");
            //hadmin.deleteTable(TABLE_NAME+"_stats");
        } else {
            hadmin.createTable(desc);
        }

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        //job.getConfiguration().setInt("mapred.map.tasks", 18);
        job.getConfiguration().set("h2rdf.tableName", TABLE_NAME);
        job.getConfiguration().setInt("mapred.reduce.tasks", (int) TotalOrderPrep.regions);
        job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false);
        job.getConfiguration().setBoolean("mapred.reduce.tasks.speculative.execution", false);
        job.getConfiguration().setInt("io.sort.mb", 100);
        job.getConfiguration().setInt("io.file.buffer.size", 131072);
        job.getConfiguration().setInt("mapred.job.reuse.jvm.num.tasks", -1);
        //job.getConfiguration().setInt("hbase.hregion.max.filesize", 67108864);
        job.getConfiguration().setInt("hbase.hregion.max.filesize", 33554432);
        job.getConfiguration().setInt("mapred.tasktracker.map.tasks.maximum", 5);
        job.getConfiguration().setInt("mapred.tasktracker.reduce.tasks.maximum", 5);
        //job.getConfiguration().setInt("io.sort.mb", 100);

    } catch (IOException e2) {
        e2.printStackTrace();
    }

    return job;
}

From source file:ca.uwaterloo.iss4e.spark.pointperrow.CosineMain.java

License:Open Source License

public void fetch(JavaSparkContext sc, String source) {
    try {//from w  w  w.j a  v a 2  s  . c o  m
        FileSystem fs = FileSystem.get(new URI(source), new Configuration());
        Path src = new Path(source);
        if (fs.exists(src)) {
            FileStatus[] lists = fs.listStatus(src);
            readFiles(sc, fs, lists);
        }
    } catch (IOException e) {
        e.printStackTrace();
    } catch (URISyntaxException e) {
        e.printStackTrace();
    }
}

From source file:cascading.flow.hadoop.util.HadoopUtil.java

License:Open Source License

private static Map<Path, Path> getCopyPaths(Configuration config, Map<Path, Path> commonPaths) {
    Map<Path, Path> copyPaths = new HashMap<Path, Path>();

    FileSystem remoteFS = getDefaultFS(config);
    FileSystem localFS = getLocalFS(config);

    for (Map.Entry<Path, Path> entry : commonPaths.entrySet()) {
        Path localPath = entry.getKey();
        Path remotePath = entry.getValue();

        try {// ww  w.ja  va 2 s.  c o m
            boolean localExists = localFS.exists(localPath);
            boolean remoteExist = remoteFS.exists(remotePath);

            if (localExists && !remoteExist) {
                copyPaths.put(localPath, remotePath);
            } else if (localExists) {
                long localModTime = localFS.getFileStatus(localPath).getModificationTime();
                long remoteModTime = remoteFS.getFileStatus(remotePath).getModificationTime();

                if (localModTime > remoteModTime)
                    copyPaths.put(localPath, remotePath);
            }
        } catch (IOException exception) {
            throw new FlowException("unable to get handle to underlying filesystem", exception);
        }
    }

    return copyPaths;
}

From source file:cascading.flow.hadoop.util.HadoopUtil.java

License:Open Source License

private static boolean exists(FileSystem fileSystem, Path path) {
    try {/*from   ww  w.  ja va2 s.c om*/
        return fileSystem.exists(path);
    } catch (IOException exception) {
        throw new FlowException("could not test file exists: " + path);
    }
}

From source file:cascading.tap.hadoop.Hadoop18TapUtil.java

License:Open Source License

public static boolean needsTaskCommit(JobConf conf) throws IOException {
    String workpath = conf.get("mapred.work.output.dir");

    if (workpath == null)
        return false;

    Path taskOutputPath = new Path(workpath);

    if (taskOutputPath != null) {
        FileSystem fs = getFSSafe(conf, taskOutputPath);

        if (fs == null)
            return false;

        if (fs.exists(taskOutputPath))
            return true;
    }//from w w w. ja v a  2  s  .c  om

    return false;
}