Example usage for org.apache.hadoop.fs FileSystem get

Introduction

This page collects example usages of org.apache.hadoop.fs.FileSystem.get(Configuration), taken from open-source projects.

Prototype

public static FileSystem get(Configuration conf) throws IOException

Source Link

Document

Returns the configured FileSystem implementation.

Usage

From source file:cc.wikitools.lucene.hadoop.HdfsWikipediaSearcher.java

License:Apache License

/**
 * Opens the index stored at {@code indexLocation} on the file system configured in {@code conf}
 * and then runs {@code init()}.
 *
 * @param indexLocation location of the index directory on the configured file system
 * @param conf configuration used to obtain the file system; also handed to the directory wrapper
 * @throws IOException propagated from obtaining the file system, opening the reader, or {@code init()}
 */
public HdfsWikipediaSearcher(Path indexLocation, Configuration conf) throws IOException {
    final FileSystem fileSystem = FileSystem.get(conf);

    // NOTE(review): the meaning of the 'false' flag is defined by FileSystemDirectory, which is
    // not visible here (presumably "do not create") - confirm against that class.
    reader = DirectoryReader.open(new FileSystemDirectory(fileSystem, indexLocation, false, conf));

    init();
}

From source file:cgl.hadoop.apps.runner.DataAnalysis.java

License:Open Source License

/**
 * Configures and runs the MapReduce job, then terminates the JVM with the job's status.
 *
 * <p>Steps, in order: the output root is deleted recursively, the run parameters are stored in
 * the job configuration, {@code programDir} is registered as a distributed-cache archive, and the
 * job is submitted and awaited. This method never returns normally; it ends with
 * {@link System#exit(int)} (0 when the job succeeded, 1 otherwise).
 *
 * @param numReduceTasks number of reduce tasks to configure on the job
 * @param programDir URI of the program archive that is added to the distributed cache
 * @param execName name of the executable; also used as the job name
 * @param workingDir value stored in the job configuration under {@code WORKING_DIR}
 * @param databaseArchive value stored in the job configuration under {@code DB_ARCHIVE}
 * @param databaseName value stored in the job configuration under {@code DB_NAME}
 * @param dataDir input path(s) handed to the input format
 * @param outputDir output root; removed before the job starts, results go to {@code out} below it
 * @param cmdArgs value stored in the job configuration under {@code PARAMETERS}
 * @throws Exception any failure raised while preparing or running the job
 */
void launch(int numReduceTasks, String programDir, String execName, String workingDir, String databaseArchive,
        String databaseName, String dataDir, String outputDir, String cmdArgs) throws Exception {

    final Configuration baseConf = new Configuration();
    final Job job = new Job(baseConf, execName);

    // Start from a clean slate: remove whatever a previous run left under the output root.
    final Path outputRoot = new Path(outputDir);
    FileSystem.get(baseConf).delete(outputRoot, true);
    final Path jobOutput = new Path(outputRoot, "out");

    // Parameters the tasks read back from the job configuration.
    final Configuration jobConf = job.getConfiguration();
    jobConf.set(WORKING_DIR, workingDir);
    jobConf.set(EXECUTABLE, execName);
    jobConf.set(PROGRAM_DIR, programDir); // name of the executable archive
    jobConf.set(DB_ARCHIVE, databaseArchive);
    jobConf.set(DB_NAME, databaseName);
    jobConf.set(PARAMETERS, cmdArgs);
    jobConf.set(OUTPUT_DIR, outputDir);

    // Only the program archive is shipped through the distributed cache; time the registration.
    final long cacheStart = System.currentTimeMillis();
    DistributedCache.addCacheArchive(new URI(programDir), jobConf);
    final double cacheSeconds = (System.currentTimeMillis() - cacheStart) / 1000.0;
    System.out.println("Add Distributed Cache in " + cacheSeconds + " seconds");

    FileInputFormat.setInputPaths(job, dataDir);
    FileOutputFormat.setOutputPath(job, jobOutput);

    job.setJarByClass(DataAnalysis.class);
    job.setMapperClass(RunnerMap.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);

    job.setInputFormatClass(DataFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setNumReduceTasks(numReduceTasks);

    final long jobStart = System.currentTimeMillis();
    final boolean succeeded = job.waitForCompletion(true);
    final double jobSeconds = (System.currentTimeMillis() - jobStart) / 1000.0;
    System.out.println("Job Finished in " + jobSeconds + " seconds");

    System.exit(succeeded ? 0 : 1);
}

From source file:ch.sentric.hbase.coprocessor.LoadWithTableDescriptorExample.java

License:Apache License

/**
 * Creates the table {@code testtable} (family {@code colfam1}) with a coprocessor declared in its
 * table descriptor, then prints the descriptor as stored by the cluster.
 *
 * <p>The coprocessor is declared through the {@code COPROCESSOR$1} attribute, whose value is built
 * as {@code <jar path>|<observer class name>|<priority>}. The jar path is the root of the
 * configured file system followed by {@code coprocessor-1.0-SNAPSHOT.jar}.
 *
 * @param args ignored
 * @throws IOException if the file system or the HBase admin operations fail
 */
public static void main(String[] args) throws IOException {
    Configuration conf = HBaseConfiguration.create();

    FileSystem fs = FileSystem.get(conf);
    Path path = new Path(fs.getUri() + Path.SEPARATOR + "coprocessor-1.0-SNAPSHOT.jar");

    HTableDescriptor htd = new HTableDescriptor("testtable");
    htd.addFamily(new HColumnDescriptor("colfam1"));
    htd.setValue("COPROCESSOR$1", path.toString() + "|"
            + ProspectiveSearchRegionObserver.class.getCanonicalName() + "|" + Coprocessor.PRIORITY_USER);

    HBaseAdmin admin = new HBaseAdmin(conf);
    try {
        admin.createTable(htd);

        System.out.println(admin.getTableDescriptor(Bytes.toBytes("testtable")));
    } finally {
        // Release the admin's cluster connection even when table creation fails.
        admin.close();
    }
}

From source file:chapter5.KMeanSample.java

License:Apache License

/**
 * Locates the clusters directory written by the last iteration that actually ran.
 *
 * <p>Candidates are {@code clusters-<i>} under {@code output}, probed from
 * {@code i = maxIterations} down to {@code 0}; the first one that exists wins.
 *
 * @param conf configuration used to obtain the file system
 * @param output directory that contains the per-iteration {@code clusters-<i>} directories
 * @param maxIterations highest iteration number to probe
 * @return the existing path with the highest iteration number, or {@code null} if none exists
 * @throws IOException if the file system cannot be obtained or queried
 */
private static Path finalClusterPath(Configuration conf, Path output, int maxIterations) throws IOException {
    final FileSystem fileSystem = FileSystem.get(conf);
    int iteration = maxIterations;
    while (iteration >= 0) {
        final Path candidate = new Path(output, "clusters-" + iteration);
        if (fileSystem.exists(candidate)) {
            return candidate;
        }
        iteration--;
    }
    return null;
}

From source file:cienciaCelularMR.FernetMapper.java

/**
 * Runs the external {@code fernet.exe} program on the file named by {@code value} and emits the
 * files it produces.
 *
 * <p>Steps: the input file is copied from the configured file system to the local file
 * {@code entradaFernet.dat}; the program is started and its standard output is drained; then,
 * depending on the mode reported by the key, either {@code out_point.txt} or every local
 * {@code multi_*.txt} file is emitted as a {@code FernetOutput}.
 *
 * <p>On any failure a marker file {@code errorMapper-<id>.<subId>.txt} is written to the
 * configured file system, the failure is logged, and the original exception is rethrown. A
 * failure to write the marker is attached to the original exception as suppressed instead of
 * replacing it.
 *
 * @param key identifies the user/sub-job and carries the Fernet mode
 * @param value path (on the configured file system) of the input data file
 * @param output task context used for progress reporting and for emitting results
 * @throws IOException if copying, running the program, or emitting results fails
 * @throws InterruptedException if emitting a result is interrupted
 */
@Override
public void map(KeyMcell key, Text value, Context output) throws IOException, InterruptedException {
    try {
        System.out.println("Entro al Map");
        System.out.println("Key del map fernet: " + key.toString());

        System.out.println("Fernet empezo a leer y guardar archivo .dat");
        // Copy the input to the task's local working directory; both streams are closed by
        // try-with-resources, so no explicit close() calls are needed.
        try (FSDataInputStream fis = FileSystem.get(output.getConfiguration())
                .open(new Path(value.toString()));
                FileOutputStream fos = new FileOutputStream(new File("entradaFernet.dat"))) {
            byte[] buf = new byte[1024];
            int bytesRead;
            while ((bytesRead = fis.read(buf)) > 0) {
                fos.write(buf, 0, bytesRead);
                output.progress();
            }
        }

        System.out.println("Fernet termino de leer y guardar archivo .dat");
        Process process = new ProcessBuilder("fernet.exe", "--mode=" + key.getModoFernet().toString(),
                "--config=fernet.cfg", "entradaFernet.dat").start();

        System.out.println("Fernet is running");
        System.out.println("La key de mapper fernet es: " + key);
        // Drain stdout until the program closes it so it cannot block on a full pipe, reporting
        // progress per line. The text itself is not used afterwards, so it is not accumulated.
        try (BufferedReader br = new BufferedReader(new InputStreamReader(process.getInputStream()))) {
            while (br.readLine() != null) {
                output.progress();
            }
        }

        if ("point".equals(key.getModoFernet().toString())) {
            System.out.println("Fernet es point");
            String salidaName = "salidaFernet-" + key.getIdUsuario() + "." + key.getSubIdUsuario() + ".txt";
            // The file is still created (nothing is written to it), but the stream is now closed
            // instead of being leaked.
            FileSystem.get(output.getConfiguration()).create(new Path(salidaName)).close();

            File salidaFile = new File("out_point.txt");
            if (salidaFile.exists()) {
                byte[] buffer = readFileToByteArray(salidaFile);
                FernetOutput salida = new FernetOutput();
                salida.setFileName(new Text("out_point.txt"));
                // NOTE(review): the sub id is filled from getIdUsuario(), not getSubIdUsuario() - confirm.
                salida.setSubId(key.getIdUsuario());
                salida.setValue(new BytesWritable(buffer));
                output.write(key, salida);
            }
        } else {
            File dir = new File(".");
            FileFilter fileFilter = new WildcardFileFilter("multi_*.txt");
            File[] files = dir.listFiles(fileFilter);
            // listFiles returns null when the directory cannot be read.
            if (files != null) {
                for (File file : files) {
                    byte[] buffer = readFileToByteArray(file);
                    FernetOutput salida = new FernetOutput();
                    salida.setFileName(new Text(file.getName()));
                    salida.setSubId(key.getIdUsuario());
                    salida.setValue(new BytesWritable(buffer));
                    output.write(key, salida);
                }
            }
        }

    } catch (Exception ex) {
        String salidaName = "errorMapper-" + key.getIdUsuario() + "." + key.getSubIdUsuario() + ".txt";
        // Byte(String) parses a number, so new Byte("Error ...") always threw
        // NumberFormatException here and hid the real failure; write the text as bytes instead.
        try (FSDataOutputStream fs = FileSystem.get(output.getConfiguration()).create(new Path(salidaName))) {
            fs.write("Error en Mapper FERnet\n".getBytes("UTF-8"));
            fs.flush();
        } catch (IOException markerFailure) {
            ex.addSuppressed(markerFailure);
        }

        Logger.getLogger(FernetMapper.class.getName()).log(Level.SEVERE, null, ex);
        throw ex;
    }
}

From source file:cienciaCelularMR.McellMapper.java

/**
 * Runs the external {@code mcell.exe} program on the model carried in {@code value} and publishes
 * its results.
 *
 * <p>Steps: the model bytes are written to the local file {@code entradaMap.mdl}; the program is
 * started with {@code errorMcell.txt} as its error file; its standard output is collected line by
 * line (printing a progress message for lines matched by the {@code pattern} field); the collected
 * text plus the contents of the error file go to the {@code controloutput} named output of
 * {@code mos}. If the program produced {@code joined_1.dat}, it is copied to
 * {@code salidaMCell-<id>.<subId>.dat} on the configured file system and that name is emitted;
 * otherwise a non-empty error file is written to the {@code errormcell} named output.
 *
 * <p>On failure a marker file {@code errorMapper-<id>.<subId>.txt} is written, the failure is
 * logged, and an {@link InterruptedException} carrying the original exception as its cause is
 * thrown.
 *
 * @param key identifies the user/sub-job
 * @param value the model file contents
 * @param output task context used for progress reporting and for emitting results
 * @throws IOException declared by the signature; failures caught here surface as InterruptedException
 * @throws InterruptedException on any I/O, argument, or interruption failure inside the method
 */
@Override
public void map(KeyMcell key, BytesWritable value, Context output) throws IOException, InterruptedException {
    try {
        System.out.println("Entro al Map");
        System.out.println("Key del mcell mapper: " + key);

        try (FileOutputStream fos = new FileOutputStream(new File("entradaMap.mdl"))) {
            fos.write(value.copyBytes());
            fos.flush();
        }

        Process process = new ProcessBuilder("mcell.exe", "-errfile", "errorMcell.txt", "entradaMap.mdl")
                .start();

        System.out.println("Mcell is running");
        StringBuilder res = new StringBuilder();
        try (BufferedReader br = new BufferedReader(new InputStreamReader(process.getInputStream()))) {
            String line;
            while ((line = br.readLine()) != null) {
                res.append(line).append("\n");
                output.progress();
                try {
                    Matcher matcher = pattern.matcher(line);
                    if (matcher.find()) {
                        int fieldCount = matcher.groupCount();
                        Text[] fields = new Text[fieldCount];
                        for (int i = 0; i < fieldCount; i++) {
                            fields[i] = new Text(matcher.group(i + 1));
                        }
                        System.out.println("Progreso: " + Integer.parseInt(fields[0].toString()) + " de "
                                + Integer.parseInt(fields[1].toString()));
                    }
                } catch (Exception ignored) {
                    // Progress printing is best effort: a line that matches but cannot be parsed
                    // must not abort the task.
                }
            }
        }

        File errorFile = new File("errorMcell.txt");
        if (errorFile.exists()) {
            res.append(readLocalLines(errorFile));
        }

        mos.write("controloutput", key, new Text(res.toString()));

        System.out.println("Leyendo salida de MCell...");

        String salidaName = "salidaMCell-" + key.getIdUsuario() + "." + key.getSubIdUsuario() + ".dat";
        File salidaFile = new File("joined_1.dat");

        boolean copied = false;
        long totalbytes = 0;
        // As before, the destination file is created whether or not the program produced output;
        // try-with-resources now closes it on every path (it leaked when the output was missing
        // or the copy failed).
        try (FSDataOutputStream fs = FileSystem.get(output.getConfiguration()).create(new Path(salidaName))) {
            if (salidaFile.exists()) {
                try (FileInputStream ios = new FileInputStream(salidaFile)) {
                    byte[] buf = new byte[1024];
                    int bytesRead;
                    while ((bytesRead = ios.read(buf)) > 0) {
                        totalbytes += bytesRead;
                        fs.write(buf, 0, bytesRead);
                        output.progress();
                    }
                }
                copied = true;
            }
        }

        if (copied) {
            System.out.println("***Mcell termino de leer y guardar archivo .dat, tamao: " + totalbytes);
            System.out.println("Nombre que se le pasa a Fernet: " + salidaName);
            output.write(key, new Text(salidaName));
        } else if (errorFile.exists()) {
            String errorText = readLocalLines(errorFile);
            if (!errorText.isEmpty()) {
                mos.write("errormcell", key, new Text(errorText));
            }
        }
    } catch (IOException | IllegalArgumentException | InterruptedException ex) {
        String salidaName = "errorMapper-" + key.getIdUsuario() + "." + key.getSubIdUsuario() + ".txt";
        // Byte(String) parses a number, so new Byte("Error ...") always threw
        // NumberFormatException here and hid the real failure; write the text as bytes instead.
        try (FSDataOutputStream fs = FileSystem.get(output.getConfiguration()).create(new Path(salidaName))) {
            fs.write("Error en Mapper MCell:\n".getBytes("UTF-8"));
            fs.flush();
        } catch (IOException markerFailure) {
            ex.addSuppressed(markerFailure);
        }

        Logger.getLogger(McellMapper.class.getName()).log(Level.SEVERE, null, ex);
        // Same exception type as before, but the original failure is kept as the cause.
        InterruptedException wrapped = new InterruptedException(ex.getMessage());
        wrapped.initCause(ex);
        throw wrapped;
    }
}

/** Reads a local text file and returns its lines, each terminated by {@code "\n"}. */
private static String readLocalLines(File file) throws IOException {
    StringBuilder text = new StringBuilder();
    try (BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(file)))) {
        String l;
        while ((l = reader.readLine()) != null) {
            text.append(l).append("\n");
        }
    }
    return text.toString();
}

From source file:cienciaCelularMR.ResultReducer.java

/**
 * Packs every {@code FernetOutput} received for {@code key} into that key's zip archive.
 *
 * <p>The writer is looked up in {@code zips} by the key's string form and created (named
 * {@code <id>.<subId>-resultados}), set up, opened and registered when missing. Each value is
 * added as a binary entry, then the zip stream is flushed and the archive closed.
 *
 * <p>NOTE(review): any exception is only printed by message and otherwise dropped, so a failed
 * archive does not fail the task - confirm this is intended.
 *
 * @param key identifies the user/sub-job whose results are being collected
 * @param values output files produced for this key
 * @param context task context; only its configuration is read
 */
@Override
public void reduce(KeyMcell key, Iterable<FernetOutput> values, Context context) {
    System.out.println("Entro al reducer!!");
    System.out.println("Key del reducer: " + key);
    try {
        final String zipKey = key.toString();

        zipFileWriter = zips.get(zipKey);
        if (zipFileWriter != null) {
            System.out.println("GET " + zipKey);
        } else {
            System.out.println("CREAR " + zipKey);
            // The archive name carries the sub-job id.
            zipFileWriter = new ZipFileWriter(key.getIdUsuario() + "." + key.getSubIdUsuario() + "-resultados");
            zipFileWriter.setup(conf);
            zipFileWriter.openZipForWriting();
            zips.put(zipKey, zipFileWriter);
            System.out.println("Guarde Zip en map " + zipKey);
        }

        // NOTE(review): the handle obtained here is never used; the call is kept as-is.
        FileSystem unusedFs = FileSystem.get(context.getConfiguration());

        for (FernetOutput entry : values) {
            final String fileName = entry.getFileName().toString();
            System.out.println("Archivo de salida fernet es: " + fileName);
            zipFileWriter.addBinaryFile(fileName, entry.getValue().getBytes(), entry.getValue().getLength());
        }

        zipFileWriter.getZipOutputStream().flush();
        zipFileWriter.closeZip();
        System.out.println("Cerre ZIP");

    } catch (Exception e) {
        System.out.println(e.getMessage());
    }
}

From source file:cienciaCelularMR.ZipFileWriter.java

/**
 * Creates the zip file {@code zipFileName} on the file system configured in {@code conf} and
 * stores the resulting stream in {@code os}; also sets {@code rootDir} to {@code "/data"}.
 *
 * <p>A creation failure is reported via its stack trace and otherwise swallowed, leaving
 * {@code os} and {@code rootDir} untouched. The "CREADO" message is logged only when the file was
 * actually created (it used to be logged after a failure as well).
 *
 * @param conf configuration used to obtain the file system
 */
public void setup(Configuration conf) {
    try {
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path(zipFileName);
        os = fs.create(path);
        rootDir = "/data";
        log.info("CREADO " + zipFileName);
    } catch (IOException e) {
        e.printStackTrace();
    }
}

From source file:clone.ReadSequenceFile.java

License:Apache License

/**
 * Command-line entry point: reads the sequence file, or every sequence file in the directory,
 * named by the first argument.
 *
 * <p>Arguments: {@code args[0]} is the path (required); {@code args[1]} is an optional integer
 * passed on as {@code max}, defaulting to {@link Integer#MAX_VALUE}; a third argument equal to
 * {@code local} selects the local file system instead of the configured default one.
 *
 * @param args command-line arguments as described above
 * @throws IOException if the file system cannot be obtained or the path cannot be read
 */
public static void main(String[] args) throws IOException {
    if (args.length < 1) {
        System.out.println("args: [path] [max-num-of-records-per-file]");
        System.exit(-1);
    }

    final String location = args[0];
    final int max = args.length >= 2 ? Integer.parseInt(args[1]) : Integer.MAX_VALUE;
    final boolean useLocal = args.length >= 3 && args[2].equals("local");

    final FileSystem fs;
    if (useLocal) {
        System.out.println("Reading from local filesystem");
        fs = FileSystem.getLocal(new Configuration());
    } else {
        fs = FileSystem.get(new Configuration());
    }

    final Path p = new Path(location);
    if (fs.getFileStatus(p).isDir()) {
        readSequenceFilesInDir(p, fs, max);
    } else {
        readSequenceFile(p, fs, max);
    }
}

From source file:cloud9.ComputeCooccurrenceMatrixStripesOOM.java

License:Apache License

/**
 * Runs this tool: configures the co-occurrence job, submits it and waits for it to finish.
 *
 * <p>Expected arguments: input path, output path, window size, number of reducers. The output
 * directory is deleted recursively before the job is set up.
 *
 * @param args the four command-line arguments described above
 * @return {@code -1} when the argument count is wrong, {@code 0} when the job succeeded,
 *         {@code 1} when the job failed (previously {@code 0} was returned even on failure)
 * @throws Exception if an argument is not a valid integer or the job cannot be prepared or run
 */
public int run(String[] args) throws Exception {
    if (args.length != 4) {
        printUsage();
        return -1;
    }

    String inputPath = args[0];
    String outputPath = args[1];

    int window = Integer.parseInt(args[2]);
    int reduceTasks = Integer.parseInt(args[3]);

    sLogger.info("Tool: ComputeCooccurrenceMatrixStripes");
    sLogger.info(" - input path: " + inputPath);
    sLogger.info(" - output path: " + outputPath);
    sLogger.info(" - window: " + window);
    sLogger.info(" - number of reducers: " + reduceTasks);

    Job job = new Job(getConf(), "CooccurrenceMatrixStripes");
    Configuration jobConf = job.getConfiguration();

    // Delete the output directory if it exists already.
    // NOTE(review): the delete goes through getConf()'s file system, while "fs.default.name" is
    // overridden on the job configuration further down - confirm both name the same cluster.
    Path outputDir = new Path(outputPath);
    FileSystem.get(getConf()).delete(outputDir, true);

    jobConf.setInt("window", window);

    job.setJarByClass(ComputeCooccurrenceMatrixStripesOOM.class);
    job.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, outputDir);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(String2IntOpenHashMapWritable.class);

    job.setMapperClass(MyMapper.class);
    job.setCombinerClass(MyReducer.class);
    job.setReducerClass(MyReducer.class);

    jobConf.setInt("io.sort.mb", 400);

    // Child JVMs get a 1000 MB heap, dump it on OutOfMemoryError and log GC activity.
    jobConf.set("mapred.child.java.opts",
            "-Xmx1000m -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/tmp -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps");

    jobConf.setInt("child.monitor.jstat.seconds", 2);

    // Hard-coded cluster endpoints.
    jobConf.set("fs.default.name", "hdfs://master:9000");
    jobConf.set("mapred.job.tracker", "master:9001");

    jobConf.setInt("mapred.job.reuse.jvm.num.tasks", 1);

    // Minimum and maximum split size are both 512 MiB.
    jobConf.setLong("mapred.min.split.size", 512 * 1024 * 1024L);
    jobConf.setLong("mapred.max.split.size", 512 * 1024 * 1024L);

    jobConf.setInt("mapred.map.max.attempts", 0);
    jobConf.setInt("mapred.reduce.max.attempts", 0);

    jobConf.set("heapdump.task.attempt.ids", "attempt_201404281552_0001_r_000000_0");

    long startTime = System.currentTimeMillis();
    boolean succeeded = job.waitForCompletion(true);
    System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    // Propagate the job outcome so callers can see a failure.
    return succeeded ? 0 : 1;
}