List of usage examples for org.apache.hadoop.fs FileSystem get
public static FileSystem get(Configuration conf) throws IOException
From source file:cc.wikitools.lucene.hadoop.HdfsWikipediaSearcher.java
License:Apache License
/**
 * Opens the index stored at {@code indexLocation} and initializes the searcher.
 *
 * @param indexLocation path of the index directory
 * @param conf Hadoop configuration used to obtain the file system
 * @throws IOException if the file system cannot be obtained or the index cannot be opened
 */
public HdfsWikipediaSearcher(Path indexLocation, Configuration conf) throws IOException {
    final FileSystem fileSystem = FileSystem.get(conf);
    reader = DirectoryReader.open(new FileSystemDirectory(fileSystem, indexLocation, false, conf));
    init();
}
From source file:cgl.hadoop.apps.runner.DataAnalysis.java
License:Open Source License
/** * Launch the MapReduce computation.//from w ww . j ava 2 s . co m * This method first, remove any previous working directories and create a new one * Then the data (file names) is copied to this new directory and launch the * MapReduce (map-only though) computation. * @param numMapTasks - Number of map tasks. * @param numReduceTasks - Number of reduce tasks =0. * @param programDir - The directory where the Cap3 program is. * @param execName - Name of the executable. * @param dataDir - Directory where the data is located. * @param outputDir - Output directory to place the output. * @param cmdArgs - These are the command line arguments to the Cap3 program. * @throws Exception - Throws any exception occurs in this program. */ void launch(int numReduceTasks, String programDir, String execName, String workingDir, String databaseArchive, String databaseName, String dataDir, String outputDir, String cmdArgs) throws Exception { Configuration conf = new Configuration(); Job job = new Job(conf, execName); // First get the file system handler, delete any previous files, add the // files and write the data to it, then pass its name as a parameter to // job Path hdMainDir = new Path(outputDir); FileSystem fs = FileSystem.get(conf); fs.delete(hdMainDir, true); Path hdOutDir = new Path(hdMainDir, "out"); // Starting the data analysis. 
Configuration jc = job.getConfiguration(); jc.set(WORKING_DIR, workingDir); jc.set(EXECUTABLE, execName); jc.set(PROGRAM_DIR, programDir); // this the name of the executable archive jc.set(DB_ARCHIVE, databaseArchive); jc.set(DB_NAME, databaseName); jc.set(PARAMETERS, cmdArgs); jc.set(OUTPUT_DIR, outputDir); // using distributed cache // flush it //DistributedCache.releaseCache(new URI(programDir), jc); //DistributedCache.releaseCache(new URI(databaseArchive), jc); //DistributedCache.purgeCache(jc); // reput the data into cache long startTime = System.currentTimeMillis(); //DistributedCache.addCacheArchive(new URI(databaseArchive), jc); DistributedCache.addCacheArchive(new URI(programDir), jc); System.out.println( "Add Distributed Cache in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); FileInputFormat.setInputPaths(job, dataDir); FileOutputFormat.setOutputPath(job, hdOutDir); job.setJarByClass(DataAnalysis.class); job.setMapperClass(RunnerMap.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(Text.class); job.setInputFormatClass(DataFileInputFormat.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); job.setNumReduceTasks(numReduceTasks); startTime = System.currentTimeMillis(); int exitStatus = job.waitForCompletion(true) ? 0 : 1; System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); //clean the cache System.exit(exitStatus); }
From source file:ch.sentric.hbase.coprocessor.LoadWithTableDescriptorExample.java
License:Apache License
/**
 * Creates the table {@code testtable} with column family {@code colfam1} and a coprocessor
 * attribute pointing at {@code coprocessor-1.0-SNAPSHOT.jar} under the root of the configured
 * file system, then prints the resulting table descriptor.
 *
 * @param args unused
 * @throws IOException if the file system cannot be obtained or an admin operation fails
 */
public static void main(String[] args) throws IOException {
    Configuration conf = HBaseConfiguration.create();
    FileSystem fs = FileSystem.get(conf);
    Path path = new Path(fs.getUri() + Path.SEPARATOR + "coprocessor-1.0-SNAPSHOT.jar");

    HTableDescriptor htd = new HTableDescriptor("testtable");
    htd.addFamily(new HColumnDescriptor("colfam1"));
    // Attribute format: <jar path>|<observer class>|<priority>
    htd.setValue("COPROCESSOR$1", path.toString() + "|"
            + ProspectiveSearchRegionObserver.class.getCanonicalName() + "|" + Coprocessor.PRIORITY_USER);

    HBaseAdmin admin = new HBaseAdmin(conf);
    try {
        admin.createTable(htd);
        System.out.println(admin.getTableDescriptor(Bytes.toBytes("testtable")));
    } finally {
        // Release the admin's connection even when table creation fails.
        admin.close();
    }
}
From source file:chapter5.KMeanSample.java
License:Apache License
/**
 * Finds the clusters directory of the last iteration that exists.
 *
 * <p>Candidates {@code clusters-<i>} under {@code output} are probed from {@code maxIterations}
 * down to 0; the first one that exists is returned.
 *
 * @param conf configuration used to obtain the file system
 * @param output directory that contains the per-iteration cluster directories
 * @param maxIterations highest iteration index to probe
 * @return the path of the highest-numbered existing clusters directory, or {@code null} if none
 *         exists
 * @throws IOException if the file system cannot be obtained or queried
 */
private static Path finalClusterPath(Configuration conf, Path output, int maxIterations) throws IOException {
    FileSystem fileSystem = FileSystem.get(conf);
    int iteration = maxIterations;
    while (iteration >= 0) {
        Path candidate = new Path(output, "clusters-" + iteration);
        if (fileSystem.exists(candidate)) {
            return candidate;
        }
        iteration--;
    }
    return null;
}
From source file:cienciaCelularMR.FernetMapper.java
@Override public void map(KeyMcell key, Text value, Context output) throws IOException, InterruptedException { try {//w ww .j a v a 2s . c o m System.out.println("Entro al Map"); System.out.println("Key del map fernet: " + key.toString()); System.out.println("Fernet empezo a leer y guardar archivo .dat"); try (FSDataInputStream fis = FileSystem.get(output.getConfiguration()) .open(new Path(value.toString()))) { File archivo = new File("entradaFernet.dat"); try (FileOutputStream fos = new FileOutputStream(archivo)) { byte[] buf = new byte[1024]; int bytesRead; while ((bytesRead = fis.read(buf)) > 0) { fos.write(buf, 0, bytesRead); fos.flush(); output.progress(); } fos.close(); fis.close(); } } System.out.println("Fernet termino de leer y guardar archivo .dat"); Process process = new ProcessBuilder("fernet.exe", "--mode=" + key.getModoFernet().toString(), "--config=fernet.cfg", "entradaFernet.dat").start(); InputStream is = process.getInputStream(); InputStreamReader isr = new InputStreamReader(is); BufferedReader br = new BufferedReader(isr); String line; System.out.println("Fernet is running"); System.out.println("La key de mapper fernet es: " + key); String res = ""; while ((line = br.readLine()) != null) { res = res.concat(line); output.progress(); } if ("point".equals(key.getModoFernet().toString())) { System.out.println("Fernet es point"); String salidaName = "salidaFernet-" + key.getIdUsuario() + "." 
+ key.getSubIdUsuario() + ".txt"; FSDataOutputStream fs = FileSystem.get(output.getConfiguration()).create(new Path(salidaName)); File salidaFile = new File("out_point.txt"); if (salidaFile.exists()) { byte[] buffer = readFileToByteArray(new File("out_point.txt")); FernetOutput salida = new FernetOutput(); salida.setFileName(new Text("out_point.txt")); salida.setSubId(key.getIdUsuario()); salida.setValue(new BytesWritable(buffer)); output.write(key, salida); } } else { File dir = new File("."); FileFilter fileFilter = new WildcardFileFilter("multi_*.txt"); File[] files = dir.listFiles(fileFilter); for (File file : files) { byte[] buffer = readFileToByteArray(new File(file.getName())); FernetOutput salida = new FernetOutput(); salida.setFileName(new Text(file.getName())); salida.setSubId(key.getIdUsuario()); salida.setValue(new BytesWritable(buffer)); output.write(key, salida); } } } catch (Exception ex) { String salidaName = "errorMapper-" + key.getIdUsuario() + "." + key.getSubIdUsuario() + ".txt"; FSDataOutputStream fs = FileSystem.get(output.getConfiguration()).create(new Path(salidaName)); fs.write(new Byte("Error en Mapper FERnet")); fs.write(new Byte("\n")); fs.flush(); fs.close(); Logger.getLogger(FernetMapper.class.getName()).log(Level.SEVERE, null, ex); throw ex; } }
From source file:cienciaCelularMR.McellMapper.java
@Override public void map(KeyMcell key, BytesWritable value, Context output) throws IOException, InterruptedException { try {/*from w ww . j a va2s . c o m*/ System.out.println("Entro al Map"); System.out.println("Key del mcell mapper: " + key); byte[] arrayByte = value.copyBytes(); File archivo = new File("entradaMap.mdl"); try (FileOutputStream fos = new FileOutputStream(archivo)) { fos.write(arrayByte); fos.flush(); } Process process = new ProcessBuilder("mcell.exe", "-errfile", "errorMcell.txt", "entradaMap.mdl") .start(); InputStream is = process.getInputStream(); InputStreamReader isr = new InputStreamReader(is); BufferedReader br = new BufferedReader(isr); String line; Matcher matcher; System.out.println("Mcell is running"); String res = ""; while ((line = br.readLine()) != null) { res = res.concat(line); res = res.concat("\n"); output.progress(); try { matcher = pattern.matcher(line); if (matcher.find()) { int fieldCount; Text[] fields; fieldCount = matcher.groupCount(); fields = new Text[fieldCount]; for (int i = 0; i < fieldCount; i++) { fields[i] = new Text(matcher.group(i + 1)); } System.out.println("Progreso: " + Integer.parseInt(fields[0].toString()) + " de " + Integer.parseInt(fields[1].toString())); } } catch (Exception ex) { } } File errorFile = new File("errorMcell.txt"); if (errorFile.exists()) { InputStream in = new FileInputStream(errorFile); BufferedReader reader = new BufferedReader(new InputStreamReader(in)); String l; while ((l = reader.readLine()) != null) { res = res.concat(l); res = res.concat("\n"); } } mos.write("controloutput", key, new Text(res)); //free memory res = ""; System.out.println("Leyendo salida de MCell..."); String salidaName = "salidaMCell-" + key.getIdUsuario() + "." 
+ key.getSubIdUsuario() + ".dat"; FSDataOutputStream fs = FileSystem.get(output.getConfiguration()).create(new Path(salidaName)); File salidaFile = new File("joined_1.dat"); if (salidaFile.exists()) { FileInputStream ios = new FileInputStream(salidaFile); byte[] buf = new byte[1024]; int totalbytes = 0; int bytesRead; while ((bytesRead = ios.read(buf)) > 0) { totalbytes += bytesRead; fs.write(buf, 0, bytesRead); fs.flush(); output.progress(); } fs.close(); ios.close(); System.out.println("***Mcell termino de leer y guardar archivo .dat, tamao: " + totalbytes); System.out.println("Nombre que se le pasa a Fernet: " + salidaName); output.write(key, new Text(salidaName)); } else { errorFile = new File("errorMcell.txt"); if (errorFile.exists()) { InputStream in = new FileInputStream(errorFile); BufferedReader reader = new BufferedReader(new InputStreamReader(in)); String l; while ((l = reader.readLine()) != null) { res = res.concat(l); res = res.concat("\n"); } if (!"".equals(res)) { mos.write("errormcell", key, new Text(res)); } } } } catch (IOException | IllegalArgumentException | InterruptedException ex) { String salidaName = "errorMapper-" + key.getIdUsuario() + "." + key.getSubIdUsuario() + ".txt"; FSDataOutputStream fs = FileSystem.get(output.getConfiguration()).create(new Path(salidaName)); fs.write(new Byte("Error en Mapper MCell:")); fs.write(new Byte("\n")); fs.flush(); fs.close(); Logger.getLogger(McellMapper.class.getName()).log(Level.SEVERE, null, ex); throw new InterruptedException(ex.getMessage()); } }
From source file:cienciaCelularMR.ResultReducer.java
@Override public void reduce(KeyMcell key, Iterable<FernetOutput> values, Context context) { System.out.println("Entro al reducer!!"); System.out.println("Key del reducer: " + key); FileSystem fs;//from w w w . j ava 2 s . c o m try { zipFileWriter = zips.get(key.toString()); if (zipFileWriter == null) { System.out.println("CREAR " + key.toString()); zipFileWriter = new ZipFileWriter(key.getIdUsuario() + "." + key.getSubIdUsuario() + "-resultados");//set subjob id zipFileWriter.setup(conf); zipFileWriter.openZipForWriting(); zips.put(key.toString(), zipFileWriter); System.out.println("Guarde Zip en map " + key.toString()); } else { System.out.println("GET " + key.toString()); } fs = FileSystem.get(context.getConfiguration()); for (FernetOutput t : values) { System.out.println("Archivo de salida fernet es: " + t.getFileName().toString()); zipFileWriter.addBinaryFile(t.getFileName().toString(), t.getValue().getBytes(), t.getValue().getLength()); } zipFileWriter.getZipOutputStream().flush(); zipFileWriter.closeZip(); System.out.println("Cerre ZIP"); } catch (Exception e) { System.out.println(e.getMessage()); } }
From source file:cienciaCelularMR.ZipFileWriter.java
public void setup(Configuration conf) { FileSystem fs;//from w w w . ja v a2s . c o m try { fs = FileSystem.get(conf); Path path = new Path(zipFileName); os = fs.create(path); rootDir = "/data"; } catch (IOException e) { e.printStackTrace(); } log.info("CREADO " + zipFileName); }
From source file:clone.ReadSequenceFile.java
License:Apache License
public static void main(String[] args) throws IOException { if (args.length < 1) { System.out.println("args: [path] [max-num-of-records-per-file]"); System.exit(-1);//from w ww. j a v a 2 s . c o m } String f = args[0]; int max = Integer.MAX_VALUE; if (args.length >= 2) { max = Integer.parseInt(args[1]); } boolean useLocal = args.length >= 3 && args[2].equals("local") ? true : false; if (useLocal) { System.out.println("Reading from local filesystem"); } FileSystem fs = useLocal ? FileSystem.getLocal(new Configuration()) : FileSystem.get(new Configuration()); Path p = new Path(f); if (fs.getFileStatus(p).isDir()) { readSequenceFilesInDir(p, fs, max); } else { readSequenceFile(p, fs, max); } }
From source file:cloud9.ComputeCooccurrenceMatrixStripesOOM.java
License:Apache License
/** * Runs this tool.//from w w w . j ava 2 s . c o m */ public int run(String[] args) throws Exception { if (args.length != 4) { printUsage(); return -1; } String inputPath = args[0]; String outputPath = args[1]; int window = Integer.parseInt(args[2]); int reduceTasks = Integer.parseInt(args[3]); sLogger.info("Tool: ComputeCooccurrenceMatrixStripes"); sLogger.info(" - input path: " + inputPath); sLogger.info(" - output path: " + outputPath); sLogger.info(" - window: " + window); sLogger.info(" - number of reducers: " + reduceTasks); Job job = new Job(getConf(), "CooccurrenceMatrixStripes"); // Delete the output directory if it exists already Path outputDir = new Path(outputPath); FileSystem.get(getConf()).delete(outputDir, true); job.getConfiguration().setInt("window", window); job.setJarByClass(ComputeCooccurrenceMatrixStripesOOM.class); job.setNumReduceTasks(reduceTasks); FileInputFormat.setInputPaths(job, new Path(inputPath)); FileOutputFormat.setOutputPath(job, new Path(outputPath)); job.setOutputKeyClass(Text.class); job.setOutputValueClass(String2IntOpenHashMapWritable.class); job.setMapperClass(MyMapper.class); job.setCombinerClass(MyReducer.class); job.setReducerClass(MyReducer.class); job.getConfiguration().setInt("io.sort.mb", 400); job.getConfiguration().set("mapred.child.java.opts", "-Xmx1000m -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/tmp -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps"); //job.getConfiguration().set("mapred.child.java.opts", "-Xmx1000m"); job.getConfiguration().setInt("child.monitor.jstat.seconds", 2); job.getConfiguration().set("fs.default.name", "hdfs://master:9000"); job.getConfiguration().set("mapred.job.tracker", "master:9001"); //conf.set("user.name", "xulijie"); job.getConfiguration().setInt("mapred.job.reuse.jvm.num.tasks", 1); //job.getConfiguration().setFloat("io.sort.record.percent", 0.2f); //job.getConfiguration().setFloat("io.sort.spill.percent", 0.95f); // 
conf.setFloat("mapred.job.shuffle.input.buffer.percent", 0.9f); // conf.setFloat("mapred.job.shuffle.merge.percent", 0.9f); //conf.setFloat("mapred.job.reduce.input.buffer.percent", 0.4f); //conf.set("mapred.job.tracker", "local"); //conf.set("fs.default.name", "file:///"); job.getConfiguration().setLong("mapred.min.split.size", 512 * 1024 * 1024L); job.getConfiguration().setLong("mapred.max.split.size", 512 * 1024 * 1024L); job.getConfiguration().setInt("mapred.map.max.attempts", 0); job.getConfiguration().setInt("mapred.reduce.max.attempts", 0); //job.getConfiguration().set("heapdump.reduce.input.groups", "3,897,853[5]"); //job.getConfiguration().set("heapdump.reduce.input.records", "8407734;8407737;8407740;8407743;8407746;8407749;8407750"); //job.getConfiguration().set("omit.reduce.input.records", "8407733;8407750"); //job.getConfiguration().set("heapdump.reduce.input.records", "8407751"); //job.getConfiguration().set("heapdump.reduce.output.records", "3897853"); job.getConfiguration().set("heapdump.task.attempt.ids", "attempt_201404281552_0001_r_000000_0"); long startTime = System.currentTimeMillis(); job.waitForCompletion(true); System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); return 0; }