List of usage examples for org.apache.hadoop.io SequenceFile createWriter
@Deprecated public static Writer createWriter(Configuration conf, FSDataOutputStream out, Class keyClass, Class valClass, CompressionType compressionType, CompressionCodec codec) throws IOException
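Before the per-project examples, here is a minimal, self-contained sketch of calling this deprecated stream-based overload directly. The output path, key/value types, and NONE compression are illustrative assumptions, not taken from any of the sources below; note that with this overload the caller still owns the output stream and closes it separately (as the Blur example further down also does).

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.io.Text;

public class CreateWriterSketch {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // hypothetical output path, for illustration only
        FSDataOutputStream out = fs.create(new Path("/tmp/example.seq"));
        // a null codec is acceptable when the compression type is NONE
        SequenceFile.Writer writer = SequenceFile.createWriter(conf, out, Text.class, LongWritable.class,
                CompressionType.NONE, null);
        try {
            writer.append(new Text("key"), new LongWritable(42L));
        } finally {
            writer.close();
            out.close(); // this overload does not take ownership of the stream
        }
    }
}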
From source file:io.covert.binary.analysis.BuildSequenceFileFromTarball.java
License:Apache License
public void load(FileSystem fs, Configuration conf, File inputTarball, Path outputDir) throws Exception {
    Text key = new Text();
    BytesWritable val = new BytesWritable();

    Path sequenceName = new Path(outputDir, inputTarball.getName() + ".seq");
    System.out.println("Writing to " + sequenceName);
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, sequenceName, Text.class,
            BytesWritable.class, CompressionType.RECORD);

    InputStream is = new FileInputStream(inputTarball);
    if (inputTarball.toString().toLowerCase().endsWith(".gz")) {
        is = new GZIPInputStream(is);
    } else if (inputTarball.toString().toLowerCase().endsWith(".bz")
            || inputTarball.toString().toLowerCase().endsWith(".bz2")) {
        // note: the original compared ".bz2" without lowercasing, unlike the other checks
        is.read(); // read 'B'
        is.read(); // read 'Z' (CBZip2InputStream expects the magic bytes to be consumed)
        is = new CBZip2InputStream(is);
    }

    final TarArchiveInputStream debInputStream = (TarArchiveInputStream) new ArchiveStreamFactory()
            .createArchiveInputStream("tar", is);
    TarArchiveEntry entry = null;
    while ((entry = (TarArchiveEntry) debInputStream.getNextEntry()) != null) {
        if (!entry.isDirectory()) {
            try {
                final ByteArrayOutputStream outputFileStream = new ByteArrayOutputStream();
                IOUtils.copy(debInputStream, outputFileStream);
                outputFileStream.close();
                byte[] outputFile = outputFileStream.toByteArray();
                val.set(outputFile, 0, outputFile.length);

                // key each entry by the hex MD5 digest of its contents
                MessageDigest md = MessageDigest.getInstance("MD5");
                md.update(outputFile);
                byte[] digest = md.digest();
                StringBuilder hexdigest = new StringBuilder();
                for (int i = 0; i < digest.length; i++) {
                    hexdigest.append(Integer.toString((digest[i] & 0xff) + 0x100, 16).substring(1));
                }
                key.set(hexdigest.toString());
                writer.append(key, val);
            } catch (IOException e) {
                System.err.println("Warning: tarball may be truncated: " + inputTarball);
                break;
            }
        }
    }
    debInputStream.close();
    writer.close();
}
From source file:io.covert.binary.analysis.BuildTarBzSequenceFile.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    File inDir = new File(args[0]);
    Path name = new Path(args[1]);
    Text key = new Text();
    BytesWritable val = new BytesWritable();

    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);
    if (!fs.exists(name)) {
        fs.mkdirs(name);
    }

    for (File file : inDir.listFiles()) {
        // check isFile() before creating the writer; the original created (and leaked)
        // a writer for every directory entry before skipping non-files
        if (!file.isFile()) {
            System.out.println("Skipping " + file + " (not a file) ...");
            continue;
        }

        Path sequenceName = new Path(name, file.getName() + ".seq");
        System.out.println("Writing to " + sequenceName);
        SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, sequenceName, Text.class,
                BytesWritable.class, CompressionType.RECORD);

        final InputStream is = new FileInputStream(file);
        final TarArchiveInputStream debInputStream = (TarArchiveInputStream) new ArchiveStreamFactory()
                .createArchiveInputStream("tar", is);
        TarArchiveEntry entry = null;
        while ((entry = (TarArchiveEntry) debInputStream.getNextEntry()) != null) {
            if (!entry.isDirectory()) {
                final ByteArrayOutputStream outputFileStream = new ByteArrayOutputStream();
                IOUtils.copy(debInputStream, outputFileStream);
                outputFileStream.close();
                byte[] outputFile = outputFileStream.toByteArray();
                val.set(outputFile, 0, outputFile.length);

                // key each entry by the hex MD5 digest of its contents
                MessageDigest md = MessageDigest.getInstance("MD5");
                md.update(outputFile);
                byte[] digest = md.digest();
                StringBuilder hexdigest = new StringBuilder();
                for (int i = 0; i < digest.length; i++) {
                    hexdigest.append(Integer.toString((digest[i] & 0xff) + 0x100, 16).substring(1));
                }
                key.set(hexdigest.toString());
                writer.append(key, val);
            }
        }
        debInputStream.close();
        writer.close();
    }
    return 0;
}
From source file:io.hops.erasure_coding.MapReduceEncoder.java
License:Apache License
/**
 * set up input file which has the list of input files.
 *
 * @return boolean
 * @throws java.io.IOException
 */
private boolean prepareJob(PolicyInfo info) throws IOException {
    final String randomId = getRandomId();
    JobClient jClient = new JobClient(jobconf);
    Path jobdir = new Path(jClient.getSystemDir(), NAME + "_" + randomId);
    LOG.info(JOB_DIR_LABEL + "=" + jobdir);
    jobconf.set(JOB_DIR_LABEL, jobdir.toString());
    Path log = new Path(jobdir, "_logs");
    FileOutputFormat.setOutputPath(jobconf, log);
    LOG.info("log=" + log);

    // create operation list
    FileSystem fs = jobdir.getFileSystem(jobconf);
    Path opList = new Path(jobdir, "_" + OP_LIST_LABEL);
    jobconf.set(OP_LIST_LABEL, opList.toString());
    SequenceFile.Writer opWriter = null;
    try {
        opWriter = SequenceFile.createWriter(fs, jobconf, opList, Text.class, PolicyInfo.class,
                SequenceFile.CompressionType.NONE);
        opWriter.append(new Text(info.getSrcPath().toString()), info);
    } finally {
        if (opWriter != null) {
            opWriter.close();
        }
    }

    jobconf.setInt(OP_COUNT_LABEL, 1);
    jobconf.setNumMapTasks(1);
    LOG.info("jobName= " + jobName + " numMapTasks=" + jobconf.getNumMapTasks());
    return true;
}
From source file:map_reduce.MapReduce_OptimizedBrandesAdditions_DO_JUNG.java
License:Open Source License
@SuppressWarnings("deprecation") @Override/*from w ww.j a v a 2 s . c o m*/ public int run(String[] args) throws Exception { if (args.length < 1) { System.err.println("Usage:\n"); System.exit(1); } // Job job = new Job(super.getConf()); // READ IN ALL COMMAND LINE ARGUMENTS // EXAMPLE: // hadoop jar MapReduce_OptimizedBrandesAdditions_DO_JUNG.jar // -libjars collections-generic-4.01.jar,jung-graph-impl-2.0.1.jar,jung-api-2.0.1.jar // -Dmapred.job.map.memory.mb=4096 // -Dmapred.job.reduce.memory.mb=4096 // -Dmapred.child.java.opts=-Xmx3500m // -Dmapreduce.task.timeout=60000000 // -Dmapreduce.job.queuename=QUEUENAME // input_iterbrandes_additions_nocomb_10k_1 output_iterbrandes_additions_nocomb_10k_1 // 10 1 10000 55245 10k 10k_randedges 100 1 false times/ betweenness/ int m = -1; // input path to use on hdfs Path inputPath = new Path(args[++m]); // output path to use on hdfs Path outputPath = new Path(args[++m]); // number of Mappers to split the sources: e.g., 1, 10, 100 etc. // rule of thumb: the larger the graph (i.e., number of roots to test), the larger should be this number. int numOfMaps = Integer.parseInt(args[++m]); // number of Reducers to collect the output int numOfReduce = Integer.parseInt(args[++m]); // Number of vertices in graph int N = Integer.parseInt(args[++m]); // Number of edges in graph int M = Integer.parseInt(args[++m]); // Graph file (edge list, tab delimited) (full path) String graph = args[++m]; // File with edges to be added (tab delimited) (full path) // Note: this version handles only edges between existing vertices in the graph. String random_edges = args[++m]; // Number of random edges added int re = Integer.parseInt(args[++m]); // Experiment iteration (in case of multiple experiments) int iter = Integer.parseInt(args[++m]); // Use combiner or not (true/false) Boolean comb = Boolean.valueOf(args[++m]); // Output path for file with stats String statsoutputpath = args[++m]; // Output path for file with final betweenness values String betoutputpath = args[++m]; // BEGIN INITIALIZATION JobConf conf = new JobConf(getConf(), MapReduce_OptimizedBrandesAdditions_DO_JUNG.class); FileSystem fs = FileSystem.get(conf); String setup = "_additions_edges" + re + "_maps" + numOfMaps + "_comb" + comb; conf.setJobName("OptimizedBrandesAdditionsDOJung_" + graph + setup + "_" + iter); conf.set("HDFS_GRAPH", graph + setup); conf.set("HDFS_Random_Edges", random_edges + setup); conf.set("output", outputPath.getName()); conf.set("setup", setup); // CREATE INPUT FILES FOR MAPPERS int numOfTasksperMap = (int) Math.ceil(N / numOfMaps); //generate an input file for each map task for (int i = 0; i < numOfMaps - 1; i++) { Path file = new Path(inputPath, "part-r-" + i); IntWritable start = new IntWritable(i * numOfTasksperMap); IntWritable end = new IntWritable((i * numOfTasksperMap) + numOfTasksperMap - 1); SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file, IntWritable.class, IntWritable.class, CompressionType.NONE); try { writer.append(start, end); } finally { writer.close(); } System.out.println("Wrote input for Map #" + i + ": " + start + " - " + end); } // last mapper takes what is left Path file = new Path(inputPath, "part-r-" + (numOfMaps - 1)); IntWritable start = new IntWritable((numOfMaps - 1) * numOfTasksperMap); IntWritable end = new IntWritable(N - 1); SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file, IntWritable.class, IntWritable.class, CompressionType.NONE); try { writer.append(start, end); } finally { writer.close(); } 
System.out.println("Wrote input for Map #" + (numOfMaps - 1) + ": " + start + " - " + end); // COPY FILES TO MAPPERS System.out.println("Copying graph to cache"); String LOCAL_GRAPH = graph; Path hdfsPath = new Path(graph + setup); // upload the file to hdfs. Overwrite any existing copy. fs.copyFromLocalFile(false, true, new Path(LOCAL_GRAPH), hdfsPath); DistributedCache.addCacheFile(hdfsPath.toUri(), conf); System.out.println("Copying random edges to cache"); String LOCAL_Random_Edges = random_edges; hdfsPath = new Path(random_edges + setup); // upload the file to hdfs. Overwrite any existing copy. fs.copyFromLocalFile(false, true, new Path(LOCAL_Random_Edges), hdfsPath); DistributedCache.addCacheFile(hdfsPath.toUri(), conf); conf.setOutputKeyClass(IntWritable.class); conf.setOutputValueClass(DoubleWritable.class); conf.setMapperClass(IterBrandesMapper.class); conf.setNumMapTasks(numOfMaps); if (comb) conf.setCombinerClass(IterBrandesReducer.class); conf.setReducerClass(IterBrandesReducer.class); conf.setNumReduceTasks(numOfReduce); // turn off speculative execution, because DFS doesn't handle multiple writers to the same file. conf.setSpeculativeExecution(false); conf.setInputFormat(SequenceFileInputFormat.class); conf.setOutputFormat(SequenceFileOutputFormat.class); FileInputFormat.setInputPaths(conf, inputPath); FileOutputFormat.setOutputPath(conf, outputPath); // conf.set("mapred.job.name", "APS-" + outputPath.getName()); conf.setNumTasksToExecutePerJvm(-1); // JVM reuse System.out.println("Starting the execution...! Pray!! \n"); long time1 = System.nanoTime(); RunningJob rj = JobClient.runJob(conf); long time2 = System.nanoTime(); // READ OUTPUT FILES System.out.println("\nFinished and now reading/writing Betweenness Output...\n"); // Assuming 1 reducer. 
Path inFile = new Path(outputPath, "part-00000"); IntWritable id = new IntWritable(); DoubleWritable betweenness = new DoubleWritable(); SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, conf); FileWriter fw = new FileWriter(new File(betoutputpath + graph + setup + "_betweenness_" + iter)); try { int i = 0; for (; i < (N + M + re); i++) { reader.next(id, betweenness); fw.write(id + "\t" + betweenness + "\n"); fw.flush(); } } finally { reader.close(); fw.close(); } System.out.println("\nWriting times Output...\n"); fw = new FileWriter(new File(statsoutputpath + graph + setup + "_times_" + iter)); fw.write("Total-time:\t" + (time2 - time1) + "\n"); fw.write("total-map\t" + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter") .getCounter("SLOTS_MILLIS_MAPS") + "\n"); fw.write("total-reduce\t" + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter") .getCounter("SLOTS_MILLIS_REDUCES") + "\n"); fw.write("total-cpu-mr\t" + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter") .getCounter("CPU_MILLISECONDS") + "\n"); fw.write("total-gc-mr\t" + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter").getCounter("GC_TIME_MILLIS") + "\n"); fw.write("total-phy-mem-mr\t" + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter") .getCounter("PHYSICAL_MEMORY_BYTES") + "\n"); fw.write("total-vir-mem-mr\t" + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter") .getCounter("VIRTUAL_MEMORY_BYTES") + "\n"); fw.write("brandes\t" + rj.getCounters().getGroup("TimeForBrandes").getCounter("exectime_initial_brandes") + "\n"); fw.write("reduce\t" + rj.getCounters().getGroup("TimeForReduce").getCounter("reduceafteralledges") + "\n"); fw.flush(); try { Iterator<Counters.Counter> counters = rj.getCounters().getGroup("TimeForRandomEdges").iterator(); while (counters.hasNext()) { Counter cc = counters.next(); fw.write(cc.getName() + "\t" + cc.getCounter() + "\n"); fw.flush(); } } finally { fw.close(); } return 0; }
From source file:map_reduce.MapReduce_OptimizedBrandesDeletions_DO_JUNG.java
License:Open Source License
@SuppressWarnings("deprecation") @Override// ww w . ja va 2 s . c om public int run(String[] args) throws Exception { if (args.length < 1) { System.err.println("Usage:\n"); System.exit(1); } // Job job = new Job(super.getConf()); // READ IN ALL COMMAND LINE ARGUMENTS // EXAMPLE: // hadoop jar MapReduce_OptimizedBrandesDeletions_DO_JUNG.jar // -libjars collections-generic-4.01.jar,jung-graph-impl-2.0.1.jar,jung-api-2.0.1.jar // -Dmapred.job.map.memory.mb=4096 // -Dmapred.job.reduce.memory.mb=4096 // -Dmapred.child.java.opts=-Xmx3500m // -Dmapreduce.task.timeout=60000000 // -Dmapreduce.job.queuename=QUEUENAME // input_iterbrandes_deletions_nocomb_10k_1 output_iterbrandes_deletions_nocomb_10k_1 // 10 1 10000 55245 10k 10k_randedges 100 1 false times/ betweenness/ int m = -1; // input path to use on hdfs Path inputPath = new Path(args[++m]); // output path to use on hdfs Path outputPath = new Path(args[++m]); // number of Mappers to split the sources: e.g., 1, 10, 100 etc. // rule of thumb: the larger the graph (i.e., number of roots to test), the larger should be this number. int numOfMaps = Integer.parseInt(args[++m]); // number of Reducers to collect the output int numOfReduce = Integer.parseInt(args[++m]); // Number of vertices in graph int N = Integer.parseInt(args[++m]); // Number of edges in graph int M = Integer.parseInt(args[++m]); // Graph file (edge list, tab delimited) (full path) String graph = args[++m]; // File with edges to be added (tab delimited) (full path) // Note: this version handles only edges between existing vertices in the graph. String random_edges = args[++m]; // Number of random edges added int re = Integer.parseInt(args[++m]); // Experiment iteration (in case of multiple experiments) int iter = Integer.parseInt(args[++m]); // Use combiner or not (true/false) Boolean comb = Boolean.valueOf(args[++m]); // Output path for file with stats String statsoutputpath = args[++m]; // Output path for file with final betweenness values String betoutputpath = args[++m]; // BEGIN INITIALIZATION JobConf conf = new JobConf(getConf(), MapReduce_OptimizedBrandesDeletions_DO_JUNG.class); FileSystem fs = FileSystem.get(conf); String setup = "_deletions_edges" + re + "_maps" + numOfMaps + "_comb" + comb; conf.setJobName("OptimizedBrandesDeletionsDOJung_" + graph + setup + "_" + iter); conf.set("HDFS_GRAPH", graph + setup); conf.set("HDFS_Random_Edges", random_edges + setup); conf.set("output", outputPath.getName()); conf.set("setup", setup); // CREATE INPUT FILES FOR MAPPERS int numOfTasksperMap = (int) Math.ceil(N / numOfMaps); //generate an input file for each map task for (int i = 0; i < numOfMaps - 1; i++) { Path file = new Path(inputPath, "part-r-" + i); IntWritable start = new IntWritable(i * numOfTasksperMap); IntWritable end = new IntWritable((i * numOfTasksperMap) + numOfTasksperMap - 1); SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file, IntWritable.class, IntWritable.class, CompressionType.NONE); try { writer.append(start, end); } finally { writer.close(); } System.out.println("Wrote input for Map #" + i + ": " + start + " - " + end); } // last mapper takes what is left Path file = new Path(inputPath, "part-r-" + (numOfMaps - 1)); IntWritable start = new IntWritable((numOfMaps - 1) * numOfTasksperMap); IntWritable end = new IntWritable(N - 1); SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file, IntWritable.class, IntWritable.class, CompressionType.NONE); try { writer.append(start, end); } finally { writer.close(); } 
System.out.println("Wrote input for Map #" + (numOfMaps - 1) + ": " + start + " - " + end); // COPY FILES TO MAPPERS System.out.println("Copying graph to cache"); String LOCAL_GRAPH = graph; Path hdfsPath = new Path(graph + setup); // upload the file to hdfs. Overwrite any existing copy. fs.copyFromLocalFile(false, true, new Path(LOCAL_GRAPH), hdfsPath); DistributedCache.addCacheFile(hdfsPath.toUri(), conf); System.out.println("Copying random edges to cache"); String LOCAL_Random_Edges = random_edges; hdfsPath = new Path(random_edges + setup); // upload the file to hdfs. Overwrite any existing copy. fs.copyFromLocalFile(false, true, new Path(LOCAL_Random_Edges), hdfsPath); DistributedCache.addCacheFile(hdfsPath.toUri(), conf); conf.setOutputKeyClass(IntWritable.class); conf.setOutputValueClass(DoubleWritable.class); conf.setMapperClass(IterBrandesMapper.class); conf.setNumMapTasks(numOfMaps); if (comb) conf.setCombinerClass(IterBrandesReducer.class); conf.setReducerClass(IterBrandesReducer.class); conf.setNumReduceTasks(numOfReduce); // turn off speculative execution, because DFS doesn't handle multiple writers to the same file. conf.setSpeculativeExecution(false); conf.setInputFormat(SequenceFileInputFormat.class); conf.setOutputFormat(SequenceFileOutputFormat.class); FileInputFormat.setInputPaths(conf, inputPath); FileOutputFormat.setOutputPath(conf, outputPath); // conf.set("mapred.job.name", "APS-" + outputPath.getName()); conf.setNumTasksToExecutePerJvm(-1); // JVM reuse System.out.println("Starting the execution...! Pray!! \n"); long time1 = System.nanoTime(); RunningJob rj = JobClient.runJob(conf); long time2 = System.nanoTime(); // READ OUTPUT FILES System.out.println("\nFinished and now reading/writing Betweenness Output...\n"); // Assuming 1 reducer. 
Path inFile = new Path(outputPath, "part-00000"); IntWritable id = new IntWritable(); DoubleWritable betweenness = new DoubleWritable(); SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, conf); FileWriter fw = new FileWriter(new File(betoutputpath + graph + setup + "_betweenness_" + iter)); try { int i = 0; for (; i < (N + (M - re)); i++) { reader.next(id, betweenness); fw.write(id + "\t" + betweenness + "\n"); fw.flush(); } } finally { reader.close(); fw.close(); } System.out.println("\nWriting times Output...\n"); fw = new FileWriter(new File(statsoutputpath + graph + setup + "_times_" + iter)); fw.write("Total-time:\t" + (time2 - time1) + "\n"); fw.write("total-map\t" + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter") .getCounter("SLOTS_MILLIS_MAPS") + "\n"); fw.write("total-reduce\t" + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter") .getCounter("SLOTS_MILLIS_REDUCES") + "\n"); fw.write("total-cpu-mr\t" + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter") .getCounter("CPU_MILLISECONDS") + "\n"); fw.write("total-gc-mr\t" + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter").getCounter("GC_TIME_MILLIS") + "\n"); fw.write("total-phy-mem-mr\t" + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter") .getCounter("PHYSICAL_MEMORY_BYTES") + "\n"); fw.write("total-vir-mem-mr\t" + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter") .getCounter("VIRTUAL_MEMORY_BYTES") + "\n"); fw.write("brandes\t" + rj.getCounters().getGroup("TimeForBrandes").getCounter("exectime_initial_brandes") + "\n"); fw.write("reduce\t" + rj.getCounters().getGroup("TimeForReduce").getCounter("reduceafteralledges") + "\n"); fw.flush(); try { Iterator<Counters.Counter> counters = rj.getCounters().getGroup("TimeForRandomEdges").iterator(); while (counters.hasNext()) { Counter cc = counters.next(); fw.write(cc.getName() + "\t" + cc.getCounter() + "\n"); fw.flush(); } } finally { fw.close(); } return 0; }
From source file:nthu.scopelab.tsqr.ssvd.SSVDSolver.java
License:Apache License
/**
 * Run all SSVD jobs.
 *
 * @throws IOException
 *           if an I/O error occurs.
 */
public void run() throws Exception {
    try {
        System.out.println("SSVD start!");
        FileSystem fs = FileSystem.get(conf);

        Path qPath = new Path(outputPath, "Q-job");
        Path btPath = new Path(outputPath, "Bt-job");
        Path yPath = new Path(outputPath, "Y-job"); // test phase
        Path uHatPath = new Path(outputPath, "UHat");
        Path svPath = new Path(outputPath, "Sigma");
        Path uPath = new Path(outputPath, "U");
        Path vPath = new Path(outputPath, "V");

        if (overwrite) {
            fs.delete(outputPath, true);
        }

        int[] iseed = { 0, 0, 0, 1 };
        double[] x = new double[1];
        Dlarnv.dlarnv(2, iseed, 0, 1, x, 0);
        long seed = (long) (x[0] * (double) Long.MAX_VALUE);

        long start, end;

        start = new Date().getTime();
        QJob.run(conf, inputPath, qPath.toString(), reduceSchedule, k, p, seed, mis);
        end = new Date().getTime();
        System.out.println("Q-Job done " + Long.toString(end - start));
        Logger LOG = LoggerFactory.getLogger(SSVDSolver.class);

        /*
         * Restrict the number of reducers to a reasonable number so we don't have to
         * run too many additions in the frontend when reconstructing BBt for the
         * last B' and BB' computations. The user may not realize that and give a
         * bit too many (I would be happy if that were ever the case though).
         */
        start = new Date().getTime();
        BtJob.run(conf, inputPath, btPath, qPath.toString(), k, p, outerBlockHeight,
                q <= 0 ? Math.min(1000, reduceTasks) : reduceTasks, q <= 0, reduceSchedule, mis);
        end = new Date().getTime();
        System.out.println("Bt-Job done " + Long.toString(end - start));

        // power iterations are unnecessary in this application to a recommendation system
        /*for (int i = 0; i < q; i++) {
            Path btPathGlob = new Path(btPath, BtJob.OUTPUT_BT + "-*");
            Path aBtPath = new Path(outputPath, String.format("ABt-job-%d", i + 1));
            qPath = new Path(outputPath, String.format("ABtQ-job-%d", i + 1));
            ABtDenseOutJob.run(conf, inputPath, btPathGlob, aBtPath, //qPath, //ablockRows, //minSplitSize,
                    k, p, //abtBlockHeight,
                    reduceTasks, //broadcast
                    mis);
            ToolRunner.run(conf, new QRFirstJob(), new String[] { "-input", aBtPath.toString(),
                    "-output", qPath.toString(), "-mis", Integer.toString(mis),
                    "-colsize", Integer.toString(k + p), "-reduceSchedule", reduceSchedule });
            btPath = new Path(outputPath, String.format("Bt-job-%d", i + 1));
            BtJob.run(conf, inputPath, btPath, qPath.toString(), k, p, outerBlockHeight,
                    i == q - 1 ? Math.min(1000, reduceTasks) : reduceTasks, i == q - 1, reduceSchedule, mis);
        }*/

        cmUpperTriangDenseMatrix bbt = loadAndSumUpperTriangMatrices(fs,
                new Path(btPath, BtJob.OUTPUT_BBT + "-*"), conf);

        // convert bbt to something our eigensolver can understand
        assert bbt.numColumns() == k + p;

        double[][] bbtSquare = new double[k + p][];
        for (int i = 0; i < k + p; i++) {
            bbtSquare[i] = new double[k + p];
        }
        for (int i = 0; i < k + p; i++) {
            for (int j = i; j < k + p; j++) {
                bbtSquare[i][j] = bbtSquare[j][i] = bbt.get(i, j);
            }
        }

        svalues = new double[k + p];

        // try something else
        EigenSolver eigenWrapper = new EigenSolver(bbtSquare);
        double[] eigenva2 = eigenWrapper.getWR();
        for (int i = 0; i < k + p; i++) {
            svalues[i] = Math.sqrt(eigenva2[i]); // sqrt?
        }

        // save/redistribute UHat
        double[][] uHat = eigenWrapper.getVL();
        //double[][] uHat = eigenWrapper.getUHat();

        fs.mkdirs(uHatPath);
        SequenceFile.Writer uHatWriter = SequenceFile.createWriter(fs, conf,
                uHatPath = new Path(uHatPath, "uhat.seq"), IntWritable.class, VectorWritable.class,
                CompressionType.BLOCK);
        int m = uHat.length;
        IntWritable iw = new IntWritable();
        VectorWritable vw = new VectorWritable();
        for (int i = 0; i < m; i++) {
            vw.set(new DenseVector(uHat[i], true));
            iw.set(i);
            uHatWriter.append(iw, vw);
        }
        uHatWriter.close();

        SequenceFile.Writer svWriter = SequenceFile.createWriter(fs, conf,
                svPath = new Path(svPath, "svalues.seq"), IntWritable.class, VectorWritable.class,
                CompressionType.BLOCK);
        vw.set(new DenseVector(svalues, true));
        svWriter.append(iw, vw);
        svWriter.close();

        start = new Date().getTime();
        UJob ujob = null;
        if (computeU) {
            ujob = new UJob();
            ujob.start(conf, new Path(btPath, BtJob.Q_MAT + "-*"), uHatPath, svPath, uPath, k,
                    cUHalfSigma, mis);
            // actually this is a map-only job anyway
        }
        VJob vjob = null;
        if (computeV) {
            vjob = new VJob();
            vjob.start(conf, new Path(btPath, BtJob.OUTPUT_BT + "-*"), uHatPath, svPath, vPath, k,
                    reduceTasks, subRowSize, cVHalfSigma, mis);
        }

        if (ujob != null) {
            ujob.waitForCompletion();
            this.uPath = uPath.toString();
        }
        System.out.println("U-Job done ");
        if (vjob != null) {
            vjob.waitForCompletion();
            this.vPath = vPath.toString();
        }
        end = new Date().getTime();
        System.out.println("U-Job+V-Job done " + (end - start));
    } catch (InterruptedException exc) {
        throw new IOException("Interrupted", exc);
    } catch (ClassNotFoundException exc) {
        throw new IOException(exc);
    }
}
From source file:org.apache.blur.manager.writer.SnapshotIndexDeletionPolicy.java
License:Apache License
private synchronized void storeGenerations() throws IOException {
    FileSystem fileSystem = _path.getFileSystem(_configuration);
    FileStatus[] listStatus = fileSystem.listStatus(_path);
    SortedSet<FileStatus> existing = new TreeSet<FileStatus>(Arrays.asList(listStatus));
    long currentFile;
    if (!existing.isEmpty()) {
        FileStatus last = existing.last();
        currentFile = Long.parseLong(last.getPath().getName());
    } else {
        currentFile = 0;
    }
    Path path = new Path(_path, buffer(currentFile + 1));
    LOG.info("Creating new snapshot file [{0}]", path);
    FSDataOutputStream outputStream = fileSystem.create(path, false);
    Writer writer = SequenceFile.createWriter(_configuration, outputStream, Text.class, LongWritable.class,
            CompressionType.NONE, null);
    for (Entry<String, Long> e : _namesToGenerations.entrySet()) {
        writer.append(new Text(e.getKey()), new LongWritable(e.getValue()));
    }
    writer.close();
    outputStream.close(); // this overload leaves the stream open, so close it explicitly
    cleanupOldFiles(fileSystem, existing);
}
From source file:org.apache.flink.streaming.connectors.fs.SequenceFileWriter.java
License:Apache License
@Override
public void open(FileSystem fs, Path path) throws IOException {
    super.open(fs, path);
    if (keyClass == null) {
        throw new IllegalStateException("Key Class has not been initialized.");
    }
    if (valueClass == null) {
        throw new IllegalStateException("Value Class has not been initialized.");
    }

    CompressionCodec codec = null;
    if (!compressionCodecName.equals("None")) {
        CompressionCodecFactory codecFactory = new CompressionCodecFactory(new Configuration());
        codec = codecFactory.getCodecByName(compressionCodecName);
        if (codec == null) {
            throw new RuntimeException("Codec " + compressionCodecName + " not found.");
        }
    }

    // the non-deprecated constructor syntax is only available in recent hadoop versions...
    writer = SequenceFile.createWriter(new Configuration(), getStream(), keyClass, valueClass,
            compressionType, codec);
}
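As the comment in the Flink example notes, Hadoop 2.x and later also provide a non-deprecated, options-based form of createWriter. A minimal sketch of that form follows; the output path and key/value types here are assumptions for illustration, not part of the Flink source above.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

public class OptionsBasedWriterSketch {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // the options-based overload resolves the FileSystem from the Path and conf
        SequenceFile.Writer writer = SequenceFile.createWriter(conf,
                SequenceFile.Writer.file(new Path("/tmp/example.seq")), // hypothetical path
                SequenceFile.Writer.keyClass(Text.class),
                SequenceFile.Writer.valueClass(IntWritable.class),
                SequenceFile.Writer.compression(SequenceFile.CompressionType.NONE));
        try {
            writer.append(new Text("key"), new IntWritable(1));
        } finally {
            writer.close();
        }
    }
}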
From source file:org.apache.flume.sink.customhdfs.HDFSSequenceFile.java
License:Apache License
protected void open(Path dstPath, CompressionCodec codeC, CompressionType compType, Configuration conf,
        FileSystem hdfs) throws IOException {
    if (useRawLocalFileSystem) {
        if (hdfs instanceof LocalFileSystem) {
            hdfs = ((LocalFileSystem) hdfs).getRaw();
        } else {
            logger.warn("useRawLocalFileSystem is set to true but file system "
                    + "is not of type LocalFileSystem: " + hdfs.getClass().getName());
        }
    }
    if (conf.getBoolean("hdfs.append.support", false) && hdfs.isFile(dstPath)) {
        outStream = hdfs.append(dstPath);
    } else {
        outStream = hdfs.create(dstPath);
    }
    writer = SequenceFile.createWriter(conf, outStream, serializer.getKeyClass(),
            serializer.getValueClass(), compType, codeC);
    registerCurrentStream(outStream, hdfs, dstPath);
}
From source file:org.apache.hadoop.examples.QuasiMonteCarlo.java
License:Apache License
/**
 * Run a map/reduce job for estimating Pi.
 *
 * @return the estimated value of Pi
 */
public static BigDecimal estimatePi(int numMaps, long numPoints, Path tmpDir, Configuration conf)
        throws IOException, ClassNotFoundException, InterruptedException {
    Job job = Job.getInstance(conf);
    // set up job conf
    job.setJobName(QuasiMonteCarlo.class.getSimpleName());
    job.setJarByClass(QuasiMonteCarlo.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);

    job.setOutputKeyClass(BooleanWritable.class);
    job.setOutputValueClass(LongWritable.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapperClass(QmcMapper.class);
    job.setReducerClass(QmcReducer.class);
    job.setNumReduceTasks(1);

    // turn off speculative execution, because DFS doesn't handle
    // multiple writers to the same file.
    job.setSpeculativeExecution(false);

    // set up input/output directories
    final Path inDir = new Path(tmpDir, "in");
    final Path outDir = new Path(tmpDir, "out");
    FileInputFormat.setInputPaths(job, inDir);
    FileOutputFormat.setOutputPath(job, outDir);

    final FileSystem fs = FileSystem.get(conf);
    if (fs.exists(tmpDir)) {
        throw new IOException(
                "Tmp directory " + fs.makeQualified(tmpDir) + " already exists. Please remove it first.");
    }
    if (!fs.mkdirs(inDir)) {
        throw new IOException("Cannot create input directory " + inDir);
    }

    try {
        // generate an input file for each map task
        for (int i = 0; i < numMaps; ++i) {
            final Path file = new Path(inDir, "part" + i);
            final LongWritable offset = new LongWritable(i * numPoints);
            final LongWritable size = new LongWritable(numPoints);
            final SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file, LongWritable.class,
                    LongWritable.class, CompressionType.NONE);
            try {
                writer.append(offset, size);
            } finally {
                writer.close();
            }
            System.out.println("Wrote input for Map #" + i);
        }

        // start a map/reduce job
        System.out.println("Starting Job");
        final long startTime = Time.monotonicNow();
        job.waitForCompletion(true);
        if (!job.isSuccessful()) {
            System.out.println("Job " + job.getJobID() + " failed!");
            System.exit(1);
        }
        final double duration = (Time.monotonicNow() - startTime) / 1000.0;
        System.out.println("Job Finished in " + duration + " seconds");

        // read outputs
        Path inFile = new Path(outDir, "reduce-out");
        LongWritable numInside = new LongWritable();
        LongWritable numOutside = new LongWritable();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, conf);
        try {
            reader.next(numInside, numOutside);
        } finally {
            reader.close();
        }

        // compute the estimated value: 4 * numInside / (numMaps * numPoints)
        final BigDecimal numTotal = BigDecimal.valueOf(numMaps).multiply(BigDecimal.valueOf(numPoints));
        return BigDecimal.valueOf(4).setScale(20).multiply(BigDecimal.valueOf(numInside.get()))
                .divide(numTotal, RoundingMode.HALF_UP);
    } finally {
        fs.delete(tmpDir, true);
    }
}