List of usage examples for org.apache.hadoop.fs FileSystem getLocal
public static LocalFileSystem getLocal(Configuration conf) throws IOException
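FileSystem.getLocal(conf) returns the LocalFileSystem implementation backed by the machine's local disk; the examples below use it for tests, for staging data before copying it to HDFS, and for reading side files inside tasks. A minimal, self-contained sketch of the call (the path below is a hypothetical placeholder, not taken from any of the examples):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;

public class GetLocalExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // obtain the local filesystem for this JVM's configuration
        LocalFileSystem localFS = FileSystem.getLocal(conf);
        // "/tmp/example.txt" is a placeholder path used only for illustration
        Path p = new Path("/tmp/example.txt");
        System.out.println("exists: " + localFS.exists(p));
    }
}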
From source file:common.DataNode.java
License:Apache License
/**
 * Make an instance of DataNode after ensuring that at least one of the
 * given data directories (and their parent directories, if necessary)
 * can be created.
 * @param dataDirs List of directories, where the new DataNode instance should
 *                 keep its files.
 * @param conf Configuration instance to use.
 * @return DataNode instance for given list of data dirs and conf, or null if
 *         no directory from this directory list can be created.
 * @throws IOException
 */
static DataNode makeInstance(Collection<URI> dataDirs, Configuration conf) throws IOException {
    LocalFileSystem localFS = FileSystem.getLocal(conf);
    FsPermission permission = new FsPermission(
            conf.get(DFSConfigKeys.DFS_DATANODE_DATA_DIR_PERMISSION_KEY,
                     DFSConfigKeys.DFS_DATANODE_DATA_DIR_PERMISSION_DEFAULT));
    ArrayList<File> dirs = getDataDirsFromURIs(dataDirs, localFS, permission);
    if (dirs.size() > 0) {
        return new DataNode(conf, dirs);
    }
    LOG.error("All directories in " + DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY + " are invalid.");
    return null;
}
From source file:corr.util.DummyDataToSeqFile.java
License:Apache License
/**
 * Export a CSV file to a SequenceFile.
 * @param file Name of CSV file.
 * @throws Exception
 */
private static void export(String file) throws Exception {
    String delim = ",";
    BufferedReader reader = null;
    SequenceFile.Writer writer = null;
    try {
        Path path = toPath(file);
        Configuration conf = new Configuration();
        LocalFileSystem fs = FileSystem.getLocal(conf);
        writer = SequenceFile.createWriter(fs, conf, path, LongWritable.class, VectorWritable.class);
        reader = new BufferedReader(new FileReader(file));
        String line = null;
        long counter = 0;
        while (null != (line = reader.readLine())) {
            if ("".equals(line))
                continue;
            String[] tokens = line.split(delim);
            LongWritable key = new LongWritable(counter);
            VectorWritable val = toVector(tokens);
            writer.append(key, val);
            counter++;
        }
    } catch (Exception ex) {
        throw ex;
    } finally {
        if (null != reader) {
            try {
                reader.close();
            } catch (Exception ex) {
            }
        }
        if (null != writer) {
            try {
                writer.close();
            } catch (Exception ex) {
            }
        }
    }
}
From source file:crunch.MaxTemperature.java
License:Apache License
@Test
public void test() throws Exception {
    Configuration conf = new Configuration();
    conf.set("fs.default.name", "file:///");
    conf.set("mapred.job.tracker", "local");

    Path input = new Path("input/ncdc/micro");
    Path output = new Path("output");

    FileSystem fs = FileSystem.getLocal(conf);
    fs.delete(output, true); // delete old output

    MaxTemperatureDriver driver = new MaxTemperatureDriver();
    driver.setConf(conf);

    int exitCode = driver.run(new String[] { input.toString(), output.toString() });
    assertThat(exitCode, is(0));

    checkOutput(conf, output);
}
From source file:crunch.MaxTemperature.java
License:Apache License
private void checkOutput(Configuration conf, Path output) throws IOException {
    FileSystem fs = FileSystem.getLocal(conf);
    Path[] outputFiles = FileUtil.stat2Paths(fs.listStatus(output, new OutputLogFilter()));
    assertThat(outputFiles.length, is(1));

    BufferedReader actual = asBufferedReader(fs.open(outputFiles[0]));
    BufferedReader expected = asBufferedReader(getClass().getResourceAsStream("/expected.txt"));
    String expectedLine;
    while ((expectedLine = expected.readLine()) != null) {
        assertThat(actual.readLine(), is(expectedLine));
    }
    assertThat(actual.readLine(), nullValue());
    actual.close();
    expected.close();
}
From source file:csc555.ebratt.depaul.edu.PutMerge.java
License:Open Source License
/**
 * @param args
 *            [0] the fs.default.name (HDFS URI) to connect to
 * @param args
 *            [1] the input directory on the local filesystem
 * @param args
 *            [2] the fully-qualified output filename on HDFS
 * @throws IOException
 *             in the event there is an issue with input/output
 */
public static void main(String[] args) throws IOException {
    if (args.length != 3) {
        System.err.println("Usage: PutMerge.jar <fs.default.name> <in> <out>");
        System.exit(2);
    }
    Configuration conf = new Configuration();
    conf.set("fs.default.name", args[0]);
    FileSystem hdfs = FileSystem.get(conf);
    FileSystem localFS = FileSystem.getLocal(conf);
    Path localDir = new Path(args[1]);
    Path hdfsFile = new Path(args[2]);

    try {
        if (hdfs.exists(hdfsFile)) {
            System.out.println("deleting target file: " + hdfsFile.toString());
            hdfs.delete(hdfsFile, true);
        }
        System.out.println("copying/merging files from: local:/" + localDir.toString() + " to hdfs:/"
                + hdfsFile.toString());
        FileUtil.copyMerge(localFS, localDir, hdfs, hdfsFile, false, conf, null);
    } catch (IOException e) {
        e.printStackTrace();
    }
}
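For reference, a hypothetical invocation matching the usage string above (host, port, and paths are placeholders, not taken from the original source; the explicit class name can be dropped if the jar's manifest already declares it as the main class):

hadoop jar PutMerge.jar csc555.ebratt.depaul.edu.PutMerge hdfs://namenode:8020 /data/local/input /user/hadoop/merged.txt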
From source file:de.tuberlin.dima.aim3.assignment1.BookAndAuthorBroadcastJoin.java
License:Open Source License
@Override
public int run(String[] args) throws Exception {
    Map<String, String> parsedArgs = parseArgs(args);

    Path authors = new Path(parsedArgs.get("--authors"));
    Path books = new Path(parsedArgs.get("--books"));
    Path outputPath = new Path(parsedArgs.get("--output"));

    //IMPLEMENT ME
    Job broadCastJoin = prepareJob(books, outputPath, TextInputFormat.class, BroadCastMapper.class, Text.class,
            Text.class, TextOutputFormat.class);
    DistributedCache.addArchiveToClassPath(authors, broadCastJoin.getConfiguration(),
            FileSystem.getLocal(broadCastJoin.getConfiguration()));
    broadCastJoin.waitForCompletion(true);

    return 0;
}
From source file:de.tuberlin.dima.cuttlefish.preprocessing.vectorization.Vectorizer.java
License:Open Source License
public void vectorize(File luceneIndexDir, File outputDir) throws Exception {

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.getLocal(conf);

    SequenceFile.Writer writer = null;
    FeatureDictionary dict = new FeatureDictionary();

    DirectoryReader reader = null;
    try {
        reader = DirectoryReader.open(new SimpleFSDirectory(luceneIndexDir));

        writer = SequenceFile.createWriter(fs, conf, new Path(outputDir.toString(), "documentVectors.seq"),
                IDAndCodes.class, VectorWritable.class);
        IDAndCodes idAndCodes = new IDAndCodes();
        VectorWritable vectorWritable = new VectorWritable();

        Fields fields = MultiFields.getFields(reader);
        if (fields != null) {
            Iterator<String> fieldNames = fields.iterator();
            while (fieldNames.hasNext()) {
                String field = fieldNames.next();
                if (!field.startsWith("bip:") && !"itemID".equals(field)) {
                    Terms terms = fields.terms(field);
                    TermsEnum termsEnum = terms.iterator(null);
                    BytesRef text;
                    while ((text = termsEnum.next()) != null) {
                        dict.addTextFeature(field, text.utf8ToString());
                    }
                }
            }
        }

        int numDocsVectorized = 0;

        for (int docID = 0; docID < reader.maxDoc(); docID++) {
            Document doc = reader.document(docID);

            int itemID = doc.getField("itemID").numericValue().intValue();

            RandomAccessSparseVector documentVector = new RandomAccessSparseVector(dict.numFeatures());
            Multimap<String, String> codes = HashMultimap.create();

            for (IndexableField field : doc.getFields()) {

                String fieldName = field.name();

                if (!fieldName.startsWith("bip:") && !"itemID".equals(fieldName)) {

                    Terms termFreqVector = reader.getTermVector(docID, fieldName);

                    if (termFreqVector != null) {

                        int maxTermFrequency = maxTermFrequency(termFreqVector);

                        TermsEnum te = termFreqVector.iterator(null);
                        BytesRef term;
                        while ((term = te.next()) != null) {

                            String termStr = term.utf8ToString();
                            int termFrequency = (int) te.totalTermFreq();

                            int documentFrequency = reader.docFreq(new Term(fieldName, term));
                            int numDocs = reader.numDocs();

                            double weight = weighting.weight(fieldName, termStr, termFrequency,
                                    documentFrequency, maxTermFrequency, numDocs);

                            int featureIndex = dict.index(fieldName, term.utf8ToString());
                            documentVector.setQuick(featureIndex, weight);
                        }
                    }

                } else if (fieldName.startsWith("bip:")) {
                    for (String value : doc.getValues(fieldName)) {
                        codes.put(fieldName, value);
                    }
                }
            }

            Vector featureVector = new SequentialAccessSparseVector(documentVector);
            weighting.normalize(featureVector);

            idAndCodes.set(itemID, codes);
            vectorWritable.set(featureVector);
            writer.append(idAndCodes, vectorWritable);

            numDocsVectorized++;
            if (numDocsVectorized % 100 == 0) {
                log.info("Vectorized {} documents", numDocsVectorized);
            }
        }

        log.info("Vectorized {} documents", numDocsVectorized);

        dict.writeToFile(new File(outputDir, "features.txt"));
        log.info("Wrote feature dictionary");

    } finally {
        Closeables.close(reader, true);
        Closeables.close(writer, true);
    }
}
From source file:de.tudarmstadt.ukp.dkpro.bigdata.hadoop.UIMAMapReduceBase.java
License:Open Source License
@Override
public void configure(JobConf job) {
    try {
        this.job = job;
        this.mapOutputValueClass = job.getMapOutputValueClass();
        this.outputValueClass = job.getOutputValueClass();
        this.samplingPropability = job.getInt("dkpro.map.samplingratio", 100);
        final EngineFactory engineFactory = (EngineFactory) Class
                .forName(job.get("dkpro.uima.factory", DkproHadoopDriver.class.getName())).newInstance();
        engineFactory.configure(job);

        final AnalysisEngineDescription engineDescription = getEngineDescription(engineFactory, job);

        // replace the $dir variable within the configuration.
        this.fs = FileSystem.get(job);
        this.localFS = FileSystem.getLocal(job);
        this.working_dir = new Path("uima_output_" + job.get("mapred.task.id"));
        final Path outputPath = FileOutputFormat.getOutputPath(job);
        this.results_dir = this.fs.startLocalOutput(outputPath, job.getLocalPath(this.working_dir.getName()));
        this.localFS.mkdirs(this.results_dir);
        final String[] resources = job.get("dkpro.resources", "").split(",");
        sLogger.info("Writing local data to: " + this.results_dir);
        this.resourceURIs = new TreeMap<String, URL>();
        for (final String resource : resources) {
            final URL r = job.getResource(resource);
            if (r != null && !resource.isEmpty()) {
                this.resourceURIs.put(resource, r);
            }
        }
        replaceRecursively(engineDescription);
        this.engine = createEngine(engineDescription);

    } catch (final Exception e) {
        sLogger.fatal("Error while configuring pipeline", e);
        e.printStackTrace();
        throw new RuntimeException(e);
    }
}
From source file:edu.indiana.soic.ts.mapreduce.pwd.SWGMap.java
License:Open Source License
public void map(LongWritable blockIndex, Text value, Context context) throws IOException, InterruptedException {
    long startTime = System.nanoTime();
    Configuration conf = context.getConfiguration();
    Counter alignmentCounter = context.getCounter(Constants.RecordCounters.ALIGNMENTS);
    String valString = value.toString();
    String valArgs[] = valString.split(Constants.BREAK);

    long rowBlock = Long.parseLong(valArgs[0]);
    long columnBlock = Long.parseLong(valArgs[1]);
    boolean isDiagonal = Boolean.parseBoolean(valArgs[2]);
    LOG.info("row column" + rowBlock + " " + columnBlock + " " + isDiagonal + " " + valArgs[2]);

    long row = rowBlock * blockSize;
    long column = columnBlock * blockSize;

    long parseStartTime = System.nanoTime();
    FileSystem fs = FileSystem.getLocal(conf);
    // parse the inputFilePart for row
    Path rowPath = new Path(Constants.HDFS_SEQ_FILENAME + "_" + rowBlock);
    FSDataInputStream rowInStream = fs.open(rowPath);
    List<VectorPoint> rowSequences = SequenceParser.ParseFile(rowInStream);
    // parse the inputFilePart for column if this is not a diagonal block
    List<VectorPoint> colSequences;
    if (isDiagonal) {
        colSequences = rowSequences;
    } else {
        // parse the inputFilePart for column
        Path colPath = new Path(Constants.HDFS_SEQ_FILENAME + "_" + columnBlock);
        FSDataInputStream colInStream = fs.open(colPath);
        colSequences = SequenceParser.ParseFile(colInStream);
    }
    LOG.info("Parsing time : " + ((System.nanoTime() - parseStartTime) / 1000000) + "ms");

    short[][] alignments = new short[(int) blockSize][(int) blockSize];
    double[][] doubleDistances = new double[(int) blockSize][(int) blockSize];
    double max = Double.MIN_VALUE;
    for (int rowIndex = 0; ((rowIndex < blockSize) & ((row + rowIndex) < noOfSequences)); rowIndex++) {
        int columnIndex = 0;
        for (; ((columnIndex < blockSize) & ((column + columnIndex) < noOfSequences)); columnIndex++) {
            double alignment;
            alignment = distFunc.calc(rowSequences.get(rowIndex), colSequences.get(columnIndex));
            if (alignment > max) {
                max = alignment;
            }
            // Get the identity and make it percent identity
            doubleDistances[rowIndex][columnIndex] = alignment;
        }
        alignmentCounter.increment(columnIndex);
    }

    // divide by max to get the range to 0 to 1 and then convert to short and output
    for (int rowIndex = 0; ((rowIndex < blockSize) & ((row + rowIndex) < noOfSequences)); rowIndex++) {
        int columnIndex = 0;
        for (; ((columnIndex < blockSize) & ((column + columnIndex) < noOfSequences)); columnIndex++) {
            double alignment = doubleDistances[rowIndex][columnIndex] / max;
            short scaledScore = (short) (alignment * Short.MAX_VALUE);
            alignments[rowIndex][columnIndex] = scaledScore;
        }
    }

    SWGWritable dataWritable = new SWGWritable(rowBlock, columnBlock, blockSize, false);
    dataWritable.setMax(max);
    dataWritable.setAlignments(alignments);
    context.write(new LongWritable(rowBlock), dataWritable);

    if (!isDiagonal) {
        // Create the transpose matrix of (rowBlock, colBlock) block to fill the
        // (colBlock, rowBlock) block.
        SWGWritable inverseDataWritable = new SWGWritable(columnBlock, rowBlock, blockSize, true);
        inverseDataWritable.setAlignments(alignments);
        context.write(new LongWritable(columnBlock), inverseDataWritable);
    }
    LOG.info("Map time : " + ((System.nanoTime() - startTime) / 1000000) + "ms");
}
From source file:edu.isi.mavuno.util.MavunoUtils.java
License:Apache License
public static void readParameters(String[] args, String prefix, Configuration conf) throws IOException {
    FileSystem fs = FileSystem.getLocal(conf);
    for (String arg : args) {
        if (arg.startsWith("-")) {
            int equalsIndex = arg.indexOf('=');
            if (equalsIndex == -1) {
                sLogger.warn("Ignoring malformed parameter -- " + arg);
                continue; // skip parameters without an '=' separator
            }

            String paramName = arg.substring(1, equalsIndex);
            String paramValue = arg.substring(equalsIndex + 1, arg.length());

            conf.set(prefix + "." + paramName, paramValue);
        } else {
            // open parameter file
            BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(new Path(arg))));

            // read/set parameters
            String input;
            while ((input = reader.readLine()) != null) {
                String[] cols = input.split("\t");
                if (cols.length != 2) {
                    sLogger.warn("Skipping malformed parameter file line -- " + input);
                } else {
                    conf.set(prefix + "." + cols[0], cols[1]);
                }
            }

            reader.close();
        }
    }
}