List of usage examples for org.apache.hadoop.fs FileSystem getLocal
public static LocalFileSystem getLocal(Configuration conf) throws IOException
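Before the project examples below, here is a minimal sketch of the typical pattern: obtain a LocalFileSystem from a Configuration and use it through the ordinary FileSystem API. This sketch is not taken from any of the source files listed here; the class name and the /tmp path are illustrative assumptions only.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;

public class GetLocalSketch {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // getLocal returns a LocalFileSystem backed by the local disk.
        LocalFileSystem localFs = FileSystem.getLocal(conf);
        Path path = new Path("/tmp/getlocal-sketch.txt"); // hypothetical path
        try (FSDataOutputStream out = localFs.create(path, true)) {
            out.writeBytes("hello local fs\n");
        }
        System.out.println("exists: " + localFs.exists(path));
    }
}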
From source file:msc.fall2015.stock.kmeans.hbase.mapreduce.pwd.SWGMap.java
License:Open Source License
public void map(LongWritable blockIndex, Text value, Context context) throws IOException, InterruptedException {
    long startTime = System.nanoTime();
    Configuration conf = context.getConfiguration();
    Counter alignmentCounter = context.getCounter(Constants.RecordCounters.ALIGNMENTS);
    String valString = value.toString();
    String valArgs[] = valString.split(Constants.BREAK);
    long rowBlock = Long.parseLong(valArgs[0]);
    long columnBlock = Long.parseLong(valArgs[1]);
    boolean isDiagonal = Boolean.parseBoolean(valArgs[2]);
    System.out.println("row column" + rowBlock + " " + columnBlock + " " + isDiagonal + " " + valArgs[2]);
    long blockSize = conf.getLong(Constants.BLOCK_SIZE, 1000);
    long noOfSequences = conf.getLong(Constants.NO_OF_SEQUENCES, blockSize * 10);
    long noOfDivisions = conf.getLong(Constants.NO_OF_DIVISIONS, noOfSequences / blockSize);
    boolean weightEnabled = conf.getBoolean(Constants.WEIGHT_ENABLED, false);
    long row = rowBlock * blockSize;
    long column = columnBlock * blockSize;

    long parseStartTime = System.nanoTime();
    FileSystem fs = FileSystem.getLocal(conf);
    // parse the inputFilePart for row
    Path rowPath = new Path(Constants.HDFS_SEQ_FILENAME + "_" + rowBlock);
    FSDataInputStream rowInStream = fs.open(rowPath);
    List<VectorPoint> rowSequences;
    rowSequences = SequenceParser.ParseFile(rowInStream);
    // parse the inputFilePart for column if this is not a diagonal block
    List<VectorPoint> colSequences;
    if (isDiagonal) {
        colSequences = rowSequences;
    } else {
        // parse the inputFilePart for column
        Path colPath = new Path(Constants.HDFS_SEQ_FILENAME + "_" + columnBlock);
        FSDataInputStream colInStream = fs.open(colPath);
        colSequences = SequenceParser.ParseFile(colInStream);
    }
    System.out.println("Parsing time : " + ((System.nanoTime() - parseStartTime) / 1000000) + "ms");

    short[][] alignments = new short[(int) blockSize][(int) blockSize];
    for (int rowIndex = 0; ((rowIndex < blockSize) & ((row + rowIndex) < noOfSequences)); rowIndex++) {
        int columnIndex = 0;
        for (; ((columnIndex < blockSize) & ((column + columnIndex) < noOfSequences)); columnIndex++) {
            double alignment = 0;
            if (weightEnabled) {
                alignment = rowSequences.get(rowIndex).weight(colSequences.get(columnIndex));
            } else {
                alignment = rowSequences.get(rowIndex).corr(colSequences.get(columnIndex));
            }
            // Get the identity and make it percent identity
            short scaledScore = (short) (alignment * Short.MAX_VALUE);
            alignments[rowIndex][columnIndex] = scaledScore;
        }
        alignmentCounter.increment(columnIndex);
    }

    SWGWritable dataWritable = new SWGWritable(rowBlock, columnBlock, blockSize, false);
    dataWritable.setAlignments(alignments);
    context.write(new LongWritable(rowBlock), dataWritable);
    if (!isDiagonal) {
        // Create the transpose matrix of (rowBlock, colBlock) block to fill the
        // (colBlock, rowBlock) block.
        SWGWritable inverseDataWritable = new SWGWritable(columnBlock, rowBlock, blockSize, true);
        inverseDataWritable.setAlignments(alignments);
        context.write(new LongWritable(columnBlock), inverseDataWritable);
    }
    System.out.println("Map time : " + ((System.nanoTime() - startTime) / 1000000) + "ms");
}
From source file:name.abhijitsarkar.hadoop.io.IOUtils.java
License:Open Source License
/**
 * @param archiveURI
 *            The archive to be extracted
 * @param conf
 *            Job configuration
 * @return Extracted file URI
 * @throws IOException
 *             If fails to extract the archive
 */
public static URI uncompressFile(final URI archiveURI, final Configuration conf) throws IOException {
    Path archivePath = new Path(archiveURI);
    OutputStream outputStream = null;
    InputStream inputStream = null;
    Path uncompressionPath = null;

    try {
        final FileSystem fs = FileSystem.getLocal(conf);
        FileStatus[] statuses = new FileStatus[] { fs.getFileStatus(archivePath) };

        if (statuses[0].isDir()) {
            statuses = fs.listStatus(archivePath);
            LOGGER.debug("Archive is a directory and contains {} elements.", statuses.length);
            archivePath = statuses[0].getPath();
        }
        LOGGER.debug("archiveURI: {}.", archivePath.toUri());

        final CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(archivePath);
        if (codec == null) {
            LOGGER.debug("Not an archive: {}.", archivePath.toUri());
            return archivePath.toUri();
        }
        LOGGER.debug("Using codec: {}.", codec.getClass().getName());

        uncompressionPath = new Path(addExtension(archivePath.toUri().getPath(), ".new", true));
        LOGGER.debug("uncompressedURI: {}.", uncompressionPath.toUri());

        outputStream = new FileOutputStream(uncompressionPath.toUri().getPath());
        inputStream = new FileInputStream(archivePath.toUri().getPath());

        final CompressionInputStream in = codec.createInputStream(inputStream);
        org.apache.hadoop.io.IOUtils.copyBytes(in, outputStream, conf, false);
    } catch (IOException e) {
        throw e;
    } finally {
        closeStreams(inputStream, outputStream);
    }

    return uncompressionPath.toUri();
}
From source file:net.sf.katta.lib.mapfile.MapFileServer.java
License:Apache License
public MapFileServer() throws IOException {
    _fileSystem = FileSystem.getLocal(_conf);
}
From source file:net.sf.katta.util.FileUtilTest.java
License:Apache License
@Test
public void testUnzipPathFileFileSystemBoolean() throws IOException {
    Configuration configuration = new Configuration();
    FileSystem fileSystem = FileSystem.getLocal(configuration);

    // Test the unspooled case
    File targetFolder = _temporaryFolder.newFolder("unpacked2");
    Path zipPath = new Path(TestResources.SHARD1.getAbsolutePath());
    FileUtil.unzip(zipPath, targetFolder, fileSystem, false);
    File segment = new File(targetFolder, "segments.gen");
    assertTrue("Unzipped local zip directly to target", segment.exists());

    // Test the spooled case
    targetFolder = _temporaryFolder.newFolder("unpacked3");
    zipPath = new Path(TestResources.SHARD1.getAbsolutePath());
    FileUtil.unzip(zipPath, targetFolder, fileSystem, true);
    segment = new File(targetFolder, "segments.gen");
    assertTrue("Unzipped spooled local zip to target", segment.exists());
}
From source file:nl.basjes.hadoop.io.compress.TestSplittableCodecSeams.java
License:Apache License
/**
 * This creates a synthetic file and then uses it to run the split seam check.
 */
private void validateSplitSeamsWithSyntheticFile(final Class<? extends SplittableCompressionCodec> codecClass,
        final long records, final int recordLength, final int recordLengthJitter, final long splitSize,
        final long lastSplitSizeLimit, final int randomizeEveryNChars) throws IOException {
    final Configuration conf = new Configuration();

    if (recordLength + recordLengthJitter > splitSize) {
        fail("Test definition error: Make the splits bigger than the records.");
    }
    if (splitSize > lastSplitSizeLimit) {
        fail("Test definition error: The last split must be the same or larger as the other splits.");
    }

    final FileSystem fs = FileSystem.getLocal(conf);
    final Path filename = writeSplitTestFile(conf, codecClass, records, recordLength, recordLengthJitter,
            randomizeEveryNChars);

    LOG.info("Input is SYNTHETIC: " + "records=" + records + ", " + "recordLength=" + recordLength
            + (recordLengthJitter == 0 ? "" : "+random[0;" + recordLengthJitter + "]."));

    validateSplitSeams(conf, fs, filename, codecClass, splitSize, records, lastSplitSizeLimit);

    fs.delete(filename, true);
}
From source file:nl.basjes.hadoop.io.compress.TestSplittableCodecSeams.java
License:Apache License
/**
 * Write the specified number of records to a file in the test dir using the codec.
 * Records are simply lines of random ASCII.
 */
private static Path writeSplitTestFile(final Configuration conf,
        final Class<? extends SplittableCompressionCodec> codecClass, final long records,
        final int recordLength, final int trailingSizeJitter, final int randomizeEveryNChars)
        throws IOException {
    RAND.setSeed(1); // Make the tests better reproducible

    final FileSystem fs = FileSystem.getLocal(conf);
    final SplittableCompressionCodec codec = ReflectionUtils.newInstance(codecClass, conf);

    final Path wd = new Path(new Path(System.getProperty("test.build.data", "/tmp")).makeQualified(fs.getUri(),
            fs.getWorkingDirectory()), codec.getClass().getSimpleName());

    final Path file = new Path(wd,
            "test-" + records + "-" + recordLength + "-" + trailingSizeJitter + codec.getDefaultExtension());

    DataOutputStream out = null;
    final Compressor cmp = CodecPool.getCompressor(codec);
    try {
        out = new DataOutputStream(codec.createOutputStream(fs.create(file, true), cmp));

        for (long seq = 1; seq <= records; ++seq) {
            final String line = randomGibberish(
                    recordLength + (trailingSizeJitter > 0 ? RAND.nextInt(trailingSizeJitter) : 0),
                    randomizeEveryNChars) + "\n";
            // There must be a simpler way to output ASCII instead of 2-byte Unicode
            out.writeBytes(new String(line.getBytes("UTF-8"), "US-ASCII"));
        }
    } finally {
        IOUtils.cleanup(LOG, out);
        CodecPool.returnCompressor(cmp);
    }
    return file;
}
From source file:nur.aini.hadoop.CopyMergeRegexToLocal.java
License:GNU General Public License
public void run(String srcf, String dst) {
    final Path srcPath = new Path("./" + srcf);
    final Path desPath = new Path(dst);
    try {
        Path[] srcs = FileUtil.stat2Paths(hdfs.globStatus(srcPath), srcPath);
        OutputStream out = FileSystem.getLocal(conf).create(desPath);
        for (int i = 0; i < srcs.length; i++) {
            System.out.println(srcs[i]);
            InputStream in = hdfs.open(srcs[i]);
            IOUtils.copyBytes(in, out, conf, false);
            in.close();
        }
        out.close();
    } catch (IOException ex) {
        System.err.print(ex.getMessage());
    }
}
From source file:org.apache.accumulo.core.client.rfile.RFileClientTest.java
License:Apache License
private String createRFile(SortedMap<Key, Value> testData) throws Exception {
    String testFile = createTmpTestFile();
    try (RFileWriter writer = RFile.newWriter().to(testFile)
            .withFileSystem(FileSystem.getLocal(new Configuration())).build()) {
        writer.append(testData.entrySet());
        // TODO ensure compressors are returned
    }
    return testFile;
}
From source file:org.apache.accumulo.core.client.rfile.RFileClientTest.java
License:Apache License
@Test
public void testIndependance() throws Exception {
    // test to ensure two iterators allocated from same RFile scanner are independent.
    LocalFileSystem localFs = FileSystem.getLocal(new Configuration());

    SortedMap<Key, Value> testData = createTestData(10, 10, 10);
    String testFile = createRFile(testData);

    Scanner scanner = RFile.newScanner().from(testFile).withFileSystem(localFs).build();

    Range range1 = Range.exact(rowStr(5));
    scanner.setRange(range1);
    Iterator<Entry<Key, Value>> scnIter1 = scanner.iterator();
    Iterator<Entry<Key, Value>> mapIter1 = testData.subMap(range1.getStartKey(), range1.getEndKey()).entrySet()
            .iterator();

    Range range2 = new Range(rowStr(3), true, rowStr(4), true);
    scanner.setRange(range2);
    Iterator<Entry<Key, Value>> scnIter2 = scanner.iterator();
    Iterator<Entry<Key, Value>> mapIter2 = testData.subMap(range2.getStartKey(), range2.getEndKey()).entrySet()
            .iterator();

    while (scnIter1.hasNext() || scnIter2.hasNext()) {
        if (scnIter1.hasNext()) {
            assertTrue(mapIter1.hasNext());
            assertEquals(scnIter1.next(), mapIter1.next());
        } else {
            assertFalse(mapIter1.hasNext());
        }

        if (scnIter2.hasNext()) {
            assertTrue(mapIter2.hasNext());
            assertEquals(scnIter2.next(), mapIter2.next());
        } else {
            assertFalse(mapIter2.hasNext());
        }
    }

    assertFalse(mapIter1.hasNext());
    assertFalse(mapIter2.hasNext());

    scanner.close();
}
From source file:org.apache.accumulo.core.client.rfile.RFileClientTest.java
License:Apache License
@Test
public void testMultipleSources() throws Exception {
    SortedMap<Key, Value> testData1 = createTestData(10, 10, 10);
    SortedMap<Key, Value> testData2 = createTestData(0, 10, 0, 10, 10);

    String testFile1 = createRFile(testData1);
    String testFile2 = createRFile(testData2);

    LocalFileSystem localFs = FileSystem.getLocal(new Configuration());
    Scanner scanner = RFile.newScanner().from(testFile1, testFile2).withFileSystem(localFs).build();

    TreeMap<Key, Value> expected = new TreeMap<>(testData1);
    expected.putAll(testData2);
    assertEquals(expected, toMap(scanner));

    Range range = new Range(rowStr(3), true, rowStr(14), true);
    scanner.setRange(range);
    assertEquals(expected.subMap(range.getStartKey(), range.getEndKey()), toMap(scanner));

    scanner.close();
}