Example usage for org.apache.hadoop.fs FileSystem getLocal

List of usage examples for org.apache.hadoop.fs FileSystem getLocal

Introduction

On this page you can find usage examples for org.apache.hadoop.fs FileSystem getLocal.

Prototype

public static LocalFileSystem getLocal(Configuration conf) throws IOException 

Document

Get the local FileSystem.
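
Below is a minimal, self-contained sketch of typical usage. It is not taken from any of the projects listed further down; the class name, path and file contents are illustrative only.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;

public class GetLocalExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Obtain the FileSystem implementation backed by the local disk.
        LocalFileSystem localFs = FileSystem.getLocal(conf);

        // Write a small file and check its length to show that the returned
        // object behaves like any other FileSystem (illustrative path).
        Path path = new Path("/tmp/getlocal-example.txt");
        try (FSDataOutputStream out = localFs.create(path, true)) {
            out.writeUTF("hello");
        }
        System.out.println("length = " + localFs.getFileStatus(path).getLen());
        localFs.delete(path, false);
    }
}

The examples that follow are taken from real projects and show the same call inside map tasks, tests and utility code.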

Usage

From source file:msc.fall2015.stock.kmeans.hbase.mapreduce.pwd.SWGMap.java

License:Open Source License

public void map(LongWritable blockIndex, Text value, Context context) throws IOException, InterruptedException {
    long startTime = System.nanoTime();
    Configuration conf = context.getConfiguration();
    Counter alignmentCounter = context.getCounter(Constants.RecordCounters.ALIGNMENTS);
    String valString = value.toString();
    String[] valArgs = valString.split(Constants.BREAK);

    long rowBlock = Long.parseLong(valArgs[0]);
    long columnBlock = Long.parseLong(valArgs[1]);
    boolean isDiagonal = Boolean.parseBoolean(valArgs[2]);
    System.out.println("row column" + rowBlock + "  " + columnBlock + "  " + isDiagonal + "  " + valArgs[2]);

    long blockSize = conf.getLong(Constants.BLOCK_SIZE, 1000);
    long noOfSequences = conf.getLong(Constants.NO_OF_SEQUENCES, blockSize * 10);
    long noOfDivisions = conf.getLong(Constants.NO_OF_DIVISIONS, noOfSequences / blockSize);
    boolean weightEnabled = conf.getBoolean(Constants.WEIGHT_ENABLED, false);

    long row = rowBlock * blockSize;
    long column = columnBlock * blockSize;

    long parseStartTime = System.nanoTime();
    FileSystem fs = FileSystem.getLocal(conf);
    // parse the inputFilePart for row
    Path rowPath = new Path(Constants.HDFS_SEQ_FILENAME + "_" + rowBlock);
    FSDataInputStream rowInStream = fs.open(rowPath);
    List<VectorPoint> rowSequences;
    rowSequences = SequenceParser.ParseFile(rowInStream);
    // parse the inputFilePart for column if this is not a diagonal block
    List<VectorPoint> colSequences;
    if (isDiagonal) {
        colSequences = rowSequences;
    } else {
        // parse the inputFilePart for column
        Path colPath = new Path(Constants.HDFS_SEQ_FILENAME + "_" + columnBlock);
        FSDataInputStream colInStream = fs.open(colPath);
        colSequences = SequenceParser.ParseFile(colInStream);
    }
    System.out.println("Parsing time : " + ((System.nanoTime() - parseStartTime) / 1000000) + "ms");

    short[][] alignments = new short[(int) blockSize][(int) blockSize];
    for (int rowIndex = 0; ((rowIndex < blockSize) & ((row + rowIndex) < noOfSequences)); rowIndex++) {
        int columnIndex = 0;
        for (; ((columnIndex < blockSize) & ((column + columnIndex) < noOfSequences)); columnIndex++) {
            double alignment = 0;
            if (weightEnabled) {
                alignment = rowSequences.get(rowIndex).weight(colSequences.get(columnIndex));
            } else {
                alignment = rowSequences.get(rowIndex).corr(colSequences.get(columnIndex));
            }

            // Get the identity and make it percent identity
            short scaledScore = (short) (alignment * Short.MAX_VALUE);
            alignments[rowIndex][columnIndex] = scaledScore;
        }
        alignmentCounter.increment(columnIndex);
    }

    SWGWritable dataWritable = new SWGWritable(rowBlock, columnBlock, blockSize, false);
    dataWritable.setAlignments(alignments);
    context.write(new LongWritable(rowBlock), dataWritable);

    if (!isDiagonal) {
        // Create the transpose matrix of (rowBlock, colBlock) block to fill the
        // (colBlock, rowBlock) block.
        SWGWritable inverseDataWritable = new SWGWritable(columnBlock, rowBlock, blockSize, true);
        inverseDataWritable.setAlignments(alignments);
        context.write(new LongWritable(columnBlock), inverseDataWritable);
    }
    System.out.println("Map time : " + ((System.nanoTime() - startTime) / 1000000) + "ms");
}

From source file:name.abhijitsarkar.hadoop.io.IOUtils.java

License:Open Source License

/**
 * @param archiveURI
 *            The archive to be extracted
 * @param conf
 *            Job configuration
 * @return Extracted file URI
 * @throws IOException
 *             If fails to extract the archive
 */
public static URI uncompressFile(final URI archiveURI, final Configuration conf) throws IOException {
    Path archivePath = new Path(archiveURI);
    OutputStream outputStream = null;
    InputStream inputStream = null;
    Path uncompressionPath = null;

    try {
        final FileSystem fs = FileSystem.getLocal(conf);

        FileStatus[] statuses = new FileStatus[] { fs.getFileStatus(archivePath) };
        if (statuses[0].isDir()) {
            statuses = fs.listStatus(archivePath);

            LOGGER.debug("Archive is a directory and contains {} elements.", statuses.length);

            archivePath = statuses[0].getPath();
        }

        LOGGER.debug("archiveURI: {}.", archivePath.toUri());

        final CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(archivePath);
        if (codec == null) {
            LOGGER.debug("Not an archive: {}.", archivePath.toUri());
            return archivePath.toUri();
        }

        LOGGER.debug("Using codec: {}.", codec.getClass().getName());
        uncompressionPath = new Path(addExtension(archivePath.toUri().getPath(), ".new", true));

        LOGGER.debug("uncompressedURI: {}.", uncompressionPath.toUri());

        outputStream = new FileOutputStream(uncompressionPath.toUri().getPath());

        inputStream = new FileInputStream(archivePath.toUri().getPath());
        final CompressionInputStream in = codec.createInputStream(inputStream);
        org.apache.hadoop.io.IOUtils.copyBytes(in, outputStream, conf, false);
    } catch (IOException e) {
        throw e;
    } finally {
        closeStreams(inputStream, outputStream);
    }

    return uncompressionPath.toUri();
}

From source file:net.sf.katta.lib.mapfile.MapFileServer.java

License:Apache License

public MapFileServer() throws IOException {
    _fileSystem = FileSystem.getLocal(_conf);
}

From source file:net.sf.katta.util.FileUtilTest.java

License:Apache License

@Test
public void testUnzipPathFileFileSystemBoolean() throws IOException {
    Configuration configuration = new Configuration();
    FileSystem fileSystem = FileSystem.getLocal(configuration);

    // Test the unspooled case
    File targetFolder = _temporaryFolder.newFolder("unpacked2");
    Path zipPath = new Path(TestResources.SHARD1.getAbsolutePath());
    FileUtil.unzip(zipPath, targetFolder, fileSystem, false);
    File segment = new File(targetFolder, "segments.gen");
    assertTrue("Unzipped local zip directly to target", segment.exists());

    // Test the spooled case

    targetFolder = _temporaryFolder.newFolder("unpacked3");
    zipPath = new Path(TestResources.SHARD1.getAbsolutePath());
    FileUtil.unzip(zipPath, targetFolder, fileSystem, true);
    segment = new File(targetFolder, "segments.gen");
    assertTrue("Unzipped spooled local zip to target", segment.exists());

}

From source file:nl.basjes.hadoop.io.compress.TestSplittableCodecSeams.java

License:Apache License

/**
  * This creates a synthetic file and then uses it to run the split seam check.
  */
private void validateSplitSeamsWithSyntheticFile(final Class<? extends SplittableCompressionCodec> codecClass,
        final long records, final int recordLength, final int recordLengthJitter, final long splitSize,
        final long lastSplitSizeLimit, final int randomizeEveryNChars) throws IOException {
    final Configuration conf = new Configuration();

    if (recordLength + recordLengthJitter > splitSize) {
        fail("Test definition error: Make the splits bigger than the records.");
    }

    if (splitSize > lastSplitSizeLimit) {
        fail("Test definition error: The last split must be the same or larger as the other splits.");
    }

    final FileSystem fs = FileSystem.getLocal(conf);
    final Path filename = writeSplitTestFile(conf, codecClass, records, recordLength, recordLengthJitter,
            randomizeEveryNChars);

    LOG.info("Input is SYNTHETIC: " + "records=" + records + ", " + "recordLength=" + recordLength
            + (recordLengthJitter == 0 ? "" : "+random[0;" + recordLengthJitter + "]."));

    validateSplitSeams(conf, fs, filename, codecClass, splitSize, records, lastSplitSizeLimit);

    fs.delete(filename, true);
}

From source file:nl.basjes.hadoop.io.compress.TestSplittableCodecSeams.java

License:Apache License

/**
 * Write the specified number of records to file in test dir using codec.
 * Records are simply lines of random ASCII.
 */
private static Path writeSplitTestFile(final Configuration conf,
        final Class<? extends SplittableCompressionCodec> codecClass, final long records,
        final int recordLength, final int trailingSizeJitter, final int randomizeEveryNChars)
        throws IOException {

    RAND.setSeed(1); // Make the tests more reproducible

    final FileSystem fs = FileSystem.getLocal(conf);
    final SplittableCompressionCodec codec = ReflectionUtils.newInstance(codecClass, conf);

    final Path wd = new Path(new Path(System.getProperty("test.build.data", "/tmp")).makeQualified(fs.getUri(),
            fs.getWorkingDirectory()), codec.getClass().getSimpleName());

    final Path file = new Path(wd,
            "test-" + records + "-" + recordLength + "-" + trailingSizeJitter + codec.getDefaultExtension());
    DataOutputStream out = null;
    final Compressor cmp = CodecPool.getCompressor(codec);
    try {
        out = new DataOutputStream(codec.createOutputStream(fs.create(file, true), cmp));

        for (long seq = 1; seq <= records; ++seq) {
            final String line = randomGibberish(
                    recordLength + (trailingSizeJitter > 0 ? RAND.nextInt(trailingSizeJitter) : 0),
                    randomizeEveryNChars) + "\n";
            // There must be a simpler way to output ASCII instead of 2-byte Unicode
            out.writeBytes(new String(line.getBytes("UTF-8"), "US-ASCII"));
        }
    } finally {
        IOUtils.cleanup(LOG, out);
        CodecPool.returnCompressor(cmp);
    }
    return file;
}

From source file:nur.aini.hadoop.CopyMergeRegexToLocal.java

License:GNU General Public License

public void run(String srcf, String dst) {

    final Path srcPath = new Path("./" + srcf);
    final Path desPath = new Path(dst);
    try {
        Path[] srcs = FileUtil.stat2Paths(hdfs.globStatus(srcPath), srcPath);
        OutputStream out = FileSystem.getLocal(conf).create(desPath);
        for (int i = 0; i < srcs.length; i++) {
            System.out.println(srcs[i]);
            InputStream in = hdfs.open(srcs[i]);

            IOUtils.copyBytes(in, out, conf, false);
            in.close();

        }
        out.close();

    } catch (IOException ex) {
        System.err.print(ex.getMessage());
    }
}

From source file:org.apache.accumulo.core.client.rfile.RFileClientTest.java

License:Apache License

private String createRFile(SortedMap<Key, Value> testData) throws Exception {
    String testFile = createTmpTestFile();

    try (RFileWriter writer = RFile.newWriter().to(testFile)
            .withFileSystem(FileSystem.getLocal(new Configuration())).build()) {
        writer.append(testData.entrySet());
        // TODO ensure compressors are returned
    }

    return testFile;
}

From source file:org.apache.accumulo.core.client.rfile.RFileClientTest.java

License:Apache License

@Test
public void testIndependance() throws Exception {
    // Test to ensure two iterators allocated from the same RFile scanner are independent.

    LocalFileSystem localFs = FileSystem.getLocal(new Configuration());

    SortedMap<Key, Value> testData = createTestData(10, 10, 10);

    String testFile = createRFile(testData);

    Scanner scanner = RFile.newScanner().from(testFile).withFileSystem(localFs).build();
    Range range1 = Range.exact(rowStr(5));
    scanner.setRange(range1);
    Iterator<Entry<Key, Value>> scnIter1 = scanner.iterator();
    Iterator<Entry<Key, Value>> mapIter1 = testData.subMap(range1.getStartKey(), range1.getEndKey()).entrySet()
            .iterator();

    Range range2 = new Range(rowStr(3), true, rowStr(4), true);
    scanner.setRange(range2);
    Iterator<Entry<Key, Value>> scnIter2 = scanner.iterator();
    Iterator<Entry<Key, Value>> mapIter2 = testData.subMap(range2.getStartKey(), range2.getEndKey()).entrySet()
            .iterator();

    while (scnIter1.hasNext() || scnIter2.hasNext()) {
        if (scnIter1.hasNext()) {
            assertTrue(mapIter1.hasNext());
            assertEquals(scnIter1.next(), mapIter1.next());
        } else {
            assertFalse(mapIter1.hasNext());
        }

        if (scnIter2.hasNext()) {
            assertTrue(mapIter2.hasNext());
            assertEquals(scnIter2.next(), mapIter2.next());
        } else {
            assertFalse(mapIter2.hasNext());
        }
    }

    assertFalse(mapIter1.hasNext());
    assertFalse(mapIter2.hasNext());

    scanner.close();
}

From source file:org.apache.accumulo.core.client.rfile.RFileClientTest.java

License:Apache License

@Test
public void testMultipleSources() throws Exception {
    SortedMap<Key, Value> testData1 = createTestData(10, 10, 10);
    SortedMap<Key, Value> testData2 = createTestData(0, 10, 0, 10, 10);

    String testFile1 = createRFile(testData1);
    String testFile2 = createRFile(testData2);

    LocalFileSystem localFs = FileSystem.getLocal(new Configuration());
    Scanner scanner = RFile.newScanner().from(testFile1, testFile2).withFileSystem(localFs).build();

    TreeMap<Key, Value> expected = new TreeMap<>(testData1);
    expected.putAll(testData2);

    assertEquals(expected, toMap(scanner));

    Range range = new Range(rowStr(3), true, rowStr(14), true);
    scanner.setRange(range);
    assertEquals(expected.subMap(range.getStartKey(), range.getEndKey()), toMap(scanner));

    scanner.close();
}