Example usage for org.apache.hadoop.fs FileSystem get

Introduction

This page collects usage examples for org.apache.hadoop.fs.FileSystem.get.

Prototype

public static FileSystem get(Configuration conf) throws IOException 

Document

Returns the configured FileSystem implementation.
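
Before the longer examples below, a minimal self-contained sketch of the call itself; the class name FileSystemGetExample and the path /tmp/example.txt are placeholders chosen for illustration, not taken from any example on this page:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileSystemGetExample {
    public static void main(String[] args) throws IOException {
        // Loads core-site.xml etc. from the classpath; fs.defaultFS decides
        // whether this resolves to HDFS, the local file system, or another impl.
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // Placeholder path; write a few bytes and check the file exists.
        Path path = new Path("/tmp/example.txt");
        try (FSDataOutputStream out = fs.create(path, true)) {
            out.writeUTF("hello");
        }
        System.out.println("exists: " + fs.exists(path));
    }
}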

Usage

From source file:cn.edu.hfut.dmic.webcollector.fetcher.FetcherOutputFormat.java

@Override
public RecordWriter<Text, Writable> getRecordWriter(TaskAttemptContext tac)
        throws IOException, InterruptedException {
    Configuration conf = tac.getConfiguration();
    FileSystem fs = FileSystem.get(conf);
    String outputPath = conf.get("mapred.output.dir");

    Path fetchPath = new Path(outputPath, "fetch/info");
    Path contentPath = new Path(outputPath, "content/info");
    Path parseDataPath = new Path(outputPath, "parse/info");
    Path redirectPath = new Path(outputPath, "redirect/info");
    final SequenceFile.Writer fetchOut = new SequenceFile.Writer(fs, conf, fetchPath, Text.class,
            CrawlDatum.class);
    final SequenceFile.Writer contentOut = new SequenceFile.Writer(fs, conf, contentPath, Text.class,
            Content.class);
    final SequenceFile.Writer parseDataOut = new SequenceFile.Writer(fs, conf, parseDataPath, Text.class,
            CrawlDatum.class);
    final SequenceFile.Writer redirectOut = new SequenceFile.Writer(fs, conf, redirectPath, CrawlDatum.class,
            Text.class);

    return new RecordWriter<Text, Writable>() {

        @Override
        public void write(Text k, Writable v) throws IOException, InterruptedException {
            if (v instanceof CrawlDatum) {
                fetchOut.append(k, v);
            } else if (v instanceof Content) {
                contentOut.append(k, v);
            } else if (v instanceof ParseData) {

                ParseData parseData = (ParseData) v;
                CrawlDatums next = parseData.next;
                for (CrawlDatum datum : next) {
                    parseDataOut.append(new Text(datum.getKey()), datum);
                }

            } else if (v instanceof Redirect) {
                Redirect redirect = (Redirect) v;
                redirectOut.append(redirect.datum, new Text(redirect.realUrl));
            }
        }

        @Override
        public void close(TaskAttemptContext tac) throws IOException, InterruptedException {
            fetchOut.close();
            contentOut.close();
            parseDataOut.close();
            redirectOut.close();
        }
    };

}
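
The SequenceFile.Writer constructors used above are deprecated in Hadoop 2.x and later in favor of the createWriter factory. A sketch of the equivalent for the first writer, reusing conf and fetchPath from the method above (the Writer.Option calls are the only new names):

// Non-deprecated factory form of the fetchOut writer above (Hadoop 2.x+).
SequenceFile.Writer fetchOut = SequenceFile.createWriter(conf,
        SequenceFile.Writer.file(fetchPath),
        SequenceFile.Writer.keyClass(Text.class),
        SequenceFile.Writer.valueClass(CrawlDatum.class));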

From source file:co.cask.cdap.common.io.Locations.java

License:Apache License

/**
 * Creates a new {@link InputSupplier} that can provide a {@link SeekableInputStream} from the given location.
 *
 * @param location Location for the input stream.
 * @return An {@link InputSupplier}.
 */
public static InputSupplier<? extends SeekableInputStream> newInputSupplier(final Location location) {
    return new InputSupplier<SeekableInputStream>() {
        @Override
        public SeekableInputStream getInput() throws IOException {
            InputStream input = location.getInputStream();
            try {
                if (input instanceof FileInputStream) {
                    return new FileSeekableInputStream((FileInputStream) input);
                }
                if (input instanceof FSDataInputStream) {
                    FSDataInputStream dataInput = (FSDataInputStream) input;
                    LocationFactory locationFactory = location.getLocationFactory();

                    FileSystem fs = null;
                    if (locationFactory instanceof HDFSLocationFactory) {
                        fs = ((HDFSLocationFactory) locationFactory).getFileSystem();
                    } else if (locationFactory instanceof FileContextLocationFactory) {
                        final FileContextLocationFactory lf = (FileContextLocationFactory) locationFactory;
                        fs = lf.getFileContext().getUgi().doAs(new PrivilegedExceptionAction<FileSystem>() {
                            @Override
                            public FileSystem run() throws IOException {
                                return FileSystem.get(lf.getConfiguration());
                            }
                        });
                    }

                    if (fs != null) {
                        return new DFSSeekableInputStream(dataInput,
                                createDFSStreamSizeProvider(fs, new Path(location.toURI()), dataInput));
                    }
                    // This shouldn't happen
                    return new DFSSeekableInputStream(dataInput, new StreamSizeProvider() {
                        @Override
                        public long size() throws IOException {
                            // Assumption: if the FS is not HDFS, the location length gives the stream size
                            return location.length();
                        }
                    });
                }

                throw new IOException("Failed to create SeekableInputStream from location " + location);
            } catch (Throwable t) {
                Closeables.closeQuietly(input);
                Throwables.propagateIfInstanceOf(t, IOException.class);
                throw new IOException(t);
            }
        }
    };
}

From source file:co.cask.cdap.common.logging.LogCollector.java

License:Apache License

private FileSystem getFileSystem() throws IOException {
    if (fs == null) {
        synchronized (this) {
            if (fs == null) {
                fs = FileSystem.get(hConfig);
                // TODO horrible! what worth is the FileSystem abstraction then?
                // not sure why this is, but the local file system's hflush() does
                // not appear to work. Using the raw local file system fixes it.
                if (fs instanceof LocalFileSystem) {
                    fs = ((LocalFileSystem) fs).getRawFileSystem();
                }
            }
        }
    }
    return fs;
}
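
Worth noting alongside this example: FileSystem.get(conf) returns a JVM-wide cached instance (keyed by URI scheme, authority, and user), so closing it closes it for every caller in the process. When an isolated instance is needed, FileSystem.newInstance offers an uncached alternative; a sketch, assuming the same hConfig field used above:

// Private, uncached FileSystem: safe to close without affecting
// other components that share the cached instance.
FileSystem privateFs = FileSystem.newInstance(hConfig);
try {
    // ... use privateFs ...
} finally {
    privateFs.close();
}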

From source file:co.cask.cdap.common.logging.SyncTest.java

License:Apache License

@Test
@Ignore
public void testSync() throws IOException {
    FileSystem fs = FileSystem.get(config);
    // create a file and write n bytes, then sync
    Path path = new Path("/myfile");
    FSDataOutputStream out = fs.create(path, false, 4096, (short) 2, 4096L);
    int numBytes = 5000;
    for (int i = 0; i < numBytes; i++) {
        out.write((byte) i);
    }
    out.hflush();
    // verify the file is there
    Assert.assertTrue(fs.exists(path));
    // do not verify the length of the file, hflush() does not update that
    //Assert.assertEquals(numBytes, fs.getFileStatus(path).getLen());
    // read back and verify all bytes
    FSDataInputStream in = fs.open(path);
    byte[] buffer = new byte[numBytes];
    in.readFully(buffer);
    for (int i = 0; i < numBytes; i++) {
        Assert.assertEquals((byte) i, buffer[i]);
    }
    in.close();
    // now close the writer
    out.close();
}
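
The test relies on hflush(), which only guarantees that new readers see the flushed bytes; it does not force the bytes to disk on the datanodes. Where durability matters, FSDataOutputStream also exposes hsync(), a one-line variant against the same stream:

out.hsync(); // like hflush(), but additionally asks each datanode to persist the data to disk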

From source file:co.cask.cdap.data.hbase.HBase10CDH550Test.java

License:Apache License

@Override
public HRegion createHRegion(byte[] tableName, byte[] startKey, byte[] stopKey, String callingMethod,
        Configuration conf, byte[]... families) throws IOException {
    if (conf == null) {
        conf = new Configuration();
    }
    HTableDescriptor htd = new HTableDescriptor(tableName);
    for (byte[] family : families) {
        htd.addFamily(new HColumnDescriptor(family));
    }
    HRegionInfo info = new HRegionInfo(htd.getTableName(), startKey, stopKey, false);
    Path path = new Path(conf.get(HConstants.HBASE_DIR), callingMethod);
    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(path)) {
        if (!fs.delete(path, true)) {
            throw new IOException("Failed delete of " + path);
        }
    }
    return HRegion.createHRegion(info, path, conf, htd);
}

From source file:co.cask.cdap.data.hbase.HBase94Test.java

License:Apache License

@Override
public HRegion createHRegion(byte[] tableName, byte[] startKey, byte[] stopKey, String callingMethod,
        Configuration conf, byte[]... families) throws IOException {
    if (conf == null) {
        conf = new Configuration();
    }
    HTableDescriptor htd = new HTableDescriptor(tableName);
    for (byte[] family : families) {
        htd.addFamily(new HColumnDescriptor(family));
    }
    HRegionInfo info = new HRegionInfo(htd.getName(), startKey, stopKey, false);
    Path path = new Path(conf.get(HConstants.HBASE_DIR), callingMethod);
    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(path)) {
        if (!fs.delete(path, true)) {
            throw new IOException("Failed delete of " + path);
        }
    }
    return HRegion.createHRegion(info, path, conf, htd);
}

From source file:co.cask.cdap.data.stream.StreamRecordReader.java

License:Apache License

@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    inputSplit = (StreamInputSplit) split;
    reader = createReader(FileSystem.get(context.getConfiguration()), inputSplit);
    reader.initialize();
    readFilter = new TimeRangeReadFilter(inputSplit.getStartTime(), inputSplit.getEndTime());
}

From source file:co.cask.cdap.data.tools.HBaseTableExporter.java

License:Apache License

/**
 * Sets up the actual MapReduce job.
 * @param tx The transaction which needs to be passed to the Scan instance. This transaction is used by
 *           coprocessors to filter out data corresponding to invalid transactions.
 * @param tableName Name of the table which need to be exported as HFiles.
 * @return the configured job
 * @throws IOException
 */
public Job createSubmittableJob(Transaction tx, String tableName) throws IOException {

    Job job = Job.getInstance(hConf, "HBaseTableExporter");

    job.setJarByClass(HBaseTableExporter.class);
    Scan scan = new Scan();
    scan.setCacheBlocks(false);
    // Set the transaction attribute for the scan.
    scan.setAttribute(TxConstants.TX_OPERATION_ATTRIBUTE_KEY, new TransactionCodec().encode(tx));
    job.setNumReduceTasks(0);

    TableMapReduceUtil.initTableMapperJob(tableName, scan, KeyValueImporter.class, null, null, job);

    FileSystem fs = FileSystem.get(hConf);
    Random rand = new Random();
    Path root = new Path(fs.getWorkingDirectory(), "hbasetableexporter");
    fs.mkdirs(root);
    while (true) {
        bulkloadDir = new Path(root, "" + rand.nextLong());
        if (!fs.exists(bulkloadDir)) {
            break;
        }
    }

    HFileOutputFormat2.setOutputPath(job, bulkloadDir);
    HTable hTable = new HTable(hConf, tableName);
    HFileOutputFormat2.configureIncrementalLoad(job, hTable);

    return job;
}

From source file:co.cask.cdap.data.tools.ReplicationStatusTool.java

License:Apache License

private static SortedMap<String, String> getClusterChecksumMap() throws IOException {
    FileSystem fileSystem = FileSystem.get(hConf);
    List<String> fileList = addAllFiles(fileSystem);
    SortedMap<String, String> checksumMap = new TreeMap<String, String>();
    for (String file : fileList) {
        FileChecksum fileChecksum = fileSystem.getFileChecksum(new Path(file));
        checksumMap.put(normalizedFileName(file), fileChecksum.toString());
    }
    LOG.info("Added " + checksumMap.size() + " checksums for snapshot files.");
    return checksumMap;
}
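
One caveat with this helper: FileSystem.getFileChecksum may return null when the underlying file system does not provide checksums (the local file system, for example), which would trigger a NullPointerException here. A defensive variant of the loop body, reusing the names above:

FileChecksum fileChecksum = fileSystem.getFileChecksum(new Path(file));
if (fileChecksum != null) {
    checksumMap.put(normalizedFileName(file), fileChecksum.toString());
}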

From source file:co.cask.cdap.data2.increment.hbase10.IncrementSummingScannerTest.java

License:Apache License

static HRegion createRegion(Configuration hConf, CConfiguration cConf, TableId tableId, HColumnDescriptor cfd)
        throws Exception {
    HBaseTableUtil tableUtil = new HBaseTableUtilFactory(cConf).get();
    HTableDescriptorBuilder htd = tableUtil.buildHTableDescriptor(tableId);
    cfd.setMaxVersions(Integer.MAX_VALUE);
    cfd.setKeepDeletedCells(true);
    htd.addFamily(cfd);
    htd.addCoprocessor(IncrementHandler.class.getName());

    HTableDescriptor desc = htd.build();
    String tableName = desc.getNameAsString();
    Path tablePath = new Path("/tmp/" + tableName);
    Path hlogPath = new Path("/tmp/hlog-" + tableName);
    FileSystem fs = FileSystem.get(hConf);
    assertTrue(fs.mkdirs(tablePath));
    WALFactory walFactory = new WALFactory(hConf, null, hlogPath.toString());
    WAL hLog = walFactory.getWAL(new byte[] { 1 });
    HRegionInfo regionInfo = new HRegionInfo(desc.getTableName());
    HRegionFileSystem regionFS = HRegionFileSystem.createRegionOnFileSystem(hConf, fs, tablePath, regionInfo);
    return new HRegion(regionFS, hLog, hConf, desc, new LocalRegionServerServices(hConf,
            ServerName.valueOf(InetAddress.getLocalHost().getHostName(), 0, System.currentTimeMillis())));
}