Example usage for org.apache.hadoop.fs FileSystem open

List of usage examples for org.apache.hadoop.fs FileSystem open

Introduction

On this page you can find example usage for org.apache.hadoop.fs FileSystem open.

Prototype

public FSDataInputStream open(Path f) throws IOException 

Document

Opens an FSDataInputStream at the indicated Path.
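
Before the project examples below, here is a minimal, self-contained sketch of calling open with a Path. The Configuration defaults and the /tmp/example.txt path are placeholder assumptions, not taken from any of the source files listed on this page.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileSystemOpenExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Hypothetical input path; point this at a file that exists on your file system.
        Path path = new Path("/tmp/example.txt");

        FileSystem fs = FileSystem.get(conf);

        // open() returns an FSDataInputStream, which adds seek() and positioned
        // reads on top of the standard InputStream operations.
        try (FSDataInputStream in = fs.open(path)) {
            byte[] buf = new byte[4096];
            int bytesRead = in.read(buf);
            System.out.println("Read " + bytesRead + " bytes from " + path);
        }
    }
}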

Usage

From source file:com.kylinolap.common.persistence.HBaseResourceStoreTest.java

License:Apache License

@Test
public void testHBaseStoreWithLargeCell() throws Exception {
    String path = "/cube/_test_large_cell.json";
    String largeContent = "THIS_IS_A_LARGE_CELL";
    StringEntity content = new StringEntity(largeContent);
    KylinConfig config = KylinConfig.getInstanceFromEnv();
    int origSize = config.getHBaseKeyValueSize();
    ResourceStore store = ResourceStore.getStore(KylinConfig.getInstanceFromEnv());

    try {
        config.setProperty("kylin.hbase.client.keyvalue.maxsize", String.valueOf(largeContent.length() - 1));

        store.deleteResource(path);

        store.putResource(path, content, StringEntity.serializer);
        assertTrue(store.exists(path));
        StringEntity t = store.getResource(path, StringEntity.class, StringEntity.serializer);
        assertEquals(content, t);

        Path redirectPath = ((HBaseResourceStore) store).bigCellHDFSPath(path);
        Configuration hconf = HadoopUtil.getCurrentConfiguration();
        FileSystem fileSystem = FileSystem.get(hconf);
        assertTrue(fileSystem.exists(redirectPath));

        FSDataInputStream in = fileSystem.open(redirectPath);
        assertEquals(largeContent, in.readUTF());
        in.close();

        store.deleteResource(path);
    } finally {
        config.setProperty("kylin.hbase.client.keyvalue.maxsize", "" + origSize);
        store.deleteResource(path);
    }
}

From source file:com.liferay.hadoop.store.HDFSStore.java

License:Open Source License

@Override
public InputStream getFileAsStream(long companyId, long repositoryId, String fileName, String versionLabel)
        throws PortalException, SystemException {

    Path fullPath = HadoopManager.getFullVersionFilePath(companyId, repositoryId, fileName, versionLabel);

    try {
        FileSystem fileSystem = HadoopManager.getFileSystem();

        if (!fileSystem.exists(fullPath)) {
            throw new PortalException("File " + fullPath.toUri().toString() + " does not exist");
        }

        return fileSystem.open(fullPath);
    } catch (IOException ioe) {
        throw new SystemException(ioe);
    }
}

From source file:com.lightboxtechnologies.nsrl.SmallTableLoader.java

License:Apache License

protected static void load(FileSystem fs, String filename, LineHandler lh, RecordLoader loader)
        throws IOException {
    InputStream in = null;
    try {
        in = fs.open(new Path(filename));
        loader.load(in, lh);
        in.close();
    } finally {
        IOUtils.closeQuietly(in);
    }
}

From source file:com.lightboxtechnologies.spectrum.FileProxy.java

License:Apache License

public InputStream open(FileSystem fs, FSDataInputStream di, FsEntry entry) throws IOException {
    return fs.open(new Path(FilePath));
}

From source file:com.lightboxtechnologies.spectrum.HDFSArchiver.java

License:Apache License

protected static void handleFile(String relpath, FileSystem fs, Path p, ZipOutputStream zout, byte[] buf)
        throws IOException {
    final ZipEntry entry = new ZipEntry(relpath);
    zout.putNextEntry(entry);

    InputStream in = null;
    try {
        in = fs.open(p);
        IOUtils.copy(in, zout, buf);
        in.close();
    } finally {
        IOUtils.closeQuietly(in);
    }

    zout.closeEntry();
}

From source file:com.linkedin.cubert.io.rubix.RubixFile.java

License:Open Source License

@SuppressWarnings("unchecked")
public List<KeyData<K>> getKeyData() throws IOException, ClassNotFoundException {
    final FileSystem fs = FileSystem.get(conf);
    keyData = new ArrayList<KeyData<K>>();

    final long filesize = fs.getFileStatus(path).getLen();
    FSDataInputStream in = fs.open(path);

    /* The last long in the file is the start position of the trailer section */
    in.seek(filesize - 8);
    long metaDataStartPos = in.readLong();

    in.seek(metaDataStartPos);

    ObjectMapper mapper = new ObjectMapper();
    metadataJson = mapper.readValue(in.readUTF(), JsonNode.class);

    int keySectionSize = in.readInt();

    // load the key section
    byte[] keySection = new byte[keySectionSize];

    in.seek(filesize - keySectionSize - 8);
    in.read(keySection, 0, keySectionSize);
    in.close();

    ByteArrayInputStream bis = new ByteArrayInputStream(keySection);
    DataInput dataInput = new DataInputStream(bis);

    int numberOfBlocks = metadataJson.get("numberOfBlocks").getIntValue();

    // load the key and value classes
    keyClass = (Class<K>) ClassCache.forName(JsonUtils.getText(metadataJson, "keyClass"));
    valueClass = (Class<V>) ClassCache.forName(JsonUtils.getText(metadataJson, "valueClass"));

    SerializationFactory serializationFactory = new SerializationFactory(conf);
    Deserializer<K> deserializer = serializationFactory.getDeserializer(keyClass);

    deserializer.open(bis);

    while (bis.available() > 0 && numberOfBlocks > 0) {
        K key = deserializer.deserialize(null);

        long offset = dataInput.readLong();
        long blockId = dataInput.readLong();
        long numRecords = dataInput.readLong();

        keyData.add(new KeyData<K>(key, offset, 0, numRecords, blockId));
        numberOfBlocks--;
    }

    // Assign length to each keydata entry
    int numEntries = keyData.size();
    for (int i = 1; i < numEntries; i++) {
        KeyData<K> prev = keyData.get(i - 1);
        KeyData<K> current = keyData.get(i);

        prev.setLength(current.getOffset() - prev.getOffset());
    }

    if (numEntries > 0) {
        KeyData<K> last = keyData.get(numEntries - 1);
        last.setLength(metaDataStartPos - last.offset);
    }

    return keyData;
}

From source file:com.linkedin.cubert.io.rubix.RubixFile.java

License:Open Source License

private static void extract(RubixFile<Tuple, Object> rfile, KeyData<Tuple> keyData, String output)
        throws IOException, InstantiationException, IllegalAccessException, ClassNotFoundException {
    final int BUF_SIZE = 32 * 1024;

    Configuration conf = new JobConf();
    File outFile = new File(output);
    if (outFile.exists())
        outFile.delete();
    outFile.createNewFile();

    long offset = keyData.getOffset();
    long length = keyData.getLength();
    Tuple key = keyData.getKey();

    print.f("Extracting block %d (off=%d len=%d) from %s", keyData.getBlockId(), offset, length,
            rfile.path.toString());

    BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(outFile));

    // copy the data
    if (length > 0) {
        FileSystem fs = FileSystem.get(conf);
        FSDataInputStream in = fs.open(rfile.path);
        in.seek(offset);

        byte[] data = new byte[BUF_SIZE];
        long toRead = length;
        while (toRead > 0) {
            int thisRead = toRead > BUF_SIZE ? BUF_SIZE : (int) toRead;
            in.readFully(data, 0, thisRead);
            bos.write(data, 0, thisRead);
            toRead -= thisRead;
            System.out.print(".");
        }
        System.out.println();
    }
    // copy the key section
    ByteArrayOutputStream keySectionStream = new ByteArrayOutputStream();
    DataOutput keySectionOut = new DataOutputStream(keySectionStream);
    SerializationFactory serializationFactory = new SerializationFactory(conf);
    Serializer<Tuple> keySerializer = serializationFactory.getSerializer(rfile.getKeyClass());
    keySerializer.open(keySectionStream);

    keySerializer.serialize(key);
    keySectionOut.writeLong(0); // position
    keySectionOut.writeLong(keyData.getBlockId());
    keySectionOut.writeLong(keyData.getNumRecords());

    byte[] trailerBytes = keySectionStream.toByteArray();

    JsonNode json = JsonUtils.cloneNode(rfile.metadataJson);
    ((ObjectNode) json).put("numberOfBlocks", 1);

    DataOutput out = new DataOutputStream(bos);
    out.writeUTF(json.toString());
    out.writeInt(trailerBytes.length);
    out.write(trailerBytes);
    out.writeLong(length); // trailer start offset
    bos.close();
}

From source file:com.linkedin.cubert.io.rubix.RubixRecordReader.java

License:Open Source License

public void initialize(InputSplit split, Configuration conf) throws IOException, InterruptedException {
    @SuppressWarnings("unchecked")
    RubixInputSplit<K, V> rsplit = (RubixInputSplit<K, V>) split;

    SerializationFactory serializationFactory = new SerializationFactory(conf);
    switch (rsplit.getBlockSerializationType()) {
    case DEFAULT:
        valueDeserializer = serializationFactory.getDeserializer(rsplit.getValueClass());
        break;
    case COMPACT:
        BlockSchema schema = rsplit.getSchema();
        valueDeserializer = new CompactDeserializer<V>(schema);
        break;
    }

    key = rsplit.getKey();

    // store the blockid and partition key in the conf
    conf.setLong("MY_BLOCK_ID", rsplit.getBlockId());
    conf.setLong("MY_NUM_RECORDS", rsplit.getNumRecords());
    ByteArrayOutputStream tmpOut = new ByteArrayOutputStream();
    ((Tuple) key).write(new DataOutputStream(tmpOut));
    String keySerialized = SerializerUtils.serializeToString(tmpOut.toByteArray());
    conf.set("MY_PARTITION_KEY", keySerialized);

    Path path = rsplit.getFilename();
    offset = rsplit.getOffset();
    length = rsplit.getLength();

    FileSystem fs = path.getFileSystem(conf);
    FSDataInputStream fsin = fs.open(path);
    fsin.seek(offset);

    in = new BlockInputStream(fsin, length);
    CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(path);
    if (codec != null) {
        print.f("codec is not null and it is %s", codec.getClass().toString());
        in = codec.createInputStream(in);
    } else {
        print.f("codec is null");
    }

    valueDeserializer.open(in);
}

From source file:com.linkedin.cubert.pig.piggybank.storage.avro.AvroStorage.java

License:Apache License

/**
 * This method is called to return the schema of an avro schema file. This
 * method is different than {@link #getSchema}, which returns the schema
 * from a data file.
 *
 * @param path  path of a file or first level directory
 * @param fs  file system
 * @return avro schema
 * @throws IOException
 */
protected Schema getSchemaFromFile(Path path, FileSystem fs) throws IOException {
    /* get path of the last file */
    Path lastFile = AvroStorageUtils.getLast(path, fs);
    if (lastFile == null) {
        return null;
    }

    /* read in file and obtain schema */
    GenericDatumReader<Object> avroReader = new GenericDatumReader<Object>();
    InputStream hdfsInputStream = fs.open(lastFile);
    Schema ret = Schema.parse(hdfsInputStream);
    hdfsInputStream.close();

    return ret;
}

From source file:com.linkedin.cubert.pig.piggybank.storage.avro.AvroStorageUtils.java

License:Apache License

/**
 * This method is called by {@link #getAvroSchema}. The default implementation
 * returns the schema of an avro file; or the schema of the last file in a first-level
 * directory (it does not contain sub-directories).
 *
 * @param path  path of a file or first level directory
 * @param fs  file system
 * @return avro schema
 * @throws IOException
 */
public static Schema getSchema(Path path, FileSystem fs) throws IOException {
    /* get path of the last file */
    Path lastFile = AvroStorageUtils.getLast(path, fs);
    if (lastFile == null) {
        return null;
    }

    /* read in file and obtain schema */
    GenericDatumReader<Object> avroReader = new GenericDatumReader<Object>();
    InputStream hdfsInputStream = fs.open(lastFile);
    DataFileStream<Object> avroDataStream = new DataFileStream<Object>(hdfsInputStream, avroReader);
    Schema ret = avroDataStream.getSchema();
    avroDataStream.close();

    return ret;
}