List of usage examples for org.apache.hadoop.fs FileSystem getConf
@Override
public Configuration getConf()
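FileSystem.getConf() returns the Configuration the FileSystem instance was initialized with, which is useful when a downstream Hadoop API needs a Configuration but only the FileSystem is in scope (as in the examples below). A minimal sketch of that pattern; the class name and the sample path are illustrative, not taken from any of the source files listed here:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetConfExample {
    public static void main(String[] args) throws IOException {
        // Obtain a FileSystem from a fresh Configuration (defaults loaded from core-site.xml etc.)
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // getConf() hands back the Configuration this FileSystem was initialized with,
        // so code that only receives the FileSystem can still read cluster settings.
        Configuration fsConf = fs.getConf();
        System.out.println("fs.defaultFS = " + fsConf.get("fs.defaultFS"));

        // Typical use, as in the examples below: pass it along to another Hadoop API
        // that requires a Configuration, here simply to qualify a relative path.
        Path qualified = new Path("example.txt").makeQualified(fs.getUri(), fs.getWorkingDirectory());
        System.out.println("Qualified path: " + qualified);
    }
}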
From source file:org.kitesdk.data.spi.Schemas.java
License:Apache License
public static Schema fromParquet(FileSystem fs, Path location) throws IOException {
    ParquetMetadata footer = ParquetFileReader.readFooter(fs.getConf(), location);

    String schemaString = footer.getFileMetaData()
        .getKeyValueMetaData().get("parquet.avro.schema");
    if (schemaString == null) {
        // try the older property
        schemaString = footer.getFileMetaData()
            .getKeyValueMetaData().get("avro.schema");
    }

    if (schemaString != null) {
        return new Schema.Parser().parse(schemaString);
    } else {
        return new AvroSchemaConverter()
            .convert(footer.getFileMetaData().getSchema());
    }
}
From source file:org.kitesdk.data.TestDatasetDescriptor.java
License:Apache License
@Test
public void testSchemaFromHdfs() throws IOException {
    MiniDFSTest.setupFS();
    FileSystem fs = MiniDFSTest.getDFS();

    // copy a schema to HDFS
    Path schemaPath = fs.makeQualified(new Path("schema.avsc"));
    FSDataOutputStream out = fs.create(schemaPath);
    IOUtils.copyBytes(DatasetTestUtilities.USER_SCHEMA_URL.toURL().openStream(), out, fs.getConf());
    out.close();

    // build a schema using the HDFS path and check it's the same
    Schema schema = new DatasetDescriptor.Builder()
        .schemaUri(schemaPath.toUri())
        .build()
        .getSchema();
    Assert.assertEquals(DatasetTestUtilities.USER_SCHEMA, schema);

    MiniDFSTest.teardownFS();
}
From source file:org.mrgeo.data.accumulo.metadata.AccumuloMrsImagePyramidMetadataFileWriter.java
License:Apache License
@Override
public void write(MrsImagePyramidMetadata metadata) throws IOException {
    // write the metadata object to hdfs
    Properties mrgeoAccProps = AccumuloConnector.getAccumuloProperties();

    ColumnVisibility cv;
    if (mrgeoAccProps.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_VIZ) == null) {
        cv = new ColumnVisibility();
    } else {
        cv = new ColumnVisibility(mrgeoAccProps.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_VIZ));
    }

    Path path = new Path(workDir, "meta.rf");
    FileSystem fs = HadoopFileUtils.getFileSystem(path);
    if (fs.exists(path)) {
        fs.delete(path, false);
    }
    log.debug("Saving metadata to " + path.toString());

    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    String metadataStr = null;
    try {
        metadata.save(baos);
        metadataStr = baos.toString();
        baos.close();
    } catch (IOException ioe) {
        throw new RuntimeException(ioe.getMessage());
    }

    FileSKVWriter metaWrite = FileOperations.getInstance().openWriter(path.toString(), fs, fs.getConf(),
            AccumuloConfiguration.getDefaultConfiguration());
    metaWrite.startDefaultLocalityGroup();

    Key metKey = new Key(MrGeoAccumuloConstants.MRGEO_ACC_METADATA,
            MrGeoAccumuloConstants.MRGEO_ACC_METADATA,
            MrGeoAccumuloConstants.MRGEO_ACC_CQALL);
    Value metValue = new Value(metadataStr.getBytes());
    metaWrite.append(metKey, metValue);
    metaWrite.close();
}
From source file:org.mrgeo.data.accumulo.metadata.AccumuloMrsPyramidMetadataFileWriter.java
License:Apache License
@Override
public void write(MrsPyramidMetadata metadata) throws IOException {
    // write the metadata object to hdfs
    // Properties mrgeoAccProps = AccumuloConnector.getAccumuloProperties();
    // ColumnVisibility cv;
    // if (mrgeoAccProps.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_VIZ) == null) {
    //     cv = new ColumnVisibility();
    // } else {
    //     cv = new ColumnVisibility(mrgeoAccProps.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_VIZ));
    // }

    Path path = new Path(workDir, "meta.rf");
    FileSystem fs = HadoopFileUtils.getFileSystem(path);
    if (fs.exists(path)) {
        fs.delete(path, false);
    }
    log.debug("Saving metadata to " + path.toString());

    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    String metadataStr = null;
    metadata.save(baos);
    metadataStr = baos.toString();
    baos.close();

    FileSKVWriter metaWrite = FileOperations.getInstance().openWriter(path.toString(), fs, fs.getConf(),
            AccumuloConfiguration.getDefaultConfiguration());
    metaWrite.startDefaultLocalityGroup();

    Key metKey = new Key(MrGeoAccumuloConstants.MRGEO_ACC_METADATA,
            MrGeoAccumuloConstants.MRGEO_ACC_METADATA,
            MrGeoAccumuloConstants.MRGEO_ACC_CQALL);
    Value metValue = new Value(metadataStr.getBytes());
    metaWrite.append(metKey, metValue);
    metaWrite.close();
}
From source file:org.oclc.firefly.hadoop.backup.Backup.java
License:Apache License
/**
 * Create mapper input files containing the paths to copy
 * @param mapperInput The list of files that the copy mappers should copy
 * @param numMapTasks The number of map tasks
 * @param fs The file system to write to
 * @param id The mapper id
 * @return The list of input files for a mapper
 * @throws IOException If we fail to create input files
 */
private List<Path> createMapperInputSequenceFiles(List<Pair<String, HRegionInfo>> mapperInput,
        int numMapTasks, FileSystem fs, int id) throws IOException {
    int idx = 0;
    List<Path> paths = new ArrayList<Path>();
    List<SequenceFile.Writer> writers = new ArrayList<SequenceFile.Writer>();
    String inputDir = getMapInputDirectory(id);

    // delete this directory if it already exists
    fs.delete(new Path(inputDir), true);

    // each mapper gets an input file
    for (int i = 0; i < numMapTasks; i++) {
        // open the input file for writing
        Path mapInputFile = new Path(inputDir + "/mapper-input-" + i + ".txt");
        fs.delete(mapInputFile, false);

        SequenceFile.Writer writer = SequenceFile.createWriter(fs, fs.getConf(), mapInputFile,
                Text.class, HRegionInfo.class, SequenceFile.CompressionType.NONE);

        LOG.debug("Mapper input: " + mapInputFile);
        paths.add(mapInputFile);
        writers.add(writer);
    }

    // Assign copy paths to mappers round-robin
    for (Pair<String, HRegionInfo> pair : mapperInput) {
        Text key = new Text(pair.getFirst());
        HRegionInfo value = new HRegionInfo(pair.getSecond());

        LOG.debug("Appending " + key + ", " + value.getEncodedName());
        writers.get(idx).append(key, value);

        idx++;
        if (idx >= writers.size()) {
            idx = 0;
        }
    }

    // close writers
    for (SequenceFile.Writer writer : writers) {
        try {
            writer.sync();
            writer.close();
        } catch (Exception e) {
            // nothing to do here
        }
    }

    return paths;
}
From source file:org.oclc.firefly.hadoop.backup.Backup.java
License:Apache License
/**
 * Get the list of files to copy.
 * @param fs The file system to get files from
 * @param tableRegions The table regions for which to look for files
 * @return A list of file names
 * @throws IOException If we fail to communicate with the filesystem
 */
public List<FileStatus> getTableInfoFiles(FileSystem fs, Map<String, List<CatalogRow>> tableRegions)
        throws IOException {
    List<FileStatus> ret = new ArrayList<FileStatus>();
    String rootDir = fs.getConf().get(HConstants.HBASE_DIR);

    // Get the list of files to copy one table at a time
    for (Map.Entry<String, List<CatalogRow>> entry : tableRegions.entrySet()) {
        String tableName = entry.getKey();
        Path tableDirPath = new Path(rootDir, tableName);

        // Add .tableinfo to the list of files to copy
        try {
            FileStatus tableInfoFile = BackupUtils.getTableInfoPath(fs, tableDirPath);
            ret.add(tableInfoFile);
        } catch (FileNotFoundException e) {
            // Not sure what to do if we can't find this file
            LOG.warn("No .tableinfo file found for table " + tableName);
        }
    }

    return ret;
}
From source file:org.oclc.firefly.hadoop.backup.Backup.java
License:Apache License
/**
 * Get the list of files to copy.
 * @param fs The file system to get files from
 * @param tableRegions The table regions for which to look for files
 * @return A list of file names
 * @throws IOException If we fail to communicate with the filesystem
 */
public List<FileStatus> getListOfFiles(FileSystem fs, Map<String, List<CatalogRow>> tableRegions)
        throws IOException {
    List<FileStatus> ret = new ArrayList<FileStatus>();
    String rootDir = fs.getConf().get(HConstants.HBASE_DIR);

    // Get the list of files to copy one table at a time
    for (Map.Entry<String, List<CatalogRow>> entry : tableRegions.entrySet()) {
        String tableName = entry.getKey();
        Path tableDirPath = new Path(rootDir, tableName);

        // Add .tableinfo to the list of files to copy
        try {
            FileStatus tableInfoFile = BackupUtils.getTableInfoPath(fs, tableDirPath);
            ret.add(tableInfoFile);
        } catch (FileNotFoundException e) {
            // Not sure what to do if we can't find this file
            LOG.warn("No .tableinfo file found for table " + tableName);
        }

        // Get the table descriptor so we may get information about the table we are extracting
        HTableDescriptor tDesc = FSTableDescriptors.getTableDescriptor(fs, tableDirPath);
        if (tDesc == null) {
            throw new TableNotFoundException("Could not get HTableDescriptor for table " + tableName);
        }

        // Need to find out what column families this table has
        // so that we may generate paths to the files we are copying
        HColumnDescriptor[] columnFamilies = tDesc.getColumnFamilies();

        List<CatalogRow> regions = entry.getValue();
        for (CatalogRow r : regions) {
            HRegionInfo info = r.getHRegionInfo();
            String regionName = info.getEncodedName();

            // Add .regioninfo to the list of files to copy
            Path regionDirPath = new Path(tableDirPath, regionName);
            Path regionInfoFilePath = new Path(regionDirPath, HRegion.REGIONINFO_FILE);
            try {
                FileStatus regionInfoFile = fs.getFileStatus(regionInfoFilePath);
                ret.add(regionInfoFile);
            } catch (FileNotFoundException e) {
                // Not sure what to do if we can't find this file
                LOG.warn("No .regioninfo file found for region " + tableName + "/" + regionName);
            }

            for (HColumnDescriptor col : columnFamilies) {
                String family = col.getNameAsString();
                Path regionFamilyDirPath = new Path(regionDirPath, family);

                try {
                    // Add column family directories to make sure
                    // they get copied should they be empty
                    FileStatus dirStatus = fs.getFileStatus(regionFamilyDirPath);
                    ret.add(dirStatus);

                    // Finally, get all the files under this column family
                    FileStatus[] statusList = fs.listStatus(regionFamilyDirPath);
                    if (statusList != null) {
                        for (FileStatus status : statusList) {
                            ret.add(status);
                        }
                    }
                } catch (FileNotFoundException e) {
                    LOG.warn("Expecting region family directory '" + regionFamilyDirPath + "' but not found");
                }
            }
        }
    }

    return ret;
}
From source file:org.oclc.firefly.hadoop.backup.BackupUtils.java
License:Apache License
/**
 * Get the list of files to copy.
 * @param fs The file system to get files from
 * @param region The region to get the list of files for
 * @return A list of file names
 * @throws IOException If we fail to communicate with the filesystem
 */
public static List<FileStatus> getListOfRegionFiles(FileSystem fs, HRegionInfo region) throws IOException {
    List<FileStatus> ret = new ArrayList<FileStatus>();
    String rootDir = fs.getConf().get(HConstants.HBASE_DIR);
    String tableName = region.getTableNameAsString();
    String regionName = region.getEncodedName();
    Path tableDirPath = new Path(rootDir, tableName);

    // Get the table descriptor so we may get information about the table we are extracting
    HTableDescriptor tDesc = FSTableDescriptors.getTableDescriptor(fs, tableDirPath);
    if (tDesc == null) {
        throw new TableNotFoundException("Could not get HTableDescriptor for table " + tableName);
    }

    // Need to find out what column families this table has
    // so that we may generate paths to the files we are copying
    HColumnDescriptor[] columnFamilies = tDesc.getColumnFamilies();

    // Add .regioninfo to the list of files to copy
    Path regionDirPath = new Path(tableDirPath, regionName);
    Path regionInfoFilePath = new Path(regionDirPath, HRegion.REGIONINFO_FILE);

    // check that the region directory still exists
    if (!fs.exists(regionDirPath)) {
        throw new FileNotFoundException("Region directory no longer exists: " + regionDirPath);
    }

    // need the region info file
    FileStatus regionInfoFile = fs.getFileStatus(regionInfoFilePath);
    ret.add(regionInfoFile);

    // Go through each column family directory and list its files
    for (HColumnDescriptor col : columnFamilies) {
        String family = col.getNameAsString();
        Path regionFamilyDirPath = new Path(regionDirPath, family);

        try {
            // Add column family directories to make sure
            // they get copied should they be empty
            FileStatus dirStatus = fs.getFileStatus(regionFamilyDirPath);
            ret.add(dirStatus);

            // Finally, get all the files under this column family
            FileStatus[] statusList = fs.listStatus(regionFamilyDirPath);
            if (statusList != null) {
                for (FileStatus status : statusList) {
                    ret.add(status);
                }
            }
        } catch (FileNotFoundException e) {
            LOG.warn("Expecting region family directory '" + regionFamilyDirPath + "' but not found");
            throw e;
        }
    }

    return ret;
}
From source file:org.oclc.firefly.hadoop.backup.BackupUtils.java
License:Apache License
/**
 * Get the relative HBase path of the given path for the given file system
 * @param fs The file system where the path lives
 * @param path The absolute path to the HBase file
 * @return The relative HBase path
 */
public static String getFsRelativePath(FileSystem fs, Path path) {
    String root = fs.getConf().get(HConstants.HBASE_DIR);
    return StringUtils.substringAfter(path.toString(), root);
}
From source file:org.pentaho.di.job.entries.hadooptransjobexecutor.DistributedCacheUtil.java
License:Apache License
/**
 * Stages the source file or folder to a Hadoop file system and sets its permission and replication
 * values appropriately for use with the Distributed Cache. WARNING: This will delete the contents
 * of dest before staging the archive.
 *
 * @param source    File or folder to copy to the file system. If it is a folder all contents will be
 *                  copied into dest.
 * @param fs        Hadoop file system to store the contents of the archive in
 * @param dest      Destination to copy source into. If source is a file, the new file name will be
 *                  exactly dest. If source is a folder its contents will be copied into dest. For more
 *                  info see {@link FileSystem#copyFromLocalFile(org.apache.hadoop.fs.Path, org.apache.hadoop.fs.Path)}.
 * @param overwrite Should an existing file or folder be overwritten? If not, an exception will be thrown.
 * @throws IOException Destination exists but is not a directory
 * @throws KettleFileException Source does not exist, or destination exists and overwrite is false
 */
public void stageForCache(FileObject source, FileSystem fs, Path dest, boolean overwrite)
        throws IOException, KettleFileException {
    if (!source.exists()) {
        throw new KettleFileException(BaseMessages.getString(DistributedCacheUtil.class,
                "DistributedCacheUtil.SourceDoesNotExist", source));
    }

    if (fs.exists(dest)) {
        if (overwrite) {
            // It is a directory, clear it out
            fs.delete(dest, true);
        } else {
            throw new KettleFileException(BaseMessages.getString(DistributedCacheUtil.class,
                    "DistributedCacheUtil.DestinationExists", dest.toUri().getPath()));
        }
    }

    // Use the same replication we'd use for submitting jobs
    short replication = (short) fs.getConf().getInt("mapred.submit.replication", 10);

    Path local = new Path(source.getURL().getPath());
    fs.copyFromLocalFile(local, dest);
    fs.setPermission(dest, CACHED_FILE_PERMISSION);
    fs.setReplication(dest, replication);
}