List of usage examples for org.apache.hadoop.fs FileSystem getConf
@Override
public Configuration getConf()
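FileSystem.getConf() returns the Configuration the FileSystem instance was initialized with, which is useful when a downstream Hadoop API needs a Configuration but only the FileSystem is in scope (as in the examples below). A minimal sketch of that pattern; the class name and the sample path are illustrative, not taken from any of the source files listed here:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetConfExample {
    public static void main(String[] args) throws IOException {
        // Obtain a FileSystem from a fresh Configuration (defaults loaded from core-site.xml etc.)
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // getConf() hands back the Configuration this FileSystem was initialized with,
        // so code that only receives the FileSystem can still read cluster settings.
        Configuration fsConf = fs.getConf();
        System.out.println("fs.defaultFS = " + fsConf.get("fs.defaultFS"));

        // Typical use, as in the examples below: pass it along to another Hadoop API
        // that requires a Configuration, here simply to qualify a relative path.
        Path qualified = new Path("example.txt").makeQualified(fs.getUri(), fs.getWorkingDirectory());
        System.out.println("Qualified path: " + qualified);
    }
}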
From source file:org.kitesdk.data.spi.Schemas.java
License:Apache License
public static Schema fromParquet(FileSystem fs, Path location) throws IOException {
    ParquetMetadata footer = ParquetFileReader.readFooter(fs.getConf(), location);

    String schemaString = footer.getFileMetaData()
        .getKeyValueMetaData().get("parquet.avro.schema");
    if (schemaString == null) {
        // try the older property
        schemaString = footer.getFileMetaData()
            .getKeyValueMetaData().get("avro.schema");
    }

    if (schemaString != null) {
        return new Schema.Parser().parse(schemaString);
    } else {
        return new AvroSchemaConverter()
            .convert(footer.getFileMetaData().getSchema());
    }
}
From source file:org.kitesdk.data.TestDatasetDescriptor.java
License:Apache License
@Test
public void testSchemaFromHdfs() throws IOException {
    MiniDFSTest.setupFS();
    FileSystem fs = MiniDFSTest.getDFS();

    // copy a schema to HDFS
    Path schemaPath = fs.makeQualified(new Path("schema.avsc"));
    FSDataOutputStream out = fs.create(schemaPath);
    IOUtils.copyBytes(DatasetTestUtilities.USER_SCHEMA_URL.toURL().openStream(), out, fs.getConf());
    out.close();

    // build a schema using the HDFS path and check it's the same
    Schema schema = new DatasetDescriptor.Builder()
        .schemaUri(schemaPath.toUri())
        .build()
        .getSchema();
    Assert.assertEquals(DatasetTestUtilities.USER_SCHEMA, schema);

    MiniDFSTest.teardownFS();
}
From source file:org.mrgeo.data.accumulo.metadata.AccumuloMrsImagePyramidMetadataFileWriter.java
License:Apache License
@Override
public void write(MrsImagePyramidMetadata metadata) throws IOException {
    // write the metadata object to hdfs
    Properties mrgeoAccProps = AccumuloConnector.getAccumuloProperties();

    ColumnVisibility cv;
    if (mrgeoAccProps.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_VIZ) == null) {
        cv = new ColumnVisibility();
    } else {
        cv = new ColumnVisibility(mrgeoAccProps.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_VIZ));
    }

    Path path = new Path(workDir, "meta.rf");
    FileSystem fs = HadoopFileUtils.getFileSystem(path);
    if (fs.exists(path)) {
        fs.delete(path, false);
    }
    log.debug("Saving metadata to " + path.toString());

    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    String metadataStr = null;
    try {
        metadata.save(baos);
        metadataStr = baos.toString();
        baos.close();
    } catch (IOException ioe) {
        throw new RuntimeException(ioe.getMessage());
    }

    FileSKVWriter metaWrite = FileOperations.getInstance().openWriter(path.toString(), fs, fs.getConf(),
            AccumuloConfiguration.getDefaultConfiguration());
    metaWrite.startDefaultLocalityGroup();

    Key metKey = new Key(MrGeoAccumuloConstants.MRGEO_ACC_METADATA,
            MrGeoAccumuloConstants.MRGEO_ACC_METADATA,
            MrGeoAccumuloConstants.MRGEO_ACC_CQALL);
    Value metValue = new Value(metadataStr.getBytes());
    metaWrite.append(metKey, metValue);
    metaWrite.close();
}
From source file:org.mrgeo.data.accumulo.metadata.AccumuloMrsPyramidMetadataFileWriter.java
License:Apache License
@Override
public void write(MrsPyramidMetadata metadata) throws IOException {
    // write the metadata object to hdfs
    // Properties mrgeoAccProps = AccumuloConnector.getAccumuloProperties();
    // ColumnVisibility cv;
    // if (mrgeoAccProps.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_VIZ) == null) {
    //     cv = new ColumnVisibility();
    // } else {
    //     cv = new ColumnVisibility(mrgeoAccProps.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_VIZ));
    // }

    Path path = new Path(workDir, "meta.rf");
    FileSystem fs = HadoopFileUtils.getFileSystem(path);
    if (fs.exists(path)) {
        fs.delete(path, false);
    }
    log.debug("Saving metadata to " + path.toString());

    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    String metadataStr = null;
    metadata.save(baos);
    metadataStr = baos.toString();
    baos.close();

    FileSKVWriter metaWrite = FileOperations.getInstance().openWriter(path.toString(), fs, fs.getConf(),
            AccumuloConfiguration.getDefaultConfiguration());
    metaWrite.startDefaultLocalityGroup();

    Key metKey = new Key(MrGeoAccumuloConstants.MRGEO_ACC_METADATA,
            MrGeoAccumuloConstants.MRGEO_ACC_METADATA,
            MrGeoAccumuloConstants.MRGEO_ACC_CQALL);
    Value metValue = new Value(metadataStr.getBytes());
    metaWrite.append(metKey, metValue);
    metaWrite.close();
}
From source file:org.oclc.firefly.hadoop.backup.Backup.java
License:Apache License
/**
 * Create mapper input files containing the paths to copy
 * @param mapperInput The list of files that the copy mappers should copy
 * @param numMapTasks The number of map tasks
 * @param fs The file system to write to
 * @param id The mapper id
 * @return The list of input files for a mapper
 * @throws IOException If we fail to create input files
 */
private List<Path> createMapperInputSequenceFiles(List<Pair<String, HRegionInfo>> mapperInput,
        int numMapTasks, FileSystem fs, int id) throws IOException {
    int idx = 0;
    List<Path> paths = new ArrayList<Path>();
    List<SequenceFile.Writer> writers = new ArrayList<SequenceFile.Writer>();
    String inputDir = getMapInputDirectory(id);

    // delete this directory if it already exists
    fs.delete(new Path(inputDir), true);

    // each mapper gets an input file
    for (int i = 0; i < numMapTasks; i++) {
        // open the input file for writing
        Path mapInputFile = new Path(inputDir + "/mapper-input-" + i + ".txt");
        fs.delete(mapInputFile, false);

        SequenceFile.Writer writer = SequenceFile.createWriter(fs, fs.getConf(), mapInputFile,
                Text.class, HRegionInfo.class, SequenceFile.CompressionType.NONE);

        LOG.debug("Mapper input: " + mapInputFile);
        paths.add(mapInputFile);
        writers.add(writer);
    }

    // Assign copy paths to mappers round-robin
    for (Pair<String, HRegionInfo> pair : mapperInput) {
        Text key = new Text(pair.getFirst());
        HRegionInfo value = new HRegionInfo(pair.getSecond());

        LOG.debug("Appending " + key + ", " + value.getEncodedName());
        writers.get(idx).append(key, value);

        idx++;
        if (idx >= writers.size()) {
            idx = 0;
        }
    }

    // close writers
    for (SequenceFile.Writer writer : writers) {
        try {
            writer.sync();
            writer.close();
        } catch (Exception e) {
            // nothing to do here
        }
    }

    return paths;
}
From source file:org.oclc.firefly.hadoop.backup.Backup.java
License:Apache License
/**
 * Get the list of files to copy.
 * @param fs The file system to get files from
 * @param tableRegions The table regions for which to look for files
 * @return A list of file names
 * @throws IOException If we fail to communicate with the filesystem
 */
public List<FileStatus> getTableInfoFiles(FileSystem fs, Map<String, List<CatalogRow>> tableRegions)
        throws IOException {
    List<FileStatus> ret = new ArrayList<FileStatus>();
    String rootDir = fs.getConf().get(HConstants.HBASE_DIR);

    // Get the list of files to copy one table at a time
    for (Map.Entry<String, List<CatalogRow>> entry : tableRegions.entrySet()) {
        String tableName = entry.getKey();
        Path tableDirPath = new Path(rootDir, tableName);

        // Add .tableinfo to the list of files to copy
        try {
            FileStatus tableInfoFile = BackupUtils.getTableInfoPath(fs, tableDirPath);
            ret.add(tableInfoFile);
        } catch (FileNotFoundException e) {
            // Not sure what to do if we can't find this file
            LOG.warn("No .tableinfo file found for table " + tableName);
        }
    }

    return ret;
}
From source file:org.oclc.firefly.hadoop.backup.Backup.java
License:Apache License
/**
 * Get the list of files to copy.
 * @param fs The file system to get files from
 * @param tableRegions The table regions for which to look for files
 * @return A list of file names
 * @throws IOException If we fail to communicate with the filesystem
 */
public List<FileStatus> getListOfFiles(FileSystem fs, Map<String, List<CatalogRow>> tableRegions)
        throws IOException {
    List<FileStatus> ret = new ArrayList<FileStatus>();
    String rootDir = fs.getConf().get(HConstants.HBASE_DIR);

    // Get the list of files to copy one table at a time
    for (Map.Entry<String, List<CatalogRow>> entry : tableRegions.entrySet()) {
        String tableName = entry.getKey();
        Path tableDirPath = new Path(rootDir, tableName);

        // Add .tableinfo to the list of files to copy
        try {
            FileStatus tableInfoFile = BackupUtils.getTableInfoPath(fs, tableDirPath);
            ret.add(tableInfoFile);
        } catch (FileNotFoundException e) {
            // Not sure what to do if we can't find this file
            LOG.warn("No .tableinfo file found for table " + tableName);
        }

        // Get the table descriptor so we may get information about the table we are extracting
        HTableDescriptor tDesc = FSTableDescriptors.getTableDescriptor(fs, tableDirPath);
        if (tDesc == null) {
            throw new TableNotFoundException("Could not get HTableDescriptor for table " + tableName);
        }

        // Need to find out what column families this table has
        // so that we may generate paths to the files we are copying
        HColumnDescriptor[] columnFamilies = tDesc.getColumnFamilies();

        List<CatalogRow> regions = entry.getValue();
        for (CatalogRow r : regions) {
            HRegionInfo info = r.getHRegionInfo();
            String regionName = info.getEncodedName();

            // Add .regioninfo to the list of files to copy
            Path regionDirPath = new Path(tableDirPath, regionName);
            Path regionInfoFilePath = new Path(regionDirPath, HRegion.REGIONINFO_FILE);
            try {
                FileStatus regionInfoFile = fs.getFileStatus(regionInfoFilePath);
                ret.add(regionInfoFile);
            } catch (FileNotFoundException e) {
                // Not sure what to do if we can't find this file
                LOG.warn("No .regioninfo file found for region " + tableName + "/" + regionName);
            }

            for (HColumnDescriptor col : columnFamilies) {
                String family = col.getNameAsString();
                Path regionFamilyDirPath = new Path(regionDirPath, family);

                try {
                    // Add column family directories to make sure
                    // they get copied should they be empty
                    FileStatus dirStatus = fs.getFileStatus(regionFamilyDirPath);
                    ret.add(dirStatus);

                    // Finally, get all the files under this column family
                    FileStatus[] statusList = fs.listStatus(regionFamilyDirPath);
                    if (statusList != null) {
                        for (FileStatus status : statusList) {
                            ret.add(status);
                        }
                    }
                } catch (FileNotFoundException e) {
                    LOG.warn("Expecting region family directory '" + regionFamilyDirPath + "' but not found");
                }
            }
        }
    }

    return ret;
}
From source file:org.oclc.firefly.hadoop.backup.BackupUtils.java
License:Apache License
/**
 * Get the list of files to copy.
 * @param fs The file system to get files from
 * @param region The region to get the list of files for
 * @return A list of file names
 * @throws IOException If we fail to communicate with the filesystem
 */
public static List<FileStatus> getListOfRegionFiles(FileSystem fs, HRegionInfo region) throws IOException {
    List<FileStatus> ret = new ArrayList<FileStatus>();
    String rootDir = fs.getConf().get(HConstants.HBASE_DIR);
    String tableName = region.getTableNameAsString();
    String regionName = region.getEncodedName();
    Path tableDirPath = new Path(rootDir, tableName);

    // Get the table descriptor so we may get information about the table we are extracting
    HTableDescriptor tDesc = FSTableDescriptors.getTableDescriptor(fs, tableDirPath);
    if (tDesc == null) {
        throw new TableNotFoundException("Could not get HTableDescriptor for table " + tableName);
    }

    // Need to find out what column families this table has
    // so that we may generate paths to the files we are copying
    HColumnDescriptor[] columnFamilies = tDesc.getColumnFamilies();

    // Add .regioninfo to the list of files to copy
    Path regionDirPath = new Path(tableDirPath, regionName);
    Path regionInfoFilePath = new Path(regionDirPath, HRegion.REGIONINFO_FILE);

    // check that the region directory still exists
    if (!fs.exists(regionDirPath)) {
        throw new FileNotFoundException("Region directory no longer exists: " + regionDirPath);
    }

    // need the region info file
    FileStatus regionInfoFile = fs.getFileStatus(regionInfoFilePath);
    ret.add(regionInfoFile);

    // Go through each column family directory and list its files
    for (HColumnDescriptor col : columnFamilies) {
        String family = col.getNameAsString();
        Path regionFamilyDirPath = new Path(regionDirPath, family);

        try {
            // Add column family directories to make sure
            // they get copied should they be empty
            FileStatus dirStatus = fs.getFileStatus(regionFamilyDirPath);
            ret.add(dirStatus);

            // Finally, get all the files under this column family
            FileStatus[] statusList = fs.listStatus(regionFamilyDirPath);
            if (statusList != null) {
                for (FileStatus status : statusList) {
                    ret.add(status);
                }
            }
        } catch (FileNotFoundException e) {
            LOG.warn("Expecting region family directory '" + regionFamilyDirPath + "' but not found");
            throw e;
        }
    }

    return ret;
}
From source file:org.oclc.firefly.hadoop.backup.BackupUtils.java
License:Apache License
/**
 * Get the relative HBase path of the given path for the given file system
 * @param fs The file system where the path lives
 * @param path The absolute path to the HBase file
 * @return The relative HBase path
 */
public static String getFsRelativePath(FileSystem fs, Path path) {
    String root = fs.getConf().get(HConstants.HBASE_DIR);
    return StringUtils.substringAfter(path.toString(), root);
}
From source file:org.pentaho.di.job.entries.hadooptransjobexecutor.DistributedCacheUtil.java
License:Apache License
/**
 * Stages the source file or folder to a Hadoop file system and sets its permission and replication
 * values appropriately for use with the Distributed Cache. WARNING: This will delete the contents
 * of dest before staging the archive.
 *
 * @param source    File or folder to copy to the file system. If it is a folder all contents will be
 *                  copied into dest.
 * @param fs        Hadoop file system to store the contents of the archive in
 * @param dest      Destination to copy source into. If source is a file, the new file name will be
 *                  exactly dest. If source is a folder its contents will be copied into dest. For more
 *                  info see {@link FileSystem#copyFromLocalFile(org.apache.hadoop.fs.Path, org.apache.hadoop.fs.Path)}.
 * @param overwrite Should an existing file or folder be overwritten? If not, an exception will be thrown.
 * @throws IOException Destination exists but is not a directory
 * @throws KettleFileException Source does not exist, or destination exists and overwrite is false
 */
public void stageForCache(FileObject source, FileSystem fs, Path dest, boolean overwrite)
        throws IOException, KettleFileException {
    if (!source.exists()) {
        throw new KettleFileException(BaseMessages.getString(DistributedCacheUtil.class,
                "DistributedCacheUtil.SourceDoesNotExist", source));
    }

    if (fs.exists(dest)) {
        if (overwrite) {
            // It is a directory, clear it out
            fs.delete(dest, true);
        } else {
            throw new KettleFileException(BaseMessages.getString(DistributedCacheUtil.class,
                    "DistributedCacheUtil.DestinationExists", dest.toUri().getPath()));
        }
    }

    // Use the same replication we'd use for submitting jobs
    short replication = (short) fs.getConf().getInt("mapred.submit.replication", 10);

    Path local = new Path(source.getURL().getPath());
    fs.copyFromLocalFile(local, dest);
    fs.setPermission(dest, CACHED_FILE_PERMISSION);
    fs.setReplication(dest, replication);
}