List of usage examples for org.apache.hadoop.fs Path getName
public String getName()
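Path.getName() returns the final component of the path, i.e. everything after the last '/' separator; parent directories and any scheme/authority are stripped. A minimal sketch of that behavior (the path string here is illustrative only):

    Path p = new Path("hdfs://namenode/user/data/part-00000.gz");
    String name = p.getName();      // "part-00000.gz" -- the last path component only
    Path parent = p.getParent();    // hdfs://namenode/user/data

The examples below all rely on this to derive file names, extensions, or column-family names from a Path.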
From source file:com.gemstone.gemfire.cache.hdfs.internal.hoplog.HDFSUnsortedHoplogOrganizer.java
License:Apache License
private FileStatus[] getExpiredHoplogs() throws IOException {
    FileStatus files[] = FSUtils.listStatus(fileSystem, bucketPath, new PathFilter() {
        @Override
        public boolean accept(Path file) {
            // All expired hoplogs end with the expire extension and must match the valid file regex
            String fileName = file.getName();
            if (!fileName.endsWith(EXPIRED_HOPLOG_EXTENSION)) {
                return false;
            }
            return true;
        }
    });
    return files;
}
From source file:com.gemstone.gemfire.cache.hdfs.internal.hoplog.HDFSUnsortedHoplogOrganizer.java
License:Apache License
/**
 * Locks the sorted oplogs collection, removes the oplog and renames it for deletion later.
 * @throws IOException
 */
private void markHoplogsForDeletion() throws IOException {
    ArrayList<IOException> errors = new ArrayList<IOException>();
    FileStatus validHoplogs[] = FSUtils.listStatus(fileSystem, bucketPath, new PathFilter() {
        @Override
        public boolean accept(Path file) {
            // All valid hoplog files must match the regex
            Matcher matcher = HOPLOG_PATTERN.matcher(file.getName());
            return matcher.matches();
        }
    });

    FileStatus[] expired = getExpiredHoplogs();
    validHoplogs = filterValidHoplogs(validHoplogs, expired);

    if (validHoplogs == null || validHoplogs.length == 0) {
        return;
    }

    for (FileStatus fileStatus : validHoplogs) {
        try {
            addExpiryMarkerForAFile(getHoplog(fileStatus.getPath()));
        } catch (IOException e) {
            // even if there is an IO error continue removing other hoplogs and
            // notify at the end
            errors.add(e);
        }
    }

    if (!errors.isEmpty()) {
        for (IOException e : errors) {
            logger.warn(LocalizedStrings.HOPLOG_HOPLOG_REMOVE_FAILED, e);
        }
    }
}
From source file:com.github.dryangkun.hbase.tidx.hive.HiveHFileOutputFormat.java
License:Apache License
@Override
public RecordWriter getHiveRecordWriter(final JobConf jc, final Path finalOutPath,
        Class<? extends Writable> valueClass, boolean isCompressed, Properties tableProperties,
        final Progressable progressable) throws IOException {

    // Read configuration for the target path, first from jobconf, then from table properties
    String hfilePath = getFamilyPath(jc, tableProperties);
    if (hfilePath == null) {
        throw new RuntimeException("Please set " + HFILE_FAMILY_PATH + " to target location for HFiles");
    }

    // Target path's last component is also the column family name.
    final Path columnFamilyPath = new Path(hfilePath);
    final String columnFamilyName = columnFamilyPath.getName();
    final byte[] columnFamilyNameBytes = Bytes.toBytes(columnFamilyName);
    final Job job = new Job(jc);
    setCompressOutput(job, isCompressed);
    setOutputPath(job, finalOutPath);

    // Create the HFile writer
    final org.apache.hadoop.mapreduce.TaskAttemptContext tac = ShimLoader.getHadoopShims()
            .newTaskAttemptContext(job.getConfiguration(), progressable);

    final Path outputdir = FileOutputFormat.getOutputPath(tac);
    final org.apache.hadoop.mapreduce.RecordWriter<ImmutableBytesWritable, KeyValue> fileWriter =
            getFileWriter(tac);

    // Individual columns are going to be pivoted to HBase cells,
    // and for each row, they need to be written out in order
    // of column name, so sort the column names now, creating a
    // mapping to their column position. However, the first
    // column is interpreted as the row key.
    String columnList = tableProperties.getProperty("columns");
    String[] columnArray = columnList.split(",");
    final SortedMap<byte[], Integer> columnMap = new TreeMap<byte[], Integer>(Bytes.BYTES_COMPARATOR);
    int i = 0;
    for (String columnName : columnArray) {
        if (i != 0) {
            columnMap.put(Bytes.toBytes(columnName), i);
        }
        ++i;
    }

    return new RecordWriter() {
        @Override
        public void close(boolean abort) throws IOException {
            try {
                fileWriter.close(null);
                if (abort) {
                    return;
                }
                // Move the hfiles file(s) from the task output directory to the
                // location specified by the user.
                FileSystem fs = outputdir.getFileSystem(jc);
                fs.mkdirs(columnFamilyPath);
                Path srcDir = outputdir;
                for (;;) {
                    FileStatus[] files = fs.listStatus(srcDir, FileUtils.STAGING_DIR_PATH_FILTER);
                    if ((files == null) || (files.length == 0)) {
                        throw new IOException("No family directories found in " + srcDir);
                    }
                    if (files.length != 1) {
                        throw new IOException("Multiple family directories found in " + srcDir);
                    }
                    srcDir = files[0].getPath();
                    if (srcDir.getName().equals(columnFamilyName)) {
                        break;
                    }
                }
                for (FileStatus regionFile : fs.listStatus(srcDir, FileUtils.STAGING_DIR_PATH_FILTER)) {
                    fs.rename(regionFile.getPath(),
                            new Path(columnFamilyPath, regionFile.getPath().getName()));
                }
                // Hive actually wants a file as task output (not a directory), so
                // replace the empty directory with an empty file to keep it happy.
                fs.delete(outputdir, true);
                fs.createNewFile(outputdir);
            } catch (InterruptedException ex) {
                throw new IOException(ex);
            }
        }

        private void writeText(Text text) throws IOException {
            // Decompose the incoming text row into fields.
            String s = text.toString();
            String[] fields = s.split("\u0001");
            assert (fields.length <= (columnMap.size() + 1));

            // First field is the row key.
            byte[] rowKeyBytes = Bytes.toBytes(fields[0]);

            // Remaining fields are cells addressed by column name within row.
            for (Map.Entry<byte[], Integer> entry : columnMap.entrySet()) {
                byte[] columnNameBytes = entry.getKey();
                int iColumn = entry.getValue();
                String val;
                if (iColumn >= fields.length) {
                    // trailing blank field
                    val = "";
                } else {
                    val = fields[iColumn];
                    if ("\\N".equals(val)) {
                        // omit nulls
                        continue;
                    }
                }
                byte[] valBytes = Bytes.toBytes(val);
                KeyValue kv = new KeyValue(rowKeyBytes, columnFamilyNameBytes, columnNameBytes, valBytes);
                try {
                    fileWriter.write(null, kv);
                } catch (IOException e) {
                    LOG.error("Failed while writing row: " + s);
                    throw e;
                } catch (InterruptedException ex) {
                    throw new IOException(ex);
                }
            }
        }

        private void writePut(PutWritable put) throws IOException {
            ImmutableBytesWritable row = new ImmutableBytesWritable(put.getPut().getRow());
            SortedMap<byte[], List<Cell>> cells = put.getPut().getFamilyCellMap();
            for (Map.Entry<byte[], List<Cell>> entry : cells.entrySet()) {
                Collections.sort(entry.getValue(), new CellComparator());
                for (Cell c : entry.getValue()) {
                    try {
                        fileWriter.write(row, KeyValueUtil.copyToNewKeyValue(c));
                    } catch (InterruptedException e) {
                        throw (InterruptedIOException) new InterruptedIOException().initCause(e);
                    }
                }
            }
        }

        @Override
        public void write(Writable w) throws IOException {
            if (w instanceof Text) {
                writeText((Text) w);
            } else if (w instanceof PutWritable) {
                writePut((PutWritable) w);
            } else {
                throw new IOException("Unexpected writable " + w);
            }
        }
    };
}
From source file:com.github.sadikovi.riff.Riff.java
License:Open Source License
/**
 * Infer compression codec from file name.
 * @param path path to the file
 * @return compression codec or null for uncompressed
 */
protected static CompressionCodec inferCompressionCodec(Path path) {
    String name = path.getName();
    int start = name.lastIndexOf('.');
    String ext = (start <= 0) ? "" : name.substring(start);
    return CompressionCodecFactory.forFileExt(ext);
}
From source file:com.github.sakserv.minicluster.yarn.InJvmContainerExecutor.java
License:Apache License
/**
 * YARN provides the ability to pass resources (e.g., classpath) through
 * {@link LocalResource}s, which allows the user to provision all the resources
 * required to run the app. This method will extract those resources as a
 * {@link Set} of {@link URL}s so they are used when the {@link ClassLoader} for a
 * container is created.
 *
 * This is done primarily as a convenience for applications that rely on
 * automatic classpath propagation (e.g., pull everything from my dev
 * classpath) instead of manual.
 *
 * @param container
 * @return
 */
private Set<URL> filterAndBuildUserClasspath(Container container) {
    if (logger.isDebugEnabled()) {
        logger.debug("Building additional classpath for the container: " + container);
    }
    Set<URL> additionalClassPathUrls = new HashSet<URL>();
    Set<Path> userClassPath = this.extractUserProvidedClassPathEntries(container);

    for (Path resourcePath : userClassPath) {
        String resourceName = "file:///" + new File(resourcePath.getName()).getAbsolutePath();
        if (logger.isDebugEnabled()) {
            logger.debug("\t adding " + resourceName + " to the classpath");
        }
        try {
            additionalClassPathUrls.add(new URI(resourceName).toURL());
        } catch (Exception e) {
            throw new IllegalArgumentException(e);
        }
    }
    return additionalClassPathUrls;
}
From source file:com.github.xiaofu.demo.parquet.MyConvertUtils.java
License:Apache License
public static void convertCsvToParquet(File csvFile, Path outputParquetFile, boolean enableDictionary)
        throws IOException {
    LOG.info("Converting " + csvFile.getName() + " to " + outputParquetFile.getName());
    String rawSchema = getSchema(csvFile);
    MessageType schema = MessageTypeParser.parseMessageType(rawSchema);
    MyCsvParquetWriter writer = new MyCsvParquetWriter(outputParquetFile, schema, CompressionCodecName.GZIP,
            BLOCK_SIZE, PAGE_SIZE, enableDictionary);

    BufferedReader br = new BufferedReader(new FileReader(csvFile));
    String line;
    int lineNumber = 0;
    try {
        while ((line = br.readLine()) != null) {
            String[] fields = line.split(Pattern.quote(CSV_DELIMITER));
            writer.write(Arrays.asList(fields));
            ++lineNumber;
        }
        writer.close();
    } finally {
        LOG.info("Number of lines: " + lineNumber);
        Utils.closeQuietly(br);
    }
}
From source file:com.github.xiaofu.demo.parquet.MyConvertUtils.java
License:Apache License
public static void convertParquetToCSV(Path parquetFilePath, File csvOutputFile) throws IOException {
    // Preconditions.checkArgument(parquetFilePath.getName().endsWith(".parquet"),
    //         "parquet file should have .parquet extension");
    Preconditions.checkArgument(csvOutputFile.getName().endsWith(".csv"),
            "csv file should have .csv extension");
    // Preconditions.checkArgument(!csvOutputFile.exists(),
    //         "Output file " + csvOutputFile.getAbsolutePath() + " already exists");
    LOG.info("Converting " + parquetFilePath.getName() + " to " + csvOutputFile.getName());

    Configuration configuration = new Configuration(true);
    GroupReadSupport readSupport = new GroupReadSupport();
    ParquetMetadata readFooter = ParquetFileReader.readFooter(configuration, parquetFilePath);
    MessageType schema = readFooter.getFileMetaData().getSchema();
    readSupport.init(configuration, null, schema);

    BufferedWriter w = new BufferedWriter(new FileWriter(csvOutputFile));
    ParquetReader<Group> reader = new ParquetReader<Group>(parquetFilePath, readSupport);
    try {
        Group g = null;
        while ((g = reader.read()) != null) {
            writeGroup(w, g, schema);
        }
        reader.close();
    } finally {
        Utils.closeQuietly(w);
    }
}
From source file:com.github.xiaofu.demo.parquet.MyConvertUtils.java
License:Apache License
public static void convertParquetToCSV_With_Specified_Columns_And_Column_Filter(Path parquetFilePath,
        File csvOutputFile) throws IOException {
    // Preconditions.checkArgument(parquetFilePath.getName().endsWith(".parquet"),
    //         "parquet file should have .parquet extension");
    Preconditions.checkArgument(csvOutputFile.getName().endsWith(".csv"),
            "csv file should have .csv extension");
    // Preconditions.checkArgument(!csvOutputFile.exists(),
    //         "Output file " + csvOutputFile.getAbsolutePath() + " already exists");
    LOG.info("Converting " + parquetFilePath.getName() + " to " + csvOutputFile.getName());

    // Configure the read support with a projection schema so only the user_id column is read.
    Configuration configuration = new Configuration(true);
    configuration.set(ReadSupport.PARQUET_READ_SCHEMA, "message m { optional binary user_id;}");
    GroupReadSupport readSupport = new GroupReadSupport();
    /*ParquetMetadata readFooter = ParquetFileReader.readFooter(configuration, parquetFilePath);*/
    //MessageType fileSchema = readFooter.getFileMetaData().getSchema();
    MessageType requestSchema = MessageTypeParser.parseMessageType("message m { optional binary user_id;}");
    //readSupport.init(configuration, null, fileSchema);

    BufferedWriter w = new BufferedWriter(new FileWriter(csvOutputFile));
    ParquetReader<Group> reader = new ParquetReader<Group>(configuration, parquetFilePath, readSupport,
            ColumnRecordFilter.column("user_id", new Predicate() {
                @Override
                public boolean apply(ColumnReader input) {
                    // Keep only rows whose user_id is neither "0" nor empty.
                    String temp = input.getBinary().toStringUsingUTF8();
                    return !temp.equals("0") && !temp.isEmpty();
                }
            }));
    try {
        Group g = null;
        while ((g = reader.read()) != null) {
            writeGroup(w, g, requestSchema);
        }
        reader.close();
    } finally {
        Utils.closeQuietly(w);
    }
}
From source file:com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystemTestBase.java
License:Open Source License
/**
 * We override certain methods in FileSystem simply to provide debug tracing. (Search for
 * "Overridden functions for debug tracing" in GoogleHadoopFileSystemBase.java).
 * We do not add or update any functionality for such methods. The following
 * tests simply exercise that path to ensure coverage. Consequently, they do not
 * really test any functionality.
 *
 * Having coverage for these methods lets us easily determine the amount of
 * coverage that is missing in the rest of the code.
 */
@Test
public void provideCoverageForUnmodifiedMethods() throws IOException {
    // -------------------------------------------------------
    // Create test data.

    // Temporary file in GHFS.
    URI tempFileUri = GoogleCloudStorageFileSystemIntegrationTest.getTempFilePath();
    Path tempFilePath = ghfsHelper.castAsHadoopPath(tempFileUri);
    Path tempDirPath = tempFilePath.getParent();
    String text = "Hello World!";
    ghfsHelper.writeFile(tempFilePath, text, 1, false);

    // Another temporary file in GHFS.
    URI tempFileUri2 = GoogleCloudStorageFileSystemIntegrationTest.getTempFilePath();
    Path tempFilePath2 = ghfsHelper.castAsHadoopPath(tempFileUri2);

    // Temporary file in local FS.
    File localTempFile = File.createTempFile("ghfs-test-", null);
    Path localTempFilePath = new Path(localTempFile.getPath());
    Path localTempDirPath = localTempFilePath.getParent();

    // -------------------------------------------------------
    // Call methods to provide coverage for. Note that we do not attempt to
    // test their functionality as we are not testing Hadoop engine here.

    try {
        ghfs.deleteOnExit(tempFilePath);
        ghfs.getContentSummary(tempFilePath);
        ghfs.getDelegationToken("foo");
        ghfs.copyFromLocalFile(false, true, localTempFilePath, tempDirPath);
        ghfs.copyFromLocalFile(false, true, new Path[] { localTempFilePath }, tempDirPath);
        localTempFile.delete();

        ghfs.copyToLocalFile(true, tempFilePath, localTempDirPath);
        File localCopiedFile = new File(localTempDirPath.toString(), tempFilePath.getName());
        localCopiedFile.delete();

        Path localOutputPath = ghfs.startLocalOutput(tempFilePath2, localTempFilePath);
        FileWriter writer = new FileWriter(localOutputPath.toString());
        writer.write(text);
        writer.close();
        ghfs.completeLocalOutput(tempFilePath2, localOutputPath);

        ghfs.getUsed();
        ghfs.setVerifyChecksum(false);
        ghfs.getFileChecksum(tempFilePath2);
        ghfs.setPermission(tempFilePath2, FsPermission.getDefault());
        try {
            ghfs.setOwner(tempFilePath2, "foo-user", "foo-group");
        } catch (IOException ioe) {
            // Some filesystems (like the LocalFileSystem) are strict about existence of owners.
            // TODO(user): Abstract out the behaviors around owners/permissions and properly test
            // the different behaviors between different filesystems.
        }
        ghfs.setTimes(tempFilePath2, 0, 0);
    } finally {
        // We do not need to separately delete the temp files created in GHFS because
        // we delete all test buckets recursively at the end of the tests.
        if (localTempFile.exists()) {
            localTempFile.delete();
        }
    }
}
From source file:com.google.cloud.hadoop.fs.gcs.GoogleHadoopSyncableOutputStream.java
License:Open Source License
/**
 * Returns the URI to be used for the next "tail" file in the series.
 */
private URI getNextTemporaryPath() {
    Path basePath = ghfs.getHadoopPath(finalGcsPath);
    Path baseDir = basePath.getParent();
    Path tempPath = new Path(baseDir,
            String.format("%s%s.%d.%s", TEMPFILE_PREFIX, basePath.getName(), curComponentIndex,
                    UUID.randomUUID().toString()));
    return ghfs.getGcsPath(tempPath);
}