List of usage examples for org.apache.hadoop.fs.FileSystem#create
public FSDataOutputStream create(Path f, short replication) throws IOException
From source file:com.asakusafw.runtime.stage.temporary.TemporaryStorage.java
License:Apache License
/** * Opens a temporary {@link ModelOutput} for the specified path. * @param <V> data type/*from w w w . j a v a 2s. c o m*/ * @param conf configuration * @param dataType data type * @param path target path * @param compressionCodec compression codec, or null if not compressed * @return the opened {@link ModelOutput} * @throws IOException if failed to open output * @throws IllegalArgumentException if some parameters were {@code null} */ @SuppressWarnings("unchecked") public static <V> ModelOutput<V> openOutput(Configuration conf, Class<V> dataType, Path path, CompressionCodec compressionCodec) throws IOException { if (conf == null) { throw new IllegalArgumentException("conf must not be null"); //$NON-NLS-1$ } if (dataType == null) { throw new IllegalArgumentException("dataType must not be null"); //$NON-NLS-1$ } if (path == null) { throw new IllegalArgumentException("path must not be null"); //$NON-NLS-1$ } FileSystem fs = path.getFileSystem(conf); if (LOG.isDebugEnabled()) { LOG.debug(MessageFormat.format("Opening temporary output: {0} (fs={1})", //$NON-NLS-1$ path, fs.getUri())); } if (Writable.class.isAssignableFrom(dataType)) { return (ModelOutput<V>) new TemporaryFileOutput<>(fs.create(path, true), dataType.getName(), OUTPUT_INIT_BUFFER_SIZE, OUTPUT_PAGE_SIZE); } SequenceFile.Writer out = newWriter(conf, fs, dataType, path, compressionCodec); return new SequenceFileModelOutput<>(out); }
From source file:com.asakusafw.runtime.util.cache.HadoopFileCacheRepository.java
License:Apache License
private void updateCache(Path file, long checksum, Path cachePath, Path cacheChecksumPath) throws IOException { if (LOG.isInfoEnabled()) { LOG.info(MessageFormat.format("updating library cache: {0} -> {1}", file, cachePath)); }/*w ww. ja v a2s. co m*/ FileSystem sourceFs = file.getFileSystem(configuration); FileSystem cacheFs = cachePath.getFileSystem(configuration); // remove checksum file -> cachePath delete(cacheFs, cacheChecksumPath); delete(cacheFs, cachePath); // sync source file to cache file try (FSDataOutputStream checksumOutput = cacheFs.create(cacheChecksumPath, false)) { checksumOutput.writeLong(checksum); syncFile(sourceFs, file, cacheFs, cachePath); } }
From source file:com.asakusafw.runtime.util.cache.HadoopFileCacheRepository.java
License:Apache License
/**
 * Copies the contents of {@code sourceFile} into a newly created {@code targetFile}.
 * <p>
 * The target is created without overwrite, and the copy uses the buffer supplied by
 * {@code byteBuffers}.
 * </p>
 * @param sourceFs the file system holding the source file
 * @param sourceFile the file to read
 * @param targetFs the file system holding the target file
 * @param targetFile the file to create and fill
 * @throws IOException if opening, reading, or writing fails
 */
private void syncFile(FileSystem sourceFs, Path sourceFile, FileSystem targetFs, Path targetFile)
        throws IOException {
    byte[] buffer = byteBuffers.get();
    try (FSDataOutputStream sink = targetFs.create(targetFile, false);
            FSDataInputStream source = sourceFs.open(sourceFile)) {
        // a negative read length signals end of stream
        for (int length = source.read(buffer); length >= 0; length = source.read(buffer)) {
            sink.write(buffer, 0, length);
        }
    }
}
From source file:com.asiainfo.srd.HioBench.java
License:Apache License
static void writeFile(FileSystem fs) throws IOException { FSDataOutputStream fos = fs.create(options.filePath, (short) 1); byte arr[] = new byte[65536]; try {//w w w . j a v a2s .c om for (long off = 0; off < options.nBytesInFile;) { fillArrayWithExpected(arr, off, arr.length); long rem = options.nBytesInFile - off; int lim = rem > arr.length ? arr.length : (int) rem; fos.write(arr, 0, lim); off += lim; } } finally { fos.close(); } }
From source file:com.awcoleman.StandaloneJava.AvroCombinerByBlock.java
License:Apache License
public AvroCombinerByBlock(String inDirStr, String outDirStr, String handleExisting) throws IOException { //handle both an output directory and an output filename (ending with .avro) String outputFilename = DEFAULTOUTPUTFILENAME; if (outDirStr.endsWith(".avro")) { isOutputNameSpecifiedAndAFile = true; //String[] outputParts = outDirStr.split(":?\\\\"); String[] outputParts = outDirStr.split("/"); outputFilename = outputParts[outputParts.length - 1]; //remove outputFilename from outDirStr to get new outDirStr which is just directory (and trailing /) outDirStr = outDirStr.replaceAll(Pattern.quote(outputFilename), ""); outDirStr = outDirStr.substring(0, outDirStr.length() - (outDirStr.endsWith("/") ? 1 : 0)); }// w w w.ja v a 2s . co m //Get block size - not needed //long hdfsBlockSize = getBlockSize(); //System.out.println("HDFS FS block size: "+hdfsBlockSize); //Get list of input files ArrayList<FileStatus> inputFileList = new ArrayList<FileStatus>(); Configuration conf = new Configuration(); conf.addResource(new Path("/etc/hadoop/conf/core-site.xml")); conf.set("dfs.replication", "1"); //see http://stackoverflow.com/questions/24548699/how-to-append-to-an-hdfs-file-on-an-extremely-small-cluster-3-nodes-or-less FileSystem hdfs = null; try { hdfs = FileSystem.get(conf); } catch (java.io.IOException ioe) { System.out.println("Error opening HDFS filesystem. Exiting. Error message: " + ioe.getMessage()); System.exit(1); } if (hdfs.getStatus() == null) { System.out.println("Unable to contact HDFS filesystem. Exiting."); System.exit(1); } //Check if input and output dirs exist Path inDir = new Path(inDirStr); Path outDir = new Path(outDirStr); if (!(hdfs.exists(inDir) || hdfs.isDirectory(inDir))) { System.out.println("Input directory ( " + inDirStr + " ) not found or is not directory. 
Exiting."); System.exit(1); } if (!(hdfs.exists(outDir) || hdfs.isDirectory(outDir))) { if (hdfs.exists(outDir)) { //outDir exists and is a symlink or file, must die System.out.println("Requested output directory name ( " + outDirStr + " ) exists but is not a directory. Exiting."); System.exit(1); } else { hdfs.mkdirs(outDir); } } RemoteIterator<LocatedFileStatus> fileStatusListIterator = hdfs.listFiles(inDir, true); while (fileStatusListIterator.hasNext()) { LocatedFileStatus fileStatus = fileStatusListIterator.next(); if (fileStatus.isFile() && !fileStatus.getPath().getName().equals("_SUCCESS")) { inputFileList.add((FileStatus) fileStatus); } } if (inputFileList.size() <= 1 && !isOutputNameSpecifiedAndAFile) { //If an output file is specified assume we just want a rename. System.out.println("Only one or zero files found in input directory ( " + inDirStr + " ). Exiting."); System.exit(1); } //Get Schema and Compression Codec from seed file since we need it for the writer Path firstFile = inputFileList.get(0).getPath(); FsInput fsin = new FsInput(firstFile, conf); DataFileReader<Object> dfrFirstFile = new DataFileReader<Object>(fsin, new GenericDatumReader<Object>()); Schema fileSchema = dfrFirstFile.getSchema(); String compCodecName = dfrFirstFile.getMetaString("avro.codec"); //compCodecName should be null, deflate, snappy, or bzip2 if (compCodecName == null) { compCodecName = "deflate"; //set to deflate even though original is no compression } dfrFirstFile.close(); //Create Empty HDFS file in output dir String seedFileStr = outDirStr + "/" + outputFilename; Path seedFile = new Path(seedFileStr); FSDataOutputStream hdfsdos = null; try { hdfsdos = hdfs.create(seedFile, false); } catch (org.apache.hadoop.fs.FileAlreadyExistsException faee) { if (handleExisting.equals("overwrite")) { hdfs.delete(seedFile, false); hdfsdos = hdfs.create(seedFile, false); } else if (handleExisting.equals("append")) { hdfsdos = hdfs.append(seedFile); } else { System.out .println("File " 
+ seedFileStr + " exists and will not overwrite. handleExisting is set to " + handleExisting + ". Exiting."); System.exit(1); } } if (hdfsdos == null) { System.out.println("Unable to create or write to output file ( " + seedFileStr + " ). handleExisting is set to " + handleExisting + ". Exiting."); System.exit(1); } //Append other files GenericDatumWriter gdw = new GenericDatumWriter(fileSchema); DataFileWriter dfwBase = new DataFileWriter(gdw); //Set compression to that found in the first file dfwBase.setCodec(CodecFactory.fromString(compCodecName)); DataFileWriter dfw = dfwBase.create(fileSchema, hdfsdos); for (FileStatus thisFileStatus : inputFileList) { //_SUCCESS files are 0 bytes if (thisFileStatus.getLen() == 0) { continue; } FsInput fsin1 = new FsInput(thisFileStatus.getPath(), conf); DataFileReader dfr = new DataFileReader<Object>(fsin1, new GenericDatumReader<Object>()); dfw.appendAllFrom(dfr, false); dfr.close(); } dfw.close(); dfwBase.close(); }
From source file:com.awcoleman.StandaloneJava.AvroCombinerByRecord.java
License:Apache License
public AvroCombinerByRecord(String inDirStr, String outDirStr) throws IOException { //Get list of input files ArrayList<FileStatus> inputFileList = new ArrayList<FileStatus>(); Configuration conf = new Configuration(); conf.addResource(new Path("/etc/hadoop/conf/core-site.xml")); FileSystem hdfs = FileSystem.get(conf); //Check if input and output dirs exist Path inDir = new Path(inDirStr); Path outDir = new Path(outDirStr); if (!(hdfs.exists(inDir) || hdfs.isDirectory(inDir))) { System.out.println("Input directory ( " + inDirStr + " ) not found or is not directory. Exiting."); System.exit(1);//from w w w . j av a 2 s . c om } if (!(hdfs.exists(outDir) || hdfs.isDirectory(outDir))) { if (hdfs.exists(outDir)) { //outDir exists and is a symlink or file, must die System.out.println("Requested output directory name ( " + outDirStr + " ) exists but is not a directory. Exiting."); System.exit(1); } else { hdfs.mkdirs(outDir); } } RemoteIterator<LocatedFileStatus> fileStatusListIterator = hdfs.listFiles(inDir, true); while (fileStatusListIterator.hasNext()) { LocatedFileStatus fileStatus = fileStatusListIterator.next(); if (fileStatus.isFile()) { inputFileList.add((FileStatus) fileStatus); } } if (inputFileList.size() <= 1) { System.out.println("Only one or zero files found in input directory ( " + inDirStr + " ). 
Exiting."); System.exit(1); } //Get Schema and Compression Codec from seed file since we need it for the writer Path firstFile = inputFileList.get(0).getPath(); FsInput fsin = new FsInput(firstFile, conf); DataFileReader<Object> dfrFirstFile = new DataFileReader<Object>(fsin, new GenericDatumReader<Object>()); Schema fileSchema = dfrFirstFile.getSchema(); String compCodecName = dfrFirstFile.getMetaString("avro.codec"); dfrFirstFile.close(); //Create Empty HDFS file in output dir Path seedFile = new Path(outDirStr + "/combinedByRecord.avro"); FSDataOutputStream hdfsdos = hdfs.create(seedFile, false); //Append other files GenericDatumWriter gdw = new GenericDatumWriter(fileSchema); DataFileWriter dfwBase = new DataFileWriter(gdw); //Set compression to that found in the first file dfwBase.setCodec(CodecFactory.fromString(compCodecName)); DataFileWriter dfw = dfwBase.create(fileSchema, hdfsdos); for (FileStatus thisFileStatus : inputFileList) { DataFileStream<Object> avroStream = null; FSDataInputStream inStream = hdfs.open(thisFileStatus.getPath()); GenericDatumReader<Object> reader = new GenericDatumReader<Object>(); avroStream = new DataFileStream<Object>(inStream, reader); long recordCounter = 0; while (avroStream.hasNext()) { dfw.append(avroStream.next()); recordCounter++; } avroStream.close(); inStream.close(); System.out.println("Appended " + recordCounter + " records from " + thisFileStatus.getPath().getName() + " to " + seedFile.getName()); } dfw.close(); dfwBase.close(); }
From source file:com.benchmark.mapred.terasort.TeraOutputFormat.java
License:Apache License
/**
 * Creates a {@link TeraRecordWriter} over a new file called {@code name} inside the
 * job's work output path.
 * @param ignored not used; the file system is resolved from the work output path
 * @param job the job configuration
 * @param name the output file name
 * @param progress progress callback handed to the file creation call
 * @return a record writer backed by the created file
 * @throws IOException if the file system cannot be resolved or the file cannot be created
 */
public RecordWriter<Text, Text> getRecordWriter(
        FileSystem ignored,
        JobConf job,
        String name,
        Progressable progress) throws IOException {
    Path workDir = getWorkOutputPath(job);
    FileSystem workFs = workDir.getFileSystem(job);
    Path target = new Path(workDir, name);
    return new TeraRecordWriter(workFs.create(target, progress), job);
}
From source file:com.bizosys.hsearch.console.ui.SearchServlet.java
License:Apache License
public void actionCreate(HttpServletRequest req, String projectName, String webappsClassDirPath, String schemaFileLoc) throws FileNotFoundException, UnsupportedEncodingException, IOException { String schemaXmlContent = req.getParameter("schema"); schemaXmlContent = (null == schemaXmlContent) ? StringUtils.Empty : schemaXmlContent.trim(); if (schemaXmlContent.length() == 0) throw new RuntimeException("Schema content is missing."); File schemaFile = new File(schemaFileLoc); PrintWriter writer = null;// w w w . j av a2 s . c om FileWriter fw = null; try { writer = new PrintWriter(schemaFile, "UTF-8"); writer.write(schemaXmlContent); writer.flush(); SetupServlet.create(projectName); File prjectFile = new File(webappsClassDirPath + "/projects.txt"); fw = new FileWriter(prjectFile, true); fw.write(projectName); fw.write('\n'); } finally { if (null != writer) writer.close(); if (null != fw) { fw.flush(); fw.close(); } } if (schemaFile.exists()) { FileSystem fs = null; FSDataOutputStream hdfsFile = null; try { fs = FileSystem.get(conf); Path schemaHdfsFilePath = new Path(schemaFile.getName()); hdfsFile = fs.create(schemaHdfsFilePath, fs.exists(schemaHdfsFilePath)); hdfsFile.write(FileReaderUtil.getBytes(new File(schemaFile.getAbsolutePath()))); } catch (Exception ex) { throw new IOException( "Unable to create @ hadoop Please check permission on dfs " + schemaFile.getName(), ex); } finally { if (null != hdfsFile) hdfsFile.close(); if (null != fs) fs.close(); } } }
From source file:com.blackberry.logdriver.mapred.BinaryRecordWriter.java
License:Apache License
public BinaryRecordWriter(JobConf job) { String extension = job.get("output.file.extension", ""); String taskid = job.get("mapred.task.id"); try {/*w w w . j a v a 2s . c o m*/ Path outputPath = BinaryOutputFormat.getTaskOutputPath(job, taskid + extension); FileSystem fs = FileSystem.get(job); LOG.info("Creating output path: {}", outputPath); out = fs.create(outputPath, true); } catch (IOException e) { LOG.error("Error creating output file.", e); } }
From source file:com.blackberry.logdriver.mapreduce.BinaryRecordWriter.java
License:Apache License
/** * Create a writer for the given BinaryOutputFormat and TaskAttemptContext. * /*from ww w . j ava2s . c o m*/ * @param outputFormat * @param context */ public BinaryRecordWriter(BinaryOutputFormat outputFormat, TaskAttemptContext context) { Configuration conf = context.getConfiguration(); String extension = conf.get("output.file.extension", ""); try { Path outputPath = outputFormat.getDefaultWorkFile(context, extension); FileSystem fs = FileSystem.get(conf); LOG.info("Creating output path: {}", outputPath); out = fs.create(outputPath, true); } catch (IOException e) { LOG.error("Error creating output file.", e); } }