List of usage examples for org.apache.hadoop.fs FileSystem create
public FSDataOutputStream create(Path f, short replication) throws IOException
From source file:com.blm.orc.OrcRecordUpdater.java
License:Apache License
/**
 * Builds a record updater that writes its events to the file derived from
 * {@code path} and {@code options} via {@code AcidUtils.createFilename}.
 *
 * <p>As a best-effort side effect, a small marker file named {@code ACID_FORMAT}
 * containing {@code ORC_ACID_VERSION} is created under {@code path}. Failure to
 * create it (for example because it already exists, since overwrite is disabled)
 * is only logged at debug level.
 *
 * @param path    the directory under which the output file and marker file are created
 * @param options the output options (bucket, transaction range, file system, inspector, ...)
 * @throws IOException if the side file or the ORC writer cannot be created
 */
OrcRecordUpdater(Path path, AcidOutputFormat.Options options) throws IOException {
    this.options = options;
    this.bucket.set(options.getBucket());
    this.path = AcidUtils.createFilename(path, options);

    // Prefer the file system supplied by the caller; otherwise resolve it from the path.
    FileSystem fs = options.getFilesystem();
    if (fs == null) {
        fs = path.getFileSystem(options.getConfiguration());
    }
    this.fs = fs;

    // Best-effort version marker. The stream is closed in a finally block so that a
    // failed write does not leak the open file handle.
    try {
        FSDataOutputStream strm = fs.create(new Path(path, ACID_FORMAT), false);
        try {
            strm.writeInt(ORC_ACID_VERSION);
        } finally {
            strm.close();
        }
    } catch (IOException ioe) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("Failed to create " + path + "/" + ACID_FORMAT + " with " + ioe);
        }
    }

    // The side file is only opened when the transaction range spans more than one id
    // and we are not writing a base file.
    if (options.getMinimumTransactionId() != options.getMaximumTransactionId() && !options.isWritingBase()) {
        flushLengths = fs.create(getSideFile(this.path), true, 8, options.getReporter());
    } else {
        flushLengths = null;
    }

    // Use the ORC-specific writer options when the caller provided them, else defaults.
    OrcFile.WriterOptions writerOptions = null;
    if (options instanceof OrcOptions) {
        writerOptions = ((OrcOptions) options).getOrcOptions();
    }
    if (writerOptions == null) {
        writerOptions = OrcFile.writerOptions(options.getConfiguration());
    }
    writerOptions.fileSystem(fs).callback(indexBuilder);
    if (!options.isWritingBase()) {
        // Non-base files use the delta-specific buffer/stripe sizes and no block padding.
        writerOptions.blockPadding(false);
        writerOptions.bufferSize(DELTA_BUFFER_SIZE);
        writerOptions.stripeSize(DELTA_STRIPE_SIZE);
    }
    rowInspector = (StructObjectInspector) options.getInspector();
    writerOptions.inspector(createEventSchema(findRecId(options.getInspector(), options.getRecordIdColumn())));
    this.writer = OrcFile.createWriter(this.path, writerOptions);

    // Pre-wire the reusable event struct with the mutable field holders.
    item = new OrcStruct(FIELDS);
    item.setFieldValue(OPERATION, operation);
    item.setFieldValue(CURRENT_TRANSACTION, currentTransaction);
    item.setFieldValue(ORIGINAL_TRANSACTION, originalTransaction);
    item.setFieldValue(BUCKET, bucket);
    item.setFieldValue(ROW_ID, rowId);
}
From source file:com.bonc.mr_roamRecognition_hjpt.comm.FileCountTextOutputFormat.java
License:Apache License
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext job) throws IOException, InterruptedException { Configuration conf = job.getConfiguration(); boolean isCompressed = getCompressOutput(job); String keyValueSeparator = conf.get(SEPERATOR, "\t"); CompressionCodec codec = null;// w ww. ja v a2s . co m String extension = ""; if (isCompressed) { Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class); codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf); extension = codec.getDefaultExtension(); } Path file = getDefaultWorkFile(job, extension); FileSystem fs = file.getFileSystem(conf); if (!isCompressed) { FSDataOutputStream fileOut = fs.create(file, false); return new LineRecordWriter<K, V>(fileOut, keyValueSeparator); } else { FSDataOutputStream fileOut = fs.create(file, false); return new LineRecordWriter<K, V>(new DataOutputStream(codec.createOutputStream(fileOut)), keyValueSeparator); } }
From source file:com.chinnu.churndetection.fuzzykmeans.FuzzyKMeansReducer.java
/**
 * Emits every vector assigned to this cluster, computes the cluster's new center as
 * the component-wise mean of those vectors, appends that center as a comma-separated
 * line to the {@code NEW_CENTER} file, and writes the {@code CENTER_CONVERGED} marker
 * when the new center is closer than 0.01 to the current one.
 *
 * @param key     the cluster index; also used as the index into the parsed current centers
 * @param values  the vectors assigned to this cluster
 * @param context the reducer context used for output and for the job name
 * @throws IOException          if reading or writing the center files fails
 * @throws InterruptedException if the context write is interrupted
 */
@Override
protected void reduce(IntWritable key, Iterable<Vector> values,
        Reducer<IntWritable, Vector, IntWritable, Text>.Context context)
        throws IOException, InterruptedException {
    // Java zero-initialises arrays, so no explicit reset loop is needed.
    double[] sum = new double[DATALENGTH];
    int count = 0;
    for (Vector vector : values) {
        double[] data = vector.getData();
        for (int i = 0; i < DATALENGTH; i++) {
            sum[i] += data[i];
        }
        count++;
        context.write(key, new Text(vector.toString()));
    }

    double[] newCenter = new double[DATALENGTH];
    for (int i = 0; i < DATALENGTH; i++) {
        newCenter[i] = sum[i] / count;
    }

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);

    // Parse the current centers: one center per line, components separated by commas.
    List<double[]> currentCenters = new ArrayList<>();
    for (String centerLine : CURR_CENTER.split("\n")) {
        String[] parts = centerLine.split(",");
        double[] center = new double[parts.length];
        for (int i = 0; i < parts.length; i++) {
            center[i] = Double.parseDouble(parts[i]);
        }
        currentCenters.add(center);
    }

    // The file is rewritten in full, so the lines already present are read back first.
    // NOTE(review): this read-modify-write is not atomic; confirm that only one reducer
    // touches NEW_CENTER at a time.
    Path newCenterPath = new Path(NEW_CENTER);
    List<String> existingLines = new ArrayList<>();
    if (fs.exists(newCenterPath)) {
        // try-with-resources closes the reader, which the previous version leaked.
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(newCenterPath)))) {
            String existing;
            while ((existing = reader.readLine()) != null) {
                existingLines.add(existing);
            }
        }
    }

    // Join the components with commas; unlike trimming a trailing comma with substring,
    // this is also safe when DATALENGTH is 0.
    StringBuilder joined = new StringBuilder();
    for (int i = 0; i < DATALENGTH; i++) {
        if (i > 0) {
            joined.append(',');
        }
        joined.append(newCenter[i]);
    }

    try (PrintWriter centerWriter = new PrintWriter(new OutputStreamWriter(fs.create(newCenterPath, true)))) {
        for (String existing : existingLines) {
            centerWriter.println(existing);
        }
        centerWriter.println(joined.toString());
    }

    MRLogger.Log(context.getJobName());
    MRLogger.Log(Arrays.toString(currentCenters.get(key.get())));
    MRLogger.Log(Arrays.toString(newCenter));

    double currDistance = DistanceComparator.findDistance(currentCenters.get(key.get()), newCenter);
    MRLogger.Log(currDistance + "");

    if (currDistance < 0.01) {
        try (PrintWriter convergedWriter = new PrintWriter(
                new OutputStreamWriter(fs.create(new Path(ChurnDriver.CENTER_CONVERGED), true)))) {
            convergedWriter.println("converged");
        }
    }
}
From source file:com.cijhwang.hadoop.TickDataReadWrite.java
License:Apache License
/**
 * Writes {@code line} to the file at {@code loc}, overwriting any existing file.
 *
 * <p>This is a best-effort operation: any failure is reported on standard output
 * and not propagated to the caller.
 *
 * @param line the text to write
 * @param loc  the destination path, resolved against the default configuration's file system
 */
private static void writeTick(String line, String loc) {
    try {
        Path pt = new Path(loc);
        FileSystem fs = FileSystem.get(new Configuration());
        BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(fs.create(pt, true)));
        try {
            writer.write(line);
        } finally {
            // Always release the stream, even when the write fails.
            writer.close();
        }
    } catch (Exception e) {
        // Kept broad on purpose so callers never see an exception, but the real cause is
        // reported instead of a generic (and usually wrong) "File not found".
        System.out.println("Failed to write tick data to " + loc + ": " + e);
    }
}
From source file:com.cip.crane.agent.utils.TaskHelper.java
License:Open Source License
private void writeFileToHdfs(String srcFile, String destFile) throws IOException { File file = new File(srcFile); if (!file.exists()) { throw new FileNotFoundException("File not found"); }// w ww. j a v a 2 s .c om byte[] buf = new byte[BUFFER_SIZE]; FileInputStream input = new FileInputStream(file); FileSystem fs = FileSystem.get(URI.create(destFile), conf); Path destPath = new Path(destFile); if (fs.exists(destPath)) { fs.delete(destPath, true); } FSDataOutputStream hdfsoutput = fs.create(destPath, (short) 2); int num = input.read(buf); while (num != (-1)) {// ? hdfsoutput.write(buf, 0, num);// ? hdfsoutput.flush();// ? num = input.read(buf);// ?? } input.close(); hdfsoutput.close(); fs.close(); }
From source file:com.cloudera.cdk.data.filesystem.FileSystemMetadataProvider.java
License:Apache License
/** * Writes the contents of a {@code Descriptor} to files. * * @param fs The {@link FileSystem} where data will be stored * @param metadataLocation The directory {@link Path} where metadata files * will be located * @param name The {@link Dataset} name * @param descriptor The {@code Descriptor} contents to write * * @throws MetadataProviderException If the {@code metadataLocation} does not * exist or if any IOExceptions need to be * propagated. */// ww w . j a v a2s. c o m private static void writeDescriptor(FileSystem fs, Path metadataLocation, String name, DatasetDescriptor descriptor) { checkExists(fs, metadataLocation); FSDataOutputStream outputStream = null; final Path schemaPath = new Path(metadataLocation, SCHEMA_FILE_NAME); boolean threw = true; try { outputStream = fs.create(schemaPath, true /* overwrite */ ); outputStream.write(descriptor.getSchema().toString(true).getBytes(Charsets.UTF_8)); outputStream.flush(); threw = false; } catch (IOException e) { throw new MetadataProviderException("Unable to save schema file:" + schemaPath + " for dataset:" + name, e); } finally { try { Closeables.close(outputStream, threw); } catch (IOException e) { throw new MetadataProviderException(e); } } Properties properties = new Properties(); properties.setProperty(VERSION_FIELD_NAME, METADATA_VERSION); properties.setProperty(FORMAT_FIELD_NAME, descriptor.getFormat().getName()); final URI dataLocation = descriptor.getLocation(); if (dataLocation != null) { properties.setProperty(LOCATION_FIELD_NAME, dataLocation.toString()); } if (descriptor.isPartitioned()) { properties.setProperty(PARTITION_EXPRESSION_FIELD_NAME, Accessor.getDefault().toExpression(descriptor.getPartitionStrategy())); } // copy custom properties to the table for (String property : descriptor.listProperties()) { // no need to check the reserved list, those are not set on descriptors properties.setProperty(property, descriptor.getProperty(property)); } final Path descriptorPath = new 
Path(metadataLocation, DESCRIPTOR_FILE_NAME); threw = true; try { outputStream = fs.create(descriptorPath, true /* overwrite */ ); properties.store(outputStream, "Dataset descriptor for " + name); outputStream.flush(); threw = false; } catch (IOException e) { throw new MetadataProviderException( "Unable to save descriptor file:" + descriptorPath + " for dataset:" + name, e); } finally { try { Closeables.close(outputStream, threw); } catch (IOException e) { throw new MetadataProviderException(e); } } }
From source file:com.cloudera.cdk.tools.JobClasspathHelper.java
License:Apache License
/**
 * Creates a file that contains a single line with an MD5 sum, overwriting any
 * existing file at the same path.
 *
 * <p>Failures while creating or writing the file are logged and not propagated;
 * only a failure to close the stream reaches the caller.
 *
 * @param fs
 *            FileSystem where to create the file.
 * @param md5sum
 *            The string containing the MD5 sum.
 * @param remoteMd5Path
 *            The path where to save the file.
 * @throws IOException
 *             if closing the output stream fails.
 */
private void createMd5SumFile(FileSystem fs, String md5sum, Path remoteMd5Path) throws IOException {
    FSDataOutputStream os = null;
    try {
        os = fs.create(remoteMd5Path, true);
        os.writeBytes(md5sum);
        os.flush();
    } catch (Exception e) {
        // Best-effort by design: keep swallowing the failure, but log which file could not
        // be written together with the full stack trace instead of a context-free "{}".
        logger.error("Unable to write MD5 sum file " + remoteMd5Path, e);
    } finally {
        if (os != null) {
            os.close();
        }
    }
}
From source file:com.cloudera.hadoop.hdfs.nfs.nfs4.NFS4Handler.java
License:Apache License
/** * * @param stateID/*from w ww . j av a2 s. c om*/ * @param fs * @param fileHandle * @param overwrite * @return * @throws NFS4Exception * @throws IOException */ public synchronized FSDataOutputStream forWrite(StateID stateID, FileSystem fs, FileHandle fileHandle, boolean overwrite) throws NFS4Exception, IOException { FileHolder fileHolder = mFileHandleMap.get(fileHandle); if (fileHolder != null) { OpenFile<FSDataOutputStream> file = fileHolder.getFSDataOutputStream(); if (file != null) { if (file.isOwnedBy(stateID)) { return file.get(); } throw new NFS4Exception(NFS4ERR_FILE_OPEN); } Path path = new Path(fileHolder.getPath()); boolean exists = fs.exists(path); // If overwrite = false, fs.create throws IOException which // is useless. In case of IOE do we always return EXIST? // doesn't seem to make sense. As such, I am mitigating the issue // even if there is a known race between the exists and create if (!overwrite && exists) { // append to a file // We used to be NFS4ERR_EXIST here but the linux client behaved rather // oddly. // It would open the fily with overwrite=true but then send the data // which // was to be appended at offset 0 throw new NFS4Exception(NFS4ERR_PERM, "File Exists and overwrite = false", true); } if (path.getParent() != null) { // TODO bad perms will fail with IOException, perhaps we should check // that file can be created before trying to so we can return the // correct error perm denied } if (exists && fs.getFileStatus(path).isDir()) { throw new NFS4Exception(NFS4ERR_ISDIR); } FSDataOutputStream out = fs.create(path, overwrite); this.incrementMetric("FILES_OPENED_WRITE", 1); fileHolder.setFSDataOutputStream(stateID, out); return out; } throw new NFS4Exception(NFS4ERR_STALE); }
From source file:com.cloudera.hadoop.hdfs.nfs.nfs4.state.HDFSState.java
License:Apache License
/** * Open a file handle for write// w ww . ja v a 2 s . co m * @param stateID * @param fileHandle * @param overwrite * @throws NFS4Exception * @throws IOException */ public synchronized HDFSOutputStream openForWrite(FileSystem fs, StateID stateID, FileHandle fileHandle, boolean overwrite) throws NFS4Exception, IOException { HDFSFile hdsfsFile = mOpenFilesMap.get(fileHandle); if (hdsfsFile != null) { OpenResource<HDFSOutputStream> file = hdsfsFile.getHDFSOutputStreamForWrite(); if (file != null) { if (file.isOwnedBy(stateID)) { return file.get(); } throw new NFS4Exception(NFS4ERR_FILE_OPEN); } } INode inode = mFileHandleINodeMap.getINodeByFileHandle(fileHandle); if (inode == null) { throw new NFS4Exception(NFS4ERR_STALE); } Path path = new Path(inode.getPath()); boolean exists = fs.exists(path); // If overwrite = false, fs.create throws IOException which // is useless. In case of IOE do we always return EXIST? // doesn't seem to make sense. As such, I am mitigating the issue // even if there is a known race between the exists and create if (!overwrite && exists) { // append to a file // We used to be NFS4ERR_EXIST here but the linux client behaved // rather oddly. 
It would open the file with overwrite=true but // then send the data which was to be appended at offset 0 throw new NFS4Exception(NFS4ERR_PERM, "File Exists and overwrite = false"); } if (path.getParent() != null) { // TODO bad perms will fail with IOException, perhaps we should check // that file can be created before trying to so we can return the // correct error perm denied // check(user, groups, status, access); } if (exists && fs.getFileStatus(path).isDir()) { throw new NFS4Exception(NFS4ERR_ISDIR); } HDFSOutputStream out = new HDFSOutputStream(fs.create(path, overwrite), path.toString(), fileHandle); mMetrics.incrementMetric(FILES_OPENED_WRITE, 1); if (hdsfsFile == null) { hdsfsFile = new HDFSFile(fileHandle, inode.getPath(), inode.getNumber()); mOpenFilesMap.put(fileHandle, hdsfsFile); } hdsfsFile.setHDFSOutputStream(stateID, out); return out; }
From source file:com.cloudera.kitten.util.LocalDataHelper.java
License:Open Source License
private void copyToHdfs(String key, String localDataName) throws IOException { if (!localToHdfs.containsKey(localDataName)) { FileSystem fs = FileSystem.get(conf); Path src = new Path(localDataName); Path dst = getPath(fs, src.getName()); InputStream data = getFileOrResource(localDataName); FSDataOutputStream os = fs.create(dst, true); ByteStreams.copy(data, os);//from ww w . j a v a2 s . com os.close(); URI uri = dst.toUri(); localToHdfs.put(key, uri); } }