List of usage examples for org.apache.hadoop.fs.FileSystem.newInstance
public static FileSystem newInstance(Configuration conf) throws IOException
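Unlike FileSystem.get(Configuration), newInstance always returns a new FileSystem object rather than the shared instance held in Hadoop's filesystem cache, so the caller owns the returned object and should close it when finished. A minimal usage sketch (the path below is an illustrative assumption, not taken from the examples that follow):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Sketch only: obtain a private, non-shared FileSystem and close it when done.
// Closing it cannot break other components, because newInstance() does not
// hand out the shared instance returned by FileSystem.get().
Configuration conf = new Configuration();
try (FileSystem fs = FileSystem.newInstance(conf)) {
    Path dir = new Path("/tmp/example"); // hypothetical path for illustration
    if (!fs.exists(dir)) {
        fs.mkdirs(dir);
    }
}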
From source file: com.datatorrent.flume.storage.HDFSStorage.java
License: Open Source License

@Override
public void setup(com.datatorrent.api.Context context)
{
  Configuration conf = new Configuration();
  if (baseDir == null) {
    baseDir = conf.get("hadoop.tmp.dir");
    if (baseDir == null || baseDir.isEmpty()) {
      throw new IllegalArgumentException("baseDir cannot be null.");
    }
  }
  offset = 4;
  skipOffset = -1;
  skipFile = -1;
  int tempRetryCount = 0;
  while (tempRetryCount < retryCount && fs == null) {
    try {
      fs = FileSystem.newInstance(conf);
      tempRetryCount++;
    } catch (Throwable throwable) {
      logger.warn("Not able to get file system ", throwable);
    }
  }

  try {
    Path path = new Path(baseDir);
    basePath = new Path(path, id);
    if (fs == null) {
      fs = FileSystem.newInstance(conf);
    }
    if (!fs.exists(path)) {
      closeFs();
      throw new RuntimeException(String.format("baseDir passed (%s) doesn't exist.", baseDir));
    }
    if (!fs.isDirectory(path)) {
      closeFs();
      throw new RuntimeException(String.format("baseDir passed (%s) is not a directory.", baseDir));
    }
    if (!restore) {
      fs.delete(basePath, true);
    }
    if (!fs.exists(basePath) || !fs.isDirectory(basePath)) {
      fs.mkdirs(basePath);
    }

    if (blockSize == 0) {
      blockSize = fs.getDefaultBlockSize(new Path(basePath, "tempData"));
    }
    if (blockSize == 0) {
      blockSize = DEFAULT_BLOCK_SIZE;
    }
    blockSize = blockSizeMultiple * blockSize;

    currentWrittenFile = 0;
    cleanedFileCounter = -1;
    retrievalFile = -1;
    // fileCounterFile = new Path(basePath, IDENTITY_FILE);
    flushedFileCounter = -1;
    // cleanFileCounterFile = new Path(basePath, CLEAN_FILE);
    cleanFileOffsetFile = new Path(basePath, CLEAN_OFFSET_FILE);
    cleanFileOffsetFileTemp = new Path(basePath, CLEAN_OFFSET_FILE_TEMP);
    flushedCounterFile = new Path(basePath, FLUSHED_IDENTITY_FILE);
    flushedCounterFileTemp = new Path(basePath, FLUSHED_IDENTITY_FILE_TEMP);

    if (restore) {
      // if (fs.exists(fileCounterFile) && fs.isFile(fileCounterFile)) {
      //   currentWrittenFile = Long.valueOf(new String(readData(fileCounterFile)));
      // }
      if (fs.exists(cleanFileOffsetFile) && fs.isFile(cleanFileOffsetFile)) {
        cleanedOffset = readData(cleanFileOffsetFile);
      }
      if (fs.exists(flushedCounterFile) && fs.isFile(flushedCounterFile)) {
        String strFlushedFileCounter = new String(readData(flushedCounterFile));
        if (strFlushedFileCounter.isEmpty()) {
          logger.warn("empty flushed file");
        } else {
          flushedFileCounter = Long.valueOf(strFlushedFileCounter);
          flushedFileWriteOffset = getFlushedFileWriteOffset(
              new Path(basePath, flushedFileCounter + OFFSET_SUFFIX));
          bookKeepingFileOffset = getFlushedFileWriteOffset(
              new Path(basePath, flushedFileCounter + BOOK_KEEPING_FILE_OFFSET));
        }
      }
    }
    fileWriteOffset = flushedFileWriteOffset;
    currentWrittenFile = flushedFileCounter;
    cleanedFileCounter = byteArrayToLong(cleanedOffset, offset) - 1;
    if (currentWrittenFile == -1) {
      ++currentWrittenFile;
      fileWriteOffset = 0;
    }
  } catch (IOException io) {
    throw new RuntimeException(io);
  }
  storageExecutor = Executors.newSingleThreadExecutor(new NameableThreadFactory("StorageHelper"));
}
From source file: com.datatorrent.lib.io.fs.AbstractBlockReader.java
License: Open Source License

/**
 * Override this method to change the FileSystem instance that is used by the operator.
 *
 * @return A FileSystem object.
 * @throws IOException
 */
protected FileSystem getFSInstance() throws IOException
{
  return FileSystem.newInstance(configuration);
}
From source file: com.datatorrent.lib.io.fs.AbstractHdfsFileOutputOperator.java
License: Open Source License

/**
 * @param context
 */
@Override
public void setup(OperatorContext context)
{
  try {
    fs = FileSystem.newInstance(new Configuration());
  } catch (IOException ex) {
    throw new RuntimeException(ex);
  }
}
From source file: com.datatorrent.lib.io.fs.AbstractHDFSInputOperator.java
License: Open Source License

@Override
public void setup(OperatorContext context)
{
  try {
    fs = FileSystem.newInstance(new Configuration());
  } catch (IOException ex) {
    throw new RuntimeException(ex);
  }
}
From source file: com.datatorrent.stram.client.StramClientUtils.java
License: Apache License

public static FileSystem newFileSystemInstance(Configuration conf) throws IOException
{
  String dfsRootDir = conf.get(DT_DFS_ROOT_DIR);
  if (StringUtils.isBlank(dfsRootDir)) {
    return FileSystem.newInstance(conf);
  } else {
    if (dfsRootDir.contains(DT_DFS_USER_NAME)) {
      dfsRootDir = dfsRootDir.replace(DT_DFS_USER_NAME,
          UserGroupInformation.getLoginUser().getShortUserName());
      conf.set(DT_DFS_ROOT_DIR, dfsRootDir);
    }
    try {
      return FileSystem.newInstance(new URI(dfsRootDir), conf);
    } catch (URISyntaxException ex) {
      LOG.warn("{} is not a valid URI. Returning the default filesystem", dfsRootDir, ex);
      return FileSystem.newInstance(conf);
    }
  }
}
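This helper also exercises the URI-based overload, FileSystem.newInstance(URI, Configuration), which picks the filesystem implementation from the URI's scheme and authority instead of fs.defaultFS. A minimal sketch of that overload in isolation (the namenode address is a hypothetical placeholder, not taken from the example above):

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

// The scheme and authority of the URI, not fs.defaultFS, determine which
// filesystem is instantiated. The address below is a placeholder.
Configuration conf = new Configuration();
FileSystem fs = FileSystem.newInstance(URI.create("hdfs://namenode.example.com:8020/"), conf);
try {
    // ... use fs ...
} finally {
    fs.close(); // callers of newInstance() own the returned object's lifecycle
}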
From source file: com.datatorrent.stram.security.StramUserLogin.java
License: Apache License

public static long refreshTokens(long tokenLifeTime, String destinationDir, String destinationFile,
    final Configuration conf, String hdfsKeyTabFile, final Credentials credentials,
    final InetSocketAddress rmAddress, final boolean renewRMToken) throws IOException
{
  long expiryTime = System.currentTimeMillis() + tokenLifeTime;
  // renew tokens
  final String tokenRenewer = conf.get(YarnConfiguration.RM_PRINCIPAL);
  if (tokenRenewer == null || tokenRenewer.length() == 0) {
    throw new IOException("Can't get Master Kerberos principal for the RM to use as renewer");
  }
  FileSystem fs = FileSystem.newInstance(conf);
  File keyTabFile;
  try {
    keyTabFile = FSUtil.copyToLocalFileSystem(fs, destinationDir, destinationFile, hdfsKeyTabFile, conf);
  } finally {
    fs.close();
  }
  UserGroupInformation ugi = UserGroupInformation.loginUserFromKeytabAndReturnUGI(
      UserGroupInformation.getCurrentUser().getUserName(), keyTabFile.getAbsolutePath());
  try {
    ugi.doAs(new PrivilegedExceptionAction<Object>()
    {
      @Override
      public Object run() throws Exception
      {
        FileSystem fs1 = FileSystem.newInstance(conf);
        YarnClient yarnClient = null;
        if (renewRMToken) {
          yarnClient = YarnClient.createYarnClient();
          yarnClient.init(conf);
          yarnClient.start();
        }
        Credentials creds = new Credentials();
        try {
          fs1.addDelegationTokens(tokenRenewer, creds);
          if (renewRMToken) {
            org.apache.hadoop.yarn.api.records.Token rmDelToken = yarnClient
                .getRMDelegationToken(new Text(tokenRenewer));
            Token<RMDelegationTokenIdentifier> rmToken = ConverterUtils.convertFromYarn(rmDelToken, rmAddress);
            creds.addToken(rmToken.getService(), rmToken);
          }
        } finally {
          fs1.close();
          if (renewRMToken) {
            yarnClient.stop();
          }
        }
        credentials.addAll(creds);
        return null;
      }
    });
    UserGroupInformation.getCurrentUser().addCredentials(credentials);
  } catch (InterruptedException e) {
    LOG.error("Error while renewing tokens ", e);
    expiryTime = System.currentTimeMillis();
  } catch (IOException e) {
    LOG.error("Error while renewing tokens ", e);
    expiryTime = System.currentTimeMillis();
  }
  LOG.debug("number of tokens: {}", credentials.getAllTokens().size());
  Iterator<Token<?>> iter = credentials.getAllTokens().iterator();
  while (iter.hasNext()) {
    Token<?> token = iter.next();
    LOG.debug("updated token: {}", token);
  }
  keyTabFile.delete();
  return expiryTime;
}
From source file: com.mellanox.r4h.TestReadWhileWriting.java
License: Apache License

/** Test reading while writing. */
@Test
public void pipeline_02_03() throws Exception {
  final Configuration conf = new HdfsConfiguration();
  conf.setLong(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1);

  // create cluster
  final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(4).build();
  try {
    // change the lease limits.
    cluster.setLeasePeriod(SOFT_LEASE_LIMIT, HARD_LEASE_LIMIT);

    // wait for the cluster
    cluster.waitActive();
    final FileSystem fs = cluster.getFileSystem();
    final Path p = new Path(DIR, "file1");
    final int half = BLOCK_SIZE / 2;

    // a. On machine M1, create the file and write half a block of data.
    //    Invoke DFSOutputStream.hflush() on the dfs file handle.
    //    Do not close the file yet.
    {
      final FSDataOutputStream out = fs.create(p, true,
          fs.getConf().getInt(CommonConfigurationKeys.IO_FILE_BUFFER_SIZE_KEY, 4096), (short) 3, BLOCK_SIZE);
      write(out, 0, half);

      // hflush
      ((DFSOutputStream) out.getWrappedStream()).hflush();
    }

    // b. On another machine M2, open the file and verify that the half block
    //    of data can be read successfully.
    checkFile(p, half, conf);
    MiniDFSClusterBridge.getAppendTestUtilLOG().info("leasechecker.interruptAndJoin()");
    ((DistributedFileSystem) fs).dfs.getLeaseRenewer().interruptAndJoin();

    // c. On M1, append another half block of data. Close the file on M1.
    {
      // sleep to let the lease expire.
      Thread.sleep(2 * SOFT_LEASE_LIMIT);

      final UserGroupInformation current = UserGroupInformation.getCurrentUser();
      final UserGroupInformation ugi = UserGroupInformation
          .createUserForTesting(current.getShortUserName() + "x", new String[] { "supergroup" });
      final DistributedFileSystem dfs = ugi.doAs(new PrivilegedExceptionAction<DistributedFileSystem>() {
        @Override
        public DistributedFileSystem run() throws Exception {
          return (DistributedFileSystem) FileSystem.newInstance(conf);
        }
      });
      final FSDataOutputStream out = append(dfs, p);
      write(out, 0, half);
      out.close();
    }

    // d. On M2, open the file and read one block of data from it. Close the file.
    checkFile(p, 2 * half, conf);
  } finally {
    cluster.shutdown();
  }
}
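Note the pattern in step (c): the second writer is obtained via FileSystem.newInstance(conf) inside ugi.doAs(...), which guarantees a brand-new DFSClient (and therefore a new lease holder) for the appending user, rather than reusing a cached client whose lease was just allowed to expire.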
From source file: com.microsoft.canberra.tf.util.DoubleMatrixTextIO.java
License: Open Source License

@Inject
public DoubleMatrixTextIO() throws IOException {
  final YarnConfiguration yarnConf = new YarnConfiguration();
  yarnConf.set("fs.hdfs.impl", DistributedFileSystem.class.getName());
  yarnConf.set("fs.file.impl", LocalFileSystem.class.getName());
  this.fileSystem = FileSystem.newInstance(yarnConf);
}
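Pinning fs.hdfs.impl and fs.file.impl explicitly, as done here, is a common workaround when shaded or uber jars lose the META-INF service entries that Hadoop otherwise uses to discover FileSystem implementations; the example itself does not state its motivation.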
From source file: com.mozilla.bagheera.sink.SequenceFileSink.java
License: Apache License

public SequenceFileSink(String namespace, String baseDirPath, String dateFormat, long maxFileSize,
    boolean useBytesValue, boolean addTimestamp) throws IOException {
  LOG.info("Initializing writer for namespace: " + namespace);
  conf = new Configuration();
  conf.setBoolean("fs.automatic.close", false);
  hdfs = FileSystem.newInstance(conf);
  this.useBytesValue = useBytesValue;
  this.maxFileSize = maxFileSize;
  this.addTimestamp = addTimestamp;
  sdf = new SimpleDateFormat(dateFormat);
  if (!baseDirPath.endsWith(Path.SEPARATOR)) {
    baseDir = new Path(baseDirPath + Path.SEPARATOR + namespace + Path.SEPARATOR
        + sdf.format(new Date(System.currentTimeMillis())));
  } else {
    baseDir = new Path(baseDirPath + namespace + Path.SEPARATOR
        + sdf.format(new Date(System.currentTimeMillis())));
  }
  initWriter();
  stored = Metrics.newMeter(new MetricName("bagheera", "sink.hdfs.", namespace + ".stored"),
      "messages", TimeUnit.SECONDS);
}
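This constructor sets fs.automatic.close to false before calling newInstance, presumably so that Hadoop's JVM shutdown hook does not close the filesystem while the sink is still flushing its writer; the sink then has to close hdfs explicitly during its own shutdown.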
From source file: com.pagerankcalculator.TwitterPageRank.java

/**
 * Graph parsing.
 * Ingests the raw input data and initializes the PageRank values.
 *
 * @param in input data file
 * @param out output directory
 */
public int parseGraph(String in, String out) throws IOException, InterruptedException, ClassNotFoundException {
  Job job = Job.getInstance(getConf());
  job.setJobName("[" + TwitterPageRank.AUTHOR + "]: Job#1 Parsing Graph");
  job.setJarByClass(TwitterPageRank.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setMapperClass(GraphParsingMapper.class);
  job.setReducerClass(GraphParsingReducer.class);
  job.setInputFormatClass(TextInputFormat.class);
  job.setNumReduceTasks(TwitterPageRank.NUM_REDUCE_TASKS);
  LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);

  Path inputFilePath = new Path(in);
  Path outputFilePath = new Path(out);
  FileInputFormat.addInputPath(job, inputFilePath);
  FileOutputFormat.setOutputPath(job, outputFilePath);

  FileSystem fs = FileSystem.newInstance(getConf());
  if (fs.exists(outputFilePath)) {
    fs.delete(outputFilePath, true);
  }

  return job.waitForCompletion(true) ? 0 : 1;
}