List of usage examples for org.apache.hadoop.fs.Path.toString()
@Override
public String toString()
From source file:com.anhth12.lambda.app.ml.als.ALSUpdate.java
/**
 * Saves the given feature vectors under {@code path} as text compressed with {@code GzipCodec}.
 *
 * <p>Every entry is rendered by {@code TextUtils.joinJSON} from a two-element list holding the key
 * and the vector. The key is the string found in {@code reverseIDMaping} for the entry's integer ID,
 * or the integer ID itself when the map has no value for it.
 *
 * @param features pairs of integer ID and feature vector
 * @param path output location; its string form is handed to {@code saveAsTextFile}
 * @param reverseIDMaping lookup from integer ID to the original string key
 */
private static void saveFeaturesRDD(JavaPairRDD<Integer, double[]> features, Path path,
        final Map<Integer, String> reverseIDMaping) {
    log.info("Saving features RDD to {}", path);

    Function<Tuple2<Integer, double[]>, String> toJsonLine =
            new Function<Tuple2<Integer, double[]>, String>() {
                @Override
                public String call(Tuple2<Integer, double[]> keyAndVector) throws Exception {
                    Integer numericId = keyAndVector._1();
                    String mappedKey = reverseIDMaping.get(numericId);
                    // Fall back to the numeric ID when no original key is known.
                    Object key;
                    if (mappedKey != null) {
                        key = mappedKey;
                    } else {
                        key = numericId;
                    }
                    double[] vector = keyAndVector._2();
                    return TextUtils.joinJSON(Arrays.asList(key, vector));
                }
            };

    features.map(toJsonLine).saveAsTextFile(path.toString(), GzipCodec.class);
}
From source file:com.anhth12.lambda.BatchUpdateFunction.java
private static String joinFSPaths(FileSystem fs, FileStatus[] statuses) { StringBuilder sb = new StringBuilder(); for (FileStatus status : statuses) { if (sb.length() > 0) { sb.append(","); }//from w ww . ja va 2s . c o m Path path = fs.makeQualified(status.getPath()); sb.append(StringUtils.escapeString(path.toString())); } return sb.toString(); }
From source file:com.asakusafw.bulkloader.collector.ExportFileSend.java
License:Apache License
/** * ????TSV??/*from w w w .j a v a 2s .co m*/ * {@link com.asakusafw.bulkloader.transfer.FileList.Writer}???? * @param <T> ? * @param targetTableModel Export??Model? * @param filePath Export * @param writer ?Writer * @param tableName ?? * @return ?????????????????? -1 * @throws BulkLoaderSystemException ?????? */ protected <T extends Writable> long send(Class<T> targetTableModel, String filePath, FileList.Writer writer, String tableName) throws BulkLoaderSystemException { FileSystem fs = null; String fileName = null; // ?? long maxSize = Long.parseLong(ConfigurationLoader.getProperty(Constants.PROP_KEY_EXP_LOAD_MAX_SIZE)); try { TsvIoFactory<T> factory = new TsvIoFactory<>(targetTableModel); Configuration conf = new Configuration(); fs = FileSystem.get(new URI(filePath), conf); // ????? FileStatus[] status = fs.globStatus(new Path(filePath)); Path[] listedPaths = FileUtil.stat2Paths(status); if (listedPaths == null) { LOG.info("TG-COLLECTOR-02006", tableName, filePath); return -1; } else { LOG.info("TG-COLLECTOR-02007", listedPaths.length, tableName, filePath); } long count = 0; boolean addEntry = false; for (Path path : listedPaths) { // ????? if (isSystemFile(path)) { continue; } // TODO ???? // ?????? ModelInput<T> input = TemporaryStorage.openInput(conf, targetTableModel, path); try { while (true) { // addEntry = true; fileName = FileNameUtil.createSendExportFileName(tableName, fileNameMap); OutputStream output = writer.openNext(FileList.content(fileName)); try { CountingOutputStream counter = new CountingOutputStream(output); ModelOutput<T> modelOut = factory.createModelOutput(counter); T model = factory.createModelObject(); LOG.info("TG-COLLECTOR-02004", tableName, path.toString(), fileName); // ???ModelTSV?? boolean nextFile = false; while (input.readTo(model)) { // Modol??? modelOut.write(model); count++; // ??????? // char?byte????????? // ??????(????) 
if (counter.getByteCount() > maxSize) { nextFile = true; break; } } modelOut.close(); LOG.info("TG-COLLECTOR-02005", tableName, path.toString(), fileName); if (nextFile) { // ??????? continue; } else { // ???????? break; } } finally { output.close(); } } } finally { input.close(); } } if (addEntry) { return count; } else { assert count == 0; return -1; } } catch (IOException e) { throw new BulkLoaderSystemException(e, getClass(), "TG-COLLECTOR-02001", MessageFormat .format("HDFS?{0} ???{1}", filePath, fileName)); } catch (URISyntaxException e) { throw new BulkLoaderSystemException(e, getClass(), "TG-COLLECTOR-02001", MessageFormat.format("HDFS???HDFS?{0}", filePath)); } finally { if (fs != null) { try { fs.close(); } catch (IOException e) { throw new BulkLoaderSystemException(e, this.getClass(), "TG-COLLECTOR-02001", MessageFormat.format( "HDFS???URI{0}", filePath)); } } } }
From source file:com.asakusafw.cleaner.main.HDFSCleaner.java
License:Apache License
/** * HDFSCleaner???/*from ww w . ja v a2s .co m*/ * @param args * @return */ protected int execute(String[] args) { String[] prop = new String[1]; String mode = null; String user = null; FileSystem fs = null; if (args.length > 0) { mode = args[0]; } if (args.length > 1) { user = args[1]; } if (args.length > 2) { prop[0] = args[2]; } // ?? if (args.length != 3) { System.err.println("ERROR????? ?" + args.length + " " + mode + " ??" + user + " " + prop[0]); Log.log(CLASS, MessageIdConst.HCLN_PARAMCHECK_ERROR, "?", args.length, new Date(), mode, prop[0]); return Constants.EXIT_CODE_ERROR; } try { // ?? if (!CleanerInitializer.initDFSCleaner(prop)) { Log.log(CLASS, MessageIdConst.HCLN_INIT_ERROR, new Date(), mode, prop[0]); return Constants.EXIT_CODE_ERROR; } // Log.log(CLASS, MessageIdConst.HCLN_START, new Date(), mode, prop[0]); // ? boolean recursive = false; if (Constants.CLEAN_MODE_NOMAL.equals(mode)) { recursive = false; } else if (Constants.CLEAN_MODE_RECURSIVE.equals(mode)) { recursive = true; } else { Log.log(CLASS, MessageIdConst.HCLN_PARAMCHECK_ERROR, "", mode, new Date(), mode, prop[0]); return Constants.EXIT_CODE_ERROR; } // HDFS?? DFSCleanerBean[] bean = null; try { bean = getCleanLocalPath(user); } catch (CleanerSystemException e) { Log.log(e.getCause(), e.getClazz(), e.getMessageId(), e.getMessageArgs()); return Constants.EXIT_CODE_ERROR; } // ??? int keepDate = getHDFSFileKeepDate(); boolean cleanResult = true; Date now = new Date(); for (int i = 0; i < bean.length; i++) { try { // Path cleanDir = bean[i].getCleanDir(); // ? 
try { Configuration conf = getConf(); fs = cleanDir.getFileSystem(conf); if (fs == null) { Log.log(CLASS, MessageIdConst.HCLN_CLEN_DIR_ERROR, "Path.getFileSystem??null", cleanDir.toString()); cleanResult = false; continue; } } catch (IOException e) { Log.log(e, CLASS, MessageIdConst.HCLN_CLEN_DIR_ERROR, "HDFS????", cleanDir.toString()); cleanResult = false; continue; } boolean target = bean[i].hasExecutionId(); String pattern = bean[i].getPattern(); Log.log(CLASS, MessageIdConst.HCLN_CLEN_FILE, cleanDir.toString(), pattern, keepDate, mode, target, now); if (cleanDir(fs, cleanDir, target, pattern, keepDate, now, recursive)) { Log.log(CLASS, MessageIdConst.HCLN_CLEN_DIR_SUCCESS, cleanDir.toString(), keepDate, mode); } else { Log.log(CLASS, MessageIdConst.HCLN_CLEN_DIR_FAIL, cleanDir.toString(), keepDate, mode); cleanResult = false; } } catch (CleanerSystemException e) { Log.log(e.getCause(), e.getClazz(), e.getMessageId(), e.getMessageArgs()); cleanResult = false; } finally { if (fs != null) { // CHECKSTYLE:OFF EmptyBlockCheck try { fs.close(); } catch (IOException ignored) { // ignored } // CHECKSTYLE:ON EmptyBlockCheck } } } // if (cleanResult) { Log.log(CLASS, MessageIdConst.HCLN_EXIT_SUCCESS, new Date(), mode, prop[0]); return Constants.EXIT_CODE_SUCCESS; } else { Log.log(CLASS, MessageIdConst.HCLN_EXIT_WARNING, new Date(), mode, prop[0]); return Constants.EXIT_CODE_WARNING; } } catch (RuntimeException e) { try { Log.log(e, CLASS, MessageIdConst.HCLN_EXCEPRION, new Date(), mode, prop[0]); return Constants.EXIT_CODE_ERROR; } catch (Exception e1) { System.err.print("HDFSCleaner????????"); e1.printStackTrace(); return Constants.EXIT_CODE_ERROR; } } }
From source file:com.asakusafw.cleaner.main.HDFSCleaner.java
License:Apache License
/** * ?//from w w w .j a v a 2s . c om * @param fs HDFS? * @param cleanPath HDFS?? * @param isSetExecutionId ID???????? * @param pattern * @param keepDate ?? * @param now ? * @param recursive ???? * @return ? * @throws CleanerSystemException */ private boolean cleanDir(FileSystem fs, Path cleanPath, boolean isSetExecutionId, String pattern, int keepDate, Date now, boolean recursive) throws CleanerSystemException { try { if (!fs.exists(cleanPath)) { // ?????? Log.log(CLASS, MessageIdConst.HCLN_CLEN_DIR_ERROR, "??????", cleanPath.toString()); return false; } if (!fs.getFileStatus(cleanPath).isDir()) { // ?????? Log.log(CLASS, MessageIdConst.HCLN_CLEN_DIR_ERROR, "??????", cleanPath.toString()); return false; } // ? Log.log(CLASS, MessageIdConst.HCLN_FILE_DELETE, cleanPath.toString()); int cleanFileCount = 0; int cleanDirCount = 0; boolean result = true; FileStatus[] dirStatus = getListStatus(fs, cleanPath); Path[] listedPaths = FileUtil.stat2Paths(dirStatus); for (Path path : listedPaths) { FileStatus status = fs.getFileStatus(path); long lastModifiedTime = status.getModificationTime(); if (status.isDir() && recursive) { // ???????? if (isSetExecutionId) { // ID??????MM??????? String executionId = path.getName(); if (isRunningJobFlow(executionId)) { // ??????? Log.log(CLASS, MessageIdConst.HCLN_CLEN_DIR_EXEC, path.toString()); continue; } } FileStatus[] childdirStatus = getListStatus(fs, path); if (childdirStatus.length == 0) { // ??????? if (isExpired(lastModifiedTime, keepDate, now)) { if (!fs.delete(path, false)) { Log.log(CLASS, MessageIdConst.HCLN_CLEN_FAIL, "", path.toString()); result = false; } else { cleanDirCount++; Log.log(CLASS, MessageIdConst.HCLN_DIR_DELETE, path.toString()); } } } else { // ????????? if (cleanDir(fs, path, false, pattern, keepDate, now, recursive)) { // ???????? 
childdirStatus = getListStatus(fs, path); if (childdirStatus.length == 0) { if (isExpired(lastModifiedTime, keepDate, now)) { if (!fs.delete(path, false)) { Log.log(CLASS, MessageIdConst.HCLN_CLEN_FAIL, "", path.toString()); result = false; } else { cleanDirCount++; Log.log(CLASS, MessageIdConst.HCLN_DIR_DELETE, path.toString()); } } } } else { Log.log(CLASS, MessageIdConst.HCLN_CLEN_FAIL, "", path.toString()); result = false; } } } else if (!status.isDir()) { // ??????????? if (isExpired(lastModifiedTime, keepDate, now) && isMatchPattern(path, pattern)) { if (!fs.delete(path, false)) { Log.log(CLASS, MessageIdConst.HCLN_CLEN_FAIL, "", path.toString()); result = false; } else { Log.log(CLASS, MessageIdConst.HCLN_DELETE_FILE, path.toString()); cleanFileCount++; } } } } Log.log(CLASS, MessageIdConst.HCLN_FILE_DELETE_SUCCESS, cleanPath.toString(), cleanDirCount, cleanFileCount); return result; } catch (IOException e) { Log.log(e, CLASS, MessageIdConst.HCLN_CLEN_DIR_EXCEPTION, cleanPath.getName()); return false; } }
From source file:com.asakusafw.cleaner.main.HDFSCleaner.java
License:Apache License
/**
 * Tests whether the string form of {@code path} matches {@code pattern} as a whole.
 *
 * @param path path whose {@code toString()} value is matched
 * @param pattern regular expression; {@code null} or the empty string matches every path
 * @return true when the pattern is null or empty, or when the entire path string matches it
 * @throws CleanerSystemException if {@code pattern} is not a valid regular expression
 */
private boolean isMatchPattern(Path path, String pattern) throws CleanerSystemException {
    if (pattern == null || pattern.isEmpty()) {
        return true;
    }
    String candidate = path.toString();
    try {
        return Pattern.compile(pattern).matcher(candidate).matches();
    } catch (PatternSyntaxException e) {
        throw new CleanerSystemException(e, this.getClass(), MessageIdConst.HCLN_PATTERN_FAIL, pattern);
    }
}
From source file:com.asakusafw.dag.compiler.codegen.CleanupStageClientGeneratorTest.java
License:Apache License
/**
 * Generates the cleanup stage client under {@code CleanupStageClientGenerator.DEFAULT_CLASS}.
 *
 * <p>The cleanup target is the path formed from the temporary folder root and
 * {@code StageConstants.EXPR_EXECUTION_ID}.
 *
 * @return the description of the generated class
 */
private ClassDescription generate() {
    Path root = new Path(temporary.getRoot().toURI());
    String cleanupPath = new Path(root, StageConstants.EXPR_EXECUTION_ID).toString();
    return add(CleanupStageClientGenerator.DEFAULT_CLASS,
            c -> new CleanupStageClientGenerator().generate("b", "f", cleanupPath, c));
}
From source file:com.asakusafw.dag.compiler.internalio.InternalInputAdapterGeneratorTest.java
License:Apache License
private void check(String... values) { Path path = new Path(new File(temporary.getRoot(), "temp.bin").toURI()); Configuration conf = configurations.newInstance(); try (ModelOutput<Text> out = TemporaryStorage.openOutput(conf, Text.class, path)) { for (String v : values) { out.write(new Text(v)); }//from ww w . ja v a 2s .c o m } catch (IOException e) { throw new AssertionError(e); } ClassGeneratorContext gc = context(); Spec spec = new Spec("testing", Collections.singleton(path.toString()), Descriptions.typeOf(Text.class)); ClassDescription gen = add(c -> new InternalInputAdapterGenerator().generate(gc, spec, c)); List<String> results = new ArrayList<>(); loading(gen, c -> { VertexProcessorContext vc = new MockVertexProcessorContext().with(c).withResource(conf) .withResource(new StageInfo("u", "b", "f", "s", "e", Collections.emptyMap())); input(vc, c, o -> { results.add(o.toString()); }); }); assertThat(results, containsInAnyOrder((Object[]) values)); }
From source file:com.asakusafw.dag.compiler.internalio.InternalOutputAdapterGeneratorTest.java
License:Apache License
private void check(String... values) { Path path = new Path(new File(temporary.getRoot(), "part-*").toURI()); Configuration conf = configurations.newInstance(); ClassGeneratorContext gc = context(); Spec spec = new Spec("o", path.toString(), Descriptions.typeOf(Text.class)); ClassDescription gen = add(c -> new InternalOutputAdapterGenerator().generate(gc, Arrays.asList(spec), c)); loading(gen, c -> {//from w w w .ja v a 2s.co m VertexProcessorContext vc = new MockVertexProcessorContext().with(c).withResource(conf) .withResource(new StageInfo("u", "b", "f", "s", "e", Collections.emptyMap())); try (OutputAdapter adapter = adapter(c, vc)) { adapter.initialize(); OutputHandler<? super TaskProcessorContext> handler = adapter.newHandler(); Result<Text> sink = handler.getSink(Text.class, "o"); try (Session session = handler.start(new MockTaskProcessorContext("t"))) { for (String v : values) { sink.add(new Text(v)); } } } }); Set<String> results = new LinkedHashSet<>(); try { List<Path> paths = TemporaryStorage.list(conf, path); Text buf = new Text(); for (Path p : paths) { try (ModelInput<Text> in = TemporaryStorage.openInput(conf, Text.class, p)) { while (in.readTo(buf)) { results.add(buf.toString()); } } } } catch (IOException e) { throw new AssertionError(e); } assertThat(results, containsInAnyOrder(values)); }
From source file:com.asakusafw.lang.compiler.mapreduce.CleanupStageEmitterTest.java
License:Apache License
/** * simple case./*from w w w.ja v a 2s.c o m*/ * @throws Exception if failed */ @Test public void simple() throws Exception { File a = new File(folder.getRoot(), "a/test.txt"); File b = new File(folder.getRoot(), "b/test.txt"); FileEditor.put(a, "Hello, world!"); FileEditor.put(b, "Hello, world!"); Path root = new Path(folder.getRoot().toURI()); Path base = new Path(root, "a"); ClassDescription client = new ClassDescription("com.example.StageClient"); CleanupStageInfo info = new CleanupStageInfo( new StageInfo("simple", "simple", CleanupStageInfo.DEFAULT_STAGE_ID), base.toString()); CleanupStageEmitter.emit(client, info, javac); int status = MapReduceRunner.execute(new Configuration(), client, "testing", Collections.emptyMap(), javac.compile()); assertThat("exit status code", status, is(0)); assertThat(a.isFile(), is(false)); assertThat(b.isFile(), is(true)); }