List of usage examples for org.apache.hadoop.io SequenceFile createWriter
@Deprecated public static Writer createWriter(Configuration conf, FSDataOutputStream out, Class keyClass, Class valClass, CompressionType compressionType, CompressionCodec codec, Metadata metadata) throws IOException
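The overload shown above writes to an already-opened FSDataOutputStream; most of the examples on this page instead call the equally deprecated overloads that take a FileSystem, a Configuration and a Path. Below is a minimal sketch of the stream-based form; the /tmp path, the IntWritable/Text key and value types, and the class name are placeholders rather than code from any of the projects listed here.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.DefaultCodec;

public class CreateWriterFromStream {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    // Hypothetical output location; replace with a real path.
    FSDataOutputStream out = fs.create(new Path("/tmp/example.seq"));

    @SuppressWarnings("deprecation")
    SequenceFile.Writer writer = SequenceFile.createWriter(conf, out, IntWritable.class, Text.class,
        SequenceFile.CompressionType.BLOCK, new DefaultCodec(), new SequenceFile.Metadata());
    try {
      writer.append(new IntWritable(1), new Text("first record"));
    } finally {
      // The stream-based writer does not own the stream, so close both.
      writer.close();
      out.close();
    }
  }
}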
From source file:gobblin.metastore.FsStateStore.java
License:Apache License
/**
 * See {@link StateStore#putAll(String, String, Collection)}.
 *
 * <p>
 * This implementation does not support putting the state objects into an existing store as
 * append is to be supported by the Hadoop SequenceFile (HADOOP-7139).
 * </p>
 */
@Override
public void putAll(String storeName, String tableName, Collection<T> states) throws IOException {
  String tmpTableName = this.useTmpFileForPut ? TMP_FILE_PREFIX + tableName : tableName;
  Path tmpTablePath = new Path(new Path(this.storeRootDir, storeName), tmpTableName);

  if (!this.fs.exists(tmpTablePath) && !create(storeName, tmpTableName)) {
    throw new IOException("Failed to create a state file for table " + tmpTableName);
  }

  Closer closer = Closer.create();
  try {
    @SuppressWarnings("deprecation")
    SequenceFile.Writer writer = closer.register(SequenceFile.createWriter(this.fs, this.conf, tmpTablePath,
        Text.class, this.stateClass, SequenceFile.CompressionType.BLOCK, new DefaultCodec()));
    for (T state : states) {
      writer.append(new Text(Strings.nullToEmpty(state.getId())), state);
    }
  } catch (Throwable t) {
    throw closer.rethrow(t);
  } finally {
    closer.close();
  }

  if (this.useTmpFileForPut) {
    Path tablePath = new Path(new Path(this.storeRootDir, storeName), tableName);
    HadoopUtils.renamePath(this.fs, tmpTablePath, tablePath);
  }
}
From source file:io.druid.indexer.IndexGeneratorJobTest.java
License:Apache License
private void writeDataToLocalSequenceFile(File outputFile, List<String> data) throws IOException {
  Configuration conf = new Configuration();
  LocalFileSystem fs = FileSystem.getLocal(conf);
  Writer fileWriter = SequenceFile.createWriter(fs, conf, new Path(outputFile.getAbsolutePath()),
      BytesWritable.class, BytesWritable.class, SequenceFile.CompressionType.NONE, (CompressionCodec) null);

  int keyCount = 10;
  for (String line : data) {
    ByteBuffer buf = ByteBuffer.allocate(4);
    buf.putInt(keyCount);
    BytesWritable key = new BytesWritable(buf.array());
    BytesWritable value = new BytesWritable(line.getBytes(Charsets.UTF_8));
    fileWriter.append(key, value);
    keyCount += 1;
  }

  fileWriter.close();
}
From source file:org.apache.druid.indexer.IndexGeneratorJobTest.java
License:Apache License
private void writeDataToLocalSequenceFile(File outputFile, List<String> data) throws IOException {
  Configuration conf = new Configuration();
  LocalFileSystem fs = FileSystem.getLocal(conf);
  Writer fileWriter = SequenceFile.createWriter(fs, conf, new Path(outputFile.getAbsolutePath()),
      BytesWritable.class, BytesWritable.class, SequenceFile.CompressionType.NONE, (CompressionCodec) null);

  int keyCount = 10;
  for (String line : data) {
    ByteBuffer buf = ByteBuffer.allocate(4);
    buf.putInt(keyCount);
    BytesWritable key = new BytesWritable(buf.array());
    BytesWritable value = new BytesWritable(StringUtils.toUtf8(line));
    fileWriter.append(key, value);
    keyCount += 1;
  }

  fileWriter.close();
}
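Both Druid variants above only write the local sequence file. A hedged sketch of reading the records back with the non-deprecated SequenceFile.Reader options API is shown below; the path and class name are placeholders, and the key decoding simply reverses the ByteBuffer encoding used in writeDataToLocalSequenceFile.

import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.SequenceFile;

public class ReadLocalSequenceFile {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    // Placeholder: point this at the file produced by writeDataToLocalSequenceFile().
    Path path = new Path("/tmp/example.seq");
    SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(path));
    try {
      BytesWritable key = new BytesWritable();
      BytesWritable value = new BytesWritable();
      while (reader.next(key, value)) {
        // The writer stored a 4-byte big-endian int key and a UTF-8 encoded line as the value.
        int keyInt = ByteBuffer.wrap(key.copyBytes()).getInt();
        String line = new String(value.copyBytes(), StandardCharsets.UTF_8);
        System.out.println(keyInt + "\t" + line);
      }
    } finally {
      reader.close();
    }
  }
}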
From source file:org.apache.gobblin.metastore.FsStateStore.java
License:Apache License
/**
 * See {@link StateStore#put(String, String, T)}.
 *
 * <p>
 * This implementation does not support putting the state object into an existing store as
 * append is to be supported by the Hadoop SequenceFile (HADOOP-7139).
 * </p>
 */
@Override
public void put(String storeName, String tableName, T state) throws IOException {
  String tmpTableName = this.useTmpFileForPut ? TMP_FILE_PREFIX + tableName : tableName;
  Path tmpTablePath = new Path(new Path(this.storeRootDir, storeName), tmpTableName);

  if (!this.fs.exists(tmpTablePath) && !create(storeName, tmpTableName)) {
    throw new IOException("Failed to create a state file for table " + tmpTableName);
  }

  Closer closer = Closer.create();
  try {
    @SuppressWarnings("deprecation")
    SequenceFile.Writer writer = closer.register(SequenceFile.createWriter(this.fs, this.conf, tmpTablePath,
        Text.class, this.stateClass, SequenceFile.CompressionType.BLOCK, new DefaultCodec()));
    writer.append(new Text(Strings.nullToEmpty(state.getId())), state);
  } catch (Throwable t) {
    throw closer.rethrow(t);
  } finally {
    closer.close();
  }

  if (this.useTmpFileForPut) {
    Path tablePath = new Path(new Path(this.storeRootDir, storeName), tableName);
    renamePath(tmpTablePath, tablePath);
  }
}
From source file:org.apache.gobblin.metastore.FsStateStore.java
License:Apache License
/**
 * See {@link StateStore#putAll(String, String, Collection)}.
 *
 * <p>
 * This implementation does not support putting the state objects into an existing store as
 * append is to be supported by the Hadoop SequenceFile (HADOOP-7139).
 * </p>
 */
@Override
public void putAll(String storeName, String tableName, Collection<T> states) throws IOException {
  String tmpTableName = this.useTmpFileForPut ? TMP_FILE_PREFIX + tableName : tableName;
  Path tmpTablePath = new Path(new Path(this.storeRootDir, storeName), tmpTableName);

  if (!this.fs.exists(tmpTablePath) && !create(storeName, tmpTableName)) {
    throw new IOException("Failed to create a state file for table " + tmpTableName);
  }

  Closer closer = Closer.create();
  try {
    @SuppressWarnings("deprecation")
    SequenceFile.Writer writer = closer.register(SequenceFile.createWriter(this.fs, this.conf, tmpTablePath,
        Text.class, this.stateClass, SequenceFile.CompressionType.BLOCK, new DefaultCodec()));
    for (T state : states) {
      writer.append(new Text(Strings.nullToEmpty(state.getId())), state);
    }
  } catch (Throwable t) {
    throw closer.rethrow(t);
  } finally {
    closer.close();
  }

  if (this.useTmpFileForPut) {
    Path tablePath = new Path(new Path(this.storeRootDir, storeName), tableName);
    renamePath(tmpTablePath, tablePath);
  }
}
From source file:org.apache.mahout.math.hadoop.stochasticsvd.LocalSSVDPCADenseTest.java
License:Apache License
public void runSSVDSolver(int q) throws IOException {

  Configuration conf = new Configuration();
  conf.set("mapred.job.tracker", "local");
  conf.set("fs.default.name", "file:///");

  // conf.set("mapred.job.tracker","localhost:11011");
  // conf.set("fs.default.name","hdfs://localhost:11010/");

  Deque<Closeable> closeables = new LinkedList<Closeable>();
  Random rnd = RandomUtils.getRandom();

  File tmpDir = getTestTempDir("svdtmp");
  conf.set("hadoop.tmp.dir", tmpDir.getAbsolutePath());
  Path aLocPath = new Path(getTestTempDirPath("svdtmp/A"), "A.seq");

  // create distributed row matrix-like struct
  SequenceFile.Writer w = SequenceFile.createWriter(FileSystem.getLocal(conf), conf, aLocPath,
      IntWritable.class, VectorWritable.class, CompressionType.BLOCK, new DefaultCodec());
  closeables.addFirst(w);

  int n = 100;
  int m = 2000;
  double percent = 5;

  VectorWritable vw = new VectorWritable();
  IntWritable roww = new IntWritable();

  Vector xi = new DenseVector(n);

  double muAmplitude = 50.0;
  for (int i = 0; i < m; i++) {
    Vector dv = new SequentialAccessSparseVector(n);
    for (int j = 0; j < n * percent / 100; j++) {
      dv.setQuick(rnd.nextInt(n), muAmplitude * (rnd.nextDouble() - 0.25));
    }
    roww.set(i);
    vw.set(dv);
    w.append(roww, vw);
    xi.assign(dv, Functions.PLUS);
  }
  closeables.remove(w);
  Closeables.close(w, true);

  xi.assign(Functions.mult(1 / m));

  FileSystem fs = FileSystem.get(conf);

  Path tempDirPath = getTestTempDirPath("svd-proc");
  Path aPath = new Path(tempDirPath, "A/A.seq");
  fs.copyFromLocalFile(aLocPath, aPath);
  Path xiPath = new Path(tempDirPath, "xi/xi.seq");
  SSVDHelper.saveVector(xi, xiPath, conf);

  Path svdOutPath = new Path(tempDirPath, "SSVD-out");

  // make sure we wipe out previous test results, just a convenience
  fs.delete(svdOutPath, true);

  // Solver starts here:
  System.out.println("Input prepared, starting solver...");

  int ablockRows = 867;
  int p = 60;
  int k = 40;
  SSVDSolver ssvd = new SSVDSolver(conf, new Path[] { aPath }, svdOutPath, ablockRows, k, p, 3);
  ssvd.setOuterBlockHeight(500);
  ssvd.setAbtBlockHeight(251);
  ssvd.setPcaMeanPath(xiPath);

  /*
   * removing V,U jobs from this test to reduce running time. i will keep them
   * put in the dense test though.
   */
  ssvd.setComputeU(false);
  ssvd.setComputeV(false);

  ssvd.setOverwrite(true);
  ssvd.setQ(q);
  ssvd.setBroadcast(true);
  ssvd.run();

  Vector stochasticSValues = ssvd.getSingularValues();
  System.out.println("--SSVD solver singular values:");
  LocalSSVDSolverSparseSequentialTest.dumpSv(stochasticSValues);
  System.out.println("--Colt SVD solver singular values:");

  // try to run the same thing without stochastic algo
  double[][] a = SSVDHelper.loadDistributedRowMatrix(fs, aPath, conf);

  // subtract pseudo pca mean
  for (int i = 0; i < m; i++)
    for (int j = 0; j < n; j++)
      a[i][j] -= xi.getQuick(j);

  SingularValueDecomposition svd2 = new SingularValueDecomposition(new DenseMatrix(a));

  Vector svalues2 = new DenseVector(svd2.getSingularValues());
  LocalSSVDSolverSparseSequentialTest.dumpSv(svalues2);

  for (int i = 0; i < k + p; i++) {
    assertTrue(Math.abs(svalues2.getQuick(i) - stochasticSValues.getQuick(i)) <= s_epsilon);
  }

  double[][] mQ = SSVDHelper.loadDistributedRowMatrix(fs,
      new Path(svdOutPath, "Bt-job/" + BtJob.OUTPUT_Q + "-*"), conf);

  SSVDCommonTest.assertOrthonormality(new DenseMatrix(mQ), false, s_epsilon);
}
From source file:org.apache.mahout.math.hadoop.stochasticsvd.LocalSSVDPCASparseTest.java
License:Apache License
public void runSSVDSolver(int q) throws IOException {

  Configuration conf = new Configuration();
  conf.set("mapred.job.tracker", "local");
  conf.set("fs.default.name", "file:///");

  // conf.set("mapred.job.tracker","localhost:11011");
  // conf.set("fs.default.name","hdfs://localhost:11010/");

  Deque<Closeable> closeables = Lists.newLinkedList();
  try {
    Random rnd = RandomUtils.getRandom();

    File tmpDir = getTestTempDir("svdtmp");
    conf.set("hadoop.tmp.dir", tmpDir.getAbsolutePath());
    Path aLocPath = new Path(getTestTempDirPath("svdtmp/A"), "A.seq");

    // create distributed row matrix-like struct
    SequenceFile.Writer w = SequenceFile.createWriter(FileSystem.getLocal(conf), conf, aLocPath,
        Text.class, VectorWritable.class, CompressionType.BLOCK, new DefaultCodec());
    closeables.addFirst(w);

    int n = 100;
    int m = 2000;
    double percent = 5;

    VectorWritable vw = new VectorWritable();
    Text rkey = new Text();

    Vector xi = new DenseVector(n);

    double muAmplitude = 50.0;
    for (int i = 0; i < m; i++) {
      Vector dv = new SequentialAccessSparseVector(n);
      String rowname = "row-" + i;
      NamedVector namedRow = new NamedVector(dv, rowname);
      for (int j = 0; j < n * percent / 100; j++) {
        dv.setQuick(rnd.nextInt(n), muAmplitude * (rnd.nextDouble() - 0.25));
      }
      rkey.set("row-i" + i);
      vw.set(namedRow);
      w.append(rkey, vw);
      xi.assign(dv, Functions.PLUS);
    }
    closeables.remove(w);
    Closeables.close(w, false);

    xi.assign(Functions.mult(1.0 / m));

    FileSystem fs = FileSystem.get(conf);

    Path tempDirPath = getTestTempDirPath("svd-proc");
    Path aPath = new Path(tempDirPath, "A/A.seq");
    fs.copyFromLocalFile(aLocPath, aPath);
    Path xiPath = new Path(tempDirPath, "xi/xi.seq");
    SSVDHelper.saveVector(xi, xiPath, conf);

    Path svdOutPath = new Path(tempDirPath, "SSVD-out");

    // make sure we wipe out previous test results, just a convenience
    fs.delete(svdOutPath, true);

    // Solver starts here:
    System.out.println("Input prepared, starting solver...");

    int ablockRows = 867;
    int p = 60;
    int k = 40;
    SSVDSolver ssvd = new SSVDSolver(conf, new Path[] { aPath }, svdOutPath, ablockRows, k, p, 3);
    ssvd.setOuterBlockHeight(500);
    ssvd.setAbtBlockHeight(251);
    ssvd.setPcaMeanPath(xiPath);

    /*
     * Removing V,U jobs from this test to reduce running time. i will keep them
     * put in the dense test though.
     *
     * For PCA test, we also want to request U*Sigma output and check it for named
     * vector propagation.
     */
    ssvd.setComputeU(false);
    ssvd.setComputeV(false);
    ssvd.setcUSigma(true);

    ssvd.setOverwrite(true);
    ssvd.setQ(q);
    ssvd.setBroadcast(true);
    ssvd.run();

    Vector stochasticSValues = ssvd.getSingularValues();

    // try to run the same thing without stochastic algo
    Matrix a = SSVDHelper.drmLoadAsDense(fs, aPath, conf);

    verifyInternals(svdOutPath, a, new Omega(ssvd.getOmegaSeed(), k + p), k + p, q);

    // subtract pseudo pca mean
    for (int i = 0; i < m; i++) {
      a.viewRow(i).assign(xi, Functions.MINUS);
    }

    SingularValueDecomposition svd2 = new SingularValueDecomposition(a);

    Vector svalues2 = new DenseVector(svd2.getSingularValues());

    System.out.println("--SSVD solver singular values:");
    LocalSSVDSolverSparseSequentialTest.dumpSv(stochasticSValues);
    System.out.println("--SVD solver singular values:");
    LocalSSVDSolverSparseSequentialTest.dumpSv(svalues2);

    for (int i = 0; i < k + p; i++) {
      assertTrue(Math.abs(svalues2.getQuick(i) - stochasticSValues.getQuick(i)) <= s_epsilon);
    }

    DenseMatrix mQ = SSVDHelper.drmLoadAsDense(fs, new Path(svdOutPath, "Bt-job/" + BtJob.OUTPUT_Q + "-*"), conf);

    SSVDCommonTest.assertOrthonormality(mQ, false, s_epsilon);

    // assert name propagation
    for (Iterator<Pair<Writable, Vector>> iter = SSVDHelper.drmIterator(fs,
        new Path(ssvd.getuSigmaPath() + "/*"), conf, closeables); iter.hasNext();) {
      Pair<Writable, Vector> pair = iter.next();
      Writable key = pair.getFirst();
      Vector v = pair.getSecond();

      assertTrue(v instanceof NamedVector);
      assertTrue(key instanceof Text);
    }
  } finally {
    IOUtils.close(closeables);
  }
}
From source file:org.apache.mahout.math.hadoop.stochasticsvd.LocalSSVDSolverSparseSequentialTest.java
License:Apache License
public void runSSVDSolver(int q) throws IOException {

  Configuration conf = getConfiguration();
  conf.set("mapred.job.tracker", "local");
  conf.set("fs.default.name", "file:///");

  // conf.set("mapred.job.tracker","localhost:11011");
  // conf.set("fs.default.name","hdfs://localhost:11010/");

  Deque<Closeable> closeables = Lists.newLinkedList();
  Random rnd = RandomUtils.getRandom();

  File tmpDir = getTestTempDir("svdtmp");
  conf.set("hadoop.tmp.dir", tmpDir.getAbsolutePath());
  Path aLocPath = new Path(getTestTempDirPath("svdtmp/A"), "A.seq");

  // create distributed row matrix-like struct
  SequenceFile.Writer w = SequenceFile.createWriter(FileSystem.getLocal(conf), conf, aLocPath,
      IntWritable.class, VectorWritable.class, CompressionType.BLOCK, new DefaultCodec());
  closeables.addFirst(w);

  int n = 100;
  int m = 2000;
  double percent = 5;

  VectorWritable vw = new VectorWritable();
  IntWritable roww = new IntWritable();

  double muAmplitude = 50.0;
  for (int i = 0; i < m; i++) {
    Vector dv = new SequentialAccessSparseVector(n);
    for (int j = 0; j < n * percent / 100; j++) {
      dv.setQuick(rnd.nextInt(n), muAmplitude * (rnd.nextDouble() - 0.5));
    }
    roww.set(i);
    vw.set(dv);
    w.append(roww, vw);
  }
  closeables.remove(w);
  Closeables.close(w, false);

  FileSystem fs = FileSystem.get(aLocPath.toUri(), conf);

  Path tempDirPath = getTestTempDirPath("svd-proc");
  Path aPath = new Path(tempDirPath, "A/A.seq");
  fs.copyFromLocalFile(aLocPath, aPath);

  Path svdOutPath = new Path(tempDirPath, "SSVD-out");

  // make sure we wipe out previous test results, just a convenience
  fs.delete(svdOutPath, true);

  // Solver starts here:
  System.out.println("Input prepared, starting solver...");

  int ablockRows = 867;
  int p = 60;
  int k = 40;
  SSVDSolver ssvd = new SSVDSolver(conf, new Path[] { aPath }, svdOutPath, ablockRows, k, p, 3);
  ssvd.setOuterBlockHeight(500);
  ssvd.setAbtBlockHeight(251);

  /*
   * removing V,U jobs from this test to reduce running time. i will keep them
   * put in the dense test though.
   */
  ssvd.setComputeU(false);
  ssvd.setComputeV(false);

  ssvd.setOverwrite(true);
  ssvd.setQ(q);
  ssvd.setBroadcast(true);
  ssvd.run();

  Vector stochasticSValues = ssvd.getSingularValues();
  System.out.println("--SSVD solver singular values:");
  dumpSv(stochasticSValues);
  System.out.println("--Colt SVD solver singular values:");

  // try to run the same thing without stochastic algo
  DenseMatrix a = SSVDHelper.drmLoadAsDense(fs, aPath, conf);

  // SingularValueDecompositionImpl svd=new SingularValueDecompositionImpl(new
  // Array2DRowRealMatrix(a));
  SingularValueDecomposition svd2 = new SingularValueDecomposition(a);

  Vector svalues2 = new DenseVector(svd2.getSingularValues());
  dumpSv(svalues2);

  for (int i = 0; i < k + p; i++) {
    assertTrue(Math.abs(svalues2.getQuick(i) - stochasticSValues.getQuick(i)) <= s_epsilon);
  }

  DenseMatrix mQ = SSVDHelper.drmLoadAsDense(fs, new Path(svdOutPath, "Bt-job/" + BtJob.OUTPUT_Q + "-*"), conf);

  SSVDCommonTest.assertOrthonormality(mQ, false, s_epsilon);

  IOUtils.close(closeables);
}
From source file:org.apache.nutch.parse.ParseOutputFormat.java
License:Apache License
public RecordWriter<Text, Parse> getRecordWriter(FileSystem fs, JobConf job, String name, Progressable progress)
    throws IOException {

  if (job.getBoolean("parse.filter.urls", true)) {
    filters = new URLFilters(job);
  }
  if (job.getBoolean("parse.normalize.urls", true)) {
    normalizers = new URLNormalizers(job, URLNormalizers.SCOPE_OUTLINK);
  }

  this.scfilters = new ScoringFilters(job);
  final int interval = job.getInt("db.fetch.interval.default", 2592000);
  final boolean ignoreExternalLinks = job.getBoolean("db.ignore.external.links", false);
  int maxOutlinksPerPage = job.getInt("db.max.outlinks.per.page", 100);
  final boolean isParsing = job.getBoolean("fetcher.parse", true);
  final int maxOutlinks = (maxOutlinksPerPage < 0) ? Integer.MAX_VALUE : maxOutlinksPerPage;
  final CompressionType compType = SequenceFileOutputFormat.getOutputCompressionType(job);

  Path out = FileOutputFormat.getOutputPath(job);

  Path text = new Path(new Path(out, ParseText.DIR_NAME), name);
  Path data = new Path(new Path(out, ParseData.DIR_NAME), name);
  Path crawl = new Path(new Path(out, CrawlDatum.PARSE_DIR_NAME), name);

  final String[] parseMDtoCrawlDB = job.get("db.parsemeta.to.crawldb", "").split(" *, *");

  final MapFile.Writer textOut = new MapFile.Writer(job, fs, text.toString(), Text.class, ParseText.class,
      CompressionType.RECORD, progress);

  final MapFile.Writer dataOut = new MapFile.Writer(job, fs, data.toString(), Text.class, ParseData.class,
      compType, progress);

  final SequenceFile.Writer crawlOut = SequenceFile.createWriter(fs, job, crawl, Text.class, CrawlDatum.class,
      compType, progress);

  return new RecordWriter<Text, Parse>() {

    public void write(Text key, Parse parse) throws IOException {

      String fromUrl = key.toString();
      String fromHost = null;
      textOut.append(key, new ParseText(parse.getText()));

      ParseData parseData = parse.getData();
      // recover the signature prepared by Fetcher or ParseSegment
      String sig = parseData.getContentMeta().get(Nutch.SIGNATURE_KEY);
      if (sig != null) {
        byte[] signature = StringUtil.fromHexString(sig);
        if (signature != null) {
          // append a CrawlDatum with a signature
          CrawlDatum d = new CrawlDatum(CrawlDatum.STATUS_SIGNATURE, 0);
          d.setSignature(signature);
          crawlOut.append(key, d);
        }
      }

      // see if the parse metadata contain things that we'd like
      // to pass to the metadata of the crawlDB entry
      CrawlDatum parseMDCrawlDatum = null;
      for (String mdname : parseMDtoCrawlDB) {
        String mdvalue = parse.getData().getParseMeta().get(mdname);
        if (mdvalue != null) {
          if (parseMDCrawlDatum == null)
            parseMDCrawlDatum = new CrawlDatum(CrawlDatum.STATUS_PARSE_META, 0);
          parseMDCrawlDatum.getMetaData().put(new Text(mdname), new Text(mdvalue));
        }
      }
      if (parseMDCrawlDatum != null)
        crawlOut.append(key, parseMDCrawlDatum);

      try {
        ParseStatus pstatus = parseData.getStatus();
        if (pstatus != null && pstatus.isSuccess() && pstatus.getMinorCode() == ParseStatus.SUCCESS_REDIRECT) {
          String newUrl = pstatus.getMessage();
          int refreshTime = Integer.valueOf(pstatus.getArgs()[1]);
          try {
            if (normalizers != null) {
              newUrl = normalizers.normalize(newUrl, URLNormalizers.SCOPE_FETCHER);
            }
          } catch (MalformedURLException mfue) {
            newUrl = null;
          }

          if (filters != null) {
            if (newUrl != null)
              newUrl = filters.filter(newUrl);
          }

          String url = key.toString();
          if (newUrl != null && !newUrl.equals(url)) {
            String reprUrl = URLUtil.chooseRepr(url, newUrl, refreshTime < Fetcher.PERM_REFRESH_TIME);
            CrawlDatum newDatum = new CrawlDatum();
            newDatum.setStatus(CrawlDatum.STATUS_LINKED);
            if (reprUrl != null && !reprUrl.equals(newUrl)) {
              newDatum.getMetaData().put(Nutch.WRITABLE_REPR_URL_KEY, new Text(reprUrl));
            }
            crawlOut.append(new Text(newUrl), newDatum);
          }
        }
      } catch (URLFilterException e) {
        // ignore
      }

      // collect outlinks for subsequent db update
      Outlink[] links = parseData.getOutlinks();
      int outlinksToStore = Math.min(maxOutlinks, links.length);
      if (ignoreExternalLinks) {
        try {
          fromHost = new URL(fromUrl).getHost().toLowerCase();
        } catch (MalformedURLException e) {
          fromHost = null;
        }
      } else {
        fromHost = null;
      }

      int validCount = 0;
      CrawlDatum adjust = null;
      List<Entry<Text, CrawlDatum>> targets = new ArrayList<Entry<Text, CrawlDatum>>(outlinksToStore);
      List<Outlink> outlinkList = new ArrayList<Outlink>(outlinksToStore);
      for (int i = 0; i < links.length && validCount < outlinksToStore; i++) {
        String toUrl = links[i].getToUrl();

        // Only normalize and filter if fetcher.parse = false
        if (!isParsing) {
          toUrl = ParseOutputFormat.filterNormalize(fromUrl, toUrl, fromHost, ignoreExternalLinks, filters,
              normalizers);
          if (toUrl == null) {
            continue;
          }
        }

        CrawlDatum target = new CrawlDatum(CrawlDatum.STATUS_LINKED, interval);
        Text targetUrl = new Text(toUrl);
        try {
          scfilters.initialScore(targetUrl, target);
        } catch (ScoringFilterException e) {
          LOG.warn("Cannot filter init score for url " + key + ", using default: " + e.getMessage());
          target.setScore(0.0f);
        }

        targets.add(new SimpleEntry(targetUrl, target));

        // OVerwrite URL in Outlink object with normalized URL (NUTCH-1174)
        links[i].setUrl(toUrl);
        outlinkList.add(links[i]);
        validCount++;
      }

      try {
        // compute score contributions and adjustment to the original score
        adjust = scfilters.distributeScoreToOutlinks((Text) key, parseData, targets, null, links.length);
      } catch (ScoringFilterException e) {
        LOG.warn("Cannot distribute score from " + key + ": " + e.getMessage());
      }
      for (Entry<Text, CrawlDatum> target : targets) {
        crawlOut.append(target.getKey(), target.getValue());
      }
      if (adjust != null)
        crawlOut.append(key, adjust);

      Outlink[] filteredLinks = outlinkList.toArray(new Outlink[outlinkList.size()]);
      parseData = new ParseData(parseData.getStatus(), parseData.getTitle(), filteredLinks,
          parseData.getContentMeta(), parseData.getParseMeta());
      dataOut.append(key, parseData);
      if (!parse.isCanonical()) {
        CrawlDatum datum = new CrawlDatum();
        datum.setStatus(CrawlDatum.STATUS_FETCH_SUCCESS);
        String timeString = parse.getData().getContentMeta().get(Nutch.FETCH_TIME_KEY);
        try {
          datum.setFetchTime(Long.parseLong(timeString));
        } catch (Exception e) {
          LOG.warn("Can't read fetch time for: " + key);
          datum.setFetchTime(System.currentTimeMillis());
        }
        crawlOut.append(key, datum);
      }
    }

    public void close(Reporter reporter) throws IOException {
      textOut.close();
      dataOut.close();
      crawlOut.close();
    }
  };
}
From source file:org.apache.sqoop.connector.hdfs.hdfsWriter.HdfsSequenceWriter.java
License:Apache License
@SuppressWarnings("deprecation") public void initialize(Path filepath, Configuration conf, CompressionCodec codec) throws IOException { if (codec != null) { filewriter = SequenceFile.createWriter(filepath.getFileSystem(conf), conf, filepath, Text.class, NullWritable.class, SequenceFile.CompressionType.BLOCK, codec); } else {// w w w . j ava 2s . c om filewriter = SequenceFile.createWriter(filepath.getFileSystem(conf), conf, filepath, Text.class, NullWritable.class, SequenceFile.CompressionType.NONE); } text = new Text(); }