List of usage examples for org.apache.hadoop.io.Text.equals
@Override public boolean equals(Object o)
Returns true if and only if o is a Text with the same contents.
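Before the examples, a minimal standalone sketch (hypothetical class name and values, not taken from any of the source files below) illustrating the contract: equals returns true only when the argument is another Text with identical byte contents, and returns false for any other argument type, including java.lang.String.

import org.apache.hadoop.io.Text;

public class TextEqualsSketch {
    public static void main(String[] args) {
        Text a = new Text("hello");
        Text b = new Text("hello");
        Text c = new Text("world");

        System.out.println(a.equals(b));       // true  - another Text with the same bytes
        System.out.println(a.equals(c));       // false - different contents
        System.out.println(a.equals("hello")); // false - argument is a String, not a Text

        // Text is mutable; set() replaces its contents, and equality follows the current bytes.
        c.set("hello");
        System.out.println(a.equals(c));       // true
    }
}

This is also why the examples below reuse Text instances with set() and compare them with equals() rather than ==; a call that passes a plain String to equals() always returns false.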
From source file:com.hdfs.concat.crush.Crush.java
License:Apache License
private void cloneOutput() throws IOException {
    List<FileStatus> listStatus = getOutputMappings();

    /*
     * Initialize to empty list, in which case swap() will be a no-op. The reference is then replaced with a real list,
     * which is used in the subsequent iterations.
     */
    List<Path> crushInput = emptyList();

    Text srcFile = new Text();
    Text crushOut = new Text();
    Text prevCrushOut = new Text();

    for (FileStatus partFile : listStatus) {
        Path path = partFile.getPath();

        Reader reader = new Reader(fs, path, fs.getConf());

        try {
            while (reader.next(srcFile, crushOut)) {
                if (!crushOut.equals(prevCrushOut)) {
                    swap(crushInput, prevCrushOut.toString());

                    prevCrushOut.set(crushOut);
                    crushInput = new LinkedList<Path>();
                }

                crushInput.add(new Path(srcFile.toString()));
            }
        } finally {
            try {
                reader.close();
            } catch (IOException e) {
                LOG.warn("Trapped exception when closing " + path, e);
            }
        }

        swap(crushInput, prevCrushOut.toString());
    }
}
From source file:com.ibm.db2j.AccumuloVTI.java
License:Open Source License
/**
 * Gives VTI's table schema, i.e. number of columns, their types, names, sizes etc.
 * Deduces this from the first row of data in the targeted table (whose name should be specified in gaiandb_config.properties).
 * This method is always called by the querying engine (Gaian or Derby) *before* query execution.
 */
@Override
public GaianResultSetMetaData getMetaData() throws SQLException {

    if (false == isDeriveSchemaFromFirstRow)
        accumuloTableRSMD = super.getMetaData();
    else if (null == accumuloTableRSMD) {

        // Get table shape from first accumulo record
        rowScanIterator = standardScanner.iterator();

        if (false == rowScanIterator.hasNext())
            throw new SQLException("Table has no data to derive it's schema. Table name = " + accumuloTable);

        Key key = rowScanIterator.next().getKey();
        Text rowID = key.getRow(), previousRowID = null;

        StringBuilder tableDefSB = new StringBuilder(
                (isRowidInSchema ? ROWID + ' ' + VC256 + ',' : "") + key.getColumnFamily() + ' ' + VC256);

        while (rowScanIterator.hasNext()) {
            key = rowScanIterator.next().getKey();
            previousRowID = rowID;
            rowID = key.getRow();
            if (false == rowID.equals(previousRowID))
                break; // stop when a full record has been read.
            tableDefSB.append(',' + key.getColumnFamily().toString() + ' ' + VC256);
        }

        reinitialise(); // clear scanner for re-use

        try {
            accumuloTableRSMD = new GaianResultSetMetaData(tableDefSB.toString());
        } catch (Exception e) {
            throw new SQLException("Unable to build AccumuloVTI RSMD table schema from definition: " + tableDefSB
                    + " (returning null), cause: " + e);
        }
    }

    // ROWID must always be included... if missing then hardly any qualifiers can be pushed down at all.
    // The logical table could still be configured to cut out the ROWID if this was really necessary
    // (but performance could no longer be optimised).
    // isIncludeRowID = null != accumuloTableRSMD && "ROWID".equalsIgnoreCase( accumuloTableRSMD.getColumnName(1) );

    return accumuloTableRSMD;
}
From source file:com.ibm.db2j.AccumuloVTI.java
License:Open Source License
/**
 * GaianDB extract rows by calling this method repeatedly.
 * 'dvdRecord' contains the number of columns resolved in tableShapeRSMD.
 * However we only need to populate the projected columns indexes.
 */
@Override
public int nextRow(final DataValueDescriptor[] dvdRecord) throws StandardException, SQLException {

    // logger.logDetail("Getting new relational record based on set of Accumulo rows. rowCount = " + rowCount +
    //         ", currenAccumuloRow: " + currentAccumuloRow );

    if (0 == rowCount) {
        numRowsReceivedFromAccumulo = 0;
        if (0 == projectedColumns.length || false == rowScanIterator.hasNext())
            return IFastPath.SCAN_COMPLETED; // empty table
        else
            currentAccumuloRow = rowScanIterator.next(); // kick-start row extraction
    }

    // Check if there are any Accumulo records left...
    if (null == currentAccumuloRow)
        return IFastPath.SCAN_COMPLETED;

    Key key = currentAccumuloRow.getKey(); // lots of info available off the Key: rowID, col name/family, col qualifier, visibility, timestamp
    Text rowID = key.getRow();

    // Look for a new record... until one is found that meets qualifiers, or until none are left
    do {
        // Check if there are any Accumulo records left...
        if (null == currentAccumuloRow)
            return IFastPath.SCAN_COMPLETED;

        numRowsReceivedFromAccumulo++;

        // Set rowID column before extracting others associated with it in the while loop
        if (1 == rowidColShift)
            dvdRecord[0].setValue(rowID.toString());

        // Initialise column cells to NULL value.
        for (int i = rowidColShift; i < projectedColumns.length; i++)
            dvdRecord[projectedColumns[i] - 1].setToNull();

        // Extract columns from Accumulo records for this rowID - note: Accumulo rows don't have to be complete
        Text previousRowID = rowID;
        while (rowID.equals(previousRowID)) {

            final String colName = key.getColumnFamily().toString();
            final Integer pColID = projectedColumnsNameToIndexMap.get(colName);

            if (null == pColID) {
                logger.logImportant(
                        "Encountered Accumulo column which was not requested as column family (skipped): " + colName);
                continue; // this column was not requested - should not happen
            }

            // Log info about the newly found column
            final String cellStringValue = isExtractAccumuloColumnQualifiersInPlaceOfValues
                    ? currentAccumuloRow.getKey().getColumnQualifier().toString()
                    : currentAccumuloRow.getValue().toString();

            // logger.logDetail("Setting ProjectedColID: " + pColID +
            //         ", from record with Key: " + key + " ==> ColFamily: " + key.getColumnFamily()
            //         + ( isExtractAccumuloColumnQualifiersInPlaceOfValues ? ", ColQualifier: " : ", Value: " ) + cellStringValue );

            // Set column value for the row - this also does type conversion.
            dvdRecord[pColID - 1].setValue(cellStringValue); // normalise to 0-based

            // Scroll to the next column - break if we run out of records (rows don't have to be complete)
            if (false == rowScanIterator.hasNext()) {
                currentAccumuloRow = null;
                break;
            }
            currentAccumuloRow = rowScanIterator.next();
            key = currentAccumuloRow.getKey();
            previousRowID = rowID;
            rowID = key.getRow();
        }

    } while (null != qualifiers && false == RowsFilter.testQualifiers(dvdRecord, qualifiers));

    rowCount++;
    return IFastPath.GOT_ROW;
}
From source file:com.inmobi.conduit.distcp.tools.CopyListing.java
License:Apache License
/**
 * Validate the final resulting path listing to see if there are any duplicate entries
 *
 * @param pathToListFile - path listing build by doBuildListing
 * @throws IOException - Any issues while checking for duplicates and throws
 * @throws DuplicateFileException - if there are duplicates
 */
protected void checkForDuplicates(Path pathToListFile) throws DuplicateFileException, IOException {

    Configuration config = getConf();
    FileSystem fs = pathToListFile.getFileSystem(config);
    Path sortedList = DistCpUtils.sortListing(fs, config, pathToListFile);

    SequenceFile.Reader reader = new SequenceFile.Reader(fs, sortedList, config);
    try {
        Text lastKey = new Text("*"); //source relative path can never hold *
        FileStatus lastFileStatus = new FileStatus();

        Text currentKey = new Text();
        while (reader.next(currentKey)) {
            if (currentKey.equals(lastKey)) {
                FileStatus currentFileStatus = new FileStatus();
                reader.getCurrentValue(currentFileStatus);
                throw new DuplicateFileException("File " + lastFileStatus.getPath() + " and "
                        + currentFileStatus.getPath() + " would cause duplicates. Aborting");
            }
            reader.getCurrentValue(lastFileStatus);
            lastKey.set(currentKey);
        }
    } finally {
        IOUtils.closeStream(reader);
    }
}
From source file:com.inmobi.conduit.distcp.tools.mapred.CopyCommitter.java
License:Apache License
private void deleteMissing(Configuration conf) throws IOException {
    LOG.info("-delete option is enabled. About to remove entries from "
            + "target that are missing in source");

    Path sourceListing = new Path(conf.get(DistCpConstants.CONF_LABEL_LISTING_FILE_PATH));
    FileSystem clusterFS = sourceListing.getFileSystem(conf);
    Path sortedSourceListing = DistCpUtils.sortListing(clusterFS, conf, sourceListing);

    Path targetListing = new Path(sourceListing.getParent(), "targetListing.seq");
    CopyListing target = new GlobbedCopyListing(conf, null);

    List<Path> targets = new ArrayList<Path>(1);
    Path targetFinalPath = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH));
    targets.add(targetFinalPath);
    DistCpOptions options = new DistCpOptions(targets, new Path("/NONE"));

    target.buildListing(targetListing, options);
    Path sortedTargetListing = DistCpUtils.sortListing(clusterFS, conf, targetListing);
    long totalLen = clusterFS.getFileStatus(sortedTargetListing).getLen();

    SequenceFile.Reader sourceReader = new SequenceFile.Reader(clusterFS, sortedSourceListing, conf);
    SequenceFile.Reader targetReader = new SequenceFile.Reader(clusterFS, sortedTargetListing, conf);

    long deletedEntries = 0;
    try {
        FileStatus srcFileStatus = new FileStatus();
        Text srcRelPath = new Text();
        FileStatus trgtFileStatus = new FileStatus();
        Text trgtRelPath = new Text();

        FileSystem targetFS = targetFinalPath.getFileSystem(conf);
        boolean srcAvailable = sourceReader.next(srcRelPath, srcFileStatus);
        while (targetReader.next(trgtRelPath, trgtFileStatus)) {
            while (srcAvailable && trgtRelPath.compareTo(srcRelPath) > 0) {
                srcAvailable = sourceReader.next(srcRelPath, srcFileStatus);
            }

            if (srcAvailable && trgtRelPath.equals(srcRelPath))
                continue;

            boolean result = (!targetFS.exists(trgtFileStatus.getPath())
                    || targetFS.delete(trgtFileStatus.getPath(), true));
            if (result) {
                LOG.info("Deleted " + trgtFileStatus.getPath() + " - Missing at source");
                deletedEntries++;
            } else {
                throw new IOException("Unable to delete " + trgtFileStatus.getPath());
            }
            HadoopCompat.progress(taskAttemptContext);
            HadoopCompat.setStatus(taskAttemptContext, "Deleting missing files from target. ["
                    + targetReader.getPosition() * 100 / totalLen + "%]");
        }
    } finally {
        IOUtils.closeStream(sourceReader);
        IOUtils.closeStream(targetReader);
    }
    LOG.info("Deleted " + deletedEntries + " from target: " + targets.get(0));
}
From source file:com.intel.hadoopRPCBenchmark.BenchmarkEngineTokenSelector.java
License:Apache License
@SuppressWarnings("unchecked")
public Token<BenchmarkEngineTokenIdentifier> selectToken(Text service,
        Collection<Token<? extends TokenIdentifier>> tokens) {
    if (service == null) {
        return null;
    }
    for (Token<? extends TokenIdentifier> token : tokens) {
        if (BenchmarkEngineTokenIdentifier.KIND_NAME.equals(token.getKind())
                && service.equals(token.getService())) {
            return (Token<BenchmarkEngineTokenIdentifier>) token;
        }
    }
    return null;
}
From source file:com.kit.udf.UDFDateFormat.java
License:Apache License
public Text evaluate(Text dateText, Text patternText) {
    if (dateText == null || patternText == null) {
        return null;
    }

    try {
        if (!patternText.equals(lastPatternText)) {
            formatter.applyPattern(patternText.toString());
            lastPatternText.set(patternText);
        }
    } catch (Exception e) {
        return null;
    }

    Date date;
    try {
        date = standardFormatter.parse(dateText.toString());
        result.set(formatter.format(date));
        return result;
    } catch (ParseException e) {
        return null;
    }
}
From source file:com.m6d.filecrush.crush.Crush.java
License:Apache License
private void cloneOutput() throws IOException {

    List<FileStatus> listStatus = getOutputMappings();

    /*
     * Initialize to empty list, in which case swap() will be a no-op. The reference is then replaced with a real list,
     * which is used in the subsequent iterations.
     */
    List<Path> crushInput = emptyList();

    Text srcFile = new Text();
    Text crushOut = new Text();
    Text prevCrushOut = new Text();

    for (FileStatus partFile : listStatus) {
        Path path = partFile.getPath();

        Reader reader = new Reader(fs, path, fs.getConf());

        try {
            while (reader.next(srcFile, crushOut)) {
                if (!crushOut.equals(prevCrushOut)) {
                    swap(crushInput, prevCrushOut.toString());

                    prevCrushOut.set(crushOut);
                    crushInput = new LinkedList<Path>();
                }

                crushInput.add(new Path(srcFile.toString()));
            }
        } finally {
            try {
                reader.close();
            } catch (IOException e) {
                LOG.warn("Trapped exception when closing " + path, e);
            }
        }

        swap(crushInput, prevCrushOut.toString());
    }

    /*
     * Don't forget to move the files that were not crushed to the output dir so that the output dir has all the data
     * that was in the input dir, the difference being there are fewer files in the output dir.
     */
    if (removableFiles.size() > 0) {
        String srcDirName = fs.makeQualified(srcDir).toUri().getPath();
        String destName = fs.makeQualified(dest).toUri().getPath();

        print(Verbosity.INFO, "\n\nMoving removed files to " + destName);

        for (String name : removableFiles) {
            Path srcPath = new Path(name);
            Path destPath = new Path(destName + name).getParent();

            print(Verbosity.INFO, "\n  Moving " + srcPath + " to " + destPath);
            rename(srcPath, destPath, null);
        }
    }
}
From source file:com.talis.labs.pagerank.mapreduce.CheckingDataReducer.java
License:Apache License
@Override
public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
    Set<String> links = new TreeSet<String>();
    for (Text value : values) {
        if (!value.equals(CheckingDataMapper.NONE)) {
            links.add(value.toString());
        }
    }

    StringBuffer sb = new StringBuffer();
    for (String link : links) {
        sb.append(link).append("\t");
    }
    context.write(key, new Text(sb.toString()));
}
From source file:com.talis.labs.pagerank.mapreduce.InitializePageRanksMapper.java
License:Apache License
@Override
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    StringTokenizer st = new StringTokenizer(value.toString());

    Text page = null;
    StringBuffer sb = new StringBuffer();
    boolean first = true;
    Set<String> links = new HashSet<String>();
    while (st.hasMoreTokens()) {
        String token = st.nextToken();
        if (first) {
            page = new Text(token);
            sb.append(pagerank).append("\t"); // current pagerank
            sb.append(pagerank).append("\t"); // previous pagerank
            first = false;
        } else {
            // to remove duplicated links and self-references
            // (note: token is a String, and Text.equals returns false for any non-Text argument,
            //  so the !page.equals(token) check always evaluates to true here)
            if ((links.add(token)) && (!page.equals(token))) {
                sb.append(token).append("\t");
            }
        }
    }
    context.write(page, new Text(sb.toString()));
}