List of usage examples for org.apache.hadoop.io.Text.equals
@Override public boolean equals(Object o)
Returns true if and only if o is a Text with the same contents.
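Before the examples, a minimal standalone sketch (hypothetical class name and values, not taken from any of the source files below) illustrating the contract: equals returns true only when the argument is another Text with identical byte contents, and returns false for any other argument type, including java.lang.String.

import org.apache.hadoop.io.Text;

public class TextEqualsSketch {
    public static void main(String[] args) {
        Text a = new Text("hello");
        Text b = new Text("hello");
        Text c = new Text("world");

        System.out.println(a.equals(b));       // true  - another Text with the same bytes
        System.out.println(a.equals(c));       // false - different contents
        System.out.println(a.equals("hello")); // false - argument is a String, not a Text

        // Text is mutable; set() replaces its contents, and equality follows the current bytes.
        c.set("hello");
        System.out.println(a.equals(c));       // true
    }
}

This is also why the examples below reuse Text instances with set() and compare them with equals() rather than ==; a call that passes a plain String to equals() always returns false.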
From source file:com.hdfs.concat.crush.Crush.java
License:Apache License
private void cloneOutput() throws IOException {
    List<FileStatus> listStatus = getOutputMappings();

    /*
     * Initialize to empty list, in which case swap() will be a no-op. The reference is then replaced with a real list,
     * which is used in the subsequent iterations.
     */
    List<Path> crushInput = emptyList();

    Text srcFile = new Text();
    Text crushOut = new Text();
    Text prevCrushOut = new Text();

    for (FileStatus partFile : listStatus) {
        Path path = partFile.getPath();

        Reader reader = new Reader(fs, path, fs.getConf());

        try {
            while (reader.next(srcFile, crushOut)) {
                if (!crushOut.equals(prevCrushOut)) {
                    swap(crushInput, prevCrushOut.toString());

                    prevCrushOut.set(crushOut);
                    crushInput = new LinkedList<Path>();
                }

                crushInput.add(new Path(srcFile.toString()));
            }
        } finally {
            try {
                reader.close();
            } catch (IOException e) {
                LOG.warn("Trapped exception when closing " + path, e);
            }
        }

        swap(crushInput, prevCrushOut.toString());
    }
}
From source file:com.ibm.db2j.AccumuloVTI.java
License:Open Source License
/**
 * Gives VTI's table schema, i.e. number of columns, their types, names, sizes etc.
 * Deduces this from the first row of data in the targeted table (whose name should be specified in gaiandb_config.properties).
 * This method is always called by the querying engine (Gaian or Derby) *before* query execution.
 */
@Override
public GaianResultSetMetaData getMetaData() throws SQLException {

    if (false == isDeriveSchemaFromFirstRow)
        accumuloTableRSMD = super.getMetaData();
    else if (null == accumuloTableRSMD) {

        // Get table shape from first accumulo record
        rowScanIterator = standardScanner.iterator();

        if (false == rowScanIterator.hasNext())
            throw new SQLException("Table has no data to derive it's schema. Table name = " + accumuloTable);

        Key key = rowScanIterator.next().getKey();
        Text rowID = key.getRow(), previousRowID = null;

        StringBuilder tableDefSB = new StringBuilder(
                (isRowidInSchema ? ROWID + ' ' + VC256 + ',' : "") + key.getColumnFamily() + ' ' + VC256);

        while (rowScanIterator.hasNext()) {
            key = rowScanIterator.next().getKey();
            previousRowID = rowID;
            rowID = key.getRow();
            if (false == rowID.equals(previousRowID))
                break; // stop when a full record has been read.
            tableDefSB.append(',' + key.getColumnFamily().toString() + ' ' + VC256);
        }

        reinitialise(); // clear scanner for re-use

        try {
            accumuloTableRSMD = new GaianResultSetMetaData(tableDefSB.toString());
        } catch (Exception e) {
            throw new SQLException("Unable to build AccumuloVTI RSMD table schema from definition: " + tableDefSB
                    + " (returning null), cause: " + e);
        }
    }

    // ROWID must always be included... if missing then hardly any qualifiers can be pushed down at all.
    // The logical table could still be configured to cut out the ROWID if this was really necessary
    // (but performance could no longer be optimised).
    // isIncludeRowID = null != accumuloTableRSMD && "ROWID".equalsIgnoreCase( accumuloTableRSMD.getColumnName(1) );

    return accumuloTableRSMD;
}
From source file:com.ibm.db2j.AccumuloVTI.java
License:Open Source License
/**
 * GaianDB extract rows by calling this method repeatedly.
 * 'dvdRecord' contains the number of columns resolved in tableShapeRSMD.
 * However we only need to populate the projected columns indexes.
 */
@Override
public int nextRow(final DataValueDescriptor[] dvdRecord) throws StandardException, SQLException {

    // logger.logDetail("Getting new relational record based on set of Accumulo rows. rowCount = " + rowCount +
    //         ", currenAccumuloRow: " + currentAccumuloRow );

    if (0 == rowCount) {
        numRowsReceivedFromAccumulo = 0;
        if (0 == projectedColumns.length || false == rowScanIterator.hasNext())
            return IFastPath.SCAN_COMPLETED; // empty table
        else
            currentAccumuloRow = rowScanIterator.next(); // kick-start row extraction
    }

    // Check if there are any Accumulo records left...
    if (null == currentAccumuloRow)
        return IFastPath.SCAN_COMPLETED;

    Key key = currentAccumuloRow.getKey(); // lots of info available off the Key: rowID, col name/family, col qualifier, visibility, timestamp
    Text rowID = key.getRow();

    // Look for a new record... until one is found that meets qualifiers, or until none are left
    do {
        // Check if there are any Accumulo records left...
        if (null == currentAccumuloRow)
            return IFastPath.SCAN_COMPLETED;

        numRowsReceivedFromAccumulo++;

        // Set rowID column before extracting others associated with it in the while loop
        if (1 == rowidColShift)
            dvdRecord[0].setValue(rowID.toString());

        // Initialise column cells to NULL value.
        for (int i = rowidColShift; i < projectedColumns.length; i++)
            dvdRecord[projectedColumns[i] - 1].setToNull();

        // Extract columns from Accumulo records for this rowID - note: Accumulo rows don't have to be complete
        Text previousRowID = rowID;
        while (rowID.equals(previousRowID)) {

            final String colName = key.getColumnFamily().toString();
            final Integer pColID = projectedColumnsNameToIndexMap.get(colName);

            if (null == pColID) {
                logger.logImportant(
                        "Encountered Accumulo column which was not requested as column family (skipped): " + colName);
                continue; // this column was not requested - should not happen
            }

            // Log info about the newly found column
            final String cellStringValue = isExtractAccumuloColumnQualifiersInPlaceOfValues
                    ? currentAccumuloRow.getKey().getColumnQualifier().toString()
                    : currentAccumuloRow.getValue().toString();

            // logger.logDetail("Setting ProjectedColID: " + pColID +
            //         ", from record with Key: " + key + " ==> ColFamily: " + key.getColumnFamily()
            //         + ( isExtractAccumuloColumnQualifiersInPlaceOfValues ? ", ColQualifier: " : ", Value: " ) + cellStringValue );

            // Set column value for the row - this also does type conversion.
            dvdRecord[pColID - 1].setValue(cellStringValue); // normalise to 0-based

            // Scroll to the next column - break if we run out of records (rows don't have to be complete)
            if (false == rowScanIterator.hasNext()) {
                currentAccumuloRow = null;
                break;
            }
            currentAccumuloRow = rowScanIterator.next();
            key = currentAccumuloRow.getKey();
            previousRowID = rowID;
            rowID = key.getRow();
        }

    } while (null != qualifiers && false == RowsFilter.testQualifiers(dvdRecord, qualifiers));

    rowCount++;
    return IFastPath.GOT_ROW;
}
From source file:com.inmobi.conduit.distcp.tools.CopyListing.java
License:Apache License
/**
 * Validate the final resulting path listing to see if there are any duplicate entries
 *
 * @param pathToListFile - path listing build by doBuildListing
 * @throws IOException - Any issues while checking for duplicates and throws
 * @throws DuplicateFileException - if there are duplicates
 */
protected void checkForDuplicates(Path pathToListFile) throws DuplicateFileException, IOException {

    Configuration config = getConf();
    FileSystem fs = pathToListFile.getFileSystem(config);
    Path sortedList = DistCpUtils.sortListing(fs, config, pathToListFile);

    SequenceFile.Reader reader = new SequenceFile.Reader(fs, sortedList, config);
    try {
        Text lastKey = new Text("*"); //source relative path can never hold *
        FileStatus lastFileStatus = new FileStatus();

        Text currentKey = new Text();
        while (reader.next(currentKey)) {
            if (currentKey.equals(lastKey)) {
                FileStatus currentFileStatus = new FileStatus();
                reader.getCurrentValue(currentFileStatus);
                throw new DuplicateFileException("File " + lastFileStatus.getPath() + " and "
                        + currentFileStatus.getPath() + " would cause duplicates. Aborting");
            }
            reader.getCurrentValue(lastFileStatus);
            lastKey.set(currentKey);
        }
    } finally {
        IOUtils.closeStream(reader);
    }
}
From source file:com.inmobi.conduit.distcp.tools.mapred.CopyCommitter.java
License:Apache License
private void deleteMissing(Configuration conf) throws IOException {
    LOG.info("-delete option is enabled. About to remove entries from "
            + "target that are missing in source");

    Path sourceListing = new Path(conf.get(DistCpConstants.CONF_LABEL_LISTING_FILE_PATH));
    FileSystem clusterFS = sourceListing.getFileSystem(conf);
    Path sortedSourceListing = DistCpUtils.sortListing(clusterFS, conf, sourceListing);

    Path targetListing = new Path(sourceListing.getParent(), "targetListing.seq");
    CopyListing target = new GlobbedCopyListing(conf, null);

    List<Path> targets = new ArrayList<Path>(1);
    Path targetFinalPath = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH));
    targets.add(targetFinalPath);
    DistCpOptions options = new DistCpOptions(targets, new Path("/NONE"));

    target.buildListing(targetListing, options);
    Path sortedTargetListing = DistCpUtils.sortListing(clusterFS, conf, targetListing);
    long totalLen = clusterFS.getFileStatus(sortedTargetListing).getLen();

    SequenceFile.Reader sourceReader = new SequenceFile.Reader(clusterFS, sortedSourceListing, conf);
    SequenceFile.Reader targetReader = new SequenceFile.Reader(clusterFS, sortedTargetListing, conf);

    long deletedEntries = 0;
    try {
        FileStatus srcFileStatus = new FileStatus();
        Text srcRelPath = new Text();
        FileStatus trgtFileStatus = new FileStatus();
        Text trgtRelPath = new Text();

        FileSystem targetFS = targetFinalPath.getFileSystem(conf);
        boolean srcAvailable = sourceReader.next(srcRelPath, srcFileStatus);
        while (targetReader.next(trgtRelPath, trgtFileStatus)) {
            while (srcAvailable && trgtRelPath.compareTo(srcRelPath) > 0) {
                srcAvailable = sourceReader.next(srcRelPath, srcFileStatus);
            }

            if (srcAvailable && trgtRelPath.equals(srcRelPath))
                continue;

            boolean result = (!targetFS.exists(trgtFileStatus.getPath())
                    || targetFS.delete(trgtFileStatus.getPath(), true));
            if (result) {
                LOG.info("Deleted " + trgtFileStatus.getPath() + " - Missing at source");
                deletedEntries++;
            } else {
                throw new IOException("Unable to delete " + trgtFileStatus.getPath());
            }
            HadoopCompat.progress(taskAttemptContext);
            HadoopCompat.setStatus(taskAttemptContext, "Deleting missing files from target. ["
                    + targetReader.getPosition() * 100 / totalLen + "%]");
        }
    } finally {
        IOUtils.closeStream(sourceReader);
        IOUtils.closeStream(targetReader);
    }
    LOG.info("Deleted " + deletedEntries + " from target: " + targets.get(0));
}
From source file:com.intel.hadoopRPCBenchmark.BenchmarkEngineTokenSelector.java
License:Apache License
@SuppressWarnings("unchecked")
public Token<BenchmarkEngineTokenIdentifier> selectToken(Text service,
        Collection<Token<? extends TokenIdentifier>> tokens) {
    if (service == null) {
        return null;
    }
    for (Token<? extends TokenIdentifier> token : tokens) {
        if (BenchmarkEngineTokenIdentifier.KIND_NAME.equals(token.getKind())
                && service.equals(token.getService())) {
            return (Token<BenchmarkEngineTokenIdentifier>) token;
        }
    }
    return null;
}
From source file:com.kit.udf.UDFDateFormat.java
License:Apache License
public Text evaluate(Text dateText, Text patternText) {
    if (dateText == null || patternText == null) {
        return null;
    }

    try {
        if (!patternText.equals(lastPatternText)) {
            formatter.applyPattern(patternText.toString());
            lastPatternText.set(patternText);
        }
    } catch (Exception e) {
        return null;
    }

    Date date;
    try {
        date = standardFormatter.parse(dateText.toString());
        result.set(formatter.format(date));
        return result;
    } catch (ParseException e) {
        return null;
    }
}
From source file:com.m6d.filecrush.crush.Crush.java
License:Apache License
private void cloneOutput() throws IOException {

    List<FileStatus> listStatus = getOutputMappings();

    /*
     * Initialize to empty list, in which case swap() will be a no-op. The reference is then replaced with a real list,
     * which is used in the subsequent iterations.
     */
    List<Path> crushInput = emptyList();

    Text srcFile = new Text();
    Text crushOut = new Text();
    Text prevCrushOut = new Text();

    for (FileStatus partFile : listStatus) {
        Path path = partFile.getPath();

        Reader reader = new Reader(fs, path, fs.getConf());

        try {
            while (reader.next(srcFile, crushOut)) {
                if (!crushOut.equals(prevCrushOut)) {
                    swap(crushInput, prevCrushOut.toString());

                    prevCrushOut.set(crushOut);
                    crushInput = new LinkedList<Path>();
                }

                crushInput.add(new Path(srcFile.toString()));
            }
        } finally {
            try {
                reader.close();
            } catch (IOException e) {
                LOG.warn("Trapped exception when closing " + path, e);
            }
        }

        swap(crushInput, prevCrushOut.toString());
    }

    /*
     * Don't forget to move the files that were not crushed to the output dir so that the output dir has all the data
     * that was in the input dir, the difference being there are fewer files in the output dir.
     */
    if (removableFiles.size() > 0) {
        String srcDirName = fs.makeQualified(srcDir).toUri().getPath();
        String destName = fs.makeQualified(dest).toUri().getPath();

        print(Verbosity.INFO, "\n\nMoving removed files to " + destName);

        for (String name : removableFiles) {
            Path srcPath = new Path(name);
            Path destPath = new Path(destName + name).getParent();

            print(Verbosity.INFO, "\n  Moving " + srcPath + " to " + destPath);
            rename(srcPath, destPath, null);
        }
    }
}
From source file:com.talis.labs.pagerank.mapreduce.CheckingDataReducer.java
License:Apache License
@Override
public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
    Set<String> links = new TreeSet<String>();
    for (Text value : values) {
        if (!value.equals(CheckingDataMapper.NONE)) {
            links.add(value.toString());
        }
    }

    StringBuffer sb = new StringBuffer();
    for (String link : links) {
        sb.append(link).append("\t");
    }
    context.write(key, new Text(sb.toString()));
}
From source file:com.talis.labs.pagerank.mapreduce.InitializePageRanksMapper.java
License:Apache License
@Override
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    StringTokenizer st = new StringTokenizer(value.toString());

    Text page = null;
    StringBuffer sb = new StringBuffer();
    boolean first = true;
    Set<String> links = new HashSet<String>();
    while (st.hasMoreTokens()) {
        String token = st.nextToken();
        if (first) {
            page = new Text(token);
            sb.append(pagerank).append("\t"); // current pagerank
            sb.append(pagerank).append("\t"); // previous pagerank
            first = false;
        } else {
            // to remove duplicated links and self-references
            // (note: token is a String, and Text.equals returns false for any non-Text argument,
            //  so the !page.equals(token) check always evaluates to true here)
            if ((links.add(token)) && (!page.equals(token))) {
                sb.append(token).append("\t");
            }
        }
    }
    context.write(page, new Text(sb.toString()));
}