Example usage for org.apache.lucene.util BytesRef compareTo

Introduction

On this page you can find example usages of org.apache.lucene.util BytesRef compareTo.

Prototype

@Override
public int compareTo(BytesRef other)

Source Link

Document

Unsigned byte order comparison

Usage

From source file:org.opengrok.indexer.search.context.OGKUnifiedHighlighter.java

License:Apache License

/**
 * Returns the text of a file under the source root, but only when the UID
 * rebuilt from its current last-modified time equals the stored UID.
 *
 * @param repoRelPath path of the file, appended to {@code env.getSourceRootPath()}
 * @param storedU the stored UID value to compare against; may be {@code null}
 * @return the file content read through {@code getReader}, or {@code null} when
 *         {@code storedU} is {@code null}, the file does not exist, or the UIDs differ
 * @throws IOException if opening or reading the file fails
 */
private String getRepoFileContent(String repoRelPath, String storedU) throws IOException {

    // Nothing to validate against when no UID was stored.
    if (storedU == null) {
        LOGGER.log(Level.FINE, "Missing U[UID] for: {0}", repoRelPath);
        return null;
    }

    final String absolutePath = env.getSourceRootPath() + repoRelPath;
    final File sourceFile = new File(absolutePath);
    if (!sourceFile.exists()) {
        LOGGER.log(Level.FINE, "Missing file: {0}", absolutePath);
        return null;
    }

    // Rebuild the UID from the path (as adjusted by Util.fixPathIfWindows) and
    // the file's current last-modified time, then compare it with the stored one.
    repoRelPath = Util.fixPathIfWindows(repoRelPath);
    final String lastModified = DateTools.timeToString(sourceFile.lastModified(),
            DateTools.Resolution.MILLISECOND);
    final BytesRef currentUid = new BytesRef(Util.path2uid(repoRelPath, lastModified));
    final BytesRef storedUid = new BytesRef(storedU);
    final boolean unchanged = storedUid.compareTo(currentUid) == 0;
    if (!unchanged) {
        LOGGER.log(Level.FINE, "Last-modified differs for: {0}", repoRelPath);
        return null;
    }

    // Copy the file into memory one character at a time.
    final StringBuilder content = new StringBuilder();
    final StreamSource source = StreamSource.fromFile(sourceFile);
    try (InputStream stream = source.getStream(); Reader reader = getReader(stream)) {
        for (int ch = reader.read(); ch != -1; ch = reader.read()) {
            content.append((char) ch);
        }
    }

    return content.toString();
}

From source file:org.opensolaris.opengrok.search.context.OGKUnifiedHighlighter.java

License:Apache License

/**
 * Loads the content of a file below the source root, provided the UID derived
 * from its current last-modified time still matches the stored UID.
 *
 * @param repoRelPath path of the file, appended to {@code env.getSourceRootPath()}
 * @param storedU the stored UID value to compare against; may be {@code null}
 * @return the file content read through {@code getReader}, or {@code null} when
 *         {@code storedU} is {@code null}, the file does not exist, or the UIDs differ
 * @throws IOException if opening or reading the file fails
 */
private String getRepoFileContent(String repoRelPath, String storedU) throws IOException {

    // A missing stored UID means there is nothing to verify against.
    if (storedU == null) {
        LOGGER.log(Level.FINE, "Missing U[UID] for: {0}", repoRelPath);
        return null;
    }

    final String fullPath = env.getSourceRootPath() + repoRelPath;
    final File target = new File(fullPath);
    if (!target.exists()) {
        LOGGER.log(Level.FINE, "Missing file: {0}", fullPath);
        return null;
    }

    // Recompute the UID from the file's current timestamp; a difference from
    // the stored UID is treated as "last-modified changed".
    final String timestamp = DateTools.timeToString(target.lastModified(),
            DateTools.Resolution.MILLISECOND);
    final BytesRef freshUid = new BytesRef(Util.path2uid(repoRelPath, timestamp));
    final BytesRef indexedUid = new BytesRef(storedU);
    if (indexedUid.compareTo(freshUid) != 0) {
        LOGGER.log(Level.FINE, "Last-modified differs for: {0}", repoRelPath);
        return null;
    }

    // Drain the reader character by character into the result buffer.
    final StringBuilder text = new StringBuilder();
    final StreamSource source = StreamSource.fromFile(target);
    try (InputStream raw = source.getStream(); Reader reader = getReader(raw)) {
        int next = reader.read();
        while (next != -1) {
            text.append((char) next);
            next = reader.read();
        }
    }

    return text.toString();
}

From source file:stemmer.Dictionary.java

License:Apache License

/**
 * Reads the dictionary file through the provided InputStreams, building up the words map.
 * <p>
 * Every entry line (the first line of each stream is skipped) is written to a
 * temporary "unsorted" file, sorted offline into a temporary "sorted" file, and
 * then read back in order. Consecutive lines with the same entry text are
 * merged: the ordinals of their flag sets are collected and added to
 * {@code words} once per distinct entry.
 * <p>
 * Both temporary files are deleted and the sorted-file reader is closed even
 * when processing fails part-way. A dictionary without any entries adds nothing
 * to {@code words}.
 *
 * @param dictionaries InputStreams to read the dictionary file through
 * @param decoder CharsetDecoder used to decode the contents of the file
 * @param words builder receiving each distinct entry (converted with
 *              {@code Util.toUTF32}) together with the ordinals of its flag sets
 * @throws IOException Can be thrown while reading from the file
 * @throws IllegalArgumentException if the sorted entries are encountered out of order
 */
private void readDictionaryFiles(List<InputStream> dictionaries, CharsetDecoder decoder, Builder<IntsRef> words)
        throws IOException {
    BytesRef flagsScratch = new BytesRef();
    IntsRef scratchInts = new IntsRef();

    StringBuilder sb = new StringBuilder();

    // Phase 1: dump all (optionally cleaned) entry lines into an unsorted temp file.
    File unsorted = File.createTempFile("unsorted", "dat", tempDir);
    ByteSequencesWriter writer = new ByteSequencesWriter(unsorted);
    boolean success = false;
    try {
        for (InputStream dictionary : dictionaries) {
            BufferedReader lines = new BufferedReader(new InputStreamReader(dictionary, decoder));
            String line = lines.readLine(); // first line is number of entries (approximately, sometimes)

            while ((line = lines.readLine()) != null) {
                line = unescapeEntry(line);
                if (needsInputCleaning) {
                    int flagSep = line.lastIndexOf(FLAG_SEPARATOR);
                    if (flagSep == -1) {
                        CharSequence cleansed = cleanInput(line, sb);
                        writer.write(cleansed.toString().getBytes(StandardCharsets.UTF_8));
                    } else {
                        // clean only the text before the flag separator, then re-attach the rest
                        String text = line.substring(0, flagSep);
                        CharSequence cleansed = cleanInput(text, sb);
                        if (cleansed != sb) {
                            sb.setLength(0);
                            sb.append(cleansed);
                        }
                        sb.append(line.substring(flagSep));
                        writer.write(sb.toString().getBytes(StandardCharsets.UTF_8));
                    }
                } else {
                    writer.write(line.getBytes(StandardCharsets.UTF_8));
                }
            }
        }
        success = true;
    } finally {
        if (success) {
            IOUtils.close(writer);
        } else {
            IOUtils.closeWhileHandlingException(writer);
            // do not leave the partial temp file behind when writing failed
            unsorted.delete();
        }
    }

    // Orders rows by the bytes preceding the last flag separator; rows with an
    // equal prefix are ordered by their complete content.
    Comparator<BytesRef> entryOrder = new Comparator<BytesRef>() {
        final BytesRef scratch1 = new BytesRef();
        final BytesRef scratch2 = new BytesRef();

        @Override
        public int compare(BytesRef o1, BytesRef o2) {
            scratch1.bytes = o1.bytes;
            scratch1.offset = o1.offset;
            scratch1.length = o1.length;

            for (int i = scratch1.length - 1; i >= 0; i--) {
                if (scratch1.bytes[scratch1.offset + i] == FLAG_SEPARATOR) {
                    scratch1.length = i;
                    break;
                }
            }

            scratch2.bytes = o2.bytes;
            scratch2.offset = o2.offset;
            scratch2.length = o2.length;

            for (int i = scratch2.length - 1; i >= 0; i--) {
                if (scratch2.bytes[scratch2.offset + i] == FLAG_SEPARATOR) {
                    scratch2.length = i;
                    break;
                }
            }

            int cmp = scratch1.compareTo(scratch2);
            if (cmp == 0) {
                // tie break on whole row
                return o1.compareTo(o2);
            } else {
                return cmp;
            }
        }
    };

    // Phase 2 and 3: sort offline, then consume the sorted rows. The outer
    // finally guarantees the sorted temp file is removed on every exit path.
    File sorted = File.createTempFile("sorted", "dat", tempDir);
    try {
        try {
            OfflineSorter sorter = new OfflineSorter(entryOrder);
            sorter.sort(unsorted, sorted);
        } finally {
            // the unsorted file is no longer needed whether or not sorting succeeded
            unsorted.delete();
        }

        // try-with-resources closes the reader before the sorted file is deleted
        try (ByteSequencesReader reader = new ByteSequencesReader(sorted)) {
            BytesRef scratchLine = new BytesRef();

            // TODO: the flags themselves can be double-chars (long) or also numeric
            // either way the trick is to encode them as char... but they must be parsed differently

            String currentEntry = null;
            IntsRef currentOrds = new IntsRef();

            String line;
            while (reader.read(scratchLine)) {
                line = scratchLine.utf8ToString();
                String entry;
                char[] wordForm;

                int flagSep = line.lastIndexOf(FLAG_SEPARATOR);
                if (flagSep == -1) {
                    wordForm = NOFLAGS;
                    entry = line;
                } else {
                    // note, there can be comments (morph description) after a flag.
                    // we should really look for any whitespace: currently just tab and space
                    int end = line.indexOf('\t', flagSep);
                    if (end == -1) {
                        end = line.length();
                    }
                    int end2 = line.indexOf(' ', flagSep);
                    if (end2 == -1) {
                        end2 = line.length();
                    }
                    end = Math.min(end, end2);

                    String flagPart = line.substring(flagSep + 1, end);
                    if (aliasCount > 0) {
                        flagPart = getAliasValue(Integer.parseInt(flagPart));
                    }

                    wordForm = flagParsingStrategy.parseFlags(flagPart);
                    Arrays.sort(wordForm);
                    entry = line.substring(0, flagSep);
                }

                // NOTE(review): the offline sort orders rows by unsigned bytes while this
                // check uses String.compareTo (UTF-16 code units); confirm these agree for
                // entries containing supplementary characters.
                int cmp = currentEntry == null ? 1 : entry.compareTo(currentEntry);
                if (cmp < 0) {
                    throw new IllegalArgumentException("out of order: " + entry + " < " + currentEntry);
                } else {
                    encodeFlags(flagsScratch, wordForm);
                    int ord = flagLookup.add(flagsScratch);
                    if (ord < 0) {
                        // already exists in our hash
                        ord = (-ord) - 1;
                    }
                    // finalize current entry, and switch "current" if necessary
                    if (cmp > 0 && currentEntry != null) {
                        Util.toUTF32(currentEntry, scratchInts);
                        words.add(scratchInts, currentOrds);
                    }
                    // swap current
                    if (cmp > 0 || currentEntry == null) {
                        currentEntry = entry;
                        currentOrds = new IntsRef(); // must be this way
                    }
                    currentOrds.grow(currentOrds.length + 1);
                    currentOrds.ints[currentOrds.length++] = ord;
                }
            }

            // finalize last entry; there is none when the dictionaries held no rows
            if (currentEntry != null) {
                Util.toUTF32(currentEntry, scratchInts);
                words.add(scratchInts, currentOrds);
            }
        }
    } finally {
        sorted.delete();
    }
}