List of usage examples for org.apache.lucene.util.BytesRef#compareTo
@Override public int compareTo(BytesRef other)
From source file:org.opengrok.indexer.search.context.OGKUnifiedHighlighter.java
License:Apache License
private String getRepoFileContent(String repoRelPath, String storedU) throws IOException { if (storedU == null) { LOGGER.log(Level.FINE, "Missing U[UID] for: {0}", repoRelPath); return null; }/*from w w w . jav a 2s . com*/ String repoAbsPath = env.getSourceRootPath() + repoRelPath; File repoAbsFile = new File(repoAbsPath); if (!repoAbsFile.exists()) { LOGGER.log(Level.FINE, "Missing file: {0}", repoAbsPath); return null; } repoRelPath = Util.fixPathIfWindows(repoRelPath); // Verify that timestamp (U) is unchanged by comparing UID. String uid = Util.path2uid(repoRelPath, DateTools.timeToString(repoAbsFile.lastModified(), DateTools.Resolution.MILLISECOND)); BytesRef buid = new BytesRef(uid); BytesRef storedBuid = new BytesRef(storedU); if (storedBuid.compareTo(buid) != 0) { LOGGER.log(Level.FINE, "Last-modified differs for: {0}", repoRelPath); return null; } StringBuilder bld = new StringBuilder(); StreamSource src = StreamSource.fromFile(repoAbsFile); try (InputStream in = src.getStream(); Reader rdr = getReader(in)) { int c; while ((c = rdr.read()) != -1) { bld.append((char) c); } } return bld.toString(); }
From source file:org.opensolaris.opengrok.search.context.OGKUnifiedHighlighter.java
License:Apache License
private String getRepoFileContent(String repoRelPath, String storedU) throws IOException { if (storedU == null) { LOGGER.log(Level.FINE, "Missing U[UID] for: {0}", repoRelPath); return null; }//from w ww .j a va 2s . co m String repoAbsPath = env.getSourceRootPath() + repoRelPath; File repoAbsFile = new File(repoAbsPath); if (!repoAbsFile.exists()) { LOGGER.log(Level.FINE, "Missing file: {0}", repoAbsPath); return null; } // Verify that timestamp (U) is unchanged by comparing UID. String uid = Util.path2uid(repoRelPath, DateTools.timeToString(repoAbsFile.lastModified(), DateTools.Resolution.MILLISECOND)); BytesRef buid = new BytesRef(uid); BytesRef storedBuid = new BytesRef(storedU); if (storedBuid.compareTo(buid) != 0) { LOGGER.log(Level.FINE, "Last-modified differs for: {0}", repoRelPath); return null; } StringBuilder bld = new StringBuilder(); StreamSource src = StreamSource.fromFile(repoAbsFile); try (InputStream in = src.getStream(); Reader rdr = getReader(in)) { int c; while ((c = rdr.read()) != -1) { bld.append((char) c); } } return bld.toString(); }
From source file:stemmer.Dictionary.java
License:Apache License
/** * Reads the dictionary file through the provided InputStreams, building up the words map * * @param dictionaries InputStreams to read the dictionary file through * @param decoder CharsetDecoder used to decode the contents of the file * @throws IOException Can be thrown while reading from the file *//* w ww . j a va 2 s . co m*/ private void readDictionaryFiles(List<InputStream> dictionaries, CharsetDecoder decoder, Builder<IntsRef> words) throws IOException { BytesRef flagsScratch = new BytesRef(); IntsRef scratchInts = new IntsRef(); StringBuilder sb = new StringBuilder(); File unsorted = File.createTempFile("unsorted", "dat", tempDir); ByteSequencesWriter writer = new ByteSequencesWriter(unsorted); boolean success = false; try { for (InputStream dictionary : dictionaries) { BufferedReader lines = new BufferedReader(new InputStreamReader(dictionary, decoder)); String line = lines.readLine(); // first line is number of entries (approximately, sometimes) while ((line = lines.readLine()) != null) { line = unescapeEntry(line); if (needsInputCleaning) { int flagSep = line.lastIndexOf(FLAG_SEPARATOR); if (flagSep == -1) { CharSequence cleansed = cleanInput(line, sb); writer.write(cleansed.toString().getBytes(StandardCharsets.UTF_8)); } else { String text = line.substring(0, flagSep); CharSequence cleansed = cleanInput(text, sb); if (cleansed != sb) { sb.setLength(0); sb.append(cleansed); } sb.append(line.substring(flagSep)); writer.write(sb.toString().getBytes(StandardCharsets.UTF_8)); } } else { writer.write(line.getBytes(StandardCharsets.UTF_8)); } } } success = true; } finally { if (success) { IOUtils.close(writer); } else { IOUtils.closeWhileHandlingException(writer); } } File sorted = File.createTempFile("sorted", "dat", tempDir); OfflineSorter sorter = new OfflineSorter(new Comparator<BytesRef>() { BytesRef scratch1 = new BytesRef(); BytesRef scratch2 = new BytesRef(); @Override public int compare(BytesRef o1, BytesRef o2) { scratch1.bytes = o1.bytes; 
scratch1.offset = o1.offset; scratch1.length = o1.length; for (int i = scratch1.length - 1; i >= 0; i--) { if (scratch1.bytes[scratch1.offset + i] == FLAG_SEPARATOR) { scratch1.length = i; break; } } scratch2.bytes = o2.bytes; scratch2.offset = o2.offset; scratch2.length = o2.length; for (int i = scratch2.length - 1; i >= 0; i--) { if (scratch2.bytes[scratch2.offset + i] == FLAG_SEPARATOR) { scratch2.length = i; break; } } int cmp = scratch1.compareTo(scratch2); if (cmp == 0) { // tie break on whole row return o1.compareTo(o2); } else { return cmp; } } }); sorter.sort(unsorted, sorted); unsorted.delete(); ByteSequencesReader reader = new ByteSequencesReader(sorted); BytesRef scratchLine = new BytesRef(); // TODO: the flags themselves can be double-chars (long) or also numeric // either way the trick is to encode them as char... but they must be parsed differently String currentEntry = null; IntsRef currentOrds = new IntsRef(); String line; while (reader.read(scratchLine)) { line = scratchLine.utf8ToString(); String entry; char wordForm[]; int flagSep = line.lastIndexOf(FLAG_SEPARATOR); if (flagSep == -1) { wordForm = NOFLAGS; entry = line; } else { // note, there can be comments (morph description) after a flag. // we should really look for any whitespace: currently just tab and space int end = line.indexOf('\t', flagSep); if (end == -1) end = line.length(); int end2 = line.indexOf(' ', flagSep); if (end2 == -1) end2 = line.length(); end = Math.min(end, end2); String flagPart = line.substring(flagSep + 1, end); if (aliasCount > 0) { flagPart = getAliasValue(Integer.parseInt(flagPart)); } wordForm = flagParsingStrategy.parseFlags(flagPart); Arrays.sort(wordForm); entry = line.substring(0, flagSep); } int cmp = currentEntry == null ? 
1 : entry.compareTo(currentEntry); if (cmp < 0) { throw new IllegalArgumentException("out of order: " + entry + " < " + currentEntry); } else { encodeFlags(flagsScratch, wordForm); int ord = flagLookup.add(flagsScratch); if (ord < 0) { // already exists in our hash ord = (-ord) - 1; } // finalize current entry, and switch "current" if necessary if (cmp > 0 && currentEntry != null) { Util.toUTF32(currentEntry, scratchInts); words.add(scratchInts, currentOrds); } // swap current if (cmp > 0 || currentEntry == null) { currentEntry = entry; currentOrds = new IntsRef(); // must be this way } currentOrds.grow(currentOrds.length + 1); currentOrds.ints[currentOrds.length++] = ord; } } // finalize last entry Util.toUTF32(currentEntry, scratchInts); words.add(scratchInts, currentOrds); reader.close(); sorted.delete(); }