List of usage examples for org.apache.lucene.util IOUtils close
public static void close(Iterable<? extends Closeable> objects) throws IOException
From source file:com.browseengine.bobo.geosearch.index.impl.GeoIndexer.java
License:Apache License
@Override public void flush(Directory directory, String segmentName) throws IOException { Set<CartesianGeoRecord> treeToFlush; Set<String> fieldNamesToFlush; synchronized (treeLock) { fieldNamesToFlush = fieldNames;/*www.j a v a 2 s . co m*/ fieldNames = new HashSet<String>(); treeToFlush = fieldTree; fieldTree = geoUtil.getBinaryTreeOrderedByBitMag(); } GeoSegmentWriter<CartesianGeoRecord> geoRecordBTree = null; GeoSegmentInfo geoSegmentInfo = buildGeoSegmentInfo(fieldNamesToFlush, segmentName); boolean success = false; try { String fileName = config.getGeoFileName(segmentName); geoRecordBTree = new GeoSegmentWriter<CartesianGeoRecord>(treeToFlush, directory, fileName, geoSegmentInfo, geoRecordSerializer); success = true; } finally { // see https://issues.apache.org/jira/browse/LUCENE-3405 if (success) { IOUtils.close(geoRecordBTree); } else { IOUtils.closeWhileHandlingException(geoRecordBTree); } } }
From source file:com.browseengine.bobo.geosearch.merge.impl.BufferedGeoMerger.java
License:Apache License
@Override //TODO: Handle more frequent checkAborts public void merge(IGeoMergeInfo geoMergeInfo, GeoSearchConfig config) throws IOException { IGeoConverter geoConverter = config.getGeoConverter(); int bufferSizePerGeoReader = config.getBufferSizePerGeoSegmentReader(); Directory directory = geoMergeInfo.getDirectory(); List<SegmentReader> readers = geoMergeInfo.getReaders(); List<SegmentInfo> segments = geoMergeInfo.getSegmentsToMerge(); List<BTree<CartesianGeoRecord>> mergeInputBTrees = new ArrayList<BTree<CartesianGeoRecord>>( segments.size());//from w w w . j a v a 2 s . c o m List<BitVector> deletedDocsList = new ArrayList<BitVector>(segments.size()); boolean success = false; try { assert (readers.size() == segments.size()); IFieldNameFilterConverter fieldNameFilterConverter = config.getGeoConverter() .makeFieldNameFilterConverter(); boolean hasFieldNameFilterConverter = false; for (SegmentReader reader : readers) { String geoFileName = config.getGeoFileName(reader.getSegmentName()); BTree<CartesianGeoRecord> segmentBTree = getInputBTree(directory, geoFileName, bufferSizePerGeoReader); mergeInputBTrees.add(segmentBTree); BitVector deletedDocs = buildDeletedDocsForSegment(reader); deletedDocsList.add(deletedDocs); //just take the first fieldNameFilterConverter for now. Don't worry about merging them. if (!hasFieldNameFilterConverter) { hasFieldNameFilterConverter = loadFieldNameFilterConverter(directory, geoFileName, fieldNameFilterConverter); } } if (!hasFieldNameFilterConverter) { // we are merging a bunch of segments, none of which have a corresponding .geo file // so there is nothing to do, it is okay if the outcome of this merge continues to // not have a .geo file. 
LOGGER.warn("nothing to do during geo merge, no .geo files found for segments"); success = true; return; } int newSegmentSize = calculateMergedSegmentSize(deletedDocsList, mergeInputBTrees, geoConverter); buildMergedSegment(mergeInputBTrees, deletedDocsList, newSegmentSize, geoMergeInfo, config, fieldNameFilterConverter); success = true; } finally { // see https://issues.apache.org/jira/browse/LUCENE-3405 if (success) { IOUtils.close(mergeInputBTrees); } else { IOUtils.closeWhileHandlingException(mergeInputBTrees); } } }
From source file:com.browseengine.bobo.geosearch.merge.impl.BufferedGeoMerger.java
License:Apache License
private void buildMergedSegment(List<BTree<CartesianGeoRecord>> mergeInputBTrees, List<BitVector> deletedDocsList, int newSegmentSize, IGeoMergeInfo geoMergeInfo, GeoSearchConfig config, IFieldNameFilterConverter fieldNameFilterConverter) throws IOException { Directory directory = geoMergeInfo.getDirectory(); IGeoConverter geoConverter = config.getGeoConverter(); String segmentName = geoMergeInfo.getNewSegment().name; String outputFileName = config.getGeoFileName(segmentName); GeoSegmentInfo geoSegmentInfo = buildGeoSegmentInfo(segmentName, fieldNameFilterConverter); Iterator<CartesianGeoRecord> inputIterator = new ChainedConvertedGeoRecordIterator(geoConverter, mergeInputBTrees, deletedDocsList, BUFFER_CAPACITY); BTree<CartesianGeoRecord> mergeOutputBTree = null; boolean success = false; try {// w ww . java 2 s . c om mergeOutputBTree = getOutputBTree(newSegmentSize, inputIterator, directory, outputFileName, geoSegmentInfo); success = true; } finally { // see https://issues.apache.org/jira/browse/LUCENE-3405 if (success) { IOUtils.close(mergeOutputBTree); } else { IOUtils.closeWhileHandlingException(mergeOutputBTree); } } }
From source file:com.github.cstoku.neologd.unidic.lucene.analysis.ja.dict.CharacterDefinition.java
License:Apache License
private CharacterDefinition() throws IOException { InputStream is = null;// w w w . j a va 2 s. c o m boolean success = false; try { is = BinaryDictionary.getClassResource(getClass(), FILENAME_SUFFIX); is = new BufferedInputStream(is); final DataInput in = new InputStreamDataInput(is); CodecUtil.checkHeader(in, HEADER, VERSION, VERSION); in.readBytes(characterCategoryMap, 0, characterCategoryMap.length); for (int i = 0; i < CLASS_COUNT; i++) { final byte b = in.readByte(); invokeMap[i] = (b & 0x01) != 0; groupMap[i] = (b & 0x02) != 0; } success = true; } finally { if (success) { IOUtils.close(is); } else { IOUtils.closeWhileHandlingException(is); } } }
From source file:com.github.cstoku.neologd.unidic.lucene.analysis.ja.dict.ConnectionCosts.java
License:Apache License
private ConnectionCosts() throws IOException { InputStream is = null;//from w ww. j a va2 s . co m short[][] costs = null; boolean success = false; try { is = BinaryDictionary.getClassResource(getClass(), FILENAME_SUFFIX); is = new BufferedInputStream(is); final DataInput in = new InputStreamDataInput(is); CodecUtil.checkHeader(in, HEADER, VERSION, VERSION); int forwardSize = in.readVInt(); int backwardSize = in.readVInt(); costs = new short[backwardSize][forwardSize]; int accum = 0; for (int j = 0; j < costs.length; j++) { final short[] a = costs[j]; for (int i = 0; i < a.length; i++) { accum += in.readZInt(); a[i] = (short) accum; } } success = true; } finally { if (success) { IOUtils.close(is); } else { IOUtils.closeWhileHandlingException(is); } } this.costs = costs; }
From source file:com.github.cstoku.neologd.unidic.lucene.analysis.ja.dict.TokenInfoDictionary.java
License:Apache License
private TokenInfoDictionary() throws IOException { super();//w w w .j a va 2 s .com InputStream is = null; FST<Long> fst = null; boolean success = false; try { is = getResource(FST_FILENAME_SUFFIX); is = new BufferedInputStream(is); fst = new FST<>(new InputStreamDataInput(is), PositiveIntOutputs.getSingleton()); success = true; } finally { if (success) { IOUtils.close(is); } else { IOUtils.closeWhileHandlingException(is); } } // TODO: some way to configure? this.fst = new TokenInfoFST(fst, true); }
From source file:com.lucure.core.codec.CompressingStoredFieldsReader.java
License:Apache License
/**
 * Closes the underlying {@link IndexInput}s.
 * <p>
 * Idempotent: subsequent calls after the first successful close are no-ops.
 */
@Override
public void close() throws IOException {
    if (closed) {
        return;
    }
    IOUtils.close(fieldsStream);
    closed = true;
}
From source file:com.sonicle.webtop.core.app.servlet.response.GzippableResponseWrapper.java
License:Open Source License
/**
 * Finishes the response by closing the wrapped writer and gzip output stream.
 * <p>
 * Uses the varargs {@code IOUtils.close(Closeable...)} overload so that the
 * gzip stream is still closed even if closing the writer throws; the first
 * exception encountered is rethrown after all arguments have been closed.
 *
 * @throws IOException if closing either resource fails
 */
public void finishResponse() throws IOException {
    // Previously closed sequentially: if close(writer) threw, gzipOutputStream
    // was leaked. The varargs form closes both regardless.
    IOUtils.close(writer, gzipOutputStream);
}
From source file:elhuyar.bilakit.Dictionary.java
License:Apache License
/** * Reads the dictionary file through the provided InputStreams, building up the words map *//from w w w.j ava 2 s .co m * @param dictionaries InputStreams to read the dictionary file through * @param decoder CharsetDecoder used to decode the contents of the file * @throws IOException Can be thrown while reading from the file */ private void readDictionaryFiles(List<InputStream> dictionaries, CharsetDecoder decoder, Builder<IntsRef> words) throws IOException { BytesRefBuilder flagsScratch = new BytesRefBuilder(); IntsRefBuilder scratchInts = new IntsRefBuilder(); StringBuilder sb = new StringBuilder(); File unsorted = File.createTempFile("unsorted", "dat", tempDir); ByteSequencesWriter writer = new ByteSequencesWriter(unsorted); boolean success = false; try { for (InputStream dictionary : dictionaries) { BufferedReader lines = new BufferedReader(new InputStreamReader(dictionary, decoder)); String line = lines.readLine(); // first line is number of entries (approximately, sometimes) while ((line = lines.readLine()) != null) { // wild and unpredictable code comment rules if (line.isEmpty() || line.charAt(0) == '/' || line.charAt(0) == '#' || line.charAt(0) == '\t') { continue; } line = unescapeEntry(line); // if we havent seen any stem exceptions, try to parse one if (hasStemExceptions == false) { int morphStart = line.indexOf(MORPH_SEPARATOR); if (morphStart >= 0 && morphStart < line.length()) { hasStemExceptions = parseStemException(line.substring(morphStart + 1)) != null; } } if (needsInputCleaning) { int flagSep = line.indexOf(FLAG_SEPARATOR); if (flagSep == -1) { flagSep = line.indexOf(MORPH_SEPARATOR); } if (flagSep == -1) { CharSequence cleansed = cleanInput(line, sb); writer.write(cleansed.toString().getBytes(StandardCharsets.UTF_8)); } else { String text = line.substring(0, flagSep); CharSequence cleansed = cleanInput(text, sb); if (cleansed != sb) { sb.setLength(0); sb.append(cleansed); } sb.append(line.substring(flagSep)); 
writer.write(sb.toString().getBytes(StandardCharsets.UTF_8)); } } else { writer.write(line.getBytes(StandardCharsets.UTF_8)); } } } success = true; } finally { if (success) { IOUtils.close(writer); } else { IOUtils.closeWhileHandlingException(writer); } } File sorted = File.createTempFile("sorted", "dat", tempDir); OfflineSorter sorter = new OfflineSorter(new Comparator<BytesRef>() { BytesRef scratch1 = new BytesRef(); BytesRef scratch2 = new BytesRef(); @Override public int compare(BytesRef o1, BytesRef o2) { scratch1.bytes = o1.bytes; scratch1.offset = o1.offset; scratch1.length = o1.length; for (int i = scratch1.length - 1; i >= 0; i--) { if (scratch1.bytes[scratch1.offset + i] == FLAG_SEPARATOR || scratch1.bytes[scratch1.offset + i] == MORPH_SEPARATOR) { scratch1.length = i; break; } } scratch2.bytes = o2.bytes; scratch2.offset = o2.offset; scratch2.length = o2.length; for (int i = scratch2.length - 1; i >= 0; i--) { if (scratch2.bytes[scratch2.offset + i] == FLAG_SEPARATOR || scratch2.bytes[scratch2.offset + i] == MORPH_SEPARATOR) { scratch2.length = i; break; } } int cmp = scratch1.compareTo(scratch2); if (cmp == 0) { // tie break on whole row return o1.compareTo(o2); } else { return cmp; } } }); sorter.sort(unsorted, sorted); unsorted.delete(); ByteSequencesReader reader = new ByteSequencesReader(sorted); BytesRefBuilder scratchLine = new BytesRefBuilder(); // TODO: the flags themselves can be double-chars (long) or also numeric // either way the trick is to encode them as char... 
but they must be parsed differently String currentEntry = null; IntsRefBuilder currentOrds = new IntsRefBuilder(); String line; while (reader.read(scratchLine)) { line = scratchLine.get().utf8ToString(); String entry; char wordForm[]; int end; int flagSep = line.indexOf(FLAG_SEPARATOR); if (flagSep == -1) { wordForm = NOFLAGS; end = line.indexOf(MORPH_SEPARATOR); entry = line.substring(0, end); } else { end = line.indexOf(MORPH_SEPARATOR); String flagPart = line.substring(flagSep + 1, end); if (aliasCount > 0) { flagPart = getAliasValue(Integer.parseInt(flagPart)); } wordForm = flagParsingStrategy.parseFlags(flagPart); Arrays.sort(wordForm); entry = line.substring(0, flagSep); } // we possibly have morphological data int stemExceptionID = 0; if (hasStemExceptions && end + 1 < line.length()) { String stemException = parseStemException(line.substring(end + 1)); if (stemException != null) { if (stemExceptionCount == stemExceptions.length) { int newSize = ArrayUtil.oversize(stemExceptionCount + 1, RamUsageEstimator.NUM_BYTES_OBJECT_REF); stemExceptions = Arrays.copyOf(stemExceptions, newSize); } stemExceptionID = stemExceptionCount + 1; // we use '0' to indicate no exception for the form stemExceptions[stemExceptionCount++] = stemException; } } int cmp = currentEntry == null ? 
1 : entry.compareTo(currentEntry); if (cmp < 0) { throw new IllegalArgumentException("out of order: " + entry + " < " + currentEntry); } else { encodeFlags(flagsScratch, wordForm); int ord = flagLookup.add(flagsScratch.get()); if (ord < 0) { // already exists in our hash ord = (-ord) - 1; } // finalize current entry, and switch "current" if necessary if (cmp > 0 && currentEntry != null) { Util.toUTF32(currentEntry, scratchInts); words.add(scratchInts.get(), currentOrds.get()); } // swap current if (cmp > 0 || currentEntry == null) { currentEntry = entry; currentOrds = new IntsRefBuilder(); // must be this way } if (hasStemExceptions) { currentOrds.append(ord); currentOrds.append(stemExceptionID); } else { currentOrds.append(ord); } } } // finalize last entry Util.toUTF32(currentEntry, scratchInts); words.add(scratchInts.get(), currentOrds.get()); reader.close(); sorted.delete(); }
From source file:io.bdrc.lucene.bo.TibetanAnalyzer.java
License:Apache License
/**
 * Reads a stopword list from the given stream, one word per line.
 * <p>
 * Tabs are stripped, trailing comments (text after {@code comment}) are cut
 * off, and lines that start with the comment marker are skipped entirely.
 * Empty results after trimming are not added.
 *
 * @param inputStream stream to the list of stopwords
 * @param comment     the string representing a comment
 * @throws IOException if the file containing stopwords can't be opened
 * @return the {@link ArrayList} filled with the reader's words
 */
public static ArrayList<String> getWordList(InputStream inputStream, String comment) throws IOException {
    ArrayList<String> words = new ArrayList<String>();
    BufferedReader lineReader = null;
    try {
        // NOTE(review): no explicit charset — decodes with the platform default;
        // confirm whether the stopword resource is expected to be UTF-8.
        lineReader = new BufferedReader(new InputStreamReader(inputStream));
        String entry;
        while ((entry = lineReader.readLine()) != null) {
            entry = entry.replace("\t", "");
            if (!entry.contains(comment)) {
                // Plain word line.
                entry = entry.trim();
                if (!entry.isEmpty()) {
                    words.add(entry);
                }
            } else if (!entry.startsWith(comment)) {
                // Word followed by an inline comment: keep only the word part.
                entry = entry.substring(0, entry.indexOf(comment)).trim();
                if (!entry.isEmpty()) {
                    words.add(entry);
                }
            }
            // Lines starting with the comment marker are skipped entirely.
        }
    } finally {
        IOUtils.close(lineReader);
    }
    return words;
}