List of usage examples for com.google.common.collect SortedSetMultimap asMap
/**
 * Returns the contents of this multimap as a {@link Map} that associates each key with the
 * {@link Collection} of values stored under that key.
 *
 * <p>NOTE(review): the declared value type is only {@code Collection<V>}; whether the
 * collections are ordered (and by which comparator) is not visible from this declaration —
 * confirm against the Guava documentation before relying on iteration order.
 *
 * @return a map from each key to the collection of its values
 */
@Override Map<K, Collection<V>> asMap();
From source file:com.streamsets.pipeline.stage.processor.fuzzy.FuzzyFieldProcessor.java
@Override protected void process(Record record, SingleLaneBatchMaker batchMaker) throws StageException { for (String rootFieldPath : rootFieldPaths) { final Field rootField = record.get(rootFieldPath); final Map<String, Field> rootFieldMap = flattenRootField(rootField); final Set<String> originalFieldNames = rootFieldMap.keySet(); SortedSetMultimap<String, MatchCandidate> candidates = findCandidatesFor(rootFieldMap); Map<String, Field> newFields = new HashMap<>(); for (Map.Entry<String, Collection<MatchCandidate>> entry : candidates.asMap().entrySet()) { Collection<MatchCandidate> candidatesForKey = entry.getValue(); Iterable<Field> fieldIterable = Iterables.transform(candidatesForKey, new Function<MatchCandidate, Field>() { @Nullable/*from www. jav a2s . c om*/ @Override public Field apply(MatchCandidate input) { Field field; originalFieldNames.remove(input.getFieldPath()); if (inPlace) { field = input.getField(); } else { Map<String, Field> map = new HashMap<>(); map.put(HEADER, Field.create(input.getFieldPath())); map.put(VALUE, input.getField()); map.put(SCORE, Field.create(input.getScore())); field = Field.create(map); } return field; } }); List<Field> fieldCandidates = Lists.newArrayList(fieldIterable); // Flatten this is there's we're only keeping a single candidate if (allCandidates) { newFields.put(entry.getKey(), Field.create(fieldCandidates)); } else { newFields.put(entry.getKey(), fieldCandidates.get(0)); } } if (preserveUnmatchedFields) { for (String originalFieldName : originalFieldNames) { newFields.put(originalFieldName, rootFieldMap.get(originalFieldName)); } } record.set(rootFieldPath, Field.create(newFields)); } batchMaker.addRecord(record); }
From source file:org.jabylon.updatecenter.repository.impl.OBRRepositoryConnectorImpl.java
private List<Resource> removeOldVersions(List<Resource> resources) { SortedSetMultimap<String, Resource> map = TreeMultimap.create(Collator.getInstance(), new ResourceComparator()); for (Resource bundle : resources) { map.put(bundle.getSymbolicName(), bundle); }// w w w .j av a 2s . co m resources.clear(); Set<Entry<String, Collection<Resource>>> entries = map.asMap().entrySet(); for (Entry<String, Collection<Resource>> entry : entries) { //add the highest version resources.add(entry.getValue().iterator().next()); } return resources; }
From source file:com.mgmtp.perfload.perfalyzer.reporting.ReportCreator.java
public void createReport(final List<PerfAlyzerFile> files) throws IOException { Function<PerfAlyzerFile, String> classifier = perfAlyzerFile -> { String marker = perfAlyzerFile.getMarker(); return marker == null ? "Overall" : marker; };//from w ww . ja va 2 s. c o m Supplier<Map<String, List<PerfAlyzerFile>>> mapFactory = () -> new TreeMap<>(Ordering.explicit(tabNames)); Map<String, List<PerfAlyzerFile>> filesByMarker = files.stream() .collect(Collectors.groupingBy(classifier, mapFactory, toList())); Map<String, SortedSetMultimap<String, PerfAlyzerFile>> contentItemFiles = new LinkedHashMap<>(); for (Entry<String, List<PerfAlyzerFile>> entry : filesByMarker.entrySet()) { SortedSetMultimap<String, PerfAlyzerFile> contentItemFilesByMarker = contentItemFiles.computeIfAbsent( entry.getKey(), s -> TreeMultimap.create(new ItemComparator(reportContentsConfigMap.get("priorities")), Ordering.natural())); for (PerfAlyzerFile perfAlyzerFile : entry.getValue()) { File file = perfAlyzerFile.getFile(); String groupKey = removeExtension(file.getPath()); boolean excluded = false; for (Pattern pattern : reportContentsConfigMap.get("exclusions")) { Matcher matcher = pattern.matcher(groupKey); if (matcher.matches()) { excluded = true; log.debug("Excluded from report: {}", groupKey); break; } } if (!excluded) { contentItemFilesByMarker.put(groupKey, perfAlyzerFile); } } } // explicitly copy it because it is otherwise filtered from the report in order to only show in the overview String loadProfilePlot = new File("console", "[loadprofile].png").getPath(); copyFile(new File(soureDir, loadProfilePlot), new File(destDir, loadProfilePlot)); Map<String, List<ContentItem>> tabItems = new LinkedHashMap<>(); Map<String, QuickJump> quickJumps = new HashMap<>(); Set<String> tabNames = contentItemFiles.keySet(); for (Entry<String, SortedSetMultimap<String, PerfAlyzerFile>> tabEntry : contentItemFiles.entrySet()) { String tab = tabEntry.getKey(); SortedSetMultimap<String, PerfAlyzerFile> 
filesForTab = tabEntry.getValue(); List<ContentItem> contentItems = tabItems.computeIfAbsent(tab, list -> new ArrayList<>()); Map<String, String> quickJumpMap = new LinkedHashMap<>(); quickJumps.put(tab, new QuickJump(tab, quickJumpMap)); int itemIndex = 0; for (Entry<String, Collection<PerfAlyzerFile>> itemEntry : filesForTab.asMap().entrySet()) { String title = itemEntry.getKey(); Collection<PerfAlyzerFile> itemFiles = itemEntry.getValue(); TableData tableData = null; String plotSrc = null; for (PerfAlyzerFile file : itemFiles) { if ("png".equals(getExtension(file.getFile().getName()))) { plotSrc = file.getFile().getPath(); copyFile(new File(soureDir, plotSrc), new File(destDir, plotSrc)); } else { tableData = createTableData(file.getFile()); } } // strip off potential marker title = substringBefore(title, "{"); String[] titleParts = split(title, SystemUtils.FILE_SEPARATOR); StringBuilder sb = new StringBuilder(50); String separator = " - "; sb.append(resourceBundle.getString(titleParts[0])); sb.append(separator); sb.append(resourceBundle.getString(titleParts[1])); List<String> fileNameParts = extractFileNameParts(titleParts[1], true); if (titleParts[1].contains("[distribution]")) { String operation = fileNameParts.get(1); sb.append(separator); sb.append(operation); } else if ("comparison".equals(titleParts[0])) { String operation = fileNameParts.get(1); sb.append(separator); sb.append(operation); } else if (titleParts[1].contains("[gclog]")) { if (fileNameParts.size() > 1) { sb.append(separator); sb.append(fileNameParts.get(1)); } } title = sb.toString(); ContentItem item = new ContentItem(tab, itemIndex, title, tableData, plotSrc, resourceBundle.getString("report.topLink")); contentItems.add(item); quickJumpMap.put(tab + "_" + itemIndex, title); itemIndex++; } } NavBar navBar = new NavBar(tabNames, quickJumps); String testName = removeExtension(testMetadata.getTestPlanFile()); OverviewItem overviewItem = new OverviewItem(testMetadata, resourceBundle, locale); 
Content content = new Content(tabItems); String perfAlyzerVersion; try { perfAlyzerVersion = Resources.toString(Resources.getResource("perfAlyzer.version"), Charsets.UTF_8); } catch (IOException ex) { log.error("Could not read perfAlyzer version from classpath resource 'perfAlyzer.version'", ex); perfAlyzerVersion = ""; } String dateTimeString = DateTimeFormatter.ISO_OFFSET_DATE_TIME.withLocale(locale) .format(ZonedDateTime.now()); String createdString = String.format(resourceBundle.getString("footer.created"), perfAlyzerVersion, dateTimeString); HtmlSkeleton html = new HtmlSkeleton(testName, createdString, navBar, overviewItem, content); writeReport(html); }
From source file:com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory.java
/** * Validates that a given class conforms to the following properties: * <ul>/*from ww w . j ava 2s. c o m*/ * <li>Any property with the same name must have the same return type for all derived * interfaces of {@link PipelineOptions}. * <li>Every bean property of any interface derived from {@link PipelineOptions} must have a * getter and setter method. * <li>Every method must conform to being a getter or setter for a JavaBean. * <li>Only getters may be annotated with {@link JsonIgnore @JsonIgnore}. * <li>If any getter is annotated with {@link JsonIgnore @JsonIgnore}, then all getters for * this property must be annotated with {@link JsonIgnore @JsonIgnore}. * </ul> * * @param iface The interface to validate. * @param validatedPipelineOptionsInterfaces The set of validated pipeline options interfaces to * validate against. * @param klass The proxy class representing the interface. * @return A list of {@link PropertyDescriptor}s representing all valid bean properties of * {@code iface}. * @throws IntrospectionException if invalid property descriptors. */ private static List<PropertyDescriptor> validateClass(Class<? extends PipelineOptions> iface, Set<Class<? extends PipelineOptions>> validatedPipelineOptionsInterfaces, Class<? extends PipelineOptions> klass) throws IntrospectionException { Set<Method> methods = Sets.newHashSet(IGNORED_METHODS); // Ignore synthetic methods for (Method method : klass.getMethods()) { if (Modifier.isStatic(method.getModifiers()) || method.isSynthetic()) { methods.add(method); } } // Ignore standard infrastructure methods on the generated class. 
try { methods.add(klass.getMethod("equals", Object.class)); methods.add(klass.getMethod("hashCode")); methods.add(klass.getMethod("toString")); methods.add(klass.getMethod("as", Class.class)); methods.add(klass.getMethod("cloneAs", Class.class)); methods.add(klass.getMethod("populateDisplayData", DisplayData.Builder.class)); } catch (NoSuchMethodException | SecurityException e) { throw new RuntimeException(e); } // Verify that there are no methods with the same name with two different return types. Iterable<Method> interfaceMethods = FluentIterable .from(ReflectHelpers.getClosureOfMethodsOnInterface(iface)).filter(NOT_SYNTHETIC_PREDICATE) .toSortedSet(MethodComparator.INSTANCE); SortedSetMultimap<Method, Method> methodNameToMethodMap = TreeMultimap.create(MethodNameComparator.INSTANCE, MethodComparator.INSTANCE); for (Method method : interfaceMethods) { methodNameToMethodMap.put(method, method); } List<MultipleDefinitions> multipleDefinitions = Lists.newArrayList(); for (Map.Entry<Method, Collection<Method>> entry : methodNameToMethodMap.asMap().entrySet()) { Set<Class<?>> returnTypes = FluentIterable.from(entry.getValue()) .transform(ReturnTypeFetchingFunction.INSTANCE).toSet(); SortedSet<Method> collidingMethods = FluentIterable.from(entry.getValue()) .toSortedSet(MethodComparator.INSTANCE); if (returnTypes.size() > 1) { MultipleDefinitions defs = new MultipleDefinitions(); defs.method = entry.getKey(); defs.collidingMethods = collidingMethods; multipleDefinitions.add(defs); } } throwForMultipleDefinitions(iface, multipleDefinitions); // Verify that there is no getter with a mixed @JsonIgnore annotation and verify // that no setter has @JsonIgnore. 
Iterable<Method> allInterfaceMethods = FluentIterable .from(ReflectHelpers.getClosureOfMethodsOnInterfaces(validatedPipelineOptionsInterfaces)) .append(ReflectHelpers.getClosureOfMethodsOnInterface(iface)).filter(NOT_SYNTHETIC_PREDICATE) .toSortedSet(MethodComparator.INSTANCE); SortedSetMultimap<Method, Method> methodNameToAllMethodMap = TreeMultimap .create(MethodNameComparator.INSTANCE, MethodComparator.INSTANCE); for (Method method : allInterfaceMethods) { methodNameToAllMethodMap.put(method, method); } List<PropertyDescriptor> descriptors = getPropertyDescriptors(klass); List<InconsistentlyIgnoredGetters> incompletelyIgnoredGetters = new ArrayList<>(); List<IgnoredSetter> ignoredSetters = new ArrayList<>(); for (PropertyDescriptor descriptor : descriptors) { if (descriptor.getReadMethod() == null || descriptor.getWriteMethod() == null || IGNORED_METHODS.contains(descriptor.getReadMethod()) || IGNORED_METHODS.contains(descriptor.getWriteMethod())) { continue; } SortedSet<Method> getters = methodNameToAllMethodMap.get(descriptor.getReadMethod()); SortedSet<Method> gettersWithJsonIgnore = Sets.filter(getters, JsonIgnorePredicate.INSTANCE); Iterable<String> getterClassNames = FluentIterable.from(getters) .transform(MethodToDeclaringClassFunction.INSTANCE).transform(ReflectHelpers.CLASS_NAME); Iterable<String> gettersWithJsonIgnoreClassNames = FluentIterable.from(gettersWithJsonIgnore) .transform(MethodToDeclaringClassFunction.INSTANCE).transform(ReflectHelpers.CLASS_NAME); if (!(gettersWithJsonIgnore.isEmpty() || getters.size() == gettersWithJsonIgnore.size())) { InconsistentlyIgnoredGetters err = new InconsistentlyIgnoredGetters(); err.descriptor = descriptor; err.getterClassNames = getterClassNames; err.gettersWithJsonIgnoreClassNames = gettersWithJsonIgnoreClassNames; incompletelyIgnoredGetters.add(err); } if (!incompletelyIgnoredGetters.isEmpty()) { continue; } SortedSet<Method> settersWithJsonIgnore = Sets.filter( 
methodNameToAllMethodMap.get(descriptor.getWriteMethod()), JsonIgnorePredicate.INSTANCE); Iterable<String> settersWithJsonIgnoreClassNames = FluentIterable.from(settersWithJsonIgnore) .transform(MethodToDeclaringClassFunction.INSTANCE).transform(ReflectHelpers.CLASS_NAME); if (!settersWithJsonIgnore.isEmpty()) { IgnoredSetter ignored = new IgnoredSetter(); ignored.descriptor = descriptor; ignored.settersWithJsonIgnoreClassNames = settersWithJsonIgnoreClassNames; ignoredSetters.add(ignored); } } throwForGettersWithInconsistentJsonIgnore(incompletelyIgnoredGetters); throwForSettersWithJsonIgnore(ignoredSetters); List<MissingBeanMethod> missingBeanMethods = new ArrayList<>(); // Verify that each property has a matching read and write method. for (PropertyDescriptor propertyDescriptor : descriptors) { if (!(IGNORED_METHODS.contains(propertyDescriptor.getWriteMethod()) || propertyDescriptor.getReadMethod() != null)) { MissingBeanMethod method = new MissingBeanMethod(); method.property = propertyDescriptor; method.methodType = "getter"; missingBeanMethods.add(method); continue; } if (!(IGNORED_METHODS.contains(propertyDescriptor.getReadMethod()) || propertyDescriptor.getWriteMethod() != null)) { MissingBeanMethod method = new MissingBeanMethod(); method.property = propertyDescriptor; method.methodType = "setter"; missingBeanMethods.add(method); continue; } methods.add(propertyDescriptor.getReadMethod()); methods.add(propertyDescriptor.getWriteMethod()); } throwForMissingBeanMethod(iface, missingBeanMethods); // Verify that no additional methods are on an interface that aren't a bean property. 
SortedSet<Method> unknownMethods = new TreeSet<>(MethodComparator.INSTANCE); unknownMethods.addAll(Sets.filter(Sets.difference(Sets.newHashSet(klass.getMethods()), methods), NOT_SYNTHETIC_PREDICATE)); checkArgument(unknownMethods.isEmpty(), "Methods %s on [%s] do not conform to being bean properties.", FluentIterable.from(unknownMethods).transform(ReflectHelpers.METHOD_FORMATTER), iface.getName()); return descriptors; }
From source file:org.apache.beam.sdk.options.PipelineOptionsFactory.java
/**
 * Ensures that methods sharing a name agree on their return type across all derived interfaces
 * of {@link PipelineOptions}.
 *
 * <p>All non-synthetic methods reachable from {@code iface} are grouped with
 * {@code MethodNameComparator}; every group with more than one distinct return type is
 * collected and handed to {@code throwForMultipleDefinitions}.
 *
 * @param iface The interface to validate.
 */
private static void validateReturnType(Class<? extends PipelineOptions> iface) {
  Iterable<Method> ifaceMethods = FluentIterable
      .from(ReflectHelpers.getClosureOfMethodsOnInterface(iface))
      .filter(NOT_SYNTHETIC_PREDICATE)
      .toSortedSet(MethodComparator.INSTANCE);

  SortedSetMultimap<Method, Method> methodsByName =
      TreeMultimap.create(MethodNameComparator.INSTANCE, MethodComparator.INSTANCE);
  for (Method ifaceMethod : ifaceMethods) {
    methodsByName.put(ifaceMethod, ifaceMethod);
  }

  List<MultipleDefinitions> conflictingDefinitions = Lists.newArrayList();
  for (Map.Entry<Method, Collection<Method>> group : methodsByName.asMap().entrySet()) {
    Collection<Method> groupedMethods = group.getValue();
    Set<Class<?>> distinctReturnTypes = FluentIterable.from(groupedMethods)
        .transform(ReturnTypeFetchingFunction.INSTANCE)
        .toSet();
    SortedSet<Method> collidingMethods = FluentIterable.from(groupedMethods)
        .toSortedSet(MethodComparator.INSTANCE);
    if (distinctReturnTypes.size() <= 1) {
      // A single return type means the group is consistent.
      continue;
    }
    MultipleDefinitions conflict = new MultipleDefinitions();
    conflict.method = group.getKey();
    conflict.collidingMethods = collidingMethods;
    conflictingDefinitions.add(conflict);
  }
  throwForMultipleDefinitions(iface, conflictingDefinitions);
}
From source file:org.jabylon.updatecenter.repository.impl.OBRRepositoryConnectorImpl.java
protected List<String> getHighestBundleVersions(String... filenames) { if (filenames == null) return Collections.emptyList(); SortedSetMultimap<String, String> map = TreeMultimap.create(Collator.getInstance(), new VersionComparator()); for (String string : filenames) { Matcher matcher = BUNDLE_PATTERN.matcher(string); if (matcher.matches()) { String name = matcher.group(1); String version = matcher.group(2); map.put(name, version);//www . j av a 2 s . c o m } else { logger.warn("{} does not match the pattern {}. Skipping", string, BUNDLE_PATTERN); } } Set<Entry<String, Collection<String>>> entrySet = map.asMap().entrySet(); List<String> result = new ArrayList<String>(entrySet.size()); for (Entry<String, Collection<String>> entry : entrySet) { result.add(entry.getKey() + "_" + entry.getValue().iterator().next() + ".jar"); } return result; }
From source file:google.registry.dns.ReadDnsQueueAction.java
/** Leases all tasks from the pull queue and creates per-tld update actions for them. */ @Override//from ww w.j a v a2 s . c om public void run() { Set<String> tldsOfInterest = getTlds(); List<TaskHandle> tasks = dnsQueue.leaseTasks(writeLockTimeout); if (tasks.isEmpty()) { return; } logger.infofmt("leased %d tasks", tasks.size()); // Normally, all tasks will be deleted from the pull queue. But some might have to remain if // we are not interested in the associated TLD, or if the TLD is paused. Remember which these // are. Set<TaskHandle> tasksToKeep = new HashSet<>(); // The paused TLDs for which we found at least one refresh request. Set<String> pausedTlds = new HashSet<>(); // Create a sorted multimap into which we will insert the refresh items, so that the items for // each TLD will be grouped together, and domains and hosts will be grouped within a TLD. The // grouping and ordering of domains and hosts is not technically necessary, but a predictable // ordering makes it possible to write detailed tests. SortedSetMultimap<String, RefreshItem> refreshItemMultimap = TreeMultimap.create(); // Read all tasks on the DNS pull queue and load them into the refresh item multimap. 
for (TaskHandle task : tasks) { try { Map<String, String> params = ImmutableMap.copyOf(task.extractParams()); String tld = params.get(RequestParameters.PARAM_TLD); if (tld == null) { logger.severe("discarding invalid DNS refresh request; no TLD specified"); } else if (!tldsOfInterest.contains(tld)) { tasksToKeep.add(task); } else if (Registry.get(tld).getDnsPaused()) { tasksToKeep.add(task); pausedTlds.add(tld); } else { String typeString = params.get(DNS_TARGET_TYPE_PARAM); String name = params.get(DNS_TARGET_NAME_PARAM); TargetType type = TargetType.valueOf(typeString); switch (type) { case DOMAIN: case HOST: refreshItemMultimap.put(tld, RefreshItem.create(type, name)); break; default: logger.severefmt("discarding DNS refresh request of type %s", typeString); break; } } } catch (RuntimeException | UnsupportedEncodingException e) { logger.severefmt(e, "discarding invalid DNS refresh request (task %s)", task); } } if (!pausedTlds.isEmpty()) { logger.infofmt("the dns-pull queue is paused for tlds: %s", pausedTlds); } // Loop through the multimap by TLD and generate refresh tasks for the hosts and domains. for (Map.Entry<String, Collection<RefreshItem>> tldRefreshItemsEntry : refreshItemMultimap.asMap() .entrySet()) { for (List<RefreshItem> chunk : Iterables.partition(tldRefreshItemsEntry.getValue(), tldUpdateBatchSize)) { TaskOptions options = withUrl(PublishDnsUpdatesAction.PATH).countdownMillis( jitterSeconds.isPresent() ? random.nextInt((int) SECONDS.toMillis(jitterSeconds.get())) : 0) .param(RequestParameters.PARAM_TLD, tldRefreshItemsEntry.getKey()); for (RefreshItem refreshItem : chunk) { options.param((refreshItem.type() == TargetType.HOST) ? PublishDnsUpdatesAction.HOSTS_PARAM : PublishDnsUpdatesAction.DOMAINS_PARAM, refreshItem.name()); } taskEnqueuer.enqueue(dnsPublishPushQueue, options); } } Set<TaskHandle> tasksToDelete = difference(ImmutableSet.copyOf(tasks), tasksToKeep); // In keepTasks mode, never delete any tasks. 
if (keepTasks) { logger.infofmt("would have deleted %d tasks", tasksToDelete.size()); for (TaskHandle task : tasks) { dnsQueue.dropTaskLease(task); } // Otherwise, either delete or drop the lease of each task. } else { logger.infofmt("deleting %d tasks", tasksToDelete.size()); dnsQueue.deleteTasks(ImmutableList.copyOf(tasksToDelete)); logger.infofmt("dropping %d tasks", tasksToKeep.size()); for (TaskHandle task : tasksToKeep) { dnsQueue.dropTaskLease(task); } logger.infofmt("done"); } }
From source file:org.apache.solr.search.SynonymExpandingExtendedDismaxQParserPlugin.java
/** * Given the synonymAnalyzer, returns a list of all alternate queries expanded from the original user query. * @param synonymAnalyzer/* w ww. j a v a 2s. c om*/ * @param solrParams * @return */ private List<Query> generateSynonymQueries(Analyzer synonymAnalyzer, SolrParams solrParams) throws IOException { // TODO: make the token stream reusable? TokenStream tokenStream = synonymAnalyzer.tokenStream(Const.IMPOSSIBLE_FIELD_NAME, new StringReader(getQueryStringFromParser())); SortedSetMultimap<Integer, TextInQuery> startPosToTextsInQuery = TreeMultimap.create(); boolean constructPhraseQueries = solrParams.getBool(Params.SYNONYMS_CONSTRUCT_PHRASES, false); boolean bag = solrParams.getBool(Params.SYNONYMS_BAG, false); List<String> synonymBag = new ArrayList<String>(); try { tokenStream.reset(); while (tokenStream.incrementToken()) { CharTermAttribute term = tokenStream.getAttribute(CharTermAttribute.class); OffsetAttribute offsetAttribute = tokenStream.getAttribute(OffsetAttribute.class); TypeAttribute typeAttribute = tokenStream.getAttribute(TypeAttribute.class); if (!typeAttribute.type().equals("shingle")) { // ignore shingles; we only care about synonyms and the original text // TODO: filter other types as well String termToAdd = term.toString(); if (typeAttribute.type().equals("SYNONYM")) { synonymBag.add(termToAdd); } if (constructPhraseQueries && typeAttribute.type().equals("SYNONYM")) { // make a phrase out of the synonym termToAdd = new StringBuilder(termToAdd).insert(0, '"').append('"').toString(); } if (!bag) { // create a graph of all possible synonym combinations, // e.g. dog bite, hound bite, dog nibble, hound nibble, etc. 
TextInQuery textInQuery = new TextInQuery(termToAdd, offsetAttribute.startOffset(), offsetAttribute.endOffset()); startPosToTextsInQuery.put(offsetAttribute.startOffset(), textInQuery); } } } tokenStream.end(); } catch (IOException e) { throw new RuntimeException("uncaught exception in synonym processing", e); } finally { try { tokenStream.close(); } catch (IOException e) { throw new RuntimeException("uncaught exception in synonym processing", e); } } List<String> alternateQueries = synonymBag; if (!bag) { // use a graph rather than a bag List<List<TextInQuery>> sortedTextsInQuery = new ArrayList<List<TextInQuery>>( startPosToTextsInQuery.values().size()); for (Collection<TextInQuery> sortedSet : startPosToTextsInQuery.asMap().values()) { sortedTextsInQuery.add(new ArrayList<TextInQuery>(sortedSet)); } // have to use the start positions and end positions to figure out all possible combinations alternateQueries = buildUpAlternateQueries(solrParams, sortedTextsInQuery); } // save for debugging purposes expandedSynonyms = alternateQueries; return createSynonymQueries(solrParams, alternateQueries); }
From source file:com.github.healthonnet.search.SynonymExpandingExtendedDismaxQParserPlugin.java
/** * Given the synonymAnalyzer, returns a list of all alternate queries expanded from the original user query. * /*w w w . ja v a2 s.c o m*/ * @param synonymAnalyzer * @param solrParams * @return */ private List<Query> generateSynonymQueries(Analyzer synonymAnalyzer, SolrParams solrParams) { String origQuery = getQueryStringFromParser(); int queryLen = origQuery.length(); // TODO: make the token stream reusable? TokenStream tokenStream = synonymAnalyzer.tokenStream(Const.IMPOSSIBLE_FIELD_NAME, new StringReader(origQuery)); SortedSetMultimap<Integer, TextInQuery> startPosToTextsInQuery = TreeMultimap.create(); boolean constructPhraseQueries = solrParams.getBool(Params.SYNONYMS_CONSTRUCT_PHRASES, false); boolean bag = solrParams.getBool(Params.SYNONYMS_BAG, false); List<String> synonymBag = new ArrayList<>(); try { tokenStream.reset(); while (tokenStream.incrementToken()) { CharTermAttribute term = tokenStream.getAttribute(CharTermAttribute.class); OffsetAttribute offsetAttribute = tokenStream.getAttribute(OffsetAttribute.class); TypeAttribute typeAttribute = tokenStream.getAttribute(TypeAttribute.class); if (!typeAttribute.type().equals("shingle")) { // ignore shingles; we only care about synonyms and the original text // TODO: filter other types as well String termToAdd = term.toString(); if (typeAttribute.type().equals("SYNONYM")) { synonymBag.add(termToAdd); } // Don't quote sibgle term term synonyms if (constructPhraseQueries && typeAttribute.type().equals("SYNONYM") && termToAdd.contains(" ")) { // Don't Quote when original is already surrounded by quotes if (offsetAttribute.startOffset() == 0 || offsetAttribute.endOffset() == queryLen || origQuery.charAt(offsetAttribute.startOffset() - 1) != '"' || origQuery.charAt(offsetAttribute.endOffset()) != '"') { // make a phrase out of the synonym termToAdd = new StringBuilder(termToAdd).insert(0, '"').append('"').toString(); } } if (!bag) { // create a graph of all possible synonym combinations, // e.g. 
dog bite, hound bite, dog nibble, hound nibble, etc. TextInQuery textInQuery = new TextInQuery(termToAdd, offsetAttribute.startOffset(), offsetAttribute.endOffset()); startPosToTextsInQuery.put(offsetAttribute.startOffset(), textInQuery); } } } tokenStream.end(); } catch (IOException e) { throw new RuntimeException("uncaught exception in synonym processing", e); } finally { try { tokenStream.close(); } catch (IOException e) { throw new RuntimeException("uncaught exception in synonym processing", e); } } List<String> alternateQueries = synonymBag; if (!bag) { // use a graph rather than a bag List<List<TextInQuery>> sortedTextsInQuery = new ArrayList<>(startPosToTextsInQuery.values().size()); sortedTextsInQuery.addAll(startPosToTextsInQuery.asMap().values().stream().map(ArrayList::new) .collect(Collectors.toList())); // have to use the start positions and end positions to figure out all possible combinations alternateQueries = buildUpAlternateQueries(solrParams, sortedTextsInQuery); } // save for debugging purposes expandedSynonyms = alternateQueries; return createSynonymQueries(solrParams, alternateQueries); }