List of usage examples for com.amazonaws.services.cloudsearchv2.model IndexFieldStatus getOptions
public IndexField getOptions()
From source file:com.digitalpebble.stormcrawler.aws.bolt.CloudSearchIndexerBolt.java
License:Apache License
@SuppressWarnings({ "rawtypes", "unchecked" }) @Override// ww w . ja v a 2 s . co m public void prepare(Map conf, TopologyContext context, OutputCollector collector) { super.prepare(conf, context, collector); _collector = collector; this.eventCounter = context.registerMetric("CloudSearchIndexer", new MultiCountMetric(), 10); maxTimeBuffered = ConfUtils.getInt(conf, CloudSearchConstants.MAX_TIME_BUFFERED, 10); maxDocsInBatch = ConfUtils.getInt(conf, CloudSearchConstants.MAX_DOCS_BATCH, -1); buffer = new StringBuffer(MAX_SIZE_BATCH_BYTES).append('['); dumpBatchFilesToTemp = ConfUtils.getBoolean(conf, "cloudsearch.batch.dump", false); if (dumpBatchFilesToTemp) { // only dumping to local file // no more config required return; } String endpoint = ConfUtils.getString(conf, "cloudsearch.endpoint"); if (StringUtils.isBlank(endpoint)) { String message = "Missing CloudSearch endpoint"; LOG.error(message); throw new RuntimeException(message); } String regionName = ConfUtils.getString(conf, CloudSearchConstants.REGION); AmazonCloudSearchClient cl = new AmazonCloudSearchClient(); if (StringUtils.isNotBlank(regionName)) { cl.setRegion(RegionUtils.getRegion(regionName)); } String domainName = null; // retrieve the domain name DescribeDomainsResult domains = cl.describeDomains(new DescribeDomainsRequest()); Iterator<DomainStatus> dsiter = domains.getDomainStatusList().iterator(); while (dsiter.hasNext()) { DomainStatus ds = dsiter.next(); if (ds.getDocService().getEndpoint().equals(endpoint)) { domainName = ds.getDomainName(); break; } } // check domain name if (StringUtils.isBlank(domainName)) { throw new RuntimeException("No domain name found for CloudSearch endpoint"); } DescribeIndexFieldsResult indexDescription = cl .describeIndexFields(new DescribeIndexFieldsRequest().withDomainName(domainName)); for (IndexFieldStatus ifs : indexDescription.getIndexFields()) { String indexname = ifs.getOptions().getIndexFieldName(); String indextype = ifs.getOptions().getIndexFieldType(); LOG.info("CloudSearch index name {} of type {}", indexname, indextype); csfields.put(indexname, indextype); } client = new AmazonCloudSearchDomainClient(); client.setEndpoint(endpoint); }
From source file:org.apache.nutch.indexwriter.cloudsearch.CloudSearchIndexWriter.java
License:Apache License
@Override public void open(IndexWriterParams parameters) throws IOException { // LOG.debug("CloudSearchIndexWriter.open() name={} ", name); endpoint = parameters.get(CloudSearchConstants.ENDPOINT); dumpBatchFilesToTemp = parameters.getBoolean(CloudSearchConstants.BATCH_DUMP, false); this.regionName = parameters.get(CloudSearchConstants.REGION); if (StringUtils.isBlank(endpoint) && !dumpBatchFilesToTemp) { String message = "Missing CloudSearch endpoint. Should set it set via -D " + CloudSearchConstants.ENDPOINT + " or in nutch-site.xml"; message += "\n" + describe(); LOG.error(message);//from w w w. j a va 2 s . c o m throw new RuntimeException(message); } maxDocsInBatch = parameters.getInt(CloudSearchConstants.MAX_DOCS_BATCH, -1); buffer = new StringBuffer(MAX_SIZE_BATCH_BYTES).append('['); if (dumpBatchFilesToTemp) { // only dumping to local file // no more config required return; } if (StringUtils.isBlank(endpoint)) { throw new RuntimeException("endpoint not set for CloudSearch"); } AmazonCloudSearchClient cl = new AmazonCloudSearchClient(); if (StringUtils.isNotBlank(regionName)) { cl.setRegion(RegionUtils.getRegion(regionName)); } String domainName = null; // retrieve the domain name DescribeDomainsResult domains = cl.describeDomains(new DescribeDomainsRequest()); Iterator<DomainStatus> dsiter = domains.getDomainStatusList().iterator(); while (dsiter.hasNext()) { DomainStatus ds = dsiter.next(); if (ds.getDocService().getEndpoint().equals(endpoint)) { domainName = ds.getDomainName(); break; } } // check domain name if (StringUtils.isBlank(domainName)) { throw new RuntimeException("No domain name found for CloudSearch endpoint"); } DescribeIndexFieldsResult indexDescription = cl .describeIndexFields(new DescribeIndexFieldsRequest().withDomainName(domainName)); for (IndexFieldStatus ifs : indexDescription.getIndexFields()) { String indexname = ifs.getOptions().getIndexFieldName(); String indextype = ifs.getOptions().getIndexFieldType(); LOG.info("CloudSearch index name {} of type {}", indexname, indextype); csfields.put(indexname, indextype); } client = new AmazonCloudSearchDomainClient(); client.setEndpoint(endpoint); }