List of usage examples for the java.util.concurrent.atomic.AtomicLong constructor
public AtomicLong(long initialValue)
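Before the real-world examples below, here is a minimal, self-contained sketch of what this constructor does: it creates an atomic 64-bit counter seeded with the given initial value, which can then be read and updated atomically. The class name and the printed values are purely illustrative.

import java.util.concurrent.atomic.AtomicLong;

public class AtomicLongConstructorExample {
    public static void main(String[] args) {
        // Seed the counter with an initial value of 42.
        AtomicLong counter = new AtomicLong(42L);

        // Atomic read-modify-write operations.
        long afterIncrement = counter.incrementAndGet(); // 43
        long previous = counter.getAndAdd(7L);           // returns 43, counter becomes 50

        System.out.println(afterIncrement); // 43
        System.out.println(previous);       // 43
        System.out.println(counter.get());  // 50
    }
}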
From source file:eu.esdihumboldt.hale.io.haleconnect.internal.HaleConnectServiceImpl.java
private ApiCallback<Feedback> createUploadFileCallback(final SettableFuture<Boolean> future,
        final ProgressIndicator progress, final File file, final int totalWork) {
    return new ApiCallback<Feedback>() {

        AtomicLong chunkWritten = new AtomicLong(0);
        AtomicLong bytesReported = new AtomicLong(0);

        @Override
        public void onDownloadProgress(long bytesRead, long contentLength, boolean done) {
            // not required
        }

        @Override
        public void onFailure(com.haleconnect.api.projectstore.v1.ApiException e, int statusCode,
                Map<String, List<String>> responseHeaders) {
            progress.end();
            future.setException(new HaleConnectException(e.getMessage(), e, statusCode, responseHeaders));
        }

        @Override
        public void onSuccess(Feedback result, int statusCode, Map<String, List<String>> responseHeaders) {
            if (result.getError()) {
                log.error(MessageFormat.format("Error uploading project file \"{0}\": {1}",
                        file.getAbsolutePath(), result.getMessage()));
                future.set(false);
            } else {
                future.set(true);
            }
            progress.end();
        }

        @Override
        public void onUploadProgress(long bytesWritten, long contentLength, boolean done) {
            // bytesWritten contains the accumulated amount of bytes written
            if (totalWork != ProgressIndicator.UNKNOWN) {
                // Wait until at least 1 KiB was written
                long chunk = chunkWritten.get();
                chunk += bytesWritten - bytesReported.get();
                if (chunk >= 1024) {
                    // cannot overflow, total size in KiB is guaranteed to be < Integer.MAX_VALUE
                    long workToReport = chunk >> 10;
                    progress.advance(Math.toIntExact(workToReport));
                    chunk -= workToReport << 10; // chunkWritten now always < 1024
                }
                chunkWritten.set(chunk);
                bytesReported.set(bytesWritten);
            }
        }
    };
}
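The interesting AtomicLong usage here is the chunkWritten/bytesReported pair: the callback receives a cumulative byte count and converts it into whole-KiB progress units, carrying the sub-KiB remainder forward. A hypothetical, stripped-down sketch of that bookkeeping (class and method names are invented, and the callback is assumed to be invoked sequentially, as in the upload listener above):

import java.util.concurrent.atomic.AtomicLong;

// Converts an accumulating byte count into whole-KiB progress units,
// mirroring the chunkWritten/bytesReported bookkeeping above.
public class KibProgressTracker {

    private final AtomicLong pendingBytes = new AtomicLong(0);   // bytes not yet reported as KiB
    private final AtomicLong bytesReported = new AtomicLong(0);  // last cumulative value seen

    /** Returns the number of whole KiB to report for this update (possibly 0). */
    public int onUploadProgress(long cumulativeBytesWritten) {
        long pending = pendingBytes.get()
                + (cumulativeBytesWritten - bytesReported.get());
        int wholeKib = 0;
        if (pending >= 1024) {
            wholeKib = Math.toIntExact(pending >> 10);
            pending -= ((long) wholeKib) << 10; // keep the remainder below 1024
        }
        pendingBytes.set(pending);
        bytesReported.set(cumulativeBytesWritten);
        return wholeKib;
    }

    public static void main(String[] args) {
        KibProgressTracker tracker = new KibProgressTracker();
        System.out.println(tracker.onUploadProgress(500));   // 0 (not a full KiB yet)
        System.out.println(tracker.onUploadProgress(3000));  // 2 (2048 bytes reported, 952 carried over)
        System.out.println(tracker.onUploadProgress(4096));  // 2 (952 carried + 1096 new = 2048 bytes)
    }
}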
From source file:ai.susi.mind.SusiMind.java
/**
 * This is the core principle of creativity: being able to match a given input
 * with problem-solving knowledge.
 * This method finds ideas (with a query instantiated skills) for a given query.
 * The skills are selected using a scoring system and pattern matching with the query.
 * Not only the most recent user query is considered for skill selection but also
 * previously requested queries and their answers to be able to set new skill selections
 * in the context of the previous conversation.
 * @param query the user input
 * @param previous_argument the latest conversation with the same user
 * @param maxcount the maximum number of ideas to return
 * @return an ordered list of ideas, first idea should be considered first.
 */
public List<SusiIdea> creativity(String query, SusiThought latest_thought, int maxcount) {
    // tokenize query to have hint for idea collection
    final List<SusiIdea> ideas = new ArrayList<>();
    this.reader.tokenizeSentence(query).forEach(token -> {
        Set<SusiSkill> skill_for_category = this.skilltrigger.get(token.categorized);
        Set<SusiSkill> skill_for_original = token.original.equals(token.categorized) ? null
                : this.skilltrigger.get(token.original);
        Set<SusiSkill> r = new HashSet<>();
        if (skill_for_category != null) r.addAll(skill_for_category);
        if (skill_for_original != null) r.addAll(skill_for_original);
        r.forEach(skill -> ideas.add(new SusiIdea(skill).setIntent(token)));
    });

    for (SusiIdea idea : ideas) DAO.log("idea.phrase-1: score=" + idea.getSkill().getScore().score
            + " : " + idea.getSkill().getPhrases().toString() + " " + idea.getSkill().getActionsClone());

    // add catchall skills always (those are the 'bad ideas')
    Collection<SusiSkill> ca = this.skilltrigger.get(SusiSkill.CATCHALL_KEY);
    if (ca != null) ca.forEach(skill -> ideas.add(new SusiIdea(skill)));

    // create list of all ideas that might apply
    TreeMap<Long, List<SusiIdea>> scored = new TreeMap<>();
    AtomicLong count = new AtomicLong(0);
    ideas.forEach(idea -> {
        int score = idea.getSkill().getScore().score;
        long orderkey = Long.MAX_VALUE - ((long) score) * 1000L + count.incrementAndGet();
        List<SusiIdea> r = scored.get(orderkey);
        if (r == null) {
            r = new ArrayList<>();
            scored.put(orderkey, r);
        }
        r.add(idea);
    });

    // make a sorted list of all ideas
    ideas.clear();
    scored.values().forEach(r -> ideas.addAll(r));

    for (SusiIdea idea : ideas) DAO.log("idea.phrase-2: score=" + idea.getSkill().getScore().score
            + " : " + idea.getSkill().getPhrases().toString() + " " + idea.getSkill().getActionsClone());

    // test ideas and collect those which match up to maxcount
    List<SusiIdea> plausibleIdeas = new ArrayList<>(Math.min(10, maxcount));
    for (SusiIdea idea : ideas) {
        SusiSkill skill = idea.getSkill();
        Collection<Matcher> m = skill.matcher(query);
        if (m.isEmpty()) continue;
        // TODO: evaluate leading SEE flow commands right here as well
        plausibleIdeas.add(idea);
        if (plausibleIdeas.size() >= maxcount) break;
    }

    for (SusiIdea idea : plausibleIdeas) {
        DAO.log("idea.phrase-3: score=" + idea.getSkill().getScore().score
                + " : " + idea.getSkill().getPhrases().toString() + " " + idea.getSkill().getActionsClone());
        DAO.log("idea.phrase-3: log=" + idea.getSkill().getScore().log);
    }

    return plausibleIdeas;
}
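Here the AtomicLong is not a shared counter at all but a tie-breaker: the TreeMap key `Long.MAX_VALUE - score * 1000 + count.incrementAndGet()` sorts ideas by descending score while keeping insertion order among equal scores. A hypothetical, simplified illustration of that ordering idiom (the Candidate class and sample data are invented; like the original, it assumes fewer than 1000 entries so the counter never spills into the next score band):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.TreeMap;
import java.util.concurrent.atomic.AtomicLong;

public class ScoreOrderingExample {

    static final class Candidate {
        final String name;
        final int score;
        Candidate(String name, int score) { this.name = name; this.score = score; }
    }

    public static void main(String[] args) {
        List<Candidate> candidates = Arrays.asList(
                new Candidate("a", 5), new Candidate("b", 9),
                new Candidate("c", 5), new Candidate("d", 1));

        TreeMap<Long, List<Candidate>> scored = new TreeMap<>();
        AtomicLong count = new AtomicLong(0);
        for (Candidate c : candidates) {
            // Smaller key = higher score; the counter breaks ties in insertion order.
            long orderKey = Long.MAX_VALUE - ((long) c.score) * 1000L + count.incrementAndGet();
            scored.computeIfAbsent(orderKey, k -> new ArrayList<>()).add(c);
        }

        // Prints b(9), a(5), c(5), d(1): descending score, stable among equal scores.
        scored.values().forEach(list ->
                list.forEach(c -> System.out.println(c.name + "(" + c.score + ")")));
    }
}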
From source file:org.apache.hadoop.hbase.wal.TestWALFactory.java
/**
 * Tests that we can write out an edit, close, and then read it back in again.
 * @throws IOException
 */
@Test
public void testEditAdd() throws IOException {
    final int COL_COUNT = 10;
    final HTableDescriptor htd = new HTableDescriptor(TableName.valueOf("tablename"))
            .addFamily(new HColumnDescriptor("column"));
    final byte[] row = Bytes.toBytes("row");
    WAL.Reader reader = null;
    try {
        final AtomicLong sequenceId = new AtomicLong(1);
        // Write columns named 1, 2, 3, etc. and then values of single byte
        // 1, 2, 3...
        long timestamp = System.currentTimeMillis();
        WALEdit cols = new WALEdit();
        for (int i = 0; i < COL_COUNT; i++) {
            cols.add(new KeyValue(row, Bytes.toBytes("column"), Bytes.toBytes(Integer.toString(i)),
                    timestamp, new byte[] { (byte) (i + '0') }));
        }
        HRegionInfo info = new HRegionInfo(htd.getTableName(), row,
                Bytes.toBytes(Bytes.toString(row) + "1"), false);
        final WAL log = wals.getWAL(info.getEncodedNameAsBytes());

        final long txid = log.append(htd, info,
                new WALKey(info.getEncodedNameAsBytes(), htd.getTableName(), System.currentTimeMillis()),
                cols, sequenceId, true, null);
        log.sync(txid);
        log.startCacheFlush(info.getEncodedNameAsBytes(), htd.getFamiliesKeys());
        log.completeCacheFlush(info.getEncodedNameAsBytes());
        log.shutdown();
        Path filename = DefaultWALProvider.getCurrentFileName(log);
        // Now open a reader on the log and assert append worked.
        reader = wals.createReader(fs, filename);
        // Above we added all columns on a single row so we only read one
        // entry in the below... thats why we have '1'.
        for (int i = 0; i < 1; i++) {
            WAL.Entry entry = reader.next(null);
            if (entry == null) break;
            WALKey key = entry.getKey();
            WALEdit val = entry.getEdit();
            assertTrue(Bytes.equals(info.getEncodedNameAsBytes(), key.getEncodedRegionName()));
            assertTrue(htd.getTableName().equals(key.getTablename()));
            Cell cell = val.getCells().get(0);
            assertTrue(Bytes.equals(row, 0, row.length, cell.getRowArray(), cell.getRowOffset(),
                    cell.getRowLength()));
            assertEquals((byte) (i + '0'), CellUtil.cloneValue(cell)[0]);
            System.out.println(key + " " + val);
        }
    } finally {
        if (reader != null) {
            reader.close();
        }
    }
}
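In this test the AtomicLong plays the role of a WAL sequence-id generator: it is seeded at 1 and handed to the append path, which draws strictly increasing ids from it. A hypothetical minimal sketch of that generator pattern in isolation (class and method names are invented):

import java.util.concurrent.atomic.AtomicLong;

public class SequenceIdGenerator {

    private final AtomicLong sequenceId;

    public SequenceIdGenerator(long firstId) {
        this.sequenceId = new AtomicLong(firstId);
    }

    /** Hands out the next id; safe to call from multiple writer threads. */
    public long nextId() {
        return sequenceId.getAndIncrement();
    }

    public static void main(String[] args) throws InterruptedException {
        SequenceIdGenerator gen = new SequenceIdGenerator(1L);
        Runnable writer = () -> {
            for (int i = 0; i < 1000; i++) {
                gen.nextId();
            }
        };
        Thread t1 = new Thread(writer);
        Thread t2 = new Thread(writer);
        t1.start(); t2.start();
        t1.join(); t2.join();
        // 2000 ids handed out starting at 1, so the next one is 2001.
        System.out.println(gen.nextId()); // 2001
    }
}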
From source file:org.apache.hadoop.hbase.regionserver.wal.TestHLog.java
@Test(timeout = 300000)
public void testAppendClose() throws Exception {
    TableName tableName = TableName.valueOf(getName());
    HRegionInfo regioninfo = new HRegionInfo(tableName, HConstants.EMPTY_START_ROW,
            HConstants.EMPTY_END_ROW, false);

    HLog wal = HLogFactory.createHLog(fs, dir, "hlogdir", "hlogdir_archive", conf);
    final AtomicLong sequenceId = new AtomicLong(1);
    final int total = 20;

    HTableDescriptor htd = new HTableDescriptor();
    htd.addFamily(new HColumnDescriptor(tableName.getName()));

    for (int i = 0; i < total; i++) {
        WALEdit kvs = new WALEdit();
        kvs.add(new KeyValue(Bytes.toBytes(i), tableName.getName(), tableName.getName()));
        wal.append(regioninfo, tableName, kvs, System.currentTimeMillis(), htd, sequenceId);
    }
    // Now call sync to send the data to HDFS datanodes
    wal.sync();
    int namenodePort = cluster.getNameNodePort();
    final Path walPath = ((FSHLog) wal).computeFilename();

    // Stop the cluster. (ensure restart since we're sharing MiniDFSCluster)
    try {
        DistributedFileSystem dfs = (DistributedFileSystem) cluster.getFileSystem();
        dfs.setSafeMode(FSConstants.SafeModeAction.SAFEMODE_ENTER);
        TEST_UTIL.shutdownMiniDFSCluster();
        try {
            // wal.writer.close() will throw an exception,
            // but still call this since it closes the LogSyncer thread first
            wal.close();
        } catch (IOException e) {
            LOG.info(e);
        }
        fs.close(); // closing FS last so DFSOutputStream can't call close
        LOG.info("STOPPED first instance of the cluster");
    } finally {
        // Restart the cluster
        while (cluster.isClusterUp()) {
            LOG.error("Waiting for cluster to go down");
            Thread.sleep(1000);
        }
        assertFalse(cluster.isClusterUp());
        cluster = null;
        for (int i = 0; i < 100; i++) {
            try {
                cluster = TEST_UTIL.startMiniDFSClusterForTestHLog(namenodePort);
                break;
            } catch (BindException e) {
                LOG.info("Sleeping. BindException bringing up new cluster");
                Threads.sleep(1000);
            }
        }
        cluster.waitActive();
        fs = cluster.getFileSystem();
        LOG.info("STARTED second instance.");
    }

    // set the lease period to be 1 second so that the
    // namenode triggers lease recovery upon append request
    Method setLeasePeriod = cluster.getClass().getDeclaredMethod("setLeasePeriod",
            new Class[] { Long.TYPE, Long.TYPE });
    setLeasePeriod.setAccessible(true);
    setLeasePeriod.invoke(cluster, 1000L, 1000L);
    try {
        Thread.sleep(1000);
    } catch (InterruptedException e) {
        LOG.info(e);
    }

    // Now try recovering the log, like the HMaster would do
    final FileSystem recoveredFs = fs;
    final Configuration rlConf = conf;
    class RecoverLogThread extends Thread {
        public Exception exception = null;

        public void run() {
            try {
                FSUtils.getInstance(fs, rlConf).recoverFileLease(recoveredFs, walPath, rlConf, null);
            } catch (IOException e) {
                exception = e;
            }
        }
    }

    RecoverLogThread t = new RecoverLogThread();
    t.start();
    // Timeout after 60 sec. Without correct patches, would be an infinite loop
    t.join(60 * 1000);
    if (t.isAlive()) {
        t.interrupt();
        throw new Exception("Timed out waiting for HLog.recoverLog()");
    }
    if (t.exception != null)
        throw t.exception;

    // Make sure you can read all the content
    HLog.Reader reader = HLogFactory.createReader(fs, walPath, conf);
    int count = 0;
    HLog.Entry entry = new HLog.Entry();
    while (reader.next(entry) != null) {
        count++;
        assertTrue("Should be one KeyValue per WALEdit", entry.getEdit().getKeyValues().size() == 1);
    }
    assertEquals(total, count);
    reader.close();

    // Reset the lease period
    setLeasePeriod.invoke(cluster, new Object[] { new Long(60000), new Long(3600000) });
}
From source file:com.github.aptd.simulation.datamodel.CXMLReader.java
/**
 * create the train list
 *
 * @param p_network network component
 * @param p_agents map with agent asl scripts
 * @param p_factory factory
 * @return unmodifiable map with trains
 */
private static Pair<Map<String, ITrain<?>>, Map<String, IDoor<?>>> train(final Network p_network,
        final Map<String, String> p_agents, final IFactory p_factory, final ITime p_time,
        final double p_minfreetimetoclose) {
    final String l_dooragent = IStatefulElement.getDefaultAsl("door");
    final Map<String, IElement.IGenerator<ITrain<?>>> l_generators = new ConcurrentHashMap<>();
    final Set<IAction> l_actions = CCommon.actionsFromPackage().collect(Collectors.toSet());
    final IElement.IGenerator<IDoor<?>> l_doorgenerator = doorgenerator(p_factory, l_dooragent, l_actions, p_time);
    final Map<String, AtomicLong> l_doorcount = Collections.synchronizedMap(new HashMap<>());
    final Map<String, IDoor<?>> l_doors = Collections.synchronizedMap(new HashMap<>());
    return new ImmutablePair<>(
        Collections.<String, ITrain<?>>unmodifiableMap(
            p_network.getTimetable().getTrains().getTrain().parallelStream()
                .filter(i -> hasagentname(i.getAny3()))
                .map(i -> agentname(i, i.getAny3()))
                .map(i -> l_generators
                    .computeIfAbsent(i.getRight(),
                        a -> traingenerator(p_factory, p_agents.get(i.getRight()), l_actions, p_time))
                    .generatesingle(
                        i.getLeft().getId(),
                        i.getLeft().getTrainPartSequence().stream().flatMap(ref -> {
                            // @todo support multiple train parts
                            final EOcpTT[] l_tts = ((ETrainPart) ref.getTrainPartRef().get(0).getRef())
                                .getOcpsTT().getOcpTT().toArray(new EOcpTT[0]);
                            final CTrain.CTimetableEntry[] l_entries = new CTrain.CTimetableEntry[l_tts.length];
                            for (int j = 0; j < l_tts.length; j++) {
                                final EArrivalDepartureTimes l_times = l_tts[j].getTimes().stream()
                                    .filter(t -> t.getScope().equalsIgnoreCase("published"))
                                    .findAny()
                                    .orElseThrow(() -> new CSemanticException("missing published times"));
                                l_entries[j] = new CTrain.CTimetableEntry(
                                    j < 1
                                        ? 0.0
                                        : ((ETrack) l_tts[j - 1].getSectionTT().getTrackRef().get(0).getRef())
                                            .getTrackTopology().getTrackEnd().getPos().doubleValue(),
                                    ((EOcp) l_tts[j].getOcpRef()).getId(),
                                    l_tts[j].getStopDescription().getOtherAttributes()
                                        .getOrDefault(PLATFORM_REF_ATTRIBUTE, null),
                                    l_times.getArrival() == null
                                        ? null
                                        : l_times.getArrival().toGregorianCalendar().toZonedDateTime()
                                            .with(LocalDate.from(p_time.current().atZone(ZoneId.systemDefault())))
                                            .toInstant(),
                                    l_times.getDeparture() == null
                                        ? null
                                        : l_times.getDeparture().toGregorianCalendar().toZonedDateTime()
                                            .with(LocalDate.from(p_time.current().atZone(ZoneId.systemDefault())))
                                            .toInstant());
                            }
                            return Arrays.stream(l_entries);
                        }),
                        i.getLeft().getTrainPartSequence().stream()
                            // @todo support multiple train parts
                            .map(s -> (ETrainPart) s.getTrainPartRef().get(0).getRef())
                            .map(p -> (EFormation) p.getFormationTT().getFormationRef())
                            .flatMap(f -> f.getTrainOrder().getVehicleRef().stream())
                            .map(r -> new ImmutablePair<BigInteger, TDoors>(r.getVehicleCount(),
                                ((EVehicle) r.getVehicleRef()).getWagon().getPassenger().getDoors()))
                            .flatMap(v -> IntStream
                                .range(0, v.getLeft().intValue() * v.getRight().getNumber().intValue())
                                .mapToObj(j -> l_doors.computeIfAbsent(
                                    "door-" + i.getLeft().getId() + "-"
                                        + l_doorcount
                                            .computeIfAbsent(i.getLeft().getId(), id -> new AtomicLong(1L))
                                            .getAndIncrement(),
                                    id -> l_doorgenerator.generatesingle(
                                        id,
                                        i.getLeft().getId(),
                                        v.getRight().getEntranceWidth().doubleValue()
                                            / v.getRight().getNumber().longValue(),
                                        p_minfreetimetoclose))))
                            .collect(Collectors.toList())))
                .collect(Collectors.toMap(IElement::id, i -> i))),
        l_doors);
}
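Buried in the nested streams above is a useful AtomicLong idiom: a per-train counter created lazily with computeIfAbsent, seeded at 1, and used to number doors ("door-&lt;trainId&gt;-1", "door-&lt;trainId&gt;-2", ...). A hypothetical isolated sketch of that per-key numbering pattern (class, method, and id names are invented):

import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.atomic.AtomicLong;

public class PerKeyCounterExample {

    // One counter per train id, created on first use.
    private final Map<String, AtomicLong> countersPerTrain = Collections.synchronizedMap(new HashMap<>());

    public String nextDoorId(String trainId) {
        long number = countersPerTrain
                .computeIfAbsent(trainId, id -> new AtomicLong(1L))
                .getAndIncrement();
        return "door-" + trainId + "-" + number;
    }

    public static void main(String[] args) {
        PerKeyCounterExample counters = new PerKeyCounterExample();
        System.out.println(counters.nextDoorId("ICE-100")); // door-ICE-100-1
        System.out.println(counters.nextDoorId("ICE-100")); // door-ICE-100-2
        System.out.println(counters.nextDoorId("RB-7"));    // door-RB-7-1
    }
}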
From source file:fr.gouv.vitam.mdbes.QueryBench.java
/**
 * To be called each time an execute will be called if the request starts from zero
 * @param indexName ES Index
 * @param typeName ES type in Index
 * @return the new BenchContext
 */
public BenchContext getNewContext(String indexName, String typeName) {
    BenchContext newBenchContext = new BenchContext();
    newBenchContext.indexName = indexName;
    newBenchContext.typeName = typeName;
    newBenchContext.savedNames.putAll(context.savedNames);
    for (String key : context.cpts.keySet()) {
        AtomicLong cpt = context.cpts.get(key);
        if (distribCpt != null && distribCpt == cpt) {
            AtomicLong count = new AtomicLong(0);
            newBenchContext.cpts.put(key, count);
            newBenchContext.distrib = count;
        } else {
            newBenchContext.cpts.put(key, new AtomicLong(0));
        }
    }
    return newBenchContext;
}
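The pattern worth noting here is "fresh counters per run": rather than resetting shared AtomicLongs in place, the new context receives brand-new AtomicLong(0) instances so the previous run's values stay untouched. A hypothetical minimal sketch of that idea (class and key names are invented):

import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.atomic.AtomicLong;

public class CounterContext {

    final Map<String, AtomicLong> counters = new HashMap<>();

    static CounterContext freshCopyOf(CounterContext previous) {
        CounterContext next = new CounterContext();
        for (String key : previous.counters.keySet()) {
            // New instance seeded with 0, not previous.counters.get(key).
            next.counters.put(key, new AtomicLong(0));
        }
        return next;
    }

    public static void main(String[] args) {
        CounterContext run1 = new CounterContext();
        run1.counters.put("requests", new AtomicLong(0));
        run1.counters.get("requests").addAndGet(42);

        CounterContext run2 = freshCopyOf(run1);
        System.out.println(run1.counters.get("requests")); // 42
        System.out.println(run2.counters.get("requests")); // 0
    }
}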
From source file:org.apache.distributedlog.auditor.DLAuditor.java
private long calculateLedgerSpaceUsage(BookKeeperClient bkc, final ExecutorService executorService)
        throws IOException {
    final AtomicLong totalBytes = new AtomicLong(0);
    final AtomicLong totalEntries = new AtomicLong(0);
    final AtomicLong numLedgers = new AtomicLong(0);

    LedgerManager lm = BookKeeperAccessor.getLedgerManager(bkc.get());

    final CompletableFuture<Void> doneFuture = FutureUtils.createFuture();
    final BookKeeper bk = bkc.get();

    BookkeeperInternalCallbacks.Processor<Long> collector = new BookkeeperInternalCallbacks.Processor<Long>() {
        @Override
        public void process(final Long lid, final AsyncCallback.VoidCallback cb) {
            numLedgers.incrementAndGet();
            executorService.submit(new Runnable() {
                @Override
                public void run() {
                    bk.asyncOpenLedgerNoRecovery(lid, BookKeeper.DigestType.CRC32,
                            conf.getBKDigestPW().getBytes(UTF_8),
                            new org.apache.bookkeeper.client.AsyncCallback.OpenCallback() {
                                @Override
                                public void openComplete(int rc, LedgerHandle lh, Object ctx) {
                                    final int cbRc;
                                    if (BKException.Code.OK == rc) {
                                        totalBytes.addAndGet(lh.getLength());
                                        totalEntries.addAndGet(lh.getLastAddConfirmed() + 1);
                                        cbRc = rc;
                                    } else {
                                        cbRc = BKException.Code.ZKException;
                                    }
                                    executorService.submit(new Runnable() {
                                        @Override
                                        public void run() {
                                            cb.processResult(cbRc, null, null);
                                        }
                                    });
                                }
                            }, null);
                }
            });
        }
    };

    AsyncCallback.VoidCallback finalCb = new AsyncCallback.VoidCallback() {
        @Override
        public void processResult(int rc, String path, Object ctx) {
            if (BKException.Code.OK == rc) {
                doneFuture.complete(null);
            } else {
                doneFuture.completeExceptionally(BKException.create(rc));
            }
        }
    };

    lm.asyncProcessLedgers(collector, finalCb, null, BKException.Code.OK, BKException.Code.ZKException);

    try {
        doneFuture.get();
        logger.info("calculated {} ledgers\n\ttotal bytes = {}\n\ttotal entries = {}",
                new Object[] { numLedgers.get(), totalBytes.get(), totalEntries.get() });
    } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        throw new DLInterruptedException("Interrupted on calculating ledger space : ", e);
    } catch (ExecutionException e) {
        if (e.getCause() instanceof IOException) {
            throw (IOException) (e.getCause());
        } else {
            throw new IOException("Failed to calculate ledger space : ", e.getCause());
        }
    }
    return totalBytes.get();
}
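The AtomicLong usage here (and in the near-identical Twitter variant further below) is the classic concurrent accumulator: totals are updated from asynchronous callbacks running on a thread pool and only read once a completion future signals that every callback has finished. A hypothetical reduction of that pattern to plain JDK types (the task list and pool size are invented):

import java.util.Arrays;
import java.util.List;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.atomic.AtomicLong;

public class AsyncAccumulatorExample {

    public static void main(String[] args) {
        final AtomicLong totalBytes = new AtomicLong(0);
        final AtomicLong itemCount = new AtomicLong(0);

        ExecutorService pool = Executors.newFixedThreadPool(4);
        List<Long> sizes = Arrays.asList(100L, 250L, 4096L, 7L);

        CompletableFuture<?>[] tasks = sizes.stream()
                .map(size -> CompletableFuture.runAsync(() -> {
                    // addAndGet / incrementAndGet are safe from any pool thread.
                    totalBytes.addAndGet(size);
                    itemCount.incrementAndGet();
                }, pool))
                .toArray(CompletableFuture[]::new);

        CompletableFuture.allOf(tasks).join(); // wait until all callbacks have run

        System.out.println("items = " + itemCount.get());   // 4
        System.out.println("bytes = " + totalBytes.get());  // 4453
        pool.shutdown();
    }
}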
From source file:io.druid.java.util.common.CompressionUtilsTest.java
@Test
public void testGoodGzipWithException() throws Exception {
    final AtomicLong flushes = new AtomicLong(0);
    final File tmpDir = temporaryFolder.newFolder("testGoodGzipByteSource");
    final File gzFile = new File(tmpDir, testFile.getName() + ".gz");
    Assert.assertFalse(gzFile.exists());
    CompressionUtils.gzip(Files.asByteSource(testFile), new ByteSink() {
        @Override
        public OutputStream openStream() throws IOException {
            return new FilterOutputStream(new FileOutputStream(gzFile)) {
                @Override
                public void flush() throws IOException {
                    if (flushes.getAndIncrement() > 0) {
                        super.flush();
                    } else {
                        throw new IOException("Haven't flushed enough");
                    }
                }
            };
        }
    }, Predicates.<Throwable>alwaysTrue());
    Assert.assertTrue(gzFile.exists());
    try (final InputStream inputStream = CompressionUtils.decompress(new FileInputStream(gzFile), "file.gz")) {
        assertGoodDataStream(inputStream);
    }
    if (!testFile.delete()) {
        throw new IOE("Unable to delete file [%s]", testFile.getAbsolutePath());
    }
    Assert.assertFalse(testFile.exists());
    CompressionUtils.gunzip(Files.asByteSource(gzFile), testFile);
    Assert.assertTrue(testFile.exists());
    try (final InputStream inputStream = new FileInputStream(testFile)) {
        assertGoodDataStream(inputStream);
    }
    Assert.assertEquals(4, flushes.get()); // 2 for suppressed closes, 2 for manual calls to shake out errors
}
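This test uses the AtomicLong both to count invocations of flush() and to make the very first call fail, so it can later assert on the exact number of calls. A hypothetical, framework-free sketch of that "count and fail the first attempt" idiom (the retry helper and names are invented, not part of the Druid code above):

import java.util.concurrent.atomic.AtomicLong;
import java.util.function.Supplier;

public class InvocationCountingExample {

    static long runWithRetry(Supplier<Long> operation, int maxAttempts) {
        RuntimeException last = null;
        for (int attempt = 0; attempt < maxAttempts; attempt++) {
            try {
                return operation.get();
            } catch (RuntimeException e) {
                last = e; // retry
            }
        }
        throw last;
    }

    public static void main(String[] args) {
        final AtomicLong calls = new AtomicLong(0);

        Supplier<Long> flakyOperation = () -> {
            // Fail on the very first call, succeed afterwards.
            if (calls.getAndIncrement() == 0) {
                throw new RuntimeException("Haven't flushed enough");
            }
            return calls.get();
        };

        long result = runWithRetry(flakyOperation, 3);
        System.out.println("result = " + result);     // 2
        System.out.println("calls  = " + calls.get()); // 2: one failure plus one success
    }
}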
From source file:com.twitter.distributedlog.auditor.DLAuditor.java
private long calculateLedgerSpaceUsage(BookKeeperClient bkc, final ExecutorService executorService)
        throws IOException {
    final AtomicLong totalBytes = new AtomicLong(0);
    final AtomicLong totalEntries = new AtomicLong(0);
    final AtomicLong numLedgers = new AtomicLong(0);

    LedgerManager lm = BookKeeperAccessor.getLedgerManager(bkc.get());

    final SettableFuture<Void> doneFuture = SettableFuture.create();
    final BookKeeper bk = bkc.get();

    BookkeeperInternalCallbacks.Processor<Long> collector = new BookkeeperInternalCallbacks.Processor<Long>() {
        @Override
        public void process(final Long lid, final AsyncCallback.VoidCallback cb) {
            numLedgers.incrementAndGet();
            executorService.submit(new Runnable() {
                @Override
                public void run() {
                    bk.asyncOpenLedgerNoRecovery(lid, BookKeeper.DigestType.CRC32,
                            conf.getBKDigestPW().getBytes(UTF_8),
                            new org.apache.bookkeeper.client.AsyncCallback.OpenCallback() {
                                @Override
                                public void openComplete(int rc, LedgerHandle lh, Object ctx) {
                                    final int cbRc;
                                    if (BKException.Code.OK == rc) {
                                        totalBytes.addAndGet(lh.getLength());
                                        totalEntries.addAndGet(lh.getLastAddConfirmed() + 1);
                                        cbRc = rc;
                                    } else {
                                        cbRc = BKException.Code.ZKException;
                                    }
                                    executorService.submit(new Runnable() {
                                        @Override
                                        public void run() {
                                            cb.processResult(cbRc, null, null);
                                        }
                                    });
                                }
                            }, null);
                }
            });
        }
    };

    AsyncCallback.VoidCallback finalCb = new AsyncCallback.VoidCallback() {
        @Override
        public void processResult(int rc, String path, Object ctx) {
            if (BKException.Code.OK == rc) {
                doneFuture.set(null);
            } else {
                doneFuture.setException(BKException.create(rc));
            }
        }
    };

    lm.asyncProcessLedgers(collector, finalCb, null, BKException.Code.OK, BKException.Code.ZKException);

    try {
        doneFuture.get();
        logger.info("calculated {} ledgers\n\ttotal bytes = {}\n\ttotal entries = {}",
                new Object[] { numLedgers.get(), totalBytes.get(), totalEntries.get() });
    } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        throw new DLInterruptedException("Interrupted on calculating ledger space : ", e);
    } catch (ExecutionException e) {
        if (e.getCause() instanceof IOException) {
            throw (IOException) (e.getCause());
        } else {
            throw new IOException("Failed to calculate ledger space : ", e.getCause());
        }
    }
    return totalBytes.get();
}
From source file:org.apache.nifi.processors.hive.SelectHive_1_1QL.java
private void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile fileToProcess = (context.hasIncomingConnection() ? session.get() : null);
    FlowFile flowfile = null;

    // If we have no FlowFile, and all incoming connections are self-loops then we can continue on.
    // However, if we have no FlowFile and we have connections coming from other Processors, then
    // we know that we should run only if we have a FlowFile.
    if (context.hasIncomingConnection()) {
        if (fileToProcess == null && context.hasNonLoopConnection()) {
            return;
        }
    }

    final ComponentLog logger = getLogger();
    final Hive_1_1DBCPService dbcpService = context.getProperty(HIVE_DBCP_SERVICE)
            .asControllerService(Hive_1_1DBCPService.class);
    final Charset charset = Charset.forName(context.getProperty(CHARSET).getValue());

    List<String> preQueries = getQueries(
            context.getProperty(HIVEQL_PRE_QUERY).evaluateAttributeExpressions(fileToProcess).getValue());
    List<String> postQueries = getQueries(
            context.getProperty(HIVEQL_POST_QUERY).evaluateAttributeExpressions(fileToProcess).getValue());

    final boolean flowbased = !(context.getProperty(HIVEQL_SELECT_QUERY).isSet());

    // Source the SQL
    String hqlStatement;

    if (context.getProperty(HIVEQL_SELECT_QUERY).isSet()) {
        hqlStatement = context.getProperty(HIVEQL_SELECT_QUERY).evaluateAttributeExpressions(fileToProcess)
                .getValue();
    } else {
        // If the query is not set, then an incoming flow file is required, and expected to contain a valid SQL select query.
        // If there is no incoming connection, onTrigger will not be called as the processor will fail when scheduled.
        final StringBuilder queryContents = new StringBuilder();
        session.read(fileToProcess, in -> queryContents.append(IOUtils.toString(in, charset)));
        hqlStatement = queryContents.toString();
    }

    final Integer fetchSize = context.getProperty(FETCH_SIZE).evaluateAttributeExpressions(fileToProcess)
            .asInteger();
    final Integer maxRowsPerFlowFile = context.getProperty(MAX_ROWS_PER_FLOW_FILE)
            .evaluateAttributeExpressions(fileToProcess).asInteger();
    final Integer maxFragments = context.getProperty(MAX_FRAGMENTS).isSet()
            ? context.getProperty(MAX_FRAGMENTS).evaluateAttributeExpressions(fileToProcess).asInteger()
            : 0;
    final String outputFormat = context.getProperty(HIVEQL_OUTPUT_FORMAT).getValue();
    final boolean convertNamesForAvro = context.getProperty(NORMALIZE_NAMES_FOR_AVRO).asBoolean();
    final StopWatch stopWatch = new StopWatch(true);
    final boolean header = context.getProperty(HIVEQL_CSV_HEADER).asBoolean();
    final String altHeader = context.getProperty(HIVEQL_CSV_ALT_HEADER)
            .evaluateAttributeExpressions(fileToProcess).getValue();
    final String delimiter = context.getProperty(HIVEQL_CSV_DELIMITER)
            .evaluateAttributeExpressions(fileToProcess).getValue();
    final boolean quote = context.getProperty(HIVEQL_CSV_QUOTE).asBoolean();
    final boolean escape = context.getProperty(HIVEQL_CSV_HEADER).asBoolean();
    final String fragmentIdentifier = UUID.randomUUID().toString();

    try (final Connection con = dbcpService
            .getConnection(fileToProcess == null ? Collections.emptyMap() : fileToProcess.getAttributes());
            final Statement st = (flowbased ? con.prepareStatement(hqlStatement) : con.createStatement())) {

        Pair<String, SQLException> failure = executeConfigStatements(con, preQueries);
        if (failure != null) {
            // In case of failure, assigning config query to "hqlStatement" to follow current error handling
            hqlStatement = failure.getLeft();
            flowfile = (fileToProcess == null) ? session.create() : fileToProcess;
            fileToProcess = null;
            throw failure.getRight();
        }

        if (fetchSize != null && fetchSize > 0) {
            try {
                st.setFetchSize(fetchSize);
            } catch (SQLException se) {
                // Not all drivers support this, just log the error (at debug level) and move on
                logger.debug("Cannot set fetch size to {} due to {}",
                        new Object[] { fetchSize, se.getLocalizedMessage() }, se);
            }
        }

        final List<FlowFile> resultSetFlowFiles = new ArrayList<>();
        try {
            logger.debug("Executing query {}", new Object[] { hqlStatement });
            if (flowbased) {
                // Hive JDBC Doesn't Support this yet:
                // ParameterMetaData pmd = ((PreparedStatement)st).getParameterMetaData();
                // int paramCount = pmd.getParameterCount();

                // Alternate way to determine number of params in SQL.
                int paramCount = StringUtils.countMatches(hqlStatement, "?");

                if (paramCount > 0) {
                    setParameters(1, (PreparedStatement) st, paramCount, fileToProcess.getAttributes());
                }
            }

            final ResultSet resultSet;

            try {
                resultSet = (flowbased ? ((PreparedStatement) st).executeQuery()
                        : st.executeQuery(hqlStatement));
            } catch (SQLException se) {
                // If an error occurs during the query, a flowfile is expected to be routed to failure, so ensure one here
                flowfile = (fileToProcess == null) ? session.create() : fileToProcess;
                fileToProcess = null;
                throw se;
            }

            int fragmentIndex = 0;
            String baseFilename = (fileToProcess != null)
                    ? fileToProcess.getAttribute(CoreAttributes.FILENAME.key())
                    : null;
            while (true) {
                final AtomicLong nrOfRows = new AtomicLong(0L);
                flowfile = (fileToProcess == null) ? session.create() : session.create(fileToProcess);
                if (baseFilename == null) {
                    baseFilename = flowfile.getAttribute(CoreAttributes.FILENAME.key());
                }
                try {
                    flowfile = session.write(flowfile, out -> {
                        try {
                            if (AVRO.equals(outputFormat)) {
                                nrOfRows.set(HiveJdbcCommon.convertToAvroStream(resultSet, out,
                                        maxRowsPerFlowFile, convertNamesForAvro));
                            } else if (CSV.equals(outputFormat)) {
                                CsvOutputOptions options = new CsvOutputOptions(header, altHeader, delimiter,
                                        quote, escape, maxRowsPerFlowFile);
                                nrOfRows.set(HiveJdbcCommon.convertToCsvStream(resultSet, out, options));
                            } else {
                                nrOfRows.set(0L);
                                throw new ProcessException("Unsupported output format: " + outputFormat);
                            }
                        } catch (final SQLException | RuntimeException e) {
                            throw new ProcessException("Error during database query or conversion of records.", e);
                        }
                    });
                } catch (ProcessException e) {
                    // Add flowfile to results before rethrowing so it will be removed from session in outer catch
                    resultSetFlowFiles.add(flowfile);
                    throw e;
                }

                if (nrOfRows.get() > 0 || resultSetFlowFiles.isEmpty()) {
                    final Map<String, String> attributes = new HashMap<>();
                    // Set attribute for how many rows were selected
                    attributes.put(RESULT_ROW_COUNT, String.valueOf(nrOfRows.get()));

                    try {
                        // Set input/output table names by parsing the query
                        attributes.putAll(toQueryTableAttributes(findTableNames(hqlStatement)));
                    } catch (Exception e) {
                        // If failed to parse the query, just log a warning message, but continue.
                        getLogger().warn("Failed to parse query: {} due to {}",
                                new Object[] { hqlStatement, e }, e);
                    }

                    // Set MIME type on output document and add extension to filename
                    if (AVRO.equals(outputFormat)) {
                        attributes.put(CoreAttributes.MIME_TYPE.key(), MIME_TYPE_AVRO_BINARY);
                        attributes.put(CoreAttributes.FILENAME.key(),
                                baseFilename + "." + fragmentIndex + ".avro");
                    } else if (CSV.equals(outputFormat)) {
                        attributes.put(CoreAttributes.MIME_TYPE.key(), CSV_MIME_TYPE);
                        attributes.put(CoreAttributes.FILENAME.key(),
                                baseFilename + "." + fragmentIndex + ".csv");
                    }

                    if (maxRowsPerFlowFile > 0) {
                        attributes.put("fragment.identifier", fragmentIdentifier);
                        attributes.put("fragment.index", String.valueOf(fragmentIndex));
                    }

                    flowfile = session.putAllAttributes(flowfile, attributes);

                    logger.info("{} contains {} " + outputFormat + " records; transferring to 'success'",
                            new Object[] { flowfile, nrOfRows.get() });

                    if (context.hasIncomingConnection()) {
                        // If the flow file came from an incoming connection, issue a Fetch provenance event
                        session.getProvenanceReporter().fetch(flowfile, dbcpService.getConnectionURL(),
                                "Retrieved " + nrOfRows.get() + " rows",
                                stopWatch.getElapsed(TimeUnit.MILLISECONDS));
                    } else {
                        // If we created a flow file from rows received from Hive, issue a Receive provenance event
                        session.getProvenanceReporter().receive(flowfile, dbcpService.getConnectionURL(),
                                stopWatch.getElapsed(TimeUnit.MILLISECONDS));
                    }
                    resultSetFlowFiles.add(flowfile);
                } else {
                    // If there were no rows returned (and the first flow file has been sent, we're done processing, so remove the flowfile and carry on
                    session.remove(flowfile);
                    if (resultSetFlowFiles != null && resultSetFlowFiles.size() > 0) {
                        flowfile = resultSetFlowFiles.get(resultSetFlowFiles.size() - 1);
                    }
                    break;
                }

                fragmentIndex++;
                if (maxFragments > 0 && fragmentIndex >= maxFragments) {
                    break;
                }
            }

            for (int i = 0; i < resultSetFlowFiles.size(); i++) {
                // Set count on all FlowFiles
                if (maxRowsPerFlowFile > 0) {
                    resultSetFlowFiles.set(i, session.putAttribute(resultSetFlowFiles.get(i),
                            "fragment.count", Integer.toString(fragmentIndex)));
                }
            }
        } catch (final SQLException e) {
            throw e;
        }

        failure = executeConfigStatements(con, postQueries);
        if (failure != null) {
            hqlStatement = failure.getLeft();
            if (resultSetFlowFiles != null) {
                resultSetFlowFiles.forEach(ff -> session.remove(ff));
            }
            flowfile = (fileToProcess == null) ? session.create() : fileToProcess;
            fileToProcess = null;
            throw failure.getRight();
        }

        session.transfer(resultSetFlowFiles, REL_SUCCESS);
        if (fileToProcess != null) {
            session.remove(fileToProcess);
        }
    } catch (final ProcessException | SQLException e) {
        logger.error("Issue processing SQL {} due to {}.", new Object[] { hqlStatement, e });
        if (flowfile == null) {
            // This can happen if any exceptions occur while setting up the connection, statement, etc.
            logger.error("Unable to execute HiveQL select query {} due to {}. No FlowFile to route to failure",
                    new Object[] { hqlStatement, e });
            context.yield();
        } else {
            if (context.hasIncomingConnection()) {
                logger.error("Unable to execute HiveQL select query {} for {} due to {}; routing to failure",
                        new Object[] { hqlStatement, flowfile, e });
                flowfile = session.penalize(flowfile);
            } else {
                logger.error("Unable to execute HiveQL select query {} due to {}; routing to failure",
                        new Object[] { hqlStatement, e });
                context.yield();
            }
            session.transfer(flowfile, REL_FAILURE);
        }
    }
}
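The nrOfRows counter in this processor shows a third common AtomicLong role: a lambda may only capture effectively final variables, so the AtomicLong serves as a mutable "out parameter" that carries a row count from inside the write callback back to the surrounding method. A hypothetical, self-contained sketch of that capture idiom outside NiFi (all names here are invented, and writeWith merely stands in for something like session.write):

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.concurrent.atomic.AtomicLong;
import java.util.function.Consumer;

public class LambdaCounterExample {

    static void writeWith(Consumer<OutputStream> writer, OutputStream out) {
        writer.accept(out); // stand-in for a callback-style write API
    }

    public static void main(String[] args) throws IOException {
        List<String> rows = List.of("alpha", "beta", "gamma");
        final AtomicLong nrOfRows = new AtomicLong(0L);

        try (ByteArrayOutputStream out = new ByteArrayOutputStream()) {
            writeWith(stream -> {
                long written = 0;
                for (String row : rows) {
                    try {
                        stream.write((row + "\n").getBytes(StandardCharsets.UTF_8));
                    } catch (IOException e) {
                        throw new RuntimeException(e);
                    }
                    written++;
                }
                nrOfRows.set(written); // visible to the caller after the callback returns
            }, out);

            System.out.println("rows written: " + nrOfRows.get()); // 3
            System.out.println(out.toString(StandardCharsets.UTF_8));
        }
    }
}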