// Inner class of OpaquePartitionedTridentSpoutExecutor; _spout and LOG are
// fields of the enclosing executor.
public class Emitter implements ICommitterTridentSpout.Emitter {
    IOpaquePartitionedTridentSpout.Emitter<Object, ISpoutPartition, Object> _emitter;
    TransactionalState _state;
    TreeMap<Long, Map<String, Object>> _cachedMetas = new TreeMap<>();
    Map<String, EmitterPartitionState> _partitionStates = new HashMap<>();
    int _index;
    int _numTasks;

    public Emitter(String txStateId, Map conf, TopologyContext context) {
        _emitter = _spout.getEmitter(conf, context);
        _index = context.getThisTaskIndex();
        _numTasks = context.getComponentTasks(context.getThisComponentId()).size();
        _state = TransactionalState.newUserState(conf, txStateId);
        LOG.debug("Created {}", this);
    }

    Object _savedCoordinatorMeta = null;
    boolean _changedMeta = false;

    @Override
    public void emitBatch(TransactionAttempt tx, Object coordinatorMeta, TridentCollector collector) {
        LOG.debug("Emitting Batch. [transaction = {}], [coordinatorMeta = {}], [collector = {}], [{}]",
                tx, coordinatorMeta, collector, this);

        // when the coordinator metadata changes, recompute this task's partition assignment
        if (_savedCoordinatorMeta == null || !_savedCoordinatorMeta.equals(coordinatorMeta)) {
            _partitionStates.clear();
            final List<ISpoutPartition> taskPartitions = _emitter.getPartitionsForTask(_index, _numTasks, coordinatorMeta);
            for (ISpoutPartition partition : taskPartitions) {
                _partitionStates.put(partition.getId(),
                        new EmitterPartitionState(new RotatingTransactionalState(_state, partition.getId()), partition));
            }

            // refresh all partitions for backwards compatibility with old spout
            _emitter.refreshPartitions(_emitter.getOrderedPartitions(coordinatorMeta));
            _savedCoordinatorMeta = coordinatorMeta;
            _changedMeta = true;
        }

        Map<String, Object> metas = new HashMap<>();
        _cachedMetas.put(tx.getTransactionId(), metas);

        // the metadata cached for the closest preceding txid is this batch's starting point
        Entry<Long, Map<String, Object>> entry = _cachedMetas.lowerEntry(tx.getTransactionId());
        Map<String, Object> prevCached;
        if (entry != null) {
            prevCached = entry.getValue();
        } else {
            prevCached = new HashMap<>();
        }

        for (Entry<String, EmitterPartitionState> e : _partitionStates.entrySet()) {
            String id = e.getKey();
            EmitterPartitionState s = e.getValue();
            s.rotatingState.removeState(tx.getTransactionId());
            Object lastMeta = prevCached.get(id);
            if (lastMeta == null) {
                lastMeta = s.rotatingState.getLastState();
            }
            Object meta = _emitter.emitPartitionBatch(tx, collector, s.partition, lastMeta);
            metas.put(id, meta);
        }
        LOG.debug("Emitted Batch. [transaction = {}], [coordinatorMeta = {}], [collector = {}], [{}]",
                tx, coordinatorMeta, collector, this);
    }

    @Override
    public void success(TransactionAttempt tx) {
        for (EmitterPartitionState state : _partitionStates.values()) {
            state.rotatingState.cleanupBefore(tx.getTransactionId());
        }
        LOG.debug("Success transaction {}. [{}]", tx, this);
    }

    @Override
    public void commit(TransactionAttempt attempt) {
        LOG.debug("Committing transaction {}. [{}]", attempt, this);
        // this code here handles a case where a previous commit failed, and the partitions
        // changed since the last commit. This clears out any state for the removed partitions
        // for this txid.
        // we make sure only a single task ever does this. we're also guaranteed that
        // it's impossible for there to be another writer to the directory for that partition
        // because only a single commit can be happening at once. this is because in order for
        // another attempt of the batch to commit, the batch phase must have succeeded in between.
        // hence, all tasks for the prior commit must have finished committing (whether successfully or not)
        if (_changedMeta && _index == 0) {
            Set<String> validIds = new HashSet<>();
            for (ISpoutPartition p : _emitter.getOrderedPartitions(_savedCoordinatorMeta)) {
                validIds.add(p.getId());
            }
            for (String existingPartition : _state.list("")) {
                if (!validIds.contains(existingPartition)) {
                    RotatingTransactionalState s = new RotatingTransactionalState(_state, existingPartition);
                    s.removeState(attempt.getTransactionId());
                }
            }
            _changedMeta = false;
        }

        Long txid = attempt.getTransactionId();
        Map<String, Object> metas = _cachedMetas.remove(txid);
        for (Entry<String, Object> entry : metas.entrySet()) {
            _partitionStates.get(entry.getKey()).rotatingState.overrideState(txid, entry.getValue());
        }
        LOG.debug("Exiting commit method for transaction {}. [{}]", attempt, this);
    }

    @Override
    public void close() {
        LOG.debug("Closing");
        _emitter.close();
        LOG.debug("Closed");
    }

    @Override
    public String toString() {
        return "Emitter{" +
                "_state=" + _state +
                ", _cachedMetas=" + _cachedMetas +
                ", _partitionStates=" + _partitionStates +
                ", _index=" + _index +
                ", _numTasks=" + _numTasks +
                ", _savedCoordinatorMeta=" + _savedCoordinatorMeta +
                ", _changedMeta=" + _changedMeta +
                '}';
    }
}

static class EmitterPartitionState {
    public RotatingTransactionalState rotatingState;
    public ISpoutPartition partition;

    public EmitterPartitionState(RotatingTransactionalState s, ISpoutPartition p) {
        rotatingState = s;
        partition = p;
    }
}
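// Side note: emitBatch() locates the previous batch's metadata with
// TreeMap.lowerEntry(txid), i.e. the entry with the greatest cached txid
// strictly below the current one. The following is a minimal, self-contained
// sketch (not Storm code; class name and values are hypothetical) of that
// lookup and of the fallback to rotatingState.getLastState() when no lower
// entry exists.
import java.util.Map;
import java.util.TreeMap;

public class LowerEntryDemo {
    public static void main(String[] args) {
        TreeMap<Long, Map<String, Object>> cachedMetas = new TreeMap<>();
        cachedMetas.put(5L, Map.of("partition-0", "meta of txid 5"));
        cachedMetas.put(6L, Map.of("partition-0", "meta of txid 6"));

        // txid 7 starts from the metadata of txid 6, the closest smaller key
        System.out.println(cachedMetas.lowerEntry(7L)); // 6={partition-0=meta of txid 6}

        // the smallest cached txid has no lower entry; emitBatch() then falls
        // back to the partition's rotatingState.getLastState()
        System.out.println(cachedMetas.lowerEntry(5L)); // null
    }
}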
public class KafkaTridentSpoutOpaqueCoordinator<K, V> implements IOpaquePartitionedTridentSpout.Coordinator<List<Map<String, Object>>>,
        Serializable {
    private static final Logger LOG = LoggerFactory.getLogger(KafkaTridentSpoutOpaqueCoordinator.class);

    private final TopicPartitionSerializer tpSerializer = new TopicPartitionSerializer();
    private final KafkaTridentSpoutManager<K, V> kafkaManager;

    public KafkaTridentSpoutOpaqueCoordinator(KafkaTridentSpoutManager<K, V> kafkaManager) {
        this.kafkaManager = kafkaManager;
        LOG.debug("Created {}", this.toString());
    }

    @Override
    public boolean isReady(long txid) {
        LOG.debug("isReady = true");
        return true;    // the "old" trident kafka spout always returns true, like this
    }

    @Override
    public List<Map<String, Object>> getPartitionsForBatch() {
        final ArrayList<TopicPartition> topicPartitions = new ArrayList<>(kafkaManager.getTopicPartitions());
        LOG.debug("TopicPartitions for batch {}", topicPartitions);
        List<Map<String, Object>> tps = new ArrayList<>();
        for (TopicPartition tp : topicPartitions) {
            tps.add(tpSerializer.toMap(tp));
        }
        return tps;
    }

    @Override
    public void close() {
        LOG.debug("Closed"); // the "old" trident kafka spout is a no-op here, like this
    }

    @Override
    public final String toString() {
        return super.toString() +
                "{kafkaManager=" + kafkaManager +
                '}';
    }
}
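// getPartitionsForBatch() ships the partition list to the emitters through
// Trident's coordinator metadata, so each TopicPartition is flattened into a
// plain Map that Storm can serialize. Below is a hypothetical stand-in for
// TopicPartitionSerializer sketching the assumed round trip; the real class
// lives in storm-kafka-client, and the key names here are illustrative
// assumptions, not its confirmed constants.
import java.util.HashMap;
import java.util.Map;
import org.apache.kafka.common.TopicPartition;

class TopicPartitionMapSketch {
    static final String TOPIC_KEY = "topic";         // assumed key name
    static final String PARTITION_KEY = "partition"; // assumed key name

    // coordinator side: TopicPartition -> serializable Map
    Map<String, Object> toMap(TopicPartition tp) {
        Map<String, Object> map = new HashMap<>();
        map.put(TOPIC_KEY, tp.topic());
        map.put(PARTITION_KEY, tp.partition());
        return map;
    }

    // emitter side: Map -> TopicPartition (see getOrderedPartitions below)
    TopicPartition fromMap(Map<String, Object> map) {
        return new TopicPartition((String) map.get(TOPIC_KEY),
                ((Number) map.get(PARTITION_KEY)).intValue());
    }
}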
public class KafkaTridentSpoutEmitter<K, V> implements IOpaquePartitionedTridentSpout.Emitter<
        List<Map<String, Object>>,
        KafkaTridentSpoutTopicPartition,
        Map<String, Object>>,
        Serializable {

    private static final long serialVersionUID = -7343927794834130435L;
    private static final Logger LOG = LoggerFactory.getLogger(KafkaTridentSpoutEmitter.class);

    // Kafka
    private final KafkaConsumer<K, V> kafkaConsumer;

    // Bookkeeping
    private final KafkaTridentSpoutManager<K, V> kafkaManager;
    // set of topic-partitions for which the first poll has already occurred, and the first polled txid
    private final Map<TopicPartition, Long> firstPollTransaction = new HashMap<>();

    // Declare some KafkaTridentSpoutManager references for convenience
    private final long pollTimeoutMs;
    private final KafkaSpoutConfig.FirstPollOffsetStrategy firstPollOffsetStrategy;
    private final RecordTranslator<K, V> translator;
    private final Timer refreshSubscriptionTimer;
    private final TopicPartitionSerializer tpSerializer = new TopicPartitionSerializer();

    private TopologyContext topologyContext;

    /**
     * Create a new Kafka spout emitter.
     * @param kafkaManager The Kafka consumer manager to use
     * @param topologyContext The topology context
     * @param refreshSubscriptionTimer The timer for deciding when to recheck the subscription
     */
    public KafkaTridentSpoutEmitter(KafkaTridentSpoutManager<K, V> kafkaManager, TopologyContext topologyContext,
            Timer refreshSubscriptionTimer) {
        this.kafkaConsumer = kafkaManager.createAndSubscribeKafkaConsumer(topologyContext);
        this.kafkaManager = kafkaManager;
        this.topologyContext = topologyContext;
        this.refreshSubscriptionTimer = refreshSubscriptionTimer;
        this.translator = kafkaManager.getKafkaSpoutConfig().getTranslator();

        final KafkaSpoutConfig<K, V> kafkaSpoutConfig = kafkaManager.getKafkaSpoutConfig();
        this.pollTimeoutMs = kafkaSpoutConfig.getPollTimeoutMs();
        this.firstPollOffsetStrategy = kafkaSpoutConfig.getFirstPollOffsetStrategy();
        LOG.debug("Created {}", this.toString());
    }

    /**
     * Creates an instance of this class with a default 500 millisecond refresh subscription timer.
     */
    public KafkaTridentSpoutEmitter(KafkaTridentSpoutManager<K, V> kafkaManager, TopologyContext topologyContext) {
        this(kafkaManager, topologyContext, new Timer(500,
                kafkaManager.getKafkaSpoutConfig().getPartitionRefreshPeriodMs(), TimeUnit.MILLISECONDS));
    }

    //...... (private helpers such as seek(...), pauseTopicPartitions(...),
    //        newKafkaTridentSpoutTopicPartitions(...) and getNumTasks() are
    //        omitted from this excerpt)

    @Override
    public Map<String, Object> emitPartitionBatch(TransactionAttempt tx, TridentCollector collector,
            KafkaTridentSpoutTopicPartition currBatchPartition, Map<String, Object> lastBatch) {

        LOG.debug("Processing batch: [transaction = {}], [currBatchPartition = {}], [lastBatchMetadata = {}], [collector = {}]",
                tx, currBatchPartition, lastBatch, collector);

        final TopicPartition currBatchTp = currBatchPartition.getTopicPartition();
        final Set<TopicPartition> assignments = kafkaConsumer.assignment();
        KafkaTridentSpoutBatchMetadata lastBatchMeta = lastBatch == null ? null : KafkaTridentSpoutBatchMetadata.fromMap(lastBatch);
        KafkaTridentSpoutBatchMetadata currentBatch = lastBatchMeta;
        Collection<TopicPartition> pausedTopicPartitions = Collections.emptySet();

        if (assignments == null || !assignments.contains(currBatchPartition.getTopicPartition())) {
            LOG.warn("SKIPPING processing batch [transaction = {}], [currBatchPartition = {}], [lastBatchMetadata = {}], "
                    + "[collector = {}] because it is not part of the assignments {} of consumer instance [{}] "
                    + "of consumer group [{}]", tx, currBatchPartition, lastBatch, collector, assignments,
                    kafkaConsumer, kafkaManager.getKafkaSpoutConfig().getConsumerGroupId());
        } else {
            try {
                // pause other topic-partitions to only poll from current topic-partition
                pausedTopicPartitions = pauseTopicPartitions(currBatchTp);

                seek(currBatchTp, lastBatchMeta, tx.getTransactionId());

                // poll
                if (refreshSubscriptionTimer.isExpiredResetOnTrue()) {
                    kafkaManager.getKafkaSpoutConfig().getSubscription().refreshAssignment();
                }

                final ConsumerRecords<K, V> records = kafkaConsumer.poll(pollTimeoutMs);
                LOG.debug("Polled [{}] records from Kafka.", records.count());

                if (!records.isEmpty()) {
                    emitTuples(collector, records);
                    // build new metadata
                    currentBatch = new KafkaTridentSpoutBatchMetadata(currBatchTp, records);
                }
            } finally {
                kafkaConsumer.resume(pausedTopicPartitions);
                LOG.trace("Resumed topic-partitions {}", pausedTopicPartitions);
            }
            LOG.debug("Emitted batch: [transaction = {}], [currBatchPartition = {}], [lastBatchMetadata = {}], "
                    + "[currBatchMetadata = {}], [collector = {}]", tx, currBatchPartition, lastBatch, currentBatch, collector);
        }

        return currentBatch == null ? null : currentBatch.toMap();
    }

    private void emitTuples(TridentCollector collector, ConsumerRecords<K, V> records) {
        for (ConsumerRecord<K, V> record : records) {
            final List<Object> tuple = translator.apply(record);
            collector.emit(tuple);
            LOG.debug("Emitted tuple {} for record [{}]", tuple, record);
        }
    }

    @Override
    public void refreshPartitions(List<KafkaTridentSpoutTopicPartition> partitionResponsibilities) {
        LOG.trace("Refreshing of topic-partitions handled by Kafka. "
                + "No action taken by this method for topic partitions {}", partitionResponsibilities);
    }

    /**
     * Computes the ordered list of topic-partitions for this task, taking into consideration that the
     * topic-partitions for this task must be assigned to the Kafka consumer running on this task.
     *
     * @param allPartitionInfo list of all partitions as returned by {@link KafkaTridentSpoutOpaqueCoordinator}
     * @return ordered list of topic-partitions for this task
     */
    @Override
    public List<KafkaTridentSpoutTopicPartition> getOrderedPartitions(final List<Map<String, Object>> allPartitionInfo) {
        List<TopicPartition> allTopicPartitions = new ArrayList<>();
        for (Map<String, Object> map : allPartitionInfo) {
            allTopicPartitions.add(tpSerializer.fromMap(map));
        }
        final List<KafkaTridentSpoutTopicPartition> allPartitions = newKafkaTridentSpoutTopicPartitions(allTopicPartitions);
        LOG.debug("Returning all topic-partitions {} across all tasks. Current task index [{}]. Total tasks [{}] ",
                allPartitions, topologyContext.getThisTaskIndex(), getNumTasks());
        return allPartitions;
    }

    @Override
    public List<KafkaTridentSpoutTopicPartition> getPartitionsForTask(int taskId, int numTasks,
            List<Map<String, Object>> allPartitionInfo) {
        final Set<TopicPartition> assignedTps = kafkaConsumer.assignment();
        LOG.debug("Consumer [{}], running on task with index [{}], has assigned topic-partitions {}",
                kafkaConsumer, taskId, assignedTps);
        final List<KafkaTridentSpoutTopicPartition> taskTps = newKafkaTridentSpoutTopicPartitions(assignedTps);
        LOG.debug("Returning topic-partitions {} for task with index [{}]", taskTps, taskId);
        return taskTps;
    }

    @Override
    public void close() {
        kafkaConsumer.close();
        LOG.debug("Closed");
    }

    @Override
    public final String toString() {
        return super.toString() +
                "{kafkaManager=" + kafkaManager +
                '}';
    }
}
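// emitPartitionBatch() pauses every other assigned topic-partition before
// polling, so a single kafkaConsumer.poll(...) only returns records for the
// batch's partition, and the finally block resumes them afterwards. The
// pauseTopicPartitions(...) helper is not shown in this excerpt; the sketch
// below (to be read as a private method of KafkaTridentSpoutEmitter) is an
// assumption that simply inverts what kafkaConsumer.resume(...) undoes.
private Collection<TopicPartition> pauseTopicPartitions(TopicPartition excludedTp) {
    // pause everything currently assigned except the partition being emitted
    final Set<TopicPartition> pausedTopicPartitions = new HashSet<>(kafkaConsumer.assignment());
    pausedTopicPartitions.remove(excludedTp);
    kafkaConsumer.pause(pausedTopicPartitions);
    LOG.trace("Paused topic-partitions {}", pausedTopicPartitions);
    return pausedTopicPartitions;
}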