feat(outbox): implement concurrent claim mechanism with UPDATE RETURNING + SKIP LOCKED
- Add migration 0004 to introduce 'claiming' status and timeout index
- Add StatusClaiming to platformevent domain and allow it in Validate()
- Rewrite ListDue as transactional UPDATE ... RETURNING with FOR UPDATE SKIP LOCKED
- Add ReleaseStaleClaims to reset expired claiming events back to retrying
- Worker Start() now runs a 30s ticker for stale claim recovery (5m timeout)
- Update stubEventStore in tests to satisfy new EventStore interface

Refs: D-02
This commit is contained in:
@@ -75,14 +75,30 @@ func (s *PlatformEventStore) ListDue(ctx context.Context, platform string, dueBe
|
||||
if platform == "" {
|
||||
return nil, fmt.Errorf("platform is required")
|
||||
}
|
||||
rows, err := s.db.QueryContext(ctx, `
|
||||
SELECT id, platform, event_type, COALESCE(session_id::text, ''), COALESCE(ticket_id::text, ''), COALESCE(source_message_id, ''),
|
||||
payload, status, attempt_count, next_attempt_at, occurred_at, created_at, updated_at,
|
||||
delivered_at, COALESCE(last_error, '')
|
||||
FROM cs_platform_event_outbox
|
||||
WHERE platform = $1 AND status IN ('pending', 'retrying') AND next_attempt_at <= $2
|
||||
ORDER BY next_attempt_at ASC, occurred_at ASC, created_at ASC, id ASC
|
||||
LIMIT $3
|
||||
|
||||
tx, err := s.db.BeginTx(ctx, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer func() {
|
||||
if err != nil {
|
||||
_ = tx.Rollback()
|
||||
}
|
||||
}()
|
||||
|
||||
rows, err := tx.QueryContext(ctx, `
|
||||
UPDATE cs_platform_event_outbox
|
||||
SET status = 'claiming', updated_at = NOW()
|
||||
WHERE id IN (
|
||||
SELECT id FROM cs_platform_event_outbox
|
||||
WHERE platform = $1 AND status IN ('pending','retrying') AND next_attempt_at <= $2
|
||||
ORDER BY next_attempt_at ASC, occurred_at ASC, created_at ASC, id ASC
|
||||
LIMIT $3
|
||||
FOR UPDATE SKIP LOCKED
|
||||
)
|
||||
RETURNING id, platform, event_type, COALESCE(session_id::text, ''), COALESCE(ticket_id::text, ''), COALESCE(source_message_id, ''),
|
||||
payload, status, attempt_count, next_attempt_at, occurred_at, created_at, updated_at,
|
||||
delivered_at, COALESCE(last_error, '')
|
||||
`, platform, dueBefore, limit)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@@ -126,9 +142,32 @@ func (s *PlatformEventStore) ListDue(ctx context.Context, platform string, dueBe
|
||||
if err := rows.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := tx.Commit(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return events, nil
|
||||
}
|
||||
|
||||
func (s *PlatformEventStore) ReleaseStaleClaims(ctx context.Context, timeout time.Duration) (int, error) {
|
||||
if s.db == nil {
|
||||
return 0, fmt.Errorf("db is nil")
|
||||
}
|
||||
res, err := s.db.ExecContext(ctx, `
|
||||
UPDATE cs_platform_event_outbox
|
||||
SET status = 'retrying', updated_at = NOW()
|
||||
WHERE status = 'claiming' AND updated_at < NOW() - $1::interval
|
||||
`, timeout.Seconds())
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
n, err := res.RowsAffected()
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return int(n), nil
|
||||
}
|
||||
|
||||
func (s *PlatformEventStore) MarkDelivered(ctx context.Context, eventID string, deliveredAt time.Time) error {
|
||||
if s.db == nil {
|
||||
return fmt.Errorf("db is nil")
|
||||
|
||||
Reference in New Issue
Block a user