| | | 1 | | using Elsa.Common; |
| | | 2 | | using Elsa.KeyValues.Contracts; |
| | | 3 | | using Elsa.KeyValues.Entities; |
| | | 4 | | using Elsa.KeyValues.Models; |
| | | 5 | | using Elsa.Workflows.Runtime.Options; |
| | | 6 | | using Microsoft.Extensions.DependencyInjection; |
| | | 7 | | using Microsoft.Extensions.Options; |
| | | 8 | | |
| | | 9 | | namespace Elsa.Workflows.Runtime.Services; |
| | | 10 | | |
| | | 11 | | /// <summary> |
| | | 12 | | /// Default thread-safe implementation of <see cref="IQuiescenceSignal"/>. Uses a single lock for transitions |
| | | 13 | | /// and a volatile reference read for lock-free state queries. See FR-001..FR-005 and research R8 for |
| | | 14 | | /// pause-persistence semantics. |
| | | 15 | | /// </summary> |
| | | 16 | | public sealed class QuiescenceSignal : IQuiescenceSignal |
| | | 17 | | { |
| | | 18 | | private const string PersistenceKeyPrefix = "elsa.quiescence.pause."; |
| | | 19 | | |
| | 98 | 20 | | private readonly object _sync = new(); |
| | | 21 | | // Serializes persistence I/O so racing Pause/Resume can't reorder writes in the store. Held only across |
| | | 22 | | // the I/O — the in-memory transition still uses the fast _sync lock, and pause/resume aren't hot paths. |
| | 98 | 23 | | private readonly SemaphoreSlim _persistenceMutex = new(1, 1); |
| | | 24 | | private readonly IOptions<GracefulShutdownOptions> _options; |
| | | 25 | | private readonly ISystemClock _clock; |
| | | 26 | | private readonly IKeyValueStore? _keyValueStore; |
| | | 27 | | private readonly IServiceScopeFactory? _serviceScopeFactory; |
| | | 28 | | private readonly IExecutionCycleRegistry _cycleRegistry; |
| | | 29 | | private readonly string _persistenceKey; |
| | | 30 | | |
| | | 31 | | private QuiescenceState _state; |
| | | 32 | | |
| | | 33 | | /// <summary> |
| | | 34 | | /// Creates the signal. The generation id defaults to a new GUID per construction — when the container is torn |
| | | 35 | | /// down and rebuilt (shell reactivation or host restart), a fresh id is minted, which is what scopes recovery |
| | | 36 | | /// in <c>RecoverInterruptedWorkflowsStartupTask</c>. |
| | | 37 | | /// </summary> |
| | | 38 | | [ActivatorUtilitiesConstructor] |
| | | 39 | | public QuiescenceSignal( |
| | | 40 | | IOptions<GracefulShutdownOptions> options, |
| | | 41 | | ISystemClock clock, |
| | | 42 | | IExecutionCycleRegistry cycleRegistry, |
| | | 43 | | IServiceScopeFactory serviceScopeFactory, |
| | | 44 | | string? shellName = null, |
| | 81 | 45 | | string? generationId = null) : this(options, clock, cycleRegistry, keyValueStore: null, serviceScopeFactory, she |
| | | 46 | | { |
| | 81 | 47 | | } |
| | | 48 | | |
| | | 49 | | public QuiescenceSignal( |
| | | 50 | | IOptions<GracefulShutdownOptions> options, |
| | | 51 | | ISystemClock clock, |
| | | 52 | | IExecutionCycleRegistry cycleRegistry, |
| | | 53 | | string? shellName = null, |
| | 9 | 54 | | string? generationId = null) : this(options, clock, cycleRegistry, keyValueStore: null, serviceScopeFactory: nul |
| | | 55 | | { |
| | 9 | 56 | | } |
| | | 57 | | |
| | | 58 | | /// <summary> |
| | | 59 | | /// Creates the signal with a fixed key-value store. Intended for tests and non-container usage. |
| | | 60 | | /// </summary> |
| | | 61 | | public static QuiescenceSignal Create( |
| | | 62 | | IOptions<GracefulShutdownOptions> options, |
| | | 63 | | ISystemClock clock, |
| | | 64 | | IExecutionCycleRegistry cycleRegistry, |
| | | 65 | | IKeyValueStore? keyValueStore = null, |
| | | 66 | | string? shellName = null, |
| | 8 | 67 | | string? generationId = null) => new(options, clock, cycleRegistry, keyValueStore, serviceScopeFactory: null, she |
| | | 68 | | |
/// <summary>
/// Shared constructor that every public constructor and <see cref="Create"/> funnels into. Stores the
/// collaborators, derives the persistence key from the shell name, and seeds the initial state.
/// </summary>
/// <param name="options">Graceful-shutdown options; stored as-is and read later via <c>Value</c>.</param>
/// <param name="clock">Clock used to timestamp state transitions.</param>
/// <param name="cycleRegistry">Registry backing <see cref="ActiveExecutionCycleCount"/>.</param>
/// <param name="keyValueStore">Fixed key-value store, or <c>null</c> when none was supplied.</param>
/// <param name="serviceScopeFactory">Scope factory used to resolve a store on demand, or <c>null</c>.</param>
/// <param name="shellName">Shell name appended to the persistence key prefix; <c>"default"</c> when <c>null</c>.</param>
/// <param name="generationId">Generation id for the initial state; a fresh GUID in "N" format when <c>null</c>.</param>
private QuiescenceSignal(
    IOptions<GracefulShutdownOptions> options,
    ISystemClock clock,
    IExecutionCycleRegistry cycleRegistry,
    IKeyValueStore? keyValueStore,
    IServiceScopeFactory? serviceScopeFactory,
    string? shellName,
    string? generationId)
{
    // Required collaborators first, then the two optional persistence sources.
    (_options, _clock, _cycleRegistry) = (options, clock, cycleRegistry);
    (_keyValueStore, _serviceScopeFactory) = (keyValueStore, serviceScopeFactory);

    // The persistence key is the constant prefix followed by the shell name (or "default").
    var effectiveShellName = shellName ?? "default";
    _persistenceKey = string.Concat(PersistenceKeyPrefix, effectiveShellName);

    // A missing generation id is replaced by a new GUID rendered without dashes.
    var effectiveGenerationId = generationId ?? Guid.NewGuid().ToString("N");
    _state = QuiescenceState.Initial(effectiveGenerationId);
}
| | | 86 | | |
| | | 87 | | /// <inheritdoc /> |
// Lock-free snapshot: a volatile read of the state reference, which writers replace under the lock.
public QuiescenceState CurrentState => Volatile.Read(ref _state);
| | | 96 | | |
| | | 97 | | /// <inheritdoc /> |
public bool IsAcceptingNewWork
{
    // Answers from the snapshot returned by CurrentState rather than touching the field directly.
    get => CurrentState.IsAcceptingNewWork;
}
| | | 99 | | |
| | | 100 | | /// <inheritdoc /> |
public int ActiveExecutionCycleCount
{
    // Pure pass-through to the execution cycle registry; no state is cached here.
    get => _cycleRegistry.ActiveCount;
}
| | | 102 | | |
| | | 103 | | /// <summary> |
| | | 104 | | /// Loads any persisted administrative pause state. Called once per runtime generation by a startup task when |
| | | 105 | | /// <see cref="GracefulShutdownOptions.PausePersistence"/> is <see cref="PausePersistencePolicy.AcrossReactivations" |
| | | 106 | | /// No-op otherwise, or when the key-value store is not registered. |
| | | 107 | | /// </summary> |
| | | 108 | | public async ValueTask InitializePersistedStateAsync(CancellationToken cancellationToken) |
| | | 109 | | { |
| | 162 | 110 | | if (_options.Value.PausePersistence != PausePersistencePolicy.AcrossReactivations) return; |
| | | 111 | | |
| | 3 | 112 | | var pair = await UseKeyValueStoreAsync(store => store.FindAsync(new KeyValueFilter { Key = _persistenceKey }, ca |
| | 3 | 113 | | if (pair is null) return; |
| | | 114 | | |
| | 1 | 115 | | lock (_sync) |
| | | 116 | | { |
| | 1 | 117 | | if ((_state.Reason & QuiescenceReason.AdministrativePause) != 0) return; // someone already paused us |
| | 1 | 118 | | var next = _state with |
| | 1 | 119 | | { |
| | 1 | 120 | | Reason = _state.Reason | QuiescenceReason.AdministrativePause, |
| | 1 | 121 | | PausedAt = _clock.UtcNow, |
| | 1 | 122 | | PauseReasonText = pair.SerializedValue, |
| | 1 | 123 | | PauseRequestedBy = "persisted", |
| | 1 | 124 | | }; |
| | 1 | 125 | | Volatile.Write(ref _state, next); |
| | 1 | 126 | | } |
| | 82 | 127 | | } |
| | | 128 | | |
| | | 129 | | /// <inheritdoc /> |
public ValueTask<QuiescenceState> BeginDrainAsync(CancellationToken cancellationToken = default)
{
    // Completes synchronously: the transition is an in-memory swap and cancellationToken is not consulted.
    lock (_sync)
    {
        var snapshot = _state;

        // Only the first call sets the Drain flag and stamps the start time; later calls hand back the
        // state that is already in place.
        if ((snapshot.Reason & QuiescenceReason.Drain) == 0)
        {
            snapshot = snapshot with
            {
                Reason = snapshot.Reason | QuiescenceReason.Drain,
                DrainStartedAt = _clock.UtcNow,
            };
            Volatile.Write(ref _state, snapshot);
        }

        return new ValueTask<QuiescenceState>(snapshot);
    }
}
| | | 150 | | |
| | | 151 | | /// <inheritdoc /> |
| | | 152 | | public async ValueTask<QuiescenceState> PauseAsync(string? reasonText, string? requestedBy, CancellationToken cancel |
| | | 153 | | { |
| | | 154 | | QuiescenceState next; |
| | 18 | 155 | | bool transitioned = false; |
| | 18 | 156 | | lock (_sync) |
| | | 157 | | { |
| | 18 | 158 | | if ((_state.Reason & QuiescenceReason.AdministrativePause) != 0) |
| | | 159 | | { |
| | 2 | 160 | | next = _state; |
| | | 161 | | } |
| | | 162 | | else |
| | | 163 | | { |
| | 16 | 164 | | next = _state with |
| | 16 | 165 | | { |
| | 16 | 166 | | Reason = _state.Reason | QuiescenceReason.AdministrativePause, |
| | 16 | 167 | | PausedAt = _clock.UtcNow, |
| | 16 | 168 | | PauseReasonText = reasonText, |
| | 16 | 169 | | PauseRequestedBy = requestedBy, |
| | 16 | 170 | | }; |
| | 16 | 171 | | Volatile.Write(ref _state, next); |
| | 16 | 172 | | transitioned = true; |
| | | 173 | | } |
| | 18 | 174 | | } |
| | | 175 | | |
| | 18 | 176 | | if (transitioned) |
| | 16 | 177 | | await PersistAsync(); |
| | | 178 | | |
| | 18 | 179 | | return next; |
| | 18 | 180 | | } |
| | | 181 | | |
| | | 182 | | /// <inheritdoc /> |
public async ValueTask<QuiescenceState> ResumeAsync(string? requestedBy, CancellationToken cancellationToken)
{
    QuiescenceState resumed;
    lock (_sync)
    {
        var current = _state;
        var draining = (current.Reason & QuiescenceReason.Drain) != 0;
        var paused = (current.Reason & QuiescenceReason.AdministrativePause) != 0;

        // Resume is a no-op while a drain is active, and equally when there is no administrative
        // pause to lift; in both cases the current state is returned untouched.
        if (draining || !paused)
        {
            return current;
        }

        // Clear the pause flag and its timestamp/reason text, recording who requested the resume.
        resumed = current with
        {
            Reason = current.Reason & ~QuiescenceReason.AdministrativePause,
            PausedAt = null,
            PauseReasonText = null,
            PauseRequestedBy = requestedBy,
        };
        Volatile.Write(ref _state, resumed);
    }

    // Only reached after a real transition; the persistence I/O runs after the lock is released.
    await PersistAsync();

    return resumed;
}
| | | 209 | | |
| | | 210 | | /// <summary> |
| | | 211 | | /// Persists the current administrative-pause state. Serialized via <see cref="_persistenceMutex"/> so racing |
| | | 212 | | /// Pause/Resume can't reorder writes in the store. The live state is re-read inside the semaphore — each I/O |
| | | 213 | | /// writes whatever the latest in-memory transition was, so N racing transitions produce N serialized writes |
| | | 214 | | /// and the final persisted state always matches the final in-memory state. |
| | | 215 | | /// </summary> |
| | | 216 | | /// <remarks> |
| | | 217 | | /// Uses <see cref="CancellationToken.None"/> deliberately for both the semaphore wait and the store I/O. |
| | | 218 | | /// By the time this runs the in-memory transition has already committed; if a cancelled HTTP request token |
| | | 219 | | /// caused the persistence to skip, in-memory state would diverge from the store — and the idempotent |
| | | 220 | | /// fast-path in <see cref="PauseAsync"/>/<see cref="ResumeAsync"/> (transitioned == false) means a later call |
| | | 221 | | /// would not retry the write. So persistence must complete regardless of caller cancellation. |
| | | 222 | | /// </remarks> |
| | | 223 | | private async ValueTask PersistAsync() |
| | | 224 | | { |
| | 21 | 225 | | if (_options.Value.PausePersistence != PausePersistencePolicy.AcrossReactivations) |
| | 10 | 226 | | return; |
| | | 227 | | |
| | 11 | 228 | | await _persistenceMutex.WaitAsync(CancellationToken.None); |
| | | 229 | | try |
| | | 230 | | { |
| | 11 | 231 | | var live = Volatile.Read(ref _state); |
| | 11 | 232 | | if ((live.Reason & QuiescenceReason.AdministrativePause) != 0) |
| | 15 | 233 | | await UseKeyValueStoreAsync(store => store.SaveAsync(new SerializedKeyValuePair { Key = _persistenceKey, |
| | | 234 | | else |
| | 5 | 235 | | await UseKeyValueStoreAsync(store => store.DeleteAsync(_persistenceKey, CancellationToken.None)); |
| | 11 | 236 | | } |
| | | 237 | | finally |
| | | 238 | | { |
| | 11 | 239 | | _persistenceMutex.Release(); |
| | | 240 | | } |
| | 21 | 241 | | } |
| | | 242 | | |
| | | 243 | | private async ValueTask<TResult> UseKeyValueStoreAsync<TResult>(Func<IKeyValueStore, Task<TResult>> action, TResult |
| | | 244 | | { |
| | 2 | 245 | | if (_keyValueStore is not null) |
| | 1 | 246 | | return await action(_keyValueStore); |
| | | 247 | | |
| | 1 | 248 | | if (_serviceScopeFactory is null) |
| | 1 | 249 | | return defaultValue; |
| | | 250 | | |
| | 0 | 251 | | using var scope = _serviceScopeFactory.CreateScope(); |
| | 0 | 252 | | var store = scope.ServiceProvider.GetService<IKeyValueStore>(); |
| | | 253 | | |
| | 0 | 254 | | return store is null ? defaultValue : await action(store); |
| | 2 | 255 | | } |
| | | 256 | | |
/// <summary>
/// Runs <paramref name="action"/> against a key-value store: the fixed store when one was supplied,
/// otherwise a store resolved from a freshly created service scope. Does nothing when neither source
/// yields a store.
/// </summary>
/// <param name="action">The store operation to execute.</param>
private async ValueTask UseKeyValueStoreAsync(Func<IKeyValueStore, Task> action)
{
    // A fixed store takes precedence over anything resolved from the container.
    if (_keyValueStore is { } fixedStore)
    {
        await action(fixedStore);
        return;
    }

    // Without a scope factory there is nowhere to resolve a store from.
    if (_serviceScopeFactory is not { } scopeFactory)
    {
        return;
    }

    // The scope lives only for this call and is disposed once the action has completed.
    using var scope = scopeFactory.CreateScope();
    if (scope.ServiceProvider.GetService<IKeyValueStore>() is { } scopedStore)
    {
        await action(scopedStore);
    }
}
| | | 273 | | } |