Skip to content

Commit 301fc11

Browse files
authored
Updating SiloMetadata caching (#9380)
* Updating SiloMetadata caching - Skipping inactive members - Negative caching for errored pulls * Adding required parameters * namespaces * bump to trigger new build * cleaning up negative cache * consolidating DateTime.UtcNow * using regular dictionary for negative cache * updating negative cache period * Update SiloMetadaCache.cs
1 parent 29ed522 commit 301fc11

File tree

1 file changed

+26
-3
lines changed

1 file changed

+26
-3
lines changed

src/Orleans.Runtime/MembershipService/SiloMetadata/SiloMetadaCache.cs

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,24 +5,32 @@
55
using System.Threading;
66
using System.Threading.Tasks;
77
using Microsoft.Extensions.Logging;
8+
using Microsoft.Extensions.Options;
9+
using Orleans.Configuration;
810

911
#nullable enable
1012
namespace Orleans.Runtime.MembershipService.SiloMetadata;
1113

1214
internal class SiloMetadataCache(
1315
ISiloMetadataClient siloMetadataClient,
1416
MembershipTableManager membershipTableManager,
17+
IOptions<ClusterMembershipOptions> clusterMembershipOptions,
1518
ILogger<SiloMetadataCache> logger)
1619
: ISiloMetadataCache, ILifecycleParticipant<ISiloLifecycle>, IDisposable
1720
{
1821
private readonly ConcurrentDictionary<SiloAddress, SiloMetadata> _metadata = new();
22+
private readonly Dictionary<SiloAddress, DateTime> _negativeCache = new();
1923
private readonly CancellationTokenSource _cts = new();
24+
private TimeSpan negativeCachePeriod;
2025

2126
void ILifecycleParticipant<ISiloLifecycle>.Participate(ISiloLifecycle lifecycle)
2227
{
2328
Task? task = null;
2429
Task OnStart(CancellationToken _)
2530
{
31+
// This gives time for the silo to be voted Dead and for membership updates to propagate that out
32+
negativeCachePeriod = clusterMembershipOptions.Value.ProbeTimeout * clusterMembershipOptions.Value.NumMissedProbesLimit
33+
+ (2 * clusterMembershipOptions.Value.TableRefreshTimeout);
2634
task = Task.Run(() => this.ProcessMembershipUpdates(_cts.Token));
2735
return Task.CompletedTask;
2836
}
@@ -51,26 +59,39 @@ private async Task ProcessMembershipUpdates(CancellationToken ct)
5159
await foreach (var update in membershipTableManager.MembershipTableUpdates.WithCancellation(ct))
5260
{
5361
// Add entries for members that aren't already in the cache
54-
foreach (var membershipEntry in update.Entries.Where(e => e.Value.Status is SiloStatus.Active or SiloStatus.Joining))
62+
var now = DateTime.UtcNow;
63+
var recentlyActiveSilos = update.Entries
64+
.Where(e => e.Value.Status is SiloStatus.Active or SiloStatus.Joining)
65+
.Where(e => !e.Value.HasMissedIAmAlives(clusterMembershipOptions.Value, now));
66+
foreach (var membershipEntry in recentlyActiveSilos)
5567
{
5668
if (!_metadata.ContainsKey(membershipEntry.Key))
5769
{
70+
if (_negativeCache.TryGetValue(membershipEntry.Key, out var expiration) && expiration > now)
71+
{
72+
continue;
73+
}
5874
try
5975
{
6076
var metadata = await siloMetadataClient.GetSiloMetadata(membershipEntry.Key).WaitAsync(ct);
6177
_metadata.TryAdd(membershipEntry.Key, metadata);
78+
_negativeCache.Remove(membershipEntry.Key, out _);
6279
}
6380
catch(Exception exception)
6481
{
82+
_negativeCache.TryAdd(membershipEntry.Key, now + negativeCachePeriod);
6583
logger.LogError(exception, "Error fetching metadata for silo {Silo}", membershipEntry.Key);
6684
}
6785
}
6886
}
6987

7088
// Remove entries for members that are now dead
71-
foreach (var membershipEntry in update.Entries.Where(e => e.Value.Status == SiloStatus.Dead))
89+
var deadSilos = update.Entries
90+
.Where(e => e.Value.Status == SiloStatus.Dead);
91+
foreach (var membershipEntry in deadSilos)
7292
{
7393
_metadata.TryRemove(membershipEntry.Key, out _);
94+
_negativeCache.Remove(membershipEntry.Key, out _);
7495
}
7596

7697
// Remove entries for members that are no longer in the table
@@ -79,6 +100,7 @@ private async Task ProcessMembershipUpdates(CancellationToken ct)
79100
if (!update.Entries.ContainsKey(silo))
80101
{
81102
_metadata.TryRemove(silo, out _);
103+
_negativeCache.Remove(silo, out _);
82104
}
83105
}
84106
}
@@ -102,4 +124,5 @@ private async Task ProcessMembershipUpdates(CancellationToken ct)
102124
/// <summary>
/// Stores <paramref name="metadata"/> in the cache under <paramref name="siloAddress"/>.
/// If an entry for that address already exists it is left unchanged.
/// </summary>
/// <param name="siloAddress">The silo the metadata belongs to; used as the cache key.</param>
/// <param name="metadata">The metadata to cache for that silo.</param>
public void SetMetadata(SiloAddress siloAddress, SiloMetadata metadata)
{
    // TryAdd never overwrites, so the boolean result is intentionally ignored.
    _metadata.TryAdd(siloAddress, metadata);
}
103125

104126
/// <summary>
/// Signals cancellation on the token source whose token is handed to the
/// membership-update processing loop.
/// </summary>
public void Dispose()
{
    _cts.Cancel();
}
105-
}
127+
}
128+

0 commit comments

Comments
 (0)