Reliability improvements to key ring updates

- Optimistically treat failures as transient and continue to use any existing cached key ring for a short period of time
- Updates to the key ring shouldn't block other threads; they can use the outdated version while waiting for the update
This commit is contained in:
Levi B 2015-03-16 23:53:56 -07:00
parent 612a81d9ce
commit d673df7ef3
3 changed files with 157 additions and 15 deletions

View File

@ -36,5 +36,16 @@ namespace Microsoft.AspNet.DataProtection.KeyManagement
&& !keyRing._expirationToken.IsCancellationRequested
&& keyRing.ExpirationTimeUtc > utcNow;
}
/// <summary>
/// Returns a new <see cref="CacheableKeyRing"/> which is identical to 'this' but with a
/// lifetime extended 2 minutes from <paramref name="now"/>. The inner cancellation token
/// is also disconnected.
/// </summary>
internal CacheableKeyRing WithTemporaryExtendedLifetime(DateTimeOffset now)
{
TimeSpan extension = TimeSpan.FromMinutes(2);
return new CacheableKeyRing(CancellationToken.None, now + extension, KeyRing);
}
}
}

View File

@ -146,26 +146,87 @@ namespace Microsoft.AspNet.DataProtection.KeyManagement
return existingCacheableKeyRing.KeyRing;
}
// The cached keyring hasn't been created or must be refreshed.
lock (_cacheableKeyRingLockObj)
// The cached keyring hasn't been created or must be refreshed. We'll allow one thread to
// update the keyring, and all other threads will continue to use the existing cached
// keyring while the first thread performs the update. There is an exception: if there
// is no usable existing cached keyring, all callers must block until the keyring exists.
bool acquiredLock = false;
try
{
// Did somebody update the keyring while we were waiting for the lock?
existingCacheableKeyRing = Volatile.Read(ref _cacheableKeyRing);
if (CacheableKeyRing.IsValid(existingCacheableKeyRing, utcNow))
Monitor.TryEnter(_cacheableKeyRingLockObj, (existingCacheableKeyRing != null) ? 0 : Timeout.Infinite, ref acquiredLock);
if (acquiredLock)
{
// This thread acquired the critical section and is responsible for updating the
// cached keyring. But first, let's make sure that somebody didn't sneak in before
// us and update the keyring on our behalf.
existingCacheableKeyRing = Volatile.Read(ref _cacheableKeyRing);
if (CacheableKeyRing.IsValid(existingCacheableKeyRing, utcNow))
{
return existingCacheableKeyRing.KeyRing;
}
if (existingCacheableKeyRing != null && _logger.IsVerboseLevelEnabled())
{
_logger.LogVerbose("Existing cached key ring is expired. Refreshing.");
}
// It's up to us to refresh the cached keyring.
// This call is performed *under lock*.
CacheableKeyRing newCacheableKeyRing;
try
{
newCacheableKeyRing = _cacheableKeyRingProvider.GetCacheableKeyRing(utcNow);
}
catch (Exception ex)
{
if (_logger.IsErrorLevelEnabled())
{
if (existingCacheableKeyRing != null)
{
_logger.LogError(ex, "An error occurred while refreshing the key ring. Will try again in 2 minutes.");
}
else
{
_logger.LogError(ex, "An error occurred while reading the key ring.");
}
}
// Failures that occur while refreshing the keyring are most likely transient, perhaps due to a
// temporary network outage. Since we don't want every subsequent call to result in failure, we'll
// create a new keyring object whose expiration is now + some short period of time (currently 2 min),
// and after this period has elapsed the next caller will try refreshing. If we don't have an
// existing keyring (perhaps because this is the first call), then there's nothing to extend, so
// each subsequent caller will keep going down this code path until one succeeds.
if (existingCacheableKeyRing != null)
{
Volatile.Write(ref _cacheableKeyRing, existingCacheableKeyRing.WithTemporaryExtendedLifetime(utcNow));
}
// The immediate caller should fail so that he can report the error up his chain. This makes it more likely
// that an administrator can see the error and react to it as appropriate. The caller can retry the operation
// and will probably have success as long as he falls within the temporary extension mentioned above.
throw;
}
Volatile.Write(ref _cacheableKeyRing, newCacheableKeyRing);
return newCacheableKeyRing.KeyRing;
}
else
{
// We didn't acquire the critical section. This should only occur if we passed
// zero for the Monitor.TryEnter timeout, which implies that we had an existing
// (but outdated) keyring that we can use as a fallback.
Debug.Assert(existingCacheableKeyRing != null);
return existingCacheableKeyRing.KeyRing;
}
if (existingCacheableKeyRing != null && _logger.IsVerboseLevelEnabled())
}
finally
{
if (acquiredLock)
{
_logger.LogVerbose("Existing cached key ring is expired. Refreshing.");
Monitor.Exit(_cacheableKeyRingLockObj);
}
// It's up to us to refresh the cached keyring.
// This call is performed *under lock*.
var newCacheableKeyRing = _cacheableKeyRingProvider.GetCacheableKeyRing(utcNow);
Volatile.Write(ref _cacheableKeyRing, newCacheableKeyRing);
return newCacheableKeyRing.KeyRing;
}
}

View File

@ -7,6 +7,7 @@ using System.Globalization;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.AspNet.DataProtection.AuthenticatedEncryption;
using Microsoft.AspNet.Testing;
using Microsoft.Framework.DependencyInjection;
using Moq;
using Xunit;
@ -467,7 +468,7 @@ namespace Microsoft.AspNet.DataProtection.KeyManagement
}
[Fact]
public void GetCurrentKeyRing_ImplementsDoubleCheckLockPatternCorrectly()
public void GetCurrentKeyRing_NoExistingKeyRing_HoldsAllThreadsUntilKeyRingCreated()
{
// Arrange
var now = StringToDateTime("2015-03-01 00:00:00Z");
@ -515,6 +516,75 @@ namespace Microsoft.AspNet.DataProtection.KeyManagement
mockCacheableKeyRingProvider.Verify(o => o.GetCacheableKeyRing(It.IsAny<DateTimeOffset>()), Times.Once);
}
[Fact]
public void GetCurrentKeyRing_WithExpiredExistingKeyRing_AllowsOneThreadToUpdate_ReturnsExistingKeyRingToOtherCallersWithoutBlocking()
{
// Arrange
var originalKeyRing = new Mock<IKeyRing>().Object;
var originalKeyRingTime = StringToDateTime("2015-03-01 00:00:00Z");
var updatedKeyRing = new Mock<IKeyRing>().Object;
var updatedKeyRingTime = StringToDateTime("2015-03-02 00:00:00Z");
var mockCacheableKeyRingProvider = new Mock<ICacheableKeyRingProvider>();
var keyRingProvider = CreateKeyRingProvider(mockCacheableKeyRingProvider.Object);
// In this test, the foreground thread acquires the critial section in GetCurrentKeyRing,
// and the background thread returns the original key ring rather than blocking while
// waiting for the foreground thread to update the key ring.
TimeSpan testTimeout = TimeSpan.FromSeconds(10);
IKeyRing keyRingReturnedToBackgroundThread = null;
mockCacheableKeyRingProvider.Setup(o => o.GetCacheableKeyRing(originalKeyRingTime))
.Returns(new CacheableKeyRing(CancellationToken.None, StringToDateTime("2015-03-02 00:00:00Z"), originalKeyRing));
mockCacheableKeyRingProvider.Setup(o => o.GetCacheableKeyRing(updatedKeyRingTime))
.Returns<DateTimeOffset>(dto =>
{
// at this point we're inside the critical section - spawn the background thread now
var backgroundGetKeyRingTask = Task.Run(() =>
{
keyRingReturnedToBackgroundThread = keyRingProvider.GetCurrentKeyRingCore(updatedKeyRingTime);
});
Assert.True(backgroundGetKeyRingTask.Wait(testTimeout), "Test timed out.");
return new CacheableKeyRing(CancellationToken.None, StringToDateTime("2015-03-03 00:00:00Z"), updatedKeyRing);
});
// Assert - underlying provider only should have been called once with the updated time (by the foreground thread)
Assert.Same(originalKeyRing, keyRingProvider.GetCurrentKeyRingCore(originalKeyRingTime));
Assert.Same(updatedKeyRing, keyRingProvider.GetCurrentKeyRingCore(updatedKeyRingTime));
Assert.Same(originalKeyRing, keyRingReturnedToBackgroundThread);
mockCacheableKeyRingProvider.Verify(o => o.GetCacheableKeyRing(updatedKeyRingTime), Times.Once);
}
[Fact]
public void GetCurrentKeyRing_WithExpiredExistingKeyRing_UpdateFails_ThrowsButCachesOldKeyRing()
{
// Arrange
var cts = new CancellationTokenSource();
var mockCacheableKeyRingProvider = new Mock<ICacheableKeyRingProvider>();
var originalKeyRing = new Mock<IKeyRing>().Object;
var originalKeyRingTime = StringToDateTime("2015-03-01 00:00:00Z");
mockCacheableKeyRingProvider.Setup(o => o.GetCacheableKeyRing(originalKeyRingTime))
.Returns(new CacheableKeyRing(cts.Token, StringToDateTime("2015-03-02 00:00:00Z"), originalKeyRing));
var throwKeyRingTime = StringToDateTime("2015-03-01 12:00:00Z");
mockCacheableKeyRingProvider.Setup(o => o.GetCacheableKeyRing(throwKeyRingTime)).Throws(new Exception("How exceptional."));
var updatedKeyRing = new Mock<IKeyRing>().Object;
var updatedKeyRingTime = StringToDateTime("2015-03-01 12:02:00Z");
mockCacheableKeyRingProvider.Setup(o => o.GetCacheableKeyRing(updatedKeyRingTime))
.Returns(new CacheableKeyRing(CancellationToken.None, StringToDateTime("2015-03-02 00:00:00Z"), updatedKeyRing));
var keyRingProvider = CreateKeyRingProvider(mockCacheableKeyRingProvider.Object);
// Act & assert
Assert.Same(originalKeyRing, keyRingProvider.GetCurrentKeyRingCore(originalKeyRingTime));
cts.Cancel(); // invalidate the key ring
ExceptionAssert.Throws<Exception>(() => keyRingProvider.GetCurrentKeyRingCore(throwKeyRingTime), "How exceptional.");
Assert.Same(originalKeyRing, keyRingProvider.GetCurrentKeyRingCore(throwKeyRingTime));
Assert.Same(updatedKeyRing, keyRingProvider.GetCurrentKeyRingCore(updatedKeyRingTime));
mockCacheableKeyRingProvider.Verify(o => o.GetCacheableKeyRing(originalKeyRingTime), Times.Once);
mockCacheableKeyRingProvider.Verify(o => o.GetCacheableKeyRing(throwKeyRingTime), Times.Once);
mockCacheableKeyRingProvider.Verify(o => o.GetCacheableKeyRing(updatedKeyRingTime), Times.Once);
}
private static KeyRingProvider CreateKeyRingProvider(ICacheableKeyRingProvider cacheableKeyRingProvider)
{
var serviceCollection = new ServiceCollection();