@@ -1294,8 +1294,21 @@ size_t CacheAllocator<CacheTrait>::wakeUpWaitersLocked(folly::StringPiece key,
1294
1294
}
1295
1295
1296
1296
template <typename CacheTrait>
1297
- void CacheAllocator<CacheTrait>::moveRegularItemWithSync(
1297
+ bool CacheAllocator<CacheTrait>::moveRegularItemWithSync(
1298
1298
Item& oldItem, WriteHandle& newItemHdl) {
1299
+ // on function exit - the new item handle is no longer moving
1300
+ // and other threads may access it - but in case where
1301
+ // we failed to replace in access container we can give the
1302
+ // new item back to the allocator
1303
+ auto guard = folly::makeGuard ([&]() {
1304
+ auto ref = newItemHdl->unmarkMoving ();
1305
+ if (UNLIKELY (ref == 0 )) {
1306
+ const auto res =
1307
+ releaseBackToAllocator (*newItemHdl, RemoveContext::kNormal , false );
1308
+ XDCHECK (res == ReleaseRes::kReleased );
1309
+ }
1310
+ });
1311
+
1299
1312
XDCHECK (oldItem.isMoving ());
1300
1313
XDCHECK (!oldItem.isExpired ());
1301
1314
// TODO: should we introduce new latency tracker. E.g. evictRegularLatency_
@@ -1326,6 +1339,22 @@ void CacheAllocator<CacheTrait>::moveRegularItemWithSync(
1326
1339
1327
1340
auto replaced = accessContainer_->replaceIf (oldItem, *newItemHdl,
1328
1341
predicate);
1342
+ // another thread may have called insertOrReplace which could have
1343
+ // marked this item as inaccessible causing the replaceIf
1344
+ // in the access container to fail - in this case we want
1345
+ // to abort the move since the item is no longer valid
1346
+ if (!replaced) {
1347
+ return false ;
1348
+ }
1349
+ // what if another thread calls insertOrReplace now when
1350
+ // the item is moving and already replaced in the hash table?
1351
+ // 1. it succeeds in updating the hash table - so there is
1352
+ // no guarantee that isAccessible() is true
1353
+ // 2. it will then try to remove from MM container
1354
+ // - this operation will wait for newItemHdl to
1355
+ // be unmarkedMoving via the waitContext
1356
+ // 3. replaced handle is returned and eventually drops
1357
+ // ref to 0 and the item is recycled back to allocator.
1329
1358
1330
1359
if (config_.moveCb ) {
1331
1360
// Execute the move callback. We cannot make any guarantees about the
@@ -1367,14 +1396,7 @@ void CacheAllocator<CacheTrait>::moveRegularItemWithSync(
1367
1396
XDCHECK (newItemHdl->hasChainedItem ());
1368
1397
}
1369
1398
newItemHdl.unmarkNascent ();
1370
- auto ref = newItemHdl->unmarkMoving ();
1371
- // remove because there is a chance the new item was not
1372
- // added to the access container
1373
- if (UNLIKELY (ref == 0 )) {
1374
- const auto res =
1375
- releaseBackToAllocator (*newItemHdl, RemoveContext::kNormal , false );
1376
- XDCHECK (res == ReleaseRes::kReleased );
1377
- }
1399
+ return true ;
1378
1400
}
1379
1401
1380
1402
template <typename CacheTrait>
@@ -1529,7 +1551,6 @@ template <typename CacheTrait>
1529
1551
void CacheAllocator<CacheTrait>::unlinkItemForEviction(Item& it) {
1530
1552
XDCHECK (it.isMarkedForEviction ());
1531
1553
XDCHECK (it.getRefCount () == 0 );
1532
-
1533
1554
accessContainer_->remove (it);
1534
1555
removeFromMMContainer (it);
1535
1556
@@ -1624,28 +1645,43 @@ CacheAllocator<CacheTrait>::findEviction(TierId tid, PoolId pid, ClassId cid) {
1624
1645
auto evictedToNext = lastTier ? nullptr
1625
1646
: tryEvictToNextMemoryTier (*candidate, false );
1626
1647
if (!evictedToNext) {
1627
- if (!token.isValid ()) {
1648
+ // if insertOrReplace was called during move
1649
+ // then candidate will not be accessible (failed replace during tryEvict)
1650
+ // - therefore this was why we failed to
1651
+ // evict to the next tier and insertOrReplace
1652
+ // will remove from NVM cache
1653
+ // however, if candidate is accessible
1654
+ // that means the allocation in the next
1655
+ // tier failed - so we will continue to
1656
+ // evict the item to NVM cache
1657
+ bool failedToReplace = !candidate->isAccessible ();
1658
+ if (!token.isValid () && !failedToReplace) {
1628
1659
token = createPutToken (*candidate);
1629
1660
}
1630
- // tryEvictToNextMemoryTier should only fail if allocation of the new item fails
1631
- // in that case, it should be still possible to mark item as exclusive.
1661
+ // tryEvictToNextMemoryTier can fail if:
1662
+ // a) allocation of the new item fails in that case,
1663
+ // it should be still possible to mark item for eviction.
1664
+ // b) another thread calls insertOrReplace and the item
1665
+ // is no longer accessible
1632
1666
//
1633
1667
// in case that we are on the last tier, we would have already marked
1634
1668
// as exclusive since we will not be moving the item to the next tier
1635
1669
// but rather just evicting altogether, no need to
1636
- // markExclusiveWhenMoving
1670
+ // markForEvictionWhenMoving
1637
1671
auto ret = lastTier ? true : candidate->markForEvictionWhenMoving ();
1638
1672
XDCHECK (ret);
1639
1673
1640
1674
unlinkItemForEviction (*candidate);
1675
+
1676
+ if (token.isValid () && shouldWriteToNvmCacheExclusive (*candidate)
1677
+ && !failedToReplace) {
1678
+ nvmCache_->put (*candidate, std::move (token));
1679
+ }
1641
1680
// wake up any readers that wait for the move to complete
1642
1681
// it's safe to do now, as we have the item marked exclusive and
1643
1682
// no other reader can be added to the waiters list
1644
1683
wakeUpWaiters (*candidate, {});
1645
1684
1646
- if (token.isValid () && shouldWriteToNvmCacheExclusive (*candidate)) {
1647
- nvmCache_->put (*candidate, std::move (token));
1648
- }
1649
1685
} else {
1650
1686
XDCHECK (!evictedToNext->isMarkedForEviction () && !evictedToNext->isMoving ());
1651
1687
XDCHECK (!candidate->isMarkedForEviction () && !candidate->isMoving ());
@@ -1756,7 +1792,10 @@ CacheAllocator<CacheTrait>::tryEvictToNextMemoryTier(
1756
1792
1757
1793
if (newItemHdl) {
1758
1794
XDCHECK_EQ (newItemHdl->getSize (), item.getSize ());
1759
- moveRegularItemWithSync (item, newItemHdl);
1795
+ if (!moveRegularItemWithSync (item, newItemHdl)) {
1796
+ return WriteHandle{};
1797
+ }
1798
+ XDCHECK_EQ (newItemHdl->getKey (),item.getKey ());
1760
1799
item.unmarkMoving ();
1761
1800
return newItemHdl;
1762
1801
} else {
@@ -1795,7 +1834,9 @@ CacheAllocator<CacheTrait>::tryPromoteToNextMemoryTier(
1795
1834
1796
1835
if (newItemHdl) {
1797
1836
XDCHECK_EQ (newItemHdl->getSize (), item.getSize ());
1798
- moveRegularItemWithSync (item, newItemHdl);
1837
+ if (!moveRegularItemWithSync (item, newItemHdl)) {
1838
+ return WriteHandle{};
1839
+ }
1799
1840
item.unmarkMoving ();
1800
1841
return newItemHdl;
1801
1842
} else {
@@ -3148,9 +3189,23 @@ bool CacheAllocator<CacheTrait>::tryMovingForSlabRelease(
3148
3189
// TODO: add support for chained items
3149
3190
return false ;
3150
3191
} else {
3151
- moveRegularItemWithSync (oldItem, newItemHdl);
3152
- removeFromMMContainer (oldItem);
3153
- return true ;
3192
+ // move can fail if another thread calls insertOrReplace
3193
+ // in this case oldItem is no longer valid (not accessible);
3194
+ // it gets removed from MMContainer and evictForSlabRelease
3195
+ // will send it back to the allocator
3196
+ bool ret = moveRegularItemWithSync (oldItem, newItemHdl);
3197
+ if (!ret) {
3198
+ // we failed to move - newItemHdl was released back to allocator
3199
+ // by the moveRegularItemWithSync but oldItem is not accessible
3200
+ // and no longer valid - we need to clean it up here
3201
+ XDCHECK (!oldItem.isAccessible ());
3202
+ oldItem.markForEvictionWhenMoving ();
3203
+ unlinkItemForEviction (oldItem);
3204
+ wakeUpWaiters (oldItem, {});
3205
+ } else {
3206
+ removeFromMMContainer (oldItem);
3207
+ }
3208
+ return ret;
3154
3209
}
3155
3210
}
3156
3211
}
0 commit comments