Skip to content

Commit 08bf0b4

Browse files
committed
fix race in moveRegularItemWith sync where insertOrReplace can cause move to fail
- updated slab release logic for move failure, but there is still an issue with slab movement. currently investigating.
1 parent b99bb9d commit 08bf0b4

File tree

5 files changed

+212
-25
lines changed

5 files changed

+212
-25
lines changed

Diff for: cachelib/allocator/CacheAllocator-inl.h

+77-22
Original file line numberDiff line numberDiff line change
@@ -1294,8 +1294,21 @@ size_t CacheAllocator<CacheTrait>::wakeUpWaitersLocked(folly::StringPiece key,
12941294
}
12951295

12961296
template <typename CacheTrait>
1297-
void CacheAllocator<CacheTrait>::moveRegularItemWithSync(
1297+
bool CacheAllocator<CacheTrait>::moveRegularItemWithSync(
12981298
Item& oldItem, WriteHandle& newItemHdl) {
1299+
//on function exit - the new item handle is no longer moving
1300+
//and other threads may access it - but in case where
1301+
//we failed to replace in access container we can give the
1302+
//new item back to the allocator
1303+
auto guard = folly::makeGuard([&]() {
1304+
auto ref = newItemHdl->unmarkMoving();
1305+
if (UNLIKELY(ref == 0)) {
1306+
const auto res =
1307+
releaseBackToAllocator(*newItemHdl, RemoveContext::kNormal, false);
1308+
XDCHECK(res == ReleaseRes::kReleased);
1309+
}
1310+
});
1311+
12991312
XDCHECK(oldItem.isMoving());
13001313
XDCHECK(!oldItem.isExpired());
13011314
// TODO: should we introduce new latency tracker. E.g. evictRegularLatency_
@@ -1326,6 +1339,22 @@ void CacheAllocator<CacheTrait>::moveRegularItemWithSync(
13261339

13271340
auto replaced = accessContainer_->replaceIf(oldItem, *newItemHdl,
13281341
predicate);
1342+
// another thread may have called insertOrReplace which could have
1343+
// marked this item as unaccessible causing the replaceIf
1344+
// in the access container to fail - in this case we want
1345+
// to abort the move since the item is no longer valid
1346+
if (!replaced) {
1347+
return false;
1348+
}
1349+
// what if another thread calls insertOrReplace now when
1350+
// the item is moving and already replaced in the hash table?
1351+
// 1. it succeeds in updating the hash table - so there is
1352+
// no guarentee that isAccessible() is true
1353+
// 2. it will then try to remove from MM container
1354+
// - this operation will wait for newItemHdl to
1355+
// be unmarkedMoving via the waitContext
1356+
// 3. replaced handle is returned and eventually drops
1357+
// ref to 0 and the item is recycled back to allocator.
13291358

13301359
if (config_.moveCb) {
13311360
// Execute the move callback. We cannot make any guarantees about the
@@ -1367,14 +1396,7 @@ void CacheAllocator<CacheTrait>::moveRegularItemWithSync(
13671396
XDCHECK(newItemHdl->hasChainedItem());
13681397
}
13691398
newItemHdl.unmarkNascent();
1370-
auto ref = newItemHdl->unmarkMoving();
1371-
//remove because there is a chance the new item was not
1372-
//added to the access container
1373-
if (UNLIKELY(ref == 0)) {
1374-
const auto res =
1375-
releaseBackToAllocator(*newItemHdl, RemoveContext::kNormal, false);
1376-
XDCHECK(res == ReleaseRes::kReleased);
1377-
}
1399+
return true;
13781400
}
13791401

13801402
template <typename CacheTrait>
@@ -1529,7 +1551,6 @@ template <typename CacheTrait>
15291551
void CacheAllocator<CacheTrait>::unlinkItemForEviction(Item& it) {
15301552
XDCHECK(it.isMarkedForEviction());
15311553
XDCHECK(it.getRefCount() == 0);
1532-
15331554
accessContainer_->remove(it);
15341555
removeFromMMContainer(it);
15351556

@@ -1624,28 +1645,43 @@ CacheAllocator<CacheTrait>::findEviction(TierId tid, PoolId pid, ClassId cid) {
16241645
auto evictedToNext = lastTier ? nullptr
16251646
: tryEvictToNextMemoryTier(*candidate, false);
16261647
if (!evictedToNext) {
1627-
if (!token.isValid()) {
1648+
//if insertOrReplace was called during move
1649+
//then candidate will not be accessible (failed replace during tryEvict)
1650+
// - therefore this was why we failed to
1651+
// evict to the next tier and insertOrReplace
1652+
// will remove from NVM cache
1653+
//however, if candidate is accessible
1654+
//that means the allocation in the next
1655+
//tier failed - so we will continue to
1656+
//evict the item to NVM cache
1657+
bool failedToReplace = !candidate->isAccessible();
1658+
if (!token.isValid() && !failedToReplace) {
16281659
token = createPutToken(*candidate);
16291660
}
1630-
// tryEvictToNextMemoryTier should only fail if allocation of the new item fails
1631-
// in that case, it should be still possible to mark item as exclusive.
1661+
// tryEvictToNextMemoryTier can fail if:
1662+
// a) allocation of the new item fails in that case,
1663+
// it should be still possible to mark item for eviction.
1664+
// b) another thread calls insertOrReplace and the item
1665+
// is no longer accessible
16321666
//
16331667
// in case that we are on the last tier, we whould have already marked
16341668
// as exclusive since we will not be moving the item to the next tier
16351669
// but rather just evicting all together, no need to
1636-
// markExclusiveWhenMoving
1670+
// markForEvictionWhenMoving
16371671
auto ret = lastTier ? true : candidate->markForEvictionWhenMoving();
16381672
XDCHECK(ret);
16391673

16401674
unlinkItemForEviction(*candidate);
1675+
1676+
if (token.isValid() && shouldWriteToNvmCacheExclusive(*candidate)
1677+
&& !failedToReplace) {
1678+
nvmCache_->put(*candidate, std::move(token));
1679+
}
16411680
// wake up any readers that wait for the move to complete
16421681
// it's safe to do now, as we have the item marked exclusive and
16431682
// no other reader can be added to the waiters list
16441683
wakeUpWaiters(*candidate, {});
16451684

1646-
if (token.isValid() && shouldWriteToNvmCacheExclusive(*candidate)) {
1647-
nvmCache_->put(*candidate, std::move(token));
1648-
}
16491685
} else {
16501686
XDCHECK(!evictedToNext->isMarkedForEviction() && !evictedToNext->isMoving());
16511687
XDCHECK(!candidate->isMarkedForEviction() && !candidate->isMoving());
@@ -1756,7 +1792,10 @@ CacheAllocator<CacheTrait>::tryEvictToNextMemoryTier(
17561792

17571793
if (newItemHdl) {
17581794
XDCHECK_EQ(newItemHdl->getSize(), item.getSize());
1759-
moveRegularItemWithSync(item, newItemHdl);
1795+
if (!moveRegularItemWithSync(item, newItemHdl)) {
1796+
return WriteHandle{};
1797+
}
1798+
XDCHECK_EQ(newItemHdl->getKey(),item.getKey());
17601799
item.unmarkMoving();
17611800
return newItemHdl;
17621801
} else {
@@ -1795,7 +1834,9 @@ CacheAllocator<CacheTrait>::tryPromoteToNextMemoryTier(
17951834

17961835
if (newItemHdl) {
17971836
XDCHECK_EQ(newItemHdl->getSize(), item.getSize());
1798-
moveRegularItemWithSync(item, newItemHdl);
1837+
if (!moveRegularItemWithSync(item, newItemHdl)) {
1838+
return WriteHandle{};
1839+
}
17991840
item.unmarkMoving();
18001841
return newItemHdl;
18011842
} else {
@@ -3148,9 +3189,23 @@ bool CacheAllocator<CacheTrait>::tryMovingForSlabRelease(
31483189
// TODO: add support for chained items
31493190
return false;
31503191
} else {
3151-
moveRegularItemWithSync(oldItem, newItemHdl);
3152-
removeFromMMContainer(oldItem);
3153-
return true;
3192+
//move can fail if another thread calls insertOrReplace
3193+
//in this case oldItem is no longer valid (not accessible,
3194+
//it gets removed from MMContainer and evictForSlabRelease
3195+
//will send it back to the allocator
3196+
bool ret = moveRegularItemWithSync(oldItem, newItemHdl);
3197+
if (!ret) {
3198+
//we failed to move - newItemHdl was released back to allocator
3199+
//by the moveRegularItemWithSync but oldItem is not accessible
3200+
//and no longer valid - we need to clean it up here
3201+
XDCHECK(!oldItem.isAccessible());
3202+
oldItem.markForEvictionWhenMoving();
3203+
unlinkItemForEviction(oldItem);
3204+
wakeUpWaiters(oldItem, {});
3205+
} else {
3206+
removeFromMMContainer(oldItem);
3207+
}
3208+
return ret;
31543209
}
31553210
}
31563211
}

Diff for: cachelib/allocator/CacheAllocator.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -1617,7 +1617,7 @@ class CacheAllocator : public CacheBase {
16171617
//
16181618
// @return true If the move was completed, and the containers were updated
16191619
// successfully.
1620-
void moveRegularItemWithSync(Item& oldItem, WriteHandle& newItemHdl);
1620+
bool moveRegularItemWithSync(Item& oldItem, WriteHandle& newItemHdl);
16211621

16221622
// Moves a regular item to a different slab. This should only be used during
16231623
// slab release after the item's exclusive bit has been set. The user supplied

Diff for: cachelib/allocator/CacheItem.h

+5
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,9 @@ class BaseAllocatorTest;
4646
template <typename AllocatorT>
4747
class AllocatorHitStatsTest;
4848

49+
template <typename AllocatorT>
50+
class AllocatorMemoryTiersTest;
51+
4952
template <typename AllocatorT>
5053
class MapTest;
5154

@@ -473,6 +476,8 @@ class CACHELIB_PACKED_ATTR CacheItem {
473476
FRIEND_TEST(ItemTest, NonStringKey);
474477
template <typename AllocatorT>
475478
friend class facebook::cachelib::tests::AllocatorHitStatsTest;
479+
template <typename AllocatorT>
480+
friend class facebook::cachelib::tests::AllocatorMemoryTiersTest;
476481
};
477482

478483
// A chained item has a hook pointing to the next chained item. The hook is

Diff for: cachelib/allocator/tests/AllocatorMemoryTiersTest.cpp

+2-1
Original file line numberDiff line numberDiff line change
@@ -21,14 +21,15 @@ namespace cachelib {
2121
namespace tests {
2222

2323
using LruAllocatorMemoryTiersTest = AllocatorMemoryTiersTest<LruAllocator>;
24-
24+
//using LruTestAllocatorMemoryTiersTest = AllocatorMemoryTiersTest<LruTestAllocator>;
2525
// TODO(MEMORY_TIER): add more tests with different eviction policies
2626
TEST_F(LruAllocatorMemoryTiersTest, MultiTiersInvalid) { this->testMultiTiersInvalid(); }
2727
TEST_F(LruAllocatorMemoryTiersTest, MultiTiersValid) { this->testMultiTiersValid(); }
2828
TEST_F(LruAllocatorMemoryTiersTest, MultiTiersValidMixed) { this->testMultiTiersValidMixed(); }
2929
TEST_F(LruAllocatorMemoryTiersTest, MultiTiersBackgroundMovers ) { this->testMultiTiersBackgroundMovers(); }
3030
TEST_F(LruAllocatorMemoryTiersTest, MultiTiersRemoveDuringEviction) { this->testMultiTiersRemoveDuringEviction(); }
3131
TEST_F(LruAllocatorMemoryTiersTest, MultiTiersReplaceDuringEviction) { this->testMultiTiersReplaceDuringEviction(); }
32+
TEST_F(LruAllocatorMemoryTiersTest, MultiTiersReplaceDuringEvictionWithReader) { this->testMultiTiersReplaceDuringEvictionWithReader(); }
3233

3334
} // end of namespace tests
3435
} // end of namespace cachelib

Diff for: cachelib/allocator/tests/AllocatorMemoryTiersTest.h

+127-1
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,10 @@
2222
#include "cachelib/allocator/FreeThresholdStrategy.h"
2323
#include "cachelib/allocator/PromotionStrategy.h"
2424

25+
#include <fcntl.h>
26+
#include <unistd.h>
27+
#include <ctype.h>
28+
#include <semaphore.h>
2529
#include <folly/synchronization/Latch.h>
2630

2731
namespace facebook {
@@ -58,6 +62,7 @@ class AllocatorMemoryTiersTest : public AllocatorTest<AllocatorT> {
5862
ASSERT_NO_THROW(alloc->insertOrReplace(handle));
5963
}
6064
}
65+
6166
public:
6267
void testMultiTiersInvalid() {
6368
typename AllocatorT::Config config;
@@ -201,7 +206,7 @@ class AllocatorMemoryTiersTest : public AllocatorTest<AllocatorT> {
201206

202207
t->join();
203208
}
204-
209+
205210
void testMultiTiersReplaceDuringEviction() {
206211
std::unique_ptr<AllocatorT> alloc;
207212
PoolId pool;
@@ -234,6 +239,127 @@ class AllocatorMemoryTiersTest : public AllocatorTest<AllocatorT> {
234239
testMultiTiersAsyncOpDuringMove(alloc, pool, quit, moveCb);
235240

236241
t->join();
242+
243+
}
244+
245+
246+
void gdb_sync1() {}
247+
void gdb_sync2() {}
248+
void gdb_sync3() {}
249+
using ReadHandle = typename AllocatorT::ReadHandle;
250+
void testMultiTiersReplaceDuringEvictionWithReader() {
251+
sem_unlink ("/gdb1_sem");
252+
sem_t *sem = sem_open ("/gdb1_sem", O_CREAT | O_EXCL, S_IRUSR | S_IWUSR, 0);
253+
int gdbfd = open("/tmp/gdb1.gdb",O_CREAT | O_TRUNC | O_RDWR, S_IRUSR | S_IWUSR);
254+
char gdbcmds[] =
255+
"set attached=1\n"
256+
"break gdb_sync1\n"
257+
"break gdb_sync2\n"
258+
"break moveRegularItemWithSync\n"
259+
"c\n"
260+
"set scheduler-locking on\n"
261+
"thread 1\n"
262+
"c\n"
263+
"thread 4\n"
264+
"c\n"
265+
"thread 5\n"
266+
"break nativeFutexWaitImpl thread 5\n"
267+
"c\n"
268+
"thread 4\n"
269+
"break nativeFutexWaitImpl thread 4\n"
270+
"c\n"
271+
"thread 1\n"
272+
"break releaseBackToAllocator\n"
273+
"c\n"
274+
"c\n"
275+
"thread 5\n"
276+
"c\n"
277+
"thread 4\n"
278+
"c\n"
279+
"thread 1\n"
280+
"break gdb_sync3\n"
281+
"c\n"
282+
"quit\n";
283+
int ret = write(gdbfd,gdbcmds,strlen(gdbcmds));
284+
int ppid = getpid(); //parent pid
285+
//int pid = 0;
286+
int pid = fork();
287+
if (pid == 0) {
288+
sem_wait(sem);
289+
sem_close(sem);
290+
sem_unlink("/gdb1_sem");
291+
char cmdpid[256];
292+
sprintf(cmdpid,"%d",ppid);
293+
int f = execlp("gdb","gdb","--pid",cmdpid,"--batch-silent","--command=/tmp/gdb1.gdb",(char*) 0);
294+
ASSERT(f != -1);
295+
}
296+
sem_post(sem);
297+
//wait for gdb to run
298+
int attached = 0;
299+
while (attached == 0);
300+
301+
std::unique_ptr<AllocatorT> alloc;
302+
PoolId pool;
303+
bool quit = false;
304+
305+
typename AllocatorT::Config config;
306+
config.setCacheSize(4 * Slab::kSize);
307+
config.enableCachePersistence("/tmp");
308+
config.configureMemoryTiers({
309+
MemoryTierCacheConfig::fromShm()
310+
.setRatio(1).setMemBind(std::string("0")),
311+
MemoryTierCacheConfig::fromShm()
312+
.setRatio(1).setMemBind(std::string("0"))
313+
});
314+
315+
alloc = std::make_unique<AllocatorT>(AllocatorT::SharedMemNew, config);
316+
ASSERT(alloc != nullptr);
317+
pool = alloc->addPool("default", alloc->getCacheMemoryStats().ramCacheSize);
318+
319+
int i = 0;
320+
typename AllocatorT::Item* evicted;
321+
std::unique_ptr<std::thread> t;
322+
std::unique_ptr<std::thread> r;
323+
while(!quit) {
324+
auto handle = alloc->allocate(pool, std::to_string(++i), std::string("value").size());
325+
ASSERT(handle != nullptr);
326+
if (i == 1) {
327+
evicted = static_cast<typename AllocatorT::Item*>(handle.get());
328+
folly::Latch latch_t(1);
329+
t = std::make_unique<std::thread>([&](){
330+
auto handleNew = alloc->allocate(pool, std::to_string(1), std::string("new value").size());
331+
ASSERT(handleNew != nullptr);
332+
latch_t.count_down();
333+
//first breakpoint will be this one because
334+
//thread 1 still has more items to fill up the
335+
//cache before an evict is evicted
336+
gdb_sync1();
337+
ASSERT(evicted->isMoving());
338+
//need to suspend thread 1 - who is doing the eviction
339+
//gdb will do this for us
340+
folly::Latch latch(1);
341+
r = std::make_unique<std::thread>([&](){
342+
ASSERT(evicted->isMoving());
343+
latch.count_down();
344+
auto handleEvict = alloc->find(std::to_string(1));
345+
//does find block until done moving?? yes
346+
while (evicted->isMarkedForEviction()); //move will fail
347+
XDCHECK(handleEvict == nullptr) << handleEvict->toString();
348+
ASSERT(handleEvict == nullptr);
349+
});
350+
latch.wait();
351+
gdb_sync2();
352+
alloc->insertOrReplace(handleNew);
353+
ASSERT(!evicted->isAccessible()); //move failed
354+
quit = true;
355+
});
356+
latch_t.wait();
357+
}
358+
ASSERT_NO_THROW(alloc->insertOrReplace(handle));
359+
}
360+
t->join();
361+
r->join();
362+
gdb_sync3();
237363
}
238364
};
239365
} // namespace tests

0 commit comments

Comments
 (0)