Skip to content

Make ordinary indexes store their keys unsliced #659

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Apr 1, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 26 additions & 23 deletions src/Database/LSMTree/Internal/Index/Ordinary.hs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
module Database.LSMTree.Internal.Index.Ordinary
(
IndexOrdinary (IndexOrdinary),
toLastKeys,
toUnslicedLastKeys,
search,
sizeInPages,
headerLBS,
Expand Down Expand Up @@ -41,6 +41,7 @@ import Database.LSMTree.Internal.Page (NumPages (NumPages),
PageNo (PageNo), PageSpan (PageSpan))
import Database.LSMTree.Internal.Serialise
(SerialisedKey (SerialisedKey'))
import Database.LSMTree.Internal.Unsliced (Unsliced, makeUnslicedKey)
import Database.LSMTree.Internal.Vector (binarySearchL, mkPrimVector)

{-|
Expand All @@ -66,22 +67,22 @@ supportedTypeAndVersion = 0x0101
ascending order and must comprise at least one page for 'search' to be able
to return a valid page span.
-}
newtype IndexOrdinary = IndexOrdinary (Vector SerialisedKey)
newtype IndexOrdinary = IndexOrdinary (Vector (Unsliced SerialisedKey))
deriving stock (Eq, Show)

instance NFData IndexOrdinary where

rnf (IndexOrdinary lastKeys) = rnf lastKeys
rnf (IndexOrdinary unslicedLastKeys) = rnf unslicedLastKeys

toLastKeys :: IndexOrdinary -> Vector SerialisedKey
toLastKeys (IndexOrdinary lastKeys) = lastKeys
toUnslicedLastKeys :: IndexOrdinary -> Vector (Unsliced SerialisedKey)
toUnslicedLastKeys (IndexOrdinary unslicedLastKeys) = unslicedLastKeys

{-|
For a specification of this operation, see the documentation of [its
type-agnostic version]('Database.LSMTree.Internal.Index.search').
-}
search :: SerialisedKey -> IndexOrdinary -> PageSpan
search key (IndexOrdinary lastKeys)
search key (IndexOrdinary unslicedLastKeys)
-- TODO: ideally, we could assert that an index is never empty, but
-- unfortunately we cannot currently do this. Runs (and therefore indexes)
-- /can/ be empty if they were created by a last-level merge where all input
Expand All @@ -94,35 +95,35 @@ search key (IndexOrdinary lastKeys)
| otherwise = assert (pageCount > 0) result where

protoStart :: Int
!protoStart = binarySearchL lastKeys key
!protoStart = binarySearchL unslicedLastKeys (makeUnslicedKey key)

pageCount :: Int
!pageCount = length lastKeys
!pageCount = length unslicedLastKeys

result :: PageSpan
result | protoStart < pageCount
= let

resultKey :: SerialisedKey
!resultKey = lastKeys ! protoStart
unslicedResultKey :: Unsliced SerialisedKey
!unslicedResultKey = unslicedLastKeys ! protoStart

end :: Int
!end = maybe (pred pageCount) (+ protoStart) $
findIndex (/= resultKey) $
drop (succ protoStart) lastKeys
findIndex (/= unslicedResultKey) $
drop (succ protoStart) unslicedLastKeys

in PageSpan (PageNo $ protoStart)
(PageNo $ end)
| otherwise
= let

resultKey :: SerialisedKey
!resultKey = last lastKeys
unslicedResultKey :: Unsliced SerialisedKey
!unslicedResultKey = last unslicedLastKeys

start :: Int
!start = maybe 0 succ $
findIndexR (/= resultKey) $
lastKeys
findIndexR (/= unslicedResultKey) $
unslicedLastKeys

in PageSpan (PageNo $ start)
(PageNo $ pred pageCount)
Expand All @@ -132,7 +133,8 @@ search key (IndexOrdinary lastKeys)
type-agnostic version]('Database.LSMTree.Internal.Index.sizeInPages').
-}
sizeInPages :: IndexOrdinary -> NumPages
sizeInPages (IndexOrdinary lastKeys) = NumPages $ fromIntegral (length lastKeys)
sizeInPages (IndexOrdinary unslicedLastKeys)
= NumPages $ fromIntegral (length unslicedLastKeys)

{-|
For a specification of this operation, see the documentation of [its
Expand Down Expand Up @@ -203,10 +205,11 @@ fromSBS shortByteString@(SBS unliftedByteArray)
Primitive.Vector _ _ entryCountRep = Primitive.force entryCountBytes

index :: Either String IndexOrdinary
index = IndexOrdinary <$> fromList <$> lastKeys lastKeysBytes
index = IndexOrdinary <$> fromList <$> unslicedLastKeys lastKeysBytes

lastKeys :: Primitive.Vector Word8 -> Either String [SerialisedKey]
lastKeys bytes
unslicedLastKeys :: Primitive.Vector Word8
-> Either String [Unsliced SerialisedKey]
unslicedLastKeys bytes
| Primitive.null bytes
= Right []
| otherwise
Expand Down Expand Up @@ -234,8 +237,8 @@ fromSBS shortByteString@(SBS unliftedByteArray)
(firstBytes, othersBytes)
= Primitive.splitAt firstSize postFirstSizeBytes

first :: SerialisedKey
!first = SerialisedKey' (Primitive.force firstBytes)
first :: Unsliced SerialisedKey
!first = makeUnslicedKey (SerialisedKey' firstBytes)

others <- lastKeys othersBytes
others <- unslicedLastKeys othersBytes
return (first : others)
37 changes: 21 additions & 16 deletions src/Database/LSMTree/Internal/Index/OrdinaryAcc.hs
Original file line number Diff line number Diff line change
Expand Up @@ -30,24 +30,26 @@ import Database.LSMTree.Internal.Index.Ordinary
(IndexOrdinary (IndexOrdinary))
import Database.LSMTree.Internal.Serialise
(SerialisedKey (SerialisedKey'))
import Database.LSMTree.Internal.Unsliced (Unsliced, makeUnslicedKey)
import Database.LSMTree.Internal.Vector (byteVectorFromPrim)
import Database.LSMTree.Internal.Vector.Growing (GrowingVector)
import qualified Database.LSMTree.Internal.Vector.Growing as Growing (append,
freeze, new)
#ifdef NO_IGNORE_ASSERTS
import Database.LSMTree.Internal.Unsliced (fromUnslicedKey)
import qualified Database.LSMTree.Internal.Vector.Growing as Growing
(readMaybeLast)
#endif

{-|
A general-purpose fence pointer index under incremental construction.

A value @IndexOrdinaryAcc lastKeys baler@ denotes a partially constructed
index that assigns keys to pages according to @lastKeys@ and uses @baler@
for incremental output of the serialised key list.
A value @IndexOrdinaryAcc unslicedLastKeys baler@ denotes a partially
constructed index that assigns keys to pages according to @unslicedLastKeys@
and uses @baler@ for incremental output of the serialised key list.
-}
data IndexOrdinaryAcc s = IndexOrdinaryAcc
!(GrowingVector s SerialisedKey)
!(GrowingVector s (Unsliced SerialisedKey))
!(Baler s)

-- | Creates a new, initially empty, index.
Expand All @@ -65,7 +67,7 @@ new initialKeyBufferSize minChunkSize = IndexOrdinaryAcc <$>
newWithDefaults :: ST s (IndexOrdinaryAcc s)
newWithDefaults = new 1024 4096

-- Yields the serialisation of an element of a key list.
-- | Yields the serialisation of an element of a key list.
keyListElem :: SerialisedKey -> [Primitive.Vector Word8]
keyListElem (SerialisedKey' keyBytes) = [keySizeBytes, keyBytes] where

Expand All @@ -86,14 +88,16 @@ keyListElem (SerialisedKey' keyBytes) = [keySizeBytes, keyBytes] where
appendSingle :: (SerialisedKey, SerialisedKey)
-> IndexOrdinaryAcc s
-> ST s (Maybe Chunk)
appendSingle (firstKey, lastKey) (IndexOrdinaryAcc lastKeys baler)
appendSingle (firstKey, lastKey) (IndexOrdinaryAcc unslicedLastKeys baler)
= assert (firstKey <= lastKey) $
do
#ifdef NO_IGNORE_ASSERTS
maybeLastLastKey <- Growing.readMaybeLast lastKeys
assert (all (< firstKey) maybeLastLastKey) $ return ()
maybeLastUnslicedLastKey <- Growing.readMaybeLast unslicedLastKeys
assert
(all (< firstKey) (fromUnslicedKey <$> maybeLastUnslicedLastKey))
(return ())
#endif
Growing.append lastKeys 1 lastKey
Growing.append unslicedLastKeys 1 (makeUnslicedKey lastKey)
Comment on lines 92 to +100
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See my suggestion for appendMulti

feedBaler (keyListElem lastKey) baler

{-|
Expand All @@ -103,13 +107,14 @@ appendSingle (firstKey, lastKey) (IndexOrdinaryAcc lastKeys baler)
appendMulti :: (SerialisedKey, Word32)
-> IndexOrdinaryAcc s
-> ST s [Chunk]
appendMulti (key, overflowPageCount) (IndexOrdinaryAcc lastKeys baler)
appendMulti (key, overflowPageCount) (IndexOrdinaryAcc unslicedLastKeys baler)
= do
#ifdef NO_IGNORE_ASSERTS
maybeLastLastKey <- Growing.readMaybeLast lastKeys
assert (all (< key) maybeLastLastKey) $ return ()
maybeLastUnslicedLastKey <- Growing.readMaybeLast unslicedLastKeys
assert (all (< key) (fromUnslicedKey <$> maybeLastUnslicedLastKey))
(return ())
#endif
Growing.append lastKeys pageCount key
Growing.append unslicedLastKeys pageCount (makeUnslicedKey key)
Comment on lines 111 to +117
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Something along these lines would fix the compiler error with assertions disabled:

    = do
          let !key' = makeUnslicedKey key
#ifdef NO_IGNORE_ASSERTS
          maybeLastUnslicedLastKey <- Growing.readMaybeLast unslicedLastKeys
          assert (all (< key') maybeLastUnslicedLastKey)
                 (return ())
#endif
          Growing.append unslicedLastKeys pageCount key'

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What compiler error do you get here? I can’t see an error in this code, and commenting out the #ifdef … #endif part doesn’t cause GHC to show me an error message.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, I see: When removing both assertion blocks, there is an unused import. I had a similar issue in the same module with readMaybeLast, which I solved by a conditional import. I can just move the import of fromUnslicedKey into the conditional block with the readMaybeLast import.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can just move the import of fromUnslicedKey into the conditional block with the readMaybeLast import.

Done in dd6b71c.

maybeToList <$> feedBaler keyListElems baler
where

Expand All @@ -124,7 +129,7 @@ appendMulti (key, overflowPageCount) (IndexOrdinaryAcc lastKeys baler)
type-agnostic version]('Database.LSMTree.Internal.Index.unsafeEnd').
-}
unsafeEnd :: IndexOrdinaryAcc s -> ST s (Maybe Chunk, IndexOrdinary)
unsafeEnd (IndexOrdinaryAcc lastKeys baler) = do
keys <- Growing.freeze lastKeys
unsafeEnd (IndexOrdinaryAcc unslicedLastKeys baler) = do
frozenUnslicedLastKeys <- Growing.freeze unslicedLastKeys
remnant <- unsafeEndBaler baler
return (remnant, IndexOrdinary keys)
return (remnant, IndexOrdinary frozenUnslicedLastKeys)
Loading