As an exercise in learning Haskell, I implemented a simple key value store, where you can put and get values (as ByteString
s). (For reference this is inspired by this short note describing bitcask's design). I'd appreciate feedback on any aspect (style, 'haskellyness', library usage etc.) of my implementation:
{-# LANGUAGE OverloadedStrings #-}
module KV (
evalKV,
putKV,
getKV,
liftIO
) where
import Prelude hiding (mapM)
import Data.Traversable (mapM)
import Control.Monad (liftM, liftM2)
import Control.Monad.State (StateT, evalStateT, get, put, liftIO)
import qualified System.IO as IO
import Data.ByteString.Lazy (ByteString)
import qualified Data.ByteString.Lazy as L
import qualified Data.Binary as B
import qualified Data.Binary.Get as G
import qualified Data.Map as M
type Header = (Integer, Integer)
-- keysize valuesize
type ValueInfo = (Integer, Integer)
-- offset valuesize
type Key = ByteString
type Pair = (Key, ValueInfo)
type Index = M.Map Key ValueInfo
data KVState = KVState
{ kvHandle :: IO.Handle
, kvIndex :: Index
} deriving (Show)
type KV a = StateT KVState IO a
evalKV :: FilePath -> KV a -> IO a
evalKV p s = IO.withBinaryFile p IO.ReadWriteMode $ \h -> do
i <- readIndex h
evalStateT s $ KVState h i
putKV :: ByteString -> ByteString -> KV ()
putKV k v = do
(KVState h i) <- get
vi <- liftIO $ writePair h k v
let i' = M.insert k vi i
put $ KVState h i'
getKV :: ByteString -> KV (Maybe ByteString)
getKV k = do
(KVState h i) <- get
liftIO $ lookupKV h i k
readHeader :: ByteString -> (Header, Integer)
readHeader c = (h, fromIntegral o)
where (h, _, o) = G.runGetState B.get c 0
readAt :: IO.Handle -> Integer -> Integer -> IO ByteString
readAt h o sz = do
IO.hSeek h IO.AbsoluteSeek o
L.hGet h $ fromIntegral sz
readPair :: IO.Handle -> Integer -> IO (Maybe Pair)
readPair h o = do
IO.hSeek h IO.AbsoluteSeek o
b <- L.hGet h 10 -- TODO: qualify this arbitrary number
if L.null b
then return Nothing
else do
let ((ksz, vsz), l) = readHeader b
k <- readAt h (o + l) ksz
return $ Just (k, (o + l + ksz, vsz))
writePair :: IO.Handle -> ByteString -> ByteString -> IO ValueInfo
writePair h k v = do
IO.hSeek h IO.SeekFromEnd 0
let l = fromIntegral . L.length
let vsz = l v
let t = (l k, vsz) :: Header
L.hPut h (B.encode t)
L.hPut h k
p <- IO.hTell h
L.hPut h v
return (p, vsz)
readIndex :: IO.Handle -> IO Index
readIndex h = liftM M.fromList $ ri 0
where ri o = do
mp <- readPair h o
case mp of
Just p@(k, (vo, vsz)) -> do
t <- ri (vo + vsz)
return $ p : t
Nothing -> return []
lookupKV :: IO.Handle -> Index -> ByteString -> IO (Maybe ByteString)
lookupKV h i k = mapM r mv
where r = uncurry $ readAt h
mv = M.lookup k i
And usage would look like:
{-# LANGUAGE OverloadedStrings #-}
import KV
import Data.ByteString.Lazy.Char8
main = evalKV "test" $ do
u <- getKV "asdf"
liftIO $ print u
putKV "asdf" "qwer"
v <- getKV "asdf"
liftIO $ print v
Which would result in the following output:
Nothing
Just (Chunk "qwer" Empty)
One concern I have, in particular, is that I've currently made no consideration for allowing concurrent access to the store, if people have opinions on the best approach for this I'd be really interested to hear them. I have this concern as, if I get time to work on this some more, my intention is to create a way to access the store over HTTP/TCP.