1 {-# LANGUAGE ScopedTypeVariables, BangPatterns, TypeSynonymInstances, 2 UndecidableInstances, ViewPatterns, FlexibleContexts #-} 3 4 -- | An experimental monadic interface to Tree mutation. The main idea is to 5 -- simulate IO-ish manipulation of real filesystem (that's the state part of 6 -- the monad), and to keep memory usage down by reasonably often dumping the 7 -- intermediate data to disk and forgetting it. The monad interface itself is 8 -- generic, and a number of actual implementations can be used. This module 9 -- provides just 'virtualTreeIO' that never writes any changes, but may trigger 10 -- filesystem reads as appropriate. 11 module Storage.Hashed.Monad 12 ( virtualTreeIO, virtualTreeMonad 13 , readFile, writeFile, createDirectory, rename, copy, unlink 14 , fileExists, directoryExists, exists, existsAnycase, withDirectory 15 , currentDirectory, currentTree 16 , TreeState, TreeMonad, TreeIO, runTreeMonad 17 , initialState, replaceItem 18 19 , findM, findTreeM, findFileM 20 ) where 21 22 import Prelude hiding ( readFile, writeFile ) 23 24 import Storage.Hashed.Path 25 import Storage.Hashed.Tree 26 import Storage.Hashed.Hash 27 28 import Control.Monad( (>=>) ) 29 import Control.Applicative( (<$>) ) 30 31 import Data.List( sortBy ) 32 import Data.Int( Int64 ) 33 import Data.Char( toLower ) 34 import Data.Maybe( isNothing, isJust ) 35 36 import qualified Data.ByteString.Lazy.Char8 as BL 37 import qualified Data.ByteString.Char8 as BSC 38 import Control.Monad.RWS.Strict 39 import qualified Data.Set as S 40 import qualified Data.Map as M 41 42 type Changed = M.Map Relative (Int64, Int64) -- size, age 43 44 -- | Internal state of the 'TreeIO' monad. Keeps track of the current Tree 45 -- content, unsync'd changes and a current working directory (of the monad). 46 data TreeState m = TreeState { tree :: !(Tree m) 47 , changed :: !Changed 48 , changesize :: !Int64 49 , maxage :: !Int64 50 , updateHash :: TreeItem m -> m Hash 51 , update :: Relative -> TreeItem m -> TreeMonad m (TreeItem m) } 52 53 -- | A 'TreeIO' monad. A sort of like IO but it keeps a 'TreeState' around as well, 54 -- which is a sort of virtual filesystem. Depending on how you obtained your 55 -- 'TreeIO', the actions in your virtual filesystem get somehow reflected in the 56 -- actual real filesystem. For 'virtualTreeIO', nothing happens in real 57 -- filesystem, however with 'plainTreeIO', the plain tree will be updated every 58 -- now and then, and with 'hashedTreeIO' a darcs-style hashed tree will get 59 -- updated. 60 type TreeMonad m = RWST Relative () (TreeState m) m 61 type TreeIO = TreeMonad IO 62 63 class (Functor m, Monad m) => TreeRO m where 64 currentDirectory :: m Relative 65 withDirectory :: Relative -> m a -> m a 66 expandTo :: Relative -> m Relative 67 -- | Grab content of a file in the current Tree at the given path. 68 readFile :: Relative -> m BL.ByteString 69 -- | Check for existence of a node (file or directory, doesn't matter). 70 exists :: Relative -> m Bool 71 -- | Check for existence of a directory. 72 directoryExists :: Relative -> m Bool 73 -- | Check for existence of a file. 74 fileExists :: Relative -> m Bool 75 76 class TreeRO m => TreeRW m where 77 -- | Change content of a file at a given path. The change will be 78 -- eventually flushed to disk, but might be buffered for some time. 79 writeFile :: Relative -> BL.ByteString -> m () 80 createDirectory :: Relative -> m () 81 unlink :: Relative -> m () 82 rename :: Relative -> Relative -> m () 83 copy :: Relative -> Relative -> m () 84 85 initialState :: Tree m -> (TreeItem m -> m Hash) 86 -> (Relative -> TreeItem m -> TreeMonad m (TreeItem m)) -> TreeState m 87 initialState t uh u = TreeState { tree = t 88 , changed = M.empty 89 , changesize = 0 90 , updateHash = uh 91 , maxage = 0 92 , update = u } 93 94 flush :: (Functor m, Monad m) => TreeMonad m () 95 flush = do current <- get 96 changed' <- map fst <$> M.toList <$> gets changed 97 dirs' <- gets tree >>= \t -> return [ path | (path, SubTree s) <- list t ] 98 modify $ \st -> st { changed = M.empty, changesize = 0 } 99 forM_ (changed' ++ dirs' ++ [root]) flushItem 100 101 runTreeMonad' :: (Functor m, Monad m) => TreeMonad m a -> TreeState m -> m (a, Tree m) 102 runTreeMonad' action initial = do 103 (out, final, _) <- runRWST action root initial 104 return (out, tree final) 105 106 runTreeMonad :: (Functor m, Monad m) => TreeMonad m a -> TreeState m -> m (a, Tree m) 107 runTreeMonad action initial = do 108 let action' = do x <- action 109 flush 110 return x 111 runTreeMonad' action' initial 112 113 -- | Run a TreeIO action without storing any changes. This is useful for 114 -- running monadic tree mutations for obtaining the resulting Tree (as opposed 115 -- to their effect of writing a modified tree to disk). The actions can do both 116 -- read and write -- reads are passed through to the actual filesystem, but the 117 -- writes are held in memory in a form of modified Tree. 118 virtualTreeMonad :: (Functor m, Monad m) => TreeMonad m a -> Tree m -> m (a, Tree m) 119 virtualTreeMonad action t = runTreeMonad' action $ 120 initialState t (\_ -> return NoHash) (\_ x -> return x) 121 122 virtualTreeIO :: TreeIO a -> Tree IO -> IO (a, Tree IO) 123 virtualTreeIO = virtualTreeMonad 124 125 currentTree :: (Monad m) => TreeMonad m (Tree m) 126 currentTree = gets tree 127 128 -- | Modifies an item in the current Tree. This action keeps an account of the 129 -- modified data, in changed and changesize, for subsequent flush 130 -- operations. Any modifications (as in "modifyTree") are allowed. 131 modifyItem :: (Functor m, Monad m) 132 => Relative -> Maybe (TreeItem m) -> TreeMonad m () 133 modifyItem path item = do 134 path' <- (+/+ path) `fmap` currentDirectory 135 age <- gets maxage 136 changed' <- gets changed 137 let getsize (Just (File b)) = lift (BL.length `fmap` readBlob b) 138 getsize _ = return 0 139 size <- getsize item 140 let change = case M.lookup path' changed' of 141 Nothing -> size 142 Just (oldsize, _) -> size - oldsize 143 144 modify $ \st -> st { tree = modifyTree (tree st) path' item 145 , changed = M.insert path' (size, age) (changed st) 146 , maxage = age + 1 147 , changesize = (changesize st + change) } 148 149 renameChanged from to = modify $ \st -> st { changed = rename' $ changed st } 150 where rename' = M.fromList . map renameone . M.toList 151 renameone (x, d) | Just suff <- suffix from x = (to +/+ suff, d) 152 | otherwise = (x, d) 153 154 -- | Replace an item with a new version without modifying the content of the 155 -- tree. This does not do any change tracking. Ought to be only used from a 156 -- 'sync' implementation for a particular storage format. The presumed use-case 157 -- is that an existing in-memory Blob is replaced with a one referring to an 158 -- on-disk file. 159 replaceItem :: (Functor m, Monad m) 160 => Relative -> Maybe (TreeItem m) -> TreeMonad m () 161 replaceItem path item = do 162 path' <- (+/+ path) `fmap` currentDirectory 163 modify $ \st -> st { tree = modifyTree (tree st) path' item } 164 165 flushItem :: forall e m. (Monad m, Functor m) => Relative -> TreeMonad m () 166 flushItem path = 167 do current <- gets tree 168 case find current path of 169 Nothing -> return () -- vanished, do nothing 170 Just x -> do y <- fixHash x 171 new <- gets update >>= ($ y) . ($ path) 172 replaceItem path (Just new) 173 where fixHash :: TreeItem m -> TreeMonad m (TreeItem m) 174 fixHash f@(File (Blob con NoHash)) = do 175 hash <- gets updateHash >>= \x -> lift $ x f 176 return $ File $ Blob con hash 177 fixHash (SubTree s) | treeHash s == NoHash = 178 gets updateHash >>= \f -> SubTree <$> lift (addMissingHashes f s) 179 fixHash x = return x 180 181 182 -- | If buffers are becoming large, sync, otherwise do nothing. 183 flushSome :: (Monad m, Functor m) => TreeMonad m () 184 flushSome = do x <- gets changesize 185 when (x > megs 100) $ do 186 remaining <- go =<< sortBy age <$> M.toList <$> gets changed 187 modify $ \s -> s { changed = M.fromList remaining } 188 where go [] = return [] 189 go ((path, (size, age_)):chs) = do 190 x <- (\s -> s - size) <$> gets changesize 191 flushItem path 192 modify $ \s -> s { changesize = x } 193 if (x > megs 80) then go chs 194 else return $ chs 195 megs = (* (1024 * 1024)) 196 age (_, (_, a)) (_, (_, b)) = compare a b 197 198 instance (Monad m) => TreeRO (TreeMonad m) where 199 expandTo p = 200 do t <- gets tree 201 p' <- (+/+ p) `fmap` ask 202 let amend = do t' <- lift $ expandPath t p' 203 modify $ \st -> st { tree = t' } 204 case find t p' of 205 Nothing -> amend 206 Just (Stub _ _) -> amend 207 _ -> return () 208 return p' 209 210 fileExists p = 211 do p' <- expandTo p 212 (isJust . (flip findFile p')) `fmap` gets tree 213 214 directoryExists p = 215 do p' <- expandTo p 216 (isJust . (flip findTree p')) `fmap` gets tree 217 218 exists p = 219 do p' <- expandTo p 220 (isJust . (flip find p')) `fmap` gets tree 221 222 readFile p = 223 do p' <- expandTo p 224 t <- gets tree 225 let f = findFile t p' 226 case f of 227 Nothing -> fail $ "No such file " ++ show p' 228 Just x -> lift (readBlob x) 229 230 currentDirectory = ask 231 withDirectory dir act = do 232 dir' <- expandTo dir 233 local (\old -> dir') act 234 235 instance (Functor m, Monad m) => TreeRW (TreeMonad m) where 236 writeFile p con = 237 do expandTo p 238 modifyItem p (Just blob) 239 flushSome 240 where blob = File $ Blob (return con) hash 241 hash = NoHash -- we would like to say "sha256 con" here, but due 242 -- to strictness of Hash in Blob, this would often 243 -- lead to unnecessary computation which would then 244 -- be discarded anyway; we rely on the sync 245 -- implementation to fix up any NoHash occurrences 246 247 createDirectory p = 248 do expandTo p 249 modifyItem p $ Just $ SubTree emptyTree 250 251 unlink p = 252 do expandTo p 253 modifyItem p Nothing 254 255 rename from to = 256 do from' <- expandTo from 257 to' <- expandTo to 258 tr <- gets tree 259 let item = find tr from' 260 found_to = find tr to' 261 unless (isNothing found_to) $ 262 fail $ "Error renaming: destination " ++ show to ++ " exists." 263 unless (isNothing item) $ do 264 modifyItem from Nothing 265 modifyItem to item 266 renameChanged from to 267 268 copy from to = 269 do from' <- expandTo from 270 to' <- expandTo to 271 tr <- gets tree 272 let item = find tr from' 273 unless (isNothing item) $ modifyItem to item 274 275 existsAnycase :: (Functor m, Monad m) => Relative -> TreeMonad m Bool 276 existsAnycase (directory -> Atomic) = return True 277 existsAnycase (directory -> x :/: xs) = 278 do wd <- currentDirectory 279 Just tree <- gets (flip findTree wd . tree) 280 let subs = [ relative </> n | (n, _) <- listImmediate tree 281 , BSC.map toLower n == BSC.map toLower x ] 282 or `fmap` forM subs (\path -> do 283 file <- fileExists path 284 if file then return True 285 else withDirectory path (existsAnycase xs)) 286 287 findM' :: forall m a e. (Monad m, Functor m) 288 => (Tree m -> Relative -> a) -> Tree m -> Relative -> m a 289 findM' what t path = fst <$> virtualTreeMonad (look path) t 290 where look :: Relative -> TreeMonad m a 291 look = expandTo >=> \p' -> flip what p' <$> gets tree 292 293 findM :: (Monad m, Functor m) => Tree m -> Relative -> m (Maybe (TreeItem m)) 294 findM = findM' find 295 296 findTreeM :: (Monad m, Functor m) => Tree m -> Relative -> m (Maybe (Tree m)) 297 findTreeM = findM' findTree 298 299 findFileM :: (Monad m, Functor m) => Tree m -> Relative -> m (Maybe (Blob m)) 300 findFileM = findM' findFile