1 {-# LANGUAGE ScopedTypeVariables, BangPatterns, TypeSynonymInstances,
    2              UndecidableInstances, ViewPatterns, FlexibleContexts #-}
    3 
    4 -- | An experimental monadic interface to Tree mutation. The main idea is to
    5 -- simulate IO-ish manipulation of real filesystem (that's the state part of
    6 -- the monad), and to keep memory usage down by reasonably often dumping the
    7 -- intermediate data to disk and forgetting it. The monad interface itself is
    8 -- generic, and a number of actual implementations can be used. This module
    9 -- provides just 'virtualTreeIO' that never writes any changes, but may trigger
   10 -- filesystem reads as appropriate.
   11 module Storage.Hashed.Monad
   12     ( virtualTreeIO, virtualTreeMonad
   13     , readFile, writeFile, createDirectory, rename, copy, unlink
   14     , fileExists, directoryExists, exists, existsAnycase, withDirectory
   15     , currentDirectory, currentTree
   16     , TreeState, TreeMonad, TreeIO, runTreeMonad
   17     , initialState, replaceItem
   18 
   19     , findM, findTreeM, findFileM
   20     ) where
   21 
   22 import Prelude hiding ( readFile, writeFile )
   23 
   24 import Storage.Hashed.Path
   25 import Storage.Hashed.Tree
   26 import Storage.Hashed.Hash
   27 
   28 import Control.Monad( (>=>) )
   29 import Control.Applicative( (<$>) )
   30 
   31 import Data.List( sortBy )
   32 import Data.Int( Int64 )
   33 import Data.Char( toLower )
   34 import Data.Maybe( isNothing, isJust )
   35 
   36 import qualified Data.ByteString.Lazy.Char8 as BL
   37 import qualified Data.ByteString.Char8 as BSC
   38 import Control.Monad.RWS.Strict
   39 import qualified Data.Set as S
   40 import qualified Data.Map as M
   41 
   42 type Changed = M.Map Relative (Int64, Int64) -- size, age
   43 
   44 -- | Internal state of the 'TreeIO' monad. Keeps track of the current Tree
   45 -- content, unsync'd changes and a current working directory (of the monad).
   46 data TreeState m = TreeState { tree :: !(Tree m)
   47                              , changed :: !Changed
   48                              , changesize :: !Int64
   49                              , maxage :: !Int64
   50                              , updateHash :: TreeItem m -> m Hash
   51                              , update :: Relative -> TreeItem m -> TreeMonad m (TreeItem m) }
   52 
   53 -- | A 'TreeIO' monad. A sort of like IO but it keeps a 'TreeState' around as well,
   54 -- which is a sort of virtual filesystem. Depending on how you obtained your
   55 -- 'TreeIO', the actions in your virtual filesystem get somehow reflected in the
   56 -- actual real filesystem. For 'virtualTreeIO', nothing happens in real
   57 -- filesystem, however with 'plainTreeIO', the plain tree will be updated every
   58 -- now and then, and with 'hashedTreeIO' a darcs-style hashed tree will get
   59 -- updated.
   60 type TreeMonad m = RWST Relative () (TreeState m) m
   61 type TreeIO = TreeMonad IO
   62 
   63 class (Functor m, Monad m) => TreeRO m where
   64     currentDirectory :: m Relative
   65     withDirectory :: Relative -> m a -> m a
   66     expandTo :: Relative -> m Relative
   67     -- | Grab content of a file in the current Tree at the given path.
   68     readFile :: Relative -> m BL.ByteString
   69     -- | Check for existence of a node (file or directory, doesn't matter).
   70     exists :: Relative -> m Bool
   71     -- | Check for existence of a directory.
   72     directoryExists :: Relative -> m Bool
   73     -- | Check for existence of a file.
   74     fileExists :: Relative -> m Bool
   75 
   76 class TreeRO m => TreeRW m where
   77     -- | Change content of a file at a given path. The change will be
   78     -- eventually flushed to disk, but might be buffered for some time.
   79     writeFile :: Relative -> BL.ByteString -> m ()
   80     createDirectory :: Relative -> m ()
   81     unlink :: Relative -> m ()
   82     rename :: Relative -> Relative -> m ()
   83     copy   :: Relative -> Relative -> m ()
   84 
   85 initialState :: Tree m -> (TreeItem m -> m Hash)
   86                 -> (Relative -> TreeItem m -> TreeMonad m (TreeItem m)) -> TreeState m
   87 initialState t uh u = TreeState { tree = t
   88                                 , changed = M.empty
   89                                 , changesize = 0
   90                                 , updateHash = uh
   91                                 , maxage = 0
   92                                 , update = u }
   93 
   94 flush :: (Functor m, Monad m) => TreeMonad m ()
   95 flush = do current <- get
   96            changed' <- map fst <$> M.toList <$> gets changed
   97            dirs' <- gets tree >>= \t -> return [ path | (path, SubTree s) <- list t ]
   98            modify $ \st -> st { changed = M.empty, changesize = 0 }
   99            forM_ (changed' ++ dirs' ++ [root]) flushItem
  100 
  101 runTreeMonad' :: (Functor m, Monad m) => TreeMonad m a -> TreeState m -> m (a, Tree m)
  102 runTreeMonad' action initial = do
  103   (out, final, _) <- runRWST action root initial
  104   return (out, tree final)
  105 
  106 runTreeMonad :: (Functor m, Monad m) => TreeMonad m a -> TreeState m -> m (a, Tree m)
  107 runTreeMonad action initial = do
  108   let action' = do x <- action
  109                    flush
  110                    return x
  111   runTreeMonad' action' initial
  112 
  113 -- | Run a TreeIO action without storing any changes. This is useful for
  114 -- running monadic tree mutations for obtaining the resulting Tree (as opposed
  115 -- to their effect of writing a modified tree to disk). The actions can do both
  116 -- read and write -- reads are passed through to the actual filesystem, but the
  117 -- writes are held in memory in a form of modified Tree.
  118 virtualTreeMonad :: (Functor m, Monad m) => TreeMonad m a -> Tree m -> m (a, Tree m)
  119 virtualTreeMonad action t = runTreeMonad' action $
  120                                initialState t (\_ -> return NoHash) (\_ x -> return x)
  121 
  122 virtualTreeIO :: TreeIO a -> Tree IO -> IO (a, Tree IO)
  123 virtualTreeIO = virtualTreeMonad
  124 
  125 currentTree :: (Monad m) => TreeMonad m (Tree m)
  126 currentTree = gets tree
  127 
  128 -- | Modifies an item in the current Tree. This action keeps an account of the
  129 -- modified data, in changed and changesize, for subsequent flush
  130 -- operations. Any modifications (as in "modifyTree") are allowed.
  131 modifyItem :: (Functor m, Monad m)
  132             => Relative -> Maybe (TreeItem m) -> TreeMonad m ()
  133 modifyItem path item = do
  134   path' <- (+/+ path) `fmap` currentDirectory
  135   age <- gets maxage
  136   changed' <- gets changed
  137   let getsize (Just (File b)) = lift (BL.length `fmap` readBlob b)
  138       getsize _ = return 0
  139   size <- getsize item
  140   let change = case M.lookup path' changed' of
  141         Nothing -> size
  142         Just (oldsize, _) -> size - oldsize
  143 
  144   modify $ \st -> st { tree = modifyTree (tree st) path' item
  145                      , changed = M.insert path' (size, age) (changed st)
  146                      , maxage = age + 1
  147                      , changesize = (changesize st + change) }
  148 
  149 renameChanged from to = modify $ \st -> st { changed = rename' $ changed st }
  150   where rename' = M.fromList . map renameone . M.toList
  151         renameone (x, d) | Just suff <- suffix from x = (to +/+ suff, d)
  152                          | otherwise = (x, d)
  153 
  154 -- | Replace an item with a new version without modifying the content of the
  155 -- tree. This does not do any change tracking. Ought to be only used from a
  156 -- 'sync' implementation for a particular storage format. The presumed use-case
  157 -- is that an existing in-memory Blob is replaced with a one referring to an
  158 -- on-disk file.
  159 replaceItem :: (Functor m, Monad m)
  160             => Relative -> Maybe (TreeItem m) -> TreeMonad m ()
  161 replaceItem path item = do
  162   path' <- (+/+ path) `fmap` currentDirectory
  163   modify $ \st -> st { tree = modifyTree (tree st) path' item }
  164 
  165 flushItem :: forall e m. (Monad m, Functor m) => Relative -> TreeMonad m ()
  166 flushItem path =
  167   do current <- gets tree
  168      case find current path of
  169        Nothing -> return () -- vanished, do nothing
  170        Just x -> do y <- fixHash x
  171                     new <- gets update >>= ($ y) . ($ path)
  172                     replaceItem path (Just new)
  173     where fixHash :: TreeItem m -> TreeMonad m (TreeItem m)
  174           fixHash f@(File (Blob con NoHash)) = do
  175             hash <- gets updateHash >>= \x -> lift $ x f
  176             return $ File $ Blob con hash
  177           fixHash (SubTree s) | treeHash s == NoHash =
  178             gets updateHash >>= \f -> SubTree <$> lift (addMissingHashes f s)
  179           fixHash x = return x
  180 
  181 
  182 -- | If buffers are becoming large, sync, otherwise do nothing.
  183 flushSome :: (Monad m, Functor m) => TreeMonad m ()
  184 flushSome = do x <- gets changesize
  185                when (x > megs 100) $ do
  186                  remaining <- go =<< sortBy age <$> M.toList <$> gets changed
  187                  modify $ \s -> s { changed = M.fromList remaining }
  188   where go [] = return []
  189         go ((path, (size, age_)):chs) = do
  190           x <- (\s -> s - size) <$> gets changesize
  191           flushItem path
  192           modify $ \s -> s { changesize = x }
  193           if (x > megs 80) then go chs
  194                            else return $ chs
  195         megs = (* (1024 * 1024))
  196         age (_, (_, a)) (_, (_, b)) = compare a b
  197 
  198 instance (Monad m) => TreeRO (TreeMonad m) where
  199     expandTo p =
  200         do t <- gets tree
  201            p' <- (+/+ p) `fmap` ask
  202            let amend = do t' <- lift $ expandPath t p'
  203                           modify $ \st -> st { tree = t' }
  204            case find t p' of
  205              Nothing -> amend
  206              Just (Stub _ _) -> amend
  207              _ -> return ()
  208            return p'
  209 
  210     fileExists p =
  211         do p' <- expandTo p
  212            (isJust . (flip findFile p')) `fmap` gets tree
  213 
  214     directoryExists p =
  215         do p' <- expandTo p
  216            (isJust . (flip findTree p')) `fmap` gets tree
  217 
  218     exists p =
  219         do p' <- expandTo p
  220            (isJust . (flip find p')) `fmap` gets tree
  221 
  222     readFile p =
  223         do p' <- expandTo p
  224            t <- gets tree
  225            let f = findFile t p'
  226            case f of
  227              Nothing -> fail $ "No such file " ++ show p'
  228              Just x -> lift (readBlob x)
  229 
  230     currentDirectory = ask
  231     withDirectory dir act = do
  232       dir' <- expandTo dir
  233       local (\old -> dir') act
  234 
  235 instance (Functor m, Monad m) => TreeRW (TreeMonad m) where
  236     writeFile p con =
  237         do expandTo p
  238            modifyItem p (Just blob)
  239            flushSome
  240         where blob = File $ Blob (return con) hash
  241               hash = NoHash -- we would like to say "sha256 con" here, but due
  242                             -- to strictness of Hash in Blob, this would often
  243                             -- lead to unnecessary computation which would then
  244                             -- be discarded anyway; we rely on the sync
  245                             -- implementation to fix up any NoHash occurrences
  246 
  247     createDirectory p =
  248         do expandTo p
  249            modifyItem p $ Just $ SubTree emptyTree
  250 
  251     unlink p =
  252         do expandTo p
  253            modifyItem p Nothing
  254 
  255     rename from to =
  256         do from' <- expandTo from
  257            to' <- expandTo to
  258            tr <- gets tree
  259            let item = find tr from'
  260                found_to = find tr to'
  261            unless (isNothing found_to) $
  262                   fail $ "Error renaming: destination " ++ show to ++ " exists."
  263            unless (isNothing item) $ do
  264                   modifyItem from Nothing
  265                   modifyItem to item
  266                   renameChanged from to
  267 
  268     copy from to =
  269         do from' <- expandTo from
  270            to' <- expandTo to
  271            tr <- gets tree
  272            let item = find tr from'
  273            unless (isNothing item) $ modifyItem to item
  274 
  275 existsAnycase :: (Functor m, Monad m) => Relative -> TreeMonad m Bool
  276 existsAnycase (directory -> Atomic) = return True
  277 existsAnycase (directory -> x :/: xs) =
  278   do wd <- currentDirectory
  279      Just tree <- gets (flip findTree wd . tree)
  280      let subs = [ relative </> n  | (n, _) <- listImmediate tree
  281                                   , BSC.map toLower n == BSC.map toLower x ]
  282      or `fmap` forM subs (\path -> do
  283        file <- fileExists path
  284        if file then return True
  285                else withDirectory path (existsAnycase xs))
  286 
  287 findM' :: forall m a e. (Monad m, Functor m)
  288        => (Tree m -> Relative -> a) -> Tree m -> Relative -> m a
  289 findM' what t path = fst <$> virtualTreeMonad (look path) t
  290   where look :: Relative -> TreeMonad m a
  291         look = expandTo >=> \p' -> flip what p' <$> gets tree
  292 
  293 findM :: (Monad m, Functor m) => Tree m -> Relative -> m (Maybe (TreeItem m))
  294 findM = findM' find
  295 
  296 findTreeM :: (Monad m, Functor m) => Tree m -> Relative -> m (Maybe (Tree m))
  297 findTreeM = findM' findTree
  298 
  299 findFileM :: (Monad m, Functor m) => Tree m -> Relative -> m (Maybe (Blob m))
  300 findFileM = findM' findFile