1 {-# LANGUAGE ScopedTypeVariables, MultiParamTypeClasses, FlexibleInstances, ViewPatterns #-}
    2 
    3 -- | The abstract representation of a Tree and useful abstract utilities to
    4 -- handle those.
    5 module Storage.Hashed.Tree
    6     ( Tree, Blob(..), TreeItem(..), ItemType(..), Hash(..)
    7     , makeTree, makeTreeWithHash, emptyTree, emptyBlob, makeBlob, makeBlobBS
    8 
    9     -- * Unfolding stubbed (lazy) Trees.
   10     --
   11     -- | By default, Tree obtained by a read function is stubbed: it will
   12     -- contain Stub items that need to be executed in order to access the
   13     -- respective subtrees. 'expand' will produce an unstubbed Tree.
   14     , expandUpdate, expand, expandPath
   15 
   16     -- * Tree access and lookup.
   17     , items, list, listImmediate, treeHash
   18     , lookup, find, findFile, findTree, itemHash, itemType
   19     , zipCommonFiles, zipFiles, zipTrees, diffTrees
   20 
   21     -- * Files (Blobs).
   22     , readBlob
   23 
   24     -- * Filtering trees.
   25     , FilterTree(..), restrict
   26 
   27     -- * Manipulating trees.
   28     , modifyTree, updateTree, partiallyUpdateTree, updateSubtrees, overlay
   29     , addMissingHashes ) where
   30 
   31 import Prelude hiding( lookup, filter, all )
   32 import Storage.Hashed.Path
   33 import Storage.Hashed.Hash
   34 
   35 import qualified Data.ByteString.Lazy.Char8 as BL
   36 import qualified Data.ByteString.Char8 as BS
   37 import qualified Data.Map as M
   38 
   39 import Data.Maybe( catMaybes, isNothing )
   40 import Data.List( union, sort )
   41 import Control.Applicative( (<$>) )
   42 
   43 --------------------------------
   44 -- Tree, Blob and friends
   45 --
   46 
   47 data Blob m = Blob !(m BL.ByteString) !Hash
   48 data TreeItem m = File !(Blob m)
   49                 | SubTree !(Tree m)
   50                 | Stub !(m (Tree m)) !Hash
   51 
   52 data ItemType = BlobType | TreeType deriving (Show, Eq)
   53 
   54 -- | Abstraction of a filesystem tree.
   55 -- Please note that the Tree returned by the respective read operations will
   56 -- have TreeStub items in it. To obtain a Tree without such stubs, call
   57 -- expand on it, eg.:
   58 --
   59 -- > tree <- readDarcsPristine "." >>= expand
   60 --
   61 -- When a Tree is expanded, it becomes \"final\". All stubs are forced and the
   62 -- Tree can be traversed purely. Access to actual file contents stays in IO
   63 -- though.
   64 --
   65 -- A Tree may have a Hash associated with it. A pair of Tree's is identical
   66 -- whenever their hashes are (the reverse need not hold, since not all Trees
   67 -- come equipped with a hash).
   68 data Tree m = Tree { items :: (M.Map Name (TreeItem m))
   69                    -- | Get hash of a Tree. This is guaranteed to uniquely
   70                    -- identify the Tree (including any blob content), as far as
   71                    -- cryptographic hashes are concerned. Sha256 is recommended.
   72                    , treeHash :: !Hash }
   73 
   74 listImmediate :: Tree m -> [(Name, TreeItem m)]
   75 listImmediate = M.toList . items
   76 
   77 -- | Get a hash of a TreeItem. May be Nothing.
   78 itemHash :: TreeItem m -> Hash
   79 itemHash (File (Blob _ h)) = h
   80 itemHash (SubTree t) = treeHash t
   81 itemHash (Stub _ h) = h
   82 
   83 itemType :: TreeItem m -> ItemType
   84 itemType (File _) = BlobType
   85 itemType (SubTree _) = TreeType
   86 itemType (Stub _ _) = TreeType
   87 
   88 emptyTree :: (Monad m) => Tree m
   89 emptyTree = Tree { items = M.empty
   90                  , treeHash = NoHash }
   91 
   92 emptyBlob :: (Monad m) => Blob m
   93 emptyBlob = Blob (return BL.empty) NoHash
   94 
   95 makeBlob :: (Monad m) => BL.ByteString -> Blob m
   96 makeBlob str = Blob (return str) (sha256 str)
   97 
   98 makeBlobBS :: (Monad m) => BS.ByteString -> Blob m
   99 makeBlobBS s' = let s = BL.fromChunks [s'] in Blob (return s) (sha256 s)
  100 
  101 makeTree :: (Monad m) => [(Name,TreeItem m)] -> Tree m
  102 makeTree l = Tree { items = M.fromList l
  103                   , treeHash = NoHash }
  104 
  105 makeTreeWithHash :: (Monad m) => [(Name,TreeItem m)] -> Hash -> Tree m
  106 makeTreeWithHash l h = Tree { items = M.fromList l
  107                             , treeHash = h }
  108 
  109 -----------------------------------
  110 -- Tree access and lookup
  111 --
  112 
  113 -- | Look up a 'Tree' item (an immediate subtree or blob).
  114 lookup :: Tree m -> Name -> Maybe (TreeItem m)
  115 lookup t n = M.lookup n (items t)
  116 
  117 find' :: TreeItem m -> Relative -> Maybe (TreeItem m)
  118 find' t (directory -> Atomic) = Just t
  119 find' (SubTree t) (directory -> d :/: rest) =
  120     case lookup t d of
  121       Just sub -> find' sub rest
  122       Nothing -> Nothing
  123 find' _ _ = Nothing
  124 
  125 -- | Find a 'TreeItem' by its path. Gives 'Nothing' if the path is invalid.
  126 find :: Tree m -> Relative -> Maybe (TreeItem m)
  127 find = find' . SubTree
  128 
  129 -- | Find a 'Blob' by its path. Gives 'Nothing' if the path is invalid, or does
  130 -- not point to a Blob.
  131 findFile :: Tree m -> Relative -> Maybe (Blob m)
  132 findFile t p = case find t p of
  133                  Just (File x) -> Just x
  134                  _ -> Nothing
  135 
  136 -- | Find a 'Tree' by its path. Gives 'Nothing' if the path is invalid, or does
  137 -- not point to a Tree.
  138 findTree :: Tree m -> Relative -> Maybe (Tree m)
  139 findTree t p = case find t p of
  140                  Just (SubTree x) -> Just x
  141                  _ -> Nothing
  142 
  143 -- | List all contents of a 'Tree'.
  144 list :: Tree m -> [(Relative, TreeItem m)]
  145 list t_ = paths t_ root
  146     where paths t p = [ (p </> n, i)
  147                           | (n,i) <- listImmediate t ] ++
  148                     concat [ paths subt (p </> subn)
  149                              | (subn, SubTree subt) <- listImmediate t ]
  150 
  151 expandUpdate :: (Monad m) => (Relative -> Tree m -> m (Tree m)) -> Tree m -> m (Tree m)
  152 expandUpdate update t_ = go root t_
  153     where go path t = do
  154             let subtree (name, sub) = do tree <- go (path </> name) =<< unstub sub
  155                                          return (name, SubTree tree)
  156             expanded <- mapM subtree [ x | x@(_, item) <- listImmediate t, isSub item ]
  157             let orig = [ i | i <- listImmediate t, not $ isSub $ snd i ]
  158                 orig_map = M.filter (not . isSub) (items t)
  159                 expanded_map = M.fromList expanded
  160                 tree = t { items = M.union orig_map expanded_map }
  161             update path tree
  162 
  163 -- | Expand a stubbed Tree into a one with no stubs in it. You might want to
  164 -- filter the tree before expanding to save IO. This is the basic
  165 -- implementation, which may be overriden by some Tree instances (this is
  166 -- especially true of the Index case).
  167 expand :: (Monad m) => Tree m -> m (Tree m)
  168 expand = expandUpdate $ \_ -> return
  169 
  170 -- | Unfold a path in a (stubbed) Tree, such that the leaf node of the path is
  171 -- reachable without crossing any stubs. Moreover, the leaf ought not be a Stub
  172 -- in the resulting Tree. A non-existent path is expanded as far as it can be.
  173 expandPath :: (Monad m) => Tree m -> Relative -> m (Tree m)
  174 expandPath t_ path_ = expand' t_ path_
  175     where expand' t (directory -> Atomic) = return t
  176           expand' t (directory -> n :/: rest) =
  177             case lookup t n of
  178               (Just item) | isSub item -> amend t n rest =<< unstub item
  179               _ -> return t -- fail $ "Descent error in expandPath: " ++ show path_
  180           amend t name rest sub = do
  181             sub' <- expand' sub rest
  182             let tree = t { items = M.insert name (SubTree sub') (items t) }
  183             return tree
  184 
  185 class (Monad m) => FilterTree a m where
  186     -- | Given @pred tree@, produce a 'Tree' that only has items for which
  187     -- @pred@ returns @True@.
  188     -- The tree might contain stubs. When expanded, these will be subject to
  189     -- filtering as well.
  190     filter :: (Relative -> TreeItem m -> Bool) -> a m -> a m
  191 
  192 instance (Monad m) => FilterTree Tree m where
  193     filter predicate t_ = filter' t_ root
  194         where filter' t path =
  195                   let subs = (catMaybes [ (,) name `fmap` wibble path name item
  196                                               | (name,item) <- listImmediate t ])
  197                   in t { items = M.mapMaybeWithKey (wibble path) $ items t }
  198               wibble path name item =
  199                   let npath = path </> name in
  200                       if predicate npath item
  201                          then Just $ filterSub npath item
  202                          else Nothing
  203               filterSub npath (SubTree t) = SubTree $ filter' t npath
  204               filterSub npath (Stub stub h) =
  205                   Stub (do x <- stub
  206                            return $ filter' x npath) h
  207               filterSub _ x = x
  208 
  209 -- | Given two Trees, a @guide@ and a @tree@, produces a new Tree that is a
  210 -- identical to @tree@, but only has those items that are present in both
  211 -- @tree@ and @guide@. The @guide@ Tree may not contain any stubs.
  212 restrict :: (FilterTree t m, Monad n) => Tree n -> t m -> t m
  213 restrict guide tree = filter accept tree
  214     where accept path item =
  215               case (find guide path, item) of
  216                 (Just (SubTree _), SubTree _) -> True
  217                 (Just (SubTree _), Stub _ _) -> True
  218                 (Just (File _), File _) -> True
  219                 (Just (Stub _ _), _) ->
  220                     error "*sulk* Go away, you, you precondition violator!"
  221                 (_, _) -> False
  222 
  223 -- | Read a Blob into a Lazy ByteString. Might be backed by an mmap, use with
  224 -- care.
  225 readBlob :: Blob m -> m BL.ByteString
  226 readBlob (Blob r _) = r
  227 
  228 -- | For every pair of corresponding blobs from the two supplied trees,
  229 -- evaluate the supplied function and accumulate the results in a list. Hint:
  230 -- to get IO actions through, just use sequence on the resulting list.
  231 -- NB. This won't expand any stubs.
  232 zipCommonFiles :: (Relative -> Blob m -> Blob m -> a) -> Tree m -> Tree m -> [a]
  233 zipCommonFiles f a b = catMaybes [ flip (f p) x `fmap` findFile a p
  234                                    | (p, File x) <- list b ]
  235 
  236 -- | For each file in each of the two supplied trees, evaluate the supplied
  237 -- function (supplying the corresponding file from the other tree, or Nothing)
  238 -- and accumulate the results in a list. Hint: to get IO actions through, just
  239 -- use sequence on the resulting list.  NB. This won't expand any stubs.
  240 zipFiles :: (Relative -> Maybe (Blob m) -> Maybe (Blob m) -> a)
  241          -> Tree m -> Tree m -> [a]
  242 zipFiles f a b = [ f p (findFile a p) (findFile b p)
  243                    | p <- paths a `sortedUnion` paths b ]
  244     where paths t = sort [ p | (p, File _) <- list t ]
  245 
  246 zipTrees :: (Relative -> Maybe (TreeItem m) -> Maybe (TreeItem m) -> a)
  247          -> Tree m -> Tree m -> [a]
  248 zipTrees f a b = [ f p (find a p) (find b p)
  249                    | p <- reverse (paths a `sortedUnion` paths b) ]
  250     where paths t = sort [ p | (p, _) <- list t ]
  251 
  252 -- | Helper function for taking the union of AnchoredPath lists that
  253 -- are already sorted.  This function does not check the precondition
  254 -- so use it carefully.
  255 sortedUnion :: [Relative] -> [Relative] -> [Relative]
  256 sortedUnion [] ys = ys
  257 sortedUnion xs [] = xs
  258 sortedUnion a@(x:xs) b@(y:ys) = case compare x y of
  259                                 LT -> x : sortedUnion xs b
  260                                 EQ -> x : sortedUnion xs ys
  261                                 GT -> y : sortedUnion a ys
  262 
  263 -- | Cautiously extracts differing subtrees from a pair of Trees. It will never
  264 -- do any unneccessary expanding. Tree hashes are used to cut the comparison as
  265 -- high up the Tree branches as possible. The result is a pair of trees that do
  266 -- not share any identical subtrees. They are derived from the first and second
  267 -- parameters respectively and they are always fully expanded. It might be
  268 -- advantageous to feed the result into 'zipFiles' or 'zipTrees'.
  269 diffTrees :: forall m. (Functor m, Monad m) => Tree m -> Tree m -> m (Tree m, Tree m)
  270 diffTrees left right =
  271             if treeHash left `match` treeHash right
  272                then return (emptyTree, emptyTree)
  273                else diff left right
  274   where isFile (File _) = True
  275         isFile _ = False
  276         notFile = not . isFile
  277         isEmpty = null . listImmediate
  278         subtree :: TreeItem m -> m (Tree m)
  279         subtree (Stub x _) = x
  280         subtree (SubTree x) = return x
  281         subtree (File _) = error "diffTrees tried to descend a File as a subtree"
  282         maybeUnfold (Stub x _) = SubTree `fmap` (x >>= expand)
  283         maybeUnfold (SubTree x) = SubTree `fmap` expand x
  284         maybeUnfold i = return i
  285         immediateN t = [ n | (n, _) <- listImmediate t ]
  286         diff left' right' = do
  287           is <- sequence [
  288                    case (lookup left' n, lookup right' n) of
  289                      (Just l, Nothing) -> do
  290                        l' <- maybeUnfold l
  291                        return (n, Just l', Nothing)
  292                      (Nothing, Just r) -> do
  293                        r' <- maybeUnfold r
  294                        return (n, Nothing, Just r')
  295                      (Just l, Just r)
  296                          | itemHash l `match` itemHash r ->
  297                              return (n, Nothing, Nothing)
  298                          | notFile l && notFile r ->
  299                              do x <- subtree l
  300                                 y <- subtree r
  301                                 (x', y') <- diffTrees x y
  302                                 if isEmpty x' && isEmpty y'
  303                                    then return (n, Nothing, Nothing)
  304                                    else return (n, Just $ SubTree x', Just $ SubTree y')
  305                          | isFile l && isFile r ->
  306                              return (n, Just l, Just r)
  307                          | otherwise ->
  308                              do l' <- maybeUnfold l
  309                                 r' <- maybeUnfold r
  310                                 return (n, Just l', Just r')
  311                      _ -> error "n lookups failed"
  312                    | n <- immediateN left' `union` immediateN right' ]
  313           let is_l = [ (n, l) | (n, Just l, _) <- is ]
  314               is_r = [ (n, r) | (n, _, Just r) <- is ]
  315           return (makeTree is_l, makeTree is_r)
  316 
  317 -- | Modify a Tree (by replacing, or removing or adding items).
  318 modifyTree :: (Monad m) => Tree m -> Relative -> Maybe (TreeItem m) -> Tree m
  319 modifyTree t_ p_ i_ = snd $ go t_ p_ i_
  320   where fix t unmod items' = (unmod, t { items = countmap items' `seq` items'
  321                                        , treeHash = if unmod then treeHash t else NoHash })
  322 
  323         go t (isRoot -> True) (Just (SubTree sub)) = (treeHash t `match` treeHash sub, sub)
  324 
  325         go t (singleton -> Just n) (Just item) = fix t unmod items'
  326             where !items' = M.insert n item (items t)
  327                   !unmod = itemHash item `match` case lookup t n of
  328                                              Nothing -> NoHash
  329                                              Just i -> itemHash i
  330 
  331         go t (singleton -> Just n) Nothing = fix t unmod items'
  332             where !items' = M.delete n (items t)
  333                   !unmod = isNothing $ lookup t n
  334 
  335         go t path@(directory -> n :/: r) item = fix t unmod items'
  336             where subtree s = go s r item
  337                   !items' = M.insert n sub (items t)
  338                   !sub = snd sub'
  339                   !unmod = fst sub'
  340                   !sub' = case lookup t n of
  341                     Just (SubTree s) -> let (mod, sub) = subtree s in (mod, SubTree sub)
  342                     Just (Stub s _) -> (False, Stub (do x <- s
  343                                                         return $! snd $! subtree x) NoHash)
  344                     Nothing -> (False, SubTree $! snd $! subtree emptyTree)
  345                     _ -> error $ "Modify tree at " ++ show path
  346 
  347         go _ (isRoot -> True) (Just (Stub _ _)) =
  348             error $ "BUG: Error descending in modifyTree, path = " ++ show p_
  349         go _ (isRoot -> True) (Just (File _)) =
  350             error $ "BUG: Error descending in modifyTree, path = " ++ show p_
  351         go _ (isRoot -> True) Nothing =
  352             error $ "BUG: Error descending in modifyTree, path = " ++ show p_
  353 
  354 countmap = M.fold (\_ i -> i + 1) 0
  355 
  356 updateSubtrees :: (Tree m -> Tree m) -> Tree m -> Tree m
  357 updateSubtrees fun t =
  358     fun $ t { items = M.mapWithKey (curry $ snd . update) $ items t
  359             , treeHash = NoHash }
  360   where update (k, SubTree s) = (k, SubTree $ updateSubtrees fun s)
  361         update (k, File f) = (k, File f)
  362         update (_, Stub _ _) = error "Stubs not supported in updateTreePostorder"
  363 
  364 -- | Does /not/ expand the tree.
  365 updateTree :: (Functor m, Monad m) => (TreeItem m -> m (TreeItem m)) -> Tree m -> m (Tree m)
  366 updateTree fun t = partiallyUpdateTree fun (\_ _ -> True) t
  367 
  368 -- | Does /not/ expand the tree.
  369 partiallyUpdateTree :: (Functor m, Monad m) => (TreeItem m -> m (TreeItem m))
  370                        -> (Relative -> TreeItem m -> Bool) -> Tree m -> m (Tree m)
  371 partiallyUpdateTree fun pred t' = go root t'
  372   where go path t = do
  373           items' <- M.fromList <$> mapM (maybeupdate path) (listImmediate t)
  374           SubTree t' <- fun . SubTree $ t { items = items'
  375                                           , treeHash = NoHash }
  376           return t'
  377         maybeupdate path (k, item) = case pred (path </> k) item of
  378           True -> update (path </> k) (k, item)
  379           False -> return (k, item)
  380         update path (k, SubTree tree) = (\new -> (k, SubTree new)) <$> go path tree
  381         update    _ (k, item) = (\new -> (k, new)) <$> fun item
  382 
  383 -- | Lay one tree over another. The resulting Tree will look like the base (1st
  384 -- parameter) Tree, although any items also present in the overlay Tree will be
  385 -- taken from the overlay. It is not allowed to overlay a different kind of an
  386 -- object, nor it is allowed for the overlay to add new objects to base.  This
  387 -- means that the overlay Tree should be a subset of the base Tree (although
  388 -- any extraneous items will be ignored by the implementation).
  389 overlay :: (Functor m, Monad m) => Tree m -> Tree m -> Tree m
  390 overlay base over = Tree { items = M.fromList immediate
  391                          , treeHash = NoHash }
  392     where immediate = [ (n, get n) | (n, _) <- listImmediate base ]
  393           get n = case (M.lookup n $ items base, M.lookup n $ items over) of
  394                     (Just (File _), Just f@(File _)) -> f
  395                     (Just (SubTree b), Just (SubTree o)) -> SubTree $ overlay b o
  396                     (Just (Stub b _), Just (SubTree o)) -> Stub (flip overlay o `fmap` b) NoHash
  397                     (Just (SubTree b), Just (Stub o _)) -> Stub (overlay b `fmap` o) NoHash
  398                     (Just (Stub b _), Just (Stub o _)) -> Stub (do o' <- o
  399                                                                    b' <- b
  400                                                                    return $ overlay b' o') NoHash
  401                     (Just x, _) -> x
  402                     (_, _) -> error $ "Unexpected case in overlay at get " ++ show n ++ "."
  403 
  404 addMissingHashes :: (Monad m, Functor m) => (TreeItem m -> m Hash) -> Tree m -> m (Tree m)
  405 addMissingHashes make = updateTree update -- use partiallyUpdateTree here
  406     where update (SubTree t) = make (SubTree t) >>= \x -> return $ SubTree (t { treeHash = x })
  407           update (File blob@(Blob con NoHash)) =
  408               do hash <- make $ File blob
  409                  return $ File (Blob con hash)
  410           update (Stub s NoHash) = update . SubTree =<< s
  411           update x = return x
  412 
  413 ------ Private utilities shared among multiple functions. --------
  414 
  415 unstub :: (Monad m) => TreeItem m -> m (Tree m)
  416 unstub (Stub s _) = s
  417 unstub (SubTree s) = return s
  418 
  419 isSub :: TreeItem m -> Bool
  420 isSub (File _) = False
  421 isSub _ = True
  422