1 {-# LANGUAGE ScopedTypeVariables, MultiParamTypeClasses, FlexibleInstances, ViewPatterns #-} 2 3 -- | The abstract representation of a Tree and useful abstract utilities to 4 -- handle those. 5 module Storage.Hashed.Tree 6 ( Tree, Blob(..), TreeItem(..), ItemType(..), Hash(..) 7 , makeTree, makeTreeWithHash, emptyTree, emptyBlob, makeBlob, makeBlobBS 8 9 -- * Unfolding stubbed (lazy) Trees. 10 -- 11 -- | By default, Tree obtained by a read function is stubbed: it will 12 -- contain Stub items that need to be executed in order to access the 13 -- respective subtrees. 'expand' will produce an unstubbed Tree. 14 , expandUpdate, expand, expandPath 15 16 -- * Tree access and lookup. 17 , items, list, listImmediate, treeHash 18 , lookup, find, findFile, findTree, itemHash, itemType 19 , zipCommonFiles, zipFiles, zipTrees, diffTrees 20 21 -- * Files (Blobs). 22 , readBlob 23 24 -- * Filtering trees. 25 , FilterTree(..), restrict 26 27 -- * Manipulating trees. 28 , modifyTree, updateTree, partiallyUpdateTree, updateSubtrees, overlay 29 , addMissingHashes ) where 30 31 import Prelude hiding( lookup, filter, all ) 32 import Storage.Hashed.Path 33 import Storage.Hashed.Hash 34 35 import qualified Data.ByteString.Lazy.Char8 as BL 36 import qualified Data.ByteString.Char8 as BS 37 import qualified Data.Map as M 38 39 import Data.Maybe( catMaybes, isNothing ) 40 import Data.List( union, sort ) 41 import Control.Applicative( (<$>) ) 42 43 -------------------------------- 44 -- Tree, Blob and friends 45 -- 46 47 data Blob m = Blob !(m BL.ByteString) !Hash 48 data TreeItem m = File !(Blob m) 49 | SubTree !(Tree m) 50 | Stub !(m (Tree m)) !Hash 51 52 data ItemType = BlobType | TreeType deriving (Show, Eq) 53 54 -- | Abstraction of a filesystem tree. 55 -- Please note that the Tree returned by the respective read operations will 56 -- have TreeStub items in it. To obtain a Tree without such stubs, call 57 -- expand on it, eg.: 58 -- 59 -- > tree <- readDarcsPristine "." >>= expand 60 -- 61 -- When a Tree is expanded, it becomes \"final\". All stubs are forced and the 62 -- Tree can be traversed purely. Access to actual file contents stays in IO 63 -- though. 64 -- 65 -- A Tree may have a Hash associated with it. A pair of Tree's is identical 66 -- whenever their hashes are (the reverse need not hold, since not all Trees 67 -- come equipped with a hash). 68 data Tree m = Tree { items :: (M.Map Name (TreeItem m)) 69 -- | Get hash of a Tree. This is guaranteed to uniquely 70 -- identify the Tree (including any blob content), as far as 71 -- cryptographic hashes are concerned. Sha256 is recommended. 72 , treeHash :: !Hash } 73 74 listImmediate :: Tree m -> [(Name, TreeItem m)] 75 listImmediate = M.toList . items 76 77 -- | Get a hash of a TreeItem. May be Nothing. 78 itemHash :: TreeItem m -> Hash 79 itemHash (File (Blob _ h)) = h 80 itemHash (SubTree t) = treeHash t 81 itemHash (Stub _ h) = h 82 83 itemType :: TreeItem m -> ItemType 84 itemType (File _) = BlobType 85 itemType (SubTree _) = TreeType 86 itemType (Stub _ _) = TreeType 87 88 emptyTree :: (Monad m) => Tree m 89 emptyTree = Tree { items = M.empty 90 , treeHash = NoHash } 91 92 emptyBlob :: (Monad m) => Blob m 93 emptyBlob = Blob (return BL.empty) NoHash 94 95 makeBlob :: (Monad m) => BL.ByteString -> Blob m 96 makeBlob str = Blob (return str) (sha256 str) 97 98 makeBlobBS :: (Monad m) => BS.ByteString -> Blob m 99 makeBlobBS s' = let s = BL.fromChunks [s'] in Blob (return s) (sha256 s) 100 101 makeTree :: (Monad m) => [(Name,TreeItem m)] -> Tree m 102 makeTree l = Tree { items = M.fromList l 103 , treeHash = NoHash } 104 105 makeTreeWithHash :: (Monad m) => [(Name,TreeItem m)] -> Hash -> Tree m 106 makeTreeWithHash l h = Tree { items = M.fromList l 107 , treeHash = h } 108 109 ----------------------------------- 110 -- Tree access and lookup 111 -- 112 113 -- | Look up a 'Tree' item (an immediate subtree or blob). 114 lookup :: Tree m -> Name -> Maybe (TreeItem m) 115 lookup t n = M.lookup n (items t) 116 117 find' :: TreeItem m -> Relative -> Maybe (TreeItem m) 118 find' t (directory -> Atomic) = Just t 119 find' (SubTree t) (directory -> d :/: rest) = 120 case lookup t d of 121 Just sub -> find' sub rest 122 Nothing -> Nothing 123 find' _ _ = Nothing 124 125 -- | Find a 'TreeItem' by its path. Gives 'Nothing' if the path is invalid. 126 find :: Tree m -> Relative -> Maybe (TreeItem m) 127 find = find' . SubTree 128 129 -- | Find a 'Blob' by its path. Gives 'Nothing' if the path is invalid, or does 130 -- not point to a Blob. 131 findFile :: Tree m -> Relative -> Maybe (Blob m) 132 findFile t p = case find t p of 133 Just (File x) -> Just x 134 _ -> Nothing 135 136 -- | Find a 'Tree' by its path. Gives 'Nothing' if the path is invalid, or does 137 -- not point to a Tree. 138 findTree :: Tree m -> Relative -> Maybe (Tree m) 139 findTree t p = case find t p of 140 Just (SubTree x) -> Just x 141 _ -> Nothing 142 143 -- | List all contents of a 'Tree'. 144 list :: Tree m -> [(Relative, TreeItem m)] 145 list t_ = paths t_ root 146 where paths t p = [ (p </> n, i) 147 | (n,i) <- listImmediate t ] ++ 148 concat [ paths subt (p </> subn) 149 | (subn, SubTree subt) <- listImmediate t ] 150 151 expandUpdate :: (Monad m) => (Relative -> Tree m -> m (Tree m)) -> Tree m -> m (Tree m) 152 expandUpdate update t_ = go root t_ 153 where go path t = do 154 let subtree (name, sub) = do tree <- go (path </> name) =<< unstub sub 155 return (name, SubTree tree) 156 expanded <- mapM subtree [ x | x@(_, item) <- listImmediate t, isSub item ] 157 let orig = [ i | i <- listImmediate t, not $ isSub $ snd i ] 158 orig_map = M.filter (not . isSub) (items t) 159 expanded_map = M.fromList expanded 160 tree = t { items = M.union orig_map expanded_map } 161 update path tree 162 163 -- | Expand a stubbed Tree into a one with no stubs in it. You might want to 164 -- filter the tree before expanding to save IO. This is the basic 165 -- implementation, which may be overriden by some Tree instances (this is 166 -- especially true of the Index case). 167 expand :: (Monad m) => Tree m -> m (Tree m) 168 expand = expandUpdate $ \_ -> return 169 170 -- | Unfold a path in a (stubbed) Tree, such that the leaf node of the path is 171 -- reachable without crossing any stubs. Moreover, the leaf ought not be a Stub 172 -- in the resulting Tree. A non-existent path is expanded as far as it can be. 173 expandPath :: (Monad m) => Tree m -> Relative -> m (Tree m) 174 expandPath t_ path_ = expand' t_ path_ 175 where expand' t (directory -> Atomic) = return t 176 expand' t (directory -> n :/: rest) = 177 case lookup t n of 178 (Just item) | isSub item -> amend t n rest =<< unstub item 179 _ -> return t -- fail $ "Descent error in expandPath: " ++ show path_ 180 amend t name rest sub = do 181 sub' <- expand' sub rest 182 let tree = t { items = M.insert name (SubTree sub') (items t) } 183 return tree 184 185 class (Monad m) => FilterTree a m where 186 -- | Given @pred tree@, produce a 'Tree' that only has items for which 187 -- @pred@ returns @True@. 188 -- The tree might contain stubs. When expanded, these will be subject to 189 -- filtering as well. 190 filter :: (Relative -> TreeItem m -> Bool) -> a m -> a m 191 192 instance (Monad m) => FilterTree Tree m where 193 filter predicate t_ = filter' t_ root 194 where filter' t path = 195 let subs = (catMaybes [ (,) name `fmap` wibble path name item 196 | (name,item) <- listImmediate t ]) 197 in t { items = M.mapMaybeWithKey (wibble path) $ items t } 198 wibble path name item = 199 let npath = path </> name in 200 if predicate npath item 201 then Just $ filterSub npath item 202 else Nothing 203 filterSub npath (SubTree t) = SubTree $ filter' t npath 204 filterSub npath (Stub stub h) = 205 Stub (do x <- stub 206 return $ filter' x npath) h 207 filterSub _ x = x 208 209 -- | Given two Trees, a @guide@ and a @tree@, produces a new Tree that is a 210 -- identical to @tree@, but only has those items that are present in both 211 -- @tree@ and @guide@. The @guide@ Tree may not contain any stubs. 212 restrict :: (FilterTree t m, Monad n) => Tree n -> t m -> t m 213 restrict guide tree = filter accept tree 214 where accept path item = 215 case (find guide path, item) of 216 (Just (SubTree _), SubTree _) -> True 217 (Just (SubTree _), Stub _ _) -> True 218 (Just (File _), File _) -> True 219 (Just (Stub _ _), _) -> 220 error "*sulk* Go away, you, you precondition violator!" 221 (_, _) -> False 222 223 -- | Read a Blob into a Lazy ByteString. Might be backed by an mmap, use with 224 -- care. 225 readBlob :: Blob m -> m BL.ByteString 226 readBlob (Blob r _) = r 227 228 -- | For every pair of corresponding blobs from the two supplied trees, 229 -- evaluate the supplied function and accumulate the results in a list. Hint: 230 -- to get IO actions through, just use sequence on the resulting list. 231 -- NB. This won't expand any stubs. 232 zipCommonFiles :: (Relative -> Blob m -> Blob m -> a) -> Tree m -> Tree m -> [a] 233 zipCommonFiles f a b = catMaybes [ flip (f p) x `fmap` findFile a p 234 | (p, File x) <- list b ] 235 236 -- | For each file in each of the two supplied trees, evaluate the supplied 237 -- function (supplying the corresponding file from the other tree, or Nothing) 238 -- and accumulate the results in a list. Hint: to get IO actions through, just 239 -- use sequence on the resulting list. NB. This won't expand any stubs. 240 zipFiles :: (Relative -> Maybe (Blob m) -> Maybe (Blob m) -> a) 241 -> Tree m -> Tree m -> [a] 242 zipFiles f a b = [ f p (findFile a p) (findFile b p) 243 | p <- paths a `sortedUnion` paths b ] 244 where paths t = sort [ p | (p, File _) <- list t ] 245 246 zipTrees :: (Relative -> Maybe (TreeItem m) -> Maybe (TreeItem m) -> a) 247 -> Tree m -> Tree m -> [a] 248 zipTrees f a b = [ f p (find a p) (find b p) 249 | p <- reverse (paths a `sortedUnion` paths b) ] 250 where paths t = sort [ p | (p, _) <- list t ] 251 252 -- | Helper function for taking the union of AnchoredPath lists that 253 -- are already sorted. This function does not check the precondition 254 -- so use it carefully. 255 sortedUnion :: [Relative] -> [Relative] -> [Relative] 256 sortedUnion [] ys = ys 257 sortedUnion xs [] = xs 258 sortedUnion a@(x:xs) b@(y:ys) = case compare x y of 259 LT -> x : sortedUnion xs b 260 EQ -> x : sortedUnion xs ys 261 GT -> y : sortedUnion a ys 262 263 -- | Cautiously extracts differing subtrees from a pair of Trees. It will never 264 -- do any unneccessary expanding. Tree hashes are used to cut the comparison as 265 -- high up the Tree branches as possible. The result is a pair of trees that do 266 -- not share any identical subtrees. They are derived from the first and second 267 -- parameters respectively and they are always fully expanded. It might be 268 -- advantageous to feed the result into 'zipFiles' or 'zipTrees'. 269 diffTrees :: forall m. (Functor m, Monad m) => Tree m -> Tree m -> m (Tree m, Tree m) 270 diffTrees left right = 271 if treeHash left `match` treeHash right 272 then return (emptyTree, emptyTree) 273 else diff left right 274 where isFile (File _) = True 275 isFile _ = False 276 notFile = not . isFile 277 isEmpty = null . listImmediate 278 subtree :: TreeItem m -> m (Tree m) 279 subtree (Stub x _) = x 280 subtree (SubTree x) = return x 281 subtree (File _) = error "diffTrees tried to descend a File as a subtree" 282 maybeUnfold (Stub x _) = SubTree `fmap` (x >>= expand) 283 maybeUnfold (SubTree x) = SubTree `fmap` expand x 284 maybeUnfold i = return i 285 immediateN t = [ n | (n, _) <- listImmediate t ] 286 diff left' right' = do 287 is <- sequence [ 288 case (lookup left' n, lookup right' n) of 289 (Just l, Nothing) -> do 290 l' <- maybeUnfold l 291 return (n, Just l', Nothing) 292 (Nothing, Just r) -> do 293 r' <- maybeUnfold r 294 return (n, Nothing, Just r') 295 (Just l, Just r) 296 | itemHash l `match` itemHash r -> 297 return (n, Nothing, Nothing) 298 | notFile l && notFile r -> 299 do x <- subtree l 300 y <- subtree r 301 (x', y') <- diffTrees x y 302 if isEmpty x' && isEmpty y' 303 then return (n, Nothing, Nothing) 304 else return (n, Just $ SubTree x', Just $ SubTree y') 305 | isFile l && isFile r -> 306 return (n, Just l, Just r) 307 | otherwise -> 308 do l' <- maybeUnfold l 309 r' <- maybeUnfold r 310 return (n, Just l', Just r') 311 _ -> error "n lookups failed" 312 | n <- immediateN left' `union` immediateN right' ] 313 let is_l = [ (n, l) | (n, Just l, _) <- is ] 314 is_r = [ (n, r) | (n, _, Just r) <- is ] 315 return (makeTree is_l, makeTree is_r) 316 317 -- | Modify a Tree (by replacing, or removing or adding items). 318 modifyTree :: (Monad m) => Tree m -> Relative -> Maybe (TreeItem m) -> Tree m 319 modifyTree t_ p_ i_ = snd $ go t_ p_ i_ 320 where fix t unmod items' = (unmod, t { items = countmap items' `seq` items' 321 , treeHash = if unmod then treeHash t else NoHash }) 322 323 go t (isRoot -> True) (Just (SubTree sub)) = (treeHash t `match` treeHash sub, sub) 324 325 go t (singleton -> Just n) (Just item) = fix t unmod items' 326 where !items' = M.insert n item (items t) 327 !unmod = itemHash item `match` case lookup t n of 328 Nothing -> NoHash 329 Just i -> itemHash i 330 331 go t (singleton -> Just n) Nothing = fix t unmod items' 332 where !items' = M.delete n (items t) 333 !unmod = isNothing $ lookup t n 334 335 go t path@(directory -> n :/: r) item = fix t unmod items' 336 where subtree s = go s r item 337 !items' = M.insert n sub (items t) 338 !sub = snd sub' 339 !unmod = fst sub' 340 !sub' = case lookup t n of 341 Just (SubTree s) -> let (mod, sub) = subtree s in (mod, SubTree sub) 342 Just (Stub s _) -> (False, Stub (do x <- s 343 return $! snd $! subtree x) NoHash) 344 Nothing -> (False, SubTree $! snd $! subtree emptyTree) 345 _ -> error $ "Modify tree at " ++ show path 346 347 go _ (isRoot -> True) (Just (Stub _ _)) = 348 error $ "BUG: Error descending in modifyTree, path = " ++ show p_ 349 go _ (isRoot -> True) (Just (File _)) = 350 error $ "BUG: Error descending in modifyTree, path = " ++ show p_ 351 go _ (isRoot -> True) Nothing = 352 error $ "BUG: Error descending in modifyTree, path = " ++ show p_ 353 354 countmap = M.fold (\_ i -> i + 1) 0 355 356 updateSubtrees :: (Tree m -> Tree m) -> Tree m -> Tree m 357 updateSubtrees fun t = 358 fun $ t { items = M.mapWithKey (curry $ snd . update) $ items t 359 , treeHash = NoHash } 360 where update (k, SubTree s) = (k, SubTree $ updateSubtrees fun s) 361 update (k, File f) = (k, File f) 362 update (_, Stub _ _) = error "Stubs not supported in updateTreePostorder" 363 364 -- | Does /not/ expand the tree. 365 updateTree :: (Functor m, Monad m) => (TreeItem m -> m (TreeItem m)) -> Tree m -> m (Tree m) 366 updateTree fun t = partiallyUpdateTree fun (\_ _ -> True) t 367 368 -- | Does /not/ expand the tree. 369 partiallyUpdateTree :: (Functor m, Monad m) => (TreeItem m -> m (TreeItem m)) 370 -> (Relative -> TreeItem m -> Bool) -> Tree m -> m (Tree m) 371 partiallyUpdateTree fun pred t' = go root t' 372 where go path t = do 373 items' <- M.fromList <$> mapM (maybeupdate path) (listImmediate t) 374 SubTree t' <- fun . SubTree $ t { items = items' 375 , treeHash = NoHash } 376 return t' 377 maybeupdate path (k, item) = case pred (path </> k) item of 378 True -> update (path </> k) (k, item) 379 False -> return (k, item) 380 update path (k, SubTree tree) = (\new -> (k, SubTree new)) <$> go path tree 381 update _ (k, item) = (\new -> (k, new)) <$> fun item 382 383 -- | Lay one tree over another. The resulting Tree will look like the base (1st 384 -- parameter) Tree, although any items also present in the overlay Tree will be 385 -- taken from the overlay. It is not allowed to overlay a different kind of an 386 -- object, nor it is allowed for the overlay to add new objects to base. This 387 -- means that the overlay Tree should be a subset of the base Tree (although 388 -- any extraneous items will be ignored by the implementation). 389 overlay :: (Functor m, Monad m) => Tree m -> Tree m -> Tree m 390 overlay base over = Tree { items = M.fromList immediate 391 , treeHash = NoHash } 392 where immediate = [ (n, get n) | (n, _) <- listImmediate base ] 393 get n = case (M.lookup n $ items base, M.lookup n $ items over) of 394 (Just (File _), Just f@(File _)) -> f 395 (Just (SubTree b), Just (SubTree o)) -> SubTree $ overlay b o 396 (Just (Stub b _), Just (SubTree o)) -> Stub (flip overlay o `fmap` b) NoHash 397 (Just (SubTree b), Just (Stub o _)) -> Stub (overlay b `fmap` o) NoHash 398 (Just (Stub b _), Just (Stub o _)) -> Stub (do o' <- o 399 b' <- b 400 return $ overlay b' o') NoHash 401 (Just x, _) -> x 402 (_, _) -> error $ "Unexpected case in overlay at get " ++ show n ++ "." 403 404 addMissingHashes :: (Monad m, Functor m) => (TreeItem m -> m Hash) -> Tree m -> m (Tree m) 405 addMissingHashes make = updateTree update -- use partiallyUpdateTree here 406 where update (SubTree t) = make (SubTree t) >>= \x -> return $ SubTree (t { treeHash = x }) 407 update (File blob@(Blob con NoHash)) = 408 do hash <- make $ File blob 409 return $ File (Blob con hash) 410 update (Stub s NoHash) = update . SubTree =<< s 411 update x = return x 412 413 ------ Private utilities shared among multiple functions. -------- 414 415 unstub :: (Monad m) => TreeItem m -> m (Tree m) 416 unstub (Stub s _) = s 417 unstub (SubTree s) = return s 418 419 isSub :: TreeItem m -> Bool 420 isSub (File _) = False 421 isSub _ = True 422