1 -- Copyright (C) 2007 David Roundy
    2 --
    3 -- This program is free software; you can redistribute it and/or modify
    4 -- it under the terms of the GNU General Public License as published by
    5 -- the Free Software Foundation; either version 2, or (at your option)
    6 -- any later version.
    7 --
    8 -- This program is distributed in the hope that it will be useful,
    9 -- but WITHOUT ANY WARRANTY; without even the implied warranty of
   10 -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   11 -- GNU General Public License for more details.
   12 --
   13 -- You should have received a copy of the GNU General Public License
   14 -- along with this program; if not, write to the Free Software Foundation,
   15 -- Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
   16 
   17 {-# OPTIONS_GHC -cpp -fglasgow-exts #-}
   18 {-# LANGUAGE CPP, TypeSynonymInstances, FlexibleInstances #-}
   19 
   20 #include "gadts.h"
   21 
   22 module Darcs.Repository.HashedIO ( HashedIO, applyHashed,
   23                                    copyHashed, syncHashedPristine, copyPartialsHashed, listHashedContents,
   24                                    slurpHashedPristine, writeHashedPristine,
   25                                    clean_hashdir ) where
   26 
   27 import Darcs.Global ( darcsdir )
   28 import Data.List ( (\\) )
   29 import qualified Data.Map as Map
   30 import System.Directory ( getDirectoryContents, createDirectoryIfMissing )
   31 import System.Posix.Types ( EpochTime )
   32 import Control.Monad.State ( StateT, runStateT, modify, get, put, gets, lift )
   33 import Control.Monad ( when )
   34 import Data.Maybe ( isJust )
   35 import System.IO.Unsafe ( unsafeInterleaveIO )
   36 
   37 import Darcs.SlurpDirectory.Internal ( Slurpy(..), SlurpyContents(..), map_to_slurpies, slurpies_to_map )
   38 import Darcs.SlurpDirectory ( withSlurpy, undefined_time, undefined_size )
   39 import Darcs.Repository.Cache ( Cache, fetchFileUsingCache, writeFileUsingCache,
   40                                 peekInCache, speculateFileUsingCache,
   41                                 findFileMtimeUsingCache, setFileMtimeUsingCache,
   42                                 okayHash, cleanCachesWithHint, HashedDir(..), hashedDir )
   43 import Darcs.Patch ( Patchy, apply )
   44 import Darcs.RepoPath ( FilePathLike, toFilePath )
   45 import Darcs.IO ( ReadableDirectory(..), WriteableDirectory(..) )
   46 import Darcs.Flags ( DarcsFlag, Compression( .. ), compression )
   47 import Darcs.Lock ( writeAtomicFilePS, removeFileMayNotExist )
   48 import Darcs.Utils ( withCurrentDirectory )
   49 import Progress ( debugMessage, beginTedious, endTedious, tediousSize, finishedOneIO, progress )
   50 import Darcs.Patch.FileName ( FileName, norm_path, fp2fn, fn2fp, fn2niceps, niceps2fn,
   51                               break_on_dir, own_name, super_name )
   52 
   53 import ByteStringUtils ( linesPS, unlinesPS )
   54 import qualified Data.ByteString       as B  (ByteString, length, empty)
   55 import qualified Data.ByteString.Char8 as BC (unpack, pack)
   56 
   57 import SHA1 ( sha1PS )
   58 
   59 -- | @readHashFile c subdir hash@ reads the file with hash @hash@ in dir subdir,
   60 -- fetching it from 'Cache' @c@ if needed.
   61 readHashFile :: Cache -> HashedDir -> String -> IO (String,B.ByteString)
   62 readHashFile c subdir hash =
   63     do debugMessage $ "Reading hash file "++hash++" from "++(hashedDir subdir)++"/"
   64        fetchFileUsingCache c subdir hash
   65 
   66 applyHashed :: Patchy q => Cache -> [DarcsFlag] -> String -> q C(x y) -> IO String
   67 applyHashed c fs h p = do s <- slurpHashedPristine c (compression fs) h
   68                           let ms = withSlurpy s $ apply fs p
   69                           case ms of
   70                             Left e -> fail e
   71                             Right (s', ()) -> writeHashedPristine c (compression fs) s'
   72 {-
   73 applyHashed c fs h p = do (_,hd) <- runStateT (apply fs p) $
   74                                     HashDir { permissions = RW, cache = c,
   75                                               options = fs, rootHash = h }
   76                           return $ rootHash hd
   77 -}
   78 
   79 data HashDir r p = HashDir { permissions :: !r, cache :: !Cache,
   80                              compress :: !Compression, rootHash :: !String }
   81 type HashedIO r p = StateT (HashDir r p) IO
   82 
   83 data RO = RO
   84 data RW = RW
   85 {-
   86 class Readable r where
   87     isRO :: r -> Bool
   88     isRO = const False
   89 instance Readable RW
   90 instance Readable RO where
   91     isRO RO = True
   92 -}
   93 
   94 instance ReadableDirectory (HashedIO r p) where
   95     mDoesDirectoryExist fn = do thing <- identifyThing fn
   96                                 case thing of Just (D,_) -> return True
   97                                               _ -> return False
   98     mDoesFileExist fn = do thing <- identifyThing fn
   99                            case thing of Just (F,_) -> return True
  100                                          _ -> return False
  101     mInCurrentDirectory fn j | fn' == fp2fn "" = j
  102                              | otherwise =
  103                                  case break_on_dir fn' of
  104                                  Nothing -> do c <- readroot
  105                                                case geta D fn' c of
  106                                                  Nothing -> fail "dir doesn't exist mInCurrentDirectory..."
  107                                                  Just h -> inh h j
  108                                  Just (d,fn'') -> do c <- readroot
  109                                                      case geta D d c of
  110                                                        Nothing -> fail "dir doesn't exist..."
  111                                                        Just h -> inh h $ mInCurrentDirectory fn'' j
  112         where fn' = norm_path fn
  113     mGetDirectoryContents = map (\ (_,f,_) -> f) `fmap` readroot
  114     mReadFilePS fn = mInCurrentDirectory (super_name fn) $ do
  115                                           c <- readroot
  116                                           case geta F (own_name fn) c of
  117                                             Nothing -> fail $ " file don't exist... "++ fn2fp fn
  118                                             Just h -> readhash h
  119 
  120 instance WriteableDirectory (HashedIO RW p) where
  121     mWithCurrentDirectory fn j
  122         | fn' == fp2fn "" = j
  123         | otherwise =
  124             case break_on_dir fn' of
  125             Nothing -> do c <- readroot
  126                           case geta D fn' c of
  127                             Nothing -> fail "dir doesn't exist in mWithCurrentDirectory..."
  128                             Just h -> do (h',x) <- withh h j
  129                                          writeroot $ seta D fn' h' c
  130                                          return x
  131             Just (d,fn'') -> do c <- readroot
  132                                 case geta D d c of
  133                                   Nothing -> fail "dir doesn't exist..."
  134                                   Just h -> do (h',x) <- withh h $ mWithCurrentDirectory fn'' j
  135                                                writeroot $ seta D d h' c
  136                                                return x
  137         where fn' = norm_path fn
  138     mSetFileExecutable _ _ = return ()
  139     mWriteFilePS fn ps = do mexists <- identifyThing fn
  140                             case mexists of
  141                               Just (D,_) -> fail "can't write file over directory"
  142                               _ -> do h <- writeHashFile ps
  143                                       makeThing fn (F,h)
  144     mCreateDirectory fn = do h <- writeHashFile B.empty
  145                              exists <- isJust `fmap` identifyThing fn
  146                              when exists $ fail "can't mCreateDirectory over an existing object."
  147                              makeThing fn (D,h)
  148     mRename o n = do nexists <- isJust `fmap` identifyThing n
  149                      when nexists $ fail "mRename failed..."
  150                      mx <- identifyThing o
  151                      -- for backwards compatibility accept rename of nonexistent files.
  152                      case mx of Nothing -> return ()
  153                                 Just x -> do rmThing o
  154                                              makeThing n x
  155     mRemoveDirectory = rmThing
  156     mRemoveFile f = do x <- mReadFilePS f
  157                        when (B.length x /= 0) $
  158                             fail $ "Cannot remove non-empty file "++fn2fp f
  159                        rmThing f
  160 
  161 identifyThing :: FileName -> HashedIO r p (Maybe (ObjType,String))
  162 identifyThing fn | fn' == fp2fn "" = do h <- gets rootHash
  163                                         return $ Just (D, h)
  164                  | otherwise = case break_on_dir fn' of
  165                                Nothing -> getany fn' `fmap` readroot
  166                                Just (d,fn'') -> do c <- readroot
  167                                                    case geta D d c of
  168                                                      Nothing -> return Nothing
  169                                                      Just h -> inh h $ identifyThing fn''
  170         where fn' = norm_path fn
  171 
  172 makeThing :: FileName -> (ObjType,String) -> HashedIO RW p ()
  173 makeThing fn (o,h) = mWithCurrentDirectory (super_name $ norm_path fn) $
  174                      seta o (own_name $ norm_path fn) h `fmap` readroot >>= writeroot
  175 
  176 rmThing :: FileName -> HashedIO RW p ()
  177 rmThing fn = mWithCurrentDirectory (super_name $ norm_path fn) $
  178              do c <- readroot
  179                 let c' = filter (\(_,x,_)->x/= own_name (norm_path fn)) c
  180                 if length c' == length c - 1
  181                   then writeroot c'
  182                   else fail "obj doesn't exist in rmThing"
  183 
  184 readhash :: String -> HashedIO r p B.ByteString
  185 readhash h = do c <- gets cache
  186                 z <- lift $ unsafeInterleaveIO $ readHashFile c HashedPristineDir h
  187                 let (_,out) = z
  188                 return out
  189 
  190 readTediousHash :: String -> String -> HashedIO r p B.ByteString
  191 readTediousHash k h = do lift $ finishedOneIO k h
  192                          readhash h
  193 
  194 gethashmtime :: String -> HashedIO r p EpochTime
  195 gethashmtime h = do HashDir _ c _ _ <- get
  196                     lift $ unsafeInterleaveIO $ findFileMtimeUsingCache c HashedPristineDir h
  197 
  198 withh :: String -> HashedIO RW p a -> HashedIO RW p (String,a)
  199 withh h j = do hd <- get
  200                put $ hd { rootHash = h }
  201                x <- j
  202                h' <- gets rootHash
  203                put hd
  204                return (h',x)
  205 
  206 inh :: String -> HashedIO r p a -> HashedIO r p a
  207 inh h j = do hd <- get
  208              put $ hd { rootHash = h }
  209              x <- j
  210              put hd
  211              return x
  212 
  213 safeInterleave :: HashedIO RO p a -> HashedIO r p a
  214 safeInterleave job = do HashDir _ c compr h <- get
  215                         z <- lift $ unsafeInterleaveIO $ runStateT job
  216                              (HashDir { permissions = RO, cache = c, compress = compr, rootHash = h })
  217                         let (x,_) = z
  218                         return x
  219 
  220 readroot :: HashedIO r p [(ObjType, FileName, String)]
  221 readroot = do haveitalready <- peekroot
  222               cc <- gets rootHash >>= readdir
  223               when (not haveitalready) $ speculate cc
  224               return cc
  225     where speculate :: [(a,b,String)] -> HashedIO r q ()
  226           speculate c = do cac <- gets cache
  227                            mapM_ (\(_,_,z) -> lift $ speculateFileUsingCache cac HashedPristineDir z) c
  228           peekroot :: HashedIO r p Bool
  229           peekroot = do HashDir _ c _ h <- get
  230                         lift $ peekInCache c HashedPristineDir h
  231 
  232 writeroot :: [(ObjType, FileName, String)] -> HashedIO r p ()
  233 writeroot c = do h <- writedir c
  234                  modify $ \hd -> hd { rootHash = h }
  235 
  236 data ObjType = F | D deriving Eq
  237 
  238 -- | @geta objtype name stuff@ tries to get an object of type @objtype@ named @name@
  239 -- in @stuff@.
  240 geta :: ObjType -> FileName -> [(ObjType, FileName, String)] -> Maybe String
  241 geta o f c = do (o',h) <- getany f c
  242                 if o == o' then Just h else Nothing
  243 
  244 getany :: FileName -> [(ObjType, FileName, String)] -> Maybe (ObjType,String)
  245 getany _ [] = Nothing
  246 getany f ((o,f',h):_) | f == f' = Just (o,h)
  247 getany f (_:r) = getany f r
  248 
  249 seta :: ObjType -> FileName -> String -> [(ObjType, FileName, String)] -> [(ObjType, FileName, String)]
  250 seta o f h [] = [(o,f,h)]
  251 seta o f h ((_,f',_):r) | f == f' = (o,f,h):r
  252 seta o f h (x:xs) = x : seta o f h xs
  253 
  254 readdir :: String -> HashedIO r p [(ObjType, FileName, String)]
  255 readdir hash = (parsed . linesPS) `fmap` readhash hash
  256     where parsed (t:n:h:rest) | t == dir = (D, niceps2fn n, BC.unpack h) : parsed rest
  257                               | t == file = (F, niceps2fn n, BC.unpack h) : parsed rest
  258           parsed _ = []
  259 dir :: B.ByteString
  260 dir = BC.pack "directory:"
  261 file :: B.ByteString
  262 file = BC.pack "file:"
  263 
  264 
  265 writedir :: [(ObjType, FileName, String)] -> HashedIO r p String
  266 writedir c = writeHashFile cps
  267     where cps = unlinesPS $ concatMap (\ (o,d,h) -> [showO o,fn2niceps d,BC.pack h]) c++[B.empty]
  268           showO D = dir
  269           showO F = file
  270 
  271 writeHashFile :: B.ByteString -> HashedIO r p String
  272 writeHashFile ps = do c <- gets cache
  273                       compr <- gets compress
  274                       lift $ writeFileUsingCache c compr HashedPristineDir ps
  275 
  276 -- |Create a Slurpy representing the pristine content determined by the
  277 -- supplied root hash (which uniquely determines the pristine tree)
  278 slurpHashedPristine :: Cache -> Compression -> String -> IO Slurpy
  279 slurpHashedPristine c compr h = fst `fmap` runStateT slh
  280                                   (HashDir { permissions = RO, cache = c,
  281                                              compress = compr, rootHash = h })
  282 
  283 slh :: HashedIO r p Slurpy
  284 slh = do c <- readroot
  285          hroot <- gets rootHash
  286          lift $ beginTedious k
  287          safeInterleave $ (Slurpy rootdir . SlurpDir (Just hroot) . slurpies_to_map) `fmap` mapM sl c
  288     where sl (F,n,h) = do ps <- safeInterleave $ readTediousHash k h
  289                           t <- gethashmtime h
  290                           let len = if length h == 75 then read (take 10 h)
  291                                                       else undefined_size
  292                           return $ Slurpy n $ SlurpFile (Just h, t, len) ps
  293           sl (D,n,h) = inh h $ do c <- readroot
  294                                   lift $ tediousSize k (length c)
  295                                   lift $ finishedOneIO k h
  296                                   (Slurpy n . SlurpDir (Just h) . slurpies_to_map) `fmap` mapM sl c
  297           k = "Reading pristine"
  298 
  299 rootdir :: FileName
  300 rootdir = fp2fn "."
  301 
  302 -- |Write contents of a Slurpy into hashed pristine. Only files that have not
  303 -- not yet been hashed (that is, the hash corresponding to their content is
  304 -- already present in hashed pristine) will be written out, so it is efficient
  305 -- to use this function to update existing pristine cache. Note that the
  306 -- pristine root hash will *not* be updated. You need to do that manually.
  307 writeHashedPristine :: Cache -> Compression -> Slurpy -> IO String
  308 writeHashedPristine c compr sl =
  309     do beginTedious k
  310        h <- fst `fmap` runStateT (hsl sl)
  311             (HashDir { permissions = RW, cache = c,
  312                        compress = compr, rootHash = sha1PS B.empty })
  313        endTedious k
  314        return h
  315     where hsl (Slurpy _ (SlurpDir (Just h) _)) = return h
  316           hsl (Slurpy _ (SlurpDir Nothing ss)) = do lift $ tediousSize k (Map.size ss)
  317                                                     mapM hs (map_to_slurpies ss) >>= writedir
  318           hsl (Slurpy _ (SlurpFile (Just h,_,_) _)) = return h
  319           hsl (Slurpy _ (SlurpFile _ x)) = writeHashFile x
  320           hs (Slurpy d (SlurpDir (Just h) _)) = progress k $ return (D, d, h)
  321           hs s@(Slurpy d (SlurpDir Nothing _)) = do h <- hsl s
  322                                                     lift $ finishedOneIO k h
  323                                                     return (D, d, h)
  324           hs (Slurpy f (SlurpFile (Just h,_,_) _)) = progress k $ return (F, f, h)
  325           hs s@(Slurpy f (SlurpFile _ _)) = do h <- hsl s
  326                                                lift $ finishedOneIO k h
  327                                                return (F, f, h)
  328           k = "Writing pristine"
  329 
  330 grab :: FileName -> Slurpy -> Maybe Slurpy
  331 grab _ (Slurpy _ (SlurpFile _ _)) = Nothing
  332 grab fn (Slurpy _ (SlurpDir _ ss)) = fmap (Slurpy fn) $ Map.lookup fn ss
  333 
  334 -- | Update timestamps on pristine files to match those in the working directory
  335 -- (which is passed to this function in form of a Slurpy). It needed for the
  336 -- mtime-based unsafeDiff optimisation to work efficiently.
  337 syncHashedPristine :: Cache -> Slurpy -> String -> IO ()
  338 syncHashedPristine c s r = do runStateT sh $ HashDir { permissions=RW, cache=c,
  339                                                        compress=compression [], rootHash=r }
  340                               return ()
  341     where sh = do cc <- readroot
  342                   lift $ tediousSize k (length cc)
  343                   mapM_ sh' cc
  344           sh' (D,n,h) = case progress k $ grab n s of
  345                         Just s' -> lift $ syncHashedPristine c s' h
  346                         Nothing -> return ()
  347           sh' (F,n,h) = case progress k $ grab n s of
  348                         Just (Slurpy _ (SlurpFile (_,t',l) x)) ->
  349                             do t <- lift $ findFileMtimeUsingCache c HashedPristineDir h
  350                                when (t' /= undefined_time && t' /= t) $
  351                                     do ps <- readhash h
  352                                        when (B.length ps == fromIntegral l && ps == x) $
  353                                             lift $ setFileMtimeUsingCache c HashedPristineDir h t'
  354                         _ -> return ()
  355           k = "Synchronizing pristine"
  356 
  357 copyHashed :: String -> Cache -> Compression -> String -> IO ()
  358 copyHashed k c compr z = do runStateT cph $ HashDir { permissions = RO, cache = c,
  359                                                       compress = compr, rootHash = z }
  360                             return ()
  361     where cph = do cc <- readroot
  362                    lift $ tediousSize k (length cc)
  363                    mapM_ cp cc
  364           cp (F,n,h) = do ps <- readhash h
  365                           lift $ finishedOneIO k (fn2fp n)
  366                           lift $ writeAtomicFilePS (fn2fp n) ps
  367           cp (D,n,h) = do lift $ createDirectoryIfMissing False (fn2fp n)
  368                           lift $ finishedOneIO k (fn2fp n)
  369                           lift $ withCurrentDirectory (fn2fp n) $ copyHashed k c compr h
  370 
  371 copyPartialsHashed :: FilePathLike fp =>
  372                       Cache -> Compression -> String -> [fp] -> IO ()
  373 copyPartialsHashed c compr root = mapM_ (copyPartialHashed c compr root)
  374 
  375 copyPartialHashed :: FilePathLike fp => Cache -> Compression -> String -> fp -> IO ()
  376 copyPartialHashed c compr root ff =
  377     do createDirectoryIfMissing True (basename $ toFilePath ff)
  378        runStateT (cp $ fp2fn $ toFilePath ff) $
  379                  HashDir { permissions = RO, cache = c,
  380                            compress=compr, rootHash = root }
  381        return ()
  382  where basename = reverse . dropWhile ('/' /=) . dropWhile ('/' ==) . reverse
  383        cp f = do mt <- identifyThing f
  384                  case mt of
  385                    Just (D,h) -> do lift $ createDirectoryIfMissing True (fn2fp f)
  386                                     lift $ withCurrentDirectory (fn2fp f) $ copyHashed "" c compr h
  387                    Just (F,h) -> do ps <- readhash h
  388                                     lift $ writeAtomicFilePS (fn2fp f) ps
  389                    Nothing -> return ()
  390 
  391 -- | Seems to list all hashes reachable from "root".
  392 listHashedContents :: String -> Cache -> String -> IO [String]
  393 listHashedContents k c root =
  394     do beginTedious k
  395        tediousSize k 1
  396        x <- fst `fmap` runStateT (lhc (D,fp2fn ".",root)) (HashDir RO c NoCompression root)
  397        endTedious k
  398        return x
  399     where lhc :: (ObjType, FileName, String) -> HashedIO r a [String]
  400           lhc (D,dname,d) = do xs <- inh d $ readroot
  401                                lift $ finishedOneIO k (fn2fp dname)
  402                                lift $ tediousSize k (length $ filter (\(x,_,_) -> x == D) xs)
  403                                hcxs <- mapM lhc xs
  404                                return (d:concat hcxs)
  405           lhc (F,_,h) = return [h]
  406 
  407 clean_hashdir :: Cache -> HashedDir -> [String] -> IO ()
  408 clean_hashdir c dir_ hashroots =
  409    do -- we'll remove obsolete bits of "dir"
  410       debugMessage $ "Cleaning out " ++ (hashedDir dir_) ++ "..."
  411       let hashdir = darcsdir ++ "/" ++ (hashedDir dir_) ++ "/"
  412       hs <- concat `fmap` (mapM (listHashedContents "cleaning up..." c) hashroots)
  413       fs <- filter okayHash `fmap` getDirectoryContents hashdir
  414       mapM_ (removeFileMayNotExist . (hashdir++)) (fs \\ hs)
  415       -- and also clean out any global caches.
  416       debugMessage "Cleaning out any global caches..."
  417       cleanCachesWithHint c dir_ (fs \\ hs)