1 {-# OPTIONS_GHC -cpp -fglasgow-exts #-}
    2 {-# LANGUAGE CPP #-}
    3 
    4 #include "gadts.h"
    5 
    6 module Darcs.Repository.Cache (
    7                    cacheHash, okayHash, takeHash,
    8                    Cache(..), CacheType(..), CacheLoc(..), WritableOrNot(..),
    9                    HashedDir(..), hashedDir,
   10                    unionCaches, cleanCaches, cleanCachesWithHint,
   11                    fetchFileUsingCache, speculateFileUsingCache, writeFileUsingCache,
   12                    findFileMtimeUsingCache, setFileMtimeUsingCache, peekInCache,
   13                    repo2cache,
   14                    writable, isthisrepo, hashedFilePath, allHashedDirs
   15                  ) where
   16 
   17 import Control.Monad ( liftM, when, guard )
   18 import Data.List ( nub )
   19 import Data.Maybe ( listToMaybe )
   20 import System.Directory ( removeFile, doesFileExist, getDirectoryContents )
   21 import System.Posix ( setFileTimes )
   22 import System.Posix.Files ( linkCount, modificationTime, getSymbolicLinkStatus )
   23 import System.Posix.Types ( EpochTime )
   24 import System.IO ( hPutStrLn, stderr )
   25 
   26 import Crypt.SHA256 ( sha256sum )
   27 
   28 import ByteStringUtils ( gzWriteFilePS, linesPS )
   29 import qualified Data.ByteString as B (length, drop, ByteString )
   30 import qualified Data.ByteString.Char8 as BC (unpack)
   31 
   32 import SHA1 ( sha1PS )
   33 import System.Posix.Files ( createLink )
   34 import System.Directory ( createDirectoryIfMissing )
   35 
   36 import Darcs.External ( gzFetchFilePS, fetchFilePS, speculateFileOrUrl, copyFileOrUrl,
   37                         Cachable( Cachable ) )
   38 import Darcs.Flags ( Compression( .. ) )
   39 import Darcs.Global ( darcsdir )
   40 import Darcs.Lock ( writeAtomicFilePS, gzWriteAtomicFilePS )
   41 import Progress ( progressList, debugMessage, debugFail )
   42 import Darcs.SlurpDirectory ( undefined_time )
   43 import Darcs.URL ( is_file )
   44 import Darcs.Utils ( withCurrentDirectory, catchall )
   45 
   46 data HashedDir = HashedPristineDir | HashedPatchesDir | HashedInventoriesDir
   47 hashedDir :: HashedDir -> String
   48 hashedDir HashedPristineDir = "pristine.hashed"
   49 hashedDir HashedPatchesDir = "patches"
   50 hashedDir HashedInventoriesDir = "inventories"
   51 
   52 allHashedDirs :: [HashedDir]
   53 allHashedDirs = [HashedPristineDir, HashedPatchesDir, HashedInventoriesDir]
   54 
   55 data WritableOrNot = Writable | NotWritable deriving ( Show )
   56 data CacheType = Repo | Directory deriving ( Eq, Show )
   57 data CacheLoc = Cache !CacheType !WritableOrNot !String
   58 newtype Cache = Ca [CacheLoc] -- abstract type for hiding cache
   59 
   60 instance Eq CacheLoc where
   61     (Cache Repo _ a) == (Cache Repo _ b) = a == b
   62     (Cache Directory _ a) == (Cache Directory _ b) = a == b
   63     _ == _ = False
   64 instance Show CacheLoc where
   65     show (Cache Repo Writable a) = "thisrepo:" ++ a
   66     show (Cache Repo NotWritable a) = "repo:" ++ a
   67     show (Cache Directory Writable a) = "cache:" ++ a
   68     show (Cache Directory NotWritable a) = "readonly:" ++ a
   69 instance Show Cache where
   70     show (Ca cs) = unlines $ map show cs
   71 
   72 unionCaches :: Cache -> Cache -> Cache
   73 unionCaches (Ca a) (Ca b) = Ca (nub (a++b))
   74 
   75 repo2cache :: String -> Cache
   76 repo2cache r = Ca [Cache Repo NotWritable r]
   77 
   78 -- | 'cacheHash' computes the cache hash (i.e. filename) of a packed string.
   79 cacheHash :: B.ByteString -> String
   80 cacheHash ps = case show (B.length ps) of
   81                  x | l > 10 -> sha256sum ps
   82                    | otherwise -> take (10-l) (repeat '0') ++ x ++'-':sha256sum ps
   83                                         where l = length x
   84 
   85 okayHash :: String -> Bool
   86 okayHash s = length s == 40 || length s == 64 || length s == 75
   87 
   88 takeHash :: B.ByteString -> Maybe (String, B.ByteString)
   89 takeHash ps = do h <- listToMaybe $ linesPS ps
   90                  let v = BC.unpack h
   91                  guard $ okayHash v
   92                  Just (v, B.drop (B.length h) ps)
   93 
   94 checkHash :: String -> B.ByteString -> Bool
   95 checkHash h s | length h == 40 = sha1PS s == h
   96               | length h == 64 = sha256sum s == h
   97               | length h == 75 = B.length s == read (take 10 h) && sha256sum s == drop 11 h
   98               | otherwise = False
   99 
  100 
  101 findFileMtimeUsingCache :: Cache -> HashedDir -> String -> IO EpochTime
  102 findFileMtimeUsingCache (Ca cache) subdir f = mt cache
  103     where mt [] = return undefined_time
  104           mt (Cache Repo Writable r:_) = (modificationTime `fmap`
  105                                           getSymbolicLinkStatus (r++"/"++darcsdir++"/"++(hashedDir subdir)++"/"++f))
  106                                          `catchall` return undefined_time
  107           mt (_:cs) = mt cs
  108 
  109 setFileMtimeUsingCache :: Cache -> HashedDir -> String -> EpochTime -> IO ()
  110 setFileMtimeUsingCache (Ca cache) subdir f t = st cache
  111     where st [] = return ()
  112           st (Cache Repo Writable r:_) = setFileTimes (r++"/"++darcsdir++"/"++(hashedDir subdir)++"/"++f) t t
  113                                          `catchall` return ()
  114           st (_:cs) = st cs
  115 
  116 fetchFileUsingCache :: Cache -> HashedDir -> String -> IO (String, B.ByteString)
  117 fetchFileUsingCache = fetchFileUsingCachePrivate Anywhere
  118 
  119 writable :: CacheLoc -> Bool
  120 writable (Cache _ NotWritable _) = False
  121 writable (Cache _ Writable _) = True
  122 
  123 isthisrepo :: CacheLoc -> Bool
  124 isthisrepo (Cache Repo Writable _) = True
  125 isthisrepo _ = False
  126 
  127 -- | @hashedFilePath cachelocation subdir hash@ returns the physical filename of
  128 -- hash @hash@ in the @subdir@ section of @cachelocation@.
  129 hashedFilePath :: CacheLoc -> HashedDir -> String -> String
  130 hashedFilePath (Cache Directory _ d) s f = d ++ "/" ++ (hashedDir s) ++ "/" ++ f
  131 hashedFilePath (Cache Repo _ r) s f =
  132     r ++ "/"++darcsdir++"/" ++ (hashedDir s) ++ "/" ++ f
  133 
  134 -- | @peekInCache cache subdir hash@ tells whether @cache@ and
  135 -- contains an object with hash @hash@ in a writable position.
  136 -- Florent: why do we want it to be in a writable position?
  137 peekInCache :: Cache -> HashedDir -> String -> IO Bool
  138 peekInCache (Ca cache) subdir f = cacheHasIt cache `catchall` return False
  139     where cacheHasIt [] = return False
  140           cacheHasIt (c:cs) | not $ writable c = cacheHasIt cs
  141                             | otherwise = do ex <- doesFileExist $ fn c
  142                                              if ex then return True
  143                                                    else cacheHasIt cs
  144           fn c = hashedFilePath c subdir f
  145 
  146 -- | @speculateFileUsingCache cache subdirectory name@ takes note that
  147 -- the file @name@ is likely to be useful soon: pipelined downloads
  148 -- will add it to the (low-priority) queue, for the rest it is a noop.
  149 speculateFileUsingCache :: Cache -> HashedDir -> String -> IO ()
  150 speculateFileUsingCache c sd h = do debugMessage $ "Speculating on "++h
  151                                     copyFileUsingCache OnlySpeculate c sd h
  152 
  153 data OrOnlySpeculate = ActuallyCopy | OnlySpeculate deriving ( Eq )
  154 
  155 copyFileUsingCache :: OrOnlySpeculate -> Cache -> HashedDir -> String -> IO ()
  156 copyFileUsingCache oos (Ca cache) subdir f =
  157     do debugMessage $ "I'm doing copyFileUsingCache on "++(hashedDir subdir)++"/"++f
  158        Just stickItHere <- cacheLoc cache
  159        createDirectoryIfMissing False (reverse $ dropWhile (/='/') $ reverse stickItHere)
  160        sfuc cache stickItHere
  161     `catchall` return ()
  162     where cacheLoc [] = return Nothing
  163           cacheLoc (c:cs) | not $ writable c = cacheLoc cs
  164                           | otherwise =
  165               do ex <- doesFileExist $ fn c
  166                  if ex then fail "Bug in darcs: This exception should be caught in speculateFileUsingCache"
  167                        else do othercache <- cacheLoc cs
  168                                case othercache of Just x -> return $ Just x
  169                                                   Nothing -> return $ Just (fn c)
  170           sfuc [] _ = return ()
  171           sfuc (c:cs) out | not $ writable c =
  172               if oos == OnlySpeculate
  173                  then speculateFileOrUrl (fn c) out
  174                  else copyFileOrUrl [] (fn c) out Cachable
  175                           | otherwise = sfuc cs out
  176           fn c = hashedFilePath c subdir f
  177 
  178 
  179 data FromWhere = LocalOnly | Anywhere deriving ( Eq )
  180 
  181 fetchFileUsingCachePrivate :: FromWhere -> Cache -> HashedDir -> String -> IO (String, B.ByteString)
  182 fetchFileUsingCachePrivate fromWhere (Ca cache) subdir f =
  183     do when (fromWhere == Anywhere) $ copyFileUsingCache ActuallyCopy (Ca cache) subdir f
  184        ffuc cache
  185     `catchall` debugFail ("Couldn't fetch `"++f++"'\nin subdir "++(hashedDir subdir)++
  186                           " from sources:\n\n"++show (Ca cache))
  187     where ffuc (c:cs)
  188            | not (writable c) && (Anywhere == fromWhere || is_file (fn c)) =
  189               do debugMessage $ "In fetchFileUsingCachePrivate I'm going manually"
  190                  debugMessage $ "    getting "++f
  191                  debugMessage $ "    from " ++ fn c
  192                  x <- gzFetchFilePS (fn c) Cachable
  193                  if not $ checkHash f x
  194                     then do x' <- fetchFilePS (fn c) Cachable
  195                             when (not $ checkHash f x') $
  196                                  do hPutStrLn stderr $ "Hash failure in " ++ fn c
  197                                     fail $ "Hash failure in " ++ fn c
  198                             return (fn c, x')
  199                     else return (fn c, x) -- FIXME: create links in caches
  200               `catchall` ffuc cs
  201 
  202            | writable c =
  203               do x1 <- gzFetchFilePS (fn c) Cachable
  204                  x <- if not $ checkHash f x1
  205                       then do x2 <- fetchFilePS (fn c) Cachable
  206                               when (not $ checkHash f x2) $
  207                                  do hPutStrLn stderr $ "Hash failure in " ++ fn c
  208                                     removeFile $ fn c
  209                                     fail $ "Hash failure in " ++ fn c
  210                               return x2
  211                       else return x1
  212                  mapM_ (tryLinking (fn c)) cs
  213                  return (fn c, x)
  214               `catchall` do (fname,x) <- ffuc cs
  215                             do createCache c subdir
  216                                createLink fname (fn c)
  217                                return (fn c, x)
  218                              `catchall`
  219                              do gzWriteFilePS (fn c) x `catchall` return ()
  220                                 return (fname,x)
  221            | otherwise = ffuc cs
  222 
  223           ffuc [] = debugFail $ "No sources from which to fetch file `"++f++"'\n"++ show (Ca cache)
  224 
  225           tryLinking ff c@(Cache Directory Writable d) =
  226               do createDirectoryIfMissing False (d++"/"++(hashedDir subdir))
  227                  createLink ff (fn c)
  228               `catchall` return ()
  229           tryLinking _ _ = return ()
  230           fn c = hashedFilePath c subdir f
  231 
  232 createCache :: CacheLoc -> HashedDir -> IO ()
  233 createCache (Cache Directory _ d) subdir =
  234     createDirectoryIfMissing True (d ++ "/" ++ (hashedDir subdir))
  235 createCache _ _ = return ()
  236 
  237 -- | @write compression filename content@ writes @content@ to the file @filename@ according
  238 -- to the policy given by @compression@.
  239 write :: Compression -> String -> B.ByteString -> IO ()
  240 write NoCompression = writeAtomicFilePS
  241 write GzipCompression = gzWriteAtomicFilePS
  242 
  243 -- | @writeFileUsingCache cache compression subdir contents@ write the string @contents@ to
  244 -- the directory subdir, except if it is already in the cache, in which case it is a noop.
  245 -- Warning (?) this means that in case of a hash collision, writing using writeFileUsingCache is
  246 -- a noop. The returned value is the filename that was given to the string.
  247 writeFileUsingCache :: Cache -> Compression -> HashedDir -> B.ByteString -> IO String
  248 writeFileUsingCache (Ca cache) compr subdir ps =
  249     (fetchFileUsingCachePrivate LocalOnly (Ca cache) subdir hash >> return hash) `catchall`
  250     wfuc cache `catchall`
  251          debugFail ("Couldn't write `"++hash++"'\nin subdir "++(hashedDir subdir)++" to sources:\n\n"++
  252                     show (Ca cache))
  253     where hash = cacheHash ps
  254           wfuc (c:cs) | not $ writable c = wfuc cs
  255                       | otherwise = do createCache c subdir
  256                                        write compr (fn c) ps -- FIXME: create links in caches
  257                                        return hash
  258           wfuc [] = debugFail $ "No location to write file `" ++ (hashedDir subdir) ++"/"++hash ++ "'"
  259           fn c = hashedFilePath c subdir hash
  260 
  261 cleanCaches :: Cache -> HashedDir -> IO ()
  262 cleanCaches c d = cleanCachesWithHint' c d Nothing
  263 
  264 cleanCachesWithHint :: Cache -> HashedDir -> [String] -> IO ()
  265 cleanCachesWithHint c d h = cleanCachesWithHint' c d (Just h)
  266 
  267 cleanCachesWithHint' :: Cache -> HashedDir -> Maybe [String] -> IO ()
  268 cleanCachesWithHint' (Ca cs) subdir hint = mapM_ cleanCache cs
  269     where cleanCache (Cache Directory Writable d) =
  270              (withCurrentDirectory (d++"/"++(hashedDir subdir)) $
  271               do fs' <- getDirectoryContents "."
  272                  let fs = case hint of
  273                             Just h -> h
  274                             Nothing -> fs'
  275                  mapM_ clean $ progressList ("Cleaning cache "++d++"/"++(hashedDir subdir)) $
  276                        filter okayHash fs) `catchall` return ()
  277           cleanCache _ = return ()
  278           clean f = do lc <- linkCount `liftM` getSymbolicLinkStatus f
  279                        when (lc < 2) $ removeFile f
  280                     `catchall` return ()