1 {-# LANGUAGE BangPatterns #-}
    2 
    3 -- | A few darcs-specific utility functions. These are used for reading and
    4 -- writing darcs and darcs-compatible hashed trees.
    5 module Storage.Hashed.Darcs where
    6 
    7 import Prelude hiding ( lookup )
    8 import System.FilePath ( (</>) )
    9 
   10 import System.Directory( doesFileExist )
   11 import Codec.Compression.GZip( decompress, compress )
   12 import Control.Applicative( (<$>) )
   13 
   14 import qualified Data.ByteString.Char8 as BS8
   15 import qualified Data.ByteString.Lazy.Char8 as BL8
   16 import qualified Data.ByteString.Lazy as BL
   17 import qualified Data.ByteString as BS
   18 
   19 import Data.List( sortBy )
   20 import Data.Char( chr, ord, isSpace )
   21 import Data.Maybe( fromJust, isJust )
   22 import qualified Data.Set as S
   23 import Control.Monad.State.Strict
   24 
   25 import Storage.Hashed.Tree hiding ( lookup )
   26 import qualified Storage.Hashed.Tree as Tree
   27 import Storage.Hashed.Path hiding ( (</>) )
   28 import Storage.Hashed.Utils
   29 import Storage.Hashed.Hash
   30 import Storage.Hashed.Monad
   31 
   32 type GetHash = (Maybe Int, Hash) -> IO BL.ByteString
   33 
   34 ---------------------------------------------------------------------
   35 -- Utilities for coping with the darcs directory format.
   36 --
   37 
   38 -- | 'darcsDecodeWhite' interprets the Darcs-specific \"encoded\" filenames
   39 --   produced by 'darcsEncodeWhite'
   40 --
   41 --   > darcsDecodeWhite "hello\32\there" == "hello there"
   42 --   > darcsDecodeWhite "hello\92\there" == "hello\there"
   43 --   > darcsDecodeWhite "hello\there"    == error "malformed filename"
   44 darcsDecodeWhite :: String -> FilePath
   45 darcsDecodeWhite ('\\':cs) =
   46     case break (=='\\') cs of
   47     (theord, '\\':rest) ->
   48         chr (read theord) : darcsDecodeWhite rest
   49     _ -> error "malformed filename"
   50 darcsDecodeWhite (c:cs) = c: darcsDecodeWhite cs
   51 darcsDecodeWhite "" = ""
   52 
   53 -- | 'darcsEncodeWhite' translates whitespace in filenames to a darcs-specific
   54 --   format (backslash followed by numerical representation according to 'ord').
   55 --   Note that backslashes are also escaped since they are used in the encoding.
   56 --
   57 --   > darcsEncodeWhite "hello there" == "hello\32\there"
   58 --   > darcsEncodeWhite "hello\there" == "hello\92\there"
   59 darcsEncodeWhite :: FilePath -> String
   60 darcsEncodeWhite (c:cs) | isSpace c || c == '\\' =
   61     '\\' : (show $ ord c) ++ "\\" ++ darcsEncodeWhite cs
   62 darcsEncodeWhite (c:cs) = c : darcsEncodeWhite cs
   63 darcsEncodeWhite [] = []
   64 
   65 darcsEncodeWhiteBS :: BS8.ByteString -> BS8.ByteString
   66 darcsEncodeWhiteBS = BS8.pack . darcsEncodeWhite . BS8.unpack
   67 
   68 decodeDarcsHash :: BS8.ByteString -> Hash
   69 decodeDarcsHash bs = case BS8.split '-' bs of
   70                        [s, h] | BS8.length s == 10 -> decodeBase16 h
   71                        _ -> decodeBase16 bs
   72 
   73 decodeDarcsSize :: BS8.ByteString -> Maybe Int
   74 decodeDarcsSize bs = case BS8.split '-' bs of
   75                        [s, _] | BS8.length s == 10 ->
   76                                   case reads (BS8.unpack s) of
   77                                     [(x, _)] -> Just x
   78                                     _ -> Nothing
   79                        _ -> Nothing
   80 
   81 darcsGetLocal :: FilePath -> GetHash
   82 darcsGetLocal dir (s, h) = case darcsLocation dir (s, h) of
   83   Nothing -> fail "darcsGetLocal: invalid hash"
   84   Just l -> do file <- BL.readFile l
   85                if BL.null file then return BL.empty
   86                                else return (decompress file)
   87 
   88 darcsLocation :: FilePath -> (Maybe Int, Hash) -> Maybe FilePath
   89 darcsLocation dir (s,h) = case h of
   90                             NoHash -> Nothing
   91                             _ -> Just $ dir </> prefix s ++ hash
   92     where prefix Nothing = ""
   93           prefix (Just s') = formatSize s' ++ "-"
   94           formatSize s' = let n = show s' in replicate (10 - length n) '0' ++ n
   95           hash = BS8.unpack (encodeBase16 h)
   96 
   97 ----------------------------------------------
   98 -- Darcs directory format.
   99 --
  100 
  101 darcsFormatDir :: Tree m -> Maybe BL8.ByteString
  102 darcsFormatDir t = BL8.fromChunks <$> concat <$>
  103                        mapM string (sortBy cmp $ listImmediate t)
  104     where cmp (a, _) (b, _) = compare a b
  105           string (name, item) =
  106               do header <- case item of
  107                              File _ -> Just $ BS8.pack "file:\n"
  108                              _ -> Just $ BS8.pack "directory:\n"
  109                  hash <- case itemHash item of
  110                            NoHash -> Nothing
  111                            x -> Just $ encodeBase16 x
  112                  return $ [ header
  113                           , darcsEncodeWhiteBS name
  114                           , BS8.singleton '\n'
  115                           , hash, BS8.singleton '\n' ]
  116 
  117 darcsParseDir :: BL8.ByteString -> [(ItemType, Name, Maybe Int, Hash)]
  118 darcsParseDir content = parse (BL8.split '\n' content)
  119     where
  120       parse (t:n:h':r) = (header t,
  121                           BS8.pack $ darcsDecodeWhite (BL8.unpack n),
  122                           decodeDarcsSize hash,
  123                           decodeDarcsHash hash) : parse r
  124           where hash = BS8.concat $ BL8.toChunks h'
  125       parse _ = []
  126       header x
  127           | x == BL8.pack "file:" = BlobType
  128           | x == BL8.pack "directory:" = TreeType
  129           | otherwise = error $ "Error parsing darcs hashed dir: " ++ BL8.unpack x
  130 
  131 ----------------------------------------
  132 -- Utilities.
  133 --
  134 
  135 -- | Compute a darcs-compatible hash value for a tree-like structure.
  136 darcsTreeHash :: Tree m -> Hash
  137 darcsTreeHash t = case darcsFormatDir t of
  138                     Nothing -> NoHash
  139                     Just x -> sha256 x
  140 
  141 
  142 darcsUpdateDirHashes :: Tree m -> Tree m
  143 darcsUpdateDirHashes = updateSubtrees update
  144     where update t = t { treeHash = darcsTreeHash t }
  145 
  146 darcsUpdateHashes :: (Monad m, Functor m) => Tree m -> m (Tree m)
  147 darcsUpdateHashes = updateTree update
  148     where update (SubTree t) = return . SubTree $ t { treeHash = darcsTreeHash t }
  149           update (File blob@(Blob con _)) =
  150               do hash <- sha256 <$> readBlob blob
  151                  return $ File (Blob con hash)
  152 
  153 darcsHash (SubTree t) = return $ darcsTreeHash t
  154 darcsHash (File blob) = sha256 <$> readBlob blob
  155 darcshash _ = return NoHash
  156 
  157 darcsAddMissingHashes :: (Monad m, Functor m) => Tree m -> m (Tree m)
  158 darcsAddMissingHashes = addMissingHashes darcsHash
  159 
  160 -------------------------------------------
  161 -- Reading darcs pristine data
  162 --
  163 
  164 -- | Read and parse a darcs-style hashed directory listing from a given @dir@
  165 -- and with a given @hash@.
  166 readDarcsHashedDir :: GetHash -> (Maybe Int, Hash) -> IO [(ItemType, Name, Maybe Int, Hash)]
  167 readDarcsHashedDir get h = do
  168   content <- get h
  169   return $ if BL8.null content
  170               then []
  171               else darcsParseDir content
  172 
  173 -- | Read in a darcs-style hashed tree. This is mainly useful for reading
  174 -- \"pristine.hashed\". You need to provide the root hash you are interested in
  175 -- (found in _darcs/hashed_inventory).
  176 readDarcsHashed' :: Bool -> GetHash -> (Maybe Int, Hash) -> IO (Tree IO)
  177 readDarcsHashed' _ _ (_, NoHash) = fail "Cannot readDarcsHashed NoHash"
  178 readDarcsHashed' sizefail get root@(_, hash) = do
  179   items' <- readDarcsHashedDir get root
  180   subs <- sequence [
  181            do when (sizefail && isJust s) $
  182                 fail ("Unexpectedly encountered size-prefixed hash: " ++ show (s, h))
  183               case tp of
  184                 BlobType -> return (d, File $
  185                                        Blob (get (s, h)) h)
  186                 TreeType ->
  187                   do let t = readDarcsHashed get (s, h)
  188                      return (d, Stub t h)
  189            | (tp, d, s, h) <- items' ]
  190   return $ makeTreeWithHash subs hash
  191 
  192 readDarcsHashed = readDarcsHashed' False
  193 readDarcsHashedNosize dir hash = readDarcsHashed' True dir (Nothing, hash)
  194 
  195 ----------------------------------------------------
  196 -- Writing darcs-style hashed trees.
  197 --
  198 
  199 -- | Write a Tree into a darcs-style hashed directory.
  200 writeDarcsHashed :: Tree IO -> FilePath -> IO Hash
  201 writeDarcsHashed tree' dir =
  202     do t <- darcsUpdateDirHashes <$> expand tree'
  203        sequence_ [ dump =<< readBlob b | (_, File b) <- list t ]
  204        let dirs = darcsFormatDir t : [ darcsFormatDir d | (_, SubTree d) <- list t ]
  205        os' <- mapM dump $ map fromJust dirs
  206        return $ darcsTreeHash t
  207     where dump bits =
  208               do let name = dir </> BS8.unpack (encodeBase16 $ sha256 bits)
  209                  exist <- doesFileExist name
  210                  unless exist $ BL.writeFile name (compress bits)
  211 
  212 -- | Create a hashed file from a 'FilePath' and content. In case the file exists
  213 -- it is kept untouched and is assumed to have the right content. XXX Corrupt
  214 -- files should be probably renamed out of the way automatically or something
  215 -- (probably when they are being read though).
  216 fsCreateHashedFile :: FilePath -> BL8.ByteString -> TreeIO ()
  217 fsCreateHashedFile fn content =
  218     liftIO $ do
  219       exist <- doesFileExist fn
  220       unless exist $ BL.writeFile fn content
  221 
  222 -- | Run a 'TreeIO' @action@ in a hashed setting. The @initial@ tree is assumed
  223 -- to be fully available from the @directory@, and any changes will be written
  224 -- out to same. Please note that actual filesystem files are never removed.
  225 hashedTreeIO :: TreeIO a -- ^ action
  226              -> Tree IO -- ^ initial
  227              -> FilePath -- ^ directory
  228              -> IO (a, Tree IO)
  229 hashedTreeIO action t dir =
  230     do runTreeMonad action $ initialState t darcsHash updateItem
  231     where updateItem path (File b) = File <$> updateFile path b
  232           updateItem path (SubTree s) = SubTree <$> updateSub path s
  233           updateItem _ x = return x
  234 
  235           updateFile path b@(Blob _ !h) = do
  236             content <- liftIO $ readBlob b
  237             let fn = dir </> BS8.unpack (encodeBase16 h)
  238                 nblob = Blob (decompress <$> rblob) h
  239                 rblob = BL.fromChunks <$> return <$> BS.readFile fn
  240                 newcontent = compress content
  241             fsCreateHashedFile fn newcontent
  242             return nblob
  243           updateSub path s = do
  244             let !hash = treeHash s
  245                 Just dirdata = darcsFormatDir s
  246                 fn = dir </> BS8.unpack (encodeBase16 hash)
  247             fsCreateHashedFile fn (compress dirdata)
  248             return s
  249