1 {-# LANGUAGE BangPatterns #-} 2 3 -- | A few darcs-specific utility functions. These are used for reading and 4 -- writing darcs and darcs-compatible hashed trees. 5 module Storage.Hashed.Darcs where 6 7 import Prelude hiding ( lookup ) 8 import System.FilePath ( (</>) ) 9 10 import System.Directory( doesFileExist ) 11 import Codec.Compression.GZip( decompress, compress ) 12 import Control.Applicative( (<$>) ) 13 14 import qualified Data.ByteString.Char8 as BS8 15 import qualified Data.ByteString.Lazy.Char8 as BL8 16 import qualified Data.ByteString.Lazy as BL 17 import qualified Data.ByteString as BS 18 19 import Data.List( sortBy ) 20 import Data.Char( chr, ord, isSpace ) 21 import Data.Maybe( fromJust, isJust ) 22 import qualified Data.Set as S 23 import Control.Monad.State.Strict 24 25 import Storage.Hashed.Tree hiding ( lookup ) 26 import qualified Storage.Hashed.Tree as Tree 27 import Storage.Hashed.Path hiding ( (</>) ) 28 import Storage.Hashed.Utils 29 import Storage.Hashed.Hash 30 import Storage.Hashed.Monad 31 32 type GetHash = (Maybe Int, Hash) -> IO BL.ByteString 33 34 --------------------------------------------------------------------- 35 -- Utilities for coping with the darcs directory format. 36 -- 37 38 -- | 'darcsDecodeWhite' interprets the Darcs-specific \"encoded\" filenames 39 -- produced by 'darcsEncodeWhite' 40 -- 41 -- > darcsDecodeWhite "hello\32\there" == "hello there" 42 -- > darcsDecodeWhite "hello\92\there" == "hello\there" 43 -- > darcsDecodeWhite "hello\there" == error "malformed filename" 44 darcsDecodeWhite :: String -> FilePath 45 darcsDecodeWhite ('\\':cs) = 46 case break (=='\\') cs of 47 (theord, '\\':rest) -> 48 chr (read theord) : darcsDecodeWhite rest 49 _ -> error "malformed filename" 50 darcsDecodeWhite (c:cs) = c: darcsDecodeWhite cs 51 darcsDecodeWhite "" = "" 52 53 -- | 'darcsEncodeWhite' translates whitespace in filenames to a darcs-specific 54 -- format (backslash followed by numerical representation according to 'ord'). 55 -- Note that backslashes are also escaped since they are used in the encoding. 56 -- 57 -- > darcsEncodeWhite "hello there" == "hello\32\there" 58 -- > darcsEncodeWhite "hello\there" == "hello\92\there" 59 darcsEncodeWhite :: FilePath -> String 60 darcsEncodeWhite (c:cs) | isSpace c || c == '\\' = 61 '\\' : (show $ ord c) ++ "\\" ++ darcsEncodeWhite cs 62 darcsEncodeWhite (c:cs) = c : darcsEncodeWhite cs 63 darcsEncodeWhite [] = [] 64 65 darcsEncodeWhiteBS :: BS8.ByteString -> BS8.ByteString 66 darcsEncodeWhiteBS = BS8.pack . darcsEncodeWhite . BS8.unpack 67 68 decodeDarcsHash :: BS8.ByteString -> Hash 69 decodeDarcsHash bs = case BS8.split '-' bs of 70 [s, h] | BS8.length s == 10 -> decodeBase16 h 71 _ -> decodeBase16 bs 72 73 decodeDarcsSize :: BS8.ByteString -> Maybe Int 74 decodeDarcsSize bs = case BS8.split '-' bs of 75 [s, _] | BS8.length s == 10 -> 76 case reads (BS8.unpack s) of 77 [(x, _)] -> Just x 78 _ -> Nothing 79 _ -> Nothing 80 81 darcsGetLocal :: FilePath -> GetHash 82 darcsGetLocal dir (s, h) = case darcsLocation dir (s, h) of 83 Nothing -> fail "darcsGetLocal: invalid hash" 84 Just l -> do file <- BL.readFile l 85 if BL.null file then return BL.empty 86 else return (decompress file) 87 88 darcsLocation :: FilePath -> (Maybe Int, Hash) -> Maybe FilePath 89 darcsLocation dir (s,h) = case h of 90 NoHash -> Nothing 91 _ -> Just $ dir </> prefix s ++ hash 92 where prefix Nothing = "" 93 prefix (Just s') = formatSize s' ++ "-" 94 formatSize s' = let n = show s' in replicate (10 - length n) '0' ++ n 95 hash = BS8.unpack (encodeBase16 h) 96 97 ---------------------------------------------- 98 -- Darcs directory format. 99 -- 100 101 darcsFormatDir :: Tree m -> Maybe BL8.ByteString 102 darcsFormatDir t = BL8.fromChunks <$> concat <$> 103 mapM string (sortBy cmp $ listImmediate t) 104 where cmp (a, _) (b, _) = compare a b 105 string (name, item) = 106 do header <- case item of 107 File _ -> Just $ BS8.pack "file:\n" 108 _ -> Just $ BS8.pack "directory:\n" 109 hash <- case itemHash item of 110 NoHash -> Nothing 111 x -> Just $ encodeBase16 x 112 return $ [ header 113 , darcsEncodeWhiteBS name 114 , BS8.singleton '\n' 115 , hash, BS8.singleton '\n' ] 116 117 darcsParseDir :: BL8.ByteString -> [(ItemType, Name, Maybe Int, Hash)] 118 darcsParseDir content = parse (BL8.split '\n' content) 119 where 120 parse (t:n:h':r) = (header t, 121 BS8.pack $ darcsDecodeWhite (BL8.unpack n), 122 decodeDarcsSize hash, 123 decodeDarcsHash hash) : parse r 124 where hash = BS8.concat $ BL8.toChunks h' 125 parse _ = [] 126 header x 127 | x == BL8.pack "file:" = BlobType 128 | x == BL8.pack "directory:" = TreeType 129 | otherwise = error $ "Error parsing darcs hashed dir: " ++ BL8.unpack x 130 131 ---------------------------------------- 132 -- Utilities. 133 -- 134 135 -- | Compute a darcs-compatible hash value for a tree-like structure. 136 darcsTreeHash :: Tree m -> Hash 137 darcsTreeHash t = case darcsFormatDir t of 138 Nothing -> NoHash 139 Just x -> sha256 x 140 141 142 darcsUpdateDirHashes :: Tree m -> Tree m 143 darcsUpdateDirHashes = updateSubtrees update 144 where update t = t { treeHash = darcsTreeHash t } 145 146 darcsUpdateHashes :: (Monad m, Functor m) => Tree m -> m (Tree m) 147 darcsUpdateHashes = updateTree update 148 where update (SubTree t) = return . SubTree $ t { treeHash = darcsTreeHash t } 149 update (File blob@(Blob con _)) = 150 do hash <- sha256 <$> readBlob blob 151 return $ File (Blob con hash) 152 153 darcsHash (SubTree t) = return $ darcsTreeHash t 154 darcsHash (File blob) = sha256 <$> readBlob blob 155 darcshash _ = return NoHash 156 157 darcsAddMissingHashes :: (Monad m, Functor m) => Tree m -> m (Tree m) 158 darcsAddMissingHashes = addMissingHashes darcsHash 159 160 ------------------------------------------- 161 -- Reading darcs pristine data 162 -- 163 164 -- | Read and parse a darcs-style hashed directory listing from a given @dir@ 165 -- and with a given @hash@. 166 readDarcsHashedDir :: GetHash -> (Maybe Int, Hash) -> IO [(ItemType, Name, Maybe Int, Hash)] 167 readDarcsHashedDir get h = do 168 content <- get h 169 return $ if BL8.null content 170 then [] 171 else darcsParseDir content 172 173 -- | Read in a darcs-style hashed tree. This is mainly useful for reading 174 -- \"pristine.hashed\". You need to provide the root hash you are interested in 175 -- (found in _darcs/hashed_inventory). 176 readDarcsHashed' :: Bool -> GetHash -> (Maybe Int, Hash) -> IO (Tree IO) 177 readDarcsHashed' _ _ (_, NoHash) = fail "Cannot readDarcsHashed NoHash" 178 readDarcsHashed' sizefail get root@(_, hash) = do 179 items' <- readDarcsHashedDir get root 180 subs <- sequence [ 181 do when (sizefail && isJust s) $ 182 fail ("Unexpectedly encountered size-prefixed hash: " ++ show (s, h)) 183 case tp of 184 BlobType -> return (d, File $ 185 Blob (get (s, h)) h) 186 TreeType -> 187 do let t = readDarcsHashed get (s, h) 188 return (d, Stub t h) 189 | (tp, d, s, h) <- items' ] 190 return $ makeTreeWithHash subs hash 191 192 readDarcsHashed = readDarcsHashed' False 193 readDarcsHashedNosize dir hash = readDarcsHashed' True dir (Nothing, hash) 194 195 ---------------------------------------------------- 196 -- Writing darcs-style hashed trees. 197 -- 198 199 -- | Write a Tree into a darcs-style hashed directory. 200 writeDarcsHashed :: Tree IO -> FilePath -> IO Hash 201 writeDarcsHashed tree' dir = 202 do t <- darcsUpdateDirHashes <$> expand tree' 203 sequence_ [ dump =<< readBlob b | (_, File b) <- list t ] 204 let dirs = darcsFormatDir t : [ darcsFormatDir d | (_, SubTree d) <- list t ] 205 os' <- mapM dump $ map fromJust dirs 206 return $ darcsTreeHash t 207 where dump bits = 208 do let name = dir </> BS8.unpack (encodeBase16 $ sha256 bits) 209 exist <- doesFileExist name 210 unless exist $ BL.writeFile name (compress bits) 211 212 -- | Create a hashed file from a 'FilePath' and content. In case the file exists 213 -- it is kept untouched and is assumed to have the right content. XXX Corrupt 214 -- files should be probably renamed out of the way automatically or something 215 -- (probably when they are being read though). 216 fsCreateHashedFile :: FilePath -> BL8.ByteString -> TreeIO () 217 fsCreateHashedFile fn content = 218 liftIO $ do 219 exist <- doesFileExist fn 220 unless exist $ BL.writeFile fn content 221 222 -- | Run a 'TreeIO' @action@ in a hashed setting. The @initial@ tree is assumed 223 -- to be fully available from the @directory@, and any changes will be written 224 -- out to same. Please note that actual filesystem files are never removed. 225 hashedTreeIO :: TreeIO a -- ^ action 226 -> Tree IO -- ^ initial 227 -> FilePath -- ^ directory 228 -> IO (a, Tree IO) 229 hashedTreeIO action t dir = 230 do runTreeMonad action $ initialState t darcsHash updateItem 231 where updateItem path (File b) = File <$> updateFile path b 232 updateItem path (SubTree s) = SubTree <$> updateSub path s 233 updateItem _ x = return x 234 235 updateFile path b@(Blob _ !h) = do 236 content <- liftIO $ readBlob b 237 let fn = dir </> BS8.unpack (encodeBase16 h) 238 nblob = Blob (decompress <$> rblob) h 239 rblob = BL.fromChunks <$> return <$> BS.readFile fn 240 newcontent = compress content 241 fsCreateHashedFile fn newcontent 242 return nblob 243 updateSub path s = do 244 let !hash = treeHash s 245 Just dirdata = darcsFormatDir s 246 fn = dir </> BS8.unpack (encodeBase16 hash) 247 fsCreateHashedFile fn (compress dirdata) 248 return s 249