1 -- Copyright (C) 2002-2003 David Roundy 2 -- 3 -- This program is free software; you can redistribute it and/or modify 4 -- it under the terms of the GNU General Public License as published by 5 -- the Free Software Foundation; either version 2, or (at your option) 6 -- any later version. 7 -- 8 -- This program is distributed in the hope that it will be useful, 9 -- but WITHOUT ANY WARRANTY; without even the implied warranty of 10 -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 -- GNU General Public License for more details. 12 -- 13 -- You should have received a copy of the GNU General Public License 14 -- along with this program; see the file COPYING. If not, write to 15 -- the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 16 -- Boston, MA 02110-1301, USA. 17 18 {-# OPTIONS_GHC -cpp -fglasgow-exts #-} 19 {-# LANGUAGE CPP #-} 20 21 #include "gadts.h" 22 23 module Darcs.Diff ( unsafeDiffAtPaths, unsafeDiff, sync, cmp 24 #ifndef GADT_WITNESSES 25 , diff_files 26 #endif 27 ) where 28 29 import System.Posix 30 ( setFileTimes ) 31 import System.IO ( IOMode(ReadMode), hFileSize, hClose ) 32 import System.Directory ( doesDirectoryExist, doesFileExist, 33 getDirectoryContents, 34 ) 35 import Control.Monad ( when ) 36 import Data.List ( sort 37 #ifndef GADT_WITNESSES 38 , intersperse, isPrefixOf 39 #endif 40 ) 41 #ifndef GADT_WITNESSES 42 import Data.Maybe ( catMaybes ) 43 #endif 44 45 #ifndef GADT_WITNESSES 46 import ByteStringUtils ( is_funky, linesPS) 47 import qualified Data.ByteString.Char8 as BC (last) 48 import qualified Data.ByteString as B (null, empty, take, ByteString) 49 #endif 50 import qualified Data.ByteString as B (hGet, length) 51 52 import Darcs.SlurpDirectory ( Slurpy, slurp_name, is_dir, is_file, 53 #ifndef GADT_WITNESSES 54 get_slurp, 55 #endif 56 get_dircontents, get_filecontents, 57 get_mtime, get_length, 58 undefined_time 59 #ifndef GADT_WITNESSES 60 , FileContents, undefined_size 61 #endif 62 ) 63 #ifndef GADT_WITNESSES 64 import System.FilePath.Posix ( (</>) ) 65 #endif 66 import Darcs.Patch ( Prim 67 #ifndef GADT_WITNESSES 68 , hunk, canonize, rmfile, rmdir 69 , addfile, adddir 70 , binary, invert 71 #endif 72 ) 73 #ifndef GADT_WITNESSES 74 import Darcs.Patch.FileName( fp2fn, breakup ) 75 #endif 76 import System.IO ( openBinaryFile ) 77 import Darcs.Repository.Prefs ( FileType(..) ) 78 import Darcs.Flags ( DarcsFlag(..) ) 79 import Darcs.Utils ( catchall ) 80 import Darcs.Ordered ( FL(..) 81 #ifndef GADT_WITNESSES 82 , (+>+) 83 #endif 84 ) 85 #ifndef GADT_WITNESSES 86 #include "impossible.h" 87 #endif 88 89 -- | The unsafeDiffAtPaths function calls diff_at_path for a set of files and 90 -- and directories, and returns all changes to those files. It recurses into 91 -- given directories when searching for changes. 92 -- 93 -- Comparing paths and not slurpies is useful when the user 94 -- requests a diff for a file that is created or removed in the working copy: 95 -- then there is no slurpy for the file in the /current/ or /working/ slurpy 96 -- respectively. 97 -- 98 -- The given paths must always be fixed repository paths starting with a 99 -- ".". It is safe to pass overlapping paths. 100 -- 101 -- The booleans in the first argument tell whether to ignore mtimes, whether 102 -- we must look for additions and if we're diffing for a summary only. 103 -- 104 -- It returns an FL of patches, that contains all the changes that have been 105 -- made at all those paths. 106 unsafeDiffAtPaths :: (Bool, Bool, Bool) -> (FilePath -> FileType) -> 107 Slurpy -> Slurpy -> [FilePath] -> FL Prim C(x y) 108 #ifdef GADT_WITNESSES 109 unsafeDiffAtPaths = undefined 110 #else 111 unsafeDiffAtPaths flags filetypeFunction s1 s2 paths = 112 foldr (+>+) NilFL (catMaybes diffsPerPath) 113 where diffsPerPath = map differ safePaths 114 differ = diff_at_path flags filetypeFunction s1 s2 115 safePaths = make_nonoverlapping_path_set paths 116 117 diff_at_path :: (Bool, Bool, Bool) -> (FilePath -> FileType) 118 -> Slurpy -> Slurpy -> FilePath -> Maybe (FL Prim) 119 diff_at_path (ignoreTimes, lookForAdds, summary) filetypeFunction s1 s2 path = 120 case (pathIn1, pathIn2) of 121 (Nothing, Nothing) -> Nothing 122 (Nothing, Just s2PathSlurpy) -> do 123 Just $ diff_added summary filetypeFunction initialFps s2PathSlurpy NilFL 124 (Just s1PathSlurpy, Nothing) -> do 125 Just $ diff_removed filetypeFunction initialFps s1PathSlurpy NilFL 126 (Just s1PathSlurpy, Just s2PathSlurpy) -> 127 Just $ gendiff (ignoreTimes, lookForAdds, summary) filetypeFunction 128 initialFps s1PathSlurpy s2PathSlurpy NilFL 129 where pathIn1 = get_slurp (fp2fn path) s1 130 pathIn2 = get_slurp (fp2fn path) s2 131 initialFps = tail $ reverse (breakup path) 132 133 make_nonoverlapping_path_set :: [FilePath] -> [FilePath] 134 make_nonoverlapping_path_set = map unbreakup . delete_overlapping . map breakup . sort 135 where 136 delete_overlapping :: [[FilePath]] -> [[FilePath]] 137 delete_overlapping (p1:p2:ps) = if p1 `isPrefixOf` p2 138 then delete_overlapping (p1:ps) 139 else p1 : delete_overlapping (p2:ps) 140 delete_overlapping ps = ps 141 unbreakup = concat . intersperse "/" 142 #endif 143 144 -- The diff function takes a recursive diff of two slurped-up directory trees. 145 -- The code involved is actually pretty trivial. \verb!paranoid_diff! runs a 146 -- diff in which we don't make the assumption that files with the same 147 -- modification time are identical. 148 149 unsafeDiff :: [DarcsFlag] 150 -> (FilePath -> FileType) -> Slurpy -> Slurpy -> FL Prim C(x y) 151 #ifdef GADT_WITNESSES 152 unsafeDiff = undefined 153 #else 154 unsafeDiff opts wt s1 s2 155 = case diff_at_path (ignoreTimes, lookForAdds, summary) wt s1 s2 "" of 156 Just d -> d 157 _ -> impossible -- because "" always exists in a slurpy 158 where -- NoSummary/Summary both present gives False 159 -- Just Summary gives True 160 -- Just NoSummary gives False 161 -- Neither gives False 162 summary = Summary `elem` opts && NoSummary `notElem` opts 163 lookForAdds = LookForAdds `elem` opts 164 ignoreTimes = IgnoreTimes `elem` opts 165 166 mk_filepath :: [FilePath] -> FilePath 167 mk_filepath fps = concat $ intersperse "/" $ reverse fps 168 169 gendiff :: (Bool,Bool,Bool) 170 -> (FilePath -> FileType) -> [FilePath] -> Slurpy -> Slurpy 171 -> (FL Prim -> FL Prim) 172 gendiff opts@(isparanoid,_,_) wt fps s1 s2 173 | is_file s1 && is_file s2 = diff_regular_files isparanoid wt f s1 s2 174 | is_dir s1 && is_dir s2 = 175 let fps' = case n2 of 176 "." -> fps 177 _ -> n2:fps 178 in fps' `seq` recur_diff opts (wt . (n2</>)) fps' dc1 dc2 179 | otherwise = id 180 where n2 = slurp_name s2 181 f = mk_filepath (n2:fps) 182 dc1 = get_dircontents s1 183 dc2 = get_dircontents s2 184 185 -- recur_diff or recursive diff 186 -- First parameter is (IgnoreTimes?, LookforAdds?, Summary?) 187 recur_diff :: (Bool,Bool,Bool) 188 -> (FilePath -> FileType) -> [FilePath] -> [Slurpy] -> [Slurpy] 189 -> (FL Prim -> FL Prim) 190 recur_diff _ _ _ [] [] = id 191 recur_diff opts@(_,doadd,summary) wt fps (s:ss) (s':ss') 192 -- this is the case if a file has been removed in the working directory 193 | s < s' = diff_removed wt fps s . recur_diff opts wt fps ss (s':ss') 194 -- this next case is when there is a file in the directory that is not 195 -- in the repository (ie, not managed by darcs) 196 | s > s' = let rest = recur_diff opts wt fps (s:ss) ss' 197 in if not doadd then rest 198 else diff_added summary wt fps s' . rest 199 -- actually compare the files because the names match 200 | s == s' = gendiff opts wt fps s s' . recur_diff opts wt fps ss ss' 201 recur_diff opts wt fps (s:ss) [] = 202 diff_removed wt fps s . recur_diff opts wt fps ss [] 203 recur_diff opts@(_,True,summary) wt fps [] (s':ss') = 204 diff_added summary wt fps s' . recur_diff opts wt fps [] ss' 205 recur_diff (_,False,_) _ _ [] _ = id 206 recur_diff _ _ _ _ _ = impossible 207 208 -- diff, taking into account paranoidness and file type, two regular files 209 diff_regular_files :: Bool -> (FilePath -> FileType) -> FilePath -> Slurpy -> Slurpy -> (FL Prim -> FL Prim) 210 diff_regular_files ignoreTimes filetypeFunction f s1 s2 = 211 if maybe_differ 212 then case filetypeFunction (slurp_name s2) of 213 TextFile -> diff_files f b1 b2 214 BinaryFile -> if b1 /= b2 then (binary f b1 b2:>:) 215 else id 216 else id 217 where maybe_differ = ignoreTimes 218 || get_mtime s1 == undefined_time 219 || get_mtime s1 /= get_mtime s2 220 || get_length s1 == undefined_size 221 || get_length s1 /= get_length s2 222 b1 = get_filecontents s1 223 b2 = get_filecontents s2 224 225 -- creates a diff for a file or directory which needs to be added to the 226 -- repository 227 diff_added :: Bool -> (FilePath -> FileType) -> [FilePath] -> Slurpy 228 -> (FL Prim -> FL Prim) 229 diff_added summary wt fps s 230 | is_file s = case wt n of 231 TextFile -> (addfile f:>:) . 232 (if summary 233 then id 234 else diff_from_empty id f (get_filecontents s)) 235 BinaryFile -> (addfile f:>:) . 236 (if summary then id else 237 (bin_patch f B.empty (get_filecontents s))) 238 | otherwise {- is_dir s -} = 239 (adddir f:>:) 240 . foldr (.) id (map (diff_added summary wt (n:fps)) (get_dircontents s)) 241 where n = slurp_name s 242 f = mk_filepath (n:fps) 243 244 get_text :: FileContents -> [B.ByteString] 245 get_text = linesPS 246 247 empt :: FileContents 248 empt = B.empty 249 250 diff_files :: FilePath -> FileContents -> FileContents 251 -> (FL Prim -> FL Prim) 252 diff_files f o n | get_text o == [B.empty] && get_text n == [B.empty] = id 253 | get_text o == [B.empty] = diff_from_empty id f n 254 | get_text n == [B.empty] = diff_from_empty invert f o 255 diff_files f o n = if o == n 256 then id 257 else if has_bin o || has_bin n 258 then (binary f o n:>:) 259 else (canonize (hunk f 1 (linesPS o) (linesPS n)) +>+) 260 261 diff_from_empty :: (Prim -> Prim) -> FilePath -> FileContents 262 -> (FL Prim -> FL Prim) 263 diff_from_empty inv f b = 264 if b == B.empty 265 then id 266 else let p = if has_bin b 267 then binary f B.empty b 268 else if BC.last b == '\n' 269 then hunk f 1 [] $ init $ linesPS b 270 else hunk f 1 [B.empty] $ linesPS b 271 in (inv p:>:) 272 273 {- | We take a B.ByteString which represents a file's contents, and we check to see 274 whether it is a 'binary' file or a 'textual' file. We define a textual file as any file 275 which does not contain two magic characters, '\0' (the NULL character on Unix) and '^Z' 276 (Control-Z, a DOS convention). 277 278 Note that to improve performance, we won't examine *all* of the string, because that 279 falls down on large files, but just the first 4096 characters. -} 280 has_bin :: FileContents -> Bool 281 has_bin = is_funky . B.take 4096 282 #endif 283 284 #ifndef GADT_WITNESSES 285 bin_patch :: FilePath -> B.ByteString -> B.ByteString 286 -> FL Prim -> FL Prim 287 bin_patch f o n | B.null o && B.null n = id 288 | otherwise = (binary f o n:>:) 289 #endif 290 291 #ifndef GADT_WITNESSES 292 diff_removed :: (FilePath -> FileType) -> [FilePath] -> Slurpy 293 -> (FL Prim -> FL Prim) 294 diff_removed wt fps s 295 | is_file s = case wt n of 296 TextFile -> diff_files f (get_filecontents s) empt 297 . (rmfile f:>:) 298 BinaryFile -> (bin_patch f 299 (get_filecontents s) B.empty) 300 . (rmfile f:>:) 301 | otherwise {- is_dir s -} 302 = foldr (.) (rmdir f:>:) 303 $ map (diff_removed wt (n:fps)) (get_dircontents s) 304 where n = slurp_name s 305 f = mk_filepath (n:fps) 306 #endif 307 308 sync :: String -> Slurpy -> Slurpy -> IO () 309 sync path s1 s2 310 | is_file s1 && is_file s2 && 311 (get_mtime s1 == undefined_time || get_mtime s1 /= get_mtime s2) && 312 get_length s1 == get_length s2 && 313 get_filecontents s1 == get_filecontents s2 = 314 set_mtime n (get_mtime s2) 315 | is_dir s1 && is_dir s2 316 = n2 `seq` recur_sync n (get_dircontents s1) (get_dircontents s2) 317 | otherwise = return () 318 where n2 = slurp_name s2 319 n = path++"/"++n2 320 set_mtime fname ctime = setFileTimes fname ctime ctime 321 `catchall` return () 322 recur_sync _ [] _ = return () 323 recur_sync _ _ [] = return () 324 recur_sync p (s:ss) (s':ss') 325 | s < s' = recur_sync p ss (s':ss') 326 | s > s' = recur_sync p (s:ss) ss' 327 | otherwise = do sync p s s' 328 recur_sync p ss ss' 329 330 331 cmp :: FilePath -> FilePath -> IO Bool 332 cmp p1 p2 = do 333 dir1 <- doesDirectoryExist p1 334 dir2 <- doesDirectoryExist p2 335 file1 <- doesFileExist p1 336 file2 <- doesFileExist p2 337 if dir1 && dir2 338 then cmpdir p1 p2 339 else if file1 && file2 340 then cmpfile p1 p2 341 else return False 342 cmpdir :: FilePath -> FilePath -> IO Bool 343 cmpdir d1 d2 = do 344 fn1 <- fmap (filter (\f->f/="." && f /="..")) $ getDirectoryContents d1 345 fn2 <- fmap (filter (\f->f/="." && f /="..")) $ getDirectoryContents d2 346 if sort fn1 /= sort fn2 347 then return False 348 else andIO $ map (\fn-> cmp (d1++"/"++fn) (d2++"/"++fn)) fn1 349 andIO :: [IO Bool] -> IO Bool 350 andIO (iob:iobs) = do b <- iob 351 if b then andIO iobs else return False 352 andIO [] = return True 353 cmpfile :: FilePath -> FilePath -> IO Bool 354 cmpfile f1 f2 = do 355 h1 <- openBinaryFile f1 ReadMode 356 h2 <- openBinaryFile f2 ReadMode 357 l1 <- hFileSize h1 358 l2 <- hFileSize h2 359 if l1 /= l2 360 then do hClose h1 361 hClose h2 362 putStrLn $ "different file lengths for "++f1++" and "++f2 363 return False 364 else do b <- hcmp h1 h2 365 when (not b) $ putStrLn $ "files "++f1++" and "++f2++" differ" 366 hClose h1 367 hClose h2 368 return b 369 where hcmp h1 h2 = do c1 <- B.hGet h1 1024 370 c2 <- B.hGet h2 1024 371 if c1 /= c2 372 then return False 373 else if B.length c1 == 1024 374 then hcmp h1 h2 375 else return True