1 -- Copyright (C) 2002-2003 David Roundy
    2 --
    3 -- This program is free software; you can redistribute it and/or modify
    4 -- it under the terms of the GNU General Public License as published by
    5 -- the Free Software Foundation; either version 2, or (at your option)
    6 -- any later version.
    7 --
    8 -- This program is distributed in the hope that it will be useful,
    9 -- but WITHOUT ANY WARRANTY; without even the implied warranty of
   10 -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   11 -- GNU General Public License for more details.
   12 --
   13 -- You should have received a copy of the GNU General Public License
   14 -- along with this program; see the file COPYING.  If not, write to
   15 -- the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
   16 -- Boston, MA 02110-1301, USA.
   17 
   18 {-# OPTIONS_GHC -cpp -fglasgow-exts #-}
   19 {-# LANGUAGE CPP #-}
   20 
   21 #include "gadts.h"
   22 
   23 module Darcs.Diff ( unsafeDiffAtPaths, unsafeDiff, sync, cmp
   24 #ifndef GADT_WITNESSES
   25                   , diff_files
   26 #endif
   27                   ) where
   28 
   29 import System.Posix
   30      ( setFileTimes )
   31 import System.IO ( IOMode(ReadMode), hFileSize, hClose )
   32 import System.Directory ( doesDirectoryExist, doesFileExist,
   33                    getDirectoryContents,
   34                  )
   35 import Control.Monad ( when )
   36 import Data.List ( sort
   37 #ifndef GADT_WITNESSES
   38                  , intersperse, isPrefixOf
   39 #endif
   40                  )
   41 #ifndef GADT_WITNESSES
   42 import Data.Maybe ( catMaybes )
   43 #endif
   44 
   45 #ifndef GADT_WITNESSES
   46 import ByteStringUtils ( is_funky, linesPS)
   47 import qualified Data.ByteString.Char8 as BC (last)
   48 import qualified Data.ByteString as B       (null, empty, take, ByteString)
   49 #endif
   50 import qualified Data.ByteString as B       (hGet, length)
   51 
   52 import Darcs.SlurpDirectory ( Slurpy, slurp_name, is_dir, is_file,
   53 #ifndef GADT_WITNESSES
   54                         get_slurp,
   55 #endif
   56                         get_dircontents, get_filecontents,
   57                         get_mtime, get_length,
   58                         undefined_time
   59 #ifndef GADT_WITNESSES
   60                         , FileContents, undefined_size
   61 #endif
   62                       )
   63 #ifndef GADT_WITNESSES
   64 import System.FilePath.Posix ( (</>) )
   65 #endif
   66 import Darcs.Patch ( Prim
   67 #ifndef GADT_WITNESSES
   68                    , hunk, canonize, rmfile, rmdir
   69                    , addfile, adddir
   70                    , binary, invert
   71 #endif
   72                    )
   73 #ifndef GADT_WITNESSES
   74 import Darcs.Patch.FileName( fp2fn, breakup )
   75 #endif
   76 import System.IO ( openBinaryFile )
   77 import Darcs.Repository.Prefs ( FileType(..) )
   78 import Darcs.Flags ( DarcsFlag(..) )
   79 import Darcs.Utils ( catchall )
   80 import Darcs.Ordered ( FL(..)
   81 #ifndef GADT_WITNESSES
   82                            , (+>+)
   83 #endif
   84                            )
   85 #ifndef GADT_WITNESSES
   86 #include "impossible.h"
   87 #endif
   88 
   89 -- | The unsafeDiffAtPaths function calls diff_at_path for a set of files and
   90 --   and directories, and returns all changes to those files. It recurses into
   91 --   given directories when searching for changes.
   92 --
   93 --   Comparing paths and not slurpies is useful when the user
   94 --   requests a diff for a file that is created or removed in the working copy:
   95 --   then there is no slurpy for the file in the /current/ or /working/ slurpy
   96 --   respectively.
   97 --
   98 --   The given paths must always be fixed repository paths starting with a
   99 --   ".". It is safe to pass overlapping paths.
  100 --
  101 --   The booleans in the first argument tell whether to ignore mtimes, whether
  102 --   we must look for additions and if we're diffing for a summary only.
  103 --
  104 --   It returns an FL of patches, that contains all the changes that have been
  105 --   made at all those paths.
  106 unsafeDiffAtPaths :: (Bool, Bool, Bool) -> (FilePath -> FileType) ->
  107                Slurpy -> Slurpy -> [FilePath] -> FL Prim C(x y)
  108 #ifdef GADT_WITNESSES
  109 unsafeDiffAtPaths = undefined
  110 #else
  111 unsafeDiffAtPaths flags filetypeFunction s1 s2 paths =
  112     foldr (+>+) NilFL (catMaybes diffsPerPath)
  113   where diffsPerPath = map differ safePaths
  114         differ       = diff_at_path flags filetypeFunction s1 s2
  115         safePaths    = make_nonoverlapping_path_set paths
  116 
  117 diff_at_path :: (Bool, Bool, Bool) -> (FilePath -> FileType)
  118                 -> Slurpy -> Slurpy -> FilePath -> Maybe (FL Prim)
  119 diff_at_path (ignoreTimes, lookForAdds, summary) filetypeFunction s1 s2 path =
  120     case (pathIn1, pathIn2) of
  121         (Nothing, Nothing) -> Nothing
  122         (Nothing, Just s2PathSlurpy) -> do
  123             Just $ diff_added summary filetypeFunction initialFps s2PathSlurpy NilFL
  124         (Just s1PathSlurpy, Nothing) -> do
  125             Just $ diff_removed filetypeFunction initialFps s1PathSlurpy NilFL
  126         (Just s1PathSlurpy, Just s2PathSlurpy) ->
  127             Just $ gendiff (ignoreTimes, lookForAdds, summary) filetypeFunction
  128                            initialFps s1PathSlurpy s2PathSlurpy NilFL
  129   where pathIn1 = get_slurp (fp2fn path) s1
  130         pathIn2 = get_slurp (fp2fn path) s2
  131         initialFps = tail $ reverse (breakup path)
  132 
  133 make_nonoverlapping_path_set :: [FilePath] -> [FilePath]
  134 make_nonoverlapping_path_set = map unbreakup . delete_overlapping . map breakup . sort
  135   where
  136     delete_overlapping :: [[FilePath]] -> [[FilePath]]
  137     delete_overlapping (p1:p2:ps) = if p1 `isPrefixOf` p2
  138                                       then delete_overlapping (p1:ps)
  139                                       else p1 : delete_overlapping (p2:ps)
  140     delete_overlapping ps         = ps
  141     unbreakup = concat . intersperse "/"
  142 #endif
  143 
  144 -- The diff function takes a recursive diff of two slurped-up directory trees.
  145 -- The code involved is actually pretty trivial.  \verb!paranoid_diff! runs a
  146 -- diff in which we don't make the assumption that files with the same
  147 -- modification time are identical.
  148 
  149 unsafeDiff :: [DarcsFlag]
  150            -> (FilePath -> FileType) -> Slurpy -> Slurpy -> FL Prim C(x y)
  151 #ifdef GADT_WITNESSES
  152 unsafeDiff = undefined
  153 #else
  154 unsafeDiff opts wt s1 s2
  155     = case diff_at_path (ignoreTimes, lookForAdds, summary)  wt s1 s2 "" of
  156           Just d -> d
  157           _      -> impossible -- because "" always exists in a slurpy                                                      
  158   where -- NoSummary/Summary both present gives False
  159         -- Just Summary gives True
  160         -- Just NoSummary gives False
  161         -- Neither gives False
  162         summary = Summary `elem` opts && NoSummary `notElem` opts
  163         lookForAdds = LookForAdds `elem` opts
  164         ignoreTimes = IgnoreTimes `elem` opts
  165 
  166 mk_filepath :: [FilePath] -> FilePath
  167 mk_filepath fps = concat $ intersperse "/" $ reverse fps
  168 
  169 gendiff :: (Bool,Bool,Bool)
  170         -> (FilePath -> FileType) -> [FilePath] -> Slurpy -> Slurpy
  171         -> (FL Prim -> FL Prim)
  172 gendiff opts@(isparanoid,_,_) wt fps s1 s2
  173     | is_file s1 && is_file s2 = diff_regular_files isparanoid wt f s1 s2
  174     | is_dir s1 && is_dir s2 =
  175           let fps' = case n2 of
  176                          "." -> fps
  177                          _ -> n2:fps
  178           in fps' `seq` recur_diff opts (wt . (n2</>)) fps' dc1 dc2
  179     | otherwise = id
  180     where n2 = slurp_name s2
  181           f = mk_filepath (n2:fps)
  182           dc1 = get_dircontents s1
  183           dc2 = get_dircontents s2
  184 
  185 -- recur_diff or recursive diff
  186 -- First parameter is (IgnoreTimes?, LookforAdds?, Summary?)
  187 recur_diff :: (Bool,Bool,Bool)
  188            -> (FilePath -> FileType) -> [FilePath] -> [Slurpy] -> [Slurpy]
  189            -> (FL Prim -> FL Prim)
  190 recur_diff _ _ _ [] [] = id
  191 recur_diff opts@(_,doadd,summary) wt fps (s:ss) (s':ss')
  192     -- this is the case if a file has been removed in the working directory
  193     | s < s' = diff_removed wt fps s . recur_diff opts wt fps ss (s':ss')
  194     -- this next case is when there is a file in the directory that is not
  195     -- in the repository (ie, not managed by darcs)
  196     | s > s' = let rest = recur_diff opts wt fps (s:ss) ss'
  197                in if not doadd then                        rest
  198                                else diff_added summary wt fps s' . rest
  199     -- actually compare the files because the names match
  200     | s == s' = gendiff opts wt fps s s' . recur_diff opts wt fps ss ss'
  201 recur_diff opts wt fps (s:ss) [] =
  202     diff_removed wt fps s . recur_diff opts wt fps ss []
  203 recur_diff opts@(_,True,summary) wt fps [] (s':ss') =
  204     diff_added summary wt fps s' . recur_diff opts wt fps [] ss'
  205 recur_diff (_,False,_) _ _ [] _ = id
  206 recur_diff _ _ _ _ _ = impossible                                                                                              
  207 
  208 -- diff, taking into account paranoidness and file type, two regular files
  209 diff_regular_files :: Bool -> (FilePath -> FileType) -> FilePath -> Slurpy -> Slurpy -> (FL Prim -> FL Prim)
  210 diff_regular_files ignoreTimes filetypeFunction f s1 s2 = 
  211     if maybe_differ   
  212         then case filetypeFunction (slurp_name s2) of                                     
  213                TextFile -> diff_files f b1 b2                    
  214                BinaryFile -> if b1 /= b2 then (binary f b1 b2:>:)
  215                                          else id                 
  216         else id
  217   where maybe_differ = ignoreTimes
  218                      || get_mtime s1 == undefined_time
  219                      || get_mtime s1 /= get_mtime s2
  220                      || get_length s1 == undefined_size
  221                      || get_length s1 /= get_length s2
  222         b1 = get_filecontents s1
  223         b2 = get_filecontents s2
  224 
  225 -- creates a diff for a file or directory which needs to be added to the
  226 -- repository
  227 diff_added :: Bool -> (FilePath -> FileType) -> [FilePath] -> Slurpy
  228            -> (FL Prim -> FL Prim)
  229 diff_added summary wt fps s
  230     | is_file s = case wt n of
  231                   TextFile -> (addfile f:>:) .
  232                               (if summary
  233                                then id
  234                                else diff_from_empty id f (get_filecontents s))
  235                   BinaryFile -> (addfile f:>:) .
  236                                 (if summary then id else
  237                                 (bin_patch f B.empty (get_filecontents s)))
  238     | otherwise {- is_dir s -} =
  239         (adddir f:>:)
  240       . foldr (.) id (map (diff_added summary wt (n:fps)) (get_dircontents s))
  241     where n = slurp_name s
  242           f = mk_filepath (n:fps)
  243 
  244 get_text :: FileContents -> [B.ByteString]
  245 get_text = linesPS
  246 
  247 empt :: FileContents
  248 empt = B.empty
  249 
  250 diff_files :: FilePath -> FileContents -> FileContents
  251            -> (FL Prim -> FL Prim)
  252 diff_files f o n | get_text o == [B.empty] && get_text n == [B.empty] = id
  253                  | get_text o == [B.empty] = diff_from_empty id f n
  254                  | get_text n == [B.empty] = diff_from_empty invert f o
  255 diff_files f o n = if o == n
  256                    then id
  257                    else if has_bin o || has_bin n
  258                         then (binary f o n:>:)
  259                         else (canonize (hunk f 1 (linesPS o) (linesPS n)) +>+)
  260 
  261 diff_from_empty :: (Prim -> Prim) -> FilePath -> FileContents
  262                 -> (FL Prim -> FL Prim)
  263 diff_from_empty inv f b =
  264     if b == B.empty
  265     then id
  266     else let p = if has_bin b
  267                  then binary f B.empty b
  268                  else if BC.last b == '\n'
  269                       then hunk f 1 [] $ init $ linesPS b
  270                       else hunk f 1 [B.empty] $ linesPS b
  271          in (inv p:>:)
  272 
  273 {- | We take a B.ByteString which represents a file's contents, and we check to see
  274 whether it is a 'binary' file or a 'textual' file. We define a textual file as any file
  275 which does not contain two magic characters, '\0' (the NULL character on Unix) and '^Z'
  276 (Control-Z, a DOS convention).
  277 
  278 Note that to improve performance, we won't examine *all* of the string, because that
  279 falls down on large files, but just the first 4096 characters. -}
  280 has_bin :: FileContents -> Bool
  281 has_bin = is_funky . B.take 4096
  282 #endif
  283 
  284 #ifndef GADT_WITNESSES
  285 bin_patch :: FilePath -> B.ByteString -> B.ByteString
  286           -> FL Prim -> FL Prim
  287 bin_patch f o n | B.null o && B.null n = id
  288                 | otherwise = (binary f o n:>:)
  289 #endif
  290 
  291 #ifndef GADT_WITNESSES
  292 diff_removed :: (FilePath -> FileType) -> [FilePath] -> Slurpy
  293              -> (FL Prim -> FL Prim)
  294 diff_removed wt fps s
  295     | is_file s = case wt n of
  296                   TextFile -> diff_files f (get_filecontents s) empt
  297                             . (rmfile f:>:)
  298                   BinaryFile -> (bin_patch f
  299                                  (get_filecontents s) B.empty)
  300                               . (rmfile f:>:)
  301     | otherwise {- is_dir s -}
  302         = foldr (.) (rmdir f:>:)
  303         $ map (diff_removed wt (n:fps)) (get_dircontents s)
  304     where n = slurp_name s
  305           f = mk_filepath (n:fps)
  306 #endif
  307 
  308 sync :: String -> Slurpy -> Slurpy -> IO ()
  309 sync path s1 s2
  310     | is_file s1 && is_file s2 &&
  311       (get_mtime s1 == undefined_time || get_mtime s1 /= get_mtime s2) &&
  312       get_length s1 == get_length s2 &&
  313       get_filecontents s1 == get_filecontents s2 =
  314         set_mtime n (get_mtime s2)
  315     | is_dir s1 && is_dir s2
  316         = n2 `seq` recur_sync n (get_dircontents s1) (get_dircontents s2)
  317     | otherwise = return ()
  318     where n2 = slurp_name s2
  319           n = path++"/"++n2
  320           set_mtime fname ctime = setFileTimes fname ctime ctime
  321                        `catchall` return ()
  322           recur_sync _ [] _ = return ()
  323           recur_sync _ _ [] = return ()
  324           recur_sync p (s:ss) (s':ss')
  325               | s < s' = recur_sync p ss (s':ss')
  326               | s > s' = recur_sync p (s:ss) ss'
  327               | otherwise = do sync p s s'
  328                                recur_sync p ss ss'
  329 
  330 
  331 cmp :: FilePath -> FilePath -> IO Bool
  332 cmp p1 p2 = do
  333   dir1 <- doesDirectoryExist p1
  334   dir2 <- doesDirectoryExist p2
  335   file1 <- doesFileExist p1
  336   file2 <- doesFileExist p2
  337   if dir1 && dir2
  338      then cmpdir p1 p2
  339      else if file1 && file2
  340           then cmpfile p1 p2
  341           else return False
  342 cmpdir :: FilePath -> FilePath -> IO Bool
  343 cmpdir d1 d2 = do
  344   fn1 <- fmap (filter (\f->f/="." && f /="..")) $ getDirectoryContents d1
  345   fn2 <- fmap (filter (\f->f/="." && f /="..")) $ getDirectoryContents d2
  346   if sort fn1 /= sort fn2
  347      then return False
  348      else andIO $ map (\fn-> cmp (d1++"/"++fn) (d2++"/"++fn)) fn1
  349 andIO :: [IO Bool] -> IO Bool
  350 andIO (iob:iobs) = do b <- iob
  351                       if b then andIO iobs else return False
  352 andIO [] = return True
  353 cmpfile :: FilePath -> FilePath -> IO Bool
  354 cmpfile f1 f2 = do
  355   h1 <- openBinaryFile f1 ReadMode
  356   h2 <- openBinaryFile f2 ReadMode
  357   l1 <- hFileSize h1
  358   l2 <- hFileSize h2
  359   if l1 /= l2
  360      then do hClose h1
  361              hClose h2
  362              putStrLn $ "different file lengths for "++f1++" and "++f2
  363              return False
  364      else do b <- hcmp h1 h2
  365              when (not b) $ putStrLn $ "files "++f1++" and "++f2++" differ"
  366              hClose h1
  367              hClose h2
  368              return b
  369     where hcmp h1 h2 = do c1 <- B.hGet h1 1024
  370                           c2 <- B.hGet h2 1024
  371                           if c1 /= c2
  372                              then return False
  373                              else if B.length c1 == 1024
  374                                   then hcmp h1 h2
  375                                   else return True