1 %  Copyright (C) 2003-2005 David Roundy
    2 %
    3 %  This program is free software; you can redistribute it and/or modify
    4 %  it under the terms of the GNU General Public License as published by
    5 %  the Free Software Foundation; either version 2, or (at your option)
    6 %  any later version.
    7 %
    8 %  This program is distributed in the hope that it will be useful,
    9 %  but WITHOUT ANY WARRANTY; without even the implied warranty of
   10 %  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   11 %  GNU General Public License for more details.
   12 %
   13 %  You should have received a copy of the GNU General Public License
   14 %  along with this program; see the file COPYING.  If not, write to
   15 %  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
   16 %  Boston, MA 02110-1301, USA.
   17 
   18 \darcsCommand{optimize}
   19 \begin{code}
   20 {-# OPTIONS_GHC -cpp #-}
   21 {-# LANGUAGE CPP #-}
   22 
   23 module Darcs.Commands.Optimize ( optimize ) where
   24 import Control.Monad ( when, unless )
   25 import Data.Maybe ( isJust )
   26 import Text.Regex ( mkRegex, matchRegex )
   27 import System.Directory ( getDirectoryContents, doesDirectoryExist )
   28 
   29 import Darcs.Hopefully ( hopefully, info )
   30 import Darcs.Commands ( DarcsCommand(..), nodefaults )
   31 import Darcs.Arguments ( DarcsFlag( Compress, UnCompress,
   32                                     NoCompress, Reorder,
   33                                     TagName,
   34                                     Relink, RelinkPristine ),
   35                         reorder_patches,
   36                         uncompress_nocompress,
   37                         relink, relink_pristine, sibling,
   38                         flagsToSiblings,
   39                         working_repo_dir, umask_option,
   40                       )
   41 import Darcs.Repository.Prefs ( get_preflist )
   42 import Darcs.Repository ( Repository, PatchSet, withRepoLock, ($-), withGutsOf,
   43                           read_repo, optimizeInventory, slurp_recorded,
   44                           tentativelyReplacePatches, cleanRepository,
   45                           amInRepository, finalizeRepositoryChanges )
   46 import Darcs.Ordered ( RL(..), unsafeUnRL, (+<+), mapFL_FL, reverseRL, mapRL, concatRL )
   47 import Darcs.Patch.Info ( PatchInfo, just_name, human_friendly )
   48 import Darcs.Patch ( RepoPatch )
   49 import ByteStringUtils ( gzReadFilePS )
   50 import Darcs.Patch.Depends ( deep_optimize_patchset, slightly_optimize_patchset,
   51                  get_patches_beyond_tag, get_patches_in_tag,
   52                )
   53 import Darcs.Lock ( maybeRelink, gzWriteAtomicFilePS, writeAtomicFilePS )
   54 import Darcs.RepoPath ( toFilePath )
   55 import Darcs.Utils ( withCurrentDirectory )
   56 import Progress ( debugMessage )
   57 import Printer ( putDocLn, text, ($$) )
   58 import Darcs.SlurpDirectory ( slurp, list_slurpy_files )
   59 import Darcs.Repository.Pristine ( identifyPristine, pristineDirectory )
   60 import Darcs.Sealed ( FlippedSeal(..), unsafeUnseal )
   61 import Darcs.Global ( darcsdir )
   62 #include "impossible.h"
   63 
   64 optimize_description :: String
   65 optimize_description = "Optimize the repository."
   66 
   67 optimize_help :: String
   68 optimize_help =
   69  "The `darcs optimize' command modifies the current repository in an\n" ++
   70  "attempt to reduce its resource requirements.  By default a single\n" ++
   71  "fast, safe optimization is performed; additional optimization\n" ++
   72  "techniques can be enabled by passing options to `darcs optimize'.\n" ++
   73  "\n" ++ optimize_help_inventory ++
   74  -- "\n" ++ optimize_help_reorder ++
   75  "\n" ++ optimize_help_relink ++
   76  -- checkpoints and uncompression are least useful, so they are last.
   77  "\n" ++ optimize_help_compression ++
   78  "\n" ++ optimize_help_checkpoint ++
   79  "\n" ++
   80  "There is one more optimization which CAN NOT be performed by this\n" ++
   81  "command.  Every time your record a patch, a new inventory file is\n" ++
   82  "written to _darcs/inventories/, and old inventories are never reaped.\n" ++
   83  "\n" ++
   84  "If _darcs/inventories/ is consuming a relatively large amount of\n" ++
   85  "space, you can safely reclaim it by using `darcs get' to make a\n" ++
   86  "complete copy of the repo.  When doing so, don't forget to copy over\n" ++
   87  "any unsaved changes you have made to the working tree or to\n" ++
   88  "unversioned files in _darcs/prefs/ (such as _darcs/prefs/author).\n"
   89 
   90 optimize :: DarcsCommand
   91 optimize = DarcsCommand {command_name = "optimize",
   92                          command_help = optimize_help,
   93                          command_description = optimize_description,
   94                          command_extra_args = 0,
   95                          command_extra_arg_help = [],
   96                          command_command = optimize_cmd,
   97                          command_prereq = amInRepository,
   98                          command_get_arg_possibilities = return [],
   99                          command_argdefaults = nodefaults,
  100                          command_advanced_options = [uncompress_nocompress, umask_option],
  101                          command_basic_options = [working_repo_dir,
  102                                                  reorder_patches,
  103                                                  sibling, relink,
  104                                                  relink_pristine]}
  105 
  106 optimize_cmd :: [DarcsFlag] -> [String] -> IO ()
  107 optimize_cmd origopts _ = withRepoLock opts $- \repository -> do
  108     cleanRepository repository
  109     do_reorder opts repository
  110     do_optimize_inventory repository
  111     when (Compress `elem` opts || UnCompress `elem` opts) $ optimize_compression opts
  112     when (Relink `elem` opts || (RelinkPristine `elem` opts)) $
  113         do_relink opts repository
  114     putStrLn "Done optimizing!"
  115   where opts = if UnCompress `elem` origopts then NoCompress:origopts else origopts
  116 is_tag :: PatchInfo -> Bool
  117 is_tag pinfo = take 4 (just_name pinfo) == "TAG "
  118 
  119 optimize_help_inventory :: String
  120 optimize_help_inventory =
  121  "The default optimization moves recent patches (those not included in\n" ++
  122  "the latest tag) to the `front', reducing the amount that a typical\n" ++
  123  "remote command needs to download.  It should also reduce the CPU time\n" ++
  124  "needed for some operations.\n"
  125 
  126 do_optimize_inventory :: RepoPatch p => Repository p -> IO ()
  127 do_optimize_inventory repository = do
  128     debugMessage "Writing out a nice copy of the inventory."
  129     optimizeInventory repository
  130     debugMessage "Done writing out a nice copy of the inventory."
  131 
  132 optimize_help_checkpoint :: String
  133 optimize_help_checkpoint =
  134  "If the repository is in `old-fashioned-inventory' format, the `darcs\n" ++
  135  "optimize --checkpoint' command creates a checkpoint of the latest tag.\n" ++
  136  "This checkpoint is used by `darcs get --partial' to create partial\n" ++
  137  "repositories.  With the `--tag' option, checkpoints for older tags can\n" ++
  138  "be created.  In newer repository formats, this feature has been\n" ++
  139  "replaced by `darcs get --lazy', which does not require checkpoints.\n"
  140 
  141 get_tag :: RepoPatch p => [DarcsFlag] -> Repository p -> IO (Maybe PatchInfo)
  142 get_tag [] r = do ps <- read_repo r
  143                   case filter is_tag $ lasts $ mapRL (mapRL info) ps of
  144                       [] -> do putStrLn "There is no tag to checkpoint!"
  145                                return Nothing
  146                       (pinfo:_) -> return $ Just pinfo
  147 get_tag (TagName t:_) r =
  148     do ps <- read_repo r
  149        case filter (match_tag t) $ lasts $ mapRL (mapRL info) ps of
  150          (pinfo:_) -> return $ Just pinfo
  151          _ -> case filter (match_tag t) $
  152                    lasts $ mapRL (mapRL info) $ deep_optimize_patchset ps of
  153               (pinfo:_) -> return $ Just pinfo
  154               _ -> do putStr "Cannot checkpoint any tag "
  155                       putStr $ "matching '"++t++"'\n"
  156                       return Nothing
  157 get_tag (_:fs) r = get_tag fs r
  158 
  159 lasts :: [[a]] -> [a]
  160 lasts [] = []
  161 lasts (x@(_:_):ls) = last x : lasts ls
  162 lasts ([]:ls) = lasts ls
  163 
  164 mymatch :: String -> PatchInfo -> Bool
  165 mymatch r = match_name $ matchRegex (mkRegex r)
  166 match_name :: (String -> Maybe a) -> PatchInfo -> Bool
  167 match_name ch pinfo = isJust $ ch (just_name pinfo)
  168 match_tag :: String -> PatchInfo -> Bool
  169 match_tag ('^':n) = mymatch $ "^TAG "++n
  170 match_tag n = mymatch $ "^TAG .*"++n
  171 
  172 
  173 optimize_help_compression :: String
  174 optimize_help_compression =
  175  "By default patches are compressed with zlib (RFC 1951) to reduce\n" ++
  176  "storage (and download) size.  In exceptional circumstances, it may be\n" ++
  177  "preferable to avoid compression.  In this case the `--dont-compress'\n" ++
  178  "option can be used (e.g. with `darcs record') to avoid compression.\n" ++
  179  "\n" ++
  180  "The `darcs optimize --uncompress' and `darcs optimize --compress'\n" ++
  181  "commands can be used to ensure existing patches in the current\n" ++
  182  "repository are respectively uncompressed or compressed.  Note that\n" ++
  183  "repositories in the legacy `old-fashioned-inventory' format have a .gz\n" ++
  184  "extension on patch files even when uncompressed.\n"
  185 
  186 optimize_compression :: [DarcsFlag] -> IO ()
  187 optimize_compression opts = do
  188     putStrLn "Optimizing (un)compression of patches..."
  189     do_compress (darcsdir++"/patches")
  190     putStrLn "Optimizing (un)compression of inventories..."
  191     do_compress (darcsdir++"/inventories")
  192     where do_compress f =
  193               do isd <- doesDirectoryExist f
  194                  if isd then withCurrentDirectory f $
  195                              do fs <- filter notdot `fmap` getDirectoryContents "."
  196                                 mapM_ do_compress fs
  197                         else if Compress `elem` opts
  198                              then gzReadFilePS f >>= gzWriteAtomicFilePS f
  199                              else gzReadFilePS f >>= writeAtomicFilePS f
  200           notdot ('.':_) = False
  201           notdot _ = True
  202 
  203 optimize_help_relink :: String
  204 optimize_help_relink =
  205  "The `darcs optimize --relink' command hard-links patches that the\n" ++
  206  "current repository has in common with its peers.  Peers are those\n" ++
  207  "repositories listed in _darcs/prefs/sources, or defined with the\n" ++
  208  "`--sibling' option (which can be used multiple times).\n" ++
  209  "\n" ++
  210  "Darcs uses hard-links automatically, so this command is rarely needed.\n" ++
  211  "It is most useful if you used `cp -r' instead of `darcs get' to copy a\n" ++
  212  "repository, or if you pulled the same patch from a remote repository\n" ++
  213  "into multiple local repositories.\n" ++
  214  "\n" ++
  215  "A `darcs optimize --relink-pristine' command is also available, but\n" ++
  216  "generally SHOULD NOT be used.  It results in a relatively small space\n" ++
  217  "saving at the cost of making many Darcs commands MUCH slower.\n"
  218 
  219 do_relink :: RepoPatch p => [DarcsFlag] -> Repository p -> IO ()
  220 do_relink opts repository =
  221     do some_siblings <- return (flagsToSiblings opts)
  222        defrepolist <- get_preflist "defaultrepo"
  223        siblings <- return (map toFilePath some_siblings ++ defrepolist)
  224        if (siblings == []) 
  225           then putStrLn "No siblings -- no relinking done."
  226           else do when (Relink `elem` opts) $
  227                       do debugMessage "Relinking patches..."
  228                          patches <-
  229                            (fmap list_slurpy_files) (slurp $ darcsdir++"/patches")
  230                          maybeRelinkFiles siblings patches (darcsdir++"/patches")
  231                   when (RelinkPristine `elem` opts) $
  232                       do pristine <- identifyPristine
  233                          case (pristineDirectory pristine) of
  234                              (Just d) -> do
  235                                  debugMessage "Relinking pristine tree..."
  236                                  c <- slurp_recorded repository
  237                                  maybeRelinkFiles
  238                                      siblings (list_slurpy_files c) d
  239                              Nothing -> return ()
  240                   debugMessage "Done relinking."
  241                   return ()
  242        return ()
  243 
  244 maybeRelinkFiles :: [String] -> [String] -> String -> IO ()
  245 maybeRelinkFiles src dst dir = 
  246     mapM_ (maybeRelinkFile src) (map ((dir ++ "/") ++) dst)
  247 
  248 maybeRelinkFile :: [String] -> String -> IO ()
  249 maybeRelinkFile [] _ = return ()
  250 maybeRelinkFile (h:t) f =
  251     do done <- maybeRelink (h ++ "/" ++ f) f
  252        unless done $
  253            maybeRelinkFile t f
  254        return ()
  255 
  256 
  257 \end{code}
  258 \begin{options}
  259 --reorder-patches
  260 \end{options}
  261 
  262 The \verb|--reorder-patches| option causes Darcs to create an optimal
  263 ordering of its internal patch inventory. This may help to produce shorter
  264 `context' lists when sending patches, and may improve performance for some
  265 other operations as well.  You should not run \verb!--reorder-patches! on a
  266 repository from which someone may be simultaneously pulling or getting, as
  267 this could lead to repository corruption.
  268 \begin{code}
  269 
  270 -- FIXME: someone needs to grovel through the source and determine
  271 -- just how optimizeInventory differs from do_reorder.  The following
  272 -- is purely speculation. --twb, 2009-04
  273 -- optimize_help_reorder :: String
  274 -- optimize_help_reorder =
  275 --  "The `darcs optimize --reorder' command is a more comprehensive version\n" ++
  276 --  "of the default optimization.  It reorders patches with respect to ALL\n" ++
  277 --  "tags, rather than just the latest tag.\n"
  278 
  279 do_reorder :: RepoPatch p => [DarcsFlag] -> Repository p -> IO ()
  280 do_reorder opts _ | not (Reorder `elem` opts) = return ()
  281 do_reorder opts repository = do
  282     debugMessage "Reordering the inventory."
  283     psnew <- choose_order `fmap` read_repo repository
  284     let ps = mapFL_FL hopefully $ reverseRL $ head $ unsafeUnRL psnew
  285     withGutsOf repository $ do tentativelyReplacePatches repository opts ps
  286                                finalizeRepositoryChanges repository
  287     debugMessage "Done reordering the inventory."
  288 
  289 choose_order :: RepoPatch p => PatchSet p -> PatchSet p
  290 choose_order ps | isJust last_tag =
  291     case slightly_optimize_patchset $ unsafeUnseal $ get_patches_in_tag lt ps of 
  292     ((t:<:NilRL):<:pps) -> case get_patches_beyond_tag lt ps of
  293                            FlippedSeal (p :<: NilRL) -> (p+<+(t:<:NilRL)) :<: pps
  294                            _ -> impossible                                                                                                            
  295     _ -> impossible                                                                                                            
  296     where last_tag = case filter is_tag $ mapRL info $ concatRL ps of
  297                      (t:_) -> Just t
  298                      _ -> Nothing
  299           lt = fromJust last_tag
  300 choose_order ps = ps
  301 \end{code}