1 %  Copyright (C) 2002-2005 David Roundy
    2 %
    3 %  This program is free software; you can redistribute it and/or modify
    4 %  it under the terms of the GNU General Public License as published by
    5 %  the Free Software Foundation; either version 2, or (at your option)
    6 %  any later version.
    7 %
    8 %  This program is distributed in the hope that it will be useful,
    9 %  but WITHOUT ANY WARRANTY; without even the implied warranty of
   10 %  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   11 %  GNU General Public License for more details.
   12 %
   13 %  You should have received a copy of the GNU General Public License
   14 %  along with this program; see the file COPYING.  If not, write to
   15 %  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
   16 %  Boston, MA 02110-1301, USA.
   17 
   18 \darcsCommand{replace}
   19 \begin{code}
   20 {-# OPTIONS_GHC -cpp #-}
   21 {-# LANGUAGE CPP #-}
   22 
   23 module Darcs.Commands.Replace ( replace ) where
   24 
   25 import Data.Maybe ( isJust )
   26 import Control.Monad ( unless )
   27 
   28 import Darcs.Commands ( DarcsCommand(DarcsCommand, command_name, command_help,
   29                         command_description, command_extra_args,
   30                         command_extra_arg_help, command_command, command_prereq,
   31                         command_get_arg_possibilities, command_argdefaults,
   32                         command_advanced_options, command_basic_options),
   33                         nodefaults )
   34 import Darcs.Arguments ( DarcsFlag(ForceReplace, Toks), list_registered_files,
   35                          ignoretimes, umask_option, tokens, force_replace,
   36                          working_repo_dir, fixSubPaths )
   37 import Darcs.Repository ( withRepoLock, ($-),
   38                     add_to_pending, slurp_pending,
   39                     amInRepository, slurp_recorded_and_unrecorded,
   40                     applyToWorking,
   41                   )
   42 import Darcs.Patch ( Prim, apply_to_slurpy, tokreplace, force_replace_slurpy )
   43 import Darcs.Ordered ( FL(..), unsafeFL, (+>+), concatFL )
   44 import Darcs.SlurpDirectory ( slurp_hasfile, Slurpy )
   45 import RegChars ( regChars )
   46 import Data.Char ( isSpace )
   47 import Darcs.Diff ( unsafeDiff )
   48 import Darcs.RepoPath ( SubPath, sp2fn, toFilePath )
   49 import Darcs.Repository.Prefs ( FileType(TextFile) )
   50 #include "impossible.h"
   51 
   52 replace_description :: String
   53 replace_description = "Substitute one word for another."
   54 
   55 replace_help :: String
   56 replace_help =
   57  "In addition to line-based patches, Darcs supports a limited form of\n" ++
   58  "lexical substitution.  Files are treated as sequences of words, and\n" ++
   59  "each occurrence of the old word is replaced by the new word.\n" ++
   60  "This is intended to provide a clean way to rename a function or\n" ++
   61  "variable.  Such renamings typically affect lines all through the\n" ++
   62  "source code, so a traditional line-based patch would be very likely to\n" ++
   63  "conflict with other branches, requiring manual merging.\n" ++
   64  "\n" ++
   65  "Files are tokenized according to one simple rule: words are strings of\n" ++
   66  "valid token characters, and everything between them (punctuation and\n" ++
   67  "whitespace) is discarded.\n" ++
   68  "\n" ++
   69  "The tokenizer treats files as byte strings, so it is not possible for\n" ++
   70  "--token-chars to include multi-byte characters, such as the non-ASCII\n" ++
   71  "parts of UTF-8.  Similarly, trying to replace a `high-bit' character\n" ++
   72  "from a unibyte encoding will also result in replacement of the same\n" ++
   73  "byte in files with different encodings.  For example, an acute a from\n" ++
   74  "ISO 8859-1 will also match an alpha from ISO 8859-7.\n" ++
   75  "\n" ++
   76  -- FIXME: this heuristic is ham-fisted and silly.  Can we drop it?
   77  "By default, valid token characters are letters, numbers and the\n" ++
   78  "underscore (i.e. [A-Za-z0-9_]).  However if the old and/or new token\n" ++
   79  "contains either a hyphen or period, BOTH hyphen and period are treated\n" ++
   80  "as valid by default (i.e. [A-Za-z0-9_.-]).\n" ++
   81  "\n" ++
   82  "The set of valid characters can be customized using the --token-chars\n" ++
   83  "option.  The argument must be surrounded by square brackets.  If a\n" ++
   84  "hyphen occurs between two characters in the set, it is treated as a\n" ++
   85  "set range.  For example, in most locales [A-Z] denotes all uppercase\n" ++
   86  "letters.  If the first character is a caret, valid tokens are taken to\n" ++
   87  "be the complement of the remaining characters.  For example, [^ \\n\\t]\n" ++
   88  "declares all characters except the space, tab and newline as valid\n" ++
   89  "within a word.  Unlike the tr(1) and grep(1) utilities, character\n" ++
   90  "classes (such as [[:alnum:]]) are NOT supported.\n" ++
   91  "\n" ++
   92  "If you choose to use --token-chars, you are STRONGLY encouraged to do\n" ++
   93  "so consistently.  The consequences of using multiple replace patches\n" ++
   94  "with different --token-chars arguments on the same file are not well\n" ++
   95  "tested nor well understood.\n" ++
   96  "\n" ++
   97  "By default Darcs will refuse to perform a replacement if the new token\n" ++
   98  "is already in use, because the replacements would be not be\n" ++
   99  "distinguishable from the existing tokens.  This behaviour can be\n" ++
  100  "overridden by supplying the --force option, but an attempt to `darcs\n" ++
  101  "rollback' the resulting patch will affect these existing tokens.\n"
  102 
  103 -- FIXME: can  we just  delete the remaining  text?  It seems  more an
  104 -- instance of "look how clever  I am; I made commutation work" rather
  105 -- than information that is actually useful to users.
  106 \end{code}
  107 There is a potentially confusing difference, however, when a replace is
  108 used to make another replace possible:
  109 \begin{verbatim}
  110 $ darcs replace newtoken aaack ./foo.c
  111 $ darcs replace oldtoken newtoken ./foo.c
  112 $ darcs record
  113 \end{verbatim}
  114 will be valid, even if \verb!newtoken! and \verb!oldtoken! are both present
  115 in the recorded version of foo.c, while the sequence
  116 \begin{verbatim}
  117 $ [manually edit foo.c replacing newtoken with aaack]
  118 $ darcs replace oldtoken newtoken ./foo.c
  119 \end{verbatim}
  120 will fail because ``newtoken'' still exists in the recorded version of
  121 \verb!foo.c!.  The reason for the difference is that when recording, a
  122 ``replace'' patch always is recorded \emph{before} any manual changes,
  123 which is usually what you want, since often you will introduce new
  124 occurrences of the ``newtoken'' in your manual changes.  In contrast,
  125 multiple ``replace'' changes are recorded in the order in which
  126 they were made.
  127 \begin{code}
  128 
  129 replace :: DarcsCommand
  130 replace = DarcsCommand {command_name = "replace",
  131                         command_help = replace_help,
  132                         command_description = replace_description,
  133                         command_extra_args = -1,
  134                         command_extra_arg_help = ["<OLD>","<NEW>",
  135                                                   "<FILE> ..."],
  136                         command_command = replace_cmd,
  137                         command_prereq = amInRepository,
  138                         command_get_arg_possibilities = list_registered_files,
  139                         command_argdefaults = nodefaults,
  140                         command_advanced_options = [ignoretimes, umask_option],
  141                         command_basic_options =
  142                             [tokens, force_replace, working_repo_dir]}
  143 
  144 replace_cmd :: [DarcsFlag] -> [String] -> IO ()
  145 replace_cmd opts (old:new:relfs) = withRepoLock opts $- \repository -> do
  146   fs <- fixSubPaths opts relfs
  147   toks <- choose_toks opts old new
  148   let checkToken tok =
  149         unless (is_tok toks tok) $ fail $ "'"++tok++"' is not a valid token!"
  150   checkToken old
  151   checkToken new
  152   (_, work) <- slurp_recorded_and_unrecorded repository
  153   cur <- slurp_pending repository
  154   pswork <- (concatFL . unsafeFL) `fmap` sequence (map (repl toks cur work) fs)
  155   add_to_pending repository pswork
  156   applyToWorking repository opts pswork `catch` \e ->
  157       fail $ "Can't do replace on working!\n"
  158           ++ "Perhaps one of the files already contains '"++ new++"'?\n"
  159           ++ show e
  160   where ftf _ = TextFile
  161 
  162         repl :: String -> Slurpy -> Slurpy -> SubPath -> IO (FL Prim)
  163         repl toks cur work f =
  164           if not $ slurp_hasfile (sp2fn f) work
  165           then do putStrLn $ "Skipping file '"++f_fp++"' which isn't in the repository."
  166                   return NilFL
  167           else if ForceReplace `elem` opts ||
  168                   isJust (apply_to_slurpy (tokreplace f_fp toks old new) work) ||
  169                   isJust (apply_to_slurpy (tokreplace f_fp toks old new) cur)
  170                then return (get_force_replace f toks work)
  171                else do putStrLn $ "Skipping file '"++f_fp++"'"
  172                        putStrLn $ "Perhaps the recorded version of this " ++
  173                                   "file already contains '" ++new++"'?"
  174                        putStrLn $ "Use the --force option to override."
  175                        return NilFL
  176           where f_fp = toFilePath f
  177 
  178         get_force_replace :: SubPath -> String -> Slurpy -> FL Prim
  179         get_force_replace f toks s =
  180             case force_replace_slurpy (tokreplace f_fp toks new old) s of
  181             Nothing -> bug "weird forcing bug in replace."                                                                                                           
  182             Just s' -> case unsafeDiff [] ftf s s' of
  183                        pfix -> pfix +>+ (tokreplace f_fp toks old new :>: NilFL)
  184             where f_fp = toFilePath f
  185 
  186 replace_cmd _ _ = fail "Usage: darcs replace OLD NEW [FILES]"
  187 
  188 default_toks :: String
  189 default_toks = "A-Za-z_0-9"
  190 filename_toks :: String
  191 filename_toks = "A-Za-z_0-9\\-\\."
  192 
  193 -- | Given a set of characters and a string, returns true iff the
  194 -- string contains only characters from the set.  A set beginning with
  195 -- a caret (@^@) is treated as a complementary set.
  196 is_tok :: String -> String -> Bool
  197 is_tok _ "" = False
  198 is_tok toks s = and $ map (regChars toks) s
  199 
  200 -- | This function checks for @--token-chars@ on the command-line.  If
  201 -- found, it validates the argument and returns it, without the
  202 -- surrounding square brackets.  Otherwise, it returns either
  203 -- 'default_toks' or 'filename_toks' as explained in 'replace_help'.
  204 -- 
  205 -- Note: Limitations in the current replace patch file format prevents
  206 -- tokens and token-char specifiers from containing any whitespace.
  207 choose_toks :: [DarcsFlag] -> String -> String -> IO String
  208 choose_toks (Toks t:_) a b
  209     | length t <= 2 =
  210         bad_token_spec $ "It must contain more than 2 characters, because " ++
  211                          "it should be enclosed in square brackets"
  212     | head t /= '[' || last t /= ']' =
  213         bad_token_spec "It should be enclosed in square brackets"
  214     | '^' == head tok && length tok == 1 =
  215         bad_token_spec "Must be at least one character in the complementary set"
  216     | any isSpace t =
  217         bad_token_spec "Space is not allowed in the spec"
  218     | any isSpace a = bad_token_spec $ spacey_token a
  219     | any isSpace b = bad_token_spec $ spacey_token b
  220     | not (is_tok tok a) = bad_token_spec $ not_a_token a
  221     | not (is_tok tok b) = bad_token_spec $ not_a_token b
  222     | otherwise          = return tok
  223     where tok = init $ tail t :: String
  224           bad_token_spec msg = fail $ "Bad token spec: '"++ t ++"' ("++ msg ++")"
  225           spacey_token x = x ++ " must not contain any space"
  226           not_a_token x = x ++ " is not a token, according to your spec"
  227 choose_toks (_:fs) a b = choose_toks fs a b
  228 choose_toks [] a b = if is_tok default_toks a && is_tok default_toks b
  229                      then return default_toks
  230                      else return filename_toks
  231 \end{code}
  232