1 %  Copyright (C) 2004 David Roundy
    2 %
    3 %  This program is free software; you can redistribute it and/or modify
    4 %  it under the terms of the GNU General Public License as published by
    5 %  the Free Software Foundation; either version 2, or (at your option)
    6 %  any later version.
    7 %
    8 %  This program is distributed in the hope that it will be useful,
    9 %  but WITHOUT ANY WARRANTY; without even the implied warranty of
   10 %  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   11 %  GNU General Public License for more details.
   12 %
   13 %  You should have received a copy of the GNU General Public License
   14 %  along with this program; see the file COPYING.  If not, write to
   15 %  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
   16 %  Boston, MA 02110-1301, USA.
   17 
   18 \begin{code}
   19 {-# OPTIONS_GHC -cpp #-}
   20 {-# LANGUAGE CPP #-}
   21 
   22 #include "gadts.h"
   23 
   24 module Darcs.Patch.Match ( PatchMatch, Matcher, MatchFun,
   25                     patch_match, match_pattern,
   26                     apply_matcher, make_matcher,
   27                     parseMatch,
   28                     match_parser, helpOnMatchers,
   29                   ) where
   30 
   31 import Text.ParserCombinators.Parsec
   32 import Text.ParserCombinators.Parsec.Expr
   33 import Text.Regex ( mkRegex, matchRegex )
   34 import Data.Maybe ( isJust )
   35 import System.IO.Unsafe ( unsafePerformIO )
   36 
   37 import Darcs.Hopefully ( PatchInfoAnd, hopefully, info )
   38 import Darcs.Patch ( Patch, Patchy, list_touched_files, patchcontents )
   39 import Darcs.Patch.Info ( just_name, just_author, make_filename,
   40                           pi_date )
   41 import Darcs.Sealed ( Sealed2(..), seal2 )
   42 import DateMatcher ( parseDateMatcher )
   43 
   44 import Darcs.Patch.MatchData ( PatchMatch(..), patch_match )
   45 
   46 -- | A type for predicates over patches which do not care about
   47 -- contexts
   48 type MatchFun p = Sealed2 (PatchInfoAnd p) -> Bool
   49 
   50 -- | A @Matcher@ is made of a 'MatchFun' which we will use to match
   51 -- patches and a @String@ representing it.
   52 data Matcher p = MATCH String (MatchFun p)
   53 
   54 instance Show (Matcher p) where
   55     show (MATCH s _) = '"':s ++ "\""
   56 
   57 make_matcher :: String -> (Sealed2 (PatchInfoAnd p) -> Bool) -> Matcher p
   58 make_matcher s m = MATCH s m
   59 
   60 -- | @apply_matcher@ applies a matcher to a patch.
   61 apply_matcher :: Matcher p -> PatchInfoAnd p C(x y) -> Bool
   62 apply_matcher (MATCH _ m) = m . seal2
   63 
   64 parseMatch :: Patchy p => PatchMatch -> Either String (MatchFun p)
   65 parseMatch (PatternMatch s) =
   66     case parse match_parser "match" s of
   67     Left err -> Left $ "Invalid -"++"-match pattern '"++s++
   68                 "'.\n"++ unlines (map ("    "++) $ lines $ show err) -- indent
   69     Right m -> Right m
   70 
   71 match_pattern :: Patchy p => PatchMatch -> Matcher p
   72 match_pattern p@(PatternMatch s) =
   73     case parseMatch p of
   74     Left err -> error err
   75     Right m -> make_matcher s m
   76 
   77 trivial :: Patchy p => MatchFun p
   78 trivial = const True
   79 \end{code}
   80 
   81 \paragraph{Match}
   82 
   83 Currently \verb!--match! accepts six primitive match types, although
   84 there are plans to expand it to match more patterns.  Also, note that the
   85 syntax is still preliminary and subject to change.
   86 
   87 The first match type accepts a literal string which is checked against
   88 the patch name.  The syntax is
   89 \begin{verbatim}
   90 darcs annotate --summary --match 'exact foo+bar'
   91 \end{verbatim}
   92 This is useful for situations where a patch name contains characters that
   93 could be considered special for regular expressions.
   94 
   95 In this and the other match types, the argument must be enclosed in double
   96 quotes if it contains spaces.  You can escape a quote in the argument with a
   97 backslash; backslash escapes itself, but it is treated literally if followed
   98 by a character other than a double quote or backslash, so it is typically not
   99 necessary to escape a backslash.  No such escaping is necessary unless the
  100 argument is enclosed in double quotes.
  101 
  102 The second match type accepts a regular expression which is checked against
  103 the patch name.  The syntax is
  104 \begin{verbatim}
  105 darcs annotate --summary --match 'name foo'
  106 \end{verbatim}
  107 Note that to match regexp metacharacters, such as \verb|(|, literally, they
  108 must be escaped with backslash along with any embedded double quotes.  To
  109 match a literal backslash it must be written quadrupled in general, but often
  110 it need not be escaped, since backslash is only special in regexps when
  111 followed by a metacharacter.  In the following example pairs, the first
  112 literal is matched by the second sequence in the match name:
  113 ``\verb|"|'':``\verb|\"|'', ``\verb|\|'':``\verb|\\\\|'',
  114 ``\verb|\x|'':``\verb|\x|'', ``\verb|(|'':``\verb|\(|''.
  115 
  116 The third match type matches the darcs hash for each patch:
  117 \begin{verbatim}
  118 darcs annotate --summary --match \
  119   'hash 20040403105958-53a90-c719567e92c3b0ab9eddd5290b705712b8b918ef'
  120 \end{verbatim}
  121 Note you need to provide the full hash string as above.
  122 This is intended to be used, for example, by programs allowing you to view
  123 darcs repositories (e.g.\ CGI scripts like viewCVS).
  124 
  125 The fourth match type accepts a regular expression which is checked against
  126 the patch author.  The syntax is
  127 \begin{verbatim}
  128 darcs annotate --summary --match 'author foo'
  129 \end{verbatim}
  130 
  131 There is also support for matching by date.  This is done using commands such as
  132 \begin{verbatim}
  133 darcs annotate --summary --match 'date "last week"'
  134 darcs annotate --summary --match 'date yesterday'
  135 darcs annotate --summary --match 'date "today 14:00"'
  136 darcs annotate --summary --match 'date "tea time yesterday"'
  137 darcs annotate --summary --match 'date "3 days before last year at 17:00"'
  138 darcs changes --from-match 'date "Sat Jun  30 11:31:30 EDT 2004"'
  139 \end{verbatim}
  140 
  141 Only English date specifications are supported---specifically you must use
  142 English day and month names.  Also, only a limited set of time zones is
  143 supported (compatible with GNU coreutils' date parsing).  Unrecognized zones
  144 are treated as UTC, which may result in the timestamps printed in change
  145 entries not being recognized by the date matching.  You can avoid this problem
  146 on a POSIX-like system by running darcs in the UTC zone to get the times
  147 initially, e.g.:
  148 \begin{verbatim}
  149 TZ=UTC darcs changes
  150 \end{verbatim}
  151 
  152 When matching on the ISO format, a partial date is treated as a range.
  153 English dates can either refer to a specific day (``6 months ago',``day before
  154 yesterday''), or to an interval
  155 from some past date (``last month'') to the present.  Putting this all
  156 together, if today is ``2004-07-24'' then the following matches should work:
  157 
  158 \begin{tabular}{|ll|}
  159 \hline
  160 \textbf{date} & \textbf{patches selected} \\
  161 \hline
  162 2004          & from 2004-01-01 up to and including 2004-12-31 \\
  163 2004-01       & from 2004-01-01 up to and including 2004-01-31 \\
  164 2004-01-01    & during 2004-01-01 \\
  165 \hline
  166 today         & during 2004-07-24 (starting midnight in your timezone) \\
  167 yesterday     & during 2004-07-23 \\
  168 6 months ago  & during 2004-01-23 \\
  169 \hline
  170 last 6 months & since  2004-01-23 \\
  171 last month    & since  2004-06-23 (not 2004-06-01!) \\
  172 last week     & since  2004-07-16 \\
  173 \hline
  174 \end{tabular}
  175 
  176 For more precise control, you may specify an interval, either
  177 in a small subset of English or
  178 of \htmladdnormallinkfoot{the ISO 8601 format}{http://www.w3.org/TR/NOTE-datetime}.
  179 If you use the ISO format, note that durations, when
  180 specified alone, are interpreted as being relative to the current date and time.
  181 \begin{verbatim}
  182 darcs annotate --summary --match 'date "between 2004-03-12 and last week"'
  183 darcs annotate --summary --match 'date "after 2005"'
  184 darcs annotate --summary --match 'date "in the last 3 weeks"'
  185 darcs annotate --summary --match 'date "P3M/2006-03-17"'
  186 darcs annotate --summary --match 'date "2004-01-02/2006-03-17"'
  187 darcs annotate --summary --match 'date "P2M6D"'
  188 \end{verbatim}
  189 
  190 You may also prefer to combine date matching with a more specific pattern.
  191 \begin{verbatim}
  192 darcs annotate --summary --match 'date "last week" && name foo'
  193 \end{verbatim}
  194 
  195 The sixth match type accepts a regular expression which is checked against
  196 file paths that the patch touches.  The syntax is
  197 \begin{verbatim}
  198 darcs annotate --summary --match 'touch foo/bar.c'
  199 \end{verbatim}
  200 
  201 The \verb!--match! pattern can include the logical operators \verb!&&!,
  202 \verb!||! and \verb!not!, as well as grouping of patterns with parentheses.
  203 For example
  204 \begin{verbatim}
  205 darcs annotate --summary --match 'name record && not name overrode'
  206 \end{verbatim}
  207 
  208 \begin{code}
  209 match_parser :: Patchy p => CharParser st (MatchFun p)
  210 match_parser = do m <- option trivial submatch
  211                   eof
  212                   return m
  213 
  214 submatch :: Patchy p => CharParser st (MatchFun p)
  215 submatch = buildExpressionParser table match <?> "match rule"
  216 
  217 table :: OperatorTable Char st (MatchFun p)
  218 table   = [ [prefix "not" negate_match,
  219              prefix "!" negate_match ]
  220           , [binary "||" or_match,
  221              binary "or" or_match,
  222              binary "&&" and_match,
  223             binary "and" and_match ]
  224           ]
  225     where binary name fun =
  226               Infix (do trystring name
  227                         spaces
  228                         return fun) AssocLeft
  229           prefix  name fun = Prefix $ do trystring name
  230                                          spaces
  231                                          return fun
  232           negate_match a p = not (a p)
  233           or_match m1 m2 p = (m1 p) || (m2 p)
  234           and_match m1 m2 p = (m1 p) && (m2 p)
  235 
  236 trystring :: String -> CharParser st String
  237 trystring s = try $ string s
  238 
  239 match :: Patchy p => CharParser st (MatchFun p)
  240 match = between spaces spaces
  241         (parens submatch
  242          <|> choice matchers_
  243          <?> "simple match")
  244         where matchers_ = map createMatchHelper primitiveMatchers
  245 
  246 
  247 createMatchHelper :: (String, String, [String], String -> MatchFun p)
  248                   -> CharParser st (MatchFun p)
  249 createMatchHelper (key,_,_,matcher) =
  250   do trystring key
  251      spaces
  252      q <- quoted
  253      return $ matcher q
  254 
  255 -- FIXME: would this be better defined in Darcs.Commands.Help?
  256 -- | The string that is emitted when the user runs @darcs help --match@.
  257 helpOnMatchers :: String
  258 helpOnMatchers = unlines $
  259   ["Selecting Patches:",
  260    "",
  261    "The --patches option yields patches with names matching an `extended'",
  262    "regular expression.  See regex(7) for details.  The --matches option",
  263    "yields patches that match a logical (Boolean) expression: one or more",
  264    "primitive expressions combined by grouping (parentheses) and the",
  265    "complement (not), conjunction (and) and disjunction (or) operators.",
  266    "The C notation for logic operators (!, && and ||) can also be used.",
  267    "",
  268    " --patches=regex is a synonym for --matches='name regex'", 
  269    " --from-patch and --to-patch are synonyms for --from-match='name... and --to-match='name...",
  270    " --from-patch and --to-match can be unproblematically combined:",
  271    " darcs changes --from-patch='html.*documentation' --to-match='date 20040212'",
  272    "",
  273    "The following primitive Boolean expressions are supported:"]
  274   ++ keywords
  275   ++ ["", "Here are some examples:"]
  276   ++ examples
  277   where -- This type signature exists to appease GHC.
  278         ps :: [(String, String, [String], String -> MatchFun Patch)]
  279         ps = primitiveMatchers
  280         keywords = [showKeyword k d | (k,d,_,_) <- ps]
  281         examples = [showExample k e | (k,_,es,_) <- ps, e <- es]
  282         showKeyword keyword description =
  283             -- FIXME: it would be nice to have a variable name here:
  284             -- "author REGEX - match against author (email address)"
  285             -- or "exact STRING - match against exact patch name".
  286             "  " ++ keyword ++ " - " ++ description ++ "."
  287         showExample keyword example =
  288             -- FIXME: this string is long, and its not a use case I've
  289             -- ever seen in practice.  Can we use something else,
  290             -- like "darcs changes --matches"? --twb, 2008-12-28
  291             "  darcs annotate --summary --match "
  292             ++ "'" ++ keyword ++ " " ++ example ++ "'"
  293 
  294 primitiveMatchers :: Patchy p => [(String, String, [String], String -> MatchFun p)]
  295                      -- ^ keyword (operator), help description, list
  296                      -- of examples, matcher function
  297 primitiveMatchers =
  298  [ ("exact", "check a literal string against the patch name"
  299            , ["\"Resolve issue17: use dynamic memory allocation.\""]
  300            , exactmatch )
  301  , ("name", "check a regular expression against the patch name"
  302           , ["issue17", "\"^[Rr]esolve issue17\\>\""]
  303           , mymatch )
  304  , ("author", "check a regular expression against the author name"
  305             , ["\"David Roundy\"", "droundy", "droundy@darcs.net"]
  306             , authormatch )
  307  , ("hash",  "match the darcs hash for a patch"
  308           ,  ["20040403105958-53a90-c719567e92c3b0ab9eddd5290b705712b8b918ef"]
  309           ,  hashmatch )
  310  , ("date", "match the patch date"
  311           , ["\"2006-04-02 22:41\"", "\"tea time yesterday\""]
  312           , datematch )
  313  , ("touch", "match file paths for a patch"
  314           , ["src/foo.c", "src/", "\"src/*.(c|h)\""]
  315           , touchmatch ) ]
  316 
  317 parens :: CharParser st (MatchFun p)
  318        -> CharParser st (MatchFun p)
  319 parens p  = between (string "(") (string ")") p
  320 
  321 quoted :: CharParser st String
  322 quoted = between (char '"') (char '"')
  323                  (many $ do { char '\\' -- allow escapes
  324                             ; try (oneOf ['\\', '"']) <|> return '\\'
  325                             }
  326                          <|>  noneOf ['"'])
  327          <|> between spaces spaces (many $ noneOf " ()")
  328          <?> "string"
  329 
  330 mymatch, exactmatch, authormatch, hashmatch, datematch, touchmatch :: Patchy p => String -> MatchFun p
  331 
  332 mymatch r (Sealed2 hp) = isJust $ matchRegex (mkRegex r) $ just_name (info hp)
  333 
  334 exactmatch r (Sealed2 hp) = r == (just_name (info hp))
  335 
  336 authormatch a (Sealed2 hp) = isJust $ matchRegex (mkRegex a) $ just_author (info hp)
  337 
  338 hashmatch h (Sealed2 hp) = let rh = make_filename (info hp) in
  339                                   (rh == h) || (rh == h++".gz")
  340 
  341 datematch d (Sealed2 hp) = let dm = unsafePerformIO $ parseDateMatcher d
  342                                   in dm $ pi_date (info hp)
  343 
  344 touchmatch r (Sealed2 hp) = let files = list_touched_files $ patchcontents $ hopefully hp
  345                             in or $ map (isJust . matchRegex (mkRegex r)) files
  346 \end{code}