1 {-# LANGUAGE CPP, ForeignFunctionInterface #-} 2 3 module URL ( copyUrl, copyUrlFirst, pipeliningEnabledByDefault, 4 setDebugHTTP, setHTTPPipelining, waitUrl, 5 Cachable(Cachable, Uncachable, MaxAge), 6 environmentHelpProxy, environmentHelpProxyPassword 7 ) where 8 9 import Data.IORef ( newIORef, readIORef, writeIORef, IORef ) 10 import Data.Map ( Map ) 11 import Data.List ( delete ) 12 import qualified Data.Map as Map 13 import System.Directory ( copyFile ) 14 import System.IO.Unsafe ( unsafePerformIO ) 15 import Control.Concurrent ( forkIO ) 16 import Control.Concurrent.Chan ( isEmptyChan, newChan, readChan, writeChan, Chan ) 17 import Control.Concurrent.MVar ( isEmptyMVar, modifyMVar_, newEmptyMVar, newMVar, putMVar, readMVar, withMVar, MVar ) 18 import Control.Monad ( unless, when ) 19 import Control.Monad.Trans ( liftIO ) 20 import Control.Monad.State ( evalStateT, get, modify, put, StateT ) 21 import Foreign.C.Types ( CInt ) 22 23 import Workaround ( renameFile ) 24 import Darcs.Global ( atexit ) 25 import Progress ( debugFail, debugMessage ) 26 import Darcs.Lock ( removeFileMayNotExist ) 27 28 import Numeric ( showHex ) 29 import System.Random ( randomRIO ) 30 31 #ifdef HAVE_CURL 32 import Foreign.C.String ( withCString, peekCString, CString ) 33 #else 34 import qualified HTTP ( request_url, wait_next_url ) 35 #endif 36 #include "impossible.h" 37 38 data UrlRequest = UrlRequest { url :: String 39 , file :: FilePath 40 , cachable :: Cachable 41 , priority :: Priority 42 , notifyVar :: MVar String } 43 44 data Cachable = Cachable | Uncachable | MaxAge !CInt 45 deriving (Show, Eq) 46 47 data UrlState = UrlState { inProgress :: Map String ( FilePath 48 , [FilePath] 49 , Cachable 50 , (MVar String) ) 51 , waitToStart :: Q String 52 , pipeLength :: Int 53 , randomJunk :: String } 54 55 data Q a = Q [a] [a] 56 57 readQ :: Q a -> Maybe (a, Q a) 58 readQ (Q (x:xs) ys) = Just (x, Q xs ys) 59 readQ (Q [] ys) = do x:xs <- Just $ reverse ys 60 Just (x, Q xs []) 61 62 insertQ :: a -> Q a -> Q a 63 insertQ y (Q xs ys) = Q xs (y:ys) 64 65 pushQ :: a -> Q a -> Q a 66 pushQ x (Q xs ys) = Q (x:xs) ys 67 68 deleteQ :: Eq a => a -> Q a -> Q a 69 deleteQ x (Q xs ys) = Q (delete x xs) (delete x ys) 70 71 elemQ :: Eq a => a -> Q a -> Bool 72 elemQ x (Q xs ys) = x `elem` xs || x `elem` ys 73 74 emptyQ :: Q a 75 emptyQ = Q [] [] 76 77 nullQ :: Q a -> Bool 78 nullQ (Q [] []) = True 79 nullQ _ = False 80 81 data Priority = High | Low deriving Eq 82 83 #if defined(CURL_PIPELINING) || defined(CURL_PIPELINING_DEFAULT) 84 pipeliningLimit :: Int 85 pipeliningLimit = 100 86 #endif 87 88 pipeliningEnabledByDefault :: Bool 89 #ifdef CURL_PIPELINING_DEFAULT 90 pipeliningEnabledByDefault = True 91 #else 92 pipeliningEnabledByDefault = False 93 #endif 94 95 {-# NOINLINE maxPipeLength #-} 96 maxPipeLength :: IORef Int 97 maxPipeLength = unsafePerformIO $ newIORef $ 98 #ifdef CURL_PIPELINING_DEFAULT 99 pipeliningLimit 100 #else 101 1 102 #endif 103 104 {-# NOINLINE urlNotifications #-} 105 urlNotifications :: MVar (Map String (MVar String)) 106 urlNotifications = unsafePerformIO $ newMVar Map.empty 107 108 {-# NOINLINE urlChan #-} 109 urlChan :: Chan UrlRequest 110 urlChan = unsafePerformIO $ do 111 ch <- newChan 112 forkIO (urlThread ch) 113 return ch 114 115 urlThread :: Chan UrlRequest -> IO () 116 urlThread ch = do junk <- flip showHex "" `fmap` randomRIO rrange 117 evalStateT urlThread' (UrlState Map.empty emptyQ 0 junk) 118 where rrange = (0, 2^(128 :: Integer) :: Integer) 119 urlThread' = do 120 empty <- liftIO $ isEmptyChan ch 121 st <- get 122 let l = pipeLength st 123 w = waitToStart st 124 reqs <- if not empty || (nullQ w && l == 0) 125 then liftIO readAllRequests 126 else return [] 127 mapM_ addReq reqs 128 checkWaitToStart 129 waitNextUrl 130 urlThread' 131 readAllRequests = do 132 r <- readChan ch 133 debugMessage $ "URL.urlThread ("++url r++"\n"++ 134 " -> "++file r++")" 135 empty <- isEmptyChan ch 136 reqs <- if not empty 137 then readAllRequests 138 else return [] 139 return (r:reqs) 140 addReq r = do 141 let u = url r 142 f = file r 143 c = cachable r 144 d <- liftIO $ alreadyDownloaded u 145 if d 146 then dbg "Ignoring UrlRequest of URL that is already downloaded." 147 else do 148 st <- get 149 let p = inProgress st 150 w = waitToStart st 151 e = (f, [], c, notifyVar r) 152 new_w = case priority r of 153 High -> pushQ u w 154 Low -> insertQ u w 155 new_st = st { inProgress = Map.insert u e p 156 , waitToStart = new_w } 157 case Map.lookup u p of 158 Just (f', fs', c', v) -> do 159 let new_c = minCachable c c' 160 when (c /= c') $ let new_p = Map.insert u (f', fs', new_c, v) p 161 in do modify (\s -> s { inProgress = new_p }) 162 dbg $ "Changing "++u++" request cachability from "++show c++" to "++show new_c 163 when (u `elemQ` w && priority r == High) $ do 164 modify (\s -> s { waitToStart = pushQ u (deleteQ u w) }) 165 dbg $ "Moving "++u++" to head of download queue." 166 if f `notElem` (f':fs') 167 then let new_p = Map.insert u (f', f:fs', new_c, v) p 168 in do modify (\s -> s { inProgress = new_p }) 169 dbg "Adding new file to existing UrlRequest." 170 else dbg "Ignoring UrlRequest of file that's already queued." 171 _ -> put new_st 172 alreadyDownloaded u = do 173 n <- liftIO $ withMVar urlNotifications (return . (Map.lookup u)) 174 case n of 175 Just v -> not `fmap` isEmptyMVar v 176 Nothing -> return True 177 178 checkWaitToStart :: StateT UrlState IO () 179 checkWaitToStart = do 180 st <- get 181 let l = pipeLength st 182 mpl <- liftIO $ readIORef maxPipeLength 183 when (l < mpl) $ do 184 let w = waitToStart st 185 case readQ w of 186 Just (u,rest) -> do 187 case Map.lookup u (inProgress st) of 188 Just (f, _, c, v) -> do 189 dbg ("URL.request_url ("++u++"\n"++ 190 " -> "++f++")") 191 let f_new = f++"-new_"++randomJunk st 192 err <- liftIO $ request_url u f_new c 193 if null err 194 then do dbg "URL.request_url succeeded" 195 liftIO $ atexit (removeFileMayNotExist f_new) 196 put $ st { waitToStart = rest 197 , pipeLength = l + 1 } 198 else do dbg $ "Failed to start download URL "++u++": "++err 199 liftIO $ do removeFileMayNotExist f_new 200 putMVar v err 201 put $ st { waitToStart = rest } 202 _ -> bug $ "Possible bug in URL.checkWaitToStart "++u 203 checkWaitToStart 204 _ -> return () 205 206 copyUrlFirst :: String -> FilePath -> Cachable -> IO () 207 copyUrlFirst = copyUrlWithPriority High 208 209 copyUrl :: String -> FilePath -> Cachable -> IO () 210 copyUrl = copyUrlWithPriority Low 211 212 copyUrlWithPriority :: Priority -> String -> String -> Cachable -> IO () 213 copyUrlWithPriority p u f c = do 214 debugMessage ("URL.copyUrlWithPriority ("++u++"\n"++ 215 " -> "++f++")") 216 v <- newEmptyMVar 217 let fn _ old_val = old_val 218 modifyMVar_ urlNotifications (return . (Map.insertWith fn u v)) 219 let r = UrlRequest u f c p v 220 writeChan urlChan r 221 222 waitNextUrl :: StateT UrlState IO () 223 waitNextUrl = do 224 st <- get 225 let l = pipeLength st 226 when (l > 0) $ do 227 dbg "URL.waitNextUrl start" 228 (u, e) <- liftIO $ wait_next_url 229 let p = inProgress st 230 new_st = st { inProgress = Map.delete u p 231 , pipeLength = l - 1 } 232 liftIO $ if null e 233 then case Map.lookup u p of 234 Just (f, fs, _, v) -> do 235 renameFile (f++"-new_"++randomJunk st) f 236 mapM_ (safeCopyFile st f) fs 237 putMVar v e 238 debugMessage $ "URL.waitNextUrl succeeded: "++u++" "++f 239 Nothing -> bug $ "Possible bug in URL.waitNextUrl: "++u 240 else case Map.lookup u p of 241 Just (f, _, _, v) -> do 242 removeFileMayNotExist (f++"-new_"++randomJunk st) 243 putMVar v e 244 debugMessage $ "URL.waitNextUrl failed: "++ 245 u++" "++f++" "++e 246 Nothing -> bug $ "Another possible bug in URL.waitNextUrl: "++u++" "++e 247 unless (null u) $ put new_st 248 where safeCopyFile st f t = let new_t = t++"-new_"++randomJunk st 249 in do copyFile f new_t 250 renameFile new_t t 251 252 waitUrl :: String -> IO () 253 waitUrl u = do debugMessage $ "URL.waitUrl "++u 254 r <- withMVar urlNotifications (return . (Map.lookup u)) 255 case r of 256 Just var -> do 257 e <- readMVar var 258 modifyMVar_ urlNotifications (return . (Map.delete u)) 259 unless (null e) (debugFail $ "Failed to download URL "++u++": "++e) 260 Nothing -> return () -- file was already downloaded 261 262 dbg :: String -> StateT a IO () 263 dbg = liftIO . debugMessage 264 265 minCachable :: Cachable -> Cachable -> Cachable 266 minCachable Uncachable _ = Uncachable 267 minCachable _ Uncachable = Uncachable 268 minCachable (MaxAge a) (MaxAge b) = MaxAge $ min a b 269 minCachable (MaxAge a) _ = MaxAge a 270 minCachable _ (MaxAge b) = MaxAge b 271 minCachable _ _ = Cachable 272 273 #ifdef HAVE_CURL 274 cachableToInt :: Cachable -> CInt 275 cachableToInt Cachable = -1 276 cachableToInt Uncachable = 0 277 cachableToInt (MaxAge n) = n 278 #endif 279 280 setHTTPPipelining :: Bool -> IO () 281 setHTTPPipelining False = writeIORef maxPipeLength 1 282 setHTTPPipelining True = writeIORef maxPipeLength 283 #ifdef CURL_PIPELINING 284 pipeliningLimit 285 #else 286 1 >> (putStrLn $ "Warning: darcs is compiled without HTTP pipelining "++ 287 "support, '--http-pipelining' argument is ignored.") 288 #endif 289 290 setDebugHTTP :: IO () 291 request_url :: String -> FilePath -> Cachable -> IO String 292 wait_next_url :: IO (String, String) 293 294 #ifdef HAVE_CURL 295 296 setDebugHTTP = curl_enable_debug 297 298 request_url u f cache = 299 withCString u $ \ustr -> 300 withCString f $ \fstr -> do 301 err <- curl_request_url ustr fstr (cachableToInt cache) >>= peekCString 302 return err 303 304 wait_next_url = do 305 e <- curl_wait_next_url >>= peekCString 306 u <- curl_last_url >>= peekCString 307 return (u, e) 308 309 foreign import ccall "hscurl.h curl_request_url" 310 curl_request_url :: CString -> CString -> CInt -> IO CString 311 312 foreign import ccall "hscurl.h curl_wait_next_url" 313 curl_wait_next_url :: IO CString 314 315 foreign import ccall "hscurl.h curl_last_url" 316 curl_last_url :: IO CString 317 318 foreign import ccall "hscurl.h curl_enable_debug" 319 curl_enable_debug :: IO () 320 321 #elif defined(HAVE_HTTP) 322 323 setDebugHTTP = return () 324 request_url = HTTP.request_url 325 wait_next_url = HTTP.wait_next_url 326 327 #else 328 329 setDebugHTTP = debugMessage "URL.setDebugHttp works only with libcurl" 330 request_url _ _ _ = debugFail "URL.request_url: there is no libcurl!" 331 wait_next_url = debugFail "URL.wait_next_url: there is no libcurl!" 332 333 #endif 334 335 -- Usage of these environment variables happens in C code, so the 336 -- closest to "literate" user documentation is here, where the 337 -- offending function 'curl_request_url' is imported. 338 environmentHelpProxy :: ([String], [String]) 339 environmentHelpProxy = (["HTTP_PROXY", "HTTPS_PROXY", "FTP_PROXY", 340 "ALL_PROXY", "NO_PROXY"], [ 341 "If Darcs was built with libcurl, the environment variables HTTP_PROXY,", 342 "HTTPS_PROXY and FTP_PROXY can be set to the URL of a proxy in the form", 343 "", 344 " [protocol://]<host>[:port]", 345 "", 346 "In which case libcurl will use the proxy for the associated protocol", 347 "(HTTP, HTTPS and FTP). The environment variable ALL_PROXY can be used", 348 "to set a single proxy for all libcurl requests.", 349 "", 350 "If the environment variable NO_PROXY is a comma-separated list of host", 351 "names, access to those hosts will bypass proxies defined by the above", 352 "variables. For example, it is quite common to avoid proxying requests", 353 "to machines on the local network with", 354 "", 355 " NO_PROXY=localhost,*.localdomain", 356 "", 357 "For compatibility with lynx et al, lowercase equivalents of these", 358 "environment variables (e.g. $http_proxy) are also understood and are", 359 "used in preference to the uppercase versions.", 360 "", 361 "If Darcs was not built with libcurl, all these environment variables", 362 "are silently ignored, and there is no way to use a web proxy."]) 363 364 environmentHelpProxyPassword :: ([String], [String]) 365 environmentHelpProxyPassword = (["DARCS_PROXYUSERPWD"], [ 366 "If Darcs was built with libcurl, and you are using a web proxy that", 367 "requires authentication, you can set the $DARCS_PROXYUSERPWD", 368 "environment variable to the username and password expected by the", 369 "proxy, separated by a colon. This environment variable is silently", 370 "ignored if Darcs was not built with libcurl."])