module Stackage.Database.Cron ( stackageServerCron , loadFromS3 , getHoogleDB ) where import ClassyPrelude.Conduit import Control.Concurrent (threadDelay) import Stackage.PackageIndex.Conduit import Database.Persist (Entity (Entity)) import Data.Char (isAlpha) import qualified Codec.Archive.Tar as Tar import Stackage.Database import Network.HTTP.Client import Network.HTTP.Client.Conduit (bodyReaderSource) import Filesystem (rename, removeTree, removeFile) import Web.PathPieces (toPathPiece) import Filesystem (isFile, createTree) import Filesystem.Path.CurrentOS (parent, fromText, encodeString) import Control.Monad.State.Strict (StateT, get, put) import Network.HTTP.Types (status200) import Data.Streaming.Network (bindPortTCP) import Network.AWS (Credentials (Discover), Region (NorthVirginia), getEnv, send, sourceFileIO, envManager) import Network.AWS.Data (toBody) import Network.AWS.S3 (ObjectCannedACL (PublicRead), poACL, putObject) import Control.Lens (set, view) import qualified Data.Conduit.Binary as CB import Data.Conduit.Zlib (WindowBits (WindowBits), compress, ungzip) import qualified Hoogle filename' :: Text filename' = concat [ "stackage-database-" , tshow currentSchema , ".sqlite3" ] keyName :: Text keyName = "stackage-database/" ++ filename' url :: Text url = concat [ "https://s3.amazonaws.com/haddock.stackage.org/" , keyName ] -- | Provides an action to be used to refresh the file from S3. loadFromS3 :: Manager -> IO (IO StackageDatabase, IO ()) loadFromS3 man = do killPrevVar <- newTVarIO $ return () currSuffixVar <- newTVarIO (1 :: Int) let root = "stackage-database" handleIO print $ removeTree root createTree root req <- parseUrl $ unpack url let download = do suffix <- atomically $ do x <- readTVar currSuffixVar writeTVar currSuffixVar $! x + 1 return x let fp = root unpack ("database-download-" ++ tshow suffix) putStrLn $ "Downloading database to " ++ pack fp withResponse req man $ \res -> runResourceT $ bodyReaderSource (responseBody res) $= ungzip $$ sinkFile fp putStrLn "Finished downloading database" return fp dbvar <- newTVarIO $ error "database not yet loaded" let update = do fp <- download db <- openStackageDatabase (fromString fp) void $ tryIO $ join $ atomically $ do writeTVar dbvar db oldKill <- readTVar killPrevVar writeTVar killPrevVar $ do -- give existing users a chance to clean up threadDelay $ 1000000 * 30 void $ tryIO $ removeFile (fromString fp) return oldKill update return (readTVarIO dbvar, update) hoogleKey :: SnapName -> Text hoogleKey name = concat [ "hoogle/" , toPathPiece name , "/" , VERSION_hoogle , ".hoo" ] hoogleUrl :: SnapName -> Text hoogleUrl n = concat [ "https://s3.amazonaws.com/haddock.stackage.org/" , hoogleKey n ] getHoogleDB :: Bool -- ^ print exceptions? -> Manager -> SnapName -> IO (Maybe FilePath) getHoogleDB toPrint man name = do let fp = fromText $ hoogleKey name fptmp = encodeString fp <.> "tmp" exists <- isFile fp if exists then return $ Just (encodeString fp) else do req' <- parseUrl $ unpack $ hoogleUrl name let req = req' { checkStatus = \_ _ _ -> Nothing , decompress = const False } withResponse req man $ \res -> if responseStatus res == status200 then do createTree $ parent (fromString fptmp) runResourceT $ bodyReaderSource (responseBody res) $= ungzip $$ sinkFile fptmp rename (fromString fptmp) fp return $ Just $ encodeString fp else do when toPrint $ mapM brRead res >>= print return Nothing stackageServerCron :: IO () stackageServerCron = do -- Hacky approach instead of PID files void $ catchIO (bindPortTCP 17834 "127.0.0.1") $ \_ -> error $ "cabal loader process already running, exiting" env <- getEnv NorthVirginia Discover let upload :: FilePath -> Text -> IO () upload fp key = do let fpgz = fp <.> "gz" runResourceT $ sourceFile fp $$ compress 9 (WindowBits 31) =$ CB.sinkFile fpgz body <- sourceFileIO fpgz let po = set poACL (Just PublicRead) $ putObject body "haddock.stackage.org" key putStrLn $ "Uploading: " ++ key eres <- runResourceT $ send env po case eres of Left e -> error $ show (fp, key, e) Right _ -> putStrLn "Success" let dbfp = fromText keyName createStackageDatabase dbfp upload (encodeString dbfp) keyName db <- openStackageDatabase dbfp do snapshots <- runReaderT snapshotsJSON db let key = "snapshots.json" :: Text po = set poACL (Just PublicRead) $ putObject (toBody snapshots) "haddock.stackage.org" key putStrLn $ "Uploading: " ++ key eres <- runResourceT $ send env po case eres of Left e -> error $ show (key, e) Right _ -> putStrLn "Success" names <- runReaderT last5Lts5Nightly db let manager = view envManager env forM_ names $ \name -> do mfp <- getHoogleDB False manager name case mfp of Just _ -> putStrLn $ "Hoogle database exists for: " ++ toPathPiece name Nothing -> do mfp' <- createHoogleDB db manager name forM_ mfp' $ \fp -> do let key = hoogleKey name upload fp key let dest = unpack key createTree $ parent (fromString dest) rename (fromString fp) (fromString dest) createHoogleDB :: StackageDatabase -> Manager -> SnapName -> IO (Maybe FilePath) createHoogleDB db man name = handleAny (\e -> print e $> Nothing) $ do req' <- parseUrl $ unpack tarUrl let req = req' { decompress = const True } unlessM (isFile (fromString tarFP)) $ withResponse req man $ \res -> do let tmp = tarFP <.> "tmp" createTree $ parent (fromString tmp) runResourceT $ bodyReaderSource (responseBody res) $$ sinkFile tmp rename (fromString tmp) (fromString tarFP) void $ tryIO $ removeTree (fromString bindir) void $ tryIO $ removeFile (fromString outname) createTree (fromString bindir) dbs <- runResourceT $ sourceTarFile False tarFP $$ evalStateC 1 (mapMC (singleDB db name bindir)) =$ sinkList putStrLn "Merging databases..." Hoogle.mergeDatabase (catMaybes dbs) outname putStrLn "Merge done" return $ Just outname where root = "hoogle-gen" bindir = root "bindir" outname = root "output.hoo" tarKey = toPathPiece name ++ "/hoogle/orig.tar" tarUrl = "https://s3.amazonaws.com/haddock.stackage.org/" ++ tarKey tarFP = root unpack tarKey singleDB :: StackageDatabase -> SnapName -> FilePath -- ^ bindir to write to -> Tar.Entry -> StateT Int (ResourceT IO) (Maybe FilePath) singleDB db sname bindir e@(Tar.entryContent -> Tar.NormalFile lbs _) = do idx <- get put $! idx + 1 putStrLn $ "Loading file for Hoogle: " ++ pack (Tar.entryPath e) let pkg = pack $ takeWhile (/= '.') $ Tar.entryPath e msp <- flip runReaderT db $ do Just (Entity sid _) <- lookupSnapshot sname lookupSnapshotPackage sid pkg case msp of Nothing -> do putStrLn $ "Unknown: " ++ pkg return Nothing Just (Entity _ sp) -> do let ver = snapshotPackageVersion sp pkgver = concat [pkg, "-", ver] out = bindir show idx <.> "hoo" src' = unlines $ haddockHacks (Just $ unpack docsUrl) $ lines $ unpack $ decodeUtf8 lbs docsUrl = concat [ "https://www.stackage.org/haddock/" , toPathPiece sname , "/" , pkgver , "/index.html" ] _errs <- liftIO $ Hoogle.createDatabase "" Hoogle.Haskell [] src' out return $ Just out singleDB _ _ _ _ = return Nothing --------------------------------------------------------------------- -- HADDOCK HACKS -- (Copied from hoogle-4.2.36/src/Recipe/Haddock.hs) -- Modifications: -- 1) Some name qualification -- 2) Explicit type sig due to polymorphic elem -- 3) Fixed an unused binding warning -- Eliminate @version -- Change :*: to (:*:), Haddock bug -- Change !!Int to !Int, Haddock bug -- Change instance [overlap ok] to instance, Haddock bug -- Change instance [incoherent] to instance, Haddock bug -- Change instance [safe] to instance, Haddock bug -- Change !Int to Int, HSE bug -- Drop {-# UNPACK #-}, Haddock bug -- Drop everything after where, Haddock bug haddockHacks :: Maybe Hoogle.URL -> [String] -> [String] haddockHacks loc src = maybe id haddockPackageUrl loc (translate src) where translate :: [String] -> [String] translate = map (unwords . g . map f . words) . filter (not . isPrefixOf "@version ") f "::" = "::" f (':':xs) = "(:" ++ xs ++ ")" f ('!':'!':x:xs) | isAlpha x = xs f ('!':x:xs) | isAlpha x || x `elem` ("[(" :: String) = x:xs f x | x `elem` ["[overlap","ok]","[incoherent]","[safe]"] = "" f x | x `elem` ["{-#","UNPACK","#-}"] = "" f x = x g ("where":_) = [] g (x:xs) = x : g xs g [] = [] haddockPackageUrl :: Hoogle.URL -> [String] -> [String] haddockPackageUrl x = concatMap f where f y | "@package " `isPrefixOf` y = ["@url " ++ x, y] | otherwise = [y]