diff --git a/Handler/Hoogle.hs b/Handler/Hoogle.hs index a60db52..894debd 100644 --- a/Handler/Hoogle.hs +++ b/Handler/Hoogle.hs @@ -14,7 +14,7 @@ import qualified Stackage.Database.Cron as Cron getHoogleDB :: SnapName -> Handler (Maybe FilePath) getHoogleDB name = do app <- getYesod - liftIO $ Cron.getHoogleDB (httpManager app) name + liftIO $ Cron.getHoogleDB True (httpManager app) name getHoogleR :: SnapName -> Handler Html getHoogleR name = do diff --git a/Stackage/Database.hs b/Stackage/Database.hs index 3657218..a9ae77a 100644 --- a/Stackage/Database.hs +++ b/Stackage/Database.hs @@ -29,6 +29,7 @@ module Stackage.Database , getSnapshotsForPackage , getSnapshots , currentSchema + , last5Lts5Nightly ) where import Database.Sqlite (SqliteException) @@ -132,6 +133,8 @@ newtype StackageDatabase = StackageDatabase ConnectionPool class MonadIO m => GetStackageDatabase m where getStackageDatabase :: m StackageDatabase +instance MonadIO m => GetStackageDatabase (ReaderT StackageDatabase m) where + getStackageDatabase = ask sourcePackages :: MonadResource m => FilePath -> Producer m Tar.Entry sourcePackages root = do @@ -601,3 +604,12 @@ getSnapshots l o = run $ (,) <*> fmap (map entityVal) (selectList [] [LimitTo l, OffsetBy o, Desc SnapshotCreated]) + +last5Lts5Nightly :: GetStackageDatabase m => m [SnapName] +last5Lts5Nightly = run $ do + ls <- selectList [] [Desc LtsMajor, Desc LtsMinor, LimitTo 5] + ns <- selectList [] [Desc NightlyDay, LimitTo 5] + return $ map l ls ++ map n ns + where + l (Entity _ x) = SNLts (ltsMajor x) (ltsMinor x) + n (Entity _ x) = SNNightly (nightlyDay x) diff --git a/Stackage/Database/Cron.hs b/Stackage/Database/Cron.hs index 6e7f8c7..1ccfdc3 100644 --- a/Stackage/Database/Cron.hs +++ b/Stackage/Database/Cron.hs @@ -5,23 +5,31 @@ module Stackage.Database.Cron ) where import ClassyPrelude.Conduit +import Stackage.PackageIndex.Conduit +import Database.Persist (Entity (Entity)) +import Data.Char (isAlpha) +import qualified Codec.Archive.Tar as Tar import Stackage.Database import Network.HTTP.Client import Network.HTTP.Client.Conduit (bodyReaderSource) -import Filesystem (rename) +import Filesystem (rename, removeTree, removeFile) import Web.PathPieces (toPathPiece) -import Filesystem (isFile) +import Filesystem (isFile, createTree) +import Filesystem.Path (parent) +import Control.Monad.State.Strict (StateT, get, put) import Network.HTTP.Types (status200) +import Data.Streaming.Network (bindPortTCP) import Network.AWS (Credentials (Discover), Region (NorthVirginia), getEnv, - send, sourceFileIO) + send, sourceFileIO, envManager ) import Network.AWS.S3 (ObjectCannedACL (PublicRead), poACL, putObject) -import Control.Lens (set) +import Control.Lens (set, view) import qualified Data.Conduit.Binary as CB import Data.Conduit.Zlib (WindowBits (WindowBits), compress) +import qualified Hoogle filename' :: Text filename' = concat @@ -47,6 +55,7 @@ loadFromS3 = do fptmp = fp <.> "tmp" req <- parseUrl $ unpack url let download man = withResponse req man $ \res -> do + createTree $ parent fptmp runResourceT $ bodyReaderSource (responseBody res) $$ sinkFile fptmp @@ -70,8 +79,9 @@ hoogleUrl n = concat , ".gz" ] -getHoogleDB :: Manager -> SnapName -> IO (Maybe FilePath) -getHoogleDB man name = do +getHoogleDB :: Bool -- ^ print exceptions? + -> Manager -> SnapName -> IO (Maybe FilePath) +getHoogleDB toPrint man name = do let fp = fpFromText $ hoogleKey name fptmp = fp <.> "tmp" exists <- isFile fp @@ -82,18 +92,24 @@ getHoogleDB man name = do let req = req' { checkStatus = \_ _ _ -> Nothing } withResponse req man $ \res -> if responseStatus res == status200 then do + createTree $ parent fptmp runResourceT $ bodyReaderSource (responseBody res) $$ sinkFile fptmp rename fptmp fp return $ Just fp else do - mapM brRead res >>= print + when toPrint $ mapM brRead res >>= print return Nothing stackageServerCron :: IO () stackageServerCron = do + -- Hacky approach instead of PID files + void $ catchIO (bindPortTCP 17834 "127.0.0.1") $ \_ -> + error $ "cabal loader process already running, exiting" + env <- getEnv NorthVirginia Discover - let upload fp key = do + let upload :: FilePath -> Text -> IO () + upload fp key = do let fpgz = fpToString $ fp <.> "gz" runResourceT $ sourceFile fp $$ compress 9 (WindowBits 31) @@ -102,81 +118,143 @@ stackageServerCron = do let po = set poACL (Just PublicRead) $ putObject body "haddock.stackage.org" key + putStrLn $ "Uploading: " ++ key eres <- runResourceT $ send env po case eres of Left e -> error $ show (fp, key, e) - Right _ -> return () + Right _ -> putStrLn "Success" let dbfp = fpFromText keyName - createStackageDatabase dbfp - upload dbfp keyName + _ <- return (upload, dbfp) + --createStackageDatabase dbfp + --upload dbfp keyName -{- - createStackageDatabase dbfile + (db, _) <- loadFromS3 + names <- runReaderT last5Lts5Nightly db + let manager = view envManager env + forM_ names $ \name -> do + mfp <- getHoogleDB False manager name + case mfp of + Just _ -> putStrLn $ "Hoogle database exists for: " ++ toPathPiece name + Nothing -> do + mfp' <- createHoogleDB db manager name + forM_ mfp' $ \fp -> do + let key = hoogleKey name + upload fp key + let dest = fpFromText key + createTree $ parent dest + rename fp dest -import Data.Streaming.Network (bindPortTCP) +createHoogleDB :: StackageDatabase -> Manager -> SnapName -> IO (Maybe FilePath) +createHoogleDB db man name = handleAny (\e -> print e $> Nothing) $ do + req <- parseUrl $ unpack tarUrl + unlessM (isFile tarFP) $ withResponse req man $ \res -> do + let tmp = tarFP <.> "tmp" + createTree $ parent tmp + runResourceT $ bodyReaderSource (responseBody res) + $$ sinkFile tmp + rename tmp tarFP -data CabalLoaderEnv = CabalLoaderEnv - { cleSettings :: !(AppConfig DefaultEnv Extra) - , cleManager :: !Manager - } + void $ tryIO $ removeTree bindir + void $ tryIO $ removeFile outname + createTree bindir -instance HasHackageRoot CabalLoaderEnv where - getHackageRoot = hackageRoot . appExtra . cleSettings -instance HasHttpManager CabalLoaderEnv where - getHttpManager = cleManager + dbs <- runResourceT + $ sourceTarFile True (fpToString tarFP) + $$ evalStateC 1 (mapMC (singleDB db name bindir)) + =$ sinkList -cabalLoaderMain :: IO () -cabalLoaderMain = do - -- Hacky approach instead of PID files - void $ catchIO (bindPortTCP 17834 "127.0.0.1") $ \_ -> - error $ "cabal loader process already running, exiting" + putStrLn "Merging databases..." + Hoogle.mergeDatabase (map fpToString $ catMaybes dbs) (fpToString outname) + putStrLn "Merge done" - error "cabalLoaderMain" - {- FIXME - conf <- fromArgs parseExtra - dbconf <- getDbConf conf - pool <- Database.Persist.createPoolConfig dbconf - manager <- newManager - bs <- loadBlobStore manager conf - hSetBuffering stdout LineBuffering - env <- getEnvironment - let forceUpdate = lookup "STACKAGE_FORCE_UPDATE" env == Just "1" - flip runLoggingT logFunc $ appLoadCabalFiles - True -- update database? - forceUpdate - CabalLoaderEnv - { cleSettings = conf - , cleBlobStore = bs - , cleManager = manager - } - dbconf - pool - - let foundation = App - { settings = conf - , getStatic = error "getStatic" - , connPool = pool - , httpManager = manager - , persistConfig = dbconf - , appLogger = error "appLogger" - , genIO = error "genIO" - , blobStore = bs - , haddockRootDir = error "haddockRootDir" - , appDocUnpacker = error "appDocUnpacker" - , widgetCache = error "widgetCache" - , websiteContent = error "websiteContent" - } - createHoogleDatabases - bs - (flip (Database.Persist.runPool dbconf) pool) - putStrLn - (yesodRender foundation (appRoot conf)) + return $ Just outname where - logFunc loc src level str - | level > LevelDebug = S.hPutStr stdout $ fromLogStr $ defaultLogStr loc src level str - | otherwise = return () - -} + root = "hoogle-gen" + bindir = root "bindir" + outname = root "output.hoo" + tarKey = toPathPiece name ++ "/hoogle/orig.tar" + tarUrl = "https://s3.amazonaws.com/haddock.stackage.org/" ++ tarKey + tarFP = root fpFromText tarKey --} +singleDB :: StackageDatabase + -> SnapName + -> FilePath -- ^ bindir to write to + -> Tar.Entry + -> StateT Int (ResourceT IO) (Maybe FilePath) +singleDB db sname bindir e@(Tar.entryContent -> Tar.NormalFile lbs _) = do + idx <- get + put $! idx + 1 + putStrLn $ "Loading file for Hoogle: " ++ pack (Tar.entryPath e) + + let pkg = pack $ takeWhile (/= '.') $ Tar.entryPath e + msp <- flip runReaderT db $ do + Just (Entity sid _) <- lookupSnapshot sname + lookupSnapshotPackage sid pkg + case msp of + Nothing -> do + putStrLn $ "Unknown: " ++ pkg + return Nothing + Just (Entity _ sp) -> do + let ver = snapshotPackageVersion sp + pkgver = concat [pkg, "-", ver] + out = bindir fpFromString (show idx) <.> "hoo" + src' = unlines + $ haddockHacks (Just $ unpack docsUrl) + $ lines + $ unpack + $ decodeUtf8 lbs + docsUrl = concat + [ "https://www.stackage.org/haddock/" + , toPathPiece sname + , "/" + , pkgver + , "/index.html" + ] + + _errs <- liftIO $ Hoogle.createDatabase "" Hoogle.Haskell [] src' $ fpToString out + + return $ Just out +singleDB _ _ _ _ = return Nothing + +--------------------------------------------------------------------- +-- HADDOCK HACKS +-- (Copied from hoogle-4.2.36/src/Recipe/Haddock.hs) +-- Modifications: +-- 1) Some name qualification +-- 2) Explicit type sig due to polymorphic elem +-- 3) Fixed an unused binding warning + +-- Eliminate @version +-- Change :*: to (:*:), Haddock bug +-- Change !!Int to !Int, Haddock bug +-- Change instance [overlap ok] to instance, Haddock bug +-- Change instance [incoherent] to instance, Haddock bug +-- Change instance [safe] to instance, Haddock bug +-- Change !Int to Int, HSE bug +-- Drop {-# UNPACK #-}, Haddock bug +-- Drop everything after where, Haddock bug + +haddockHacks :: Maybe Hoogle.URL -> [String] -> [String] +haddockHacks loc src = maybe id haddockPackageUrl loc (translate src) + where + translate :: [String] -> [String] + translate = map (unwords . g . map f . words) . filter (not . isPrefixOf "@version ") + + f "::" = "::" + f (':':xs) = "(:" ++ xs ++ ")" + f ('!':'!':x:xs) | isAlpha x = xs + f ('!':x:xs) | isAlpha x || x `elem` ("[(" :: String) = x:xs + f x | x `elem` ["[overlap","ok]","[incoherent]","[safe]"] = "" + f x | x `elem` ["{-#","UNPACK","#-}"] = "" + f x = x + + g ("where":_) = [] + g (x:xs) = x : g xs + g [] = [] + +haddockPackageUrl :: Hoogle.URL -> [String] -> [String] +haddockPackageUrl x = concatMap f + where f y | "@package " `isPrefixOf` y = ["@url " ++ x, y] + | otherwise = [y]