Hoogle database generation

This commit is contained in:
Michael Snoyman 2015-05-14 21:21:42 +03:00
parent d627f63521
commit 54b69cb491
3 changed files with 163 additions and 73 deletions

View File

@ -14,7 +14,7 @@ import qualified Stackage.Database.Cron as Cron
getHoogleDB :: SnapName -> Handler (Maybe FilePath) getHoogleDB :: SnapName -> Handler (Maybe FilePath)
getHoogleDB name = do getHoogleDB name = do
app <- getYesod app <- getYesod
liftIO $ Cron.getHoogleDB (httpManager app) name liftIO $ Cron.getHoogleDB True (httpManager app) name
getHoogleR :: SnapName -> Handler Html getHoogleR :: SnapName -> Handler Html
getHoogleR name = do getHoogleR name = do

View File

@ -29,6 +29,7 @@ module Stackage.Database
, getSnapshotsForPackage , getSnapshotsForPackage
, getSnapshots , getSnapshots
, currentSchema , currentSchema
, last5Lts5Nightly
) where ) where
import Database.Sqlite (SqliteException) import Database.Sqlite (SqliteException)
@ -132,6 +133,8 @@ newtype StackageDatabase = StackageDatabase ConnectionPool
class MonadIO m => GetStackageDatabase m where class MonadIO m => GetStackageDatabase m where
getStackageDatabase :: m StackageDatabase getStackageDatabase :: m StackageDatabase
instance MonadIO m => GetStackageDatabase (ReaderT StackageDatabase m) where
getStackageDatabase = ask
sourcePackages :: MonadResource m => FilePath -> Producer m Tar.Entry sourcePackages :: MonadResource m => FilePath -> Producer m Tar.Entry
sourcePackages root = do sourcePackages root = do
@ -601,3 +604,12 @@ getSnapshots l o = run $ (,)
<*> fmap (map entityVal) (selectList <*> fmap (map entityVal) (selectList
[] []
[LimitTo l, OffsetBy o, Desc SnapshotCreated]) [LimitTo l, OffsetBy o, Desc SnapshotCreated])
last5Lts5Nightly :: GetStackageDatabase m => m [SnapName]
last5Lts5Nightly = run $ do
ls <- selectList [] [Desc LtsMajor, Desc LtsMinor, LimitTo 5]
ns <- selectList [] [Desc NightlyDay, LimitTo 5]
return $ map l ls ++ map n ns
where
l (Entity _ x) = SNLts (ltsMajor x) (ltsMinor x)
n (Entity _ x) = SNNightly (nightlyDay x)

View File

@ -5,23 +5,31 @@ module Stackage.Database.Cron
) where ) where
import ClassyPrelude.Conduit import ClassyPrelude.Conduit
import Stackage.PackageIndex.Conduit
import Database.Persist (Entity (Entity))
import Data.Char (isAlpha)
import qualified Codec.Archive.Tar as Tar
import Stackage.Database import Stackage.Database
import Network.HTTP.Client import Network.HTTP.Client
import Network.HTTP.Client.Conduit (bodyReaderSource) import Network.HTTP.Client.Conduit (bodyReaderSource)
import Filesystem (rename) import Filesystem (rename, removeTree, removeFile)
import Web.PathPieces (toPathPiece) import Web.PathPieces (toPathPiece)
import Filesystem (isFile) import Filesystem (isFile, createTree)
import Filesystem.Path (parent)
import Control.Monad.State.Strict (StateT, get, put)
import Network.HTTP.Types (status200) import Network.HTTP.Types (status200)
import Data.Streaming.Network (bindPortTCP)
import Network.AWS (Credentials (Discover), import Network.AWS (Credentials (Discover),
Region (NorthVirginia), getEnv, Region (NorthVirginia), getEnv,
send, sourceFileIO) send, sourceFileIO, envManager )
import Network.AWS.S3 (ObjectCannedACL (PublicRead), import Network.AWS.S3 (ObjectCannedACL (PublicRead),
poACL, poACL,
putObject) putObject)
import Control.Lens (set) import Control.Lens (set, view)
import qualified Data.Conduit.Binary as CB import qualified Data.Conduit.Binary as CB
import Data.Conduit.Zlib (WindowBits (WindowBits), import Data.Conduit.Zlib (WindowBits (WindowBits),
compress) compress)
import qualified Hoogle
filename' :: Text filename' :: Text
filename' = concat filename' = concat
@ -47,6 +55,7 @@ loadFromS3 = do
fptmp = fp <.> "tmp" fptmp = fp <.> "tmp"
req <- parseUrl $ unpack url req <- parseUrl $ unpack url
let download man = withResponse req man $ \res -> do let download man = withResponse req man $ \res -> do
createTree $ parent fptmp
runResourceT runResourceT
$ bodyReaderSource (responseBody res) $ bodyReaderSource (responseBody res)
$$ sinkFile fptmp $$ sinkFile fptmp
@ -70,8 +79,9 @@ hoogleUrl n = concat
, ".gz" , ".gz"
] ]
getHoogleDB :: Manager -> SnapName -> IO (Maybe FilePath) getHoogleDB :: Bool -- ^ print exceptions?
getHoogleDB man name = do -> Manager -> SnapName -> IO (Maybe FilePath)
getHoogleDB toPrint man name = do
let fp = fpFromText $ hoogleKey name let fp = fpFromText $ hoogleKey name
fptmp = fp <.> "tmp" fptmp = fp <.> "tmp"
exists <- isFile fp exists <- isFile fp
@ -82,18 +92,24 @@ getHoogleDB man name = do
let req = req' { checkStatus = \_ _ _ -> Nothing } let req = req' { checkStatus = \_ _ _ -> Nothing }
withResponse req man $ \res -> if responseStatus res == status200 withResponse req man $ \res -> if responseStatus res == status200
then do then do
createTree $ parent fptmp
runResourceT $ bodyReaderSource (responseBody res) runResourceT $ bodyReaderSource (responseBody res)
$$ sinkFile fptmp $$ sinkFile fptmp
rename fptmp fp rename fptmp fp
return $ Just fp return $ Just fp
else do else do
mapM brRead res >>= print when toPrint $ mapM brRead res >>= print
return Nothing return Nothing
stackageServerCron :: IO () stackageServerCron :: IO ()
stackageServerCron = do stackageServerCron = do
-- Hacky approach instead of PID files
void $ catchIO (bindPortTCP 17834 "127.0.0.1") $ \_ ->
error $ "cabal loader process already running, exiting"
env <- getEnv NorthVirginia Discover env <- getEnv NorthVirginia Discover
let upload fp key = do let upload :: FilePath -> Text -> IO ()
upload fp key = do
let fpgz = fpToString $ fp <.> "gz" let fpgz = fpToString $ fp <.> "gz"
runResourceT $ sourceFile fp runResourceT $ sourceFile fp
$$ compress 9 (WindowBits 31) $$ compress 9 (WindowBits 31)
@ -102,81 +118,143 @@ stackageServerCron = do
let po = let po =
set poACL (Just PublicRead) set poACL (Just PublicRead)
$ putObject body "haddock.stackage.org" key $ putObject body "haddock.stackage.org" key
putStrLn $ "Uploading: " ++ key
eres <- runResourceT $ send env po eres <- runResourceT $ send env po
case eres of case eres of
Left e -> error $ show (fp, key, e) Left e -> error $ show (fp, key, e)
Right _ -> return () Right _ -> putStrLn "Success"
let dbfp = fpFromText keyName let dbfp = fpFromText keyName
createStackageDatabase dbfp _ <- return (upload, dbfp)
upload dbfp keyName --createStackageDatabase dbfp
--upload dbfp keyName
{- (db, _) <- loadFromS3
createStackageDatabase dbfile names <- runReaderT last5Lts5Nightly db
let manager = view envManager env
forM_ names $ \name -> do
mfp <- getHoogleDB False manager name
case mfp of
Just _ -> putStrLn $ "Hoogle database exists for: " ++ toPathPiece name
Nothing -> do
mfp' <- createHoogleDB db manager name
forM_ mfp' $ \fp -> do
let key = hoogleKey name
upload fp key
let dest = fpFromText key
createTree $ parent dest
rename fp dest
import Data.Streaming.Network (bindPortTCP) createHoogleDB :: StackageDatabase -> Manager -> SnapName -> IO (Maybe FilePath)
createHoogleDB db man name = handleAny (\e -> print e $> Nothing) $ do
req <- parseUrl $ unpack tarUrl
unlessM (isFile tarFP) $ withResponse req man $ \res -> do
let tmp = tarFP <.> "tmp"
createTree $ parent tmp
runResourceT $ bodyReaderSource (responseBody res)
$$ sinkFile tmp
rename tmp tarFP
data CabalLoaderEnv = CabalLoaderEnv void $ tryIO $ removeTree bindir
{ cleSettings :: !(AppConfig DefaultEnv Extra) void $ tryIO $ removeFile outname
, cleManager :: !Manager createTree bindir
}
instance HasHackageRoot CabalLoaderEnv where dbs <- runResourceT
getHackageRoot = hackageRoot . appExtra . cleSettings $ sourceTarFile True (fpToString tarFP)
instance HasHttpManager CabalLoaderEnv where $$ evalStateC 1 (mapMC (singleDB db name bindir))
getHttpManager = cleManager =$ sinkList
cabalLoaderMain :: IO () putStrLn "Merging databases..."
cabalLoaderMain = do Hoogle.mergeDatabase (map fpToString $ catMaybes dbs) (fpToString outname)
-- Hacky approach instead of PID files putStrLn "Merge done"
void $ catchIO (bindPortTCP 17834 "127.0.0.1") $ \_ ->
error $ "cabal loader process already running, exiting"
error "cabalLoaderMain" return $ Just outname
{- FIXME
conf <- fromArgs parseExtra
dbconf <- getDbConf conf
pool <- Database.Persist.createPoolConfig dbconf
manager <- newManager
bs <- loadBlobStore manager conf
hSetBuffering stdout LineBuffering
env <- getEnvironment
let forceUpdate = lookup "STACKAGE_FORCE_UPDATE" env == Just "1"
flip runLoggingT logFunc $ appLoadCabalFiles
True -- update database?
forceUpdate
CabalLoaderEnv
{ cleSettings = conf
, cleBlobStore = bs
, cleManager = manager
}
dbconf
pool
let foundation = App
{ settings = conf
, getStatic = error "getStatic"
, connPool = pool
, httpManager = manager
, persistConfig = dbconf
, appLogger = error "appLogger"
, genIO = error "genIO"
, blobStore = bs
, haddockRootDir = error "haddockRootDir"
, appDocUnpacker = error "appDocUnpacker"
, widgetCache = error "widgetCache"
, websiteContent = error "websiteContent"
}
createHoogleDatabases
bs
(flip (Database.Persist.runPool dbconf) pool)
putStrLn
(yesodRender foundation (appRoot conf))
where where
logFunc loc src level str root = "hoogle-gen"
| level > LevelDebug = S.hPutStr stdout $ fromLogStr $ defaultLogStr loc src level str bindir = root </> "bindir"
| otherwise = return () outname = root </> "output.hoo"
-}
tarKey = toPathPiece name ++ "/hoogle/orig.tar"
tarUrl = "https://s3.amazonaws.com/haddock.stackage.org/" ++ tarKey
tarFP = root </> fpFromText tarKey
-} singleDB :: StackageDatabase
-> SnapName
-> FilePath -- ^ bindir to write to
-> Tar.Entry
-> StateT Int (ResourceT IO) (Maybe FilePath)
singleDB db sname bindir e@(Tar.entryContent -> Tar.NormalFile lbs _) = do
idx <- get
put $! idx + 1
putStrLn $ "Loading file for Hoogle: " ++ pack (Tar.entryPath e)
let pkg = pack $ takeWhile (/= '.') $ Tar.entryPath e
msp <- flip runReaderT db $ do
Just (Entity sid _) <- lookupSnapshot sname
lookupSnapshotPackage sid pkg
case msp of
Nothing -> do
putStrLn $ "Unknown: " ++ pkg
return Nothing
Just (Entity _ sp) -> do
let ver = snapshotPackageVersion sp
pkgver = concat [pkg, "-", ver]
out = bindir </> fpFromString (show idx) <.> "hoo"
src' = unlines
$ haddockHacks (Just $ unpack docsUrl)
$ lines
$ unpack
$ decodeUtf8 lbs
docsUrl = concat
[ "https://www.stackage.org/haddock/"
, toPathPiece sname
, "/"
, pkgver
, "/index.html"
]
_errs <- liftIO $ Hoogle.createDatabase "" Hoogle.Haskell [] src' $ fpToString out
return $ Just out
singleDB _ _ _ _ = return Nothing
---------------------------------------------------------------------
-- HADDOCK HACKS
-- (Copied from hoogle-4.2.36/src/Recipe/Haddock.hs)
-- Modifications:
-- 1) Some name qualification
-- 2) Explicit type sig due to polymorphic elem
-- 3) Fixed an unused binding warning
-- Eliminate @version
-- Change :*: to (:*:), Haddock bug
-- Change !!Int to !Int, Haddock bug
-- Change instance [overlap ok] to instance, Haddock bug
-- Change instance [incoherent] to instance, Haddock bug
-- Change instance [safe] to instance, Haddock bug
-- Change !Int to Int, HSE bug
-- Drop {-# UNPACK #-}, Haddock bug
-- Drop everything after where, Haddock bug
haddockHacks :: Maybe Hoogle.URL -> [String] -> [String]
haddockHacks loc src = maybe id haddockPackageUrl loc (translate src)
where
translate :: [String] -> [String]
translate = map (unwords . g . map f . words) . filter (not . isPrefixOf "@version ")
f "::" = "::"
f (':':xs) = "(:" ++ xs ++ ")"
f ('!':'!':x:xs) | isAlpha x = xs
f ('!':x:xs) | isAlpha x || x `elem` ("[(" :: String) = x:xs
f x | x `elem` ["[overlap","ok]","[incoherent]","[safe]"] = ""
f x | x `elem` ["{-#","UNPACK","#-}"] = ""
f x = x
g ("where":_) = []
g (x:xs) = x : g xs
g [] = []
haddockPackageUrl :: Hoogle.URL -> [String] -> [String]
haddockPackageUrl x = concatMap f
where f y | "@package " `isPrefixOf` y = ["@url " ++ x, y]
| otherwise = [y]