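-- | Cron job for stackage-server: builds the Stackage SQLite database,
-- uploads it (together with @snapshots.json@) to S3, and generates Hoogle
-- databases for recent snapshots.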
module Stackage.Database.Cron
    ( stackageServerCron
    , loadFromS3
    , newHoogleLocker
    , singleRun
    ) where

import ClassyPrelude.Conduit
import Stackage.PackageIndex.Conduit
import Database.Persist (Entity (Entity))
import qualified Codec.Archive.Tar as Tar
import Stackage.Database
import Network.HTTP.Client
import Network.HTTP.Client.Conduit (bodyReaderSource)
import Filesystem (rename, removeTree, removeFile, isFile, createTree)
import Web.PathPieces (toPathPiece)
import Filesystem.Path.CurrentOS (parent, fromText, encodeString)
import Network.HTTP.Types (status200)
import Data.Streaming.Network (bindPortTCP)
import Network.AWS (Credentials (Discover),
                    Region (NorthVirginia), newEnv,
                    send, chunkedFile, defaultChunkSize,
                    envManager, runAWS)
import Control.Monad.Trans.AWS (trying, _Error)
import Network.AWS.Data.Body (toBody)
import Network.AWS.S3 (ObjectCannedACL (OPublicRead),
                       poACL, poContentType, putObject,
                       BucketName(BucketName),
                       ObjectKey(ObjectKey))
import Control.Lens (set, view)
import qualified Data.Conduit.Binary as CB
import Data.Conduit.Zlib (WindowBits (WindowBits),
                          compress, ungzip)
import qualified Hoogle
import System.Directory (doesFileExist, getAppUserDataDirectory)
import System.IO (withBinaryFile, IOMode (ReadMode))
import System.IO.Temp (withSystemTempDirectory)
import Control.SingleRun
import qualified Data.ByteString.Lazy as L
import System.FilePath (splitPath)

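-- | Filename of the generated SQLite database, versioned by 'currentSchema'.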
filename' :: Text
filename' = concat
    [ "stackage-database-"
    , tshow currentSchema
    , ".sqlite3"
    ]

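-- | S3 key under which the database file is uploaded.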
keyName :: Text
keyName = "stackage-database/" ++ filename'

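-- | Public URL from which the database file can be downloaded.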
url :: Text
url = concat
    [ "https://s3.amazonaws.com/haddock.stackage.org/"
    , keyName
    ]

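-- A rough usage sketch for 'loadFromS3' (the caller below is hypothetical;
-- 'newManager' and 'tlsManagerSettings' would come from http-client(-tls)
-- and are not imported by this module):
--
-- > do man <- newManager tlsManagerSettings
-- >    (getDatabase, refresh) <- loadFromS3 False man
-- >    db <- getDatabase   -- obtain the currently loaded database
-- >    refresh             -- download a fresh copy and swap it in
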
-- | Provides an action to be used to refresh the file from S3.
loadFromS3 :: Bool -- ^ devel mode? if True, won't delete old databases, and won't refresh them either
           -> Manager -> IO (IO StackageDatabase, IO ())
loadFromS3 develMode man = do
    killPrevVar <- newTVarIO $ return ()
    currSuffixVar <- newTVarIO (1 :: Int)

    let root = "stackage-database"
    unless develMode $ handleIO print $ removeTree root
    createTree root

    req <- parseUrl $ unpack url
    let download = do
            suffix <- atomically $ do
                x <- readTVar currSuffixVar
                writeTVar currSuffixVar $! x + 1
                return x

            let fp = root </> unpack ("database-download-" ++ tshow suffix)
                isInitial = suffix == 1
            toSkip <-
                if isInitial
                    then do
                        putStrLn $ "Checking if database exists: " ++ tshow fp
                        doesFileExist fp
                    else return False
            if toSkip
                then putStrLn "Skipping initial database download"
                else do
                    putStrLn $ "Downloading database to " ++ pack fp
                    withResponse req man $ \res ->
                        runResourceT
                            $ bodyReaderSource (responseBody res)
                            $= ungzip
                            $$ sinkFile fp
                    putStrLn "Finished downloading database"

            return fp

    dbvar <- newTVarIO $ error "database not yet loaded"

    let update = do
            fp <- download
            db <- openStackageDatabase (fromString fp) `onException` removeFile (fromString fp)
            void $ tryIO $ join $ atomically $ do
                writeTVar dbvar db
                oldKill <- readTVar killPrevVar
                writeTVar killPrevVar $ do
                    -- give existing users a chance to clean up
                    threadDelay $ 1000000 * 30
                    void $ tryIO $ removeFile (fromString fp)
                    closeStackageDatabase db
                return oldKill

    update

    return (readTVarIO dbvar, unless develMode update)

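-- | S3 key for the Hoogle database of the given snapshot; the key embeds the
-- version of the hoogle library (via the Cabal-generated @VERSION_hoogle@
-- macro).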
hoogleKey :: SnapName -> Text
hoogleKey name = concat
    [ "hoogle/"
    , toPathPiece name
    , "/"
    , VERSION_hoogle
    , ".hoo"
    ]

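-- | Public URL of the prebuilt Hoogle database for the given snapshot.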
hoogleUrl :: SnapName -> Text
hoogleUrl n = concat
    [ "https://s3.amazonaws.com/haddock.stackage.org/"
    , hoogleKey n
    ]

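-- | Create a 'SingleRun' that downloads the prebuilt Hoogle database for a
-- snapshot from S3 and caches it on disk, so that concurrent requests for the
-- same snapshot share one download. Returns 'Nothing' when S3 has no database
-- for that snapshot (see its use in 'stackageServerCron' below).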
newHoogleLocker :: Bool -- ^ print exceptions?
                -> Manager
                -> IO (SingleRun SnapName (Maybe FilePath))
newHoogleLocker toPrint man = mkSingleRun $ \name -> do
    let fp = fromText $ hoogleKey name
        fptmp = encodeString fp <.> "tmp"

    exists <- isFile fp
    if exists
        then return $ Just (encodeString fp)
        else do
            req' <- parseUrl $ unpack $ hoogleUrl name
            let req = req'
                    { checkStatus = \_ _ _ -> Nothing
                    , decompress = const False
                    }
            withResponse req man $ \res -> if responseStatus res == status200
                then do
                    createTree $ parent (fromString fptmp)
                    runResourceT $ bodyReaderSource (responseBody res)
                        $= ungzip
                        $$ sinkFile fptmp
                    rename (fromString fptmp) fp
                    return $ Just $ encodeString fp
                else do
                    when toPrint $ mapM brRead res >>= print
                    return Nothing

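-- | Entry point for the cron job: rebuild the Stackage database, upload it
-- and @snapshots.json@ to the @haddock.stackage.org@ bucket, and create and
-- upload Hoogle databases for recent LTS and nightly snapshots.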
stackageServerCron :: IO ()
stackageServerCron = do
    -- Hacky approach instead of PID files
    void $ catchIO (bindPortTCP 17834 "127.0.0.1") $ \_ ->
        error $ "cabal loader process already running, exiting"

    env <- newEnv NorthVirginia Discover
    let upload :: FilePath -> ObjectKey -> IO ()
        upload fp key = do
            let fpgz = fp <.> "gz"
            runResourceT $ sourceFile fp
                $$ compress 9 (WindowBits 31)
                =$ CB.sinkFile fpgz
            body <- chunkedFile defaultChunkSize fpgz
            let po =
                      set poACL (Just OPublicRead)
                    $ putObject "haddock.stackage.org" key body
            putStrLn $ "Uploading: " ++ tshow key
            eres <- runResourceT $ runAWS env $ trying _Error $ send po
            case eres of
                Left e -> error $ show (fp, key, e)
                Right _ -> putStrLn "Success"

    let dbfp = fromText keyName
    createStackageDatabase dbfp
    upload (encodeString dbfp) (ObjectKey keyName)

    db <- openStackageDatabase dbfp

    do
        snapshots <- runReaderT snapshotsJSON db
        let key = ObjectKey "snapshots.json"
            po =
                  set poACL (Just OPublicRead)
                $ set poContentType (Just "application/json")
                $ putObject (BucketName "haddock.stackage.org") key (toBody snapshots)
        putStrLn $ "Uploading: " ++ tshow key
        eres <- runResourceT $ runAWS env $ trying _Error $ send po
        case eres of
            Left e -> error $ show (key, e)
            Right _ -> putStrLn "Success"

    names <- runReaderT (lastXLts5Nightly 50) db
    let manager = view envManager env

    locker <- newHoogleLocker False manager

    forM_ names $ \name -> do
        mfp <- singleRun locker name
        case mfp of
            Just _ -> putStrLn $ "Hoogle database exists for: " ++ toPathPiece name
            Nothing -> do
                mfp' <- createHoogleDB db manager name
                forM_ mfp' $ \fp -> do
                    let key = hoogleKey name
                    upload fp (ObjectKey key)
                    let dest = unpack key
                    createTree $ parent (fromString dest)
                    rename (fromString fp) (fromString dest)

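-- | Build a Hoogle database for the given snapshot: fetch the snapshot's
-- original Hoogle input files from S3, add the matching .cabal files from the
-- local 00-index.tar, and run Hoogle's @generate@ command over the result.
-- Any exception is printed and turned into 'Nothing'.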
createHoogleDB :: StackageDatabase -> Manager -> SnapName -> IO (Maybe FilePath)
createHoogleDB db man name = handleAny (\e -> print e $> Nothing) $ do
    putStrLn $ "Creating Hoogle DB for " ++ toPathPiece name
    req' <- parseUrl $ unpack tarUrl
    let req = req' { decompress = const True }

    unlessM (isFile (fromString tarFP)) $ withResponse req man $ \res -> do
        let tmp = tarFP <.> "tmp"
        createTree $ parent (fromString tmp)
        runResourceT $ bodyReaderSource (responseBody res)
            $$ sinkFile tmp
        rename (fromString tmp) (fromString tarFP)

    void $ tryIO $ removeTree (fromString bindir)
    void $ tryIO $ removeFile (fromString outname)
    createTree (fromString bindir)

    withSystemTempDirectory ("hoogle-" ++ unpack (toPathPiece name)) $ \tmpdir -> do
        allPackagePairs <- runResourceT
            $ sourceTarFile False tarFP
            $$ foldMapMC (liftIO . singleDB db name tmpdir)

        stackDir <- getAppUserDataDirectory "stack"
        let indexTar = stackDir </> "indices" </> "Hackage" </> "00-index.tar"
        withBinaryFile indexTar ReadMode $ \h -> do
            let loop Tar.Done = return ()
                loop (Tar.Fail e) = throwM e
                loop (Tar.Next e es) = go e >> loop es

                go e =
                    case (Tar.entryContent e, splitPath $ Tar.entryPath e) of
                        (Tar.NormalFile cabalLBS _, [pkg', ver', pkgcabal'])
                            | Just pkg <- stripSuffix "/" (pack pkg')
                            , Just ver <- stripSuffix "/" (pack ver')
                            , Just pkg2 <- stripSuffix ".cabal" (pack pkgcabal')
                            , pkg == pkg2
                            , lookup pkg allPackagePairs == Just ver ->
                                writeFile (tmpdir </> unpack pkg <.> "cabal") cabalLBS
                        _ -> return ()
            L.hGetContents h >>= loop . Tar.read

        let args =
                [ "generate"
                , "--database=" ++ outname
                , "--local=" ++ tmpdir
                ]
        putStrLn $ concat
            [ "Merging databases... ("
            , tshow args
            , ")"
            ]
        Hoogle.hoogle args

        putStrLn "Merge done"

        return $ Just outname
  where
    root = "hoogle-gen"
    bindir = root </> "bindir"
    outname = root </> "output.hoo"

    tarKey = toPathPiece name ++ "/hoogle/orig.tar"
    tarUrl = "https://s3.amazonaws.com/haddock.stackage.org/" ++ tarKey
    tarFP = root </> unpack tarKey

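-- | Write the Hoogle input file for a single tar entry into the temp
-- directory, returning a map from package name to its version in the
-- snapshot; entries for packages unknown to the snapshot yield 'mempty'.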
singleDB :: StackageDatabase
         -> SnapName
         -> FilePath -- ^ temp directory to write .txt files to
         -> Tar.Entry
         -> IO (Map Text Text)
singleDB db sname tmpdir e@(Tar.entryContent -> Tar.NormalFile lbs _) = do
    --putStrLn $ "Loading file for Hoogle: " ++ pack (Tar.entryPath e)

    let pkg = pack $ takeWhile (/= '.') $ Tar.entryPath e
    msp <- flip runReaderT db $ do
        Just (Entity sid _) <- lookupSnapshot sname
        lookupSnapshotPackage sid pkg
    case msp of
        Nothing -> do
            putStrLn $ "Unknown: " ++ pkg
            return mempty
        Just (Entity _ sp) -> do
            let out = tmpdir </> unpack pkg <.> "txt"
                -- FIXME add @url directive
            writeFile out lbs
            return $ singletonMap pkg (snapshotPackageVersion sp)
            {-
            docsUrl = concat
                [ "https://www.stackage.org/haddock/"
                , toPathPiece sname
                , "/"
                , pkgver
                , "/index.html"
                ] -}

singleDB _ _ _ _ = return mempty