Document more stackage-server-cron steps

This commit is contained in:
Bryan Richter 2025-02-07 12:53:18 +02:00
parent 608cf0f4f6
commit 672099d68e
No known key found for this signature in database
GPG Key ID: B202264020068BFB
3 changed files with 46 additions and 15 deletions

View File

@ -58,7 +58,7 @@ optsParser =
T.unpack defHaddockBucketName)) <*> T.unpack defHaddockBucketName)) <*>
switch switch
(long "do-not-upload" <> (long "do-not-upload" <>
help "Stop from hoogle db and snapshots.json from being generated and uploaded") <*> help "Disable upload of Hoogle database and snapshots.json") <*>
option option
readLogLevel readLogLevel
(long "log-level" <> metavar "LOG_LEVEL" <> short 'l' <> value LevelInfo <> (long "log-level" <> metavar "LOG_LEVEL" <> short 'l' <> value LevelInfo <>

View File

@ -107,9 +107,12 @@ withResponseUnliftIO req man f = withRunInIO $ \ runInIO -> withResponse req man
-- | Returns an action that, under the SingleRun wrapper that ensures only one -- | Returns an action that, under the SingleRun wrapper that ensures only one
-- thing at a time is writing the file in question, ensure that a Hoogle -- thing at a time is writing the file in question, ensure that a Hoogle
-- database exists on the filesystem for the given SnapName. But only going so -- database exists on the filesystem for the given SnapName. But only going so
-- far as downloading it from the haddock bucket. See 'createHoogleDB' for the -- far as downloading it from the haddock bucket. See 'buildAndUploadHoogleDBs' for the
-- function that puts it there in the first place. If no db exists in the -- function that puts it there in the first place. If no db exists in the
-- bucket, the action will return 'Nothing'. -- bucket, the action will return 'Nothing'.
--
-- The location searched is $PWD/hoogle/<snapshot>/<hoogle-version>.hoo
-- E.g. in production, ~stackage-update/hoogle/lts-22.20/5.0.18.4.hoo (for stackage-update).
newHoogleLocker :: newHoogleLocker ::
(HasLogFunc env, MonadIO m) => env -> Manager -> Text -> m (SingleRun SnapName (Maybe FilePath)) (HasLogFunc env, MonadIO m) => env -> Manager -> Text -> m (SingleRun SnapName (Maybe FilePath))
newHoogleLocker env man bucketUrl = mkSingleRun hoogleLocker newHoogleLocker env man bucketUrl = mkSingleRun hoogleLocker
@ -238,7 +241,7 @@ runStackageUpdate doNotUpload = do
-- for the -1'th package. -- for the -1'th package.
runConduit $ sourceSnapshots .| foldMC (createOrUpdateSnapshot corePackageGetters) (pure ()) runConduit $ sourceSnapshots .| foldMC (createOrUpdateSnapshot corePackageGetters) (pure ())
unless doNotUpload uploadSnapshotsJSON unless doNotUpload uploadSnapshotsJSON
buildAndUploadHoogleDB doNotUpload buildAndUploadHoogleDBs doNotUpload
logInfo "Finished building and uploading Hoogle DBs" logInfo "Finished building and uploading Hoogle DBs"
@ -762,6 +765,9 @@ uploadHoogleDB fp key =
body <- toBody <$> readFileBinary fpgz body <- toBody <$> readFileBinary fpgz
uploadBucket <- scUploadBucketName <$> ask uploadBucket <- scUploadBucketName <$> ask
uploadFromRIO key $ uploadFromRIO key $
-- FIXME: I should also set content encoding explicitly here. But
-- then I would break stackage-server, which applies an 'ungzip' in
-- 'newHoogleLocker'. :(
set putObject_acl (Just ObjectCannedACL_Public_read) $ newPutObject (BucketName uploadBucket) key body set putObject_acl (Just ObjectCannedACL_Public_read) $ newPutObject (BucketName uploadBucket) key body
@ -775,15 +781,28 @@ uploadFromRIO key po = do
logError $ "Couldn't upload " <> displayShow key <> " to S3 becuase " <> displayShow e logError $ "Couldn't upload " <> displayShow key <> " to S3 becuase " <> displayShow e
Right _ -> logInfo $ "Successfully uploaded " <> displayShow key <> " to S3" Right _ -> logInfo $ "Successfully uploaded " <> displayShow key <> " to S3"
buildAndUploadHoogleDB :: Bool -> RIO StackageCron () -- | As the name says, build and upload Hoogle DBs.
buildAndUploadHoogleDB doNotUpload = do --
-- Which DBs? The last 5 LTS and the last 5 Nightlies that are missing their
-- Hoogle DBs.
--
-- How? It downloads the Hoogle inputs that were previously generated alongside
-- the Haddocks, runs @hoogle@ on them, and uploads the result back to the same
-- bucket. Those inputs were generated by snapshot curation.
--
-- Why? I feel like this should be a short Bash script using curl and hoogle, and
-- maybe one day it will be.
--
-- This action is only run by stackage-server-cron.
buildAndUploadHoogleDBs :: Bool -> RIO StackageCron ()
buildAndUploadHoogleDBs doNotUpload = do
snapshots <- lastLtsNightlyWithoutHoogleDb 5 5 snapshots <- lastLtsNightlyWithoutHoogleDb 5 5
-- currentHoogleVersionId <- scHoogleVersionId <$> ask
env <- ask env <- ask
awsEnv <- asks scEnvAWS awsEnv <- asks scEnvAWS
bucketUrl <- asks scDownloadBucketUrl bucketUrl <- asks scDownloadBucketUrl
-- locker is an action that returns the path to a hoogle db, if one exists -- locker is an action that returns the path to a hoogle db, if one exists
-- in the haddock bucket already. -- in the haddock bucket already. It takes the SnapName as an argument.
-- I think it might be overkill.
locker <- newHoogleLocker (env ^. logFuncL) (awsEnv ^. env_manager) bucketUrl locker <- newHoogleLocker (env ^. logFuncL) (awsEnv ^. env_manager) bucketUrl
let -- These bindings undo a questionable conflation of operations let -- These bindings undo a questionable conflation of operations
insertH = checkInsertSnapshotHoogleDb True insertH = checkInsertSnapshotHoogleDb True
@ -796,12 +815,18 @@ buildAndUploadHoogleDB doNotUpload = do
-- Perhaps the check can be removed. -- Perhaps the check can be removed.
unlessM (checkH snapshotId) $ do unlessM (checkH snapshotId) $ do
logInfo $ "Starting Hoogle database download: " <> display (hoogleKey snapName) logInfo $ "Starting Hoogle database download: " <> display (hoogleKey snapName)
-- Check if the database already exists (by downloading it).
-- FIXME: Why not just send a HEAD?
-- Perhaps the idea was to put the hoogle database somewhere the
-- main Stackage server process can find it? But nowadays
-- stackage-server downloads its own version separately.
mfp <- singleRun locker snapName mfp <- singleRun locker snapName
case mfp of case mfp of
Just _ -> do Just _ -> do
-- Something bad must have happened: we created the Hoogle db -- Something bad must have happened: we created the hoogle db
-- previously, but didn't get to record it in our database. -- previously, but didn't get to record it as available.
logInfo $ "Current hoogle database exists for: " <> display snapName logWarn $ "Unregistered hoogle database found for: " <> display snapName
<> ". Registering now."
void $ insertH snapshotId void $ insertH snapshotId
Nothing -> do Nothing -> do
logInfo $ "Current hoogle database does not yet exist in the bucket for: " <> display snapName logInfo $ "Current hoogle database does not yet exist in the bucket for: " <> display snapName
@ -820,8 +845,9 @@ buildAndUploadHoogleDB doNotUpload = do
-- Haddocks are downloaded from the documentation bucket, where they were -- Haddocks are downloaded from the documentation bucket, where they were
-- uploaded as a tar file. -- uploaded as a tar file.
-- --
-- Returns the path to the .hoo database. -- Returns the path to the .hoo database, which will be found in the first
createHoogleDB :: SnapshotId -> SnapName -> RIO StackageCron (Maybe FilePath) -- argument. It will look like @<rootDir>/hoogle-gen/output.hoo@.
createHoogleDB :: SnapshotId -> SnapName -> RIO StackageCron FilePath
createHoogleDB snapshotId snapName = do createHoogleDB snapshotId snapName = do
logInfo $ "Creating Hoogle DB for " <> display snapName logInfo $ "Creating Hoogle DB for " <> display snapName
downloadBucketUrl <- scDownloadBucketUrl <$> ask downloadBucketUrl <- scDownloadBucketUrl <$> ask
@ -848,7 +874,9 @@ createHoogleDB snapshotId snapName = do
sourceFile tarFP .| sourceFile tarFP .|
untar (restoreHoogleTxtFileWithCabal tmpdir snapshotId snapName) .| untar (restoreHoogleTxtFileWithCabal tmpdir snapshotId snapName) .|
foldMapC Any foldMapC Any
-- We just check if we have any Hoogle .txt file at all.
unless hasRestored $ error "No Hoogle .txt files found" unless hasRestored $ error "No Hoogle .txt files found"
-- Generate the hoogle database
let args = ["generate", "--database=" ++ outname, "--local=" ++ tmpdir] let args = ["generate", "--database=" ++ outname, "--local=" ++ tmpdir]
logInfo $ logInfo $
mconcat mconcat
@ -856,6 +884,8 @@ createHoogleDB snapshotId snapName = do
, foldMap fromString $ L.intersperse " " ("hoogle" : args) , foldMap fromString $ L.intersperse " " ("hoogle" : args)
, ")" , ")"
] ]
-- 'Hoogle.hoogle' expects to run as an app, and crashes if something
-- goes wrong. That's good.
liftIO $ Hoogle.hoogle args liftIO $ Hoogle.hoogle args
logInfo "Merge done" logInfo "Merge done"
return $ Just outname return $ Just outname

View File

@ -1224,9 +1224,10 @@ checkInsertSnapshotHoogleDb shouldInsert snapshotId = do
lift $ lift $
logInfo $ logInfo $
"Marking hoogle database for version " <> display hver <> " as available." "Marking hoogle database for version " <> display hver <> " as available."
-- whether or not the version exists, we still put it into snapshot_hoogle_db -- whether or not the version exists, we still put it into
-- So literally the only use of the above query is to log the -- snapshot_hoogle_db. So literally the only use of the above
-- action we're taking. -- query is to log the action we're taking. Whether or not it
-- exists is immaterial to the following action.
isJust <$> P.insertUniqueEntity sh isJust <$> P.insertUniqueEntity sh
-- if we're not inserting, we're just checking if it already exists -- if we're not inserting, we're just checking if it already exists
-- in snapshot_hoogle_db. -- in snapshot_hoogle_db.