Add a bunch of docs around Hoogle DBs

so I remember how it all works.
This commit is contained in:
Bryan Richter 2024-02-16 13:12:48 +02:00
parent 22ef976f05
commit eebde8b817
No known key found for this signature in database
GPG Key ID: B202264020068BFB
2 changed files with 65 additions and 8 deletions

View File

@ -103,6 +103,11 @@ getStackageSnapshotsDir = do
withResponseUnliftIO :: MonadUnliftIO m => Request -> Manager -> (Response BodyReader -> m b) -> m b
withResponseUnliftIO req man f = withRunInIO $ \ runInIO -> withResponse req man (runInIO . f)
-- | Under the SingleRun wrapper that ensures only one thing at a time is
-- writing the file in question, ensure that a Hoogle database exists on the
-- filesystem for the given SnapName. But only going so far as downloading it
-- from the haddock bucket. See 'createHoogleDB' for the function that puts it
-- there in the first place.
newHoogleLocker ::
(HasLogFunc env, MonadIO m) => env -> Manager -> Text -> m (SingleRun SnapName (Maybe FilePath))
newHoogleLocker env man bucketUrl = mkSingleRun hoogleLocker
@ -704,20 +709,30 @@ uploadFromRIO key po = do
buildAndUploadHoogleDB :: Bool -> RIO StackageCron ()
buildAndUploadHoogleDB doNotUpload = do
snapshots <- lastLtsNightlyWithoutHoogleDb 5 5
-- currentHoogleVersionId <- scHoogleVersionId <$> ask
env <- ask
awsEnv <- asks scEnvAWS
bucketUrl <- asks scDownloadBucketUrl
-- locker is an action that returns the path to a hoogle db, if one exists
-- in the haddock bucket already.
locker <- newHoogleLocker (env ^. logFuncL) (awsEnv ^. env_manager) bucketUrl
let insertH = checkInsertSnapshotHoogleDb True
checkH = checkInsertSnapshotHoogleDb False
for_ snapshots $ \(snapshotId, snapName) ->
unlessM (checkInsertSnapshotHoogleDb False snapshotId) $ do
-- Even though we just got a list of snapshots that don't have hoogle
-- databases, we check again. For some reason. I don't see how this can
-- actually be useful. both lastLtsNightlyWithoutHoogleDb and
-- checkInsertSnapshotHoogleDb just check against SnapshotHoogleDb.
-- Perhaps the check can be removed.
unlessM (checkH snapshotId) $ do
logInfo $ "Starting Hoogle database download: " <> display (hoogleKey snapName)
mfp <- singleRun locker snapName
case mfp of
Just _ -> do
logInfo $ "Current hoogle database exists for: " <> display snapName
void $ checkInsertSnapshotHoogleDb True snapshotId
void $ insertH snapshotId
Nothing -> do
logInfo $ "Current hoogle database does not yet exist for: " <> display snapName
logInfo $ "Current hoogle database does not yet exist in the bucket for: " <> display snapName
mfp' <- createHoogleDB snapshotId snapName
forM_ mfp' $ \fp -> do
let key = hoogleKey snapName
@ -726,8 +741,10 @@ buildAndUploadHoogleDB doNotUpload = do
renamePath fp dest
unless doNotUpload $ do
uploadHoogleDB dest (ObjectKey key)
void $ checkInsertSnapshotHoogleDb True snapshotId
void $ insertH snapshotId
-- | Create a hoogle db from haddocks for the given snapshot, and upload it to
-- the haddock bucket.
createHoogleDB :: SnapshotId -> SnapName -> RIO StackageCron (Maybe FilePath)
createHoogleDB snapshotId snapName =
handleAny logException $ do

View File

@ -167,25 +167,48 @@ ltsBefore x y = do
go (Entity _ lts) = (ltsSnap lts, SNLts (ltsMajor lts) (ltsMinor lts))
-- | Queries the database for the latest LTS and nightly snapshots that do not
-- have corresponding entries in the SnapshotHoogleDb table with the current
-- Hoogle version.
lastLtsNightlyWithoutHoogleDb :: Int -> Int -> RIO StackageCron [(SnapshotId, SnapName)]
lastLtsNightlyWithoutHoogleDb ltsCount nightlyCount = do
currentHoogleVersionId <- scHoogleVersionId <$> ask
let getSnapshotsWithoutHoogeDb snapId snapCount =
map (unValue *** unValue) <$>
select
-- "snap" is either Lts or Nightly, while "snapshot" is indeed
-- "snapshot"
(from $ \(snap `InnerJoin` snapshot) -> do
on $ snap ^. snapId ==. snapshot ^. SnapshotId
where_ $
notExists $
from $ \snapshotHoogleDb ->
where_ $
(snapshotHoogleDb ^. SnapshotHoogleDbSnapshot ==. snapshot ^.
SnapshotId) &&.
(snapshotHoogleDb ^. SnapshotHoogleDbVersion ==.
val currentHoogleVersionId)
(snapshotHoogleDb ^. SnapshotHoogleDbSnapshot
==. snapshot ^. SnapshotId)
&&. (snapshotHoogleDb ^. SnapshotHoogleDbVersion
==. val currentHoogleVersionId)
orderBy [desc (snapshot ^. SnapshotCreated)]
limit $ fromIntegral snapCount
pure (snapshot ^. SnapshotId, snapshot ^. SnapshotName))
-- In sql, this query would be
--
-- select snapshot.id, snapshot.name
-- from snapshot
-- join $foo as snap -- either Lts or Nightly
-- on snap.snap = snapshot.id
-- where not exists (
-- select 1
-- from snapshot_hoogle_db
-- where snapshot_hoogle_db.snapshot = snapshot.id
-- and snapshot_hoogle_db.version = $currentHoogleVersionId
-- )
-- order by snapshot.created desc
-- limit $snapCount
--
-- So it returns a list of snapshots where there is no
-- corresponding entry in the snapshot_hoogle_db table for the
-- current hoogle version.
run $ do
lts <- getSnapshotsWithoutHoogeDb LtsSnap ltsCount
nightly <- getSnapshotsWithoutHoogeDb NightlySnap nightlyCount
@ -1159,10 +1182,27 @@ checkInsertSnapshotHoogleDb shouldInsert snapshotId = do
(from
(\v -> do
where_ $ v ^. VersionId ==. val hoogleVersionId
-- This is reaching into the *pantry*
-- database!
pure (v ^. VersionVersion)))
-- in sql, this query would be
--
-- select version.version
-- from version
-- where version.id = $hoogleVersionId
--
-- So it returns the "version"s that corresponds to the
-- current hoogle version id.
-- mhver is now Maybe Version, and corresponds to the current
-- hoogle version, assuming it exists in the Version table
forM_ mhver $ \hver ->
lift $
logInfo $
"Marking hoogle database for version " <> display hver <> " as available."
-- whether or not the version exists, we still put it into snapshot_hoogle_db
-- So literally the only use of the above query is to log the
-- action we're taking.
isJust <$> P.insertUniqueEntity sh
-- if we're not inserting, we're just checking if it already exists
-- in snapshot_hoogle_db.
else isJust <$> P.checkUnique sh