mirror of
https://github.com/commercialhaskell/stackage-server.git
synced 2026-01-11 19:58:28 +01:00
Document more learnings
This commit is contained in:
parent
5628bcff89
commit
93b8666dde
@ -104,11 +104,12 @@ getStackageSnapshotsDir = do
|
||||
withResponseUnliftIO :: MonadUnliftIO m => Request -> Manager -> (Response BodyReader -> m b) -> m b
|
||||
withResponseUnliftIO req man f = withRunInIO $ \ runInIO -> withResponse req man (runInIO . f)
|
||||
|
||||
-- | Under the SingleRun wrapper that ensures only one thing at a time is
|
||||
-- writing the file in question, ensure that a Hoogle database exists on the
|
||||
-- filesystem for the given SnapName. But only going so far as downloading it
|
||||
-- from the haddock bucket. See 'createHoogleDB' for the function that puts it
|
||||
-- there in the first place.
|
||||
-- | Returns an action that, under the SingleRun wrapper that ensures only one
|
||||
-- thing at a time is writing the file in question, ensure that a Hoogle
|
||||
-- database exists on the filesystem for the given SnapName. But only going so
|
||||
-- far as downloading it from the haddock bucket. See 'createHoogleDB' for the
|
||||
-- function that puts it there in the first place. If no db exists in the
|
||||
-- bucket, the action will return 'Nothing'.
|
||||
newHoogleLocker ::
|
||||
(HasLogFunc env, MonadIO m) => env -> Manager -> Text -> m (SingleRun SnapName (Maybe FilePath))
|
||||
newHoogleLocker env man bucketUrl = mkSingleRun hoogleLocker
|
||||
@ -232,6 +233,9 @@ runStackageUpdate doNotUpload = do
|
||||
corePackageGetters <- makeCorePackageGetters
|
||||
runResourceT $
|
||||
join $
|
||||
-- @createOrUpdateSnapshot@ processes package N while processing docs for
|
||||
-- package N-1. This @pure ()@ is the "documentation processing action"
|
||||
-- for the -1'th package.
|
||||
runConduit $ sourceSnapshots .| foldMC (createOrUpdateSnapshot corePackageGetters) (pure ())
|
||||
unless doNotUpload uploadSnapshotsJSON
|
||||
buildAndUploadHoogleDB doNotUpload
|
||||
@ -313,6 +317,9 @@ makeCorePackageGetter fallbackCabalFileMap pname ver =
|
||||
Nothing -> do
|
||||
whenM (scReportProgress <$> ask) $
|
||||
logSticky $ "Loading core package: " <> display pid
|
||||
-- I have no idea what's happening here. I guess I
|
||||
-- don't know what it means to "load" a package.
|
||||
-- What is actually going on?
|
||||
htr <- getHackageTarball pir Nothing
|
||||
case htrFreshPackageInfo htr of
|
||||
Just (gpd, treeId) -> do
|
||||
@ -414,33 +421,33 @@ checkForDocs :: SnapshotId -> SnapName -> ResourceT (RIO StackageCron) ()
|
||||
checkForDocs snapshotId snapName = do
|
||||
bucketName <- lift (scDownloadBucketName <$> ask)
|
||||
env <- asks scEnvAWS
|
||||
mods <-
|
||||
-- it is faster to download all modules in this snapshot separately, rather
|
||||
-- than process them with a conduit all the way to the database.
|
||||
packageModules <-
|
||||
runConduit $
|
||||
paginate env (req bucketName) .| concatMapC (fromMaybe [] . (^. listObjectsV2Response_contents)) .|
|
||||
mapC (\obj -> toText (obj ^. object_key)) .|
|
||||
concatMapC (T.stripSuffix ".html" >=> T.stripPrefix prefix >=> pathToPackageModule) .|
|
||||
sinkList
|
||||
-- it is faster to download all modules in this snapshot, than process them with a conduit all
|
||||
-- the way to the database.
|
||||
paginate env (listSnapshotObjects bucketName)
|
||||
.| concatMapC (fromMaybe [] . (^. listObjectsV2Response_contents))
|
||||
.| mapC (\obj -> toText (obj ^. object_key))
|
||||
.| concatMapC (T.stripSuffix ".html" >=> T.stripPrefix prefix >=> pathToPackageModule)
|
||||
.| sinkList
|
||||
-- Cache SnapshotPackageId rather than look it up many times for each module in the package.
|
||||
sidsCacheRef <- newIORef Map.empty
|
||||
-- Cache is for SnapshotPackageId, there will be many modules per peckage, no need to look into
|
||||
-- the database for each one of them.
|
||||
-- The other half of the cores are used in 'updateSnapshot'
|
||||
n <- max 1 . (`div` 2) <$> getNumCapabilities
|
||||
unexpectedPackages <- lift $ pooledMapConcurrentlyN n (markModules sidsCacheRef) mods
|
||||
unexpectedPackages <- lift $ pooledMapConcurrentlyN n (markModule sidsCacheRef) packageModules
|
||||
forM_ (Set.fromList $ catMaybes unexpectedPackages) $ \pid ->
|
||||
lift $
|
||||
logWarn $
|
||||
"Documentation found for package '" <> display pid <>
|
||||
"', which does not exist in this snapshot: " <>
|
||||
lift $ logWarn $
|
||||
"Documentation found for package '" <> display pid <>
|
||||
"', which does not exist in this snapshot: " <>
|
||||
display snapName
|
||||
where
|
||||
prefix = textDisplay snapName <> "/"
|
||||
req bucketName = newListObjectsV2 (BucketName bucketName) & listObjectsV2_prefix ?~ prefix
|
||||
listSnapshotObjects bucketName = newListObjectsV2 (BucketName bucketName) & listObjectsV2_prefix ?~ prefix
|
||||
-- | This function records all package modules that have documentation available, the ones
|
||||
-- that are not found in the snapshot reported back as an error. Besides being run
|
||||
-- concurrently this function optimizes the SnapshotPackageId lookup as well, since that can
|
||||
-- be shared amongst many modules of one package.
|
||||
markModules sidsCacheRef (pid, modName) = do
|
||||
markModule sidsCacheRef (pid, modName) = do
|
||||
sidsCache <- readIORef sidsCacheRef
|
||||
let mSnapshotPackageId = Map.lookup pid sidsCache
|
||||
mFound <- run $ markModuleHasDocs snapshotId pid mSnapshotPackageId modName
|
||||
@ -466,8 +473,7 @@ sourceSnapshots :: ConduitT a SnapshotFileInfo (ResourceT (RIO StackageCron)) ()
|
||||
sourceSnapshots = do
|
||||
snapshotsDir <- lift $ lift getStackageSnapshotsDir
|
||||
sourceDirectoryDeep False (snapshotsDir </> "lts") .| concatMapMC (getLtsParser snapshotsDir)
|
||||
sourceDirectoryDeep False (snapshotsDir </> "nightly") .|
|
||||
concatMapMC (getNightlyParser snapshotsDir)
|
||||
sourceDirectoryDeep False (snapshotsDir </> "nightly") .| concatMapMC (getNightlyParser snapshotsDir)
|
||||
where
|
||||
makeSnapshotFileInfo gitDir fp mFileNameDate snapName = do
|
||||
let parseSnapshot updatedOn = do
|
||||
@ -512,11 +518,12 @@ sourceSnapshots = do
|
||||
|
||||
data DecisionResult a e = NothingToDo | NoSnapshotFile | NeedsUpdate a e | DoesntExist e
|
||||
|
||||
-- | Creates a new `Snapshot` if it is not yet present in the database and decides if update
|
||||
-- | Creates a new `Snapshot` if it is not yet present in the database, and decides if update
|
||||
-- is necessary when it already exists.
|
||||
--
|
||||
-- sfiSnapshotFileGetter is a mystery. Silently ignoring snapshots where the
|
||||
-- getter returns Nothing seems like a potential problem.
|
||||
-- getter returns Nothing seems like a potential problem. Anyway I'd rather run
|
||||
-- it beforehand!
|
||||
decideOnSnapshotUpdate :: SnapshotFileInfo -> RIO StackageCron (Maybe (SnapshotId, SnapshotFile))
|
||||
decideOnSnapshotUpdate SnapshotFileInfo {sfiSnapName, sfiUpdatedOn, sfiSnapshotFileGetter} = do
|
||||
forceUpdate <- scForceFullUpdate <$> ask
|
||||
@ -551,10 +558,11 @@ decideOnSnapshotUpdate SnapshotFileInfo {sfiSnapName, sfiUpdatedOn, sfiSnapshotF
|
||||
| otherwise -> return Nothing
|
||||
|
||||
DoesntExist sf@SnapshotFile {sfCompiler, sfPublishDate}
|
||||
| Just publishDate <- sfPublishDate ->
|
||||
| Just publishDate <- sfPublishDate -> do
|
||||
logInfo $ mkLogMsg "is new, adding to the database."
|
||||
fmap (, sf) <$>
|
||||
run (insertUnique (Snapshot sfiSnapName sfCompiler publishDate Nothing))
|
||||
| otherwise -> return Nothing
|
||||
run (insertUnique (Snapshot sfiSnapName sfCompiler publishDate Nothing))
|
||||
| otherwise -> Nothing <$ logWarn (mkLogMsg "has no publish date, skipping.")
|
||||
|
||||
type CorePackageGetter
|
||||
= RIO StackageCron ( Either CabalFileIds (Entity Tree)
|
||||
@ -567,6 +575,32 @@ type CorePackageGetter
|
||||
-- current snapshot as well as an action that was passed as an argument. At the end it will return
|
||||
-- an action that should be invoked in order to mark modules that have documentation available,
|
||||
-- which in turn can be passed as an argument to the next snapshot loader.
|
||||
-- Something something ouroboros.
|
||||
--
|
||||
-- Question: When do the docs for the last snapshot get loaded?
|
||||
--
|
||||
-- Well, this binding is called as @join $ runConduit $ foldMC (createOrUpdateSnapshot corePackageInfoGetters) (pure ())@
|
||||
--
|
||||
-- So the answer: the doc-loading action for the last snapshot gets returned by @runConduit $ foldMC ...@,
|
||||
-- which means it gets executed by @join $ runConduit $ foldMC ...@.
|
||||
--
|
||||
-- Evidence:
|
||||
--
|
||||
-- Since @foldMC :: (a -> b -> m a) -> a -> ConduitT b o m a@, we see
|
||||
--
|
||||
-- @@
|
||||
-- a ~ ResourceT (RIO Stackage Cron) () -- this is the doc-loading action
|
||||
-- b ~ SnapshotFileInfo
|
||||
-- m ~ ResourceT (RIO StackageCron)
|
||||
-- @@
|
||||
|
||||
-- and the foldMC creates a @ConduitT SnapshotFileInfo o (ResourceT (RIO StackageCron)) (ResourceT (RIO StackageCron) ())@
|
||||
--
|
||||
-- TODO: It might be more efficient to just put all the actions (snapshot
|
||||
-- creation and documentation writing both) on a queue and let a bunch of
|
||||
-- workers chew on it. The current impl creates arbitrary synchronization points
|
||||
-- with 'runConcurrently'. Granted, I don't know what a good chunk size would
|
||||
-- actually be.
|
||||
createOrUpdateSnapshot ::
|
||||
Map CompilerP [CorePackageGetter]
|
||||
-> ResourceT (RIO StackageCron) ()
|
||||
@ -581,6 +615,7 @@ createOrUpdateSnapshot corePackageInfoGetters prevAction sfi@SnapshotFileInfo {
|
||||
loadCurrentSnapshot finishedDocs = do
|
||||
loadDocs <-
|
||||
decideOnSnapshotUpdate sfi >>= \case
|
||||
-- Nothing to do, and thus no docs to process
|
||||
Nothing -> return $ pure ()
|
||||
Just (snapshotId, snapshotFile) ->
|
||||
updateSnapshot
|
||||
@ -595,11 +630,12 @@ createOrUpdateSnapshot corePackageInfoGetters prevAction sfi@SnapshotFileInfo {
|
||||
logSticky "Still loading the docs for previous snapshot ..."
|
||||
pure loadDocs
|
||||
|
||||
-- | Creates snapshot name (FIXME: Why not do this when creating the snapshot?)
|
||||
-- and updates all packages in the snapshot. If any packages are missing they
|
||||
-- will be created. Returns an action that will (a) check for available
|
||||
-- documentation for the packages' modules and (b) mark the packages as
|
||||
-- documented when haddock is present on AWS S3.
|
||||
-- | Creates Lts or Nightly entity [Question(bryan): Why not do this when
|
||||
-- creating the snapshot? Why is this a separate table anyway?] and updates all
|
||||
-- packages in the snapshot. If any packages are missing they will be created.
|
||||
-- Returns an action that will (a) check for available documentation for the
|
||||
-- packages' modules and (b) mark the packages as documented when haddock is
|
||||
-- present on AWS S3.
|
||||
--
|
||||
-- (Only after documentation has been checked will this snapshot be marked as
|
||||
-- completely updated. This is required in case something goes wrong and process
|
||||
@ -610,11 +646,14 @@ updateSnapshot ::
|
||||
-> SnapName
|
||||
-> UTCTime
|
||||
-> SnapshotFile
|
||||
-> RIO StackageCron (ResourceT (RIO StackageCron) ())
|
||||
-> RIO StackageCron (ResourceT (RIO StackageCron) ()) -- ^ Returns the action for processing docs
|
||||
updateSnapshot corePackageGetters snapshotId snapName updatedOn SnapshotFile {..} = do
|
||||
insertSnapshotName snapshotId snapName
|
||||
loadedPackageCountRef <- newIORef (0 :: Int)
|
||||
let totalPackages = length sfPackages
|
||||
-- A wrapper for 'addPantryPackage' that extracts the package info from
|
||||
-- snapshot info, increments the count of loaded packages, and reports success
|
||||
-- as a Bool.
|
||||
addPantryPackageWithReport pp = do
|
||||
let PantryCabal {pcPackageName} = ppPantryCabal pp
|
||||
isHidden = fromMaybe False (Map.lookup pcPackageName sfHidden)
|
||||
@ -742,7 +781,8 @@ buildAndUploadHoogleDB doNotUpload = do
|
||||
-- locker is an action that returns the path to a hoogle db, if one exists
|
||||
-- in the haddock bucket already.
|
||||
locker <- newHoogleLocker (env ^. logFuncL) (awsEnv ^. env_manager) bucketUrl
|
||||
let insertH = checkInsertSnapshotHoogleDb True
|
||||
let -- These bindings undo a questionable conflation of operations
|
||||
insertH = checkInsertSnapshotHoogleDb True
|
||||
checkH = checkInsertSnapshotHoogleDb False
|
||||
for_ snapshots $ \(snapshotId, snapName) ->
|
||||
-- Even though we just got a list of snapshots that don't have hoogle
|
||||
@ -755,6 +795,8 @@ buildAndUploadHoogleDB doNotUpload = do
|
||||
mfp <- singleRun locker snapName
|
||||
case mfp of
|
||||
Just _ -> do
|
||||
-- Something bad must have happened: we created the Hoogle db
|
||||
-- previously, but didn't get to record it in our database.
|
||||
logInfo $ "Current hoogle database exists for: " <> display snapName
|
||||
void $ insertH snapshotId
|
||||
Nothing -> do
|
||||
@ -773,6 +815,8 @@ buildAndUploadHoogleDB doNotUpload = do
|
||||
-- the haddock bucket.
|
||||
createHoogleDB :: SnapshotId -> SnapName -> RIO StackageCron (Maybe FilePath)
|
||||
createHoogleDB snapshotId snapName =
|
||||
-- FIXME: this handles *any* exception, which means it will swallow most
|
||||
-- signals
|
||||
handleAny logException $ do
|
||||
logInfo $ "Creating Hoogle DB for " <> display snapName
|
||||
downloadBucketUrl <- scDownloadBucketUrl <$> ask
|
||||
@ -848,7 +892,10 @@ restoreHoogleTxtFileWithCabal tmpdir snapshotId snapName fileInfo =
|
||||
_ -> yield False
|
||||
|
||||
|
||||
pathToPackageModule :: Text -> Maybe (PackageIdentifierP, ModuleNameP)
|
||||
pathToPackageModule
|
||||
:: Text
|
||||
-- ^ Input is like @ace-0.6/ACE-Combinators@
|
||||
-> Maybe (PackageIdentifierP, ModuleNameP)
|
||||
pathToPackageModule txt =
|
||||
case T.split (== '/') txt of
|
||||
[pkgIdentifier, moduleNameDashes] -> do
|
||||
|
||||
@ -173,7 +173,7 @@ ltsBefore x y = do
|
||||
lastLtsNightlyWithoutHoogleDb :: Int -> Int -> RIO StackageCron [(SnapshotId, SnapName)]
|
||||
lastLtsNightlyWithoutHoogleDb ltsCount nightlyCount = do
|
||||
currentHoogleVersionId <- scHoogleVersionId <$> ask
|
||||
let getSnapshotsWithoutHoogeDb snapId snapCount =
|
||||
let getSnapshotsWithoutHoogleDb snapId snapCount =
|
||||
map (unValue *** unValue) <$>
|
||||
select
|
||||
-- "snap" is either Lts or Nightly, while "snapshot" is indeed
|
||||
@ -206,12 +206,12 @@ lastLtsNightlyWithoutHoogleDb ltsCount nightlyCount = do
|
||||
-- order by snapshot.created desc
|
||||
-- limit $snapCount
|
||||
--
|
||||
-- So it returns a list of snapshots where there is no
|
||||
-- So it returns a limited list of snapshots where there is no
|
||||
-- corresponding entry in the snapshot_hoogle_db table for the
|
||||
-- current hoogle version.
|
||||
run $ do
|
||||
lts <- getSnapshotsWithoutHoogeDb LtsSnap ltsCount
|
||||
nightly <- getSnapshotsWithoutHoogeDb NightlySnap nightlyCount
|
||||
lts <- getSnapshotsWithoutHoogleDb LtsSnap ltsCount
|
||||
nightly <- getSnapshotsWithoutHoogleDb NightlySnap nightlyCount
|
||||
pure $ lts ++ nightly
|
||||
|
||||
|
||||
@ -1100,6 +1100,8 @@ getHackageCabalByKey (PackageIdentifierP pname ver) (BlobKey sha size) =
|
||||
return (hc ^. HackageCabalId, hc ^. HackageCabalTree)
|
||||
|
||||
|
||||
-- | Gets the id for the SnapshotPackage that corresponds to the given Snapshot
|
||||
-- and PackageIdentifier.
|
||||
getSnapshotPackageId ::
|
||||
SnapshotId
|
||||
-> PackageIdentifierP
|
||||
@ -1114,6 +1116,18 @@ getSnapshotPackageId snapshotId (PackageIdentifierP pname ver) =
|
||||
(pn ^. PackageNameName ==. val pname) &&.
|
||||
(v ^. VersionVersion ==. val ver))
|
||||
return (sp ^. SnapshotPackageId)
|
||||
--
|
||||
-- i.e.
|
||||
--
|
||||
-- select sp.id
|
||||
-- from snapshot_package sp
|
||||
-- join version
|
||||
-- on version.id = sp.version
|
||||
-- join package_name pn
|
||||
-- on pn.id = sp.package_name
|
||||
-- where sp.snapshot = $snapshot_id
|
||||
-- and pn.name = $name
|
||||
-- and v.version = $version
|
||||
|
||||
|
||||
getSnapshotPackageCabalBlob ::
|
||||
@ -1127,6 +1141,16 @@ getSnapshotPackageCabalBlob snapshotId pname =
|
||||
((sp ^. SnapshotPackageSnapshot ==. val snapshotId) &&.
|
||||
(pn ^. PackageNameName ==. val pname))
|
||||
return (blob ^. BlobContents)
|
||||
-- i.e.
|
||||
--
|
||||
-- select blob.content
|
||||
-- from snapshot_package sp
|
||||
-- join package_name pn
|
||||
-- on pn.id = sp.package_name
|
||||
-- join blob
|
||||
-- on blob.id = sp.cabal
|
||||
-- where sp.snapshot = $snapshotId
|
||||
-- and pn.name = $name
|
||||
|
||||
-- | Idempotent and thread safe way of adding a new module.
|
||||
insertModuleSafe :: ModuleNameP -> ReaderT SqlBackend (RIO env) ModuleNameId
|
||||
@ -1164,6 +1188,7 @@ markModuleHasDocs snapshotId pid mSnapshotPackageId modName =
|
||||
\AND snapshot_package_module.snapshot_package = ?"
|
||||
[toPersistValue modName, toPersistValue snapshotPackageId]
|
||||
return $ Just snapshotPackageId
|
||||
-- FIXME: The Nothing case seems like it should not happen.
|
||||
Nothing -> return Nothing
|
||||
|
||||
|
||||
|
||||
@ -132,6 +132,7 @@ instance Display PantryCabal where
|
||||
instance ToMarkup PantryCabal where
|
||||
toMarkup = toMarkup . textDisplay
|
||||
|
||||
-- A Cabal file (package name, version, blob) and source tree
|
||||
data PantryPackage = PantryPackage
|
||||
{ ppPantryCabal :: !PantryCabal
|
||||
, ppPantryKey :: !TreeKey
|
||||
|
||||
@ -29,5 +29,4 @@ nix:
|
||||
- zlib
|
||||
- postgresql
|
||||
- pkg-config
|
||||
- haskell-language-server
|
||||
- cacert
|
||||
|
||||
Loading…
Reference in New Issue
Block a user