Use names and Haddocks to improve understanding

This commit is contained in:
Bryan Richter 2024-04-30 14:16:15 +03:00
parent c4676e524c
commit 935a5012fe
No known key found for this signature in database
GPG Key ID: B202264020068BFB
3 changed files with 79 additions and 61 deletions

View File

@ -239,31 +239,38 @@ runStackageUpdate doNotUpload = do
-- | This will look at 'global-hints.yaml' and will create core package getters that are reused -- | This will look at 'global-hints.yaml' and will create core package getters that are reused
-- later for adding those package to individual snapshot. -- later for adding those package to individual snapshots.
makeCorePackageGetters :: makeCorePackageGetters ::
RIO StackageCron (Map CompilerP [CorePackageGetter]) RIO StackageCron (Map CompilerP [CorePackageGetter])
makeCorePackageGetters = do makeCorePackageGetters = do
rootDir <- scStackageRoot <$> ask rootDir <- scStackageRoot <$> ask
contentDir <- getStackageContentDir rootDir contentDir <- getStackageContentDir rootDir
coreCabalFiles <- getCoreCabalFiles rootDir backupCoreCabalFiles <- getBackupCoreCabalFiles rootDir
liftIO (decodeFileEither (contentDir </> "stack" </> "global-hints.yaml")) >>= \case liftIO (decodeFileEither (contentDir </> "stack" </> "global-hints.yaml")) >>= \case
Right (hints :: Map CompilerP (Map PackageNameP VersionP)) -> Right (hints :: Map CompilerP (Map PackageNameP VersionP)) ->
Map.traverseWithKey Map.traverseWithKey
(\compiler -> (\compiler ->
fmap Map.elems . fmap Map.elems .
Map.traverseMaybeWithKey (makeCorePackageGetter compiler coreCabalFiles)) Map.traverseMaybeWithKey (makeCorePackageGetter compiler backupCoreCabalFiles))
hints hints
Left exc -> do Left exc -> do
logError $ logError $
"Error parsing 'global-hints.yaml' file: " <> fromString (displayException exc) "Error parsing 'global-hints.yaml' file: " <> fromString (displayException exc)
return mempty return mempty
getCoreCabalFiles :: -- | Packages distributed with GHC aren't taken from Hackage like normal
-- packages. Release managers do upload them, however, so that their docs are
-- available.
--
-- Or at least, they should. The release process was fragile, and some packages
-- weren't uploaded. This mechanism gives us a chance to fill in missing
-- packages.
getBackupCoreCabalFiles ::
FilePath FilePath
-> RIO StackageCron (Map PackageIdentifierP (GenericPackageDescription, CabalFileIds)) -> RIO StackageCron (Map PackageIdentifierP (GenericPackageDescription, CabalFileIds))
getCoreCabalFiles rootDir = do getBackupCoreCabalFiles rootDir = do
coreCabalFilesDir <- getCoreCabalFilesDir rootDir backupCoreCabalFilesDir <- getBackupCoreCabalFilesDir rootDir
cabalFileNames <- getDirectoryContents coreCabalFilesDir cabalFileNames <- getDirectoryContents backupCoreCabalFilesDir
cabalFiles <- cabalFiles <-
forM (filter (isExtensionOf ".cabal") cabalFileNames) $ \cabalFileName -> forM (filter (isExtensionOf ".cabal") cabalFileNames) $ \cabalFileName ->
let pidTxt = T.pack (dropExtension (takeFileName cabalFileName)) let pidTxt = T.pack (dropExtension (takeFileName cabalFileName))
@ -272,15 +279,17 @@ getCoreCabalFiles rootDir = do
logError $ "Invalid package identifier: " <> fromString cabalFileName logError $ "Invalid package identifier: " <> fromString cabalFileName
pure Nothing pure Nothing
Just pid -> do Just pid -> do
cabalBlob <- readFileBinary (coreCabalFilesDir </> cabalFileName) cabalBlob <- readFileBinary (backupCoreCabalFilesDir </> cabalFileName)
mCabalInfo <- run $ addCabalFile pid cabalBlob mCabalInfo <- run $ addCabalFile pid cabalBlob
pure ((,) pid <$> mCabalInfo) pure ((,) pid <$> mCabalInfo)
pure $ Map.fromList $ catMaybes cabalFiles pure $ Map.fromList $ catMaybes cabalFiles
-- | Core package info rarely changes between the snapshots, therefore it would be wasteful to -- | Core package info rarely changes between the snapshots, therefore it would be wasteful to
-- load, parse and update all packages from gloabl-hints for each snapshot, instead we produce -- load, parse and update all packages from gloabl-hints for each snapshot. Instead we produce
-- a memoized version that will do it once initiall and then return information aboat a -- a memoized version that will do it once initially and then return information about a
-- package on subsequent invocations. -- package on subsequent invocations.
--
-- FIXME: The compiler argument is unused (and never has been). Should it be used?
makeCorePackageGetter :: makeCorePackageGetter ::
CompilerP CompilerP
-> Map PackageIdentifierP (GenericPackageDescription, CabalFileIds) -> Map PackageIdentifierP (GenericPackageDescription, CabalFileIds)
@ -336,64 +345,70 @@ makeCorePackageGetter _compiler fallbackCabalFileMap pname ver =
PackageIdentifierRevision (unPackageNameP pname) (unVersionP ver) (CFIRevision (Revision 0)) PackageIdentifierRevision (unPackageNameP pname) (unVersionP ver) (CFIRevision (Revision 0))
-- TODO: for now it is only from hackage, PantryPackage needs an update to use other origins -- | Populates the database with information about a package?
-- | A pantry package is being added to a particular snapshot. Extra information like compiler and --
-- flags are passed on in order to properly figure out dependencies and modules -- Specifically, a pantry package is being added to a particular snapshot.
--
-- Extra information like compiler and flags are passed on in order to properly
-- figure out dependencies and modules.
--
-- TODO: for now it is only from hackage. PantryPackage needs an update to use other origins
addPantryPackage :: addPantryPackage ::
SnapshotId -> CompilerP -> Bool -> Map FlagNameP Bool -> PantryPackage -> RIO StackageCron Bool SnapshotId -> CompilerP -> Bool -> Map FlagNameP Bool -> PantryPackage -> RIO StackageCron Bool
addPantryPackage sid compiler isHidden flags (PantryPackage pc treeKey) = do addPantryPackage snapId compiler isHidden flags (PantryPackage pcabal pTreeKey) = do
env <- ask env <- ask
let gpdCachedRef = scCachedGPD env let pkgDescCache = scCachedGPD env
cache = scCacheCabalFiles env cacheP = scCacheCabalFiles env
let blobKeyToInt = fromIntegral . unSqlBackendKey . unBlobKey let blobKeyToInt = fromIntegral . unSqlBackendKey . unBlobKey
let updateCacheGPD blobId gpd = let cachedPkgDesc cabalBlobId pkgDesc =
gpd `deepseq` pkgDesc `deepseq`
atomicModifyIORef' gpdCachedRef (\cacheMap -> (IntMap.insert blobId gpd cacheMap, gpd)) atomicModifyIORef' pkgDescCache (\cacheMap -> (IntMap.insert cabalBlobId pkgDesc cacheMap, pkgDesc))
let getCachedGPD treeCabal = let getPkgDesc cabalBlobId =
\case \case
Just gpd | cache -> updateCacheGPD (blobKeyToInt treeCabal) gpd Just pkgDesc | cacheP -> cachedPkgDesc (blobKeyToInt cabalBlobId) pkgDesc
Just gpd -> pure gpd Just pkgDesc -> pure pkgDesc
Nothing | cache -> do Nothing | cacheP -> do
cacheMap <- readIORef gpdCachedRef cacheMap <- readIORef pkgDescCache
case IntMap.lookup (blobKeyToInt treeCabal) cacheMap of case IntMap.lookup (blobKeyToInt cabalBlobId) cacheMap of
Just gpd -> pure gpd Just pkgDesc -> pure pkgDesc
Nothing -> Nothing ->
loadBlobById treeCabal >>= loadBlobById cabalBlobId >>=
updateCacheGPD (blobKeyToInt treeCabal) . parseCabalBlob cachedPkgDesc (blobKeyToInt cabalBlobId) . parseCabalBlob
Nothing -> parseCabalBlob <$> loadBlobById treeCabal Nothing -> parseCabalBlob <$> loadBlobById cabalBlobId
let storeHackageSnapshotPackage hcid mtid mgpd = let storeHackageSnapshotPackage hackageCabalId mTreeId mpkgDesc =
getTreeForKey treeKey >>= \case getTreeForKey pTreeKey >>= \case
Just (Entity treeId _) -- error case #1
| Just tid <- mtid Just (Entity treeId' _)
, tid /= treeId -> do | Just treeId <- mTreeId
lift $ logError $ "Pantry Tree Key mismatch for: " <> display pc , treeId /= treeId' -> do
lift $ logError $ "Pantry Tree Key mismatch for: " <> display pcabal
pure False pure False
Just tree@(Entity _ Tree {treeCabal}) -- happy case
| Just treeCabal' <- treeCabal -> do Just pkgTree@(Entity _ Tree {treeCabal})
gpd <- getCachedGPD treeCabal' mgpd | Just cabalBlobId <- treeCabal -> do
let mhcid = Just hcid pkgDesc <- getPkgDesc cabalBlobId mpkgDesc
eTree = Right tree addSnapshotPackage snapId compiler Hackage (Right pkgTree) (Just hackageCabalId) isHidden flags packageId pkgDesc
addSnapshotPackage sid compiler Hackage eTree mhcid isHidden flags pid gpd
pure True pure True
-- error case #2
_ -> do _ -> do
lift $ logError $ "Pantry is missing the source tree for " <> display pc lift $ logError $ "Pantry is missing the source tree for " <> display pcabal
pure False pure False
mHackageCabalInfo <- run $ getHackageCabalByKey pid (pcCabalKey pc) mHackageCabalInfo <- run $ getHackageCabalByKey packageId (pcCabalKey pcabal)
case mHackageCabalInfo of case mHackageCabalInfo of
Nothing -> do Nothing -> do
logError $ "Could not find the cabal file for: " <> display pc logError $ "Could not find the cabal file for: " <> display pcabal
pure False pure False
Just (hcid, Nothing) -> do Just (hackageCabalId, Nothing) -> do
mHPI <- mHPI <-
htrFreshPackageInfo <$> htrFreshPackageInfo <$>
getHackageTarball (toPackageIdentifierRevision pc) (Just treeKey) getHackageTarball (toPackageIdentifierRevision pcabal) (Just pTreeKey)
run $ run $
case mHPI of case mHPI of
Just (gpd, treeId) -> storeHackageSnapshotPackage hcid (Just treeId) (Just gpd) Just (pkgDesc, treeId) -> storeHackageSnapshotPackage hackageCabalId (Just treeId) (Just pkgDesc)
Nothing -> storeHackageSnapshotPackage hcid Nothing Nothing Nothing -> storeHackageSnapshotPackage hackageCabalId Nothing Nothing
Just (hcid, mtid) -> run $ storeHackageSnapshotPackage hcid mtid Nothing Just (hackageCabalId, mTreeId) -> run $ storeHackageSnapshotPackage hackageCabalId mTreeId Nothing
where where
pid = PackageIdentifierP (pcPackageName pc) (pcVersion pc) packageId = PackageIdentifierP (pcPackageName pcabal) (pcVersion pcabal)
@ -550,9 +565,7 @@ createOrUpdateSnapshot ::
-> ResourceT (RIO StackageCron) () -> ResourceT (RIO StackageCron) ()
-> SnapshotFileInfo -> SnapshotFileInfo
-> ResourceT (RIO StackageCron) (ResourceT (RIO StackageCron) ()) -> ResourceT (RIO StackageCron) (ResourceT (RIO StackageCron) ())
createOrUpdateSnapshot corePackageInfoGetters prevAction sfi@SnapshotFileInfo { sfiSnapName createOrUpdateSnapshot corePackageInfoGetters prevAction sfi@SnapshotFileInfo { sfiSnapName , sfiUpdatedOn } = do
, sfiUpdatedOn
} = do
finishedDocs <- newIORef False finishedDocs <- newIORef False
runConcurrently runConcurrently
(Concurrently (prevAction >> writeIORef finishedDocs True) *> (Concurrently (prevAction >> writeIORef finishedDocs True) *>
@ -575,11 +588,15 @@ createOrUpdateSnapshot corePackageInfoGetters prevAction sfi@SnapshotFileInfo {
logSticky "Still loading the docs for previous snapshot ..." logSticky "Still loading the docs for previous snapshot ..."
pure loadDocs pure loadDocs
-- | Updates all packages in the snapshot. If any missing they will be created. Returns an action -- | Creates snapshot name (FIXME: Why not do this when creating the snapshot?)
-- that will check for available documentation for modules that are known to exist and mark as -- and updates all packages in the snapshot. If any packages are missing they
-- documented when haddock is present on AWS S3. Only after documentation has been checked this -- will be created. Returns an action that will (a) check for available
-- snapshot will be marked as completely updated. This is required in case something goes wrong and -- documentation for the packages' modules and (b) mark the packages as
-- process is interrupted -- documented when haddock is present on AWS S3.
--
-- (Only after documentation has been checked will this snapshot be marked as
-- completely updated. This is required in case something goes wrong and process
-- is interrupted.)
updateSnapshot :: updateSnapshot ::
Map CompilerP [CorePackageGetter] Map CompilerP [CorePackageGetter]
-> SnapshotId -> SnapshotId

View File

@ -4,7 +4,7 @@ module Stackage.Database.Github
( cloneOrUpdate ( cloneOrUpdate
, lastGitFileUpdate , lastGitFileUpdate
, getStackageContentDir , getStackageContentDir
, getCoreCabalFilesDir , getBackupCoreCabalFilesDir
, GithubRepo(..) , GithubRepo(..)
) where ) where
@ -81,9 +81,9 @@ getStackageContentDir rootDir =
cloneOrUpdate rootDir (GithubRepo "commercialhaskell" "stackage-content") cloneOrUpdate rootDir (GithubRepo "commercialhaskell" "stackage-content")
-- | Use backup location with cabal files, hackage doesn't have all of them. -- | Use backup location with cabal files, hackage doesn't have all of them.
getCoreCabalFilesDir :: getBackupCoreCabalFilesDir ::
(MonadReader env m, HasLogFunc env, HasProcessContext env, MonadIO m) (MonadReader env m, HasLogFunc env, HasProcessContext env, MonadIO m)
=> FilePath => FilePath
-> m FilePath -> m FilePath
getCoreCabalFilesDir rootDir = getBackupCoreCabalFilesDir rootDir =
cloneOrUpdate rootDir (GithubRepo "commercialhaskell" "core-cabal-files") cloneOrUpdate rootDir (GithubRepo "commercialhaskell" "core-cabal-files")

View File

@ -117,6 +117,7 @@ data SnapshotFile = SnapshotFile
} deriving (Show) } deriving (Show)
-- Is this a reference to a cabal file stored in Pantry?
data PantryCabal = PantryCabal data PantryCabal = PantryCabal
{ pcPackageName :: !PackageNameP { pcPackageName :: !PackageNameP
, pcVersion :: !VersionP , pcVersion :: !VersionP