From 385620e1853bb67748f5faca0b22c3181c851f71 Mon Sep 17 00:00:00 2001 From: Alexey Kuleshevich Date: Sat, 25 May 2019 20:00:28 +0300 Subject: [PATCH] Made status reporting and cabal file caching optional for the cron job --- app/stackage-server-cron.hs | 8 +++- src/Stackage/Database/Cron.hs | 88 ++++++++++++++++++---------------- src/Stackage/Database/Types.hs | 9 ++-- 3 files changed, 61 insertions(+), 44 deletions(-) diff --git a/app/stackage-server-cron.hs b/app/stackage-server-cron.hs index 5305010..1722ca7 100644 --- a/app/stackage-server-cron.hs +++ b/app/stackage-server-cron.hs @@ -62,7 +62,13 @@ optsParser = value (GithubRepo repoAccount repoName) <> help ("Github repository with snapshot files. Default level is '" ++ - repoAccount ++ "/" ++ repoName ++ "'.")) + repoAccount ++ "/" ++ repoName ++ "'.")) <*> + switch (long "report-progress" <> help "Report how many packages has been loaded.") <*> + switch + (long "cache-cabal-files" <> + help + ("Improve performance by cached parsed cabal files" ++ + " at expense of higher memory consumption")) where repoAccount = "commercialhaskell" repoName = "stackage-snapshots" diff --git a/src/Stackage/Database/Cron.hs b/src/Stackage/Database/Cron.hs index 116ece3..ccb8d65 100644 --- a/src/Stackage/Database/Cron.hs +++ b/src/Stackage/Database/Cron.hs @@ -204,6 +204,8 @@ stackageServerCron StackageCronOptions {..} = do , scDownloadBucketName = scoDownloadBucketName , scUploadBucketName = scoUploadBucketName , scSnapshotsRepo = scoSnapshotsRepo + , scReportProgress = scoReportProgress + , scCacheCabalFiles = scoCacheCabalFiles } in runRIO stackage (runStackageUpdate scoDoNotUpload) @@ -265,7 +267,8 @@ makeCorePackageGetter _compiler pname ver = readIORef pkgInfoRef >>= \case Just pkgInfo -> return pkgInfo Nothing -> do - logSticky $ "Loading core package: " <> display pid + whenM (scReportProgress <$> ask) $ + logSticky $ "Loading core package: " <> display pid htr <- getHackageTarball pir Nothing case htrFreshPackageInfo htr of Just (gpd, treeId) -> do @@ -295,20 +298,24 @@ makeCorePackageGetter _compiler pname ver = addPantryPackage :: SnapshotId -> CompilerP -> Bool -> Map FlagNameP Bool -> PantryPackage -> RIO StackageCron Bool addPantryPackage sid compiler isHidden flags (PantryPackage pc treeKey) = do - gpdCachedRef <- scCachedGPD <$> ask + env <- ask + let gpdCachedRef = scCachedGPD env + cache = scCacheCabalFiles env let blobKeyToInt = fromIntegral . unSqlBackendKey . unBlobKey let updateCacheGPD blobId gpd = atomicModifyIORef' gpdCachedRef (\cacheMap -> (IntMap.insert blobId gpd cacheMap, gpd)) let getCachedGPD treeCabal = \case - Just gpd -> updateCacheGPD (blobKeyToInt treeCabal) gpd - Nothing -> do + Just gpd | cache -> updateCacheGPD (blobKeyToInt treeCabal) gpd + Just gpd -> pure gpd + Nothing | cache -> do cacheMap <- readIORef gpdCachedRef case IntMap.lookup (blobKeyToInt treeCabal) cacheMap of Just gpd -> pure gpd Nothing -> loadBlobById treeCabal >>= updateCacheGPD (blobKeyToInt treeCabal) . parseCabalBlob + Nothing -> parseCabalBlob <$> loadBlobById treeCabal let storeHackageSnapshotPackage hcid mtid mgpd = getTreeForKey treeKey >>= \case Just (Entity treeId _) @@ -521,7 +528,9 @@ createOrUpdateSnapshot corePackageInfoGetters prevAction sfi@SnapshotFileInfo { sfiSnapName sfiUpdatedOn snapshotFile - unlessM (readIORef finishedDocs) $ + report <- scReportProgress <$> ask + when report $ + unlessM (readIORef finishedDocs) $ logSticky "Still loading the docs for previous snapshot ..." pure loadDocs @@ -556,10 +565,29 @@ updateSnapshot corePackageGetters snapshotId snapName updatedOn SnapshotFile {.. pure curSucc -- Leave some cores and db connections for the doc loader n <- max 1 . (`div` 2) <$> getNumCapabilities + before <- getCurrentTime + report <- scReportProgress <$> ask pantryUpdatesSucceeded <- runConcurrently - (Concurrently (runProgressReporter loadedPackageCountRef totalPackages snapName) *> + (Concurrently + (when report (runProgressReporter loadedPackageCountRef totalPackages snapName)) *> Concurrently (pooledMapConcurrentlyN n addPantryPackageWithReport sfPackages)) + after <- getCurrentTime + let timeTotal = round (diffUTCTime after before) + (mins, secs) = timeTotal `quotRem` (60 :: Int) + packagePerSecond = fromIntegral ((totalPackages * 100) `div` timeTotal) / 100 :: Float + logInfo $ + mconcat + [ "Loading snapshot '" + , display snapName + , "' was done (in " + , displayShow mins + , "min " + , displayShow secs + , "sec). With average " + , displayShow packagePerSecond + , " packages/sec. There are still docs." + ] return $ do checkForDocsSucceeded <- tryAny (checkForDocs snapshotId snapName) >>= \case @@ -574,44 +602,24 @@ updateSnapshot corePackageGetters snapshotId snapName updatedOn SnapshotFile {.. else logError $ "There were errors while adding snapshot '" <> display snapName <> "'" --- | Report how many packages has been loaded so far and provide statistics at the end. +-- | Report how many packages has been loaded so far. runProgressReporter :: IORef Int -> Int -> SnapName -> RIO StackageCron () runProgressReporter loadedPackageCountRef totalPackages snapName = do - before <- getCurrentTime let reportProgress = do loadedPackageCount <- readIORef loadedPackageCountRef - if loadedPackageCount < totalPackages - then do - logSticky $ - mconcat - [ "Loading snapshot '" - , display snapName - , "' (" - , displayShow loadedPackageCount - , "/" - , displayShow totalPackages - , ")" - ] - threadDelay 1000000 - reportProgress - else do - after <- getCurrentTime - let timeTotal = round (diffUTCTime after before) - (mins, secs) = timeTotal `quotRem` (60 :: Int) - packagePerSecond = - fromIntegral ((loadedPackageCount * 100) `div` timeTotal) / 100 :: Float - logInfo $ - mconcat - [ "Loading snapshot '" - , display snapName - , "' was done (in " - , displayShow mins - , "min " - , displayShow secs - , "sec). With average " - , displayShow packagePerSecond - , " packages/sec. There are still docs." - ] + when (loadedPackageCount < totalPackages) $ do + logSticky $ + mconcat + [ "Loading snapshot '" + , display snapName + , "' (" + , displayShow loadedPackageCount + , "/" + , displayShow totalPackages + , ")" + ] + threadDelay 1000000 + reportProgress reportProgress -- | Uploads a json file to S3 with all latest snapshots per major lts version and one nightly. diff --git a/src/Stackage/Database/Types.hs b/src/Stackage/Database/Types.hs index 19be63f..5e963f8 100644 --- a/src/Stackage/Database/Types.hs +++ b/src/Stackage/Database/Types.hs @@ -50,9 +50,8 @@ import Data.Aeson import qualified Data.Text as T import Data.Text.Read (decimal) import Network.AWS (Env, HasEnv(..)) -import Pantry as Pantry (BlobKey(..), CabalFileInfo(..), FileSize(..), - HasPantryConfig(..), PackageIdentifierRevision(..), - TreeKey(..)) +import Pantry (BlobKey(..), CabalFileInfo(..), FileSize(..), + HasPantryConfig(..), PackageIdentifierRevision(..), TreeKey(..)) import Pantry.Internal.Stackage as Pantry (PackageNameP(..), PantryConfig, VersionP(..)) import Pantry.SHA256 (fromHexText) @@ -74,6 +73,8 @@ data StackageCronOptions = StackageCronOptions , scoDoNotUpload :: !Bool , scoLogLevel :: !LogLevel , scoSnapshotsRepo :: !GithubRepo + , scoReportProgress :: !Bool + , scoCacheCabalFiles :: !Bool } data StackageCron = StackageCron @@ -87,6 +88,8 @@ data StackageCron = StackageCron , scDownloadBucketName :: !Text , scUploadBucketName :: !Text , scSnapshotsRepo :: !GithubRepo + , scReportProgress :: !Bool + , scCacheCabalFiles :: !Bool } instance HasEnv StackageCron where