{-# LANGUAGE CPP #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TupleSections #-}
module Stackage.Database.Cron
    ( stackageServerCron
    , newHoogleLocker
    , singleRun
    , StackageCronOptions(..)
    , haddockBucketName
    ) where

import Conduit
import Control.DeepSeq
import Control.Lens ((.~))
import qualified Control.Monad.Trans.AWS as AWS (paginate)
import Control.SingleRun
import qualified Data.ByteString.Char8 as BS8
import qualified Data.Conduit.Binary as CB
import Data.Conduit.Tar (FileInfo(..), FileType(..), untar)
import Data.Conduit.Zlib (WindowBits(WindowBits), compress, ungzip)
import qualified Data.IntMap.Strict as IntMap
import Data.Monoid (Any(..))
import Data.Streaming.Network (bindPortTCP)
import Data.Yaml (decodeFileEither)
import Database.Persist
import Database.Persist.Postgresql
import Distribution.PackageDescription (GenericPackageDescription)
import qualified Hoogle
import Network.AWS hiding (Request, Response)
import Network.AWS.Data.Body (toBody)
import Network.AWS.Data.Text (toText)
import Network.AWS.S3
import Network.HTTP.Client
import Network.HTTP.Client.Conduit (bodyReaderSource)
import Network.HTTP.Simple (getResponseBody, httpJSONEither, parseRequest)
import Network.HTTP.Types (status200, status404)
import Pantry (CabalFileInfo(..), DidUpdateOccur(..),
               HpackExecutable(HpackBundled), PackageIdentifierRevision(..),
               defaultHackageSecurityConfig)
import Pantry.Internal.Stackage (HackageCabalId, HackageTarballResult(..),
                                 PantryConfig(..), Storage(..),
                                 forceUpdateHackageIndex, getHackageTarball,
                                 getTreeForKey, loadBlobById, packageTreeKey,
                                 treeCabal)
import Path (parseAbsDir, toFilePath)
import RIO
import RIO.Directory
import RIO.FilePath
import RIO.List as L
import qualified RIO.Map as Map
import RIO.Process (mkDefaultProcessContext)
import qualified RIO.Set as Set
import qualified RIO.Text as T
import RIO.Time
import Settings
import Stackage.Database.Github
import Stackage.Database.PackageInfo
import Stackage.Database.Query
import Stackage.Database.Schema
import Stackage.Database.Types
import System.Environment (lookupEnv)
import UnliftIO.Concurrent (getNumCapabilities)
import Web.PathPieces (fromPathPiece, toPathPiece)

hoogleKey :: SnapName -> Text
hoogleKey name =
    T.concat
        [ "hoogle/"
        , toPathPiece name
        , "/"
        , VERSION_hoogle
        , ".hoo"
        ]

hoogleUrl :: SnapName -> Text
hoogleUrl n =
    T.concat
        [ "https://s3.amazonaws.com/"
        , haddockBucketName
        , "/"
        , hoogleKey n
        ]

hackageDeprecatedUrl :: Request
hackageDeprecatedUrl = "https://hackage.haskell.org/packages/deprecated.json"

withStorage :: Int -> (Storage -> IO a) -> IO a
withStorage poolSize inner = do
    connstr <-
        lookupEnv "PGSTRING" >>= \case
            Just connstr -> pure (T.pack connstr)
            Nothing -> appPostgresString <$> getAppSettings
    withStackageDatabase
        False
        PostgresConf {pgPoolSize = poolSize, pgConnStr = encodeUtf8 connstr}
        (\db -> inner (Storage (runDatabase db) id))

getStackageSnapshotsDir :: RIO StackageCron FilePath
getStackageSnapshotsDir = do
    cron <- ask
    cloneOrUpdate (scStackageRoot cron) (scSnapshotsRepo cron)

withResponseUnliftIO :: MonadUnliftIO m => Request -> Manager -> (Response BodyReader -> m b) -> m b
withResponseUnliftIO req man f = withRunInIO $ \runInIO -> withResponse req man (runInIO . f)
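
-- A minimal usage sketch for 'withResponseUnliftIO' (the function and file names
-- below are illustrative, not part of this module): it lets the bracketed
-- 'withResponse' from http-client be used from any monad with a 'MonadUnliftIO'
-- instance, e.g. to stream a response body to disk.
--
-- > fetchToFile :: MonadUnliftIO m => Manager -> Request -> FilePath -> m ()
-- > fetchToFile man req dest =
-- >     withResponseUnliftIO req man $ \res ->
-- >         runConduitRes $ bodyReaderSource (responseBody res) .| sinkFile dest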

newHoogleLocker ::
       (HasLogFunc env, MonadIO m) => env -> Manager -> m (SingleRun SnapName (Maybe FilePath))
newHoogleLocker env man = mkSingleRun hoogleLocker
  where
    hoogleLocker :: MonadIO n => SnapName -> n (Maybe FilePath)
    hoogleLocker name =
        runRIO env $ do
            let fp = T.unpack $ hoogleKey name
                fptmp = fp <.> "tmp"
            exists <- doesFileExist fp
            if exists
                then return $ Just fp
                else do
                    req' <- parseRequest $ T.unpack $ hoogleUrl name
                    let req = req' {decompress = const False}
                    withResponseUnliftIO req man $ \res ->
                        case responseStatus res of
                            status
                                | status == status200 -> do
                                    createDirectoryIfMissing True $ takeDirectory fptmp
                                    -- TODO: https://github.com/commercialhaskell/rio/issues/160
                                    -- withBinaryFileDurableAtomic fp WriteMode $ \h ->
                                    --     runConduitRes $
                                    --     bodyReaderSource (responseBody res) .| ungzip .|
                                    --     sinkHandle h
                                    runConduitRes $
                                        bodyReaderSource (responseBody res) .| ungzip .|
                                        sinkFile fptmp
                                    renamePath fptmp fp
                                    return $ Just fp
                                | status == status404 -> do
                                    logDebug $ "NotFound: " <> display (hoogleUrl name)
                                    return Nothing
                                | otherwise -> do
                                    body <- liftIO $ brConsume $ responseBody res
                                    -- TODO: ideally only consume the body when the log level is
                                    -- set to LevelDebug; that will require a way to get the
                                    -- LogLevel out of the LogFunc.
                                    mapM_ (logDebug . displayBytesUtf8) body
                                    return Nothing

getHackageDeprecations ::
       (HasLogFunc env, MonadReader env m, MonadIO m) => m [Deprecation]
getHackageDeprecations = do
    jsonResponseDeprecated <- httpJSONEither hackageDeprecatedUrl
    case getResponseBody jsonResponseDeprecated of
        Left err -> do
            logError $
                "There was an error parsing the deprecated.json file: " <>
                fromString (displayException err)
            return []
        Right deprecated -> return deprecated
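
-- The shape of Hackage's deprecated.json that 'getHackageDeprecations' expects
-- to decode into @[Deprecation]@ (an illustrative excerpt; the package names
-- are made up and the exact field handling lives in the 'Deprecation' parser):
--
-- > [ { "deprecated-package": "acme-old"
-- >   , "in-favour-of": ["acme-new"]
-- >   }
-- > ]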

stackageServerCron :: StackageCronOptions -> IO ()
stackageServerCron StackageCronOptions {..} = do
    void $
        -- Hacky approach instead of PID files
        catchIO (bindPortTCP 17834 "127.0.0.1") $
        const $ throwString "Stackage Cron loader process already running, exiting."
    connectionCount <- getNumCapabilities
    withStorage connectionCount $ \storage -> do
        lo <- logOptionsHandle stdout True
        stackageRootDir <- getAppUserDataDirectory "stackage"
        pantryRootDir <- parseAbsDir (stackageRootDir </> "pantry")
        createDirectoryIfMissing True (toFilePath pantryRootDir)
        updateRef <- newMVar True
        cabalImmutable <- newIORef Map.empty
        cabalMutable <- newIORef Map.empty
        gpdCache <- newIORef IntMap.empty
        defaultProcessContext <- mkDefaultProcessContext
        aws <- newEnv Discover
        withLogFunc (setLogMinLevel scoLogLevel lo) $ \logFunc ->
            let pantryConfig =
                    PantryConfig
                        { pcHackageSecurity = defaultHackageSecurityConfig
                        , pcHpackExecutable = HpackBundled
                        , pcRootDir = pantryRootDir
                        , pcStorage = storage
                        , pcUpdateRef = updateRef
                        , pcParsedCabalFilesRawImmutable = cabalImmutable
                        , pcParsedCabalFilesMutable = cabalMutable
                        , pcConnectionCount = connectionCount
                        }
                stackage =
                    StackageCron
                        { scPantryConfig = pantryConfig
                        , scStackageRoot = stackageRootDir
                        , scProcessContext = defaultProcessContext
                        , scLogFunc = logFunc
                        , scForceFullUpdate = scoForceUpdate
                        , scCachedGPD = gpdCache
                        , scEnvAWS = aws
                        , scDownloadBucketName = scoDownloadBucketName
                        , scUploadBucketName = scoUploadBucketName
                        , scSnapshotsRepo = scoSnapshotsRepo
                        , scReportProgress = scoReportProgress
                        , scCacheCabalFiles = scoCacheCabalFiles
                        }
             in runRIO stackage (runStackageUpdate scoDoNotUpload)

runStackageUpdate :: Bool -> RIO StackageCron ()
runStackageUpdate doNotUpload = do
    forceFullUpdate <- scForceFullUpdate <$> ask
    logInfo $ "Starting stackage-cron update" <> bool "" " with --force-update" forceFullUpdate
    runStackageMigrations
    didUpdate <- forceUpdateHackageIndex (Just "stackage-server cron job")
    case didUpdate of
        UpdateOccurred -> logInfo "Updated hackage index"
        NoUpdateOccurred -> logInfo "No new packages in hackage index"
    logInfo "Getting deprecated info now"
    getHackageDeprecations >>= setDeprecations
    corePackageGetters <- makeCorePackageGetters
    runResourceT $
        join $
        runConduit $
        sourceSnapshots .| foldMC (createOrUpdateSnapshot corePackageGetters) (pure ())
    run $ mapM_ (`rawExecute` []) ["COMMIT", "VACUUM", "BEGIN"]
    unless doNotUpload $ do
        uploadSnapshotsJSON
        buildAndUploadHoogleDB

-- | Looks at 'global-hints.yaml' and creates core package getters that are reused later when
-- adding those packages to individual snapshots.
makeCorePackageGetters :: RIO StackageCron (Map CompilerP [CorePackageGetter])
makeCorePackageGetters = do
    rootDir <- scStackageRoot <$> ask
    contentDir <- getStackageContentDir rootDir
    liftIO (decodeFileEither (contentDir </> "stack" </> "global-hints.yaml")) >>= \case
        Right (hints :: Map CompilerP (Map PackageNameP VersionP)) ->
            Map.traverseWithKey
                (\compiler ->
                     fmap Map.elems . Map.traverseMaybeWithKey (makeCorePackageGetter compiler))
                hints
        Left exc -> do
            logError $
                "Error parsing 'global-hints.yaml' file: " <> fromString (displayException exc)
            return mempty
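
-- For reference, the layout 'makeCorePackageGetters' expects from
-- 'global-hints.yaml': a map from compiler to its bundled core packages and
-- their versions (an illustrative excerpt; the versions shown are made up):
--
-- > ghc-8.8.4:
-- >   base: 4.13.0.0
-- >   bytestring: 0.10.10.1
-- >   containers: 0.6.2.1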

-- | Core package info rarely changes between snapshots, therefore it would be wasteful to load,
-- parse and update all packages from global-hints for each snapshot. Instead we produce a
-- memoized version that will do the work once initially and then return information about a
-- package on subsequent invocations.
makeCorePackageGetter ::
       CompilerP -> PackageNameP -> VersionP -> RIO StackageCron (Maybe CorePackageGetter)
makeCorePackageGetter _compiler pname ver =
    run (getHackageCabalByRev0 pid) >>= \case
        Nothing -> do
            logWarn $
                "Core package from global-hints: '" <> display pid <>
                "' was not found in pantry."
            pure Nothing
        Just (hackageCabalId, blobId, _) -> do
            pkgInfoRef <- newIORef Nothing -- used for caching of pkgInfo
            let getMemoPackageInfo =
                    readIORef pkgInfoRef >>= \case
                        Just pkgInfo -> return pkgInfo
                        Nothing -> do
                            whenM (scReportProgress <$> ask) $
                                logSticky $ "Loading core package: " <> display pid
                            htr <- getHackageTarball pir Nothing
                            case htrFreshPackageInfo htr of
                                Just (gpd, treeId) -> do
                                    mTree <- run $ getEntity treeId
                                    let pkgInfo = (mTree, Just hackageCabalId, pid, gpd)
                                    gpd `deepseq` writeIORef pkgInfoRef $ Just pkgInfo
                                    pure pkgInfo
                                Nothing -> do
                                    (cabalBlob, mTree) <-
                                        run
                                            ((,) <$> loadBlobById blobId <*>
                                             getTreeForKey (packageTreeKey (htrPackage htr)))
                                    let gpd = parseCabalBlob cabalBlob
                                        pkgInfo = (mTree, Just hackageCabalId, pid, gpd)
                                    gpd `deepseq` writeIORef pkgInfoRef $ Just pkgInfo
                                    pure pkgInfo
            pure $ Just getMemoPackageInfo
  where
    pid = PackageIdentifierP pname ver
    pir =
        PackageIdentifierRevision (unPackageNameP pname) (unVersionP ver) (CFIRevision (Revision 0))

-- TODO: for now packages come only from Hackage; 'PantryPackage' needs an update before other
-- origins can be used.
-- | A pantry package is being added to a particular snapshot. Extra information like compiler
-- and flags is passed on in order to properly figure out dependencies and modules.
addPantryPackage ::
       SnapshotId -> CompilerP -> Bool -> Map FlagNameP Bool -> PantryPackage -> RIO StackageCron Bool
addPantryPackage sid compiler isHidden flags (PantryPackage pc treeKey) = do
    env <- ask
    let gpdCachedRef = scCachedGPD env
        cache = scCacheCabalFiles env
    let blobKeyToInt = fromIntegral . unSqlBackendKey . unBlobKey
    let updateCacheGPD blobId gpd =
            gpd `deepseq`
            atomicModifyIORef' gpdCachedRef (\cacheMap -> (IntMap.insert blobId gpd cacheMap, gpd))
    let getCachedGPD treeCabal =
            \case
                Just gpd | cache -> updateCacheGPD (blobKeyToInt treeCabal) gpd
                Just gpd -> pure gpd
                Nothing
                    | cache -> do
                        cacheMap <- readIORef gpdCachedRef
                        case IntMap.lookup (blobKeyToInt treeCabal) cacheMap of
                            Just gpd -> pure gpd
                            Nothing ->
                                loadBlobById treeCabal >>=
                                updateCacheGPD (blobKeyToInt treeCabal) . parseCabalBlob
                Nothing -> parseCabalBlob <$> loadBlobById treeCabal
    let storeHackageSnapshotPackage hcid mtid mgpd =
            getTreeForKey treeKey >>= \case
                Just (Entity treeId _)
                    | Just tid <- mtid
                    , tid /= treeId -> do
                        lift $ logError $ "Pantry Tree Key mismatch for: " <> display pc
                        pure False
                mTree@(Just (Entity _ Tree {treeCabal}))
                    | Just treeCabal' <- treeCabal -> do
                        gpd <- getCachedGPD treeCabal' mgpd
                        let mhcid = Just hcid
                        addSnapshotPackage sid compiler Hackage mTree mhcid isHidden flags pid gpd
                        pure True
                _ -> do
                    lift $ logError $ "Pantry is missing the source tree for " <> display pc
                    pure False
    mHackageCabalInfo <- run $ getHackageCabalByKey pid (pcCabalKey pc)
    case mHackageCabalInfo of
        Nothing -> do
            logError $ "Could not find the cabal file for: " <> display pc
            pure False
        Just (hcid, Nothing) -> do
            mHPI <-
                htrFreshPackageInfo <$>
                getHackageTarball (toPackageIdentifierRevision pc) (Just treeKey)
            run $
                case mHPI of
                    Just (gpd, treeId) -> storeHackageSnapshotPackage hcid (Just treeId) (Just gpd)
                    Nothing -> storeHackageSnapshotPackage hcid Nothing Nothing
        Just (hcid, mtid) -> run $ storeHackageSnapshotPackage hcid mtid Nothing
  where
    pid = PackageIdentifierP (pcPackageName pc) (pcVersion pc)
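
-- The caching discipline shared by 'makeCorePackageGetter' and
-- 'addPantryPackage', reduced to a standalone sketch (illustrative only, not
-- used by this module): key by an Int id, parse at most once, and force the
-- value before caching so no thunks are shared through the IORef.
--
-- > memoizeByInt :: NFData v => IORef (IntMap v) -> Int -> IO v -> IO v
-- > memoizeByInt ref k compute =
-- >     IntMap.lookup k <$> readIORef ref >>= \case
-- >         Just v -> pure v
-- >         Nothing -> do
-- >             v <- compute
-- >             v `deepseq` atomicModifyIORef' ref (\m -> (IntMap.insert k v m, v))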

-- | Downloads the list of available .html files from the S3 bucket for a particular resolver and
-- records in the database which modules have documentation available.
checkForDocs :: SnapshotId -> SnapName -> ResourceT (RIO StackageCron) ()
checkForDocs snapshotId snapName = do
    bucketName <- lift (scDownloadBucketName <$> ask)
    mods <-
        runConduit $
        AWS.paginate (req bucketName) .| concatMapC (^. lovrsContents) .|
        mapC (\obj -> toText (obj ^. oKey)) .|
        concatMapC (T.stripSuffix ".html") .|
        concatMapC (T.stripPrefix prefix) .|
        concatMapC pathToPackageModule .|
        sinkList
    -- It is faster to download all modules in this snapshot up front than to stream them through
    -- a conduit all the way to the database.
    sidsCacheRef <- newIORef Map.empty
    -- The cache is for SnapshotPackageId: there will be many modules per package, so there is no
    -- need to hit the database for each one of them.
    n <- max 1 . (`div` 2) <$> getNumCapabilities
    notFoundList <- lift $ pooledMapConcurrentlyN n (markModules sidsCacheRef) mods
    forM_ (Set.fromList $ catMaybes notFoundList) $ \pid ->
        lift $
        logWarn $
        "Documentation available for package '" <> display pid <>
        "' but was not found in this snapshot: " <>
        display snapName
  where
    prefix = textDisplay snapName <> "/"
    req bucketName = listObjectsV2 (BucketName bucketName) & lovPrefix .~ Just prefix
    -- | Records all package modules that have documentation available; modules that are not
    -- found in the snapshot are reported back as an error. Besides being run concurrently, this
    -- function also optimizes the SnapshotPackageId lookup, since it can be shared amongst the
    -- many modules of one package.
    markModules sidsCacheRef (pid, modName) = do
        sidsCache <- readIORef sidsCacheRef
        let mSnapshotPackageId = Map.lookup pid sidsCache
        mFound <- run $ markModuleHasDocs snapshotId pid mSnapshotPackageId modName
        case mFound of
            Nothing -> pure $ Just pid
            Just snapshotPackageId
                | Nothing <- mSnapshotPackageId -> do
                    atomicModifyIORef'
                        sidsCacheRef
                        (\cacheMap -> (Map.insert pid snapshotPackageId cacheMap, ()))
                    pure Nothing
            _ -> pure Nothing
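
-- An example of the key manipulation in 'checkForDocs' (the object key is
-- illustrative): for snapshot lts-16.31, an S3 key such as
--
-- > "lts-16.31/mtl-2.2.2/Control-Monad-State-Lazy.html"
--
-- has the ".html" suffix and the "lts-16.31/" prefix stripped, and the
-- remainder is parsed by 'pathToPackageModule' into the pair
-- (mtl-2.2.2, Control.Monad.State.Lazy).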

data SnapshotFileInfo = SnapshotFileInfo
    { sfiSnapName :: !SnapName
    , sfiUpdatedOn :: !UTCTime
    , sfiSnapshotFileGetter :: !(RIO StackageCron (Maybe SnapshotFile))
    }

-- | Uses the 'github.com/commercialhaskell/stackage-snapshots' repository to source all of the
-- packages, one snapshot at a time.
sourceSnapshots :: ConduitT a SnapshotFileInfo (ResourceT (RIO StackageCron)) ()
sourceSnapshots = do
    snapshotsDir <- lift $ lift getStackageSnapshotsDir
    sourceDirectoryDeep False (snapshotsDir </> "lts") .| concatMapMC (getLtsParser snapshotsDir)
    sourceDirectoryDeep False (snapshotsDir </> "nightly") .|
        concatMapMC (getNightlyParser snapshotsDir)
  where
    makeSnapshotFileInfo gitDir fp mFileNameDate snapName = do
        let parseSnapshot updatedOn = do
                esnap <- liftIO $ decodeFileEither fp
                case esnap of
                    Right snap ->
                        let publishDate =
                                sfPublishDate snap <|> mFileNameDate <|> Just (utctDay updatedOn)
                         in return $ Just snap {sfPublishDate = publishDate}
                    Left exc -> do
                        logError $
                            "Error parsing snapshot file: " <> fromString fp <> "\n" <>
                            fromString (displayException exc)
                        return Nothing
        lastGitFileUpdate gitDir fp >>= \case
            Left err -> do
                logError $ "Error parsing git commit date: " <> fromString err
                return Nothing
            Right updatedOn -> do
                env <- lift ask
                return $
                    Just
                        SnapshotFileInfo
                            { sfiSnapName = snapName
                            , sfiUpdatedOn = updatedOn
                            , sfiSnapshotFileGetter = runRIO env (parseSnapshot updatedOn)
                            }
    getLtsParser gitDir fp =
        case mapM (BS8.readInt . BS8.pack) $ take 2 $ reverse (splitPath fp) of
            Just [(minor, ".yaml"), (major, "/")] ->
                makeSnapshotFileInfo gitDir fp Nothing $ SNLts major minor
            _ -> do
                logError
                    ("Couldn't parse the filepath into an LTS version: " <> display (T.pack fp))
                return Nothing
    getNightlyParser gitDir fp =
        case mapM (BS8.readInt . BS8.pack) $ take 3 $ reverse (splitPath fp) of
            Just [(day, ".yaml"), (month, "/"), (year, "/")]
                | Just date <- fromGregorianValid (fromIntegral year) month day ->
                    makeSnapshotFileInfo gitDir fp (Just date) $ SNNightly date
            _ -> do
                logError
                    ("Couldn't parse the filepath into a Nightly date: " <> display (T.pack fp))
                return Nothing
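
-- How the parsers above map file paths inside the stackage-snapshots
-- repository to snapshot names (paths are illustrative; note that 'splitPath'
-- keeps the trailing separators, which is why 'BS8.readInt' leaves "/" and
-- ".yaml" behind for the patterns to match on):
--
-- > lts/16/31.yaml          -> SNLts 16 31
-- > nightly/2020/12/28.yaml -> SNNightly 2020-12-28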

-- | Creates a new `Snapshot` if it is not yet present in the database, and decides whether an
-- update is necessary when it already exists.
decideOnSnapshotUpdate :: SnapshotFileInfo -> RIO StackageCron (Maybe (SnapshotId, SnapshotFile))
decideOnSnapshotUpdate SnapshotFileInfo {sfiSnapName, sfiUpdatedOn, sfiSnapshotFileGetter} = do
    forceUpdate <- scForceFullUpdate <$> ask
    let mkLogMsg rest = "Snapshot with name: " <> display sfiSnapName <> " " <> rest
    mKeySnapFile <-
        run (getBy (UniqueSnapshot sfiSnapName)) >>= \case
            Just (Entity _key snap)
                | snapshotUpdatedOn snap == Just sfiUpdatedOn && not forceUpdate -> do
                    logInfo $ mkLogMsg "already exists and is up to date."
                    return Nothing
            Just entity@(Entity _key snap)
                | Nothing <- snapshotUpdatedOn snap -> do
                    logWarn $ mkLogMsg "did not finish updating last time."
                    fmap (Just entity, ) <$> sfiSnapshotFileGetter
            Just entity -> do
                unless forceUpdate $ logWarn $ mkLogMsg "was updated, applying new patch."
                fmap (Just entity, ) <$> sfiSnapshotFileGetter
            Nothing -> fmap (Nothing, ) <$> sfiSnapshotFileGetter
    -- Add the new snapshot to the database, when necessary
    case mKeySnapFile of
        Just (Just (Entity snapKey snap), sf@SnapshotFile {sfCompiler, sfPublishDate})
            | Just publishDate <- sfPublishDate -> do
                let updatedSnap =
                        Snapshot sfiSnapName sfCompiler publishDate (snapshotUpdatedOn snap)
                run $ replace snapKey updatedSnap
                pure $ Just (snapKey, sf)
        Just (Nothing, sf@SnapshotFile {sfCompiler, sfPublishDate})
            | Just publishDate <- sfPublishDate ->
                fmap (, sf) <$>
                run (insertUnique (Snapshot sfiSnapName sfCompiler publishDate Nothing))
        _ -> return Nothing

type CorePackageGetter
     = RIO StackageCron ( Maybe (Entity Tree)
                        , Maybe HackageCabalId
                        , PackageIdentifierP
                        , GenericPackageDescription)

-- | An optimized version of snapshot loading which can load a snapshot and the documentation
-- info for the previous snapshot at the same time. It concurrently executes the loading of the
-- current snapshot together with an action that was passed in as an argument. At the end it
-- returns an action that should be invoked in order to mark modules that have documentation
-- available, which in turn can be passed as an argument to the next snapshot loader.
createOrUpdateSnapshot ::
       Map CompilerP [CorePackageGetter]
    -> ResourceT (RIO StackageCron) ()
    -> SnapshotFileInfo
    -> ResourceT (RIO StackageCron) (ResourceT (RIO StackageCron) ())
createOrUpdateSnapshot corePackageInfoGetters prevAction sfi@SnapshotFileInfo { sfiSnapName
                                                                              , sfiUpdatedOn
                                                                              } = do
    finishedDocs <- newIORef False
    runConcurrently
        (Concurrently (prevAction >> writeIORef finishedDocs True) *>
         Concurrently (lift (loadCurrentSnapshot finishedDocs)))
  where
    loadCurrentSnapshot finishedDocs = do
        loadDocs <-
            decideOnSnapshotUpdate sfi >>= \case
                Nothing -> return $ pure ()
                Just (snapshotId, snapshotFile) ->
                    updateSnapshot
                        corePackageInfoGetters
                        snapshotId
                        sfiSnapName
                        sfiUpdatedOn
                        snapshotFile
        report <- scReportProgress <$> ask
        when report $
            unlessM (readIORef finishedDocs) $
            logSticky "Still loading the docs for previous snapshot ..."
        pure loadDocs
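
-- The overlap employed by 'createOrUpdateSnapshot', reduced to a skeleton
-- (illustrative only; 'mainWorkFor' and 'followUpFor' are hypothetical stand-ins
-- for loading a snapshot and checking its docs): each step runs the previous
-- step's follow-up action concurrently with its own main work and hands back a
-- new follow-up, which is what 'foldMC' threads from one snapshot to the next
-- in 'runStackageUpdate'.
--
-- > step :: (Int -> IO ()) -> (Int -> IO ()) -> IO () -> Int -> IO (IO ())
-- > step mainWorkFor followUpFor prevFollowUp n = do
-- >     runConcurrently (Concurrently prevFollowUp *> Concurrently (mainWorkFor n))
-- >     pure (followUpFor n)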

-- | Updates all packages in the snapshot; any that are missing are created. Returns an action
-- that checks the available documentation for modules that are known to exist and marks them as
-- documented when the haddock is present on AWS S3. Only after documentation has been checked is
-- the snapshot marked as completely updated. This is required in case something goes wrong and
-- the process is interrupted.
updateSnapshot ::
       Map CompilerP [CorePackageGetter]
    -> SnapshotId
    -> SnapName
    -> UTCTime
    -> SnapshotFile
    -> RIO StackageCron (ResourceT (RIO StackageCron) ())
updateSnapshot corePackageGetters snapshotId snapName updatedOn SnapshotFile {..} = do
    insertSnapshotName snapshotId snapName
    loadedPackageCountRef <- newIORef (0 :: Int)
    let totalPackages = length sfPackages
        addPantryPackageWithReport pp = do
            let PantryCabal {pcPackageName} = ppPantryCabal pp
                isHidden = fromMaybe False (Map.lookup pcPackageName sfHidden)
                flags = fromMaybe Map.empty $ Map.lookup pcPackageName sfFlags
            curSucc <- addPantryPackage snapshotId sfCompiler isHidden flags pp
            atomicModifyIORef' loadedPackageCountRef (\c -> (c + 1, ()))
            pure curSucc
    -- Leave some cores and db connections for the doc loader
    n <- max 1 . (`div` 2) <$> getNumCapabilities
    before <- getCurrentTime
    report <- scReportProgress <$> ask
    pantryUpdatesSucceeded <-
        runConcurrently
            (Concurrently
                 (when report (runProgressReporter loadedPackageCountRef totalPackages snapName)) *>
             Concurrently (pooledMapConcurrentlyN n addPantryPackageWithReport sfPackages))
    after <- getCurrentTime
    let timeTotal = round (diffUTCTime after before)
        (mins, secs) = timeTotal `quotRem` (60 :: Int)
        -- guard against division by zero when a snapshot loads in under a second
        packagePerSecond =
            fromIntegral ((totalPackages * 100) `div` max 1 timeTotal) / 100 :: Float
        allPantryUpdatesSucceeded = and pantryUpdatesSucceeded
    logInfo $
        mconcat
            [ "Loading snapshot '"
            , display snapName
            , "' was done (in "
            , displayShow mins
            , "min "
            , displayShow secs
            , "sec). With average "
            , displayShow packagePerSecond
            , " packages/sec. There are still docs to check."
            ]
    case Map.lookup sfCompiler corePackageGetters of
        Nothing ->
            logError $ "Hints are not found for the compiler: " <> display sfCompiler
        Just _
            | not allPantryUpdatesSucceeded ->
                logWarn $
                mconcat
                    [ "There was an issue loading a snapshot '"
                    , display snapName
                    , "', deferring addition of packages "
                    , "from global-hints until next time."
                    ]
        Just compilerCorePackages ->
            forM_ compilerCorePackages $ \getCorePackageInfo -> do
                (mTree, mhcid, pid, gpd) <- getCorePackageInfo
                run $ addSnapshotPackage snapshotId sfCompiler Core mTree mhcid False mempty pid gpd
    return $ do
        checkForDocsSucceeded <-
            tryAny (checkForDocs snapshotId snapName) >>= \case
                Left exc -> do
                    logError $ "Received exception while getting the docs: " <> displayShow exc
                    return False
                Right () -> return True
        if allPantryUpdatesSucceeded && checkForDocsSucceeded &&
           Map.member sfCompiler corePackageGetters
            then do
                lift $ snapshotMarkUpdated snapshotId updatedOn
                logInfo $ "Created or updated snapshot '" <> display snapName <> "' successfully"
            else logError $ "There were errors while adding snapshot '" <> display snapName <> "'"

-- | Reports how many packages have been loaded so far.
runProgressReporter :: IORef Int -> Int -> SnapName -> RIO StackageCron ()
runProgressReporter loadedPackageCountRef totalPackages snapName = do
    let reportProgress = do
            loadedPackageCount <- readIORef loadedPackageCountRef
            when (loadedPackageCount < totalPackages) $ do
                logSticky $
                    mconcat
                        [ "Loading snapshot '"
                        , display snapName
                        , "' ("
                        , displayShow loadedPackageCount
                        , "/"
                        , displayShow totalPackages
                        , ")"
                        ]
                threadDelay 1000000
                reportProgress
    reportProgress

-- | Uploads a json file to S3 with the latest snapshot per major LTS version, plus the latest
-- nightly.
uploadSnapshotsJSON :: RIO StackageCron ()
uploadSnapshotsJSON = do
    snapshots <- snapshotsJSON
    uploadBucket <- scUploadBucketName <$> ask
    let key = ObjectKey "snapshots.json"
    uploadFromRIO key $
        set poACL (Just OPublicRead) $
        set poContentType (Just "application/json") $
        putObject (BucketName uploadBucket) key (toBody snapshots)

-- | Writes a gzipped version of the hoogle db into a temporary file on the file system and then
-- uploads it to S3. The temporary file is removed upon completion.
uploadHoogleDB :: FilePath -> ObjectKey -> RIO StackageCron ()
uploadHoogleDB fp key =
    withTempFile (takeDirectory fp) (takeFileName fp <.> "gz") $ \fpgz h -> do
        runConduitRes $ sourceFile fp .| compress 9 (WindowBits 31) .| CB.sinkHandle h
        hClose h
        body <- chunkedFile defaultChunkSize fpgz
        uploadBucket <- scUploadBucketName <$> ask
        uploadFromRIO key $
            set poACL (Just OPublicRead) $ putObject (BucketName uploadBucket) key body

uploadFromRIO :: AWSRequest a => ObjectKey -> a -> RIO StackageCron ()
uploadFromRIO key po = do
    logInfo $ "Uploading " <> displayShow key <> " to S3 bucket."
    env <- ask
    eres <- runResourceT $ runAWS env $ trying _Error $ send po
    case eres of
        Left e ->
            logError $
            "Couldn't upload " <> displayShow key <> " to S3 because " <> displayShow e
        Right _ -> logInfo $ "Successfully uploaded " <> displayShow key <> " to S3"
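
-- A note on the magic number in 'uploadHoogleDB' above: in zlib, a windowBits
-- value of 31 = 15 (maximum window size) + 16 (emit a gzip header and trailer),
-- so @compress 9 (WindowBits 31)@ produces a maximally compressed gzip stream,
-- which 'ungzip' can decode. A minimal round-trip sketch using the same
-- conduits imported by this module (file names are illustrative):
--
-- > roundTrip :: IO ()
-- > roundTrip =
-- >     runConduitRes $
-- >         sourceFile "input.hoo" .| compress 9 (WindowBits 31) .| ungzip .|
-- >         sinkFile "input.copy.hoo"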

buildAndUploadHoogleDB :: RIO StackageCron ()
buildAndUploadHoogleDB = do
    snapshots <- lastLtsNightly 50 5
    env <- ask
    locker <- newHoogleLocker (env ^. logFuncL) (env ^. envManager)
    void $
        flip Map.traverseWithKey snapshots $ \snapshotId snapName -> do
            logDebug $ "Starting Hoogle DB download: " <> display (hoogleKey snapName)
            mfp <- singleRun locker snapName
            case mfp of
                Just _ -> logDebug $ "Hoogle database exists for: " <> display snapName
                Nothing -> do
                    mfp' <- createHoogleDB snapshotId snapName
                    forM_ mfp' $ \fp -> do
                        let key = hoogleKey snapName
                        uploadHoogleDB fp (ObjectKey key)
                        let dest = T.unpack key
                        createDirectoryIfMissing True $ takeDirectory dest
                        renamePath fp dest

createHoogleDB :: SnapshotId -> SnapName -> RIO StackageCron (Maybe FilePath)
createHoogleDB snapshotId snapName =
    handleAny logException $ do
        logInfo $ "Creating Hoogle DB for " <> display snapName
        downloadBucket <- scDownloadBucketName <$> ask
        let root = "hoogle-gen"
            bindir = root </> "bindir"
            outname = root </> "output.hoo"
            tarKey = toPathPiece snapName <> "/hoogle/orig.tar"
            tarUrl = "https://s3.amazonaws.com/" <> downloadBucket <> "/" <> tarKey
            tarFP = root </> T.unpack tarKey
        req <- parseRequest $ T.unpack tarUrl
        man <- view envManager
        unlessM (doesFileExist tarFP) $
            withResponseUnliftIO req {decompress = const True} man $ \res -> do
                throwErrorStatusCodes req res
                createDirectoryIfMissing True $ takeDirectory tarFP
                -- withBinaryFileDurableAtomic tarFP WriteMode $ \tarHandle ->
                -- FIXME: https://github.com/commercialhaskell/rio/issues/160
                let tmpTarFP = tarFP <.> "tmp"
                withBinaryFile tmpTarFP WriteMode $ \tarHandle ->
                    runConduitRes $ bodyReaderSource (responseBody res) .| sinkHandle tarHandle
                renameFile tmpTarFP tarFP
        void $ tryIO $ removeDirectoryRecursive bindir
        void $ tryIO $ removeFile outname
        createDirectoryIfMissing True bindir
        withSystemTempDirectory ("hoogle-" ++ T.unpack (textDisplay snapName)) $ \tmpdir -> do
            Any hasRestored <-
                runConduitRes $
                sourceFile tarFP .|
                untar (restoreHoogleTxtFileWithCabal tmpdir snapshotId snapName) .|
                foldC
            unless hasRestored $ error "No Hoogle .txt files found"
            let args = ["generate", "--database=" ++ outname, "--local=" ++ tmpdir]
            logInfo $
                mconcat
                    [ "Merging databases... ("
                    , foldMap fromString $ L.intersperse " " ("hoogle" : args)
                    , ")"
                    ]
            liftIO $ Hoogle.hoogle args
            logInfo "Merge done"
            return $ Just outname
  where
    logException exc =
        logError
            ("Problem creating hoogle db for " <> display snapName <> ": " <> displayShow exc) $>
        Nothing

restoreHoogleTxtFileWithCabal ::
       FilePath
    -> SnapshotId
    -> SnapName
    -> FileInfo
    -> ConduitM ByteString Any (ResourceT (RIO StackageCron)) ()
restoreHoogleTxtFileWithCabal tmpdir snapshotId snapName fileInfo =
    case fileType fileInfo of
        FTNormal -> do
            let txtFileName = T.decodeUtf8With T.lenientDecode $ filePath fileInfo
                txtPackageName = T.takeWhile (/= '.') txtFileName
                mpkg = fromPathPiece txtPackageName
            maybe (pure Nothing) (lift . lift . getSnapshotPackageCabalBlob snapshotId) mpkg >>= \case
                Nothing -> do
                    logWarn $
                        "Unexpected hoogle filename: " <> display txtFileName <>
                        " in orig.tar for snapshot: " <>
                        display snapName
                    yield $ Any False
                Just cabal -> do
                    writeFileBinary (tmpdir </> T.unpack txtPackageName <.> "cabal") cabal
                    sinkFile (tmpdir </> T.unpack txtFileName)
                    yield $ Any True
        _ -> yield $ Any False

pathToPackageModule :: Text -> Maybe (PackageIdentifierP, ModuleNameP)
pathToPackageModule txt =
    case T.split (== '/') txt of
        [pkgIdentifier, moduleNameDashes] -> do
            modName :: ModuleNameP <- fromPathPiece moduleNameDashes
            pkgId :: PackageIdentifierP <- fromPathPiece pkgIdentifier
            Just (pkgId, modName)
        _ -> Nothing
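
-- Illustrative behaviour of 'pathToPackageModule' (results shown informally,
-- since the rendered forms of 'PackageIdentifierP' and 'ModuleNameP' are
-- defined elsewhere):
--
-- > pathToPackageModule "mtl-2.2.2/Control-Monad-State"  -- Just (mtl-2.2.2, Control.Monad.State)
-- > pathToPackageModule "not/a/module/path"              -- Nothing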