From 6331131b6889ddcea45ce49bae04323002b85e53 Mon Sep 17 00:00:00 2001 From: Bryan Richter Date: Fri, 22 Dec 2023 18:48:30 +0200 Subject: [PATCH 01/18] Enable running stackage-server-cron on an empty DB It did run migrations, but ran them in the wrong spot. --- src/Stackage/Database/Cron.hs | 5 +++-- src/Stackage/Database/Schema.hs | 35 +++++++++++++++++++++------------ 2 files changed, 25 insertions(+), 15 deletions(-) diff --git a/src/Stackage/Database/Cron.hs b/src/Stackage/Database/Cron.hs index 77ce0cf..d966618 100644 --- a/src/Stackage/Database/Cron.hs +++ b/src/Stackage/Database/Cron.hs @@ -179,8 +179,9 @@ stackageServerCron StackageCronOptions {..} = do , pcCasaMaxPerRequest = defaultCasaMaxPerRequest , pcSnapshotLocation = defaultSnapshotLocation } - currentHoogleVersionId <- - runRIO logFunc $ getCurrentHoogleVersionIdWithPantryConfig pantryConfig + currentHoogleVersionId <- runRIO logFunc $ do + runStackageMigrations' pantryConfig + getCurrentHoogleVersionIdWithPantryConfig pantryConfig let stackage = StackageCron { scPantryConfig = pantryConfig diff --git a/src/Stackage/Database/Schema.hs b/src/Stackage/Database/Schema.hs index 0e45bab..f5ef5a8 100644 --- a/src/Stackage/Database/Schema.hs +++ b/src/Stackage/Database/Schema.hs @@ -23,6 +23,7 @@ module Stackage.Database.Schema , GetStackageDatabase(..) , withStackageDatabase , runStackageMigrations + , runStackageMigrations' , getCurrentHoogleVersionId , getCurrentHoogleVersionIdWithPantryConfig -- * Tables @@ -217,25 +218,33 @@ withStackageDatabase shouldLog dbs inner = do bracket (liftIO getPoolIO) (liftIO . destroyAllResources) $ \pool -> do inner (StackageDatabase (`runSqlPool` pool)) -getSchema :: (HasLogFunc env, GetStackageDatabase env (RIO env)) => RIO env (Maybe Int) +getSchema :: ReaderT SqlBackend (RIO RIO.LogFunc) (Maybe Int) getSchema = - run $ do + do eres <- tryAny (selectList [] []) lift $ logInfo $ "getSchema result: " <> displayShow eres case eres of Right [Entity _ (Schema v)] -> return $ Just v _ -> return Nothing +runStackageMigrations' :: PantryConfig -> RIO RIO.LogFunc () -- HasLogFunc env => PantryConfig -> RIO env () +runStackageMigrations' pantryConfig = do + stackageDb <- getStackageDatabaseFromPantry pantryConfig + runDatabase stackageDb stackageMigrations + + runStackageMigrations :: (HasLogFunc env, GetStackageDatabase env (RIO env)) => RIO env () -runStackageMigrations = do +runStackageMigrations = run stackageMigrations + +stackageMigrations :: ReaderT SqlBackend (RIO RIO.LogFunc) () -- ReaderT SqlBackend (RIO RIO.LogFunc) () +stackageMigrations = do + runMigration Pantry.migrateAll + runMigration migrateAll actualSchema <- getSchema - run $ do - runMigration Pantry.migrateAll - runMigration migrateAll - unless (actualSchema == Just currentSchema) $ do - lift $ - logWarn $ - "Current schema does not match actual schema: " <> - displayShow (actualSchema, currentSchema) - deleteWhere ([] :: [Filter Schema]) - insert_ $ Schema currentSchema + unless (actualSchema == Just currentSchema) $ do + lift $ + logWarn $ + "Current schema does not match actual schema: " <> + displayShow (actualSchema, currentSchema) + deleteWhere ([] :: [Filter Schema]) + insert_ $ Schema currentSchema From a4cacd6991a5e0db0dfbb5ea029c439cd96b6107 Mon Sep 17 00:00:00 2001 From: Bryan Richter Date: Fri, 22 Dec 2023 18:49:15 +0200 Subject: [PATCH 02/18] Enable overriding S3 endpoint with AWS_S3_ENDPOINT --- src/Stackage/Database/Cron.hs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/Stackage/Database/Cron.hs b/src/Stackage/Database/Cron.hs index d966618..fafbd8a 100644 --- a/src/Stackage/Database/Cron.hs +++ b/src/Stackage/Database/Cron.hs @@ -59,6 +59,7 @@ import Stackage.Database.PackageInfo import Stackage.Database.Query import Stackage.Database.Schema import Stackage.Database.Types +import System.Environment (getEnvironment) import UnliftIO.Concurrent (getNumCapabilities) import Web.PathPieces (fromPathPiece, toPathPiece) import qualified Control.Retry as Retry @@ -163,7 +164,12 @@ stackageServerCron StackageCronOptions {..} = do cabalMutable <- newIORef Map.empty gpdCache <- newIORef IntMap.empty defaultProcessContext <- mkDefaultProcessContext - aws <- newEnv Discover + aws <- do + aws' <- newEnv Discover + endpoint <- lookup "AWS_S3_ENDPOINT" <$> getEnvironment + pure $ case endpoint of + Nothing -> aws' + Just ep -> configure (setEndpoint True (BS8.pack ep) 443 s3) aws' withLogFunc (setLogMinLevel scoLogLevel lo) $ \logFunc -> do let pantryConfig = PantryConfig From a2f77219b633f5bec56e528d0d5c40ca5d601b04 Mon Sep 17 00:00:00 2001 From: Bryan Richter Date: Wed, 27 Dec 2023 15:18:47 +0200 Subject: [PATCH 03/18] Remove unused TRUNCATE The LatestVersion cache table was removed in f8a82ec511f866dc3ef6da3a543c4b131fe612f6. --- src/Stackage/Database/Cron.hs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Stackage/Database/Cron.hs b/src/Stackage/Database/Cron.hs index fafbd8a..19c61e0 100644 --- a/src/Stackage/Database/Cron.hs +++ b/src/Stackage/Database/Cron.hs @@ -225,7 +225,6 @@ runStackageUpdate doNotUpload = do unless doNotUpload uploadSnapshotsJSON buildAndUploadHoogleDB doNotUpload logInfo "Finished building and uploading Hoogle DBs" - run $ rawExecute "TRUNCATE TABLE latest_version" [] -- | This will look at 'global-hints.yaml' and will create core package getters that are reused From 2939d98b9fe47ba623276e2f1f72a3fc4b65cac7 Mon Sep 17 00:00:00 2001 From: Bryan Richter Date: Mon, 8 Jan 2024 10:41:02 +0200 Subject: [PATCH 04/18] Enable building on NixOS --- stack.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/stack.yaml b/stack.yaml index 5449ef3..4392c2e 100644 --- a/stack.yaml +++ b/stack.yaml @@ -14,3 +14,8 @@ extra-deps: drop-packages: - Cabal + +nix: + packages: + - zlib + - postgresql From 6b4232b1c631893f19536724636b17b0fd95a64b Mon Sep 17 00:00:00 2001 From: Bryan Richter Date: Mon, 8 Jan 2024 14:26:38 +0200 Subject: [PATCH 05/18] Add download-bucket-url option --- app/stackage-server-cron.hs | 17 ++++++++++++----- config/settings.yml | 3 +++ src/Application.hs | 2 +- src/Handler/Haddock.hs | 30 +++++++++++++++++------------- src/Handler/StackageIndex.hs | 7 +++---- src/Settings.hs | 3 +++ src/Stackage/Database/Cron.hs | 26 ++++++++++++++------------ src/Stackage/Database/Types.hs | 12 +++++++++--- 8 files changed, 62 insertions(+), 38 deletions(-) diff --git a/app/stackage-server-cron.hs b/app/stackage-server-cron.hs index aaebc0c..b0fc6bd 100644 --- a/app/stackage-server-cron.hs +++ b/app/stackage-server-cron.hs @@ -38,17 +38,24 @@ optsParser = \their yaml files from stackage-snapshots repo have been updated or not.") <*> option readText - (long "download-bucket" <> value haddockBucketName <> metavar "DOWNLOAD_BUCKET" <> + (long "download-bucket" <> value defHaddockBucketName <> metavar "DOWNLOAD_BUCKET" <> help ("S3 Bucket name where things like haddock and current hoogle files should \ - \be downloaded from. Default is: " <> - T.unpack haddockBucketName)) <*> + \be downloaded from. Used in S3 API read operations. Default is: " <> + T.unpack defHaddockBucketName)) <*> option readText - (long "upload-bucket" <> value haddockBucketName <> metavar "UPLOAD_BUCKET" <> + (long "download-bucket-url" <> value defHaddockBucketUrl <> metavar "DOWNLOAD_BUCKET_URL" <> + help + ("Publicly accessible URL where the download bucket can be accessed. Used for \ + \serving the Haddocks on the website. Default is: " <> + T.unpack defHaddockBucketUrl)) <*> + option + readText + (long "upload-bucket" <> value defHaddockBucketName <> metavar "UPLOAD_BUCKET" <> help ("S3 Bucket where hoogle db and snapshots.json file will be uploaded to. Default is: " <> - T.unpack haddockBucketName)) <*> + T.unpack defHaddockBucketName)) <*> switch (long "do-not-upload" <> help "Stop from hoogle db and snapshots.json from being generated and uploaded") <*> diff --git a/config/settings.yml b/config/settings.yml index a28a838..52e7406 100644 --- a/config/settings.yml +++ b/config/settings.yml @@ -24,3 +24,6 @@ force-ssl: false postgres-string: "_env:PGSTRING:host=localhost port=5432 user=stackage dbname=stackage password=stackage" postgres-poolsize: "_env:PGPOOLSIZE:8" + +# Publicly-accessible URL for the bucket holding Haddock contents. +download-bucket-url: "_env:DOWNLOAD_BUCKET_URL:https://s3.amazonaws.com/haddock.stackage.org" diff --git a/src/Application.hs b/src/Application.hs index 0818f45..5f0bcd3 100644 --- a/src/Application.hs +++ b/src/Application.hs @@ -157,7 +157,7 @@ withFoundation appLogFunc appSettings inner = do runRIO appLogFunc $ RIO.logError $ "Couldn't get Stack matcher: " <> displayShow e pure oldMatcher appMirrorStatus <- mkUpdateMirrorStatus - hoogleLocker <- newHoogleLocker appLogFunc appHttpManager + hoogleLocker <- newHoogleLocker appLogFunc appHttpManager (appDownloadBucketUrl appSettings) let appGetHoogleDB = singleRun hoogleLocker let appGitRev = $$tGitRev runConcurrently $ runContentUpdates *> Concurrently (inner App {..}) diff --git a/src/Handler/Haddock.hs b/src/Handler/Haddock.hs index 5a7112b..1f37d58 100644 --- a/src/Handler/Haddock.hs +++ b/src/Handler/Haddock.hs @@ -8,13 +8,14 @@ import Import import qualified Data.Text as T (takeEnd) import Stackage.Database -makeURL :: SnapName -> [Text] -> Text -makeURL snapName rest = concat - $ "https://s3.amazonaws.com/" - : haddockBucketName - : "/" - : toPathPiece snapName - : map (cons '/') rest +makeURL :: SnapName -> [Text] -> Handler Text +makeURL snapName rest = do + bucketUrl <- getsYesod (appDownloadBucketUrl . appSettings) + pure . concat + $ bucketUrl + : "/" + : toPathPiece snapName + : map (cons '/') rest shouldRedirect :: Bool shouldRedirect = False @@ -27,7 +28,7 @@ getHaddockR snapName rest result <- redirectWithVersion snapName rest case result of Just route -> redirect route - Nothing -> redirect $ makeURL snapName rest + Nothing -> redirect =<< makeURL snapName rest | Just docType <- mdocType = do cacheSeconds $ 60 * 60 * 24 * 7 result <- redirectWithVersion snapName rest @@ -41,7 +42,7 @@ getHaddockR snapName rest return ("text/html; charset=utf-8", mstyle /= Just "stackage") DocJson -> return ("application/jsontml; charset=utf-8", True) - req <- parseRequest $ unpack $ makeURL snapName rest + req <- parseRequest =<< unpack <$> makeURL snapName rest man <- getHttpManager <$> getYesod (_, res) <- runReaderT (acquireResponse req >>= allocateAcquire) man if plain @@ -54,7 +55,7 @@ getHaddockR snapName rest peekC >>= maybe (return ()) (const $ yield $ encodeUtf8 extra) mapC id) .| mapC (Chunk . toBuilder) - | otherwise = redirect $ makeURL snapName rest + | otherwise = redirect =<< makeURL snapName rest where mdocType = case T.takeEnd 5 <$> headMay (reverse rest) of @@ -141,6 +142,9 @@ getHaddockBackupR (snap':rest) | Just branch <- fromPathPiece snap' = track "Handler.Haddock.getHaddockBackupR" $ do snapName <- newestSnapshot branch >>= maybe notFound pure redirect $ HaddockR snapName rest -getHaddockBackupR rest = track "Handler.Haddock.getHaddockBackupR" $ redirect $ concat - $ "https://s3.amazonaws.com/haddock.stackage.org" - : map (cons '/') rest +getHaddockBackupR rest = track "Handler.Haddock.getHaddockBackupR" $ do + bucketUrl <- getsYesod (appDownloadBucketUrl . appSettings) + redirect + $ concat + $ bucketUrl + : map (cons '/') rest diff --git a/src/Handler/StackageIndex.hs b/src/Handler/StackageIndex.hs index 58bc767..8b44c42 100644 --- a/src/Handler/StackageIndex.hs +++ b/src/Handler/StackageIndex.hs @@ -2,13 +2,12 @@ module Handler.StackageIndex where import Import -import Stackage.Database.Types (haddockBucketName) getStackageIndexR :: SnapName -> Handler TypedContent -getStackageIndexR slug = +getStackageIndexR slug = do + bucketUrl <- getsYesod (appDownloadBucketUrl . appSettings) redirect $ concat - [ "https://s3.amazonaws.com/" - , haddockBucketName + [ bucketUrl , "/package-index/" , toPathPiece slug , ".tar.gz" diff --git a/src/Settings.hs b/src/Settings.hs index c3e6e72..a02cc90 100644 --- a/src/Settings.hs +++ b/src/Settings.hs @@ -56,6 +56,8 @@ data AppSettings = AppSettings -- ^ Force redirect to SSL , appDevDownload :: Bool -- ^ Controls how Git and database resources are downloaded (True means less downloading) + , appDownloadBucketUrl :: Text + -- ^ Publicly-accessible URL for the bucket holding Haddock contents. } data DatabaseSettings @@ -109,6 +111,7 @@ instance FromJSON AppSettings where appSkipCombining <- o .:? "skip-combining" .!= dev appForceSsl <- o .:? "force-ssl" .!= not dev appDevDownload <- o .:? "dev-download" .!= dev + appDownloadBucketUrl <- o .:? "download-bucket-url" .!= "https://s3.amazonaws.com/haddock.stackage.org" return AppSettings {..} diff --git a/src/Stackage/Database/Cron.hs b/src/Stackage/Database/Cron.hs index 19c61e0..a6ec20e 100644 --- a/src/Stackage/Database/Cron.hs +++ b/src/Stackage/Database/Cron.hs @@ -10,7 +10,8 @@ module Stackage.Database.Cron , newHoogleLocker , singleRun , StackageCronOptions(..) - , haddockBucketName + , defHaddockBucketName + , defHaddockBucketUrl ) where import Conduit @@ -74,10 +75,9 @@ hoogleKey name = T.concat , ".hoo" ] -hoogleUrl :: SnapName -> Text -hoogleUrl n = T.concat - [ "https://s3.amazonaws.com/" - , haddockBucketName +hoogleUrl :: SnapName -> Text -> Text +hoogleUrl n haddockBucketUrl = T.concat + [ haddockBucketUrl , "/" , hoogleKey n ] @@ -101,8 +101,8 @@ withResponseUnliftIO :: MonadUnliftIO m => Request -> Manager -> (Response BodyR withResponseUnliftIO req man f = withRunInIO $ \ runInIO -> withResponse req man (runInIO . f) newHoogleLocker :: - (HasLogFunc env, MonadIO m) => env -> Manager -> m (SingleRun SnapName (Maybe FilePath)) -newHoogleLocker env man = mkSingleRun hoogleLocker + (HasLogFunc env, MonadIO m) => env -> Manager -> Text -> m (SingleRun SnapName (Maybe FilePath)) +newHoogleLocker env man bucketUrl = mkSingleRun hoogleLocker where hoogleLocker :: MonadIO n => SnapName -> n (Maybe FilePath) hoogleLocker name = @@ -112,7 +112,7 @@ newHoogleLocker env man = mkSingleRun hoogleLocker if exists then return $ Just fp else do - req' <- parseRequest $ T.unpack $ hoogleUrl name + req' <- parseRequest $ T.unpack $ hoogleUrl name bucketUrl let req = req' {decompress = const False} withResponseUnliftIO req man $ \res -> case responseStatus res of @@ -125,7 +125,7 @@ newHoogleLocker env man = mkSingleRun hoogleLocker sinkHandle h return $ Just fp | status == status404 -> do - logDebug $ "NotFound: " <> display (hoogleUrl name) + logDebug $ "NotFound: " <> display (hoogleUrl name bucketUrl) return Nothing | otherwise -> do body <- liftIO $ brConsume $ responseBody res @@ -198,6 +198,7 @@ stackageServerCron StackageCronOptions {..} = do , scCachedGPD = gpdCache , scEnvAWS = aws , scDownloadBucketName = scoDownloadBucketName + , scDownloadBucketUrl = scoDownloadBucketUrl , scUploadBucketName = scoUploadBucketName , scSnapshotsRepo = scoSnapshotsRepo , scReportProgress = scoReportProgress @@ -700,7 +701,8 @@ buildAndUploadHoogleDB :: Bool -> RIO StackageCron () buildAndUploadHoogleDB doNotUpload = do snapshots <- lastLtsNightlyWithoutHoogleDb 5 5 env <- ask - locker <- newHoogleLocker (env ^. logFuncL) (env ^. envManager) + bucketUrl <- asks scDownloadBucketUrl + locker <- newHoogleLocker (env ^. logFuncL) (env ^. envManager) bucketUrl for_ snapshots $ \(snapshotId, snapName) -> unlessM (checkInsertSnapshotHoogleDb False snapshotId) $ do logInfo $ "Starting Hoogle database download: " <> display (hoogleKey snapName) @@ -725,12 +727,12 @@ createHoogleDB :: SnapshotId -> SnapName -> RIO StackageCron (Maybe FilePath) createHoogleDB snapshotId snapName = handleAny logException $ do logInfo $ "Creating Hoogle DB for " <> display snapName - downloadBucket <- scDownloadBucketName <$> ask + downloadBucketUrl <- scDownloadBucketUrl <$> ask let root = "hoogle-gen" bindir = root "bindir" outname = root "output.hoo" tarKey = toPathPiece snapName <> "/hoogle/orig.tar" - tarUrl = "https://s3.amazonaws.com/" <> downloadBucket <> "/" <> tarKey + tarUrl = downloadBucketUrl <> "/" <> tarKey tarFP = root T.unpack tarKey -- When tarball is downloaded it is saved with durability and atomicity, so if it -- is present it is not in a corrupted state diff --git a/src/Stackage/Database/Types.hs b/src/Stackage/Database/Types.hs index 02dabdb..9fd9e79 100644 --- a/src/Stackage/Database/Types.hs +++ b/src/Stackage/Database/Types.hs @@ -40,7 +40,8 @@ module Stackage.Database.Types , Origin(..) , LatestInfo(..) , Deprecation(..) - , haddockBucketName + , defHaddockBucketName + , defHaddockBucketUrl , Changelog(..) , Readme(..) , StackageCronOptions(..) @@ -61,12 +62,16 @@ import Stackage.Database.Schema import Text.Blaze (ToMarkup(..)) import Types -haddockBucketName :: Text -haddockBucketName = "haddock.stackage.org" +defHaddockBucketName :: Text +defHaddockBucketName = "haddock.stackage.org" + +defHaddockBucketUrl :: Text +defHaddockBucketUrl = "https://s3.amazonaws.com/" <> defHaddockBucketName data StackageCronOptions = StackageCronOptions { scoForceUpdate :: !Bool , scoDownloadBucketName :: !Text + , scoDownloadBucketUrl :: !Text , scoUploadBucketName :: !Text , scoDoNotUpload :: !Bool , scoLogLevel :: !LogLevel @@ -84,6 +89,7 @@ data StackageCron = StackageCron , scCachedGPD :: !(IORef (IntMap GenericPackageDescription)) , scEnvAWS :: !Env , scDownloadBucketName :: !Text + , scDownloadBucketUrl :: !Text , scUploadBucketName :: !Text , scSnapshotsRepo :: !GithubRepo , scReportProgress :: !Bool From 143b9b01c5ca56c84bfab04f66fce74d3460ab81 Mon Sep 17 00:00:00 2001 From: Bryan Richter Date: Mon, 15 Jan 2024 15:15:01 +0200 Subject: [PATCH 06/18] Work around amazonka#975 --- stack.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/stack.yaml b/stack.yaml index 4392c2e..e16eb11 100644 --- a/stack.yaml +++ b/stack.yaml @@ -1,6 +1,9 @@ resolver: lts-18.28 extra-deps: - amazonka-1.6.1 +- github: chreekat/amazonka + commit: b/1.6.1-r2-compat + subdirs: [core] - barrier-0.1.1 - classy-prelude-yesod-1.5.0 - unliftio-core-0.1.2.0 @@ -9,6 +12,7 @@ extra-deps: - companion-0.1.0 - aeson-warning-parser-0.1.0 - hpack-0.35.0 +- http-client-0.5.14@sha256:4880b27d6741e331454a1d4c887d96ce3d7d625322c8433983a4b1cd08538577,5348 - git: https://github.com/commercialhaskell/pantry.git commit: 5df643cc1deb561d9c52a9cb6f593aba2bc4c08e From c1c7d14e159eebbbee0c6d895c7648de6e1ad97a Mon Sep 17 00:00:00 2001 From: Bryan Richter Date: Mon, 12 Feb 2024 15:12:57 +0200 Subject: [PATCH 07/18] Upgrade some hoogle messages to warnings --- src/Stackage/Database/Cron.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Stackage/Database/Cron.hs b/src/Stackage/Database/Cron.hs index a6ec20e..a1e2215 100644 --- a/src/Stackage/Database/Cron.hs +++ b/src/Stackage/Database/Cron.hs @@ -125,11 +125,11 @@ newHoogleLocker env man bucketUrl = mkSingleRun hoogleLocker sinkHandle h return $ Just fp | status == status404 -> do - logDebug $ "NotFound: " <> display (hoogleUrl name bucketUrl) + logWarn $ "NotFound: " <> display (hoogleUrl name bucketUrl) return Nothing | otherwise -> do body <- liftIO $ brConsume $ responseBody res - mapM_ (logDebug . displayBytesUtf8) body + mapM_ (logWarn . displayBytesUtf8) body return Nothing getHackageDeprecations :: From 33e5cb2589d0dfcbcb55b3e10f78fa8a43c5f4f2 Mon Sep 17 00:00:00 2001 From: Bryan Richter Date: Mon, 12 Feb 2024 15:15:22 +0200 Subject: [PATCH 08/18] Upgrade all the way to lts-22.6 I stopped at 22.6 because I'm using NixOS and ghc-9.6.3 is the last version available on the stable channel right now. Later snapshots use 9.6.4. --- src/Handler/DownloadStack.hs | 7 +- src/Handler/MirrorStatus.hs | 4 +- src/Stackage/Database/Cron.hs | 144 +++++++++++++-------------- src/Stackage/Database/PackageInfo.hs | 4 +- src/Stackage/Database/Query.hs | 2 +- src/Stackage/Database/Schema.hs | 1 + src/Stackage/Database/Types.hs | 5 +- src/Stackage/Snapshot/Diff.hs | 3 +- stack.yaml | 23 +---- 9 files changed, 87 insertions(+), 106 deletions(-) diff --git a/src/Handler/DownloadStack.hs b/src/Handler/DownloadStack.hs index ecd8e14..154c1e9 100644 --- a/src/Handler/DownloadStack.hs +++ b/src/Handler/DownloadStack.hs @@ -11,6 +11,7 @@ import Data.Conduit.Attoparsec (sinkParser) import Data.WebsiteContent import Import import Yesod.GitRepo +import qualified Data.Aeson.KeyMap as Aeson getDownloadStackListR :: Handler Html getDownloadStackListR = track "Handler.DownloadStack.getDownloadStackListR" $ do @@ -35,14 +36,14 @@ getLatestMatcher man = do return $ \pattern' -> do let pattern'' = pattern' ++ "." Object top <- return val - Array assets <- lookup "assets" top + Array assets <- Aeson.lookup "assets" top headMay $ preferZip $ catMaybes $ map (findMatch pattern'') assets where findMatch pattern' (Object o) = do - String name <- lookup "name" o + String name <- Aeson.lookup "name" o guard $ not $ ".asc" `isSuffixOf` name guard $ pattern' `isInfixOf` name - String url <- lookup "browser_download_url" o + String url <- Aeson.lookup "browser_download_url" o Just url findMatch _ _ = Nothing diff --git a/src/Handler/MirrorStatus.hs b/src/Handler/MirrorStatus.hs index 036573a..7dad1c9 100644 --- a/src/Handler/MirrorStatus.hs +++ b/src/Handler/MirrorStatus.hs @@ -12,6 +12,8 @@ import RIO.Time (diffUTCTime, addUTCTime, getCurrentTime) import Text.XML.Stream.Parse import Data.XML.Types (Event (EventContent), Content (ContentText)) import qualified Prelude +import qualified Data.Aeson.Key as Aeson +import qualified Data.Aeson.KeyMap as Aeson getMirrorStatusR :: Handler Html getMirrorStatusR = do @@ -148,7 +150,7 @@ getLastModifiedGit org repo ref = do lookupJ :: MonadThrow m => Text -> Value -> m Value lookupJ key (Object o) = - case lookup key o of + case Aeson.lookup (Aeson.fromText key) o of Nothing -> error $ "Key not found: " ++ show key Just x -> return x lookupJ key val = error $ concat diff --git a/src/Stackage/Database/Cron.hs b/src/Stackage/Database/Cron.hs index a1e2215..8c3ab6c 100644 --- a/src/Stackage/Database/Cron.hs +++ b/src/Stackage/Database/Cron.hs @@ -5,6 +5,7 @@ {-# LANGUAGE RecordWildCards #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TupleSections #-} +{-# LANGUAGE FlexibleContexts #-} module Stackage.Database.Cron ( stackageServerCron , newHoogleLocker @@ -16,7 +17,6 @@ module Stackage.Database.Cron import Conduit import Control.DeepSeq -import qualified Control.Monad.Trans.AWS as AWS (paginate) import Control.SingleRun import qualified Data.ByteString.Char8 as BS8 import qualified Data.Conduit.Binary as CB @@ -29,9 +29,12 @@ import Data.Yaml (decodeFileEither) import Database.Persist hiding (exists) import Database.Persist.Postgresql hiding (exists) import qualified Hoogle -import Network.AWS hiding (Request, Response) -import Network.AWS.Data.Text (toText) -import Network.AWS.S3 +import Amazonka hiding (Request, length, error) +import Amazonka.Data.Text (toText) +import Amazonka.S3 +import Amazonka.S3.ListObjectsV2 +import Amazonka.S3.Lens +import Amazonka.Lens import Network.HTTP.Client import Network.HTTP.Client.Conduit (bodyReaderSource) import Network.HTTP.Simple (getResponseBody, httpJSONEither) @@ -39,9 +42,9 @@ import Network.HTTP.Types (status200, status404) import Pantry (CabalFileInfo(..), DidUpdateOccur(..), HpackExecutable(HpackBundled), PackageIdentifierRevision(..), defaultCasaMaxPerRequest, defaultCasaRepoPrefix, - defaultHackageSecurityConfig, defaultSnapshotLocation) -import Pantry.Internal.Stackage (HackageTarballResult(..), PantryConfig(..), - Storage(..), forceUpdateHackageIndex, + defaultPackageIndexConfig, + defaultSnapshotLocation, withPantryConfig, PantryConfig) +import Pantry.Internal.Stackage (HackageTarballResult(..), forceUpdateHackageIndex, getHackageTarball, packageTreeKey) import Path (parseAbsDir, toFilePath) import RIO @@ -54,7 +57,6 @@ import RIO.Process (mkDefaultProcessContext) import qualified RIO.Set as Set import qualified RIO.Text as T import RIO.Time -import Settings import Stackage.Database.Github import Stackage.Database.PackageInfo import Stackage.Database.Query @@ -86,11 +88,6 @@ hoogleUrl n haddockBucketUrl = T.concat hackageDeprecatedUrl :: Request hackageDeprecatedUrl = "https://hackage.haskell.org/packages/deprecated.json" -withStorage :: (Storage -> IO a) -> IO a -withStorage inner = do - as <- getAppSettings - withStackageDatabase False (appDatabase as) (\db -> inner (Storage (runDatabase db) id)) - getStackageSnapshotsDir :: RIO StackageCron FilePath getStackageSnapshotsDir = do cron <- ask @@ -154,58 +151,52 @@ stackageServerCron StackageCronOptions {..} = do catchIO (bindPortTCP 17834 "127.0.0.1") $ const $ throwString "Stackage Cron loader process already running, exiting." connectionCount <- getNumCapabilities - withStorage $ \storage -> do - lo <- logOptionsHandle stdout True - stackageRootDir <- getAppUserDataDirectory "stackage" - pantryRootDir <- parseAbsDir (stackageRootDir "pantry") - createDirectoryIfMissing True (toFilePath pantryRootDir) - updateRef <- newMVar True - cabalImmutable <- newIORef Map.empty - cabalMutable <- newIORef Map.empty - gpdCache <- newIORef IntMap.empty - defaultProcessContext <- mkDefaultProcessContext - aws <- do - aws' <- newEnv Discover - endpoint <- lookup "AWS_S3_ENDPOINT" <$> getEnvironment - pure $ case endpoint of - Nothing -> aws' - Just ep -> configure (setEndpoint True (BS8.pack ep) 443 s3) aws' - withLogFunc (setLogMinLevel scoLogLevel lo) $ \logFunc -> do - let pantryConfig = - PantryConfig - { pcHackageSecurity = defaultHackageSecurityConfig - , pcHpackExecutable = HpackBundled - , pcRootDir = pantryRootDir - , pcStorage = storage - , pcUpdateRef = updateRef - , pcParsedCabalFilesRawImmutable = cabalImmutable - , pcParsedCabalFilesMutable = cabalMutable - , pcConnectionCount = connectionCount - , pcCasaRepoPrefix = defaultCasaRepoPrefix - , pcCasaMaxPerRequest = defaultCasaMaxPerRequest - , pcSnapshotLocation = defaultSnapshotLocation - } - currentHoogleVersionId <- runRIO logFunc $ do + lo <- logOptionsHandle stdout True + stackageRootDir <- getAppUserDataDirectory "stackage" + pantryRootDir <- parseAbsDir (stackageRootDir "pantry") + createDirectoryIfMissing True (toFilePath pantryRootDir) + gpdCache <- newIORef IntMap.empty + defaultProcessContext <- mkDefaultProcessContext + aws <- do + aws' <- newEnv discover + endpoint <- lookup "AWS_S3_ENDPOINT" <$> getEnvironment + pure $ case endpoint of + Nothing -> aws' + Just ep -> configureService (setEndpoint True (BS8.pack ep) 443 Amazonka.S3.defaultService) aws' + withLogFunc (setLogMinLevel scoLogLevel lo) $ \logFunc -> do + let cronWithPantryConfig :: HasLogFunc env => (PantryConfig -> RIO env a) -> RIO env a + cronWithPantryConfig = + withPantryConfig + pantryRootDir + defaultPackageIndexConfig + HpackBundled + connectionCount + defaultCasaRepoPrefix + defaultCasaMaxPerRequest + defaultSnapshotLocation + + currentHoogleVersionId <- runRIO logFunc $ do + cronWithPantryConfig $ \pantryConfig -> do runStackageMigrations' pantryConfig getCurrentHoogleVersionIdWithPantryConfig pantryConfig - let stackage = - StackageCron - { scPantryConfig = pantryConfig - , scStackageRoot = stackageRootDir - , scProcessContext = defaultProcessContext - , scLogFunc = logFunc - , scForceFullUpdate = scoForceUpdate - , scCachedGPD = gpdCache - , scEnvAWS = aws - , scDownloadBucketName = scoDownloadBucketName - , scDownloadBucketUrl = scoDownloadBucketUrl - , scUploadBucketName = scoUploadBucketName - , scSnapshotsRepo = scoSnapshotsRepo - , scReportProgress = scoReportProgress - , scCacheCabalFiles = scoCacheCabalFiles - , scHoogleVersionId = currentHoogleVersionId - } - runRIO stackage (runStackageUpdate scoDoNotUpload) + let stackage pantryConfig = + StackageCron + { scPantryConfig = pantryConfig + , scStackageRoot = stackageRootDir + , scProcessContext = defaultProcessContext + , scLogFunc = logFunc + , scForceFullUpdate = scoForceUpdate + , scCachedGPD = gpdCache + , scEnvAWS = aws + , scDownloadBucketName = scoDownloadBucketName + , scDownloadBucketUrl = scoDownloadBucketUrl + , scUploadBucketName = scoUploadBucketName + , scSnapshotsRepo = scoSnapshotsRepo + , scReportProgress = scoReportProgress + , scCacheCabalFiles = scoCacheCabalFiles + , scHoogleVersionId = currentHoogleVersionId + } + runRIO logFunc $ cronWithPantryConfig $ \pantryConfig -> runRIO (stackage pantryConfig) (runStackageUpdate scoDoNotUpload) runStackageUpdate :: Bool -> RIO StackageCron () @@ -393,10 +384,11 @@ addPantryPackage sid compiler isHidden flags (PantryPackage pc treeKey) = do checkForDocs :: SnapshotId -> SnapName -> ResourceT (RIO StackageCron) () checkForDocs snapshotId snapName = do bucketName <- lift (scDownloadBucketName <$> ask) + env <- asks scEnvAWS mods <- runConduit $ - AWS.paginate (req bucketName) .| concatMapC (^. lovrsContents) .| - mapC (\obj -> toText (obj ^. oKey)) .| + paginate env (req bucketName) .| concatMapC (fromMaybe [] . (^. listObjectsV2Response_contents)) .| + mapC (\obj -> toText (obj ^. object_key)) .| concatMapC (T.stripSuffix ".html" >=> T.stripPrefix prefix >=> pathToPackageModule) .| sinkList -- it is faster to download all modules in this snapshot, than process them with a conduit all @@ -414,7 +406,7 @@ checkForDocs snapshotId snapName = do display snapName where prefix = textDisplay snapName <> "/" - req bucketName = listObjectsV2 (BucketName bucketName) & lovPrefix .~ Just prefix + req bucketName = newListObjectsV2 (BucketName bucketName) & listObjectsV2_prefix .~ Just prefix -- | This function records all package modules that have documentation available, the ones -- that are not found in the snapshot reported back as an error. Besides being run -- concurrently this function optimizes the SnapshotPackageId lookup as well, since that can @@ -670,9 +662,9 @@ uploadSnapshotsJSON = do uploadBucket <- scUploadBucketName <$> ask let key = ObjectKey "snapshots.json" uploadFromRIO key $ - set poACL (Just OPublicRead) $ - set poContentType (Just "application/json") $ - putObject (BucketName uploadBucket) key (toBody snapshots) + set putObject_acl (Just ObjectCannedACL_Public_read) $ + set putObject_contentType (Just "application/json") $ + newPutObject (BucketName uploadBucket) key (toBody snapshots) -- | Writes a gzipped version of hoogle db into temporary file onto the file system and then uploads -- it to S3. Temporary file is removed upon completion @@ -684,14 +676,14 @@ uploadHoogleDB fp key = body <- toBody <$> readFileBinary fpgz uploadBucket <- scUploadBucketName <$> ask uploadFromRIO key $ - set poACL (Just OPublicRead) $ putObject (BucketName uploadBucket) key body + set putObject_acl (Just ObjectCannedACL_Public_read) $ newPutObject (BucketName uploadBucket) key body -uploadFromRIO :: AWSRequest a => ObjectKey -> a -> RIO StackageCron () +uploadFromRIO :: (AWSRequest a, Typeable a, Typeable (AWSResponse a)) => ObjectKey -> a -> RIO StackageCron () uploadFromRIO key po = do logInfo $ "Uploading " <> displayShow key <> " to S3 bucket." - env <- ask - eres <- runResourceT $ runAWS env $ trying _Error $ send po + env <- asks scEnvAWS + eres <- runResourceT $ trying _Error $ send env po case eres of Left e -> logError $ "Couldn't upload " <> displayShow key <> " to S3 becuase " <> displayShow e @@ -701,8 +693,9 @@ buildAndUploadHoogleDB :: Bool -> RIO StackageCron () buildAndUploadHoogleDB doNotUpload = do snapshots <- lastLtsNightlyWithoutHoogleDb 5 5 env <- ask + awsEnv <- asks scEnvAWS bucketUrl <- asks scDownloadBucketUrl - locker <- newHoogleLocker (env ^. logFuncL) (env ^. envManager) bucketUrl + locker <- newHoogleLocker (env ^. logFuncL) (awsEnv ^. env_manager) bucketUrl for_ snapshots $ \(snapshotId, snapName) -> unlessM (checkInsertSnapshotHoogleDb False snapshotId) $ do logInfo $ "Starting Hoogle database download: " <> display (hoogleKey snapName) @@ -738,7 +731,8 @@ createHoogleDB snapshotId snapName = -- is present it is not in a corrupted state unlessM (doesFileExist tarFP) $ do req <- parseRequest $ T.unpack tarUrl - man <- view envManager + env <- asks scEnvAWS + let man = env ^. env_manager withResponseUnliftIO req {decompress = const True} man $ \res -> do throwErrorStatusCodes req res createDirectoryIfMissing True $ takeDirectory tarFP diff --git a/src/Stackage/Database/PackageInfo.hs b/src/Stackage/Database/PackageInfo.hs index f47a563..80ecad0 100644 --- a/src/Stackage/Database/PackageInfo.hs +++ b/src/Stackage/Database/PackageInfo.hs @@ -23,7 +23,7 @@ import Distribution.Compiler (CompilerFlavor(GHC)) import Distribution.Package (Dependency(..)) import Distribution.PackageDescription (CondTree(..), Condition(..), ConfVar(..), - Flag(flagDefault, flagName), FlagName, + PackageFlag(..), flagDefault, flagName, FlagName, GenericPackageDescription, author, condExecutables, condLibrary, description, genPackageFlags, homepage, @@ -152,7 +152,7 @@ getCheckCond compiler overrideFlags gpd = go where go (Var (OS os)) = os == Linux -- arbitrary go (Var (Arch arch)) = arch == X86_64 -- arbitrary - go (Var (Flag flag)) = fromMaybe False $ Map.lookup flag flags + go (Var (PackageFlag flag)) = fromMaybe False $ Map.lookup flag flags go (Var (Impl flavor range)) = flavor == compilerFlavor && compilerVersion `withinRange` range go (Lit b) = b go (CNot c) = not $ go c diff --git a/src/Stackage/Database/Query.hs b/src/Stackage/Database/Query.hs index 7f2b5e5..4f94c6e 100644 --- a/src/Stackage/Database/Query.hs +++ b/src/Stackage/Database/Query.hs @@ -206,7 +206,7 @@ snapshotsJSON = do Just n -> (("nightly" A..= printNightly n) :) return $ A.object $ nightly lts where - toObj lts@(major, _) = T.pack ("lts-" <> show major) A..= printLts lts + toObj lts@(major, _) = fromString ("lts-" <> show major) A..= printLts lts printLts (major, minor) = "lts-" <> show major <> "." <> show minor printNightly day = "nightly-" <> T.pack (show day) diff --git a/src/Stackage/Database/Schema.hs b/src/Stackage/Database/Schema.hs index f5ef5a8..b4cb87d 100644 --- a/src/Stackage/Database/Schema.hs +++ b/src/Stackage/Database/Schema.hs @@ -15,6 +15,7 @@ {-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE TypeFamilies #-} {-# LANGUAGE UndecidableInstances #-} +{-# LANGUAGE TypeOperators #-} module Stackage.Database.Schema ( -- * Database run diff --git a/src/Stackage/Database/Types.hs b/src/Stackage/Database/Types.hs index 9fd9e79..c42ef55 100644 --- a/src/Stackage/Database/Types.hs +++ b/src/Stackage/Database/Types.hs @@ -50,7 +50,7 @@ module Stackage.Database.Types import Data.Aeson import qualified Data.Text as T import Data.Text.Read (decimal) -import Network.AWS (Env, HasEnv(..)) +import Amazonka (Env) import Pantry (BlobKey(..), CabalFileInfo(..), FileSize(..), HasPantryConfig(..), PantryConfig, PackageIdentifierRevision(..), TreeKey(..)) import Pantry.SHA256 (fromHexText) @@ -97,9 +97,6 @@ data StackageCron = StackageCron , scHoogleVersionId :: !VersionId } -instance HasEnv StackageCron where - environment = lens scEnvAWS (\c f -> c {scEnvAWS = f}) - instance HasLogFunc StackageCron where logFuncL = lens scLogFunc (\c f -> c {scLogFunc = f}) diff --git a/src/Stackage/Snapshot/Diff.hs b/src/Stackage/Snapshot/Diff.hs index 641812c..7371f7a 100644 --- a/src/Stackage/Snapshot/Diff.hs +++ b/src/Stackage/Snapshot/Diff.hs @@ -15,6 +15,7 @@ module Stackage.Snapshot.Diff import ClassyPrelude (sortOn, toCaseFold) import Data.Aeson +import Data.Aeson.Key import qualified Data.Text as T (commonPrefixes) import Data.These import RIO @@ -61,7 +62,7 @@ newtype VersionChange = VersionChange { unVersionChange :: These VersionP Versio deriving (Show, Eq, Generic, Typeable) instance ToJSON (WithSnapshotNames VersionChange) where - toJSON (WithSnapshotNames (toPathPiece -> aKey) (toPathPiece -> bKey) change) = + toJSON (WithSnapshotNames (fromText . toPathPiece -> aKey) (fromText . toPathPiece -> bKey) change) = case change of VersionChange (This a) -> object [ aKey .= a ] VersionChange (That b) -> object [ bKey .= b ] diff --git a/stack.yaml b/stack.yaml index e16eb11..2bc5b89 100644 --- a/stack.yaml +++ b/stack.yaml @@ -1,25 +1,10 @@ -resolver: lts-18.28 +resolver: lts-22.6 extra-deps: -- amazonka-1.6.1 -- github: chreekat/amazonka - commit: b/1.6.1-r2-compat - subdirs: [core] -- barrier-0.1.1 -- classy-prelude-yesod-1.5.0 -- unliftio-core-0.1.2.0 -- yesod-gitrepo-0.3.0 -- static-bytes-0.1.0 -- companion-0.1.0 -- aeson-warning-parser-0.1.0 -- hpack-0.35.0 -- http-client-0.5.14@sha256:4880b27d6741e331454a1d4c887d96ce3d7d625322c8433983a4b1cd08538577,5348 -- git: https://github.com/commercialhaskell/pantry.git - commit: 5df643cc1deb561d9c52a9cb6f593aba2bc4c08e - -drop-packages: -- Cabal +- hoogle-5.0.18.4@sha256:1372458e97dff541fcda099236af7936bf93ee6b8c5d15695ee6d9426dff5eed,3171 +- safe-0.3.20@sha256:7813ad56161f57d5162a924de5597d454162a2faed06be6e268b37bb5c19d48d,2312 nix: packages: - zlib - postgresql + - pkg-config From 5cb5668295d29102d4cfec5e98ac44914c8956cc Mon Sep 17 00:00:00 2001 From: Bryan Richter Date: Tue, 13 Feb 2024 09:20:41 +0200 Subject: [PATCH 09/18] Revert to previous pinned version of pantry The new pantry version in lts-22.6 was not compatible with the database and/or config on the stackage server. --- src/Data/WebsiteContent.hs | 1 + src/Stackage/Database/Cron.hs | 106 +++++++++++++++++++--------------- stack.yaml | 4 ++ 3 files changed, 64 insertions(+), 47 deletions(-) diff --git a/src/Data/WebsiteContent.hs b/src/Data/WebsiteContent.hs index aa5ce10..eac2d5a 100644 --- a/src/Data/WebsiteContent.hs +++ b/src/Data/WebsiteContent.hs @@ -2,6 +2,7 @@ {-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE RecordWildCards #-} {-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE TypeOperators #-} module Data.WebsiteContent ( WebsiteContent (..) , StackRelease (..) diff --git a/src/Stackage/Database/Cron.hs b/src/Stackage/Database/Cron.hs index 8c3ab6c..9fa8485 100644 --- a/src/Stackage/Database/Cron.hs +++ b/src/Stackage/Database/Cron.hs @@ -42,9 +42,9 @@ import Network.HTTP.Types (status200, status404) import Pantry (CabalFileInfo(..), DidUpdateOccur(..), HpackExecutable(HpackBundled), PackageIdentifierRevision(..), defaultCasaMaxPerRequest, defaultCasaRepoPrefix, - defaultPackageIndexConfig, - defaultSnapshotLocation, withPantryConfig, PantryConfig) -import Pantry.Internal.Stackage (HackageTarballResult(..), forceUpdateHackageIndex, + defaultHackageSecurityConfig, defaultSnapshotLocation) +import Pantry.Internal.Stackage (HackageTarballResult(..), PantryConfig(..), + Storage(..), forceUpdateHackageIndex, getHackageTarball, packageTreeKey) import Path (parseAbsDir, toFilePath) import RIO @@ -57,6 +57,7 @@ import RIO.Process (mkDefaultProcessContext) import qualified RIO.Set as Set import qualified RIO.Text as T import RIO.Time +import Settings import Stackage.Database.Github import Stackage.Database.PackageInfo import Stackage.Database.Query @@ -88,6 +89,11 @@ hoogleUrl n haddockBucketUrl = T.concat hackageDeprecatedUrl :: Request hackageDeprecatedUrl = "https://hackage.haskell.org/packages/deprecated.json" +withStorage :: (Storage -> IO a) -> IO a +withStorage inner = do + as <- getAppSettings + withStackageDatabase False (appDatabase as) (\db -> inner (Storage (runDatabase db) id)) + getStackageSnapshotsDir :: RIO StackageCron FilePath getStackageSnapshotsDir = do cron <- ask @@ -151,52 +157,58 @@ stackageServerCron StackageCronOptions {..} = do catchIO (bindPortTCP 17834 "127.0.0.1") $ const $ throwString "Stackage Cron loader process already running, exiting." connectionCount <- getNumCapabilities - lo <- logOptionsHandle stdout True - stackageRootDir <- getAppUserDataDirectory "stackage" - pantryRootDir <- parseAbsDir (stackageRootDir "pantry") - createDirectoryIfMissing True (toFilePath pantryRootDir) - gpdCache <- newIORef IntMap.empty - defaultProcessContext <- mkDefaultProcessContext - aws <- do - aws' <- newEnv discover - endpoint <- lookup "AWS_S3_ENDPOINT" <$> getEnvironment - pure $ case endpoint of - Nothing -> aws' - Just ep -> configureService (setEndpoint True (BS8.pack ep) 443 Amazonka.S3.defaultService) aws' - withLogFunc (setLogMinLevel scoLogLevel lo) $ \logFunc -> do - let cronWithPantryConfig :: HasLogFunc env => (PantryConfig -> RIO env a) -> RIO env a - cronWithPantryConfig = - withPantryConfig - pantryRootDir - defaultPackageIndexConfig - HpackBundled - connectionCount - defaultCasaRepoPrefix - defaultCasaMaxPerRequest - defaultSnapshotLocation - - currentHoogleVersionId <- runRIO logFunc $ do - cronWithPantryConfig $ \pantryConfig -> do + withStorage $ \storage -> do + lo <- logOptionsHandle stdout True + stackageRootDir <- getAppUserDataDirectory "stackage" + pantryRootDir <- parseAbsDir (stackageRootDir "pantry") + createDirectoryIfMissing True (toFilePath pantryRootDir) + updateRef <- newMVar True + cabalImmutable <- newIORef Map.empty + cabalMutable <- newIORef Map.empty + gpdCache <- newIORef IntMap.empty + defaultProcessContext <- mkDefaultProcessContext + aws <- do + aws' <- newEnv discover + endpoint <- lookup "AWS_S3_ENDPOINT" <$> getEnvironment + pure $ case endpoint of + Nothing -> aws' + Just ep -> configureService (setEndpoint True (BS8.pack ep) 443 Amazonka.S3.defaultService) aws' + withLogFunc (setLogMinLevel scoLogLevel lo) $ \logFunc -> do + let pantryConfig = + PantryConfig + { pcHackageSecurity = defaultHackageSecurityConfig + , pcHpackExecutable = HpackBundled + , pcRootDir = pantryRootDir + , pcStorage = storage + , pcUpdateRef = updateRef + , pcParsedCabalFilesRawImmutable = cabalImmutable + , pcParsedCabalFilesMutable = cabalMutable + , pcConnectionCount = connectionCount + , pcCasaRepoPrefix = defaultCasaRepoPrefix + , pcCasaMaxPerRequest = defaultCasaMaxPerRequest + , pcSnapshotLocation = defaultSnapshotLocation + } + currentHoogleVersionId <- runRIO logFunc $ do runStackageMigrations' pantryConfig getCurrentHoogleVersionIdWithPantryConfig pantryConfig - let stackage pantryConfig = - StackageCron - { scPantryConfig = pantryConfig - , scStackageRoot = stackageRootDir - , scProcessContext = defaultProcessContext - , scLogFunc = logFunc - , scForceFullUpdate = scoForceUpdate - , scCachedGPD = gpdCache - , scEnvAWS = aws - , scDownloadBucketName = scoDownloadBucketName - , scDownloadBucketUrl = scoDownloadBucketUrl - , scUploadBucketName = scoUploadBucketName - , scSnapshotsRepo = scoSnapshotsRepo - , scReportProgress = scoReportProgress - , scCacheCabalFiles = scoCacheCabalFiles - , scHoogleVersionId = currentHoogleVersionId - } - runRIO logFunc $ cronWithPantryConfig $ \pantryConfig -> runRIO (stackage pantryConfig) (runStackageUpdate scoDoNotUpload) + let stackage = + StackageCron + { scPantryConfig = pantryConfig + , scStackageRoot = stackageRootDir + , scProcessContext = defaultProcessContext + , scLogFunc = logFunc + , scForceFullUpdate = scoForceUpdate + , scCachedGPD = gpdCache + , scEnvAWS = aws + , scDownloadBucketName = scoDownloadBucketName + , scDownloadBucketUrl = scoDownloadBucketUrl + , scUploadBucketName = scoUploadBucketName + , scSnapshotsRepo = scoSnapshotsRepo + , scReportProgress = scoReportProgress + , scCacheCabalFiles = scoCacheCabalFiles + , scHoogleVersionId = currentHoogleVersionId + } + runRIO stackage (runStackageUpdate scoDoNotUpload) runStackageUpdate :: Bool -> RIO StackageCron () diff --git a/stack.yaml b/stack.yaml index 2bc5b89..7e10533 100644 --- a/stack.yaml +++ b/stack.yaml @@ -2,6 +2,10 @@ resolver: lts-22.6 extra-deps: - hoogle-5.0.18.4@sha256:1372458e97dff541fcda099236af7936bf93ee6b8c5d15695ee6d9426dff5eed,3171 - safe-0.3.20@sha256:7813ad56161f57d5162a924de5597d454162a2faed06be6e268b37bb5c19d48d,2312 +- Cabal-3.8.1.0@sha256:77121d8e1aff14a0fd95684b751599db78a7dd26d55862d9fcef27c88b193e9d,12609 +- Cabal-syntax-3.8.1.0@sha256:ed2d937ba6c6a20b75850349eedd41374885fc42369ef152d69e2ba70f44f593,7620 +- git: https://github.com/commercialhaskell/pantry.git + commit: 5df643cc1deb561d9c52a9cb6f593aba2bc4c08e nix: packages: From 9f7d079cfebfa3a53f2c850a1fc7acec316c9f2f Mon Sep 17 00:00:00 2001 From: Bryan Richter Date: Tue, 13 Feb 2024 14:40:51 +0200 Subject: [PATCH 10/18] Hack to support building with Cabal >=3.4 It remains to be seen if this has any user-visible change. Hopefully not, but even if it does, it shouldn't cause any breakage: ">=0" is forward and backward-compatible. --- src/Types.hs | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/src/Types.hs b/src/Types.hs index 2d4de53..6533bf4 100644 --- a/src/Types.hs +++ b/src/Types.hs @@ -407,7 +407,25 @@ instance ToMarkup VersionRangeP where instance PersistField VersionRangeP where toPersistValue = PersistText . textDisplay fromPersistValue v = - fromPersistValue v >>= bimap (T.pack . displayException) VersionRangeP . dtParse + fromPersistValue v >>= bimap (T.pack . displayException) VersionRangeP . dtParse . hackwardCompat_3_4 + where + -- We use parseSimple under the hood, which always parses using + -- the latest version of the Cabal spec. In practice, this hasn't + -- been a problem. Until now. + -- + -- Cabal spec 3.4 dropped support for "-any" as a version range, and the + -- database is full of such values. Luckily, ">=0" is a + -- backward-compatible synonym for "-any". New versions of this app will + -- write ">=0" instead of "-any", which old versions of this app will + -- understand just fine. We just need to substitute on read. + -- + -- FIXME: strictly speaking, VersionRange cannot be parsed without + -- knowing the Cabal spec version of the package that used it. There's + -- nothing *wrong* with "-any". That means we probably need to decode it + -- no further than Text and do further processing outside of the + -- PersistField instance. + hackwardCompat_3_4 "-any" = ">=0" + hackwardCompat_3_4 t = t instance PersistFieldSql VersionRangeP where sqlType _ = SqlString From 22ef976f05ea506ac4945bc10967de68dea1d337 Mon Sep 17 00:00:00 2001 From: Bryan Richter Date: Thu, 15 Feb 2024 12:41:51 +0200 Subject: [PATCH 11/18] Reintroduce my patched amazonka Lol --- stack.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/stack.yaml b/stack.yaml index 7e10533..8c5ef99 100644 --- a/stack.yaml +++ b/stack.yaml @@ -6,6 +6,11 @@ extra-deps: - Cabal-syntax-3.8.1.0@sha256:ed2d937ba6c6a20b75850349eedd41374885fc42369ef152d69e2ba70f44f593,7620 - git: https://github.com/commercialhaskell/pantry.git commit: 5df643cc1deb561d9c52a9cb6f593aba2bc4c08e +# This amazonka patched to support Cloudflare, which kinda has a bug. See +# https://github.com/brendanhay/amazonka/issues/975 for details. +- github: chreekat/amazonka + commit: b/r2-compat + subdirs: [lib/amazonka-core] nix: packages: From eebde8b817b6340174a43ba4864470377861b6ad Mon Sep 17 00:00:00 2001 From: Bryan Richter Date: Fri, 16 Feb 2024 13:12:48 +0200 Subject: [PATCH 12/18] Add a bunch of docs around Hoogle DBs so I remember how it all works. --- src/Stackage/Database/Cron.hs | 25 +++++++++++++++--- src/Stackage/Database/Query.hs | 48 +++++++++++++++++++++++++++++++--- 2 files changed, 65 insertions(+), 8 deletions(-) diff --git a/src/Stackage/Database/Cron.hs b/src/Stackage/Database/Cron.hs index 9fa8485..b53e0d5 100644 --- a/src/Stackage/Database/Cron.hs +++ b/src/Stackage/Database/Cron.hs @@ -103,6 +103,11 @@ getStackageSnapshotsDir = do withResponseUnliftIO :: MonadUnliftIO m => Request -> Manager -> (Response BodyReader -> m b) -> m b withResponseUnliftIO req man f = withRunInIO $ \ runInIO -> withResponse req man (runInIO . f) +-- | Under the SingleRun wrapper that ensures only one thing at a time is +-- writing the file in question, ensure that a Hoogle database exists on the +-- filesystem for the given SnapName. But only going so far as downloading it +-- from the haddock bucket. See 'createHoogleDB' for the function that puts it +-- there in the first place. newHoogleLocker :: (HasLogFunc env, MonadIO m) => env -> Manager -> Text -> m (SingleRun SnapName (Maybe FilePath)) newHoogleLocker env man bucketUrl = mkSingleRun hoogleLocker @@ -704,20 +709,30 @@ uploadFromRIO key po = do buildAndUploadHoogleDB :: Bool -> RIO StackageCron () buildAndUploadHoogleDB doNotUpload = do snapshots <- lastLtsNightlyWithoutHoogleDb 5 5 + -- currentHoogleVersionId <- scHoogleVersionId <$> ask env <- ask awsEnv <- asks scEnvAWS bucketUrl <- asks scDownloadBucketUrl + -- locker is an action that returns the path to a hoogle db, if one exists + -- in the haddock bucket already. locker <- newHoogleLocker (env ^. logFuncL) (awsEnv ^. env_manager) bucketUrl + let insertH = checkInsertSnapshotHoogleDb True + checkH = checkInsertSnapshotHoogleDb False for_ snapshots $ \(snapshotId, snapName) -> - unlessM (checkInsertSnapshotHoogleDb False snapshotId) $ do + -- Even though we just got a list of snapshots that don't have hoogle + -- databases, we check again. For some reason. I don't see how this can + -- actually be useful. both lastLtsNightlyWithoutHoogleDb and + -- checkInsertSnapshotHoogleDb just check against SnapshotHoogleDb. + -- Perhaps the check can be removed. + unlessM (checkH snapshotId) $ do logInfo $ "Starting Hoogle database download: " <> display (hoogleKey snapName) mfp <- singleRun locker snapName case mfp of Just _ -> do logInfo $ "Current hoogle database exists for: " <> display snapName - void $ checkInsertSnapshotHoogleDb True snapshotId + void $ insertH snapshotId Nothing -> do - logInfo $ "Current hoogle database does not yet exist for: " <> display snapName + logInfo $ "Current hoogle database does not yet exist in the bucket for: " <> display snapName mfp' <- createHoogleDB snapshotId snapName forM_ mfp' $ \fp -> do let key = hoogleKey snapName @@ -726,8 +741,10 @@ buildAndUploadHoogleDB doNotUpload = do renamePath fp dest unless doNotUpload $ do uploadHoogleDB dest (ObjectKey key) - void $ checkInsertSnapshotHoogleDb True snapshotId + void $ insertH snapshotId +-- | Create a hoogle db from haddocks for the given snapshot, and upload it to +-- the haddock bucket. createHoogleDB :: SnapshotId -> SnapName -> RIO StackageCron (Maybe FilePath) createHoogleDB snapshotId snapName = handleAny logException $ do diff --git a/src/Stackage/Database/Query.hs b/src/Stackage/Database/Query.hs index 4f94c6e..41041a2 100644 --- a/src/Stackage/Database/Query.hs +++ b/src/Stackage/Database/Query.hs @@ -167,25 +167,48 @@ ltsBefore x y = do go (Entity _ lts) = (ltsSnap lts, SNLts (ltsMajor lts) (ltsMinor lts)) +-- | Queries the database for the latest LTS and nightly snapshots that do not +-- have corresponding entries in the SnapshotHoogleDb table with the current +-- Hoogle version. lastLtsNightlyWithoutHoogleDb :: Int -> Int -> RIO StackageCron [(SnapshotId, SnapName)] lastLtsNightlyWithoutHoogleDb ltsCount nightlyCount = do currentHoogleVersionId <- scHoogleVersionId <$> ask let getSnapshotsWithoutHoogeDb snapId snapCount = map (unValue *** unValue) <$> select + -- "snap" is either Lts or Nightly, while "snapshot" is indeed + -- "snapshot" (from $ \(snap `InnerJoin` snapshot) -> do on $ snap ^. snapId ==. snapshot ^. SnapshotId where_ $ notExists $ from $ \snapshotHoogleDb -> where_ $ - (snapshotHoogleDb ^. SnapshotHoogleDbSnapshot ==. snapshot ^. - SnapshotId) &&. - (snapshotHoogleDb ^. SnapshotHoogleDbVersion ==. - val currentHoogleVersionId) + (snapshotHoogleDb ^. SnapshotHoogleDbSnapshot + ==. snapshot ^. SnapshotId) + &&. (snapshotHoogleDb ^. SnapshotHoogleDbVersion + ==. val currentHoogleVersionId) orderBy [desc (snapshot ^. SnapshotCreated)] limit $ fromIntegral snapCount pure (snapshot ^. SnapshotId, snapshot ^. SnapshotName)) + -- In sql, this query would be + -- + -- select snapshot.id, snapshot.name + -- from snapshot + -- join $foo as snap -- either Lts or Nightly + -- on snap.snap = snapshot.id + -- where not exists ( + -- select 1 + -- from snapshot_hoogle_db + -- where snapshot_hoogle_db.snapshot = snapshot.id + -- and snapshot_hoogle_db.version = $currentHoogleVersionId + -- ) + -- order by snapshot.created desc + -- limit $snapCount + -- + -- So it returns a list of snapshots where there is no + -- corresponding entry in the snapshot_hoogle_db table for the + -- current hoogle version. run $ do lts <- getSnapshotsWithoutHoogeDb LtsSnap ltsCount nightly <- getSnapshotsWithoutHoogeDb NightlySnap nightlyCount @@ -1159,10 +1182,27 @@ checkInsertSnapshotHoogleDb shouldInsert snapshotId = do (from (\v -> do where_ $ v ^. VersionId ==. val hoogleVersionId + -- This is reaching into the *pantry* + -- database! pure (v ^. VersionVersion))) + -- in sql, this query would be + -- + -- select version.version + -- from version + -- where version.id = $hoogleVersionId + -- + -- So it returns the "version"s that corresponds to the + -- current hoogle version id. + -- mhver is now Maybe Version, and corresponds to the current + -- hoogle version, assuming it exists in the Version table forM_ mhver $ \hver -> lift $ logInfo $ "Marking hoogle database for version " <> display hver <> " as available." + -- whether or not the version exists, we still put it into snapshot_hoogle_db + -- So literally the only use of the above query is to log the + -- action we're taking. isJust <$> P.insertUniqueEntity sh + -- if we're not inserting, we're just checking if it already exists + -- in snapshot_hoogle_db. else isJust <$> P.checkUnique sh From c568b5f173a5d42bb466e15ed5a7a0ad24d00bfc Mon Sep 17 00:00:00 2001 From: Bryan Richter Date: Thu, 28 Mar 2024 15:00:20 +0200 Subject: [PATCH 13/18] Clean up OpenSearchDescriptions The Attribution tag "contains a list of all sources or entities that should be credited for the content contained in the search feed." Since the search feed has package descriptions, I think it's murky who should actually be attributed. Removing it since I don't know who should be there. https://github.com/dewitt/opensearch/blob/master/opensearch-1-1-draft-6.md#the-attribution-element --- static/opensearchhoogle.xml | 1 - static/opensearchpackage.xml | 5 ++--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/static/opensearchhoogle.xml b/static/opensearchhoogle.xml index 5c48e7b..44557cc 100644 --- a/static/opensearchhoogle.xml +++ b/static/opensearchhoogle.xml @@ -3,7 +3,6 @@ Hoogle Stackage.org Search modules on Stackage.org using hoogle FP Complete CORP. - Copyright FP Complete CORP. false en-us UTF-8 diff --git a/static/opensearchpackage.xml b/static/opensearchpackage.xml index ea2ca65..f8657b6 100644 --- a/static/opensearchpackage.xml +++ b/static/opensearchpackage.xml @@ -1,15 +1,14 @@ Stackage Packages Stackage.org package page - Just to a Stackage.org package page + Jump to a Stackage.org package page FP Complete CORP. - Copyright FP Complete CORP. false en-us UTF-8 UTF-8 https://www.stackage.org/static/img/stackage.png - + https://www.stackage.org From 0dcb101b341045857d0db8858894e178a621bb24 Mon Sep 17 00:00:00 2001 From: Bryan Richter Date: Thu, 28 Mar 2024 15:04:16 +0200 Subject: [PATCH 14/18] Transfer attribution to Haskell Foundation By agreement, FP Complete's name and link to their website will remain indefinitely! :) --- LICENSE | 1 + templates/default-layout.hamlet | 8 ++++++-- templates/home.hamlet | 5 ++++- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/LICENSE b/LICENSE index c97f84d..9b2de3f 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,7 @@ The MIT License (MIT) Copyright (c) 2014-2017 FP Complete +Copyright (c) 2024 Haskell Foundation Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/templates/default-layout.hamlet b/templates/default-layout.hamlet index d98f073..9a715b1 100644 --- a/templates/default-layout.hamlet +++ b/templates/default-layout.hamlet @@ -30,6 +30,10 @@ $else
- A service provided by - + A service created by + FP Complete + in 2014 | Donated to the + + Haskell Foundation + in 2024. diff --git a/templates/home.hamlet b/templates/home.hamlet index addd981..fdafb70 100644 --- a/templates/home.hamlet +++ b/templates/home.hamlet @@ -60,4 +60,7 @@ FAQ section on Github.

- Stackage's infrastructure, build machines, initial creation and ongoing maintenance, are proudly sponsored by FP Complete. + Stackage's infrastructure, build machines, initial creation and ongoing maintenance were proudly sponsored by FP Complete from 2014 to 2024. + +

+ Today it is a service provided by the Haskell Foundation. From b56aaf33fc4d93b00d69792c504be33417212e11 Mon Sep 17 00:00:00 2001 From: Bryan Richter Date: Wed, 3 Apr 2024 15:44:06 +0300 Subject: [PATCH 15/18] Add comments, change names for understanding --- src/Control/SingleRun.hs | 6 ++++++ src/Stackage/Database/Cron.hs | 10 +++++----- stack.yaml | 12 ++++++++++++ 3 files changed, 23 insertions(+), 5 deletions(-) diff --git a/src/Control/SingleRun.hs b/src/Control/SingleRun.hs index 776072e..aacd0bb 100644 --- a/src/Control/SingleRun.hs +++ b/src/Control/SingleRun.hs @@ -84,6 +84,12 @@ singleRun sr@(SingleRun var f) k = -- OK, we're done running, so let other -- threads run this again. + + -- NB: as soon as we've modified the MVar, the next + -- call to singleRun will think no thread is working and + -- start over. Anything waiting on us will get our + -- result, but nobody else will. That's ok: singleRun + -- just provides a little caching on top of a mutex. modifyMVar_ var $ return . filter (\(k', _) -> k /= k') case eres of diff --git a/src/Stackage/Database/Cron.hs b/src/Stackage/Database/Cron.hs index b53e0d5..866ccd0 100644 --- a/src/Stackage/Database/Cron.hs +++ b/src/Stackage/Database/Cron.hs @@ -414,12 +414,12 @@ checkForDocs snapshotId snapName = do -- Cache is for SnapshotPackageId, there will be many modules per peckage, no need to look into -- the database for each one of them. n <- max 1 . (`div` 2) <$> getNumCapabilities - notFoundList <- lift $ pooledMapConcurrentlyN n (markModules sidsCacheRef) mods - forM_ (Set.fromList $ catMaybes notFoundList) $ \pid -> + unexpectedPackages <- lift $ pooledMapConcurrentlyN n (markModules sidsCacheRef) mods + forM_ (Set.fromList $ catMaybes unexpectedPackages) $ \pid -> lift $ logWarn $ - "Documentation available for package '" <> display pid <> - "' but was not found in this snapshot: " <> + "Documentation found for package '" <> display pid <> + "', which does not exist in this snapshot: " <> display snapName where prefix = textDisplay snapName <> "/" @@ -433,7 +433,7 @@ checkForDocs snapshotId snapName = do let mSnapshotPackageId = Map.lookup pid sidsCache mFound <- run $ markModuleHasDocs snapshotId pid mSnapshotPackageId modName case mFound of - Nothing -> pure $ Just pid + Nothing -> pure $ Just pid -- This package doesn't exist in the snapshot! Just snapshotPackageId | Nothing <- mSnapshotPackageId -> do atomicModifyIORef' diff --git a/stack.yaml b/stack.yaml index 8c5ef99..561d21f 100644 --- a/stack.yaml +++ b/stack.yaml @@ -1,5 +1,17 @@ resolver: lts-22.6 extra-deps: +# WARNING: Changing the hoogle version causes stackage-server-cron to regenerate +# Hoogle databases FOR EVERY SNAPSHOT, EVER. Usually, that's ok! But don't +# forget! The consequences are: (1) More disk usage. Hoogle databases are not +# cleaned up on the stackage-server-cron side, nor on the stackage-server side. +# (Yet. This will change.) (2) More bucket usage. While it's easy to say it's a +# drop in the literal bucket, such excessive misuse of storage makes +# administration, backups, disaster recovery, and many other DevOps concerns +# harder and harder. All but the latest LTS's database are literally never used +# anyway. (3) The Hoogle database schema is defined by the first three +# version components. Any more frequent regeneration is pure unadulterated +# waste. (4) Stackage's Hoogle search will be unavailable until the new +# databases have been generated. - hoogle-5.0.18.4@sha256:1372458e97dff541fcda099236af7936bf93ee6b8c5d15695ee6d9426dff5eed,3171 - safe-0.3.20@sha256:7813ad56161f57d5162a924de5597d454162a2faed06be6e268b37bb5c19d48d,2312 - Cabal-3.8.1.0@sha256:77121d8e1aff14a0fd95684b751599db78a7dd26d55862d9fcef27c88b193e9d,12609 From 9420272b55a09d92de02320782e866a93d9d47fc Mon Sep 17 00:00:00 2001 From: Bryan Richter Date: Wed, 3 Apr 2024 15:45:21 +0300 Subject: [PATCH 16/18] Log unexpected HTTP response fetching Hoogle DB --- src/Stackage/Database/Cron.hs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Stackage/Database/Cron.hs b/src/Stackage/Database/Cron.hs index 866ccd0..4e231ad 100644 --- a/src/Stackage/Database/Cron.hs +++ b/src/Stackage/Database/Cron.hs @@ -137,6 +137,7 @@ newHoogleLocker env man bucketUrl = mkSingleRun hoogleLocker return Nothing | otherwise -> do body <- liftIO $ brConsume $ responseBody res + logWarn $ "Unexpected status: " <> displayShow status mapM_ (logWarn . displayBytesUtf8) body return Nothing From 652b78ab6bb675864d9259f1cf3952ea10d154e1 Mon Sep 17 00:00:00 2001 From: Bryan Richter Date: Wed, 3 Apr 2024 15:48:10 +0300 Subject: [PATCH 17/18] Hush hlint --- src/Stackage/Database/Cron.hs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Stackage/Database/Cron.hs b/src/Stackage/Database/Cron.hs index 4e231ad..04ea847 100644 --- a/src/Stackage/Database/Cron.hs +++ b/src/Stackage/Database/Cron.hs @@ -18,6 +18,7 @@ module Stackage.Database.Cron import Conduit import Control.DeepSeq import Control.SingleRun +import Control.Lens ((?~)) import qualified Data.ByteString.Char8 as BS8 import qualified Data.Conduit.Binary as CB import Data.Conduit.Tar (FileInfo(..), FileType(..), untar) @@ -424,7 +425,7 @@ checkForDocs snapshotId snapName = do display snapName where prefix = textDisplay snapName <> "/" - req bucketName = newListObjectsV2 (BucketName bucketName) & listObjectsV2_prefix .~ Just prefix + req bucketName = newListObjectsV2 (BucketName bucketName) & listObjectsV2_prefix ?~ prefix -- | This function records all package modules that have documentation available, the ones -- that are not found in the snapshot reported back as an error. Besides being run -- concurrently this function optimizes the SnapshotPackageId lookup as well, since that can From a62a2a8cb47e04dbf6b9ab45bb929b13dd500a88 Mon Sep 17 00:00:00 2001 From: Bryan Richter Date: Wed, 3 Apr 2024 15:48:26 +0300 Subject: [PATCH 18/18] Ensure correct version of HLS used in nix shell --- stack.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/stack.yaml b/stack.yaml index 561d21f..12cc9dc 100644 --- a/stack.yaml +++ b/stack.yaml @@ -29,3 +29,4 @@ nix: - zlib - postgresql - pkg-config + - haskell-language-server