diff --git a/LICENSE b/LICENSE index c97f84d..9b2de3f 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,7 @@ The MIT License (MIT) Copyright (c) 2014-2017 FP Complete +Copyright (c) 2024 Haskell Foundation Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/app/stackage-server-cron.hs b/app/stackage-server-cron.hs index aaebc0c..b0fc6bd 100644 --- a/app/stackage-server-cron.hs +++ b/app/stackage-server-cron.hs @@ -38,17 +38,24 @@ optsParser = \their yaml files from stackage-snapshots repo have been updated or not.") <*> option readText - (long "download-bucket" <> value haddockBucketName <> metavar "DOWNLOAD_BUCKET" <> + (long "download-bucket" <> value defHaddockBucketName <> metavar "DOWNLOAD_BUCKET" <> help ("S3 Bucket name where things like haddock and current hoogle files should \ - \be downloaded from. Default is: " <> - T.unpack haddockBucketName)) <*> + \be downloaded from. Used in S3 API read operations. Default is: " <> + T.unpack defHaddockBucketName)) <*> option readText - (long "upload-bucket" <> value haddockBucketName <> metavar "UPLOAD_BUCKET" <> + (long "download-bucket-url" <> value defHaddockBucketUrl <> metavar "DOWNLOAD_BUCKET_URL" <> + help + ("Publicly accessible URL where the download bucket can be accessed. Used for \ + \serving the Haddocks on the website. Default is: " <> + T.unpack defHaddockBucketUrl)) <*> + option + readText + (long "upload-bucket" <> value defHaddockBucketName <> metavar "UPLOAD_BUCKET" <> help ("S3 Bucket where hoogle db and snapshots.json file will be uploaded to. Default is: " <> - T.unpack haddockBucketName)) <*> + T.unpack defHaddockBucketName)) <*> switch (long "do-not-upload" <> help "Stop from hoogle db and snapshots.json from being generated and uploaded") <*> diff --git a/config/settings.yml b/config/settings.yml index a28a838..52e7406 100644 --- a/config/settings.yml +++ b/config/settings.yml @@ -24,3 +24,6 @@ force-ssl: false postgres-string: "_env:PGSTRING:host=localhost port=5432 user=stackage dbname=stackage password=stackage" postgres-poolsize: "_env:PGPOOLSIZE:8" + +# Publicly-accessible URL for the bucket holding Haddock contents. +download-bucket-url: "_env:DOWNLOAD_BUCKET_URL:https://s3.amazonaws.com/haddock.stackage.org" diff --git a/src/Application.hs b/src/Application.hs index 0818f45..5f0bcd3 100644 --- a/src/Application.hs +++ b/src/Application.hs @@ -157,7 +157,7 @@ withFoundation appLogFunc appSettings inner = do runRIO appLogFunc $ RIO.logError $ "Couldn't get Stack matcher: " <> displayShow e pure oldMatcher appMirrorStatus <- mkUpdateMirrorStatus - hoogleLocker <- newHoogleLocker appLogFunc appHttpManager + hoogleLocker <- newHoogleLocker appLogFunc appHttpManager (appDownloadBucketUrl appSettings) let appGetHoogleDB = singleRun hoogleLocker let appGitRev = $$tGitRev runConcurrently $ runContentUpdates *> Concurrently (inner App {..}) diff --git a/src/Control/SingleRun.hs b/src/Control/SingleRun.hs index 776072e..aacd0bb 100644 --- a/src/Control/SingleRun.hs +++ b/src/Control/SingleRun.hs @@ -84,6 +84,12 @@ singleRun sr@(SingleRun var f) k = -- OK, we're done running, so let other -- threads run this again. + + -- NB: as soon as we've modified the MVar, the next + -- call to singleRun will think no thread is working and + -- start over. Anything waiting on us will get our + -- result, but nobody else will. That's ok: singleRun + -- just provides a little caching on top of a mutex. modifyMVar_ var $ return . filter (\(k', _) -> k /= k') case eres of diff --git a/src/Data/WebsiteContent.hs b/src/Data/WebsiteContent.hs index aa5ce10..eac2d5a 100644 --- a/src/Data/WebsiteContent.hs +++ b/src/Data/WebsiteContent.hs @@ -2,6 +2,7 @@ {-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE RecordWildCards #-} {-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE TypeOperators #-} module Data.WebsiteContent ( WebsiteContent (..) , StackRelease (..) diff --git a/src/Handler/DownloadStack.hs b/src/Handler/DownloadStack.hs index ecd8e14..154c1e9 100644 --- a/src/Handler/DownloadStack.hs +++ b/src/Handler/DownloadStack.hs @@ -11,6 +11,7 @@ import Data.Conduit.Attoparsec (sinkParser) import Data.WebsiteContent import Import import Yesod.GitRepo +import qualified Data.Aeson.KeyMap as Aeson getDownloadStackListR :: Handler Html getDownloadStackListR = track "Handler.DownloadStack.getDownloadStackListR" $ do @@ -35,14 +36,14 @@ getLatestMatcher man = do return $ \pattern' -> do let pattern'' = pattern' ++ "." Object top <- return val - Array assets <- lookup "assets" top + Array assets <- Aeson.lookup "assets" top headMay $ preferZip $ catMaybes $ map (findMatch pattern'') assets where findMatch pattern' (Object o) = do - String name <- lookup "name" o + String name <- Aeson.lookup "name" o guard $ not $ ".asc" `isSuffixOf` name guard $ pattern' `isInfixOf` name - String url <- lookup "browser_download_url" o + String url <- Aeson.lookup "browser_download_url" o Just url findMatch _ _ = Nothing diff --git a/src/Handler/Haddock.hs b/src/Handler/Haddock.hs index 5a7112b..1f37d58 100644 --- a/src/Handler/Haddock.hs +++ b/src/Handler/Haddock.hs @@ -8,13 +8,14 @@ import Import import qualified Data.Text as T (takeEnd) import Stackage.Database -makeURL :: SnapName -> [Text] -> Text -makeURL snapName rest = concat - $ "https://s3.amazonaws.com/" - : haddockBucketName - : "/" - : toPathPiece snapName - : map (cons '/') rest +makeURL :: SnapName -> [Text] -> Handler Text +makeURL snapName rest = do + bucketUrl <- getsYesod (appDownloadBucketUrl . appSettings) + pure . concat + $ bucketUrl + : "/" + : toPathPiece snapName + : map (cons '/') rest shouldRedirect :: Bool shouldRedirect = False @@ -27,7 +28,7 @@ getHaddockR snapName rest result <- redirectWithVersion snapName rest case result of Just route -> redirect route - Nothing -> redirect $ makeURL snapName rest + Nothing -> redirect =<< makeURL snapName rest | Just docType <- mdocType = do cacheSeconds $ 60 * 60 * 24 * 7 result <- redirectWithVersion snapName rest @@ -41,7 +42,7 @@ getHaddockR snapName rest return ("text/html; charset=utf-8", mstyle /= Just "stackage") DocJson -> return ("application/jsontml; charset=utf-8", True) - req <- parseRequest $ unpack $ makeURL snapName rest + req <- parseRequest =<< unpack <$> makeURL snapName rest man <- getHttpManager <$> getYesod (_, res) <- runReaderT (acquireResponse req >>= allocateAcquire) man if plain @@ -54,7 +55,7 @@ getHaddockR snapName rest peekC >>= maybe (return ()) (const $ yield $ encodeUtf8 extra) mapC id) .| mapC (Chunk . toBuilder) - | otherwise = redirect $ makeURL snapName rest + | otherwise = redirect =<< makeURL snapName rest where mdocType = case T.takeEnd 5 <$> headMay (reverse rest) of @@ -141,6 +142,9 @@ getHaddockBackupR (snap':rest) | Just branch <- fromPathPiece snap' = track "Handler.Haddock.getHaddockBackupR" $ do snapName <- newestSnapshot branch >>= maybe notFound pure redirect $ HaddockR snapName rest -getHaddockBackupR rest = track "Handler.Haddock.getHaddockBackupR" $ redirect $ concat - $ "https://s3.amazonaws.com/haddock.stackage.org" - : map (cons '/') rest +getHaddockBackupR rest = track "Handler.Haddock.getHaddockBackupR" $ do + bucketUrl <- getsYesod (appDownloadBucketUrl . appSettings) + redirect + $ concat + $ bucketUrl + : map (cons '/') rest diff --git a/src/Handler/MirrorStatus.hs b/src/Handler/MirrorStatus.hs index 036573a..7dad1c9 100644 --- a/src/Handler/MirrorStatus.hs +++ b/src/Handler/MirrorStatus.hs @@ -12,6 +12,8 @@ import RIO.Time (diffUTCTime, addUTCTime, getCurrentTime) import Text.XML.Stream.Parse import Data.XML.Types (Event (EventContent), Content (ContentText)) import qualified Prelude +import qualified Data.Aeson.Key as Aeson +import qualified Data.Aeson.KeyMap as Aeson getMirrorStatusR :: Handler Html getMirrorStatusR = do @@ -148,7 +150,7 @@ getLastModifiedGit org repo ref = do lookupJ :: MonadThrow m => Text -> Value -> m Value lookupJ key (Object o) = - case lookup key o of + case Aeson.lookup (Aeson.fromText key) o of Nothing -> error $ "Key not found: " ++ show key Just x -> return x lookupJ key val = error $ concat diff --git a/src/Handler/StackageIndex.hs b/src/Handler/StackageIndex.hs index 58bc767..8b44c42 100644 --- a/src/Handler/StackageIndex.hs +++ b/src/Handler/StackageIndex.hs @@ -2,13 +2,12 @@ module Handler.StackageIndex where import Import -import Stackage.Database.Types (haddockBucketName) getStackageIndexR :: SnapName -> Handler TypedContent -getStackageIndexR slug = +getStackageIndexR slug = do + bucketUrl <- getsYesod (appDownloadBucketUrl . appSettings) redirect $ concat - [ "https://s3.amazonaws.com/" - , haddockBucketName + [ bucketUrl , "/package-index/" , toPathPiece slug , ".tar.gz" diff --git a/src/Settings.hs b/src/Settings.hs index c3e6e72..a02cc90 100644 --- a/src/Settings.hs +++ b/src/Settings.hs @@ -56,6 +56,8 @@ data AppSettings = AppSettings -- ^ Force redirect to SSL , appDevDownload :: Bool -- ^ Controls how Git and database resources are downloaded (True means less downloading) + , appDownloadBucketUrl :: Text + -- ^ Publicly-accessible URL for the bucket holding Haddock contents. } data DatabaseSettings @@ -109,6 +111,7 @@ instance FromJSON AppSettings where appSkipCombining <- o .:? "skip-combining" .!= dev appForceSsl <- o .:? "force-ssl" .!= not dev appDevDownload <- o .:? "dev-download" .!= dev + appDownloadBucketUrl <- o .:? "download-bucket-url" .!= "https://s3.amazonaws.com/haddock.stackage.org" return AppSettings {..} diff --git a/src/Stackage/Database/Cron.hs b/src/Stackage/Database/Cron.hs index 77ce0cf..04ea847 100644 --- a/src/Stackage/Database/Cron.hs +++ b/src/Stackage/Database/Cron.hs @@ -5,18 +5,20 @@ {-# LANGUAGE RecordWildCards #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TupleSections #-} +{-# LANGUAGE FlexibleContexts #-} module Stackage.Database.Cron ( stackageServerCron , newHoogleLocker , singleRun , StackageCronOptions(..) - , haddockBucketName + , defHaddockBucketName + , defHaddockBucketUrl ) where import Conduit import Control.DeepSeq -import qualified Control.Monad.Trans.AWS as AWS (paginate) import Control.SingleRun +import Control.Lens ((?~)) import qualified Data.ByteString.Char8 as BS8 import qualified Data.Conduit.Binary as CB import Data.Conduit.Tar (FileInfo(..), FileType(..), untar) @@ -28,9 +30,12 @@ import Data.Yaml (decodeFileEither) import Database.Persist hiding (exists) import Database.Persist.Postgresql hiding (exists) import qualified Hoogle -import Network.AWS hiding (Request, Response) -import Network.AWS.Data.Text (toText) -import Network.AWS.S3 +import Amazonka hiding (Request, length, error) +import Amazonka.Data.Text (toText) +import Amazonka.S3 +import Amazonka.S3.ListObjectsV2 +import Amazonka.S3.Lens +import Amazonka.Lens import Network.HTTP.Client import Network.HTTP.Client.Conduit (bodyReaderSource) import Network.HTTP.Simple (getResponseBody, httpJSONEither) @@ -59,6 +64,7 @@ import Stackage.Database.PackageInfo import Stackage.Database.Query import Stackage.Database.Schema import Stackage.Database.Types +import System.Environment (getEnvironment) import UnliftIO.Concurrent (getNumCapabilities) import Web.PathPieces (fromPathPiece, toPathPiece) import qualified Control.Retry as Retry @@ -73,10 +79,9 @@ hoogleKey name = T.concat , ".hoo" ] -hoogleUrl :: SnapName -> Text -hoogleUrl n = T.concat - [ "https://s3.amazonaws.com/" - , haddockBucketName +hoogleUrl :: SnapName -> Text -> Text +hoogleUrl n haddockBucketUrl = T.concat + [ haddockBucketUrl , "/" , hoogleKey n ] @@ -99,9 +104,14 @@ getStackageSnapshotsDir = do withResponseUnliftIO :: MonadUnliftIO m => Request -> Manager -> (Response BodyReader -> m b) -> m b withResponseUnliftIO req man f = withRunInIO $ \ runInIO -> withResponse req man (runInIO . f) +-- | Under the SingleRun wrapper that ensures only one thing at a time is +-- writing the file in question, ensure that a Hoogle database exists on the +-- filesystem for the given SnapName. But only going so far as downloading it +-- from the haddock bucket. See 'createHoogleDB' for the function that puts it +-- there in the first place. newHoogleLocker :: - (HasLogFunc env, MonadIO m) => env -> Manager -> m (SingleRun SnapName (Maybe FilePath)) -newHoogleLocker env man = mkSingleRun hoogleLocker + (HasLogFunc env, MonadIO m) => env -> Manager -> Text -> m (SingleRun SnapName (Maybe FilePath)) +newHoogleLocker env man bucketUrl = mkSingleRun hoogleLocker where hoogleLocker :: MonadIO n => SnapName -> n (Maybe FilePath) hoogleLocker name = @@ -111,7 +121,7 @@ newHoogleLocker env man = mkSingleRun hoogleLocker if exists then return $ Just fp else do - req' <- parseRequest $ T.unpack $ hoogleUrl name + req' <- parseRequest $ T.unpack $ hoogleUrl name bucketUrl let req = req' {decompress = const False} withResponseUnliftIO req man $ \res -> case responseStatus res of @@ -124,11 +134,12 @@ newHoogleLocker env man = mkSingleRun hoogleLocker sinkHandle h return $ Just fp | status == status404 -> do - logDebug $ "NotFound: " <> display (hoogleUrl name) + logWarn $ "NotFound: " <> display (hoogleUrl name bucketUrl) return Nothing | otherwise -> do body <- liftIO $ brConsume $ responseBody res - mapM_ (logDebug . displayBytesUtf8) body + logWarn $ "Unexpected status: " <> displayShow status + mapM_ (logWarn . displayBytesUtf8) body return Nothing getHackageDeprecations :: @@ -163,7 +174,12 @@ stackageServerCron StackageCronOptions {..} = do cabalMutable <- newIORef Map.empty gpdCache <- newIORef IntMap.empty defaultProcessContext <- mkDefaultProcessContext - aws <- newEnv Discover + aws <- do + aws' <- newEnv discover + endpoint <- lookup "AWS_S3_ENDPOINT" <$> getEnvironment + pure $ case endpoint of + Nothing -> aws' + Just ep -> configureService (setEndpoint True (BS8.pack ep) 443 Amazonka.S3.defaultService) aws' withLogFunc (setLogMinLevel scoLogLevel lo) $ \logFunc -> do let pantryConfig = PantryConfig @@ -179,8 +195,9 @@ stackageServerCron StackageCronOptions {..} = do , pcCasaMaxPerRequest = defaultCasaMaxPerRequest , pcSnapshotLocation = defaultSnapshotLocation } - currentHoogleVersionId <- - runRIO logFunc $ getCurrentHoogleVersionIdWithPantryConfig pantryConfig + currentHoogleVersionId <- runRIO logFunc $ do + runStackageMigrations' pantryConfig + getCurrentHoogleVersionIdWithPantryConfig pantryConfig let stackage = StackageCron { scPantryConfig = pantryConfig @@ -191,6 +208,7 @@ stackageServerCron StackageCronOptions {..} = do , scCachedGPD = gpdCache , scEnvAWS = aws , scDownloadBucketName = scoDownloadBucketName + , scDownloadBucketUrl = scoDownloadBucketUrl , scUploadBucketName = scoUploadBucketName , scSnapshotsRepo = scoSnapshotsRepo , scReportProgress = scoReportProgress @@ -218,7 +236,6 @@ runStackageUpdate doNotUpload = do unless doNotUpload uploadSnapshotsJSON buildAndUploadHoogleDB doNotUpload logInfo "Finished building and uploading Hoogle DBs" - run $ rawExecute "TRUNCATE TABLE latest_version" [] -- | This will look at 'global-hints.yaml' and will create core package getters that are reused @@ -386,10 +403,11 @@ addPantryPackage sid compiler isHidden flags (PantryPackage pc treeKey) = do checkForDocs :: SnapshotId -> SnapName -> ResourceT (RIO StackageCron) () checkForDocs snapshotId snapName = do bucketName <- lift (scDownloadBucketName <$> ask) + env <- asks scEnvAWS mods <- runConduit $ - AWS.paginate (req bucketName) .| concatMapC (^. lovrsContents) .| - mapC (\obj -> toText (obj ^. oKey)) .| + paginate env (req bucketName) .| concatMapC (fromMaybe [] . (^. listObjectsV2Response_contents)) .| + mapC (\obj -> toText (obj ^. object_key)) .| concatMapC (T.stripSuffix ".html" >=> T.stripPrefix prefix >=> pathToPackageModule) .| sinkList -- it is faster to download all modules in this snapshot, than process them with a conduit all @@ -398,16 +416,16 @@ checkForDocs snapshotId snapName = do -- Cache is for SnapshotPackageId, there will be many modules per peckage, no need to look into -- the database for each one of them. n <- max 1 . (`div` 2) <$> getNumCapabilities - notFoundList <- lift $ pooledMapConcurrentlyN n (markModules sidsCacheRef) mods - forM_ (Set.fromList $ catMaybes notFoundList) $ \pid -> + unexpectedPackages <- lift $ pooledMapConcurrentlyN n (markModules sidsCacheRef) mods + forM_ (Set.fromList $ catMaybes unexpectedPackages) $ \pid -> lift $ logWarn $ - "Documentation available for package '" <> display pid <> - "' but was not found in this snapshot: " <> + "Documentation found for package '" <> display pid <> + "', which does not exist in this snapshot: " <> display snapName where prefix = textDisplay snapName <> "/" - req bucketName = listObjectsV2 (BucketName bucketName) & lovPrefix .~ Just prefix + req bucketName = newListObjectsV2 (BucketName bucketName) & listObjectsV2_prefix ?~ prefix -- | This function records all package modules that have documentation available, the ones -- that are not found in the snapshot reported back as an error. Besides being run -- concurrently this function optimizes the SnapshotPackageId lookup as well, since that can @@ -417,7 +435,7 @@ checkForDocs snapshotId snapName = do let mSnapshotPackageId = Map.lookup pid sidsCache mFound <- run $ markModuleHasDocs snapshotId pid mSnapshotPackageId modName case mFound of - Nothing -> pure $ Just pid + Nothing -> pure $ Just pid -- This package doesn't exist in the snapshot! Just snapshotPackageId | Nothing <- mSnapshotPackageId -> do atomicModifyIORef' @@ -663,9 +681,9 @@ uploadSnapshotsJSON = do uploadBucket <- scUploadBucketName <$> ask let key = ObjectKey "snapshots.json" uploadFromRIO key $ - set poACL (Just OPublicRead) $ - set poContentType (Just "application/json") $ - putObject (BucketName uploadBucket) key (toBody snapshots) + set putObject_acl (Just ObjectCannedACL_Public_read) $ + set putObject_contentType (Just "application/json") $ + newPutObject (BucketName uploadBucket) key (toBody snapshots) -- | Writes a gzipped version of hoogle db into temporary file onto the file system and then uploads -- it to S3. Temporary file is removed upon completion @@ -677,14 +695,14 @@ uploadHoogleDB fp key = body <- toBody <$> readFileBinary fpgz uploadBucket <- scUploadBucketName <$> ask uploadFromRIO key $ - set poACL (Just OPublicRead) $ putObject (BucketName uploadBucket) key body + set putObject_acl (Just ObjectCannedACL_Public_read) $ newPutObject (BucketName uploadBucket) key body -uploadFromRIO :: AWSRequest a => ObjectKey -> a -> RIO StackageCron () +uploadFromRIO :: (AWSRequest a, Typeable a, Typeable (AWSResponse a)) => ObjectKey -> a -> RIO StackageCron () uploadFromRIO key po = do logInfo $ "Uploading " <> displayShow key <> " to S3 bucket." - env <- ask - eres <- runResourceT $ runAWS env $ trying _Error $ send po + env <- asks scEnvAWS + eres <- runResourceT $ trying _Error $ send env po case eres of Left e -> logError $ "Couldn't upload " <> displayShow key <> " to S3 becuase " <> displayShow e @@ -693,18 +711,30 @@ uploadFromRIO key po = do buildAndUploadHoogleDB :: Bool -> RIO StackageCron () buildAndUploadHoogleDB doNotUpload = do snapshots <- lastLtsNightlyWithoutHoogleDb 5 5 + -- currentHoogleVersionId <- scHoogleVersionId <$> ask env <- ask - locker <- newHoogleLocker (env ^. logFuncL) (env ^. envManager) + awsEnv <- asks scEnvAWS + bucketUrl <- asks scDownloadBucketUrl + -- locker is an action that returns the path to a hoogle db, if one exists + -- in the haddock bucket already. + locker <- newHoogleLocker (env ^. logFuncL) (awsEnv ^. env_manager) bucketUrl + let insertH = checkInsertSnapshotHoogleDb True + checkH = checkInsertSnapshotHoogleDb False for_ snapshots $ \(snapshotId, snapName) -> - unlessM (checkInsertSnapshotHoogleDb False snapshotId) $ do + -- Even though we just got a list of snapshots that don't have hoogle + -- databases, we check again. For some reason. I don't see how this can + -- actually be useful. both lastLtsNightlyWithoutHoogleDb and + -- checkInsertSnapshotHoogleDb just check against SnapshotHoogleDb. + -- Perhaps the check can be removed. + unlessM (checkH snapshotId) $ do logInfo $ "Starting Hoogle database download: " <> display (hoogleKey snapName) mfp <- singleRun locker snapName case mfp of Just _ -> do logInfo $ "Current hoogle database exists for: " <> display snapName - void $ checkInsertSnapshotHoogleDb True snapshotId + void $ insertH snapshotId Nothing -> do - logInfo $ "Current hoogle database does not yet exist for: " <> display snapName + logInfo $ "Current hoogle database does not yet exist in the bucket for: " <> display snapName mfp' <- createHoogleDB snapshotId snapName forM_ mfp' $ \fp -> do let key = hoogleKey snapName @@ -713,24 +743,27 @@ buildAndUploadHoogleDB doNotUpload = do renamePath fp dest unless doNotUpload $ do uploadHoogleDB dest (ObjectKey key) - void $ checkInsertSnapshotHoogleDb True snapshotId + void $ insertH snapshotId +-- | Create a hoogle db from haddocks for the given snapshot, and upload it to +-- the haddock bucket. createHoogleDB :: SnapshotId -> SnapName -> RIO StackageCron (Maybe FilePath) createHoogleDB snapshotId snapName = handleAny logException $ do logInfo $ "Creating Hoogle DB for " <> display snapName - downloadBucket <- scDownloadBucketName <$> ask + downloadBucketUrl <- scDownloadBucketUrl <$> ask let root = "hoogle-gen" bindir = root "bindir" outname = root "output.hoo" tarKey = toPathPiece snapName <> "/hoogle/orig.tar" - tarUrl = "https://s3.amazonaws.com/" <> downloadBucket <> "/" <> tarKey + tarUrl = downloadBucketUrl <> "/" <> tarKey tarFP = root T.unpack tarKey -- When tarball is downloaded it is saved with durability and atomicity, so if it -- is present it is not in a corrupted state unlessM (doesFileExist tarFP) $ do req <- parseRequest $ T.unpack tarUrl - man <- view envManager + env <- asks scEnvAWS + let man = env ^. env_manager withResponseUnliftIO req {decompress = const True} man $ \res -> do throwErrorStatusCodes req res createDirectoryIfMissing True $ takeDirectory tarFP diff --git a/src/Stackage/Database/PackageInfo.hs b/src/Stackage/Database/PackageInfo.hs index f47a563..80ecad0 100644 --- a/src/Stackage/Database/PackageInfo.hs +++ b/src/Stackage/Database/PackageInfo.hs @@ -23,7 +23,7 @@ import Distribution.Compiler (CompilerFlavor(GHC)) import Distribution.Package (Dependency(..)) import Distribution.PackageDescription (CondTree(..), Condition(..), ConfVar(..), - Flag(flagDefault, flagName), FlagName, + PackageFlag(..), flagDefault, flagName, FlagName, GenericPackageDescription, author, condExecutables, condLibrary, description, genPackageFlags, homepage, @@ -152,7 +152,7 @@ getCheckCond compiler overrideFlags gpd = go where go (Var (OS os)) = os == Linux -- arbitrary go (Var (Arch arch)) = arch == X86_64 -- arbitrary - go (Var (Flag flag)) = fromMaybe False $ Map.lookup flag flags + go (Var (PackageFlag flag)) = fromMaybe False $ Map.lookup flag flags go (Var (Impl flavor range)) = flavor == compilerFlavor && compilerVersion `withinRange` range go (Lit b) = b go (CNot c) = not $ go c diff --git a/src/Stackage/Database/Query.hs b/src/Stackage/Database/Query.hs index 7f2b5e5..41041a2 100644 --- a/src/Stackage/Database/Query.hs +++ b/src/Stackage/Database/Query.hs @@ -167,25 +167,48 @@ ltsBefore x y = do go (Entity _ lts) = (ltsSnap lts, SNLts (ltsMajor lts) (ltsMinor lts)) +-- | Queries the database for the latest LTS and nightly snapshots that do not +-- have corresponding entries in the SnapshotHoogleDb table with the current +-- Hoogle version. lastLtsNightlyWithoutHoogleDb :: Int -> Int -> RIO StackageCron [(SnapshotId, SnapName)] lastLtsNightlyWithoutHoogleDb ltsCount nightlyCount = do currentHoogleVersionId <- scHoogleVersionId <$> ask let getSnapshotsWithoutHoogeDb snapId snapCount = map (unValue *** unValue) <$> select + -- "snap" is either Lts or Nightly, while "snapshot" is indeed + -- "snapshot" (from $ \(snap `InnerJoin` snapshot) -> do on $ snap ^. snapId ==. snapshot ^. SnapshotId where_ $ notExists $ from $ \snapshotHoogleDb -> where_ $ - (snapshotHoogleDb ^. SnapshotHoogleDbSnapshot ==. snapshot ^. - SnapshotId) &&. - (snapshotHoogleDb ^. SnapshotHoogleDbVersion ==. - val currentHoogleVersionId) + (snapshotHoogleDb ^. SnapshotHoogleDbSnapshot + ==. snapshot ^. SnapshotId) + &&. (snapshotHoogleDb ^. SnapshotHoogleDbVersion + ==. val currentHoogleVersionId) orderBy [desc (snapshot ^. SnapshotCreated)] limit $ fromIntegral snapCount pure (snapshot ^. SnapshotId, snapshot ^. SnapshotName)) + -- In sql, this query would be + -- + -- select snapshot.id, snapshot.name + -- from snapshot + -- join $foo as snap -- either Lts or Nightly + -- on snap.snap = snapshot.id + -- where not exists ( + -- select 1 + -- from snapshot_hoogle_db + -- where snapshot_hoogle_db.snapshot = snapshot.id + -- and snapshot_hoogle_db.version = $currentHoogleVersionId + -- ) + -- order by snapshot.created desc + -- limit $snapCount + -- + -- So it returns a list of snapshots where there is no + -- corresponding entry in the snapshot_hoogle_db table for the + -- current hoogle version. run $ do lts <- getSnapshotsWithoutHoogeDb LtsSnap ltsCount nightly <- getSnapshotsWithoutHoogeDb NightlySnap nightlyCount @@ -206,7 +229,7 @@ snapshotsJSON = do Just n -> (("nightly" A..= printNightly n) :) return $ A.object $ nightly lts where - toObj lts@(major, _) = T.pack ("lts-" <> show major) A..= printLts lts + toObj lts@(major, _) = fromString ("lts-" <> show major) A..= printLts lts printLts (major, minor) = "lts-" <> show major <> "." <> show minor printNightly day = "nightly-" <> T.pack (show day) @@ -1159,10 +1182,27 @@ checkInsertSnapshotHoogleDb shouldInsert snapshotId = do (from (\v -> do where_ $ v ^. VersionId ==. val hoogleVersionId + -- This is reaching into the *pantry* + -- database! pure (v ^. VersionVersion))) + -- in sql, this query would be + -- + -- select version.version + -- from version + -- where version.id = $hoogleVersionId + -- + -- So it returns the "version"s that corresponds to the + -- current hoogle version id. + -- mhver is now Maybe Version, and corresponds to the current + -- hoogle version, assuming it exists in the Version table forM_ mhver $ \hver -> lift $ logInfo $ "Marking hoogle database for version " <> display hver <> " as available." + -- whether or not the version exists, we still put it into snapshot_hoogle_db + -- So literally the only use of the above query is to log the + -- action we're taking. isJust <$> P.insertUniqueEntity sh + -- if we're not inserting, we're just checking if it already exists + -- in snapshot_hoogle_db. else isJust <$> P.checkUnique sh diff --git a/src/Stackage/Database/Schema.hs b/src/Stackage/Database/Schema.hs index 0e45bab..b4cb87d 100644 --- a/src/Stackage/Database/Schema.hs +++ b/src/Stackage/Database/Schema.hs @@ -15,6 +15,7 @@ {-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE TypeFamilies #-} {-# LANGUAGE UndecidableInstances #-} +{-# LANGUAGE TypeOperators #-} module Stackage.Database.Schema ( -- * Database run @@ -23,6 +24,7 @@ module Stackage.Database.Schema , GetStackageDatabase(..) , withStackageDatabase , runStackageMigrations + , runStackageMigrations' , getCurrentHoogleVersionId , getCurrentHoogleVersionIdWithPantryConfig -- * Tables @@ -217,25 +219,33 @@ withStackageDatabase shouldLog dbs inner = do bracket (liftIO getPoolIO) (liftIO . destroyAllResources) $ \pool -> do inner (StackageDatabase (`runSqlPool` pool)) -getSchema :: (HasLogFunc env, GetStackageDatabase env (RIO env)) => RIO env (Maybe Int) +getSchema :: ReaderT SqlBackend (RIO RIO.LogFunc) (Maybe Int) getSchema = - run $ do + do eres <- tryAny (selectList [] []) lift $ logInfo $ "getSchema result: " <> displayShow eres case eres of Right [Entity _ (Schema v)] -> return $ Just v _ -> return Nothing +runStackageMigrations' :: PantryConfig -> RIO RIO.LogFunc () -- HasLogFunc env => PantryConfig -> RIO env () +runStackageMigrations' pantryConfig = do + stackageDb <- getStackageDatabaseFromPantry pantryConfig + runDatabase stackageDb stackageMigrations + + runStackageMigrations :: (HasLogFunc env, GetStackageDatabase env (RIO env)) => RIO env () -runStackageMigrations = do +runStackageMigrations = run stackageMigrations + +stackageMigrations :: ReaderT SqlBackend (RIO RIO.LogFunc) () -- ReaderT SqlBackend (RIO RIO.LogFunc) () +stackageMigrations = do + runMigration Pantry.migrateAll + runMigration migrateAll actualSchema <- getSchema - run $ do - runMigration Pantry.migrateAll - runMigration migrateAll - unless (actualSchema == Just currentSchema) $ do - lift $ - logWarn $ - "Current schema does not match actual schema: " <> - displayShow (actualSchema, currentSchema) - deleteWhere ([] :: [Filter Schema]) - insert_ $ Schema currentSchema + unless (actualSchema == Just currentSchema) $ do + lift $ + logWarn $ + "Current schema does not match actual schema: " <> + displayShow (actualSchema, currentSchema) + deleteWhere ([] :: [Filter Schema]) + insert_ $ Schema currentSchema diff --git a/src/Stackage/Database/Types.hs b/src/Stackage/Database/Types.hs index 02dabdb..c42ef55 100644 --- a/src/Stackage/Database/Types.hs +++ b/src/Stackage/Database/Types.hs @@ -40,7 +40,8 @@ module Stackage.Database.Types , Origin(..) , LatestInfo(..) , Deprecation(..) - , haddockBucketName + , defHaddockBucketName + , defHaddockBucketUrl , Changelog(..) , Readme(..) , StackageCronOptions(..) @@ -49,7 +50,7 @@ module Stackage.Database.Types import Data.Aeson import qualified Data.Text as T import Data.Text.Read (decimal) -import Network.AWS (Env, HasEnv(..)) +import Amazonka (Env) import Pantry (BlobKey(..), CabalFileInfo(..), FileSize(..), HasPantryConfig(..), PantryConfig, PackageIdentifierRevision(..), TreeKey(..)) import Pantry.SHA256 (fromHexText) @@ -61,12 +62,16 @@ import Stackage.Database.Schema import Text.Blaze (ToMarkup(..)) import Types -haddockBucketName :: Text -haddockBucketName = "haddock.stackage.org" +defHaddockBucketName :: Text +defHaddockBucketName = "haddock.stackage.org" + +defHaddockBucketUrl :: Text +defHaddockBucketUrl = "https://s3.amazonaws.com/" <> defHaddockBucketName data StackageCronOptions = StackageCronOptions { scoForceUpdate :: !Bool , scoDownloadBucketName :: !Text + , scoDownloadBucketUrl :: !Text , scoUploadBucketName :: !Text , scoDoNotUpload :: !Bool , scoLogLevel :: !LogLevel @@ -84,6 +89,7 @@ data StackageCron = StackageCron , scCachedGPD :: !(IORef (IntMap GenericPackageDescription)) , scEnvAWS :: !Env , scDownloadBucketName :: !Text + , scDownloadBucketUrl :: !Text , scUploadBucketName :: !Text , scSnapshotsRepo :: !GithubRepo , scReportProgress :: !Bool @@ -91,9 +97,6 @@ data StackageCron = StackageCron , scHoogleVersionId :: !VersionId } -instance HasEnv StackageCron where - environment = lens scEnvAWS (\c f -> c {scEnvAWS = f}) - instance HasLogFunc StackageCron where logFuncL = lens scLogFunc (\c f -> c {scLogFunc = f}) diff --git a/src/Stackage/Snapshot/Diff.hs b/src/Stackage/Snapshot/Diff.hs index 641812c..7371f7a 100644 --- a/src/Stackage/Snapshot/Diff.hs +++ b/src/Stackage/Snapshot/Diff.hs @@ -15,6 +15,7 @@ module Stackage.Snapshot.Diff import ClassyPrelude (sortOn, toCaseFold) import Data.Aeson +import Data.Aeson.Key import qualified Data.Text as T (commonPrefixes) import Data.These import RIO @@ -61,7 +62,7 @@ newtype VersionChange = VersionChange { unVersionChange :: These VersionP Versio deriving (Show, Eq, Generic, Typeable) instance ToJSON (WithSnapshotNames VersionChange) where - toJSON (WithSnapshotNames (toPathPiece -> aKey) (toPathPiece -> bKey) change) = + toJSON (WithSnapshotNames (fromText . toPathPiece -> aKey) (fromText . toPathPiece -> bKey) change) = case change of VersionChange (This a) -> object [ aKey .= a ] VersionChange (That b) -> object [ bKey .= b ] diff --git a/src/Types.hs b/src/Types.hs index 2d4de53..6533bf4 100644 --- a/src/Types.hs +++ b/src/Types.hs @@ -407,7 +407,25 @@ instance ToMarkup VersionRangeP where instance PersistField VersionRangeP where toPersistValue = PersistText . textDisplay fromPersistValue v = - fromPersistValue v >>= bimap (T.pack . displayException) VersionRangeP . dtParse + fromPersistValue v >>= bimap (T.pack . displayException) VersionRangeP . dtParse . hackwardCompat_3_4 + where + -- We use parseSimple under the hood, which always parses using + -- the latest version of the Cabal spec. In practice, this hasn't + -- been a problem. Until now. + -- + -- Cabal spec 3.4 dropped support for "-any" as a version range, and the + -- database is full of such values. Luckily, ">=0" is a + -- backward-compatible synonym for "-any". New versions of this app will + -- write ">=0" instead of "-any", which old versions of this app will + -- understand just fine. We just need to substitute on read. + -- + -- FIXME: strictly speaking, VersionRange cannot be parsed without + -- knowing the Cabal spec version of the package that used it. There's + -- nothing *wrong* with "-any". That means we probably need to decode it + -- no further than Text and do further processing outside of the + -- PersistField instance. + hackwardCompat_3_4 "-any" = ">=0" + hackwardCompat_3_4 t = t instance PersistFieldSql VersionRangeP where sqlType _ = SqlString diff --git a/stack.yaml b/stack.yaml index 5449ef3..12cc9dc 100644 --- a/stack.yaml +++ b/stack.yaml @@ -1,16 +1,32 @@ -resolver: lts-18.28 +resolver: lts-22.6 extra-deps: -- amazonka-1.6.1 -- barrier-0.1.1 -- classy-prelude-yesod-1.5.0 -- unliftio-core-0.1.2.0 -- yesod-gitrepo-0.3.0 -- static-bytes-0.1.0 -- companion-0.1.0 -- aeson-warning-parser-0.1.0 -- hpack-0.35.0 +# WARNING: Changing the hoogle version causes stackage-server-cron to regenerate +# Hoogle databases FOR EVERY SNAPSHOT, EVER. Usually, that's ok! But don't +# forget! The consequences are: (1) More disk usage. Hoogle databases are not +# cleaned up on the stackage-server-cron side, nor on the stackage-server side. +# (Yet. This will change.) (2) More bucket usage. While it's easy to say it's a +# drop in the literal bucket, such excessive misuse of storage makes +# administration, backups, disaster recovery, and many other DevOps concerns +# harder and harder. All but the latest LTS's database are literally never used +# anyway. (3) The Hoogle database schema is defined by the first three +# version components. Any more frequent regeneration is pure unadulterated +# waste. (4) Stackage's Hoogle search will be unavailable until the new +# databases have been generated. +- hoogle-5.0.18.4@sha256:1372458e97dff541fcda099236af7936bf93ee6b8c5d15695ee6d9426dff5eed,3171 +- safe-0.3.20@sha256:7813ad56161f57d5162a924de5597d454162a2faed06be6e268b37bb5c19d48d,2312 +- Cabal-3.8.1.0@sha256:77121d8e1aff14a0fd95684b751599db78a7dd26d55862d9fcef27c88b193e9d,12609 +- Cabal-syntax-3.8.1.0@sha256:ed2d937ba6c6a20b75850349eedd41374885fc42369ef152d69e2ba70f44f593,7620 - git: https://github.com/commercialhaskell/pantry.git commit: 5df643cc1deb561d9c52a9cb6f593aba2bc4c08e +# This amazonka patched to support Cloudflare, which kinda has a bug. See +# https://github.com/brendanhay/amazonka/issues/975 for details. +- github: chreekat/amazonka + commit: b/r2-compat + subdirs: [lib/amazonka-core] -drop-packages: -- Cabal +nix: + packages: + - zlib + - postgresql + - pkg-config + - haskell-language-server diff --git a/static/opensearchhoogle.xml b/static/opensearchhoogle.xml index 5c48e7b..44557cc 100644 --- a/static/opensearchhoogle.xml +++ b/static/opensearchhoogle.xml @@ -3,7 +3,6 @@ Hoogle Stackage.org Search modules on Stackage.org using hoogle FP Complete CORP. - Copyright FP Complete CORP. false en-us UTF-8 diff --git a/static/opensearchpackage.xml b/static/opensearchpackage.xml index ea2ca65..f8657b6 100644 --- a/static/opensearchpackage.xml +++ b/static/opensearchpackage.xml @@ -1,15 +1,14 @@ Stackage Packages Stackage.org package page - Just to a Stackage.org package page + Jump to a Stackage.org package page FP Complete CORP. - Copyright FP Complete CORP. false en-us UTF-8 UTF-8 https://www.stackage.org/static/img/stackage.png - + https://www.stackage.org diff --git a/templates/default-layout.hamlet b/templates/default-layout.hamlet index d98f073..9a715b1 100644 --- a/templates/default-layout.hamlet +++ b/templates/default-layout.hamlet @@ -30,6 +30,10 @@ $else
- A service provided by - + A service created by + FP Complete + in 2014 | Donated to the + + Haskell Foundation + in 2024. diff --git a/templates/home.hamlet b/templates/home.hamlet index addd981..fdafb70 100644 --- a/templates/home.hamlet +++ b/templates/home.hamlet @@ -60,4 +60,7 @@ FAQ section on Github.

- Stackage's infrastructure, build machines, initial creation and ongoing maintenance, are proudly sponsored by FP Complete. + Stackage's infrastructure, build machines, initial creation and ongoing maintenance were proudly sponsored by FP Complete from 2014 to 2024. + +

+ Today it is a service provided by the Haskell Foundation.