diff --git a/LICENSE b/LICENSE
index c97f84d..9b2de3f 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,6 +1,7 @@
The MIT License (MIT)
Copyright (c) 2014-2017 FP Complete
+Copyright (c) 2024 Haskell Foundation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
diff --git a/app/stackage-server-cron.hs b/app/stackage-server-cron.hs
index aaebc0c..b0fc6bd 100644
--- a/app/stackage-server-cron.hs
+++ b/app/stackage-server-cron.hs
@@ -38,17 +38,24 @@ optsParser =
\their yaml files from stackage-snapshots repo have been updated or not.") <*>
option
readText
- (long "download-bucket" <> value haddockBucketName <> metavar "DOWNLOAD_BUCKET" <>
+ (long "download-bucket" <> value defHaddockBucketName <> metavar "DOWNLOAD_BUCKET" <>
help
("S3 Bucket name where things like haddock and current hoogle files should \
- \be downloaded from. Default is: " <>
- T.unpack haddockBucketName)) <*>
+ \be downloaded from. Used in S3 API read operations. Default is: " <>
+ T.unpack defHaddockBucketName)) <*>
option
readText
- (long "upload-bucket" <> value haddockBucketName <> metavar "UPLOAD_BUCKET" <>
+ (long "download-bucket-url" <> value defHaddockBucketUrl <> metavar "DOWNLOAD_BUCKET_URL" <>
+ help
+ ("Publicly accessible URL where the download bucket can be accessed. Used for \
+ \serving the Haddocks on the website. Default is: " <>
+ T.unpack defHaddockBucketUrl)) <*>
+ option
+ readText
+ (long "upload-bucket" <> value defHaddockBucketName <> metavar "UPLOAD_BUCKET" <>
help
("S3 Bucket where hoogle db and snapshots.json file will be uploaded to. Default is: " <>
- T.unpack haddockBucketName)) <*>
+ T.unpack defHaddockBucketName)) <*>
switch
(long "do-not-upload" <>
help "Stop from hoogle db and snapshots.json from being generated and uploaded") <*>
diff --git a/config/settings.yml b/config/settings.yml
index a28a838..52e7406 100644
--- a/config/settings.yml
+++ b/config/settings.yml
@@ -24,3 +24,6 @@ force-ssl: false
postgres-string: "_env:PGSTRING:host=localhost port=5432 user=stackage dbname=stackage password=stackage"
postgres-poolsize: "_env:PGPOOLSIZE:8"
+
+# Publicly-accessible URL for the bucket holding Haddock contents.
+download-bucket-url: "_env:DOWNLOAD_BUCKET_URL:https://s3.amazonaws.com/haddock.stackage.org"
diff --git a/src/Application.hs b/src/Application.hs
index 0818f45..5f0bcd3 100644
--- a/src/Application.hs
+++ b/src/Application.hs
@@ -157,7 +157,7 @@ withFoundation appLogFunc appSettings inner = do
runRIO appLogFunc $ RIO.logError $ "Couldn't get Stack matcher: " <> displayShow e
pure oldMatcher
appMirrorStatus <- mkUpdateMirrorStatus
- hoogleLocker <- newHoogleLocker appLogFunc appHttpManager
+ hoogleLocker <- newHoogleLocker appLogFunc appHttpManager (appDownloadBucketUrl appSettings)
let appGetHoogleDB = singleRun hoogleLocker
let appGitRev = $$tGitRev
runConcurrently $ runContentUpdates *> Concurrently (inner App {..})
diff --git a/src/Control/SingleRun.hs b/src/Control/SingleRun.hs
index 776072e..aacd0bb 100644
--- a/src/Control/SingleRun.hs
+++ b/src/Control/SingleRun.hs
@@ -84,6 +84,12 @@ singleRun sr@(SingleRun var f) k =
-- OK, we're done running, so let other
-- threads run this again.
+
+ -- NB: as soon as we've modified the MVar, the next
+ -- call to singleRun will think no thread is working and
+ -- start over. Anything waiting on us will get our
+ -- result, but nobody else will. That's ok: singleRun
+ -- just provides a little caching on top of a mutex.
modifyMVar_ var $ return . filter (\(k', _) -> k /= k')
case eres of
diff --git a/src/Data/WebsiteContent.hs b/src/Data/WebsiteContent.hs
index aa5ce10..eac2d5a 100644
--- a/src/Data/WebsiteContent.hs
+++ b/src/Data/WebsiteContent.hs
@@ -2,6 +2,7 @@
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE TypeFamilies #-}
+{-# LANGUAGE TypeOperators #-}
module Data.WebsiteContent
( WebsiteContent (..)
, StackRelease (..)
diff --git a/src/Handler/DownloadStack.hs b/src/Handler/DownloadStack.hs
index ecd8e14..154c1e9 100644
--- a/src/Handler/DownloadStack.hs
+++ b/src/Handler/DownloadStack.hs
@@ -11,6 +11,7 @@ import Data.Conduit.Attoparsec (sinkParser)
import Data.WebsiteContent
import Import
import Yesod.GitRepo
+import qualified Data.Aeson.KeyMap as Aeson
getDownloadStackListR :: Handler Html
getDownloadStackListR = track "Handler.DownloadStack.getDownloadStackListR" $ do
@@ -35,14 +36,14 @@ getLatestMatcher man = do
return $ \pattern' -> do
let pattern'' = pattern' ++ "."
Object top <- return val
- Array assets <- lookup "assets" top
+ Array assets <- Aeson.lookup "assets" top
headMay $ preferZip $ catMaybes $ map (findMatch pattern'') assets
where
findMatch pattern' (Object o) = do
- String name <- lookup "name" o
+ String name <- Aeson.lookup "name" o
guard $ not $ ".asc" `isSuffixOf` name
guard $ pattern' `isInfixOf` name
- String url <- lookup "browser_download_url" o
+ String url <- Aeson.lookup "browser_download_url" o
Just url
findMatch _ _ = Nothing
diff --git a/src/Handler/Haddock.hs b/src/Handler/Haddock.hs
index 5a7112b..1f37d58 100644
--- a/src/Handler/Haddock.hs
+++ b/src/Handler/Haddock.hs
@@ -8,13 +8,14 @@ import Import
import qualified Data.Text as T (takeEnd)
import Stackage.Database
-makeURL :: SnapName -> [Text] -> Text
-makeURL snapName rest = concat
- $ "https://s3.amazonaws.com/"
- : haddockBucketName
- : "/"
- : toPathPiece snapName
- : map (cons '/') rest
+makeURL :: SnapName -> [Text] -> Handler Text
+makeURL snapName rest = do
+ bucketUrl <- getsYesod (appDownloadBucketUrl . appSettings)
+ pure . concat
+ $ bucketUrl
+ : "/"
+ : toPathPiece snapName
+ : map (cons '/') rest
shouldRedirect :: Bool
shouldRedirect = False
@@ -27,7 +28,7 @@ getHaddockR snapName rest
result <- redirectWithVersion snapName rest
case result of
Just route -> redirect route
- Nothing -> redirect $ makeURL snapName rest
+ Nothing -> redirect =<< makeURL snapName rest
| Just docType <- mdocType = do
cacheSeconds $ 60 * 60 * 24 * 7
result <- redirectWithVersion snapName rest
@@ -41,7 +42,7 @@ getHaddockR snapName rest
return ("text/html; charset=utf-8", mstyle /= Just "stackage")
DocJson ->
return ("application/jsontml; charset=utf-8", True)
- req <- parseRequest $ unpack $ makeURL snapName rest
+ req <- parseRequest =<< unpack <$> makeURL snapName rest
man <- getHttpManager <$> getYesod
(_, res) <- runReaderT (acquireResponse req >>= allocateAcquire) man
if plain
@@ -54,7 +55,7 @@ getHaddockR snapName rest
peekC >>= maybe (return ()) (const $ yield $ encodeUtf8 extra)
mapC id) .|
mapC (Chunk . toBuilder)
- | otherwise = redirect $ makeURL snapName rest
+ | otherwise = redirect =<< makeURL snapName rest
where
mdocType =
case T.takeEnd 5 <$> headMay (reverse rest) of
@@ -141,6 +142,9 @@ getHaddockBackupR (snap':rest)
| Just branch <- fromPathPiece snap' = track "Handler.Haddock.getHaddockBackupR" $ do
snapName <- newestSnapshot branch >>= maybe notFound pure
redirect $ HaddockR snapName rest
-getHaddockBackupR rest = track "Handler.Haddock.getHaddockBackupR" $ redirect $ concat
- $ "https://s3.amazonaws.com/haddock.stackage.org"
- : map (cons '/') rest
+getHaddockBackupR rest = track "Handler.Haddock.getHaddockBackupR" $ do
+ bucketUrl <- getsYesod (appDownloadBucketUrl . appSettings)
+ redirect
+ $ concat
+ $ bucketUrl
+ : map (cons '/') rest
diff --git a/src/Handler/MirrorStatus.hs b/src/Handler/MirrorStatus.hs
index 036573a..7dad1c9 100644
--- a/src/Handler/MirrorStatus.hs
+++ b/src/Handler/MirrorStatus.hs
@@ -12,6 +12,8 @@ import RIO.Time (diffUTCTime, addUTCTime, getCurrentTime)
import Text.XML.Stream.Parse
import Data.XML.Types (Event (EventContent), Content (ContentText))
import qualified Prelude
+import qualified Data.Aeson.Key as Aeson
+import qualified Data.Aeson.KeyMap as Aeson
getMirrorStatusR :: Handler Html
getMirrorStatusR = do
@@ -148,7 +150,7 @@ getLastModifiedGit org repo ref = do
lookupJ :: MonadThrow m => Text -> Value -> m Value
lookupJ key (Object o) =
- case lookup key o of
+ case Aeson.lookup (Aeson.fromText key) o of
Nothing -> error $ "Key not found: " ++ show key
Just x -> return x
lookupJ key val = error $ concat
diff --git a/src/Handler/StackageIndex.hs b/src/Handler/StackageIndex.hs
index 58bc767..8b44c42 100644
--- a/src/Handler/StackageIndex.hs
+++ b/src/Handler/StackageIndex.hs
@@ -2,13 +2,12 @@
module Handler.StackageIndex where
import Import
-import Stackage.Database.Types (haddockBucketName)
getStackageIndexR :: SnapName -> Handler TypedContent
-getStackageIndexR slug =
+getStackageIndexR slug = do
+ bucketUrl <- getsYesod (appDownloadBucketUrl . appSettings)
redirect $ concat
- [ "https://s3.amazonaws.com/"
- , haddockBucketName
+ [ bucketUrl
, "/package-index/"
, toPathPiece slug
, ".tar.gz"
diff --git a/src/Settings.hs b/src/Settings.hs
index c3e6e72..a02cc90 100644
--- a/src/Settings.hs
+++ b/src/Settings.hs
@@ -56,6 +56,8 @@ data AppSettings = AppSettings
-- ^ Force redirect to SSL
, appDevDownload :: Bool
-- ^ Controls how Git and database resources are downloaded (True means less downloading)
+ , appDownloadBucketUrl :: Text
+ -- ^ Publicly-accessible URL for the bucket holding Haddock contents.
}
data DatabaseSettings
@@ -109,6 +111,7 @@ instance FromJSON AppSettings where
appSkipCombining <- o .:? "skip-combining" .!= dev
appForceSsl <- o .:? "force-ssl" .!= not dev
appDevDownload <- o .:? "dev-download" .!= dev
+ appDownloadBucketUrl <- o .:? "download-bucket-url" .!= "https://s3.amazonaws.com/haddock.stackage.org"
return AppSettings {..}
diff --git a/src/Stackage/Database/Cron.hs b/src/Stackage/Database/Cron.hs
index 77ce0cf..04ea847 100644
--- a/src/Stackage/Database/Cron.hs
+++ b/src/Stackage/Database/Cron.hs
@@ -5,18 +5,20 @@
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TupleSections #-}
+{-# LANGUAGE FlexibleContexts #-}
module Stackage.Database.Cron
( stackageServerCron
, newHoogleLocker
, singleRun
, StackageCronOptions(..)
- , haddockBucketName
+ , defHaddockBucketName
+ , defHaddockBucketUrl
) where
import Conduit
import Control.DeepSeq
-import qualified Control.Monad.Trans.AWS as AWS (paginate)
import Control.SingleRun
+import Control.Lens ((?~))
import qualified Data.ByteString.Char8 as BS8
import qualified Data.Conduit.Binary as CB
import Data.Conduit.Tar (FileInfo(..), FileType(..), untar)
@@ -28,9 +30,12 @@ import Data.Yaml (decodeFileEither)
import Database.Persist hiding (exists)
import Database.Persist.Postgresql hiding (exists)
import qualified Hoogle
-import Network.AWS hiding (Request, Response)
-import Network.AWS.Data.Text (toText)
-import Network.AWS.S3
+import Amazonka hiding (Request, length, error)
+import Amazonka.Data.Text (toText)
+import Amazonka.S3
+import Amazonka.S3.ListObjectsV2
+import Amazonka.S3.Lens
+import Amazonka.Lens
import Network.HTTP.Client
import Network.HTTP.Client.Conduit (bodyReaderSource)
import Network.HTTP.Simple (getResponseBody, httpJSONEither)
@@ -59,6 +64,7 @@ import Stackage.Database.PackageInfo
import Stackage.Database.Query
import Stackage.Database.Schema
import Stackage.Database.Types
+import System.Environment (getEnvironment)
import UnliftIO.Concurrent (getNumCapabilities)
import Web.PathPieces (fromPathPiece, toPathPiece)
import qualified Control.Retry as Retry
@@ -73,10 +79,9 @@ hoogleKey name = T.concat
, ".hoo"
]
-hoogleUrl :: SnapName -> Text
-hoogleUrl n = T.concat
- [ "https://s3.amazonaws.com/"
- , haddockBucketName
+hoogleUrl :: SnapName -> Text -> Text
+hoogleUrl n haddockBucketUrl = T.concat
+ [ haddockBucketUrl
, "/"
, hoogleKey n
]
@@ -99,9 +104,14 @@ getStackageSnapshotsDir = do
withResponseUnliftIO :: MonadUnliftIO m => Request -> Manager -> (Response BodyReader -> m b) -> m b
withResponseUnliftIO req man f = withRunInIO $ \ runInIO -> withResponse req man (runInIO . f)
+-- | Under the SingleRun wrapper that ensures only one thing at a time is
+-- writing the file in question, ensure that a Hoogle database exists on the
+-- filesystem for the given SnapName. But only going so far as downloading it
+-- from the haddock bucket. See 'createHoogleDB' for the function that puts it
+-- there in the first place.
newHoogleLocker ::
- (HasLogFunc env, MonadIO m) => env -> Manager -> m (SingleRun SnapName (Maybe FilePath))
-newHoogleLocker env man = mkSingleRun hoogleLocker
+ (HasLogFunc env, MonadIO m) => env -> Manager -> Text -> m (SingleRun SnapName (Maybe FilePath))
+newHoogleLocker env man bucketUrl = mkSingleRun hoogleLocker
where
hoogleLocker :: MonadIO n => SnapName -> n (Maybe FilePath)
hoogleLocker name =
@@ -111,7 +121,7 @@ newHoogleLocker env man = mkSingleRun hoogleLocker
if exists
then return $ Just fp
else do
- req' <- parseRequest $ T.unpack $ hoogleUrl name
+ req' <- parseRequest $ T.unpack $ hoogleUrl name bucketUrl
let req = req' {decompress = const False}
withResponseUnliftIO req man $ \res ->
case responseStatus res of
@@ -124,11 +134,12 @@ newHoogleLocker env man = mkSingleRun hoogleLocker
sinkHandle h
return $ Just fp
| status == status404 -> do
- logDebug $ "NotFound: " <> display (hoogleUrl name)
+ logWarn $ "NotFound: " <> display (hoogleUrl name bucketUrl)
return Nothing
| otherwise -> do
body <- liftIO $ brConsume $ responseBody res
- mapM_ (logDebug . displayBytesUtf8) body
+ logWarn $ "Unexpected status: " <> displayShow status
+ mapM_ (logWarn . displayBytesUtf8) body
return Nothing
getHackageDeprecations ::
@@ -163,7 +174,12 @@ stackageServerCron StackageCronOptions {..} = do
cabalMutable <- newIORef Map.empty
gpdCache <- newIORef IntMap.empty
defaultProcessContext <- mkDefaultProcessContext
- aws <- newEnv Discover
+ aws <- do
+ aws' <- newEnv discover
+ endpoint <- lookup "AWS_S3_ENDPOINT" <$> getEnvironment
+ pure $ case endpoint of
+ Nothing -> aws'
+ Just ep -> configureService (setEndpoint True (BS8.pack ep) 443 Amazonka.S3.defaultService) aws'
withLogFunc (setLogMinLevel scoLogLevel lo) $ \logFunc -> do
let pantryConfig =
PantryConfig
@@ -179,8 +195,9 @@ stackageServerCron StackageCronOptions {..} = do
, pcCasaMaxPerRequest = defaultCasaMaxPerRequest
, pcSnapshotLocation = defaultSnapshotLocation
}
- currentHoogleVersionId <-
- runRIO logFunc $ getCurrentHoogleVersionIdWithPantryConfig pantryConfig
+ currentHoogleVersionId <- runRIO logFunc $ do
+ runStackageMigrations' pantryConfig
+ getCurrentHoogleVersionIdWithPantryConfig pantryConfig
let stackage =
StackageCron
{ scPantryConfig = pantryConfig
@@ -191,6 +208,7 @@ stackageServerCron StackageCronOptions {..} = do
, scCachedGPD = gpdCache
, scEnvAWS = aws
, scDownloadBucketName = scoDownloadBucketName
+ , scDownloadBucketUrl = scoDownloadBucketUrl
, scUploadBucketName = scoUploadBucketName
, scSnapshotsRepo = scoSnapshotsRepo
, scReportProgress = scoReportProgress
@@ -218,7 +236,6 @@ runStackageUpdate doNotUpload = do
unless doNotUpload uploadSnapshotsJSON
buildAndUploadHoogleDB doNotUpload
logInfo "Finished building and uploading Hoogle DBs"
- run $ rawExecute "TRUNCATE TABLE latest_version" []
-- | This will look at 'global-hints.yaml' and will create core package getters that are reused
@@ -386,10 +403,11 @@ addPantryPackage sid compiler isHidden flags (PantryPackage pc treeKey) = do
checkForDocs :: SnapshotId -> SnapName -> ResourceT (RIO StackageCron) ()
checkForDocs snapshotId snapName = do
bucketName <- lift (scDownloadBucketName <$> ask)
+ env <- asks scEnvAWS
mods <-
runConduit $
- AWS.paginate (req bucketName) .| concatMapC (^. lovrsContents) .|
- mapC (\obj -> toText (obj ^. oKey)) .|
+ paginate env (req bucketName) .| concatMapC (fromMaybe [] . (^. listObjectsV2Response_contents)) .|
+ mapC (\obj -> toText (obj ^. object_key)) .|
concatMapC (T.stripSuffix ".html" >=> T.stripPrefix prefix >=> pathToPackageModule) .|
sinkList
-- it is faster to download all modules in this snapshot, than process them with a conduit all
@@ -398,16 +416,16 @@ checkForDocs snapshotId snapName = do
-- Cache is for SnapshotPackageId, there will be many modules per peckage, no need to look into
-- the database for each one of them.
n <- max 1 . (`div` 2) <$> getNumCapabilities
- notFoundList <- lift $ pooledMapConcurrentlyN n (markModules sidsCacheRef) mods
- forM_ (Set.fromList $ catMaybes notFoundList) $ \pid ->
+ unexpectedPackages <- lift $ pooledMapConcurrentlyN n (markModules sidsCacheRef) mods
+ forM_ (Set.fromList $ catMaybes unexpectedPackages) $ \pid ->
lift $
logWarn $
- "Documentation available for package '" <> display pid <>
- "' but was not found in this snapshot: " <>
+ "Documentation found for package '" <> display pid <>
+ "', which does not exist in this snapshot: " <>
display snapName
where
prefix = textDisplay snapName <> "/"
- req bucketName = listObjectsV2 (BucketName bucketName) & lovPrefix .~ Just prefix
+ req bucketName = newListObjectsV2 (BucketName bucketName) & listObjectsV2_prefix ?~ prefix
-- | This function records all package modules that have documentation available, the ones
-- that are not found in the snapshot reported back as an error. Besides being run
-- concurrently this function optimizes the SnapshotPackageId lookup as well, since that can
@@ -417,7 +435,7 @@ checkForDocs snapshotId snapName = do
let mSnapshotPackageId = Map.lookup pid sidsCache
mFound <- run $ markModuleHasDocs snapshotId pid mSnapshotPackageId modName
case mFound of
- Nothing -> pure $ Just pid
+ Nothing -> pure $ Just pid -- This package doesn't exist in the snapshot!
Just snapshotPackageId
| Nothing <- mSnapshotPackageId -> do
atomicModifyIORef'
@@ -663,9 +681,9 @@ uploadSnapshotsJSON = do
uploadBucket <- scUploadBucketName <$> ask
let key = ObjectKey "snapshots.json"
uploadFromRIO key $
- set poACL (Just OPublicRead) $
- set poContentType (Just "application/json") $
- putObject (BucketName uploadBucket) key (toBody snapshots)
+ set putObject_acl (Just ObjectCannedACL_Public_read) $
+ set putObject_contentType (Just "application/json") $
+ newPutObject (BucketName uploadBucket) key (toBody snapshots)
-- | Writes a gzipped version of hoogle db into temporary file onto the file system and then uploads
-- it to S3. Temporary file is removed upon completion
@@ -677,14 +695,14 @@ uploadHoogleDB fp key =
body <- toBody <$> readFileBinary fpgz
uploadBucket <- scUploadBucketName <$> ask
uploadFromRIO key $
- set poACL (Just OPublicRead) $ putObject (BucketName uploadBucket) key body
+ set putObject_acl (Just ObjectCannedACL_Public_read) $ newPutObject (BucketName uploadBucket) key body
-uploadFromRIO :: AWSRequest a => ObjectKey -> a -> RIO StackageCron ()
+uploadFromRIO :: (AWSRequest a, Typeable a, Typeable (AWSResponse a)) => ObjectKey -> a -> RIO StackageCron ()
uploadFromRIO key po = do
logInfo $ "Uploading " <> displayShow key <> " to S3 bucket."
- env <- ask
- eres <- runResourceT $ runAWS env $ trying _Error $ send po
+ env <- asks scEnvAWS
+ eres <- runResourceT $ trying _Error $ send env po
case eres of
Left e ->
logError $ "Couldn't upload " <> displayShow key <> " to S3 becuase " <> displayShow e
@@ -693,18 +711,30 @@ uploadFromRIO key po = do
buildAndUploadHoogleDB :: Bool -> RIO StackageCron ()
buildAndUploadHoogleDB doNotUpload = do
snapshots <- lastLtsNightlyWithoutHoogleDb 5 5
+ -- currentHoogleVersionId <- scHoogleVersionId <$> ask
env <- ask
- locker <- newHoogleLocker (env ^. logFuncL) (env ^. envManager)
+ awsEnv <- asks scEnvAWS
+ bucketUrl <- asks scDownloadBucketUrl
+ -- locker is an action that returns the path to a hoogle db, if one exists
+ -- in the haddock bucket already.
+ locker <- newHoogleLocker (env ^. logFuncL) (awsEnv ^. env_manager) bucketUrl
+ let insertH = checkInsertSnapshotHoogleDb True
+ checkH = checkInsertSnapshotHoogleDb False
for_ snapshots $ \(snapshotId, snapName) ->
- unlessM (checkInsertSnapshotHoogleDb False snapshotId) $ do
+ -- Even though we just got a list of snapshots that don't have hoogle
+ -- databases, we check again. For some reason. I don't see how this can
+ -- actually be useful. both lastLtsNightlyWithoutHoogleDb and
+ -- checkInsertSnapshotHoogleDb just check against SnapshotHoogleDb.
+ -- Perhaps the check can be removed.
+ unlessM (checkH snapshotId) $ do
logInfo $ "Starting Hoogle database download: " <> display (hoogleKey snapName)
mfp <- singleRun locker snapName
case mfp of
Just _ -> do
logInfo $ "Current hoogle database exists for: " <> display snapName
- void $ checkInsertSnapshotHoogleDb True snapshotId
+ void $ insertH snapshotId
Nothing -> do
- logInfo $ "Current hoogle database does not yet exist for: " <> display snapName
+ logInfo $ "Current hoogle database does not yet exist in the bucket for: " <> display snapName
mfp' <- createHoogleDB snapshotId snapName
forM_ mfp' $ \fp -> do
let key = hoogleKey snapName
@@ -713,24 +743,27 @@ buildAndUploadHoogleDB doNotUpload = do
renamePath fp dest
unless doNotUpload $ do
uploadHoogleDB dest (ObjectKey key)
- void $ checkInsertSnapshotHoogleDb True snapshotId
+ void $ insertH snapshotId
+-- | Create a hoogle db from haddocks for the given snapshot, and upload it to
+-- the haddock bucket.
createHoogleDB :: SnapshotId -> SnapName -> RIO StackageCron (Maybe FilePath)
createHoogleDB snapshotId snapName =
handleAny logException $ do
logInfo $ "Creating Hoogle DB for " <> display snapName
- downloadBucket <- scDownloadBucketName <$> ask
+ downloadBucketUrl <- scDownloadBucketUrl <$> ask
let root = "hoogle-gen"
bindir = root > "bindir"
outname = root > "output.hoo"
tarKey = toPathPiece snapName <> "/hoogle/orig.tar"
- tarUrl = "https://s3.amazonaws.com/" <> downloadBucket <> "/" <> tarKey
+ tarUrl = downloadBucketUrl <> "/" <> tarKey
tarFP = root > T.unpack tarKey
-- When tarball is downloaded it is saved with durability and atomicity, so if it
-- is present it is not in a corrupted state
unlessM (doesFileExist tarFP) $ do
req <- parseRequest $ T.unpack tarUrl
- man <- view envManager
+ env <- asks scEnvAWS
+ let man = env ^. env_manager
withResponseUnliftIO req {decompress = const True} man $ \res -> do
throwErrorStatusCodes req res
createDirectoryIfMissing True $ takeDirectory tarFP
diff --git a/src/Stackage/Database/PackageInfo.hs b/src/Stackage/Database/PackageInfo.hs
index f47a563..80ecad0 100644
--- a/src/Stackage/Database/PackageInfo.hs
+++ b/src/Stackage/Database/PackageInfo.hs
@@ -23,7 +23,7 @@ import Distribution.Compiler (CompilerFlavor(GHC))
import Distribution.Package (Dependency(..))
import Distribution.PackageDescription (CondTree(..), Condition(..),
ConfVar(..),
- Flag(flagDefault, flagName), FlagName,
+ PackageFlag(..), flagDefault, flagName, FlagName,
GenericPackageDescription, author,
condExecutables, condLibrary,
description, genPackageFlags, homepage,
@@ -152,7 +152,7 @@ getCheckCond compiler overrideFlags gpd = go
where
go (Var (OS os)) = os == Linux -- arbitrary
go (Var (Arch arch)) = arch == X86_64 -- arbitrary
- go (Var (Flag flag)) = fromMaybe False $ Map.lookup flag flags
+ go (Var (PackageFlag flag)) = fromMaybe False $ Map.lookup flag flags
go (Var (Impl flavor range)) = flavor == compilerFlavor && compilerVersion `withinRange` range
go (Lit b) = b
go (CNot c) = not $ go c
diff --git a/src/Stackage/Database/Query.hs b/src/Stackage/Database/Query.hs
index 7f2b5e5..41041a2 100644
--- a/src/Stackage/Database/Query.hs
+++ b/src/Stackage/Database/Query.hs
@@ -167,25 +167,48 @@ ltsBefore x y = do
go (Entity _ lts) = (ltsSnap lts, SNLts (ltsMajor lts) (ltsMinor lts))
+-- | Queries the database for the latest LTS and nightly snapshots that do not
+-- have corresponding entries in the SnapshotHoogleDb table with the current
+-- Hoogle version.
lastLtsNightlyWithoutHoogleDb :: Int -> Int -> RIO StackageCron [(SnapshotId, SnapName)]
lastLtsNightlyWithoutHoogleDb ltsCount nightlyCount = do
currentHoogleVersionId <- scHoogleVersionId <$> ask
let getSnapshotsWithoutHoogeDb snapId snapCount =
map (unValue *** unValue) <$>
select
+ -- "snap" is either Lts or Nightly, while "snapshot" is indeed
+ -- "snapshot"
(from $ \(snap `InnerJoin` snapshot) -> do
on $ snap ^. snapId ==. snapshot ^. SnapshotId
where_ $
notExists $
from $ \snapshotHoogleDb ->
where_ $
- (snapshotHoogleDb ^. SnapshotHoogleDbSnapshot ==. snapshot ^.
- SnapshotId) &&.
- (snapshotHoogleDb ^. SnapshotHoogleDbVersion ==.
- val currentHoogleVersionId)
+ (snapshotHoogleDb ^. SnapshotHoogleDbSnapshot
+ ==. snapshot ^. SnapshotId)
+ &&. (snapshotHoogleDb ^. SnapshotHoogleDbVersion
+ ==. val currentHoogleVersionId)
orderBy [desc (snapshot ^. SnapshotCreated)]
limit $ fromIntegral snapCount
pure (snapshot ^. SnapshotId, snapshot ^. SnapshotName))
+ -- In sql, this query would be
+ --
+ -- select snapshot.id, snapshot.name
+ -- from snapshot
+ -- join $foo as snap -- either Lts or Nightly
+ -- on snap.snap = snapshot.id
+ -- where not exists (
+ -- select 1
+ -- from snapshot_hoogle_db
+ -- where snapshot_hoogle_db.snapshot = snapshot.id
+ -- and snapshot_hoogle_db.version = $currentHoogleVersionId
+ -- )
+ -- order by snapshot.created desc
+ -- limit $snapCount
+ --
+ -- So it returns a list of snapshots where there is no
+ -- corresponding entry in the snapshot_hoogle_db table for the
+ -- current hoogle version.
run $ do
lts <- getSnapshotsWithoutHoogeDb LtsSnap ltsCount
nightly <- getSnapshotsWithoutHoogeDb NightlySnap nightlyCount
@@ -206,7 +229,7 @@ snapshotsJSON = do
Just n -> (("nightly" A..= printNightly n) :)
return $ A.object $ nightly lts
where
- toObj lts@(major, _) = T.pack ("lts-" <> show major) A..= printLts lts
+ toObj lts@(major, _) = fromString ("lts-" <> show major) A..= printLts lts
printLts (major, minor) = "lts-" <> show major <> "." <> show minor
printNightly day = "nightly-" <> T.pack (show day)
@@ -1159,10 +1182,27 @@ checkInsertSnapshotHoogleDb shouldInsert snapshotId = do
(from
(\v -> do
where_ $ v ^. VersionId ==. val hoogleVersionId
+ -- This is reaching into the *pantry*
+ -- database!
pure (v ^. VersionVersion)))
+ -- in sql, this query would be
+ --
+ -- select version.version
+ -- from version
+ -- where version.id = $hoogleVersionId
+ --
+ -- So it returns the "version"s that corresponds to the
+ -- current hoogle version id.
+ -- mhver is now Maybe Version, and corresponds to the current
+ -- hoogle version, assuming it exists in the Version table
forM_ mhver $ \hver ->
lift $
logInfo $
"Marking hoogle database for version " <> display hver <> " as available."
+ -- whether or not the version exists, we still put it into snapshot_hoogle_db
+ -- So literally the only use of the above query is to log the
+ -- action we're taking.
isJust <$> P.insertUniqueEntity sh
+ -- if we're not inserting, we're just checking if it already exists
+ -- in snapshot_hoogle_db.
else isJust <$> P.checkUnique sh
diff --git a/src/Stackage/Database/Schema.hs b/src/Stackage/Database/Schema.hs
index 0e45bab..b4cb87d 100644
--- a/src/Stackage/Database/Schema.hs
+++ b/src/Stackage/Database/Schema.hs
@@ -15,6 +15,7 @@
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE UndecidableInstances #-}
+{-# LANGUAGE TypeOperators #-}
module Stackage.Database.Schema
( -- * Database
run
@@ -23,6 +24,7 @@ module Stackage.Database.Schema
, GetStackageDatabase(..)
, withStackageDatabase
, runStackageMigrations
+ , runStackageMigrations'
, getCurrentHoogleVersionId
, getCurrentHoogleVersionIdWithPantryConfig
-- * Tables
@@ -217,25 +219,33 @@ withStackageDatabase shouldLog dbs inner = do
bracket (liftIO getPoolIO) (liftIO . destroyAllResources) $ \pool -> do
inner (StackageDatabase (`runSqlPool` pool))
-getSchema :: (HasLogFunc env, GetStackageDatabase env (RIO env)) => RIO env (Maybe Int)
+getSchema :: ReaderT SqlBackend (RIO RIO.LogFunc) (Maybe Int)
getSchema =
- run $ do
+ do
eres <- tryAny (selectList [] [])
lift $ logInfo $ "getSchema result: " <> displayShow eres
case eres of
Right [Entity _ (Schema v)] -> return $ Just v
_ -> return Nothing
+runStackageMigrations' :: PantryConfig -> RIO RIO.LogFunc () -- HasLogFunc env => PantryConfig -> RIO env ()
+runStackageMigrations' pantryConfig = do
+ stackageDb <- getStackageDatabaseFromPantry pantryConfig
+ runDatabase stackageDb stackageMigrations
+
+
runStackageMigrations :: (HasLogFunc env, GetStackageDatabase env (RIO env)) => RIO env ()
-runStackageMigrations = do
+runStackageMigrations = run stackageMigrations
+
+stackageMigrations :: ReaderT SqlBackend (RIO RIO.LogFunc) () -- ReaderT SqlBackend (RIO RIO.LogFunc) ()
+stackageMigrations = do
+ runMigration Pantry.migrateAll
+ runMigration migrateAll
actualSchema <- getSchema
- run $ do
- runMigration Pantry.migrateAll
- runMigration migrateAll
- unless (actualSchema == Just currentSchema) $ do
- lift $
- logWarn $
- "Current schema does not match actual schema: " <>
- displayShow (actualSchema, currentSchema)
- deleteWhere ([] :: [Filter Schema])
- insert_ $ Schema currentSchema
+ unless (actualSchema == Just currentSchema) $ do
+ lift $
+ logWarn $
+ "Current schema does not match actual schema: " <>
+ displayShow (actualSchema, currentSchema)
+ deleteWhere ([] :: [Filter Schema])
+ insert_ $ Schema currentSchema
diff --git a/src/Stackage/Database/Types.hs b/src/Stackage/Database/Types.hs
index 02dabdb..c42ef55 100644
--- a/src/Stackage/Database/Types.hs
+++ b/src/Stackage/Database/Types.hs
@@ -40,7 +40,8 @@ module Stackage.Database.Types
, Origin(..)
, LatestInfo(..)
, Deprecation(..)
- , haddockBucketName
+ , defHaddockBucketName
+ , defHaddockBucketUrl
, Changelog(..)
, Readme(..)
, StackageCronOptions(..)
@@ -49,7 +50,7 @@ module Stackage.Database.Types
import Data.Aeson
import qualified Data.Text as T
import Data.Text.Read (decimal)
-import Network.AWS (Env, HasEnv(..))
+import Amazonka (Env)
import Pantry (BlobKey(..), CabalFileInfo(..), FileSize(..),
HasPantryConfig(..), PantryConfig, PackageIdentifierRevision(..), TreeKey(..))
import Pantry.SHA256 (fromHexText)
@@ -61,12 +62,16 @@ import Stackage.Database.Schema
import Text.Blaze (ToMarkup(..))
import Types
-haddockBucketName :: Text
-haddockBucketName = "haddock.stackage.org"
+defHaddockBucketName :: Text
+defHaddockBucketName = "haddock.stackage.org"
+
+defHaddockBucketUrl :: Text
+defHaddockBucketUrl = "https://s3.amazonaws.com/" <> defHaddockBucketName
data StackageCronOptions = StackageCronOptions
{ scoForceUpdate :: !Bool
, scoDownloadBucketName :: !Text
+ , scoDownloadBucketUrl :: !Text
, scoUploadBucketName :: !Text
, scoDoNotUpload :: !Bool
, scoLogLevel :: !LogLevel
@@ -84,6 +89,7 @@ data StackageCron = StackageCron
, scCachedGPD :: !(IORef (IntMap GenericPackageDescription))
, scEnvAWS :: !Env
, scDownloadBucketName :: !Text
+ , scDownloadBucketUrl :: !Text
, scUploadBucketName :: !Text
, scSnapshotsRepo :: !GithubRepo
, scReportProgress :: !Bool
@@ -91,9 +97,6 @@ data StackageCron = StackageCron
, scHoogleVersionId :: !VersionId
}
-instance HasEnv StackageCron where
- environment = lens scEnvAWS (\c f -> c {scEnvAWS = f})
-
instance HasLogFunc StackageCron where
logFuncL = lens scLogFunc (\c f -> c {scLogFunc = f})
diff --git a/src/Stackage/Snapshot/Diff.hs b/src/Stackage/Snapshot/Diff.hs
index 641812c..7371f7a 100644
--- a/src/Stackage/Snapshot/Diff.hs
+++ b/src/Stackage/Snapshot/Diff.hs
@@ -15,6 +15,7 @@ module Stackage.Snapshot.Diff
import ClassyPrelude (sortOn, toCaseFold)
import Data.Aeson
+import Data.Aeson.Key
import qualified Data.Text as T (commonPrefixes)
import Data.These
import RIO
@@ -61,7 +62,7 @@ newtype VersionChange = VersionChange { unVersionChange :: These VersionP Versio
deriving (Show, Eq, Generic, Typeable)
instance ToJSON (WithSnapshotNames VersionChange) where
- toJSON (WithSnapshotNames (toPathPiece -> aKey) (toPathPiece -> bKey) change) =
+ toJSON (WithSnapshotNames (fromText . toPathPiece -> aKey) (fromText . toPathPiece -> bKey) change) =
case change of
VersionChange (This a) -> object [ aKey .= a ]
VersionChange (That b) -> object [ bKey .= b ]
diff --git a/src/Types.hs b/src/Types.hs
index 2d4de53..6533bf4 100644
--- a/src/Types.hs
+++ b/src/Types.hs
@@ -407,7 +407,25 @@ instance ToMarkup VersionRangeP where
instance PersistField VersionRangeP where
toPersistValue = PersistText . textDisplay
fromPersistValue v =
- fromPersistValue v >>= bimap (T.pack . displayException) VersionRangeP . dtParse
+ fromPersistValue v >>= bimap (T.pack . displayException) VersionRangeP . dtParse . hackwardCompat_3_4
+ where
+ -- We use parseSimple under the hood, which always parses using
+ -- the latest version of the Cabal spec. In practice, this hasn't
+ -- been a problem. Until now.
+ --
+ -- Cabal spec 3.4 dropped support for "-any" as a version range, and the
+ -- database is full of such values. Luckily, ">=0" is a
+ -- backward-compatible synonym for "-any". New versions of this app will
+ -- write ">=0" instead of "-any", which old versions of this app will
+ -- understand just fine. We just need to substitute on read.
+ --
+ -- FIXME: strictly speaking, VersionRange cannot be parsed without
+ -- knowing the Cabal spec version of the package that used it. There's
+ -- nothing *wrong* with "-any". That means we probably need to decode it
+ -- no further than Text and do further processing outside of the
+ -- PersistField instance.
+ hackwardCompat_3_4 "-any" = ">=0"
+ hackwardCompat_3_4 t = t
instance PersistFieldSql VersionRangeP where
sqlType _ = SqlString
diff --git a/stack.yaml b/stack.yaml
index 5449ef3..12cc9dc 100644
--- a/stack.yaml
+++ b/stack.yaml
@@ -1,16 +1,32 @@
-resolver: lts-18.28
+resolver: lts-22.6
extra-deps:
-- amazonka-1.6.1
-- barrier-0.1.1
-- classy-prelude-yesod-1.5.0
-- unliftio-core-0.1.2.0
-- yesod-gitrepo-0.3.0
-- static-bytes-0.1.0
-- companion-0.1.0
-- aeson-warning-parser-0.1.0
-- hpack-0.35.0
+# WARNING: Changing the hoogle version causes stackage-server-cron to regenerate
+# Hoogle databases FOR EVERY SNAPSHOT, EVER. Usually, that's ok! But don't
+# forget! The consequences are: (1) More disk usage. Hoogle databases are not
+# cleaned up on the stackage-server-cron side, nor on the stackage-server side.
+# (Yet. This will change.) (2) More bucket usage. While it's easy to say it's a
+# drop in the literal bucket, such excessive misuse of storage makes
+# administration, backups, disaster recovery, and many other DevOps concerns
+# harder and harder. All but the latest LTS's database are literally never used
+# anyway. (3) The Hoogle database schema is defined by the first three
+# version components. Any more frequent regeneration is pure unadulterated
+# waste. (4) Stackage's Hoogle search will be unavailable until the new
+# databases have been generated.
+- hoogle-5.0.18.4@sha256:1372458e97dff541fcda099236af7936bf93ee6b8c5d15695ee6d9426dff5eed,3171
+- safe-0.3.20@sha256:7813ad56161f57d5162a924de5597d454162a2faed06be6e268b37bb5c19d48d,2312
+- Cabal-3.8.1.0@sha256:77121d8e1aff14a0fd95684b751599db78a7dd26d55862d9fcef27c88b193e9d,12609
+- Cabal-syntax-3.8.1.0@sha256:ed2d937ba6c6a20b75850349eedd41374885fc42369ef152d69e2ba70f44f593,7620
- git: https://github.com/commercialhaskell/pantry.git
commit: 5df643cc1deb561d9c52a9cb6f593aba2bc4c08e
+# This amazonka patched to support Cloudflare, which kinda has a bug. See
+# https://github.com/brendanhay/amazonka/issues/975 for details.
+- github: chreekat/amazonka
+ commit: b/r2-compat
+ subdirs: [lib/amazonka-core]
-drop-packages:
-- Cabal
+nix:
+ packages:
+ - zlib
+ - postgresql
+ - pkg-config
+ - haskell-language-server
diff --git a/static/opensearchhoogle.xml b/static/opensearchhoogle.xml
index 5c48e7b..44557cc 100644
--- a/static/opensearchhoogle.xml
+++ b/static/opensearchhoogle.xml
@@ -3,7 +3,6 @@
- Stackage's infrastructure, build machines, initial creation and ongoing maintenance, are proudly sponsored by FP Complete. + Stackage's infrastructure, build machines, initial creation and ongoing maintenance were proudly sponsored by FP Complete from 2014 to 2024. + +
+ Today it is a service provided by the Haskell Foundation.