Add download-bucket-url option

This commit is contained in:
Bryan Richter 2024-01-08 14:26:38 +02:00
parent 2939d98b9f
commit 6b4232b1c6
No known key found for this signature in database
GPG Key ID: B202264020068BFB
8 changed files with 62 additions and 38 deletions

View File

@ -38,17 +38,24 @@ optsParser =
\their yaml files from stackage-snapshots repo have been updated or not.") <*>
option
readText
(long "download-bucket" <> value haddockBucketName <> metavar "DOWNLOAD_BUCKET" <>
(long "download-bucket" <> value defHaddockBucketName <> metavar "DOWNLOAD_BUCKET" <>
help
("S3 Bucket name where things like haddock and current hoogle files should \
\be downloaded from. Default is: " <>
T.unpack haddockBucketName)) <*>
\be downloaded from. Used in S3 API read operations. Default is: " <>
T.unpack defHaddockBucketName)) <*>
option
readText
(long "upload-bucket" <> value haddockBucketName <> metavar "UPLOAD_BUCKET" <>
(long "download-bucket-url" <> value defHaddockBucketUrl <> metavar "DOWNLOAD_BUCKET_URL" <>
help
("Publicly accessible URL where the download bucket can be accessed. Used for \
\serving the Haddocks on the website. Default is: " <>
T.unpack defHaddockBucketUrl)) <*>
option
readText
(long "upload-bucket" <> value defHaddockBucketName <> metavar "UPLOAD_BUCKET" <>
help
("S3 Bucket where hoogle db and snapshots.json file will be uploaded to. Default is: " <>
T.unpack haddockBucketName)) <*>
T.unpack defHaddockBucketName)) <*>
switch
(long "do-not-upload" <>
help "Stop from hoogle db and snapshots.json from being generated and uploaded") <*>

View File

@ -24,3 +24,6 @@ force-ssl: false
postgres-string: "_env:PGSTRING:host=localhost port=5432 user=stackage dbname=stackage password=stackage"
postgres-poolsize: "_env:PGPOOLSIZE:8"
# Publicly-accessible URL for the bucket holding Haddock contents.
download-bucket-url: "_env:DOWNLOAD_BUCKET_URL:https://s3.amazonaws.com/haddock.stackage.org"

View File

@ -157,7 +157,7 @@ withFoundation appLogFunc appSettings inner = do
runRIO appLogFunc $ RIO.logError $ "Couldn't get Stack matcher: " <> displayShow e
pure oldMatcher
appMirrorStatus <- mkUpdateMirrorStatus
hoogleLocker <- newHoogleLocker appLogFunc appHttpManager
hoogleLocker <- newHoogleLocker appLogFunc appHttpManager (appDownloadBucketUrl appSettings)
let appGetHoogleDB = singleRun hoogleLocker
let appGitRev = $$tGitRev
runConcurrently $ runContentUpdates *> Concurrently (inner App {..})

View File

@ -8,13 +8,14 @@ import Import
import qualified Data.Text as T (takeEnd)
import Stackage.Database
makeURL :: SnapName -> [Text] -> Text
makeURL snapName rest = concat
$ "https://s3.amazonaws.com/"
: haddockBucketName
: "/"
: toPathPiece snapName
: map (cons '/') rest
makeURL :: SnapName -> [Text] -> Handler Text
makeURL snapName rest = do
bucketUrl <- getsYesod (appDownloadBucketUrl . appSettings)
pure . concat
$ bucketUrl
: "/"
: toPathPiece snapName
: map (cons '/') rest
shouldRedirect :: Bool
shouldRedirect = False
@ -27,7 +28,7 @@ getHaddockR snapName rest
result <- redirectWithVersion snapName rest
case result of
Just route -> redirect route
Nothing -> redirect $ makeURL snapName rest
Nothing -> redirect =<< makeURL snapName rest
| Just docType <- mdocType = do
cacheSeconds $ 60 * 60 * 24 * 7
result <- redirectWithVersion snapName rest
@ -41,7 +42,7 @@ getHaddockR snapName rest
return ("text/html; charset=utf-8", mstyle /= Just "stackage")
DocJson ->
return ("application/jsontml; charset=utf-8", True)
req <- parseRequest $ unpack $ makeURL snapName rest
req <- parseRequest =<< unpack <$> makeURL snapName rest
man <- getHttpManager <$> getYesod
(_, res) <- runReaderT (acquireResponse req >>= allocateAcquire) man
if plain
@ -54,7 +55,7 @@ getHaddockR snapName rest
peekC >>= maybe (return ()) (const $ yield $ encodeUtf8 extra)
mapC id) .|
mapC (Chunk . toBuilder)
| otherwise = redirect $ makeURL snapName rest
| otherwise = redirect =<< makeURL snapName rest
where
mdocType =
case T.takeEnd 5 <$> headMay (reverse rest) of
@ -141,6 +142,9 @@ getHaddockBackupR (snap':rest)
| Just branch <- fromPathPiece snap' = track "Handler.Haddock.getHaddockBackupR" $ do
snapName <- newestSnapshot branch >>= maybe notFound pure
redirect $ HaddockR snapName rest
getHaddockBackupR rest = track "Handler.Haddock.getHaddockBackupR" $ redirect $ concat
$ "https://s3.amazonaws.com/haddock.stackage.org"
: map (cons '/') rest
getHaddockBackupR rest = track "Handler.Haddock.getHaddockBackupR" $ do
bucketUrl <- getsYesod (appDownloadBucketUrl . appSettings)
redirect
$ concat
$ bucketUrl
: map (cons '/') rest

View File

@ -2,13 +2,12 @@
module Handler.StackageIndex where
import Import
import Stackage.Database.Types (haddockBucketName)
getStackageIndexR :: SnapName -> Handler TypedContent
getStackageIndexR slug =
getStackageIndexR slug = do
bucketUrl <- getsYesod (appDownloadBucketUrl . appSettings)
redirect $ concat
[ "https://s3.amazonaws.com/"
, haddockBucketName
[ bucketUrl
, "/package-index/"
, toPathPiece slug
, ".tar.gz"

View File

@ -56,6 +56,8 @@ data AppSettings = AppSettings
-- ^ Force redirect to SSL
, appDevDownload :: Bool
-- ^ Controls how Git and database resources are downloaded (True means less downloading)
, appDownloadBucketUrl :: Text
-- ^ Publicly-accessible URL for the bucket holding Haddock contents.
}
data DatabaseSettings
@ -109,6 +111,7 @@ instance FromJSON AppSettings where
appSkipCombining <- o .:? "skip-combining" .!= dev
appForceSsl <- o .:? "force-ssl" .!= not dev
appDevDownload <- o .:? "dev-download" .!= dev
appDownloadBucketUrl <- o .:? "download-bucket-url" .!= "https://s3.amazonaws.com/haddock.stackage.org"
return AppSettings {..}

View File

@ -10,7 +10,8 @@ module Stackage.Database.Cron
, newHoogleLocker
, singleRun
, StackageCronOptions(..)
, haddockBucketName
, defHaddockBucketName
, defHaddockBucketUrl
) where
import Conduit
@ -74,10 +75,9 @@ hoogleKey name = T.concat
, ".hoo"
]
hoogleUrl :: SnapName -> Text
hoogleUrl n = T.concat
[ "https://s3.amazonaws.com/"
, haddockBucketName
hoogleUrl :: SnapName -> Text -> Text
hoogleUrl n haddockBucketUrl = T.concat
[ haddockBucketUrl
, "/"
, hoogleKey n
]
@ -101,8 +101,8 @@ withResponseUnliftIO :: MonadUnliftIO m => Request -> Manager -> (Response BodyR
withResponseUnliftIO req man f = withRunInIO $ \ runInIO -> withResponse req man (runInIO . f)
newHoogleLocker ::
(HasLogFunc env, MonadIO m) => env -> Manager -> m (SingleRun SnapName (Maybe FilePath))
newHoogleLocker env man = mkSingleRun hoogleLocker
(HasLogFunc env, MonadIO m) => env -> Manager -> Text -> m (SingleRun SnapName (Maybe FilePath))
newHoogleLocker env man bucketUrl = mkSingleRun hoogleLocker
where
hoogleLocker :: MonadIO n => SnapName -> n (Maybe FilePath)
hoogleLocker name =
@ -112,7 +112,7 @@ newHoogleLocker env man = mkSingleRun hoogleLocker
if exists
then return $ Just fp
else do
req' <- parseRequest $ T.unpack $ hoogleUrl name
req' <- parseRequest $ T.unpack $ hoogleUrl name bucketUrl
let req = req' {decompress = const False}
withResponseUnliftIO req man $ \res ->
case responseStatus res of
@ -125,7 +125,7 @@ newHoogleLocker env man = mkSingleRun hoogleLocker
sinkHandle h
return $ Just fp
| status == status404 -> do
logDebug $ "NotFound: " <> display (hoogleUrl name)
logDebug $ "NotFound: " <> display (hoogleUrl name bucketUrl)
return Nothing
| otherwise -> do
body <- liftIO $ brConsume $ responseBody res
@ -198,6 +198,7 @@ stackageServerCron StackageCronOptions {..} = do
, scCachedGPD = gpdCache
, scEnvAWS = aws
, scDownloadBucketName = scoDownloadBucketName
, scDownloadBucketUrl = scoDownloadBucketUrl
, scUploadBucketName = scoUploadBucketName
, scSnapshotsRepo = scoSnapshotsRepo
, scReportProgress = scoReportProgress
@ -700,7 +701,8 @@ buildAndUploadHoogleDB :: Bool -> RIO StackageCron ()
buildAndUploadHoogleDB doNotUpload = do
snapshots <- lastLtsNightlyWithoutHoogleDb 5 5
env <- ask
locker <- newHoogleLocker (env ^. logFuncL) (env ^. envManager)
bucketUrl <- asks scDownloadBucketUrl
locker <- newHoogleLocker (env ^. logFuncL) (env ^. envManager) bucketUrl
for_ snapshots $ \(snapshotId, snapName) ->
unlessM (checkInsertSnapshotHoogleDb False snapshotId) $ do
logInfo $ "Starting Hoogle database download: " <> display (hoogleKey snapName)
@ -725,12 +727,12 @@ createHoogleDB :: SnapshotId -> SnapName -> RIO StackageCron (Maybe FilePath)
createHoogleDB snapshotId snapName =
handleAny logException $ do
logInfo $ "Creating Hoogle DB for " <> display snapName
downloadBucket <- scDownloadBucketName <$> ask
downloadBucketUrl <- scDownloadBucketUrl <$> ask
let root = "hoogle-gen"
bindir = root </> "bindir"
outname = root </> "output.hoo"
tarKey = toPathPiece snapName <> "/hoogle/orig.tar"
tarUrl = "https://s3.amazonaws.com/" <> downloadBucket <> "/" <> tarKey
tarUrl = downloadBucketUrl <> "/" <> tarKey
tarFP = root </> T.unpack tarKey
-- When tarball is downloaded it is saved with durability and atomicity, so if it
-- is present it is not in a corrupted state

View File

@ -40,7 +40,8 @@ module Stackage.Database.Types
, Origin(..)
, LatestInfo(..)
, Deprecation(..)
, haddockBucketName
, defHaddockBucketName
, defHaddockBucketUrl
, Changelog(..)
, Readme(..)
, StackageCronOptions(..)
@ -61,12 +62,16 @@ import Stackage.Database.Schema
import Text.Blaze (ToMarkup(..))
import Types
haddockBucketName :: Text
haddockBucketName = "haddock.stackage.org"
defHaddockBucketName :: Text
defHaddockBucketName = "haddock.stackage.org"
defHaddockBucketUrl :: Text
defHaddockBucketUrl = "https://s3.amazonaws.com/" <> defHaddockBucketName
data StackageCronOptions = StackageCronOptions
{ scoForceUpdate :: !Bool
, scoDownloadBucketName :: !Text
, scoDownloadBucketUrl :: !Text
, scoUploadBucketName :: !Text
, scoDoNotUpload :: !Bool
, scoLogLevel :: !LogLevel
@ -84,6 +89,7 @@ data StackageCron = StackageCron
, scCachedGPD :: !(IORef (IntMap GenericPackageDescription))
, scEnvAWS :: !Env
, scDownloadBucketName :: !Text
, scDownloadBucketUrl :: !Text
, scUploadBucketName :: !Text
, scSnapshotsRepo :: !GithubRepo
, scReportProgress :: !Bool