Get Hoogle 5 working

This commit is contained in:
Michael Snoyman 2016-06-02 20:26:38 +03:00 committed by Chris Done
parent e54b3f80a6
commit c1e16d8e1a
4 changed files with 113 additions and 178 deletions

View File

@ -1,15 +1,16 @@
{-# LANGUAGE QuasiQuotes #-}
module Handler.Hoogle where
import Control.DeepSeq (NFData(..))
import Control.DeepSeq.Generics (genericRnf)
import Control.Spoon (spoon)
import Data.Data (Data (..))
import Data.Data (Data)
import Data.Text.Read (decimal)
import qualified Hoogle
import Import
import Text.Blaze.Html (preEscapedToHtml)
import Stackage.Database
import qualified Stackage.Database.Cron as Cron
import qualified Data.Text as T
getHoogleDB :: SnapName -> Handler (Maybe FilePath)
getHoogleDB name = do
@ -21,7 +22,7 @@ getHoogleR name = do
Entity _ snapshot <- lookupSnapshot name >>= maybe notFound return
mquery <- lookupGetParam "q"
mpage <- lookupGetParam "page"
exact <- maybe False (const True) <$> lookupGetParam "exact"
exact <- isJust <$> lookupGetParam "exact" -- FIXME remove, Hoogle no longer supports
mresults' <- lookupGetParam "results"
let count' =
case decimal <$> mresults' of
@ -33,25 +34,30 @@ getHoogleR name = do
_ -> 1
offset = (page - 1) * perPage
mdatabasePath <- getHoogleDB name
heDatabase <- case mdatabasePath of
Just x -> return $ liftIO $ Hoogle.loadDatabase x
Nothing -> hoogleDatabaseNotAvailableFor name
dbPath <- maybe (hoogleDatabaseNotAvailableFor name) return mdatabasePath
-- Avoid concurrent Hoogle queries, see
-- https://github.com/fpco/stackage-server/issues/172
lock <- appHoogleLock <$> getYesod
mresults <- case mquery of
Just query -> withMVar lock $ const $ runHoogleQuery heDatabase HoogleQueryInput
{ hqiQueryInput = query
, hqiExactSearch = if exact then Just query else Nothing
, hqiLimitTo = count'
, hqiOffsetBy = offset
}
Nothing -> return $ HoogleQueryOutput "" [] Nothing
HoogleQueryOutput results mtotalCount <-
case mquery of
Just query -> do
let input = HoogleQueryInput
{ hqiQueryInput = query
, hqiLimitTo = count'
, hqiOffsetBy = offset
}
liftIO $ withMVar lock
$ const
$ Hoogle.withDatabase dbPath
-- NB! I got a segfault when I didn't force with $!
$ \db -> return $! runHoogleQuery db input
Nothing -> return $ HoogleQueryOutput [] Nothing
let queryText = fromMaybe "" mquery
pageLink p = (SnapshotR name HoogleR
, (if exact then (("exact", "true"):) else id)
$ (maybe id (\q' -> (("q", q'):)) mquery)
$ maybe id (\q' -> (("q", q'):)) mquery
[("page", tshow p)])
snapshotLink = SnapshotR name StackageHomeR
hoogleForm = $(widgetFile "hoogle-form")
@ -84,15 +90,14 @@ perPage = 10
data HoogleQueryInput = HoogleQueryInput
{ hqiQueryInput :: Text
, hqiExactSearch :: Maybe Text
, hqiLimitTo :: Int
, hqiOffsetBy :: Int
}
deriving (Eq, Read, Show, Data, Typeable, Ord, Generic)
data HoogleQueryOutput = HoogleQueryBad Text
| HoogleQueryOutput Text [HoogleResult] (Maybe Int) -- ^ Text == HTML version of query, Int == total count
deriving (Read, Typeable, Data, Show, Eq)
data HoogleQueryOutput = HoogleQueryOutput [HoogleResult] (Maybe Int) -- ^ Int == total count
deriving (Read, Typeable, Data, Show, Eq, Generic)
instance NFData HoogleQueryOutput
data HoogleResult = HoogleResult
{ hrURL :: String
@ -118,56 +123,33 @@ instance NFData HoogleResult where rnf = genericRnf
instance NFData PackageLink where rnf = genericRnf
instance NFData ModuleLink where rnf = genericRnf
runHoogleQuery :: Monad m
=> m Hoogle.Database
-> HoogleQueryInput
-> m HoogleQueryOutput
runHoogleQuery heDatabase HoogleQueryInput {..} =
runQuery $ Hoogle.parseQuery Hoogle.Haskell query
runHoogleQuery :: Hoogle.Database -> HoogleQueryInput -> HoogleQueryOutput
runHoogleQuery hoogledb HoogleQueryInput {..} =
HoogleQueryOutput targets mcount
where
allTargets = Hoogle.searchDatabase hoogledb query
targets = take (min 100 hqiLimitTo)
$ drop hqiOffsetBy
$ map fixResult allTargets
query = unpack hqiQueryInput
runQuery (Left err) = return $ HoogleQueryBad (tshow err)
runQuery (Right query') = do
hoogledb <- heDatabase
let query'' = Hoogle.queryExact classifier query'
rawRes = concatMap fixResult
$ Hoogle.search hoogledb query''
mres = spoon
$ take (min 100 hqiLimitTo)
$ drop hqiOffsetBy rawRes
mcount = spoon $ limitedLength 0 rawRes
limitedLength x [] = Just x
limitedLength x (_:rest)
| x >= 20 = Nothing
| otherwise = limitedLength (x + 1) rest
rendered = pack $ Hoogle.showTagHTML $ Hoogle.renderQuery query''
return $ case (,) <$> mres <*> mcount of
Nothing ->
HoogleQueryOutput rendered [] (Just 0)
Just (results, mcount') ->
HoogleQueryOutput rendered (take hqiLimitTo results) mcount'
mcount = limitedLength 0 allTargets
classifier = maybe Nothing
(const (Just Hoogle.UnclassifiedItem))
hqiExactSearch
limitedLength x [] = Just x
limitedLength x (_:rest)
| x >= 20 = Nothing
| otherwise = limitedLength (x + 1) rest
fixResult (_, Hoogle.Result locs self docs) = do
(loc, _) <- take 1 locs
let sources' = unionsWith (++) $
mapMaybe (getPkgModPair . snd) locs
return HoogleResult
{ hrURL = loc
, hrSources = mapToList sources'
, hrTitle = Hoogle.showTagHTML self
, hrBody = fromMaybe "Problem loading documentation" $
spoon $ Hoogle.showTagText docs
}
getPkgModPair :: [(String, String)]
-> Maybe (Map PackageLink [ModuleLink])
getPkgModPair [(pkg, pkgname), (modu, moduname)] = do
let pkg' = PackageLink pkgname pkg
modu' = ModuleLink moduname modu
return $ asMap $ singletonMap pkg' [modu']
getPkgModPair _ = Nothing
fixResult Hoogle.Target {..} = HoogleResult
{ hrURL = targetURL
, hrSources = toList $ do
(pname, purl) <- targetPackage
(mname, murl) <- targetModule
let p = PackageLink pname purl
m = ModuleLink mname murl
Just (p, [m])
, hrTitle = -- FIXME find out why these replaces are necessary
unpack $ T.replace "<0>" "" $ T.replace "</0>" "" $ pack
targetItem
, hrBody = targetDocs
}

View File

@ -7,16 +7,13 @@ module Stackage.Database.Cron
import ClassyPrelude.Conduit
import Stackage.PackageIndex.Conduit
import Database.Persist (Entity (Entity))
import Data.Char (isAlpha)
import qualified Codec.Archive.Tar as Tar
import Stackage.Database
import Network.HTTP.Client
import Network.HTTP.Client.Conduit (bodyReaderSource)
import Filesystem (rename, removeTree, removeFile)
import Filesystem (rename, removeTree, removeFile, isFile, createTree)
import Web.PathPieces (toPathPiece)
import Filesystem (isFile, createTree)
import Filesystem.Path.CurrentOS (parent, fromText, encodeString)
import Control.Monad.State.Strict (StateT, get, put)
import Network.HTTP.Types (status200)
import Data.Streaming.Network (bindPortTCP)
import Network.AWS (Credentials (Discover),
@ -35,6 +32,7 @@ import Data.Conduit.Zlib (WindowBits (WindowBits),
compress, ungzip)
import qualified Hoogle
import System.Directory (doesFileExist)
import System.IO.Temp (withSystemTempDirectory)
filename' :: Text
filename' = concat
@ -208,6 +206,7 @@ stackageServerCron = do
createHoogleDB :: StackageDatabase -> Manager -> SnapName -> IO (Maybe FilePath)
createHoogleDB db man name = handleAny (\e -> print e $> Nothing) $ do
putStrLn $ "Creating Hoogle DB for " ++ toPathPiece name
req' <- parseUrl $ unpack tarUrl
let req = req' { decompress = const True }
@ -222,16 +221,27 @@ createHoogleDB db man name = handleAny (\e -> print e $> Nothing) $ do
void $ tryIO $ removeFile (fromString outname)
createTree (fromString bindir)
dbs <- runResourceT
$ sourceTarFile False tarFP
$$ evalStateC 1 (mapMC (singleDB db name bindir))
=$ sinkList
withSystemTempDirectory ("hoogle-" ++ unpack (toPathPiece name)) $ \_tmpdir -> do
let tmpdir = "/Users/michael/Desktop/hoo"
runResourceT
$ sourceTarFile False tarFP
$$ mapM_C (liftIO . singleDB db name tmpdir)
putStrLn "Merging databases..."
Hoogle.mergeDatabase (catMaybes dbs) outname
putStrLn "Merge done"
let args =
[ "generate"
, "--database=" ++ outname
, "--local=" ++ tmpdir
]
putStrLn $ concat
[ "Merging databases... ("
, tshow args
, ")"
]
Hoogle.hoogle args
return $ Just outname
putStrLn "Merge done"
return $ Just outname
where
root = "hoogle-gen"
bindir = root </> "bindir"
@ -243,81 +253,29 @@ createHoogleDB db man name = handleAny (\e -> print e $> Nothing) $ do
singleDB :: StackageDatabase
-> SnapName
-> FilePath -- ^ bindir to write to
-> FilePath -- ^ temp directory to write .txt files to
-> Tar.Entry
-> StateT Int (ResourceT IO) (Maybe FilePath)
singleDB db sname bindir e@(Tar.entryContent -> Tar.NormalFile lbs _) = do
idx <- get
put $! idx + 1
putStrLn $ "Loading file for Hoogle: " ++ pack (Tar.entryPath e)
-> IO ()
singleDB db sname tmpdir e@(Tar.entryContent -> Tar.NormalFile lbs _) = do
--putStrLn $ "Loading file for Hoogle: " ++ pack (Tar.entryPath e)
let pkg = pack $ takeWhile (/= '.') $ Tar.entryPath e
msp <- flip runReaderT db $ do
Just (Entity sid _) <- lookupSnapshot sname
lookupSnapshotPackage sid pkg
case msp of
Nothing -> do
putStrLn $ "Unknown: " ++ pkg
return Nothing
Just (Entity _ sp) -> do
let ver = snapshotPackageVersion sp
pkgver = concat [pkg, "-", ver]
out = bindir </> show idx <.> "hoo"
src' = unlines
$ haddockHacks (Just $ unpack docsUrl)
$ lines
$ unpack
$ decodeUtf8 lbs
Nothing -> putStrLn $ "Unknown: " ++ pkg
Just _ -> do
let out = tmpdir </> unpack pkg <.> "txt"
-- FIXME add @url directive
writeFile out lbs
{-
docsUrl = concat
[ "https://www.stackage.org/haddock/"
, toPathPiece sname
, "/"
, pkgver
, "/index.html"
]
] -}
_errs <- liftIO $ Hoogle.createDatabase "" Hoogle.Haskell [] src' out
return $ Just out
singleDB _ _ _ _ = return Nothing
---------------------------------------------------------------------
-- HADDOCK HACKS
-- (Copied from hoogle-4.2.36/src/Recipe/Haddock.hs)
-- Modifications:
-- 1) Some name qualification
-- 2) Explicit type sig due to polymorphic elem
-- 3) Fixed an unused binding warning
-- Eliminate @version
-- Change :*: to (:*:), Haddock bug
-- Change !!Int to !Int, Haddock bug
-- Change instance [overlap ok] to instance, Haddock bug
-- Change instance [incoherent] to instance, Haddock bug
-- Change instance [safe] to instance, Haddock bug
-- Change !Int to Int, HSE bug
-- Drop {-# UNPACK #-}, Haddock bug
-- Drop everything after where, Haddock bug
haddockHacks :: Maybe Hoogle.URL -> [String] -> [String]
haddockHacks loc src = maybe id haddockPackageUrl loc (translate src)
where
translate :: [String] -> [String]
translate = map (unwords . g . map f . words) . filter (not . isPrefixOf "@version ")
f "::" = "::"
f (':':xs) = "(:" ++ xs ++ ")"
f ('!':'!':x:xs) | isAlpha x = xs
f ('!':x:xs) | isAlpha x || x `elem` ("[(" :: String) = x:xs
f x | x `elem` ["[overlap","ok]","[incoherent]","[safe]"] = ""
f x | x `elem` ["{-#","UNPACK","#-}"] = ""
f x = x
g ("where":_) = []
g (x:xs) = x : g xs
g [] = []
haddockPackageUrl :: Hoogle.URL -> [String] -> [String]
haddockPackageUrl x = concatMap f
where f y | "@package " `isPrefixOf` y = ["@url " ++ x, y]
| otherwise = [y]
singleDB _ _ _ _ = return ()

View File

@ -3,7 +3,7 @@ packages:
- .
- location:
git: https://github.com/ndmitchell/hoogle.git
commit: 779e04ed20a556bbb92789815ea60068fe188891
commit: ca42c4ce3af1c1ae7d413de242063ca1f682d3ff
extra-dep: true
image:
container:

View File

@ -3,37 +3,32 @@
<h1>Hoogle Search
<p>Within <a href=@{snapshotLink}>#{snapshotTitle snapshot}</a>
^{hoogleForm}
$case mresults
$of HoogleQueryBad err
<p>#{err}
<p>For information on what queries should look like, see the <a href="http://www.haskell.org/haskellwiki/Hoogle">hoogle user manual</a>.
$of HoogleQueryOutput _query results mtotalCount
$if null results
<p>Your search produced no results.
$else
<ol .search-results>
$forall HoogleResult url sources self docs <- results
<li>
<p .self>
<a href=#{url}>#{preEscapedToHtml self}
<table .sources>
$forall (pkg, modus) <- sources
<tr>
<th>
<a href=#{plURL pkg}>#{plName pkg}
<td>
$forall ModuleLink name url' <- modus
<a href=#{url'}>#{name}
$if null docs
<p .nodocs>No documentation available.
$else
<p .docs>#{docs}
<p .pagination>
$with mpageCount <- fmap getPageCount mtotalCount
Page #{page} of #{maybe "many" show mpageCount} #
$if page > 1
|
<a href=@?{pageLink $ page - 1}>Previous
$if maybe True ((<) page) mpageCount
|
<a href=@?{pageLink $ page + 1}>Next
$if null results
<p>Your search produced no results.
$else
<ol .search-results>
$forall HoogleResult url sources self docs <- results
<li>
<p .self>
<a href=#{url}>#{preEscapedToHtml self}
<table .sources>
$forall (pkg, modus) <- sources
<tr>
<th>
<a href=#{plURL pkg}>#{plName pkg}
<td>
$forall ModuleLink name url' <- modus
<a href=#{url'}>#{name}
$if null docs
<p .nodocs>No documentation available.
$else
<p .docs>#{docs}
<p .pagination>
$with mpageCount <- fmap getPageCount mtotalCount
Page #{page} of #{maybe "many" show mpageCount} #
$if page > 1
|
<a href=@?{pageLink $ page - 1}>Previous
$if maybe True ((<) page) mpageCount
|
<a href=@?{pageLink $ page + 1}>Next