|
|
Text.ProseDoc
Text.ProseDoc is a tool that reads markdown formatted comments from a Haskell source file and composes the comments and the associated source into a HTML document where the prose and source flow side by side in sync.
The concept is blatantly borrowed from the CoffeeScript tool docco.
ProseDoc can be seen as an alternative way to write and format literal Haskell code. However, the main motivation behind writing ProseDoc was simply that it seemed like an interesting project to tinker with.
The source code repository is located at https://github.com/shangaslammi/prose-doc. | module Text.ProseDoc where
import Control.Applicative ((<$>))
import Control.Monad ((<=<), filterM, forM)
import Control.Error
import Data.Monoid (mempty)
import Data.List (sort, isPrefixOf)
import System.Directory (getDirectoryContents, doesFileExist, doesDirectoryExist)
import System.FilePath ((</>), takeExtension, makeRelative)
import Text.Pandoc.SelfContained
import Text.ProseDoc.Rendering
import Text.ProseDoc.Parser
import Paths_prose_doc
|
The current version can generate a document either from a single source file or a hierarchical module structure. | generatePage :: FilePath -> IO String
generatePage path = do
isFile <- doesFileExist path
if isFile then processSingle path else processDirectory path
processSingle :: FilePath -> IO String
processSingle path = do
t <- runScript (parseSourceFile path)
|
For a single document, we leave out the TOC and simply format the given module. The makeSelfContained function from pandoc is used to embed the style information from an external css file. | cssPath <- getDataFileName "css/prose.css"
makeSelfContained Nothing
$ renderPage cssPath mempty
$ [moduleToHtml (path, t)]
findModules :: FilePath -> IO [FilePath]
findModules root = do
isFile <- doesFileExist root
if isFile
|
For directories, we walk through all subdirectories and gather all files with the extension .hs . | then return $ if takeExtension root == ".hs" then [root] else []
else fmap concat
$ mapM findModules
=<< map (root </>) . filter (not . isPrefixOf ".")
<$> getDirectoryContents root
processDirectory :: FilePath -> IO String
processDirectory path = do
|
Currently, the modules are presented in alphabetical order but this should be user configurable so that more relevant modules can be made to appear first. | mods <- sort . map (makeRelative path) <$> findModules path
htmls <- forM mods $ \m -> do
t <- runScript $ parseSourceFile (path </> m)
return $ moduleToHtml (m, t)
let toc = htmlTOC mods
cssPath <- getDataFileName "css/prose.css"
makeSelfContained Nothing $ renderPage cssPath toc htmls
|
|
|
|
|
Forcing a square peg through a round hole
(aka: syntax coloring of Haskell source code using the haskell-src-exts package)
There are several good syntax highlighting libraries and tools available, so the simplest thing would have been just to use one and be done with it. However, none of the syntax highlighters that I could find worked 100% perfectly with the plethora of language extensions available for modern GHC (TemplateHaskell is particularly tricky), so in a bout of perfectionism I decided I'd try and make a syntax highlighter that uses a full blown Haskell parser to get everything right.
If I really wanted to be sure I'm supporting all the syntax extensions currently available, the right thing to do would be to use the GHC API directly so that anything I can compile, I can highlight. However, I have to admit that the thought of diving into the GHC API absolutely terrifies me (I'll get to it some day, I promise!), so settled with the next best thing, namely haskell-src-exts .
Now, haskell-src-exts supports an admirably large portion of the Haskell syntax, but token classification for syntax highlighting isn't really one of the targeted use-cases for the library.
- You get a
Token stream from the lexer that contains information such as which words are keywords.
- You get an AST (thankfully, annotated with position meta-data) from the parser for figuring out the context, i.e. whether you are inside a type signature or a pattern match.
- And finally, you get comments (again, tagged with position info) as a separate list.
Which gets us to this module: Classifier | {-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE ViewPatterns #-}
{-# LANGUAGE ScopedTypeVariables #-}
module Text.ProseDoc.Classifier where
import Control.Applicative
import Data.String ()
import Data.Monoid
import Data.Data
import Data.Typeable (cast)
import Data.Generics.Schemes
import Language.Haskell.Exts.SrcLoc
import qualified Language.Haskell.Exts.Annotated.Syntax as S
import Text.ProseDoc.Tree
import Text.ProseDoc.Tree.Builder
import Text.ProseDoc.Classifier.Types
import Text.ProseDoc.Classifier.Tokens ()
|
The ASTClassifier type class is used to process nodes in the annotated AST produced by haskell-src-ext and to build a Tree (covered later) of what are essentially tokens classified (or labeled) according to their context in the AST.
The mkTree function of the type-class operates in the TreeBuilder monad (also covered later), which keeps track of the current source position and the lists of tokens and comments we haven't yet processed. | class ASTClassifier ast where
mkTree :: ast -> TreeBuilder (Tree Classifier Printable)
mkTree = const mempty
|
As a generic convenience, we define some helper instances so that we can process ast elements, tree builders and lists of the aforementionted in a consistent manner. | instance ASTClassifier (TreeBuilder (Tree Classifier Printable)) where
mkTree = id
instance ASTClassifier a => ASTClassifier [a] where
mkTree = fmap mconcat . mapM mkTree
instance ASTClassifier a => ASTClassifier (Maybe a) where
mkTree Nothing = mempty
mkTree (Just a) = mkTree a
|
The root node of the AST is always the Module . By using the generic traversal scheme from Data.Generics.Schemes we can avoid most of the trouble of having to write an instance for every single kind of AST node.
If there are any source code fragments that were not processed by the generic traversal, we append them with popRemaining . | instance ASTClassifier (S.Module SrcSpan) where
mkTree m = mappend
<$> everything mappend gTree m
<*> popRemaining
|
gTree takes advantage of the Data.Data instance of the AST elements and makes transformations throughout the tree. "Leaf" type nodes like names are processed using popAst and more complex elements delegate to the AST element's ASTClassifier type class instance.
| gTree :: Data a => a -> TreeBuilder (Tree Classifier Printable)
gTree (cast -> Just (c :: S.ModulePragma SrcSpan)) = mkTree c
gTree (cast -> Just (c :: S.ImportDecl SrcSpan)) = mkTree c
gTree (cast -> Just (c :: S.Type SrcSpan)) = mkTree c
gTree (cast -> Just (c :: S.QName SrcSpan)) = mkTree c
gTree (cast -> Just (c :: S.Name SrcSpan)) = mkTree c
gTree (cast -> Just c@(S.Con {})) = popAst' ConstrName c
gTree (cast -> Just c@(S.PApp _ qn _)) = popAst' ConstrName qn
gTree (cast -> Just c@(S.PRec _ qn _)) = popAst' ConstrName qn
gTree (cast -> Just (c :: S.QOp SrcSpan)) = popAst' InfixOperator c
gTree (cast -> Just c@(S.String {})) = popAst' StringLit c
gTree (cast -> Just c@(S.TypeSig l names typ))
= popPrintablesBefore l
<> label Signature (mkTree names <> mkTree typ)
gTree _ = mempty
|
label is an short-hand function for adding a parent classifier to any value that is an ASTClassifier itself.
| label :: ASTClassifier a
=> Classifier
-> a
-> TreeBuilder (Tree Classifier Printable)
label l a = Label l <$> mkTree a
|
label' is a variation of label which discards all labels from child trees. This is sometimes useful to prevent an element from gettings several redundant classifiers.
| label' :: ASTClassifier a
=> Classifier
-> a
-> TreeBuilder (Tree Classifier Printable)
label' l a = Label l . pruneLabels <$> mkTree a
|
popAst assigns the given label to the specified, annotated AST element and pops all code fragments within the AST element's SrcSpan .
| popAst :: S.Annotated ast
=> Classifier
-> ast SrcSpan
-> TreeBuilder (Tree Classifier Printable)
popAst cls ast = popPrintablesBefore l <> label cls (popPrintables l)
where l = S.ann ast
|
popAst' is a version of popAst which prunes any nested labels.
| popAst' :: S.Annotated ast
=> Classifier
-> ast SrcSpan
-> TreeBuilder (Tree Classifier Printable)
popAst' cls ast = popPrintablesBefore l <> label' cls (popPrintables l)
where l = S.ann ast
|
The rest of the module consists of ASTClassifier instances for AST nodes for which we want to do some specific processing i.e. assign specific labels to some children of the node which cannot be accurately identified just based on their type (in which case we could process them in gTree ).
This part of the syntax highlighter is still work-in-progress. We need more ASTClassifier instances to add rest of the syntax coloring. | instance ASTClassifier (S.ModulePragma SrcSpan) where
mkTree p = case p of
S.LanguagePragma l names ->
popPrintablesBefore l
<> (Label ModulePragma <$> mkTree names)
S.OptionsPragma l _ _ ->
popPrintablesBefore l
<> (Label ModulePragma <$> popPrintables l)
S.AnnModulePragma l _ ->
popPrintablesBefore l
<> (Label ModulePragma <$> popPrintables l)
instance ASTClassifier (S.ImportDecl SrcSpan) where
mkTree i@(S.ImportDecl{..})
= popPrintablesBefore importAnn
<> label ImportDecl
( mkTree importModule
<> mkTree importAs
<> mkTree importSpecs
)
instance ASTClassifier (S.ImportSpecList SrcSpan) where
mkTree d
= popPrintablesBefore l
<> everything mappend gTree d
where l = S.ann d
instance ASTClassifier (S.ModuleName SrcSpan) where
mkTree (S.ModuleName l' s)
= popPrintablesBefore l
<> label ModuleName (popPrintables l)
where
SrcSpan {..} = l'
l = l' { srcSpanEndColumn = srcSpanStartColumn + length s }
instance ASTClassifier (S.Name SrcSpan) where
mkTree n = popPrintablesBefore l <> label Name (popPrintables l)
where l = S.ann n
instance ASTClassifier (S.Type SrcSpan) where
mkTree n = popPrintablesBefore l <> case n of
S.TyFun _ a b -> mkTree a <> mkTree b
S.TyApp _ a b -> mkTree a <> mkTree b
S.TyList _ a -> mkTree a
S.TyTuple _ _ a -> mkTree a
S.TyParen _ a -> mkTree a
S.TyCon _ (S.Qual _ m n)
-> mkTree m
<> do
within <- currentlyWithin l
if within
then popCustom Punctuation 1
else mempty
<> label' TypeName (mkTree n)
_ -> label TypeName $ popPrintables l
where l = S.ann n
instance ASTClassifier (S.QName SrcSpan) where
mkTree n = popPrintablesBefore l <> case n of
S.Qual _ m n -> mkTree m <> mkTree n
_ -> label Name $ popPrintables l
where l = S.ann n
|
|
|
|
|
Token Classification
Whereas Text.ProseDoc.Classifier classifies source fragments based on the AST, the Tokens sub-module assigns classifications based on Tokens that are returned by lexTokenStream in Language.Haskell.Exts.Lexer .
This module also handles Comment values that we get as a side-product of parsing the AST. | {-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE RecordWildCards #-}
module Text.ProseDoc.Classifier.Tokens where
import Language.Haskell.Exts.Lexer
import Language.Haskell.Exts.SrcLoc
import Language.Haskell.Exts.Comments
import Text.ProseDoc.Classifier.Types
instance SourceFragment (Loc Token) where
toFragments lt
| cls == Pragma
|
We do a little bit of manual tweaking for Pragma tokens because the pragma starting token includes both the opening comment bracket and the pragma name. | = (pos `setLen` 3, Punctuation)
: (pos `moveCol` 3, Pragma)
: []
| otherwise = (pos, cls) : []
where
pos = loc lt
token = unLoc lt
cls = classifyToken token
moveCol src@(SrcSpan {..}) delta = src
{ srcSpanStartColumn = srcSpanStartColumn + delta }
setLen src@(SrcSpan {..}) len = src
{ srcSpanEndColumn = srcSpanStartColumn + len }
instance SourceFragment Comment where
toFragments (Comment block loc txt) = (loc', comment) : [] where
|
Block comments that start with the character '%' are classified as "prose" and they get lifted out of the source in a later phase. All other comments are retained in the source as-is. | loc' = case txt of
'%':_ -> loc
|
Prose comments must start and end with a newline. The extra lines are pruned here to avoid extra gaps in the source code. | { srcSpanStartColumn = 1
, srcSpanEndColumn = 1
, srcSpanEndLine = srcSpanEndLine + 1
}
_ -> loc
SrcSpan {..} = loc
comment
| block = case txt of
'%':prose -> ProseComment prose
_ -> BlockComment
| otherwise = LineComment
classifyToken :: Token -> Classifier
classifyToken t
| braces t = Braces
| specialPunctuation t = SpecPunctuation
| punctuation t = Punctuation
| keyword t = Keyword
| pragma t = Pragma
| thQuote t = THQuote
| thEscape t = THEscape
| thQuasiQuote t = QuasiQuote
| otherwise = Other
thQuote :: Token -> Bool
thQuote = flip elem
[ THExpQuote
, THPatQuote
, THDecQuote
, THTypQuote
, THCloseQuote
, THVarQuote
, THTyQuote
]
thEscape :: Token -> Bool
thEscape (THIdEscape _) = True
thEscape THParenEscape = True
thEscape _ = False
thQuasiQuote :: Token -> Bool
thQuasiQuote (THQuasiQuote _) = True
thQuasiQuote _ = False
braces :: Token -> Bool
braces = flip elem
[ LeftParen
, RightParen
, LeftHashParen
, RightHashParen
, LeftCurlyBar
, RightCurlyBar
, LeftCurly
, RightCurly
, VRightCurly
, LeftSquare
, RightSquare
]
specialPunctuation :: Token -> Bool
specialPunctuation = flip elem
[ BackQuote
, Colon
, DoubleColon
, Equals
, Backslash
, Bar
, LeftArrow
, RightArrow
, At
, Tilde
, DoubleArrow
, Minus
, Exclamation
, Star
, LeftArrowTail
, RightArrowTail
, LeftDblArrowTail
, RightDblArrowTail
]
punctuation :: Token -> Bool
punctuation = flip elem
[ SemiColon
, Comma
, Underscore
, Dot
, DotDot
]
pragma :: Token -> Bool
pragma t = case t of
RULES -> True
INLINE _ -> True
INLINE_CONLIKE -> True
SPECIALISE -> True
SPECIALISE_INLINE _ -> True
SOURCE -> True
DEPRECATED -> True
WARNING -> True
SCC -> True
GENERATED -> True
CORE -> True
UNPACK -> True
OPTIONS _ -> True
LANGUAGE -> True
ANN -> True
_ -> False
keyword :: Token -> Bool
keyword = flip elem
[ KW_As
, KW_By
, KW_Case
, KW_Class
, KW_Data
, KW_Default
, KW_Deriving
, KW_Do
, KW_MDo
, KW_Else
, KW_Family
, KW_Forall
, KW_Group
, KW_Hiding
, KW_If
, KW_Import
, KW_In
, KW_Infix
, KW_InfixL
, KW_InfixR
, KW_Instance
, KW_Let
, KW_Module
, KW_NewType
, KW_Of
, KW_Proc
, KW_Rec
, KW_Then
, KW_Type
, KW_Using
, KW_Where
, KW_Qualified
, KW_Foreign
, KW_Export
, KW_Safe
, KW_Unsafe
, KW_Threadsafe
, KW_StdCall
, KW_CCall
, KW_CPlusPlus
, KW_DotNet
, KW_Jvm
, KW_Js
]
|
|
|
|
|
|
module Text.ProseDoc.Classifier.Types where
import Language.Haskell.Exts.SrcLoc
data Classifier
= ModuleHead
| ModulePragma
| ImportDecl
| ModuleName
| ValueName
| ConstrName
| TypeName
| Pragma
| Name
| Keyword
| Punctuation
| SpecPunctuation
| Braces
| QuasiQuote
| THQuote
| THEscape
| Other
| BlockComment
| LineComment
| ProseComment String
| Signature
| InfixOperator
| StringLit
deriving (Show, Eq, Ord)
type LineNo = Int
type Column = Int
type Pos = (LineNo, Column)
type Printable = String
type Fragment = (SrcSpan, Classifier)
class SourceFragment f where
toFragments :: f -> [Fragment]
isProse :: Classifier -> Bool
isProse (ProseComment _) = True
isProse _ = False
|
|
|
|
|
Parsing Haskell Source
Haskell modules are parsed using Language.Haskell.Exts.Parser and the tokens, comments and AST is fed to a TreeBuilder (see Text.ProseDoc.Tree.Builder ). | {-# LANGUAGE ScopedTypeVariables #-}
module Text.ProseDoc.Parser where
import Control.Applicative ((<$>))
import Control.Error
import Data.List (sort)
import Language.Haskell.Exts (readExtensions)
import Language.Haskell.Exts.Lexer
import Language.Haskell.Exts.Parser
import Language.Haskell.Exts.SrcLoc
import Language.Haskell.Exts.Pretty (prettyPrint)
import qualified Language.Haskell.Exts.Annotated.Syntax as S
import Text.ProseDoc.Tree
import Text.ProseDoc.Tree.Builder
import Text.ProseDoc.Classifier
import Text.ProseDoc.Classifier.Types
parseSourceFile :: FilePath -> EitherT String IO (Tree Classifier Printable)
parseSourceFile fp = do
src <- scriptIO $ readFile fp
hoistEither $ buildTree fp src
buildTree :: FilePath -> String -> Either String (Tree Classifier Printable)
buildTree path src = case parseResult . parseMode <$> readExtensions src of
Nothing -> Left "unable to parse language extensions"
Just (ParseOk r) -> return r
Just (ParseFailed loc msg) -> Left (prettyPrint loc ++ ": " ++ msg)
where
parseResult mode = do
tokens <- lexTokenStreamWithMode mode src
(ast :: S.Module SrcSpan, comments) <- parseWithComments mode src
let builder = mkTree ast
tree = runTreeBuilder builder src fragments
fragments
= sort
$ concatMap toFragments tokens
++ concatMap toFragments comments
return tree
parseMode exts = defaultParseMode
{ parseFilename = path
, fixities = Just []
, extensions = exts
}
|
|
|
|
|
Rendering to HTML | {-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE OverloadedStrings #-}
module Text.ProseDoc.Rendering where
import Control.Monad.State
import Control.Applicative ((<$>),(<*>))
import Control.Arrow ((&&&))
import Data.Monoid
import Data.String (fromString)
import Data.List (isPrefixOf, stripPrefix, intercalate)
import Text.Blaze.Html5 ((!))
import Text.Blaze.Extra
import Text.Blaze.Renderer.String
import qualified Text.Blaze.Html5 as H
import qualified Text.Blaze.Html5.Attributes as A
import Text.Pandoc.Readers.Markdown
import Text.Pandoc.Writers.HTML
import Text.Pandoc.Parsing (defaultParserState)
import Text.Pandoc.Shared (defaultWriterOptions)
import Text.ProseDoc.Tree
import Text.ProseDoc.Classifier.Types
import System.FilePath
|
htmlTOC builds a hierarchical <ul> tree from a list of filenames so that files that are in the same directory are placed as siblings in the tree.
| htmlTOC :: [FilePath] -> H.Html
htmlTOC = (H.ul !. "toc") . evalState (go []) . idify where
|
The internal implementation is rather hairy (there has to be a simpler way to do this!). We maintain the list of remaining filenames in the State monad and pop them out one by one when we are a the appropriate level of the tree.
We use two mutually recursive functions go and go2 .
go processes module names from the stack until it hits a name that doesn't match the current module prefix.
| go prefix = do
paths <- get
case paths of
[] -> return mempty
((fp,anchor):fps) ->
let parts = splitDirectories fp
rest = stripPrefix prefix parts
in case rest of
Nothing -> return mempty
Just r -> put fps >> go2 prefix (r,anchor)
|
go2 inserts the nested levels for a single module name
| go2 prefix ((x:xs), anchor) = do
let base = takeBaseName x
label = H.toHtml (intercalate "." (prefix ++ [base]))
link = H.a ! A.href (fromString ('#' : anchor)) $ label
tag = if null xs then link else label
descend <- case xs of
|
go (prefix ++ [base]) handles cases where we've just processed e.g. module Foo/Bar.hs and created a link for it. Next we have to look for possible child modules under Foo/Bar/ .
| [] -> go (prefix ++ [base])
|
If we still have parts of the current module name left, recurse go2 with the next name part. | _ -> go2 (prefix ++ [x]) (xs, anchor)
|
Finally, handle sibling modules that share the same prefix. | siblings <- go prefix
return $ H.li tag <> H.ul descend <> siblings
idify = map (id &&& pathToId)
moduleToHtml :: (FilePath, Tree Classifier Printable) -> H.Html
moduleToHtml (fp, t)
= H.tr !. "file-header" $ (H.td anchor <> fileTd )
<> treeToHtml t
where
fileTd = H.td . H.code . H.toHtml $ fp
anchor = H.a !# fromString (pathToId fp) $ ""
pathToId :: FilePath -> String
pathToId = map replaceChar . dropExtension where
replaceChar c
| c `elem` pathSeparators = '.'
| otherwise = c
data Section = Section
{ sectionProse :: String
, sectionCode :: Tree Classifier Printable
} deriving Show
|
Split the presentation tree into sections at every ProseComment label. | extractSections :: Tree Classifier Printable -> [Section]
extractSections = (pad:) . (++[pad]) . map toSection . splitTree isProse where
toSection (sep, tree) = case sep of
Nothing -> Section "" tree
Just (Label (ProseComment prose) _) -> Section prose tree
pad = Section "" (Leaf "\n")
treeToHtml :: Tree Classifier Printable -> H.Html
treeToHtml = mapM_ sectionToHtml . extractSections
sectionToHtml :: Section -> H.Html
sectionToHtml (Section {..}) = H.tr (proseTd <> codeTd) where
proseTd = H.td !. "prose" $ markdownToHtml sectionProse
codeTd = H.td !. "code"
$ H.pre
$ H.code !. "haskell"
$ codeTreeToHtml sectionCode
markdownToHtml :: String -> H.Html
markdownToHtml
= writeHtml defaultWriterOptions
. readMarkdown defaultParserState
codeTreeToHtml :: Tree Classifier Printable -> H.Html
codeTreeToHtml = foldTree addSpan H.toHtml . pruneEmptyBranches where
addSpan cls inner = case unwords (cssClass cls) of
"" -> inner
c -> H.span !. fromString c $ inner
|
Map the fragment classifiers into css classes. | cssClass Keyword = ["kw"]
cssClass Pragma = ["kw"]
cssClass ModulePragma = ["pragma"]
cssClass ModuleName = ["module-name"]
cssClass Name = ["name"]
cssClass Signature = ["typesig"]
cssClass TypeName = ["type-name"]
cssClass ConstrName = ["constr-name"]
cssClass Braces = ["brace"]
cssClass SpecPunctuation = ["syntax"]
cssClass Punctuation = ["punct"]
cssClass InfixOperator = ["infix-op"]
cssClass StringLit = ["lit", "string"]
cssClass LineComment = ["comment"]
cssClass BlockComment = ["comment"]
cssClass ImportDecl = ["import"]
cssClass _ = []
renderPage :: FilePath -> H.Html -> [H.Html] -> String
renderPage css toc mods = renderHtml . H.docTypeHtml $ docHead >> docBody where
docHead = H.head $ cssLink >> H.title "ProseDoc Generated Module Listing"
docBody = H.body $ (H.table !. "sections") $ toc <> mconcat mods
cssLink = H.link
! A.rel "stylesheet"
! A.type_ "text/css"
! A.href (fromString css)
|
|
|
|
|
| {-# LANGUAGE GADTs #-}
{-# LANGUAGE MultiParamTypeClasses #-}
module Text.ProseDoc.Tree where
import Data.Monoid
import Data.Foldable (foldMap)
|
A tree data type holds labels, branches and leaf nodes. A label classifies all branches and leaves below it. | data Tree l n where
Empty :: Tree l n
Label :: l -> Tree l n -> Tree l n
Branch :: [Tree l n] -> Tree l n
Leaf :: n -> Tree l n
deriving (Show)
|
The tree is folded into a monoid, depth-first using an abstract function that takes in two functions. The first is used to process labels and the second to process leaf data. | foldTree :: Monoid r => (l -> r -> r) -> (n -> r) -> Tree l n -> r
foldTree bf lf = go where
go Empty = mempty
go (Label l tree) = bf l (go tree)
go (Branch leaves) = foldMap go leaves
go (Leaf t) = lf t
|
Trees form a monoid so that the two trees are inserted as siblings into a branch. | instance Monoid (Tree l n) where
mempty = Empty
mappend Empty b = b
mappend a Empty = a
mappend (Branch b) (Branch b') = Branch (b <> b')
mappend a (Branch b) = Branch (a:b)
mappend a b = Branch [a,b]
|
Remove all label nodes from the tree. | pruneLabels :: Tree l n -> Tree l n
pruneLabels t = case t of
Label _ t' -> t'
Branch bs -> Branch $ map pruneLabels bs
t' -> t'
|
Remove parts of the tree that don't contain any leaf nodes. | pruneEmptyBranches :: Tree l n -> Tree l n
pruneEmptyBranches t = go t where
go t = case t of
Empty -> Empty
Branch bs -> case filter notEmpty . map go $ bs of
[] -> Empty
bs' -> Branch bs'
Label l t -> case go t of
Empty -> Empty
t' -> Label l t'
n -> n
notEmpty Empty = False
notEmpty _ = True
|
A tree can be split into parts based a label test. The split off parts will contain the same labels when walking the tree spine as the part they were cut off from. | breakTree :: (l -> Bool) -> Tree l n -> (Tree l n, Maybe (Tree l n), Tree l n)
breakTree test = go where
go t = case t of
Empty -> (Empty, Nothing, Empty)
Label l t'
| test l -> (Empty, Just (Label l t'), Empty)
| otherwise -> (Label l a, sep, b')
where
(a, sep, b) = go t'
b' = case sep of
Nothing -> Empty
_ -> Label l b
Leaf n -> (Leaf n, Nothing, Empty)
Branch bs -> case sep of
Nothing -> (Branch bs, Nothing, Empty)
_ -> (mconcat a, sep, mconcat b)
where (a, sep, b) = gob bs
gob [] = ([], Nothing, [])
gob (b:bs) = case sep of
Nothing -> (l:b', sep', bs')
_ -> (l:[], sep, r:bs)
where
(l, sep, r) = go b
(b', sep', bs') = gob bs
|
Version of breakTree which splits the tree into a list of tree sections. | splitTree :: (l -> Bool) -> Tree l n -> [(Maybe (Tree l n), Tree l n)]
splitTree test = go Nothing where
go Nothing Empty = []
go sep Empty = (sep, Empty) : []
go sep t = case (sep, l) of
(Nothing, Empty) -> go sep' r
_ -> (sep, l) : go sep' r
where (l, sep', r) = breakTree test t
|
|
|
|
|
| {-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE ViewPatterns #-}
module Text.ProseDoc.Tree.Builder where
import Control.Monad.State
import Control.Monad.Writer
import Control.Applicative
import Language.Haskell.Exts.SrcLoc
import Text.ProseDoc.Tree
import Text.ProseDoc.Classifier.Types
|
A TreeBuilder is a helper monad for associating AST elements with the position tagged streams of tokens and comments.
The first state transformer tracks the remaining source code and the current line/row position in the file. The inner state monad keeps a stack of source code fragments (tokens and comments) which are popped from the stack by the AST element that covers the source location. | newtype TreeBuilder a = TreeBuilder (StateT (String, Pos) (State [Fragment]) a)
deriving (Functor, Applicative, Monad)
|
For convenience, we define a monoid instance for tree-producing TreeBuilders so that we can directly mappend two monadic operations. | instance Monoid (TreeBuilder (Tree Classifier Printable)) where
mempty = return mempty
mappend = liftM2 mappend
|
Given a TreeBuilder , the origina lsource code and classified fragments, create a tree of classified, printable elements. | runTreeBuilder
:: TreeBuilder (Tree Classifier Printable)
-> String
-> [Fragment]
-> Tree Classifier Printable
runTreeBuilder (TreeBuilder bldr) src =
evalState $ evalStateT bldr (src,(1,1))
|
Pop all fragments that are before the given position. | popFragments :: Pos -> TreeBuilder [Fragment]
popFragments pos = TreeBuilder $ do
fragments <- lift get
let (include, exclude) = span ((<= pos).srcSpanEnd.fst) fragments
(include', exclude') = case exclude of
(e@(p,_):_) | srcSpanStart p < pos ->
let (e',e'') = breakFragment pos e
in (include ++ [e'], e'':exclude)
_ -> (include, exclude)
lift $ put exclude'
return include'
|
Sometimes we need to manually tweak some fragments. popCustom lets us pop a custom fragment with given classifier and length from the current source position. | popCustom :: Classifier -> Int -> TreeBuilder (Tree Classifier Printable)
popCustom cls len = do
(ln,col) <- TreeBuilder $ gets snd
fragmentToTree (SrcSpan "" ln col ln (col + len), cls)
|
Break a fragment into two parts at position. | breakFragment :: Pos -> Fragment -> (Fragment, Fragment)
breakFragment (ln,col) (l, cls) = ((loc, cls), (loc', cls)) where
loc = l { srcSpanEndColumn = col, srcSpanEndLine = ln }
loc' = l { srcSpanStartColumn = col, srcSpanStartLine = ln }
|
Pop fragments that are located inside the given SrcSpan from the stack and structure them into a tree. | popPrintables :: SrcSpan -> TreeBuilder (Tree Classifier Printable)
popPrintables loc | isNullSpan loc = return Empty
popPrintables loc =
mconcat <$> (popFragments (srcSpanEnd loc) >>= mapM fragmentToTree)
|
Pop fragments that are located before the given SrcSpan from the stack and structure them into a tree. | popPrintablesBefore :: SrcSpan -> TreeBuilder (Tree Classifier Printable)
popPrintablesBefore loc =
(mconcat <$> (popFragments (srcSpanStart loc) >>= mapM fragmentToTree))
<> leftOvers
where
leftOvers = do
fragments <- TreeBuilder $ lift get
case fragments of
[] -> return mempty
((loc',_):_) -> beforeToTree loc'
|
Pop all remaining source fragments from the stack. | popRemaining :: TreeBuilder (Tree Classifier Printable)
popRemaining = mconcat <$> (popAllFragments >>= mapM fragmentToTree) where
popAllFragments = TreeBuilder (lift get <* lift (put []))
|
Check if the current source location is within the given span. | currentlyWithin :: SrcSpan -> TreeBuilder Bool
currentlyWithin loc = TreeBuilder $ fmap (< srcSpanEnd loc) $ gets snd
|
Pop source code until the end of given fragment span and structure the fragment into a classified tree. | fragmentToTree :: Fragment -> TreeBuilder (Tree Classifier Printable)
fragmentToTree (loc, cls) = beforeToTree loc <> fragment where
fragment = Label cls . Leaf <$> extractSource (srcSpanEnd loc)
beforeToTree :: SrcSpan -> TreeBuilder (Tree Classifier Printable)
beforeToTree loc = do
pre <- extractSource (srcSpanStart loc)
case pre of
"" -> return mempty
_ -> return $ Leaf pre
|
Retrieve source code up until the given line/row position as a string. | extractSource :: Pos -> TreeBuilder String
extractSource (ln', col') = TreeBuilder $ unwrapWriter go where
unwrapWriter = fmap ($"") . execWriterT
go = do
(_, (ln, col)) <- get
let action
| ln < ln' = popLine >> go
| col < col' = popChars (col'-col)
| otherwise = return ()
action
popLine = do
(src, (ln, col)) <- lift get
let (s, tail -> src') = break (=='\n') src
lift $ put (src',(ln+1, 1))
tell (s ++)
tell ('\n' :)
popChars n = do
(src, (ln, col)) <- lift get
let (s, src') = splitAt n src
put (src', (ln, col+n))
tell (s ++)
|
|
|