From edfe34c86c595f253467304ab54061a56cf5045b Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <albert@zeitkraut.de> Date: Sat, 12 Mar 2022 23:16:31 +0100 Subject: [PATCH] Document more functions in T.P.Parsing and T.P.Shared. --- src/Text/Pandoc/Parsing/GridTable.hs | 12 ++++++-- src/Text/Pandoc/Parsing/Lists.hs | 4 +-- src/Text/Pandoc/Parsing/Smart.hs | 45 ++++++++++++++++++++++++++-- src/Text/Pandoc/Shared.hs | 26 +++++++++++++--- 4 files changed, 76 insertions(+), 11 deletions(-) diff --git a/src/Text/Pandoc/Parsing/GridTable.hs b/src/Text/Pandoc/Parsing/GridTable.hs index ebc12197f..74ad64d78 100644 --- a/src/Text/Pandoc/Parsing/GridTable.hs +++ b/src/Text/Pandoc/Parsing/GridTable.hs @@ -40,16 +40,18 @@ import qualified Text.Pandoc.Builder as B -- line). gridTableWith :: (Monad m, Monad mf, HasLastStrPosition st, HasReaderOptions st) => ParserT Sources st m (mf Blocks) -- ^ Block list parser - -> Bool -- ^ Headerless table + -> Bool -- ^ Headerless table -> ParserT Sources st m (mf Blocks) gridTableWith blocks headless = tableWith (gridTableHeader headless blocks) (gridTableRow blocks) (gridTableSep '-') gridTableFooter +-- | Like @'gridTableWith'@, but returns 'TableComponents' instead of a +-- Table. gridTableWith' :: (Monad m, Monad mf, HasReaderOptions st, HasLastStrPosition st) - => ParserT Sources st m (mf Blocks) -- ^ Block list parser - -> Bool -- ^ Headerless table + => ParserT Sources st m (mf Blocks) -- ^ Block list parser + -> Bool -- ^ Headerless table -> ParserT Sources st m (TableComponents mf) gridTableWith' blocks headless = tableWith' (gridTableHeader headless blocks) (gridTableRow blocks) @@ -59,6 +61,10 @@ gridTableSplitLine :: [Int] -> Text -> [Text] gridTableSplitLine indices line = map removeFinalBar $ tail $ splitTextByIndices (init indices) $ trimr line +-- | Parses a grid segment, where the grid line is made up from the +-- given char and terminated with a plus (@+@). The grid line may begin +-- and/or end with a colon, signaling column alignment. Returns the size +-- of the grid part and column alignment gridPart :: Monad m => Char -> ParserT Sources st m ((Int, Int), Alignment) gridPart ch = do leftColon <- option False (True <$ char ':') diff --git a/src/Text/Pandoc/Parsing/Lists.hs b/src/Text/Pandoc/Parsing/Lists.hs index d3f2a2e6f..09a06e440 100644 --- a/src/Text/Pandoc/Parsing/Lists.hs +++ b/src/Text/Pandoc/Parsing/Lists.hs @@ -56,7 +56,8 @@ import qualified Data.Map as M import qualified Data.Text as T -- | Parses a roman numeral (uppercase or lowercase), returns number. -romanNumeral :: (Stream s m Char, UpdateSourcePos s Char) => Bool -- ^ Uppercase if true +romanNumeral :: (Stream s m Char, UpdateSourcePos s Char) + => Bool -- ^ Uppercase if true -> ParserT s st m Int romanNumeral upperCase = do let rchar uc = char $ if upperCase then uc else toLower uc @@ -212,4 +213,3 @@ orderedListMarker style delim = do TwoParens -> inTwoParens (start, _, _) <- context num return start - diff --git a/src/Text/Pandoc/Parsing/Smart.hs b/src/Text/Pandoc/Parsing/Smart.hs index 58254d2a6..13cb18efe 100644 --- a/src/Text/Pandoc/Parsing/Smart.hs +++ b/src/Text/Pandoc/Parsing/Smart.hs @@ -46,6 +46,10 @@ import Text.Parsec import qualified Text.Pandoc.Builder as B +-- | Parses various ASCII punctuation, quotes, and apostrophe in a smart +-- way, inferring their semantic meaning. +-- +-- Fails unless the 'Ext_smart' extension has been enabled. smartPunctuation :: (HasReaderOptions st, HasLastStrPosition st, HasQuoteContext st m, Stream s m Char, UpdateSourcePos s Char) @@ -55,12 +59,16 @@ smartPunctuation inlineParser = do guardEnabled Ext_smart choice [ quoted inlineParser, apostrophe, doubleCloseQuote, dash, ellipses ] +-- | Parses inline text in single or double quotes, assumes English +-- quoting conventions. quoted :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Inlines -> ParserT s st m Inlines quoted inlineParser = doubleQuoted inlineParser <|> singleQuoted inlineParser +-- | Parses inline text in single quotes, assumes English quoting +-- conventions. singleQuoted :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Inlines @@ -72,6 +80,8 @@ singleQuoted inlineParser = do (withQuoteContext InSingleQuote (many1Till inlineParser singleQuoteEnd))) <|> pure "\8217" +-- | Parses inline text in double quotes; assumes English quoting +-- conventions. doubleQuoted :: (HasQuoteContext st m, HasLastStrPosition st, Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Inlines @@ -89,6 +99,14 @@ charOrRef cs = guard (c `elem` cs) return c) +-- | Succeeds if the parser is +-- +-- * not within single quoted text; +-- * not directly after a word; and +-- * looking at an opening single quote char that's not followed by a +-- space. +-- +-- Gobbles the quote character on success. singleQuoteStart :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char, UpdateSourcePos s Char) => ParserT s st m () @@ -106,6 +124,16 @@ singleQuoteEnd = try $ do charOrRef "'\8217\146" notFollowedBy alphaNum +-- | Succeeds if the parser is +-- +-- * not within a double quoted text; +-- +-- * not directly after a word; and +-- +-- * looking at an opening double quote char that's not followed by a +-- space. +-- +-- Gobbles the quote character on success. doubleQuoteStart :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char, UpdateSourcePos s Char) @@ -116,20 +144,33 @@ doubleQuoteStart = do try $ do charOrRef "\"\8220\147" void $ lookAhead (satisfy (not . isSpaceChar)) +-- | Parses a closing quote character. doubleQuoteEnd :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m () doubleQuoteEnd = void (charOrRef "\"\8221\148") -apostrophe :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Inlines +-- | Parses an ASCII apostrophe (@'@) or right single quotation mark and +-- returns a RIGHT SINGLE QUOtatiON MARK character. +apostrophe :: (Stream s m Char, UpdateSourcePos s Char) + => ParserT s st m Inlines apostrophe = (char '\'' <|> char '\8217') >> return (B.str "\8217") -doubleCloseQuote :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Inlines +-- | Parses an ASCII quotation mark character and returns a RIGHT DOUBLE +-- QUOTATION MARK. +doubleCloseQuote :: (Stream s m Char, UpdateSourcePos s Char) + => ParserT s st m Inlines doubleCloseQuote = B.str "\8221" <$ char '"' +-- | Parses three dots as HORIZONTAL ELLIPSIS. ellipses :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Inlines ellipses = try (string "..." >> return (B.str "\8230")) +-- | Parses two hyphens as EN DASH and three as EM DASH. +-- +-- If the extension @'Ext_old_dashes'@ is enabled, then two hyphens are +-- parsed as EM DASH, and one hyphen is parsed as EN DASH if it is +-- followed by a digit. dash :: (HasReaderOptions st, Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Inlines dash = try $ do diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 5b833dd34..26c922a04 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -152,6 +152,9 @@ splitTextBy isSep t | otherwise = let (first, rest) = T.break isSep t in first : splitTextBy isSep (T.dropWhile isSep rest) +-- | Split text at the given widths. Note that the break points are +-- /not/ indices but text widths, which will be different for East Asian +-- characters, emojis, etc. splitTextByIndices :: [Int] -> T.Text -> [T.Text] splitTextByIndices ns = splitTextByRelIndices (zipWith (-) ns (0:ns)) . T.unpack where @@ -160,9 +163,12 @@ splitTextByIndices ns = splitTextByRelIndices (zipWith (-) ns (0:ns)) . T.unpack let (first, rest) = splitAt' x cs in T.pack first : splitTextByRelIndices xs rest --- Note: don't replace this with T.splitAt, which is not sensitive +-- | Returns a pair whose first element is a prefix of @t@ and that has +-- width @n@, and whose second is the remainder of the string. +-- +-- Note: Do *not* replace this with 'T.splitAt', which is not sensitive -- to character widths! -splitAt' :: Int -> [Char] -> ([Char],[Char]) +splitAt' :: Int {-^ n -} -> [Char] {-^ t -} -> ([Char],[Char]) splitAt' _ [] = ([],[]) splitAt' n xs | n <= 0 = ([],xs) splitAt' n (x:xs) = (x:ys,zs) @@ -175,7 +181,11 @@ ordNub l = go Set.empty l go s (x:xs) = if x `Set.member` s then go s xs else x : go (Set.insert x s) xs -findM :: forall m t a. (Monad m, Foldable t) => (a -> m Bool) -> t a -> m (Maybe a) +-- | Returns the last element in a foldable structure for that the +-- monadic predicate holds true, and @Nothing@ if no such element +-- exists. +findM :: forall m t a. (Monad m, Foldable t) + => (a -> m Bool) -> t a -> m (Maybe a) findM p = foldr go (pure Nothing) where go :: a -> m (Maybe a) -> m (Maybe a) @@ -191,6 +201,7 @@ findM p = foldr go (pure Nothing) inquotes :: T.Text -> T.Text inquotes txt = T.cons '\"' (T.snoc txt '\"') +-- | Like @'show'@, but returns a 'T.Text' instead of a 'String'. tshow :: Show a => a -> T.Text tshow = T.pack . show @@ -206,6 +217,8 @@ notElemText c = T.all (/= c) stripTrailingNewlines :: T.Text -> T.Text stripTrailingNewlines = T.dropWhileEnd (== '\n') +-- | Returns 'True' for an ASCII whitespace character, viz. space, +-- carriage return, newline, and horizontal tab. isWS :: Char -> Bool isWS ' ' = True isWS '\r' = True @@ -317,6 +330,7 @@ crFilter = T.filter (/= '\r') normalizeDate :: T.Text -> Maybe T.Text normalizeDate = fmap T.pack . normalizeDate' . T.unpack +-- | Like @'normalizeDate'@, but acts on 'String' instead of 'T.Text'. normalizeDate' :: String -> Maybe String normalizeDate' s = fmap (formatTime defaultTimeLocale "%F") (msum $ map (\fs -> parsetimeWith fs s >>= rejectBadYear) formats :: Maybe Day) @@ -382,10 +396,12 @@ removeFormatting = query go . walk (deNote . deQuote) go LineBreak = [Space] go _ = [] +-- | Replaces 'Note' elements with empty strings. deNote :: Inline -> Inline deNote (Note _) = Str "" deNote x = x +-- | Turns links into spans, keeping just the link text. deLink :: Inline -> Inline deLink (Link _ ils _) = Span nullAttr ils deLink x = x @@ -412,6 +428,8 @@ stringify = query go . walk fixInlines fixInlines (q@Quoted{}) = deQuote q fixInlines x = x +-- | Unwrap 'Quoted' inline elements, enclosing the contents with +-- English-style Unicode quotes instead. deQuote :: Inline -> Inline deQuote (Quoted SingleQuote xs) = Span ("",[],[]) (Str "\8216" : xs ++ [Str "\8217"]) @@ -813,7 +831,7 @@ collapseFilePath = Posix.joinPath . reverse . foldl' go [] . splitDirectories isSingleton _ = Nothing checkPathSeperator = fmap isPathSeparator . isSingleton --- Convert the path part of a file: URI to a regular path. +-- | Converts the path part of a file: URI to a regular path. -- On windows, @/c:/foo@ should be @c:/foo@. -- On linux, @/foo@ should be @/foo@. uriPathToPath :: T.Text -> FilePath