Document more functions in T.P.Parsing and T.P.Shared.

This commit is contained in:
Albert Krewinkel 2022-03-12 23:16:31 +01:00
parent 699336cf5b
commit edfe34c86c
No known key found for this signature in database
GPG key ID: 388DC0B21F631124
4 changed files with 76 additions and 11 deletions

View file

@ -40,16 +40,18 @@ import qualified Text.Pandoc.Builder as B
-- line).
gridTableWith :: (Monad m, Monad mf, HasLastStrPosition st, HasReaderOptions st)
=> ParserT Sources st m (mf Blocks) -- ^ Block list parser
-> Bool -- ^ Headerless table
-> Bool -- ^ Headerless table
-> ParserT Sources st m (mf Blocks)
gridTableWith blocks headless =
tableWith (gridTableHeader headless blocks) (gridTableRow blocks)
(gridTableSep '-') gridTableFooter
-- | Like @'gridTableWith'@, but returns 'TableComponents' instead of a
-- Table.
gridTableWith' :: (Monad m, Monad mf,
HasReaderOptions st, HasLastStrPosition st)
=> ParserT Sources st m (mf Blocks) -- ^ Block list parser
-> Bool -- ^ Headerless table
=> ParserT Sources st m (mf Blocks) -- ^ Block list parser
-> Bool -- ^ Headerless table
-> ParserT Sources st m (TableComponents mf)
gridTableWith' blocks headless =
tableWith' (gridTableHeader headless blocks) (gridTableRow blocks)
@ -59,6 +61,10 @@ gridTableSplitLine :: [Int] -> Text -> [Text]
gridTableSplitLine indices line = map removeFinalBar $ tail $
splitTextByIndices (init indices) $ trimr line
-- | Parses a grid segment, where the grid line is made up from the
-- given char and terminated with a plus (@+@). The grid line may begin
-- and/or end with a colon, signaling column alignment. Returns the size
-- of the grid part and column alignment
gridPart :: Monad m => Char -> ParserT Sources st m ((Int, Int), Alignment)
gridPart ch = do
leftColon <- option False (True <$ char ':')

View file

@ -56,7 +56,8 @@ import qualified Data.Map as M
import qualified Data.Text as T
-- | Parses a roman numeral (uppercase or lowercase), returns number.
romanNumeral :: (Stream s m Char, UpdateSourcePos s Char) => Bool -- ^ Uppercase if true
romanNumeral :: (Stream s m Char, UpdateSourcePos s Char)
=> Bool -- ^ Uppercase if true
-> ParserT s st m Int
romanNumeral upperCase = do
let rchar uc = char $ if upperCase then uc else toLower uc
@ -212,4 +213,3 @@ orderedListMarker style delim = do
TwoParens -> inTwoParens
(start, _, _) <- context num
return start

View file

@ -46,6 +46,10 @@ import Text.Parsec
import qualified Text.Pandoc.Builder as B
-- | Parses various ASCII punctuation, quotes, and apostrophe in a smart
-- way, inferring their semantic meaning.
--
-- Fails unless the 'Ext_smart' extension has been enabled.
smartPunctuation :: (HasReaderOptions st, HasLastStrPosition st,
HasQuoteContext st m,
Stream s m Char, UpdateSourcePos s Char)
@ -55,12 +59,16 @@ smartPunctuation inlineParser = do
guardEnabled Ext_smart
choice [ quoted inlineParser, apostrophe, doubleCloseQuote, dash, ellipses ]
-- | Parses inline text in single or double quotes, assumes English
-- quoting conventions.
quoted :: (HasLastStrPosition st, HasQuoteContext st m,
Stream s m Char, UpdateSourcePos s Char)
=> ParserT s st m Inlines
-> ParserT s st m Inlines
quoted inlineParser = doubleQuoted inlineParser <|> singleQuoted inlineParser
-- | Parses inline text in single quotes, assumes English quoting
-- conventions.
singleQuoted :: (HasLastStrPosition st, HasQuoteContext st m,
Stream s m Char, UpdateSourcePos s Char)
=> ParserT s st m Inlines
@ -72,6 +80,8 @@ singleQuoted inlineParser = do
(withQuoteContext InSingleQuote (many1Till inlineParser singleQuoteEnd)))
<|> pure "\8217"
-- | Parses inline text in double quotes; assumes English quoting
-- conventions.
doubleQuoted :: (HasQuoteContext st m, HasLastStrPosition st,
Stream s m Char, UpdateSourcePos s Char)
=> ParserT s st m Inlines
@ -89,6 +99,14 @@ charOrRef cs =
guard (c `elem` cs)
return c)
-- | Succeeds if the parser is
--
-- * not within single quoted text;
-- * not directly after a word; and
-- * looking at an opening single quote char that's not followed by a
-- space.
--
-- Gobbles the quote character on success.
singleQuoteStart :: (HasLastStrPosition st, HasQuoteContext st m,
Stream s m Char, UpdateSourcePos s Char)
=> ParserT s st m ()
@ -106,6 +124,16 @@ singleQuoteEnd = try $ do
charOrRef "'\8217\146"
notFollowedBy alphaNum
-- | Succeeds if the parser is
--
-- * not within a double quoted text;
--
-- * not directly after a word; and
--
-- * looking at an opening double quote char that's not followed by a
-- space.
--
-- Gobbles the quote character on success.
doubleQuoteStart :: (HasLastStrPosition st,
HasQuoteContext st m,
Stream s m Char, UpdateSourcePos s Char)
@ -116,20 +144,33 @@ doubleQuoteStart = do
try $ do charOrRef "\"\8220\147"
void $ lookAhead (satisfy (not . isSpaceChar))
-- | Parses a closing quote character.
doubleQuoteEnd :: (Stream s m Char, UpdateSourcePos s Char)
=> ParserT s st m ()
doubleQuoteEnd = void (charOrRef "\"\8221\148")
apostrophe :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Inlines
-- | Parses an ASCII apostrophe (@'@) or right single quotation mark and
-- returns a RIGHT SINGLE QUOtatiON MARK character.
apostrophe :: (Stream s m Char, UpdateSourcePos s Char)
=> ParserT s st m Inlines
apostrophe = (char '\'' <|> char '\8217') >> return (B.str "\8217")
doubleCloseQuote :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Inlines
-- | Parses an ASCII quotation mark character and returns a RIGHT DOUBLE
-- QUOTATION MARK.
doubleCloseQuote :: (Stream s m Char, UpdateSourcePos s Char)
=> ParserT s st m Inlines
doubleCloseQuote = B.str "\8221" <$ char '"'
-- | Parses three dots as HORIZONTAL ELLIPSIS.
ellipses :: (Stream s m Char, UpdateSourcePos s Char)
=> ParserT s st m Inlines
ellipses = try (string "..." >> return (B.str "\8230"))
-- | Parses two hyphens as EN DASH and three as EM DASH.
--
-- If the extension @'Ext_old_dashes'@ is enabled, then two hyphens are
-- parsed as EM DASH, and one hyphen is parsed as EN DASH if it is
-- followed by a digit.
dash :: (HasReaderOptions st, Stream s m Char, UpdateSourcePos s Char)
=> ParserT s st m Inlines
dash = try $ do

View file

@ -152,6 +152,9 @@ splitTextBy isSep t
| otherwise = let (first, rest) = T.break isSep t
in first : splitTextBy isSep (T.dropWhile isSep rest)
-- | Split text at the given widths. Note that the break points are
-- /not/ indices but text widths, which will be different for East Asian
-- characters, emojis, etc.
splitTextByIndices :: [Int] -> T.Text -> [T.Text]
splitTextByIndices ns = splitTextByRelIndices (zipWith (-) ns (0:ns)) . T.unpack
where
@ -160,9 +163,12 @@ splitTextByIndices ns = splitTextByRelIndices (zipWith (-) ns (0:ns)) . T.unpack
let (first, rest) = splitAt' x cs
in T.pack first : splitTextByRelIndices xs rest
-- Note: don't replace this with T.splitAt, which is not sensitive
-- | Returns a pair whose first element is a prefix of @t@ and that has
-- width @n@, and whose second is the remainder of the string.
--
-- Note: Do *not* replace this with 'T.splitAt', which is not sensitive
-- to character widths!
splitAt' :: Int -> [Char] -> ([Char],[Char])
splitAt' :: Int {-^ n -} -> [Char] {-^ t -} -> ([Char],[Char])
splitAt' _ [] = ([],[])
splitAt' n xs | n <= 0 = ([],xs)
splitAt' n (x:xs) = (x:ys,zs)
@ -175,7 +181,11 @@ ordNub l = go Set.empty l
go s (x:xs) = if x `Set.member` s then go s xs
else x : go (Set.insert x s) xs
findM :: forall m t a. (Monad m, Foldable t) => (a -> m Bool) -> t a -> m (Maybe a)
-- | Returns the last element in a foldable structure for that the
-- monadic predicate holds true, and @Nothing@ if no such element
-- exists.
findM :: forall m t a. (Monad m, Foldable t)
=> (a -> m Bool) -> t a -> m (Maybe a)
findM p = foldr go (pure Nothing)
where
go :: a -> m (Maybe a) -> m (Maybe a)
@ -191,6 +201,7 @@ findM p = foldr go (pure Nothing)
inquotes :: T.Text -> T.Text
inquotes txt = T.cons '\"' (T.snoc txt '\"')
-- | Like @'show'@, but returns a 'T.Text' instead of a 'String'.
tshow :: Show a => a -> T.Text
tshow = T.pack . show
@ -206,6 +217,8 @@ notElemText c = T.all (/= c)
stripTrailingNewlines :: T.Text -> T.Text
stripTrailingNewlines = T.dropWhileEnd (== '\n')
-- | Returns 'True' for an ASCII whitespace character, viz. space,
-- carriage return, newline, and horizontal tab.
isWS :: Char -> Bool
isWS ' ' = True
isWS '\r' = True
@ -317,6 +330,7 @@ crFilter = T.filter (/= '\r')
normalizeDate :: T.Text -> Maybe T.Text
normalizeDate = fmap T.pack . normalizeDate' . T.unpack
-- | Like @'normalizeDate'@, but acts on 'String' instead of 'T.Text'.
normalizeDate' :: String -> Maybe String
normalizeDate' s = fmap (formatTime defaultTimeLocale "%F")
(msum $ map (\fs -> parsetimeWith fs s >>= rejectBadYear) formats :: Maybe Day)
@ -382,10 +396,12 @@ removeFormatting = query go . walk (deNote . deQuote)
go LineBreak = [Space]
go _ = []
-- | Replaces 'Note' elements with empty strings.
deNote :: Inline -> Inline
deNote (Note _) = Str ""
deNote x = x
-- | Turns links into spans, keeping just the link text.
deLink :: Inline -> Inline
deLink (Link _ ils _) = Span nullAttr ils
deLink x = x
@ -412,6 +428,8 @@ stringify = query go . walk fixInlines
fixInlines (q@Quoted{}) = deQuote q
fixInlines x = x
-- | Unwrap 'Quoted' inline elements, enclosing the contents with
-- English-style Unicode quotes instead.
deQuote :: Inline -> Inline
deQuote (Quoted SingleQuote xs) =
Span ("",[],[]) (Str "\8216" : xs ++ [Str "\8217"])
@ -813,7 +831,7 @@ collapseFilePath = Posix.joinPath . reverse . foldl' go [] . splitDirectories
isSingleton _ = Nothing
checkPathSeperator = fmap isPathSeparator . isSingleton
-- Convert the path part of a file: URI to a regular path.
-- | Converts the path part of a file: URI to a regular path.
-- On windows, @/c:/foo@ should be @c:/foo@.
-- On linux, @/foo@ should be @/foo@.
uriPathToPath :: T.Text -> FilePath