Document more functions in T.P.Parsing and T.P.Shared.
This commit is contained in:
parent
699336cf5b
commit
edfe34c86c
4 changed files with 76 additions and 11 deletions
|
@ -40,16 +40,18 @@ import qualified Text.Pandoc.Builder as B
|
|||
-- line).
|
||||
gridTableWith :: (Monad m, Monad mf, HasLastStrPosition st, HasReaderOptions st)
|
||||
=> ParserT Sources st m (mf Blocks) -- ^ Block list parser
|
||||
-> Bool -- ^ Headerless table
|
||||
-> Bool -- ^ Headerless table
|
||||
-> ParserT Sources st m (mf Blocks)
|
||||
gridTableWith blocks headless =
|
||||
tableWith (gridTableHeader headless blocks) (gridTableRow blocks)
|
||||
(gridTableSep '-') gridTableFooter
|
||||
|
||||
-- | Like @'gridTableWith'@, but returns 'TableComponents' instead of a
|
||||
-- Table.
|
||||
gridTableWith' :: (Monad m, Monad mf,
|
||||
HasReaderOptions st, HasLastStrPosition st)
|
||||
=> ParserT Sources st m (mf Blocks) -- ^ Block list parser
|
||||
-> Bool -- ^ Headerless table
|
||||
=> ParserT Sources st m (mf Blocks) -- ^ Block list parser
|
||||
-> Bool -- ^ Headerless table
|
||||
-> ParserT Sources st m (TableComponents mf)
|
||||
gridTableWith' blocks headless =
|
||||
tableWith' (gridTableHeader headless blocks) (gridTableRow blocks)
|
||||
|
@ -59,6 +61,10 @@ gridTableSplitLine :: [Int] -> Text -> [Text]
|
|||
gridTableSplitLine indices line = map removeFinalBar $ tail $
|
||||
splitTextByIndices (init indices) $ trimr line
|
||||
|
||||
-- | Parses a grid segment, where the grid line is made up from the
|
||||
-- given char and terminated with a plus (@+@). The grid line may begin
|
||||
-- and/or end with a colon, signaling column alignment. Returns the size
|
||||
-- of the grid part and column alignment
|
||||
gridPart :: Monad m => Char -> ParserT Sources st m ((Int, Int), Alignment)
|
||||
gridPart ch = do
|
||||
leftColon <- option False (True <$ char ':')
|
||||
|
|
|
@ -56,7 +56,8 @@ import qualified Data.Map as M
|
|||
import qualified Data.Text as T
|
||||
|
||||
-- | Parses a roman numeral (uppercase or lowercase), returns number.
|
||||
romanNumeral :: (Stream s m Char, UpdateSourcePos s Char) => Bool -- ^ Uppercase if true
|
||||
romanNumeral :: (Stream s m Char, UpdateSourcePos s Char)
|
||||
=> Bool -- ^ Uppercase if true
|
||||
-> ParserT s st m Int
|
||||
romanNumeral upperCase = do
|
||||
let rchar uc = char $ if upperCase then uc else toLower uc
|
||||
|
@ -212,4 +213,3 @@ orderedListMarker style delim = do
|
|||
TwoParens -> inTwoParens
|
||||
(start, _, _) <- context num
|
||||
return start
|
||||
|
||||
|
|
|
@ -46,6 +46,10 @@ import Text.Parsec
|
|||
|
||||
import qualified Text.Pandoc.Builder as B
|
||||
|
||||
-- | Parses various ASCII punctuation, quotes, and apostrophe in a smart
|
||||
-- way, inferring their semantic meaning.
|
||||
--
|
||||
-- Fails unless the 'Ext_smart' extension has been enabled.
|
||||
smartPunctuation :: (HasReaderOptions st, HasLastStrPosition st,
|
||||
HasQuoteContext st m,
|
||||
Stream s m Char, UpdateSourcePos s Char)
|
||||
|
@ -55,12 +59,16 @@ smartPunctuation inlineParser = do
|
|||
guardEnabled Ext_smart
|
||||
choice [ quoted inlineParser, apostrophe, doubleCloseQuote, dash, ellipses ]
|
||||
|
||||
-- | Parses inline text in single or double quotes, assumes English
|
||||
-- quoting conventions.
|
||||
quoted :: (HasLastStrPosition st, HasQuoteContext st m,
|
||||
Stream s m Char, UpdateSourcePos s Char)
|
||||
=> ParserT s st m Inlines
|
||||
-> ParserT s st m Inlines
|
||||
quoted inlineParser = doubleQuoted inlineParser <|> singleQuoted inlineParser
|
||||
|
||||
-- | Parses inline text in single quotes, assumes English quoting
|
||||
-- conventions.
|
||||
singleQuoted :: (HasLastStrPosition st, HasQuoteContext st m,
|
||||
Stream s m Char, UpdateSourcePos s Char)
|
||||
=> ParserT s st m Inlines
|
||||
|
@ -72,6 +80,8 @@ singleQuoted inlineParser = do
|
|||
(withQuoteContext InSingleQuote (many1Till inlineParser singleQuoteEnd)))
|
||||
<|> pure "\8217"
|
||||
|
||||
-- | Parses inline text in double quotes; assumes English quoting
|
||||
-- conventions.
|
||||
doubleQuoted :: (HasQuoteContext st m, HasLastStrPosition st,
|
||||
Stream s m Char, UpdateSourcePos s Char)
|
||||
=> ParserT s st m Inlines
|
||||
|
@ -89,6 +99,14 @@ charOrRef cs =
|
|||
guard (c `elem` cs)
|
||||
return c)
|
||||
|
||||
-- | Succeeds if the parser is
|
||||
--
|
||||
-- * not within single quoted text;
|
||||
-- * not directly after a word; and
|
||||
-- * looking at an opening single quote char that's not followed by a
|
||||
-- space.
|
||||
--
|
||||
-- Gobbles the quote character on success.
|
||||
singleQuoteStart :: (HasLastStrPosition st, HasQuoteContext st m,
|
||||
Stream s m Char, UpdateSourcePos s Char)
|
||||
=> ParserT s st m ()
|
||||
|
@ -106,6 +124,16 @@ singleQuoteEnd = try $ do
|
|||
charOrRef "'\8217\146"
|
||||
notFollowedBy alphaNum
|
||||
|
||||
-- | Succeeds if the parser is
|
||||
--
|
||||
-- * not within a double quoted text;
|
||||
--
|
||||
-- * not directly after a word; and
|
||||
--
|
||||
-- * looking at an opening double quote char that's not followed by a
|
||||
-- space.
|
||||
--
|
||||
-- Gobbles the quote character on success.
|
||||
doubleQuoteStart :: (HasLastStrPosition st,
|
||||
HasQuoteContext st m,
|
||||
Stream s m Char, UpdateSourcePos s Char)
|
||||
|
@ -116,20 +144,33 @@ doubleQuoteStart = do
|
|||
try $ do charOrRef "\"\8220\147"
|
||||
void $ lookAhead (satisfy (not . isSpaceChar))
|
||||
|
||||
-- | Parses a closing quote character.
|
||||
doubleQuoteEnd :: (Stream s m Char, UpdateSourcePos s Char)
|
||||
=> ParserT s st m ()
|
||||
doubleQuoteEnd = void (charOrRef "\"\8221\148")
|
||||
|
||||
apostrophe :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Inlines
|
||||
-- | Parses an ASCII apostrophe (@'@) or right single quotation mark and
|
||||
-- returns a RIGHT SINGLE QUOtatiON MARK character.
|
||||
apostrophe :: (Stream s m Char, UpdateSourcePos s Char)
|
||||
=> ParserT s st m Inlines
|
||||
apostrophe = (char '\'' <|> char '\8217') >> return (B.str "\8217")
|
||||
|
||||
doubleCloseQuote :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Inlines
|
||||
-- | Parses an ASCII quotation mark character and returns a RIGHT DOUBLE
|
||||
-- QUOTATION MARK.
|
||||
doubleCloseQuote :: (Stream s m Char, UpdateSourcePos s Char)
|
||||
=> ParserT s st m Inlines
|
||||
doubleCloseQuote = B.str "\8221" <$ char '"'
|
||||
|
||||
-- | Parses three dots as HORIZONTAL ELLIPSIS.
|
||||
ellipses :: (Stream s m Char, UpdateSourcePos s Char)
|
||||
=> ParserT s st m Inlines
|
||||
ellipses = try (string "..." >> return (B.str "\8230"))
|
||||
|
||||
-- | Parses two hyphens as EN DASH and three as EM DASH.
|
||||
--
|
||||
-- If the extension @'Ext_old_dashes'@ is enabled, then two hyphens are
|
||||
-- parsed as EM DASH, and one hyphen is parsed as EN DASH if it is
|
||||
-- followed by a digit.
|
||||
dash :: (HasReaderOptions st, Stream s m Char, UpdateSourcePos s Char)
|
||||
=> ParserT s st m Inlines
|
||||
dash = try $ do
|
||||
|
|
|
@ -152,6 +152,9 @@ splitTextBy isSep t
|
|||
| otherwise = let (first, rest) = T.break isSep t
|
||||
in first : splitTextBy isSep (T.dropWhile isSep rest)
|
||||
|
||||
-- | Split text at the given widths. Note that the break points are
|
||||
-- /not/ indices but text widths, which will be different for East Asian
|
||||
-- characters, emojis, etc.
|
||||
splitTextByIndices :: [Int] -> T.Text -> [T.Text]
|
||||
splitTextByIndices ns = splitTextByRelIndices (zipWith (-) ns (0:ns)) . T.unpack
|
||||
where
|
||||
|
@ -160,9 +163,12 @@ splitTextByIndices ns = splitTextByRelIndices (zipWith (-) ns (0:ns)) . T.unpack
|
|||
let (first, rest) = splitAt' x cs
|
||||
in T.pack first : splitTextByRelIndices xs rest
|
||||
|
||||
-- Note: don't replace this with T.splitAt, which is not sensitive
|
||||
-- | Returns a pair whose first element is a prefix of @t@ and that has
|
||||
-- width @n@, and whose second is the remainder of the string.
|
||||
--
|
||||
-- Note: Do *not* replace this with 'T.splitAt', which is not sensitive
|
||||
-- to character widths!
|
||||
splitAt' :: Int -> [Char] -> ([Char],[Char])
|
||||
splitAt' :: Int {-^ n -} -> [Char] {-^ t -} -> ([Char],[Char])
|
||||
splitAt' _ [] = ([],[])
|
||||
splitAt' n xs | n <= 0 = ([],xs)
|
||||
splitAt' n (x:xs) = (x:ys,zs)
|
||||
|
@ -175,7 +181,11 @@ ordNub l = go Set.empty l
|
|||
go s (x:xs) = if x `Set.member` s then go s xs
|
||||
else x : go (Set.insert x s) xs
|
||||
|
||||
findM :: forall m t a. (Monad m, Foldable t) => (a -> m Bool) -> t a -> m (Maybe a)
|
||||
-- | Returns the last element in a foldable structure for that the
|
||||
-- monadic predicate holds true, and @Nothing@ if no such element
|
||||
-- exists.
|
||||
findM :: forall m t a. (Monad m, Foldable t)
|
||||
=> (a -> m Bool) -> t a -> m (Maybe a)
|
||||
findM p = foldr go (pure Nothing)
|
||||
where
|
||||
go :: a -> m (Maybe a) -> m (Maybe a)
|
||||
|
@ -191,6 +201,7 @@ findM p = foldr go (pure Nothing)
|
|||
inquotes :: T.Text -> T.Text
|
||||
inquotes txt = T.cons '\"' (T.snoc txt '\"')
|
||||
|
||||
-- | Like @'show'@, but returns a 'T.Text' instead of a 'String'.
|
||||
tshow :: Show a => a -> T.Text
|
||||
tshow = T.pack . show
|
||||
|
||||
|
@ -206,6 +217,8 @@ notElemText c = T.all (/= c)
|
|||
stripTrailingNewlines :: T.Text -> T.Text
|
||||
stripTrailingNewlines = T.dropWhileEnd (== '\n')
|
||||
|
||||
-- | Returns 'True' for an ASCII whitespace character, viz. space,
|
||||
-- carriage return, newline, and horizontal tab.
|
||||
isWS :: Char -> Bool
|
||||
isWS ' ' = True
|
||||
isWS '\r' = True
|
||||
|
@ -317,6 +330,7 @@ crFilter = T.filter (/= '\r')
|
|||
normalizeDate :: T.Text -> Maybe T.Text
|
||||
normalizeDate = fmap T.pack . normalizeDate' . T.unpack
|
||||
|
||||
-- | Like @'normalizeDate'@, but acts on 'String' instead of 'T.Text'.
|
||||
normalizeDate' :: String -> Maybe String
|
||||
normalizeDate' s = fmap (formatTime defaultTimeLocale "%F")
|
||||
(msum $ map (\fs -> parsetimeWith fs s >>= rejectBadYear) formats :: Maybe Day)
|
||||
|
@ -382,10 +396,12 @@ removeFormatting = query go . walk (deNote . deQuote)
|
|||
go LineBreak = [Space]
|
||||
go _ = []
|
||||
|
||||
-- | Replaces 'Note' elements with empty strings.
|
||||
deNote :: Inline -> Inline
|
||||
deNote (Note _) = Str ""
|
||||
deNote x = x
|
||||
|
||||
-- | Turns links into spans, keeping just the link text.
|
||||
deLink :: Inline -> Inline
|
||||
deLink (Link _ ils _) = Span nullAttr ils
|
||||
deLink x = x
|
||||
|
@ -412,6 +428,8 @@ stringify = query go . walk fixInlines
|
|||
fixInlines (q@Quoted{}) = deQuote q
|
||||
fixInlines x = x
|
||||
|
||||
-- | Unwrap 'Quoted' inline elements, enclosing the contents with
|
||||
-- English-style Unicode quotes instead.
|
||||
deQuote :: Inline -> Inline
|
||||
deQuote (Quoted SingleQuote xs) =
|
||||
Span ("",[],[]) (Str "\8216" : xs ++ [Str "\8217"])
|
||||
|
@ -813,7 +831,7 @@ collapseFilePath = Posix.joinPath . reverse . foldl' go [] . splitDirectories
|
|||
isSingleton _ = Nothing
|
||||
checkPathSeperator = fmap isPathSeparator . isSingleton
|
||||
|
||||
-- Convert the path part of a file: URI to a regular path.
|
||||
-- | Converts the path part of a file: URI to a regular path.
|
||||
-- On windows, @/c:/foo@ should be @c:/foo@.
|
||||
-- On linux, @/foo@ should be @/foo@.
|
||||
uriPathToPath :: T.Text -> FilePath
|
||||
|
|
Loading…
Add table
Reference in a new issue