Org reader: fix parsing problem for colons in headline

Fixed a problem where words surrounded by colons could causing parse
failures in some cases when they occurred in headers.

Fixes: #5993
This commit is contained in:
Albert Krewinkel 2019-12-19 07:09:43 +01:00
parent 5bd2d28b19
commit 0a3cc7260c
No known key found for this signature in database
GPG key ID: 388DC0B21F631124
3 changed files with 37 additions and 11 deletions

View file

@ -1,6 +1,7 @@
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TupleSections #-}
{- |
Module : Text.Pandoc.Readers.Org.DocumentTree
Copyright : Copyright (C) 2014-2019 Albert Krewinkel
@ -16,8 +17,8 @@ module Text.Pandoc.Readers.Org.DocumentTree
) where
import Prelude
import Control.Arrow ((***))
import Control.Monad (guard, void)
import Control.Arrow ((***), first)
import Control.Monad (guard)
import Data.List (intersperse)
import Data.Maybe (mapMaybe)
import Data.Text (Text)
@ -110,15 +111,13 @@ headline blocks inline lvl = try $ do
level <- headerStart
guard (lvl <= level)
todoKw <- optionMaybe todoKeyword
title <- trimInlinesF . mconcat <$> manyTill inline endOfTitle
tags <- option [] headerTags
newline
(title, tags) <- manyThen inline endOfTitle
planning <- option emptyPlanning planningInfo
properties <- option mempty propertiesDrawer
contents <- blocks
children <- many (headline blocks inline (level + 1))
return $ do
title' <- title
title' <- trimInlinesF (mconcat title)
contents' <- contents
children' <- sequence children
return Headline
@ -132,13 +131,29 @@ headline blocks inline lvl = try $ do
, headlineChildren = children'
}
where
endOfTitle :: Monad m => OrgParser m ()
endOfTitle = void . lookAhead $ optional headerTags *> newline
endOfTitle :: Monad m => OrgParser m [Tag]
endOfTitle = try $ do
skipSpaces
tags <- option [] (headerTags <* skipSpaces)
newline
return tags
headerTags :: Monad m => OrgParser m [Tag]
headerTags = try $
let tag = orgTagWord <* char ':'
in map toTag <$> (skipSpaces *> char ':' *> many1 tag <* skipSpaces)
headerTags = try $ do
char ':'
endBy1 (toTag <$> orgTagWord) (char ':')
manyThen :: Monad m
=> OrgParser m a
-> OrgParser m b
-> OrgParser m ([a], b)
manyThen p end = (([],) <$> try end) <|> do
x <- p
first (x:) <$> manyThen p end
-- titleFollowedByTags :: Monad m => OrgParser m (Inlines, [Tag])
-- titleFollowedByTags = do
unprunedHeadlineToBlocks :: Monad m => Headline -> OrgParserState -> OrgParser m [Block]
unprunedHeadlineToBlocks hdln st =

View file

@ -94,6 +94,7 @@ module Text.Pandoc.Readers.Org.Parsing
, sepBy
, sepBy1
, sepEndBy1
, endBy1
, option
, optional
, optionMaybe

View file

@ -142,6 +142,16 @@ tests =
"* This: is not: tagged" =?>
headerWith ("this-is-not-tagged", [], []) 1 "This: is not: tagged"
, "Untagged header time followed by colon" =:
"** Meeting at 5:23: free food" =?>
let attr = ("meeting-at-523-free-food", [], [])
in headerWith attr 2 "Meeting at 5:23: free food"
, "tag followed by text" =:
"*** Looks like a :tag: but isn't" =?>
let attr = ("looks-like-a-tag-but-isnt", [], [])
in headerWith attr 3 "Looks like a :tag: but isn't"
, "Header starting with strokeout text" =:
T.unlines [ "foo"
, ""