From bffd74323cfd91f5c44ca34e09633247d1d28954 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Tue, 23 Nov 2021 18:32:53 +0100
Subject: [PATCH] Lua: add function `pandoc.utils.text` (#7710)

The function converts a string to `Inlines`, treating interword spaces
as `Space`s or `SoftBreak`s. If you want a `Str` with literal spaces,
use `pandoc.Str`.

Closes: #7709
---
 doc/lua-filters.md                    | 18 +++++++++++++++++
 src/Text/Pandoc/Lua/Marshaling/AST.hs |  1 +
 src/Text/Pandoc/Lua/Module/Utils.hs   | 12 ++++++++++--
 test/lua/module/pandoc-utils.lua      | 28 +++++++++++++++++++++++++++
 4 files changed, 57 insertions(+), 2 deletions(-)

diff --git a/doc/lua-filters.md b/doc/lua-filters.md
index 9fc90a13f..db5d1ccac 100644
--- a/doc/lua-filters.md
+++ b/doc/lua-filters.md
@@ -3082,6 +3082,24 @@ Usage:
     -- outputs "Moin"
     print(pandoc.utils.stringify(inline))
 
+### text {#pandoc.utils.text}
+
+`text (words)`
+
+Converts a string to `Inlines`, treating interword spaces as
+`Space`s or `SoftBreak`s. If you want a single `Str` with literal
+spaces, use `pandoc.Str`.
+
+Parameters:
+
+`words`
+:  markup-less text (string)
+
+Returns:
+
+-   List of inline elements split into words (Inlines)
+
+
 ### to\_roman\_numeral {#pandoc.utils.to_roman_numeral}
 
 `to_roman_numeral (integer)`
diff --git a/src/Text/Pandoc/Lua/Marshaling/AST.hs b/src/Text/Pandoc/Lua/Marshaling/AST.hs
index 31d040c83..9cf683055 100644
--- a/src/Text/Pandoc/Lua/Marshaling/AST.hs
+++ b/src/Text/Pandoc/Lua/Marshaling/AST.hs
@@ -44,6 +44,7 @@ module Text.Pandoc.Lua.Marshaling.AST
   , pushBlock
   , pushCitation
   , pushInline
+  , pushInlines
   , pushListAttributes
   , pushMeta
   , pushMetaValue
diff --git a/src/Text/Pandoc/Lua/Module/Utils.hs b/src/Text/Pandoc/Lua/Module/Utils.hs
index 01ba4eb46..6fd707bf8 100644
--- a/src/Text/Pandoc/Lua/Module/Utils.hs
+++ b/src/Text/Pandoc/Lua/Module/Utils.hs
@@ -29,8 +29,8 @@ import Text.Pandoc.Definition
 import Text.Pandoc.Error (PandocError)
 import Text.Pandoc.Lua.Marshaling ()
 import Text.Pandoc.Lua.Marshaling.AST
-  ( peekBlock, peekInline, peekPandoc, pushBlock, pushInline, pushPandoc
-  , peekAttr, peekMeta, peekMetaValue)
+  ( peekBlock, peekInline, peekPandoc, pushBlock, pushInline, pushInlines
+  ,  pushPandoc, peekAttr, peekMeta, peekMetaValue)
 import Text.Pandoc.Lua.Marshaling.ListAttributes (peekListAttributes)
 import Text.Pandoc.Lua.Marshaling.List (pushPandocList)
 import Text.Pandoc.Lua.Marshaling.SimpleTable
@@ -122,6 +122,14 @@ documentedModule = Module
       <#> parameter peekAstElement "AST element" "elem" "some pandoc AST element"
       =#> functionResult pushText "string" "stringified element"
 
+    , defun "text"  -- Lua-facing name; added to the pandoc.utils module
+      ### liftPure (B.toList . B.text)  -- pure: string -> inline list
+      <#> parameter peekText "string" "words" "markup-less inlines text"
+      =#> functionResult pushInlines "Inlines" "list of inline elements"
+      #? ("Converts a string to `Inlines`, treating interword spaces as " <>
+          "`Space`s or `SoftBreak`s.  If you want a `Str` with literal " <>
+          "spaces, use `pandoc.Str`.")
+
     , defun "from_simple_table"
       ### from_simple_table
       <#> parameter peekSimpleTable "SimpleTable" "simple_tbl" ""
diff --git a/test/lua/module/pandoc-utils.lua b/test/lua/module/pandoc-utils.lua
index 9bd903f2d..21f550177 100644
--- a/test/lua/module/pandoc-utils.lua
+++ b/test/lua/module/pandoc-utils.lua
@@ -82,6 +82,34 @@ return {
     end)
   },
 
+  group 'text' {  -- pandoc.utils.text: plain string -> list of inlines
+    test('string is converted to inlines', function ()
+      local expected = {
+        pandoc.Str 'Madness', pandoc.Space(), pandoc.Str '-', pandoc.Space(),
+        pandoc.Str 'Our', pandoc.Space(), pandoc.Str 'House'
+      }
+      assert.are_same(expected, pandoc.utils.text('Madness - Our House'))
+    end),
+    test('tabs are treated as space', function ()
+      local expected = {  -- each tab yields a Space, same as a blank
+        pandoc.Str 'Linkin', pandoc.Space(), pandoc.Str 'Park', pandoc.Space(),
+        pandoc.Str '-', pandoc.Space(), pandoc.Str 'Papercut'
+      }
+      assert.are_same(expected, pandoc.utils.text('Linkin Park\t-\tPapercut'))
+    end),
+    test('newlines are treated as softbreaks', function ()
+      local expected = {  -- '\n' yields SoftBreak; blanks still yield Space
+        pandoc.Str 'Porcupine', pandoc.Space(), pandoc.Str 'Tree',
+        pandoc.SoftBreak(), pandoc.Str '-', pandoc.SoftBreak(),
+        pandoc.Str 'Blackest',  pandoc.Space(), pandoc.Str 'Eyes'
+      }
+      assert.are_same(  -- expected value first, as in the other groups
+        expected,
+        pandoc.utils.text('Porcupine Tree\n-\nBlackest Eyes')
+      )
+    end),
+  },
+
   group 'to_roman_numeral' {
     test('convertes number', function ()
       assert.are_equal('MDCCCLXXXVIII', utils.to_roman_numeral(1888))