From a89d4aa924e0515bf8860aaf30a7a50cf5a08e0c Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 12 Nov 2017 21:48:47 -0800
Subject: [PATCH] lua-filters.md: add wordcount example.

---
 doc/lua-filters.md | 47 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 47 insertions(+)

diff --git a/doc/lua-filters.md b/doc/lua-filters.md
index 5703a68ec..cee4240c7 100644
--- a/doc/lua-filters.md
+++ b/doc/lua-filters.md
@@ -371,6 +371,10 @@ at the "outer level" are included; this ignores blocks inside
 nested constructs, like list items.)
 
 ``` lua
+-- creates a handout from an article, using its headings,
+-- blockquotes, numbered examples, figures, and any
+-- Divs with class "handout"
+
 function Pandoc(doc)
     local hblocks = {}
     for i,el in pairs(doc.blocks) do
@@ -386,6 +390,49 @@ function Pandoc(doc)
 end
 ```
 
+## Counting words in a document
+
+This filter counts the words in the body of a document (omitting
+metadata like titles and abstracts), including words in code.
+It should be more accurate than `wc -w` run directly on a
+Markdown document, since the latter will count markup
+characters, like the `#` in front of an ATX header, or
+tags in HTML documents, as words.  To run it, use
+`pandoc --lua-filter wordcount.lua myfile.md`.
+
+``` lua
+-- counts words in a document
+-- (`words` is the running total; the handlers in `wordcount` add to it)
+words = 0
+
+wordcount = {  -- element handlers; each one adds to the global `words`
+  Str = function(el)
+    -- we don't count a word if it's entirely punctuation:
+    local stripped = el.text:gsub("%p","")
+    if #stripped > 0 then
+        words = words + 1
+    end
+  end,
+
+  Code = function(el)  -- inline code: count runs of non-whitespace
+    local _, n = el.text:gsub("%S+","")  -- 2nd result of gsub = match count
+    words = words + n
+  end,
+
+  CodeBlock = function(el)  -- code blocks are counted like inline code
+    local _, n = el.text:gsub("%S+","")  -- locals: don't leak `_` and `n`
+    words = words + n
+  end
+}
+
+function Pandoc(el)
+    local body = pandoc.Div(el.blocks)  -- body only, so metadata is skipped
+    pandoc.walk_block(body, wordcount)
+    print(words .. " words in body")
+    os.exit(0)  -- quit as soon as the count has been printed
+end
+```
+
 ## Converting ABC code to music notation
 
 This filter replaces code blocks with class `abc` with