{- sorting.hs

This file contains the examples on monads to be shown in the fourth
class of the course "Haskell for Life", by Sergiu Ivanov
(sivanov@lacl.fr):

  http://lacl.fr/~sivanov/doku.php?id=en:haskell_for_life

This file is meant to be read sequentially.

This file is distributed under the Creative Commons Attribution Alone
licence.-}

import Control.Parallel (par, pseq)
import Control.DeepSeq (force, NFData, deepseq)
import System.Random (getStdGen, randoms)
import System.Environment (getArgs)
import System.Clock

-- | This is an implementation of Quicksort.
--
-- We always choose the head of the list as the pivot, then we put the
-- elements smaller than the pivot in 'lesser', and those greater than
-- the pivot in 'greater'.  We then sort the two lists and put them
-- together with the pivot in between.
--
-- The 'Ord' typeclass requires comparison operators (like >=) to be
-- defined for the type.
qsort :: Ord a => [a] -> [a]
qsort [] = []
qsort (pivot:rest) = lesser ++ pivot:greater
  where lesser  = qsort [x | x <- rest, x <  pivot]
        greater = qsort [y | y <- rest, y >= pivot]


-- | This is a parallel implementation of Quicksort.
--
-- We would like to have 'lesser' and 'greater' evaluated in parallel.
-- To assure that, we may feel tempted to say
--
--   lesser `par` (lesser ++ pivot:greater)
--
-- In this case the runtime will get the information that we would
-- like 'lesser' evaluated in parallel, ... and then the main thread
-- will start evaluating the arguments to '(++)', sometimes starting
-- with the very same 'lesser'!  This means parallel evaluation of
-- 'lesser' will only be initiated if we get lucky and the runtime
-- starts evaluating the second argument of '(++)' before the first
-- one.  In order to give 'lesser' some time to get evaluated, we
-- shall therefore also require that 'greater' be evaluated before
-- list concatenation is done, like this:
--
--   lesser `par` ( greater `pseq` (lesser ++ pivot:greater) )
--
-- Moreover, since both `par` and `pseq` are right-associative, we can
-- drop some of the parentheses:
--
--   lesser `par` greater `pseq` (lesser ++ pivot:greater)
--
-- Now, even though everything seems nice and shiny, there's still a
-- big problem: 'par' and 'pseq' only assure some very "shallow"
-- evaluation of their left argument.  In the case of our sorting
-- routine this essentially means that only the first elements of
-- 'lesser' and 'greater' are actually evaluated, in parallel, while
-- the rest of the lists are evaluated when the sorted result is
-- _actually_ needed (e.g. when we want to print it); and that
-- evaluation is done sequentially.  Therefore, despite our clever
-- usage of 'par' and 'pseq', the greater part of the sorting is done
-- quite sequentially.
--
-- To solve this problem, we should force deep (complete) evaluation
-- of 'lesser' and 'greater' using the aptly named 'force' function:
--
--   force lesser `par` force greater `pseq` (lesser ++ pivot:greater)
--
-- In this case we will really get our sorting executed in parallel.
--
-- We need to use the function 'force' on lists of type 'a', and thus
-- on values of type 'a' as well.  The existence of the function
-- 'force' is expressed by the typeclass 'NFData'.
parQsort :: (Ord a, NFData a) => [a] -> [a]
parQsort [] = []
parQsort (pivot:rest) = force lesser
                        `par` force greater
                        `pseq` (lesser ++ pivot:greater)
  where lesser  = parQsort [x | x <- rest, x <  pivot]
        greater = parQsort [y | y <- rest, y >= pivot]


-- | Now, just for fun, let's consider the number of times we use
-- 'par': we apply it in every node of the recursion tree in
-- 'parQsort', and there more nodes in this recursion tree than there
-- elements in the array to sort!  That's actually a problem, since
-- 'par' is not exactly free, even though it's not really costly
-- either.
--
-- To avoid allocating separate jobs for very small lists, we will
-- only use `par` for sorting lists longer than a certain threshold.
--
-- The symbol '@' allows us to refer to the deconstructed list.  Thus,
-- when we pattern match on an argument using 'list@(x:xs)' instead of
-- just '(x:xs)', we can use 'list' to refer to the original (not
-- deconstructed) value of the parameter (i.e. to 'list = (x:xs)').
parnQsort :: (Ord a, NFData a) => Int -> [a] -> [a]
parnQsort _ [] = []
parnQsort d list@(pivot:rest) | length list >= d = lesser
                                                   `par` greater
                                                   `pseq` (lesser ++ pivot:greater)
                              | otherwise = lesser ++ pivot:greater
  where lesser  = parnQsort d [x | x <- rest, x <  pivot]
        greater = parnQsort d [y | y <- rest, y >= pivot]


-- | Produces a sequence of random integers of the given length.
--
-- Pseudo-random numbers are usually generating from some information
-- from the outer world, that is why we have to work within the IO
-- monad.
randomInts :: Int -> IO [Int]
randomInts n = do
  gen <- getStdGen        -- Get the global random number generator.
  let ints = randoms gen  -- Get an infinite list of random 'Int's.
  return $ take n ints    -- Return the first 'n' random values.

-- | Runs an IO action and then outputs the running time.
--
-- Let's look at the type signature: according to its return type,
-- this function is in the IO monad, and it returns a 'TimeSpec', a
-- type defined in 'Data.Clock' that contains two fields: one for
-- seconds and another one for nanoseconds.  The only argument of this
-- function is an _unevaluated_ value (a thunk) which the function
-- will evaluate and measure the time it took.  The evaluation will be
-- forced via the 'deepseq' function, and the existence of this
-- function for the type 'a' is guaranteed by requiring that it should
-- belong to the 'NFData' typeclass.
clockedRun :: NFData a => a -> IO TimeSpec
clockedRun x = do
  -- We get the time from the monotonic clock: its value can never be
  -- changed, so we do not risk getting biased results.
  start <- getTime Monotonic
  end <- x `deepseq` getTime Monotonic
  return (diffTimeSpec start end)

-- | Let's test all three functions.
main :: IO ()
main = do
  args <- getArgs -- Get the command line arguments.

  -- If there are some arguments, treat the first one as the length of
  -- the list; otherwise, use the default length: 100000 with many zeros.
  let n | null args = 100000
        | otherwise = read (head args)

  -- Generate the random list.
  putStrLn $ "Generating " ++ show n ++ " random integers."
  list <- randomInts n

  -- Run the sequential Quicksort and measure the time.
  seqTime <- clockedRun (qsort list)
  putStrLn $ "Sequential Quicksort worked in " ++ show (nsec seqTime) ++ " ns."

  -- Run the first version of parallel Quicksort and measure the time.
  parTime <- clockedRun (parQsort list)
  putStrLn $ "Parallel Quicksort (abusive variant) worked in " ++ show (nsec parTime) ++ " ns."

  -- Run the cleverer version of parallel Quicksort and measure the
  -- time.  Don't use 'par' for lists having less than 1000 elements.
  parnTime <- clockedRun (parnQsort 1000 list)
  putStrLn $ "Parallel Quicksort (cleverer variant) worked in " ++ show (nsec parnTime) ++ " ns."