work-site/content/courses/h4life/h4life-scanner.hs

230 lines
8.5 KiB
Haskell
Raw Permalink Normal View History

2018-09-22 23:40:20 +02:00
{- scanner.hs
A simple parallel (and slightly concurrent) port scanner.
Type:
scanner 1 10 '127.0.0.1[30-50]' 'www.google.com[80-1000]'
to scan the ports 30 through 50 on localhost and ports 80 through 1000
on www.google.com. The value 1 specifies the time the scanner will
wait before dropping a connection attempt. The value 10 specifies the
number of threads in the thread pool.
Only TCP connections are attempted.
This is an practical example to be shown in the fourth class of the
course "Haskell for Life", by Sergiu Ivanov (sivanov@lacl.fr):
http://lacl.fr/~sivanov/doku.php?id=en:haskell_for_life
This file is distributed under the Creative Commons Attribution Alone
licence.-}
import Network.Simple.TCP
import Control.Monad.Catch
import System.Timeout
import Control.Concurrent.STM.TChan
import Control.Concurrent.STM.TMVar
import GHC.Conc.Sync
import System.Environment (getArgs)
type Timeout = Int
-- | The possible outcomes of a connection attempt.
data Outcome = Connected | Refused | TimedOut
deriving Show
type PortNumber = Int
data ScanTarget = ScanTarget HostName PortNumber
-- | Attempts connecting to a port (service) on the given machine.
--
-- The timeout should be given in seconds.
testPort :: HostName -> PortNumber -> Timeout -> IO Outcome
testPort host port t =
-- When the connection is actually refused, an IO error is thrown,
-- and we use 'catchIOError' to handle that case. In the handling
-- routine (the lambda function) we will not care about the type of
-- the error, and just conclude that connection was refused.
catchIOError (timedConnect host port t) (\_ -> return Refused)
where
-- 'connect' calls the supplied function if connection is
-- successful (it's a lambda expression in our case). 'connect'
-- actually gives the information about the connection (the
-- socket), but we don't really care: once the lambda is called,
-- the connection succeeded and the port is open.
justConnect host port = connect host (show port) (\_ -> return Connected)
-- Aborts the attempts to connect after a given timeout.
--
-- 'timeout' returns 'Just result' if the function managed to get
-- the result, and 'Nothing otherwise'. We know that our result
-- can only be 'Connected', so no fancy pattern matching. (We may
-- also get an IO error, but that is handled above.)
timedConnect host port t = do
let t' = 1000000 * t -- 'timeout' uses microseconds.
maybeResult <- timeout t' $ justConnect host port
case maybeResult of
Just Connected -> return Connected
Nothing -> return TimedOut
-- | Runs a thread pool of the given size on the given channel
-- containing tasks; waits until the tasks are done.
--
-- All the threads in the pool read tasks from the same channel and
-- terminate when there are no more tasks in the channel. This means
-- that the channel must contain data from the very beginning,
-- otherwise the thread pool will just stop.
threadPool :: Int -> TChan a -> (a -> IO ()) -> IO ()
threadPool size chan process = do
-- 'mapM' is like 'map', but it can handle functions returning IO
-- values.
--
-- Here, the type of 'mapM' specialises to
--
-- mapM_ :: (a -> IO b) -> [a] -> IO [b]
--
-- So we do something for all numbers from 1 to 'size' (see the end
-- of this expression). What we are going to do is spawn a worker
-- thread and create a Boolean 'TMVar' into which the worker will
-- put 'True' when it sees that the channel is empty. When all
-- threads will have put 'True' into their corresponding variables,
-- we can stop waiting.
tmvars <- mapM (\_ -> do -- This argument gives us the number of the
-- worker we are creating, but, hey, who cares!
-- Here's our flag for the worker to raise.
tmvar <- atomically $ newEmptyTMVar
-- Spawn the worker now.
forkIO $ worker chan tmvar
-- Store the flag.
return tmvar
) [1..size]
-- Now, the workers are working. We are going to wait on the
-- corresponding flags until they are finished.
--
-- 'mapM_' is like 'mapM', but it discards the return values. We
-- use it when we are only interested in the side effect: in this
-- case, the waiting ('readTVar' blocks until the variable is not
-- empty).
mapM_ (\tmvar -> atomically $ takeTMVar tmvar) tmvars
where worker chan finished = do
-- The code is pretty self-explanatory: try reading the
-- channel atomically.
maybeTask <- atomically $ tryReadTChan chan
case maybeTask of
-- There's still work to do; so process it and try getting
-- some.
Just task -> do
process task
-- This is a tail recursive call: it's the very last
-- thing that happens in this function, so it actually
-- works pretty much like a while and not like an
-- infinite recursion.
worker chan finished
-- No more work, so this thread may stop ...
Nothing -> do
-- ... but tell the manager we're done before that.
atomically $ putTMVar finished True
-- | Test 'theadPool'.
--
-- Load this file in GHCi and type 'testThreadPool' to see what
-- happens :-)
testThreadPool :: IO ()
testThreadPool = do
-- Will result in ["string1", "string2", ... , "string20"]
--
-- We use '(.)' to apply a series of transformations. First, we
-- transform the number into a string. Second, we prepend "string"
-- to this string. Remember, ("string" ++ ) is a function
-- prepending the word "string" to whatever we give it. Thus
-- ("string" ++) "1" == "string1".
let strings = map (("string" ++) . show) [1..20]
-- Create a channel.
chan <- newTChanIO
-- Put all the strings in the channel.
atomically $ do
-- It's all about partially applied functions again. 'writeTChan'
-- is a function taking two arguments: the channel and the value
-- to write. 'writeTChan chan' is a function taking _one_
-- argument: the value to write to 'chan'.
--
-- We could also write:
--
-- mapM_ (\v -> writeTChan chan v) strings
--
mapM_ (writeTChan chan) strings
-- Create a thread pool of 3 threads, each calling 'putStrLn' on
-- every string from 'chan.
threadPool 3 chan putStrLn
-- Make sure we actually wait for the threads to finish.
putStrLn "All threads finished!"
-- | Parses a string of the form "address[port1-port2]".
--
-- 'break' looks up the first symbol in the string for which the given
-- function returns true, and returns the two halves of the list.
--
-- break odd [2,4,3,5] == ([2,4],[3,5])
--
parseAddress :: String -> (HostName, PortNumber, PortNumber)
parseAddress str = let (host, sPorts) = break (== '[') str
-- sPorts has the form "[number-number]". Drop
-- the brackets.
portsTrimmed = tail $ init sPorts
-- Split on '-', and, in the second half, drop
-- the dash immediately.
(sPort1,(_:sPort2)) = break (== '-') portsTrimmed
in (host, read sPort1, read sPort2)
main :: IO ()
main = do
-- We will now suppose that the list of arguments has the right
-- form. If something goes wrong, we are screwed :-)
-- This lets us get the first two elements of the list.
(sTimeout:sNthreads:addresses) <- getArgs
let timeout = read sTimeout :: Int
nthreads = read sNthreads :: Int
-- Create our channel for tasks.
chan <- newTChanIO
-- We will parse the addresses one by one, generate the
-- corresponding targets, and put them into the channel.
mapM_ (\addr -> do
-- Parse this address.
let (host, port1, port2) = parseAddress addr
-- Now, iterate through the ports in the range
-- 'port1'--'port2' and put the task containing the host
-- name and the port into the channel.
mapM_ (\port ->
atomically $ writeTChan chan (ScanTarget host port)
) [port1..port2]
) addresses
-- Go ahead and scan all our targets in the thread pool of the given
-- size.
threadPool nthreads chan $
(\(ScanTarget host port) -> do
-- So, let's scan this target with the given timeout.
outcome <- testPort host port timeout
-- Now show something.
putStrLn $ host ++ ":" ++ show port ++ " -- " ++ show outcome
)