229 lines
8.5 KiB
Haskell
229 lines
8.5 KiB
Haskell
{- scanner.hs
|
|
|
|
A simple parallel (and slightly concurrent) port scanner.
|
|
|
|
Type:
|
|
|
|
scanner 1 10 '127.0.0.1[30-50]' 'www.google.com[80-1000]'
|
|
|
|
to scan the ports 30 through 50 on localhost and ports 80 through 1000
|
|
on www.google.com. The value 1 specifies the time the scanner will
|
|
wait before dropping a connection attempt. The value 10 specifies the
|
|
number of threads in the thread pool.
|
|
|
|
Only TCP connections are attempted.
|
|
|
|
This is an practical example to be shown in the fourth class of the
|
|
course "Haskell for Life", by Sergiu Ivanov (sivanov@lacl.fr):
|
|
|
|
http://lacl.fr/~sivanov/doku.php?id=en:haskell_for_life
|
|
|
|
This file is distributed under the Creative Commons Attribution Alone
|
|
licence.-}
|
|
|
|
import Network.Simple.TCP
|
|
import Control.Monad.Catch
|
|
import System.Timeout
|
|
import Control.Concurrent.STM.TChan
|
|
import Control.Concurrent.STM.TMVar
|
|
import GHC.Conc.Sync
|
|
import System.Environment (getArgs)
|
|
|
|
type Timeout = Int
|
|
|
|
-- | The possible outcomes of a connection attempt.
|
|
data Outcome = Connected | Refused | TimedOut
|
|
deriving Show
|
|
|
|
type PortNumber = Int
|
|
|
|
data ScanTarget = ScanTarget HostName PortNumber
|
|
|
|
-- | Attempts connecting to a port (service) on the given machine.
|
|
--
|
|
-- The timeout should be given in seconds.
|
|
testPort :: HostName -> PortNumber -> Timeout -> IO Outcome
|
|
testPort host port t =
|
|
-- When the connection is actually refused, an IO error is thrown,
|
|
-- and we use 'catchIOError' to handle that case. In the handling
|
|
-- routine (the lambda function) we will not care about the type of
|
|
-- the error, and just conclude that connection was refused.
|
|
catchIOError (timedConnect host port t) (\_ -> return Refused)
|
|
where
|
|
-- 'connect' calls the supplied function if connection is
|
|
-- successful (it's a lambda expression in our case). 'connect'
|
|
-- actually gives the information about the connection (the
|
|
-- socket), but we don't really care: once the lambda is called,
|
|
-- the connection succeeded and the port is open.
|
|
justConnect host port = connect host (show port) (\_ -> return Connected)
|
|
|
|
-- Aborts the attempts to connect after a given timeout.
|
|
--
|
|
-- 'timeout' returns 'Just result' if the function managed to get
|
|
-- the result, and 'Nothing otherwise'. We know that our result
|
|
-- can only be 'Connected', so no fancy pattern matching. (We may
|
|
-- also get an IO error, but that is handled above.)
|
|
timedConnect host port t = do
|
|
let t' = 1000000 * t -- 'timeout' uses microseconds.
|
|
|
|
maybeResult <- timeout t' $ justConnect host port
|
|
case maybeResult of
|
|
Just Connected -> return Connected
|
|
Nothing -> return TimedOut
|
|
|
|
-- | Runs a thread pool of the given size on the given channel
|
|
-- containing tasks; waits until the tasks are done.
|
|
--
|
|
-- All the threads in the pool read tasks from the same channel and
|
|
-- terminate when there are no more tasks in the channel. This means
|
|
-- that the channel must contain data from the very beginning,
|
|
-- otherwise the thread pool will just stop.
|
|
threadPool :: Int -> TChan a -> (a -> IO ()) -> IO ()
|
|
threadPool size chan process = do
|
|
-- 'mapM' is like 'map', but it can handle functions returning IO
|
|
-- values.
|
|
--
|
|
-- Here, the type of 'mapM' specialises to
|
|
--
|
|
-- mapM_ :: (a -> IO b) -> [a] -> IO [b]
|
|
--
|
|
-- So we do something for all numbers from 1 to 'size' (see the end
|
|
-- of this expression). What we are going to do is spawn a worker
|
|
-- thread and create a Boolean 'TMVar' into which the worker will
|
|
-- put 'True' when it sees that the channel is empty. When all
|
|
-- threads will have put 'True' into their corresponding variables,
|
|
-- we can stop waiting.
|
|
tmvars <- mapM (\_ -> do -- This argument gives us the number of the
|
|
-- worker we are creating, but, hey, who cares!
|
|
|
|
-- Here's our flag for the worker to raise.
|
|
tmvar <- atomically $ newEmptyTMVar
|
|
|
|
-- Spawn the worker now.
|
|
forkIO $ worker chan tmvar
|
|
|
|
-- Store the flag.
|
|
return tmvar
|
|
) [1..size]
|
|
|
|
-- Now, the workers are working. We are going to wait on the
|
|
-- corresponding flags until they are finished.
|
|
--
|
|
-- 'mapM_' is like 'mapM', but it discards the return values. We
|
|
-- use it when we are only interested in the side effect: in this
|
|
-- case, the waiting ('readTVar' blocks until the variable is not
|
|
-- empty).
|
|
mapM_ (\tmvar -> atomically $ takeTMVar tmvar) tmvars
|
|
|
|
where worker chan finished = do
|
|
-- The code is pretty self-explanatory: try reading the
|
|
-- channel atomically.
|
|
maybeTask <- atomically $ tryReadTChan chan
|
|
case maybeTask of
|
|
-- There's still work to do; so process it and try getting
|
|
-- some.
|
|
Just task -> do
|
|
process task
|
|
-- This is a tail recursive call: it's the very last
|
|
-- thing that happens in this function, so it actually
|
|
-- works pretty much like a while and not like an
|
|
-- infinite recursion.
|
|
worker chan finished
|
|
-- No more work, so this thread may stop ...
|
|
Nothing -> do
|
|
-- ... but tell the manager we're done before that.
|
|
atomically $ putTMVar finished True
|
|
|
|
-- | Test 'theadPool'.
|
|
--
|
|
-- Load this file in GHCi and type 'testThreadPool' to see what
|
|
-- happens :-)
|
|
testThreadPool :: IO ()
|
|
testThreadPool = do
|
|
-- Will result in ["string1", "string2", ... , "string20"]
|
|
--
|
|
-- We use '(.)' to apply a series of transformations. First, we
|
|
-- transform the number into a string. Second, we prepend "string"
|
|
-- to this string. Remember, ("string" ++ ) is a function
|
|
-- prepending the word "string" to whatever we give it. Thus
|
|
-- ("string" ++) "1" == "string1".
|
|
let strings = map (("string" ++) . show) [1..20]
|
|
|
|
-- Create a channel.
|
|
chan <- newTChanIO
|
|
|
|
-- Put all the strings in the channel.
|
|
atomically $ do
|
|
-- It's all about partially applied functions again. 'writeTChan'
|
|
-- is a function taking two arguments: the channel and the value
|
|
-- to write. 'writeTChan chan' is a function taking _one_
|
|
-- argument: the value to write to 'chan'.
|
|
--
|
|
-- We could also write:
|
|
--
|
|
-- mapM_ (\v -> writeTChan chan v) strings
|
|
--
|
|
mapM_ (writeTChan chan) strings
|
|
|
|
-- Create a thread pool of 3 threads, each calling 'putStrLn' on
|
|
-- every string from 'chan.
|
|
threadPool 3 chan putStrLn
|
|
|
|
-- Make sure we actually wait for the threads to finish.
|
|
putStrLn "All threads finished!"
|
|
|
|
-- | Parses a string of the form "address[port1-port2]".
|
|
--
|
|
-- 'break' looks up the first symbol in the string for which the given
|
|
-- function returns true, and returns the two halves of the list.
|
|
--
|
|
-- break odd [2,4,3,5] == ([2,4],[3,5])
|
|
--
|
|
parseAddress :: String -> (HostName, PortNumber, PortNumber)
|
|
parseAddress str = let (host, sPorts) = break (== '[') str
|
|
-- sPorts has the form "[number-number]". Drop
|
|
-- the brackets.
|
|
portsTrimmed = tail $ init sPorts
|
|
-- Split on '-', and, in the second half, drop
|
|
-- the dash immediately.
|
|
(sPort1,(_:sPort2)) = break (== '-') portsTrimmed
|
|
in (host, read sPort1, read sPort2)
|
|
|
|
main :: IO ()
|
|
main = do
|
|
-- We will now suppose that the list of arguments has the right
|
|
-- form. If something goes wrong, we are screwed :-)
|
|
|
|
-- This lets us get the first two elements of the list.
|
|
(sTimeout:sNthreads:addresses) <- getArgs
|
|
|
|
let timeout = read sTimeout :: Int
|
|
nthreads = read sNthreads :: Int
|
|
|
|
-- Create our channel for tasks.
|
|
chan <- newTChanIO
|
|
|
|
-- We will parse the addresses one by one, generate the
|
|
-- corresponding targets, and put them into the channel.
|
|
mapM_ (\addr -> do
|
|
-- Parse this address.
|
|
let (host, port1, port2) = parseAddress addr
|
|
|
|
-- Now, iterate through the ports in the range
|
|
-- 'port1'--'port2' and put the task containing the host
|
|
-- name and the port into the channel.
|
|
mapM_ (\port ->
|
|
atomically $ writeTChan chan (ScanTarget host port)
|
|
) [port1..port2]
|
|
) addresses
|
|
|
|
-- Go ahead and scan all our targets in the thread pool of the given
|
|
-- size.
|
|
threadPool nthreads chan $
|
|
(\(ScanTarget host port) -> do
|
|
-- So, let's scan this target with the given timeout.
|
|
outcome <- testPort host port timeout
|
|
|
|
-- Now show something.
|
|
putStrLn $ host ++ ":" ++ show port ++ " -- " ++ show outcome
|
|
)
|