{-----------------------------------------------------------------------------

                 A LIBRARY OF MONADIC PARSER COMBINATORS

                              29th July 1996

                 Graham Hutton               Erik Meijer
            University of Nottingham    University of Utrecht

This Haskell 1.3 script defines a library of parser combinators, and is taken
from sections 1-6 of our article "Monadic Parser Combinators".  Some changes
to the library have been made in the move from Gofer to Haskell:

   * Do notation is used in place of monad comprehension notation;

   * The parser datatype is defined using "newtype", to avoid the overhead
     of tagging and untagging parsers with the P constructor.

------------------------------------------------------------------------------
** Extended to allow a symbol table/state to be threaded through the monad.
** Extended to allow a parameterised token type, rather than just strings.
** Extended to allow error-reporting.

(Extensions: 1998-2000 Malcolm.Wallace@cs.york.ac.uk)
(More extensions: 2004 gk-haskell@ninebynine.org)

------------------------------------------------------------------------------}

-- | This library of monadic parser combinators is based on the ones
--   defined by Graham Hutton and Erik Meijer.  It has been extended by
--   Malcolm Wallace to use an abstract token type (no longer just a
--   string) as input, and to incorporate state in the monad, useful
--   for symbol tables, macros, and so on.  Basic facilities for error
--   reporting have also been added, and later extended by Graham Klyne
--   to return the errors through an @Either@ type, rather than just
--   calling @error@.

module Text.ParserCombinators.HuttonMeijerWallace
  (
  -- * The parser monad
    Parser(..)
  -- * Primitive parser combinators
  , item, eof, papply, papply'
  -- * Derived combinators
  , (+++), {-sat,-} tok, nottok, many, many1
  , sepby, sepby1, chainl, chainl1, chainr, chainr1, ops, bracket
  , toEOF
  -- * Error handling
  , elserror
  -- * State handling
  , stupd, stquery, stget
  -- * Re-parsing
  , reparse
  ) where

import Char
import Monad

infixr 5 +++

--- The parser monad ---------------------------------------------------------

type ParseResult s t e a = Either e [(a,s,[Either e t])]

newtype Parser s t e a   = P ( s -> [Either e t] -> ParseResult s t e a )
    -- ^ The parser type is parametrised on the types of the state @s@,
    --   the input tokens @t@, error-type @e@, and the result value @a@.
    --   The state and remaining input are threaded through the monad.

instance Functor (Parser s t e) where
   -- fmap        :: (a -> b) -> (Parser s t e a -> Parser s t e b)
   fmap f (P p)    = P (\st inp -> case p st inp of
                        Right res -> Right [(f v, s, out) | (v,s,out) <- res]
                        Left err  -> Left err
                       )

instance Monad (Parser s t e) where
   -- return      :: a -> Parser s t e a
   return v        = P (\st inp -> Right [(v,st,inp)])
   -- >>=         :: Parser s t e a -> (a -> Parser s t e b) -> Parser s t e b
   (P p) >>= f     = P (\st inp -> case p st inp of
                        Right res -> foldr joinresults (Right [])
                            [ papply' (f v) s out | (v,s,out) <- res ]
                        Left err  -> Left err
                       )
   -- fail        :: String -> Parser s t e a
   fail err        = P (\st inp -> Right [])
  -- I know it's counterintuitive, but we want no-parse, not an error.

instance MonadPlus (Parser s t e) where
   -- mzero       :: Parser s t e a
   mzero           = P (\st inp -> Right [])
   -- mplus       :: Parser s t e a -> Parser s t e a -> Parser s t e a
   (P p) `mplus` (P q) = P (\st inp -> joinresults (p st inp) (q st inp))

-- joinresults ensures that explicitly raised errors are dominant,
-- provided no parse has yet been found.  The commented out code is
-- a slightly stricter specification of the real code.
joinresults :: ParseResult s t e a -> ParseResult s t e a -> ParseResult s t e a
{-
joinresults (Left  p)  (Left  q)  = Left  p
joinresults (Left  p)  (Right _)  = Left  p
joinresults (Right []) (Left  q)  = Left  q
joinresults (Right p)  (Left  q)  = Right p
joinresults (Right p)  (Right q)  = Right (p++q)
-}
joinresults (Left  p)  q  = Left p
joinresults (Right []) q  = q
joinresults (Right p)  q  = Right (p++ case q of Left _  -> []
                                                 Right r -> r)


--- Primitive parser combinators ---------------------------------------------

-- | Deliver the first remaining token.
item              :: Parser s t e t
item               = P (\st inp -> case inp of
                        []            -> Right []
                        (Left e: _)   -> Left e
                        (Right x: xs) -> Right [(x,st,xs)]
                       )

-- | Fail if end of input is not reached
eof               :: Show p => Parser s (p,t) String ()
eof                = P (\st inp -> case inp of
                        []         -> Right [((),st,[])]
                        (Left e:_) -> Left e
                        (Right (p,_):_) -> Left ("End of input expected at "
                                                 ++show p++"\n  but found text")
                       )

{-
-- | Ensure the value delivered by the parser is evaluated to WHNF.
force             :: Parser s t e a -> Parser s t e a
force (P p)        = P (\st inp -> let Right xs = p st inp
                                       h = head xs in
                                   h `seq` Right (h: tail xs)
                       )
--  [[[GK]]]  ^^^^^^
--  WHNF = Weak Head Normal Form, meaning that it has no top-level redex.
--  In this case, I think that means that the first element of the list
--  is fully evaluated.
--
--  NOTE:  the original form of this function fails if there is no parse
--  result for p st inp (head xs fails if xs is null), so the modified
--  form can assume a Right value only.
--
--  Why is this needed?
--  It's not exported, and the only use of this I see is commented out.
---------------------------------------
-}


-- | Deliver the first parse result only, eliminating any backtracking.
first             :: Parser s t e a -> Parser s t e a
first (P p)        = P (\st inp -> case p st inp of
                                   Right (x:xs) -> Right [x]
                                   otherwise    -> otherwise
                       )

-- | Apply the parser to some real input, given an initial state value.
--   If the parser fails, raise 'error' to halt the program.
--   (This is the original exported behaviour - to allow the caller to
--   deal with the error differently, see @papply'@.)
papply            :: Parser s t String a -> s -> [Either String t]
                                              -> [(a,s,[Either String t])]
papply (P p) st inp = either error id (p st inp)

-- | Apply the parser to some real input, given an initial state value.
--   If the parser fails, return a diagnostic message to the caller.
papply'           :: Parser s t e a -> s -> [Either e t]
                                         -> Either e [(a,s,[Either e t])]
papply' (P p) st inp = p st inp

--- Derived combinators ------------------------------------------------------

-- | A choice between parsers.  Keep only the first success.
(+++)             :: Parser s t e a -> Parser s t e a -> Parser s t e a
p +++ q            = first (p `mplus` q)

-- | Deliver the first token if it satisfies a predicate.
sat               :: (t -> Bool) -> Parser s (p,t) e t
sat p              = do {(_,x) <- item; if p x then return x else mzero}

-- | Deliver the first token if it equals the argument.
tok               :: Eq t => t -> Parser s (p,t) e t
tok t              = do {(_,x) <- item; if x==t then return t else mzero}

-- | Deliver the first token if it does not equal the argument.
nottok            :: Eq t => [t] -> Parser s (p,t) e t
nottok ts          = do {(_,x) <- item; if x `notElem` ts then return x
                                        else mzero}

-- | Deliver zero or more values of @a@.
many              :: Parser s t e a -> Parser s t e [a]
many p             = many1 p +++ return []
--many p           = force (many1 p +++ return [])

-- | Deliver one or more values of @a@.
many1             :: Parser s t e a -> Parser s t e [a]
many1 p            = do {x <- p; xs <- many p; return (x:xs)}

-- | Deliver zero or more values of @a@ separated by @b@'s.
sepby             :: Parser s t e a -> Parser s t e b -> Parser s t e [a]
p `sepby` sep      = (p `sepby1` sep) +++ return []

-- | Deliver one or more values of @a@ separated by @b@'s.
sepby1            :: Parser s t e a -> Parser s t e b -> Parser s t e [a]
p `sepby1` sep     = do {x <- p; xs <- many (do {sep; p}); return (x:xs)}

chainl            :: Parser s t e a -> Parser s t e (a->a->a) -> a
                                                              -> Parser s t e a
chainl p op v      = (p `chainl1` op) +++ return v

chainl1           :: Parser s t e a -> Parser s t e (a->a->a) -> Parser s t e a
p `chainl1` op     = do {x <- p; rest x}
                     where
                        rest x = do {f <- op; y <- p; rest (f x y)}
                                 +++ return x

chainr            :: Parser s t e a -> Parser s t e (a->a->a) -> a
                                                              -> Parser s t e a
chainr p op v      = (p `chainr1` op) +++ return v

chainr1           :: Parser s t e a -> Parser s t e (a->a->a) -> Parser s t e a
p `chainr1` op     = do {x <- p; rest x}
                     where
                        rest x = do { f <- op
                                    ; y <- p `chainr1` op
                                    ; return (f x y)
                                    }
                                 +++ return x

ops               :: [(Parser s t e a, b)] -> Parser s t e b
ops xs             = foldr1 (+++) [do {p; return op} | (p,op) <- xs]

bracket           :: (Show p,Show t) =>
                     Parser s (p,t) e a -> Parser s (p,t) e b ->
                               Parser s (p,t) e c -> Parser s (p,t) e b
bracket open p close = do { open
                          ; x <- p
                          ; close -- `elserror` "improperly matched construct";
                          ; return x
                          }

-- | Accept a complete parse of the input only, no partial parses.
toEOF             :: Show p =>
                     Parser s (p,t) String a -> Parser s (p,t) String a
toEOF p            = do { x <- p; eof; return x }


--- Error handling -----------------------------------------------------------

-- | Return an error using the supplied diagnostic string, and a token type
--   which includes position information.
parseerror :: (Show p,Show t) => String -> Parser s (p,t) String a
parseerror err = P (\st inp ->
                         case inp of
                           [] -> Left "Parse error: unexpected EOF\n"
                           (Left e:_) -> Left ("Lexical error:  "++e)
                           (Right (p,t):_)  ->
                                 Left ("Parse error: in  "++show p++"\n    "
                                       ++err++"\n  "++"Found "++show t)
                   )


-- | If the parser fails, generate an error message.
elserror          :: (Show p,Show t) => Parser s (p,t) String a -> String
                                        -> Parser s (p,t) String a
p `elserror` s     = p +++ parseerror s

--- State handling -----------------------------------------------------------

-- | Update the internal state.
stupd      :: (s->s) -> Parser s t e ()
stupd f     = P (\st inp-> {-let newst = f st in newst `seq`-}
                           Right [((), f st, inp)])

-- | Query the internal state.
stquery    :: (s->a) -> Parser s t e a
stquery f   = P (\st inp-> Right [(f st, st, inp)])

-- | Deliver the entire internal state.
stget      :: Parser s t e s
stget       = P (\st inp-> Right [(st, st, inp)])


--- Push some tokens back onto the input stream and reparse ------------------

-- | This is useful for recursively expanding macros.  When the
--   user-parser recognises a macro use, it can lookup the macro
--   expansion from the parse state, lex it, and then stuff the
--   lexed expansion back down into the parser.
reparse    :: [Either e t] -> Parser s t e ()
reparse ts  = P (\st inp-> Right [((), st, ts++inp)])

------------------------------------------------------------------------------