diff --git a/packages/backend-lalr/data/HappyTemplate.hs b/packages/backend-lalr/data/HappyTemplate.hs index 9e2bdefd..8c82fad0 100644 --- a/packages/backend-lalr/data/HappyTemplate.hs +++ b/packages/backend-lalr/data/HappyTemplate.hs @@ -148,6 +148,11 @@ happyShift new_state ERROR_TOK tk st sts stk@(x `HappyStk` _) = let i = GET_ERROR_TOKEN(x) in -- trace "shifting the error token" $ happyDoAction i tk new_state (HappyCons st sts) stk +-- TODO: When `i` would enter error recovery again, we should instead +-- discard input until the lookahead is acceptable. Perhaps this is +-- simplest to implement in CodeGen for productions using `error`; +-- there we know the context and can implement local shift+discard actions. +-- still need to remember parser-defined error site, though. happyShift new_state i tk st sts stk = happyNewToken new_state (HappyCons st sts) (MK_TOKEN(tk) `HappyStk` stk) @@ -156,8 +161,14 @@ happyShift new_state i tk st sts stk = happySpecReduce_0 i fn ERROR_TOK tk st sts stk = happyFail [] ERROR_TOK tk st sts stk +-- SG: I'm very doubtful that passing [] ("no token expected here") +-- as the first arg to happyFail here and in the following calls is +-- correct. I'm not going to touch it for a lack of understanding +-- and concerns of backward compatibility, but +-- `happyExpListPerState (IBOX(st) :: Prelude.Int)` +-- seems like a good candidate. 
happySpecReduce_0 nt fn j tk st sts stk - = happyGoto nt j tk st (HappyCons st sts) (fn `HappyStk` stk) + = happySeq fn (happyGoto nt j tk st (HappyCons st sts) (fn `HappyStk` stk)) happySpecReduce_1 i fn ERROR_TOK tk st sts stk = happyFail [] ERROR_TOK tk st sts stk @@ -226,20 +237,34 @@ happyGoto nt j tk st = where new_state = happyIndexGotoTable nt st ----------------------------------------------------------------------------- --- Error recovery (ERROR_TOK is the error token) - --- parse error if we are in recovery and we fail again +-- Error recovery +-- +-- When there is no applicable action for the current lookahead token `tk`, +-- happy enters error recovery mode. It works in 2 phases: +-- +-- 1. Fixup: Try to see if there is an action for the error token (`errorTok`, +-- which is ERROR_TOK). If there is, do *not* emit an error and pretend +-- instead that an `errorTok` was inserted. +-- When there is no `errorTok` action, call `happyError` and enter error +-- resumption mode. +-- 2. Error resumption mode: After `happyError` was called TODO: happyError is fatal. +-- Perhaps we should introduce a new `happyAddError`? +-- Current plan: New %resumptive declaration for specifying the two funs, +-- mutually exclusive with %error. +-- +-- This is what usually is associated with `error` +-- in `bison` or `menhir`. Since `error` is used for the Fixup mechanism (1) +-- above, we call the corresponding token `catch`. +-- In particular, `catch` will never *omit* calls to `happyFail`. + +-- parse error if we are in recovery and reached the end of the state stack happyFail explist ERROR_TOK tk old_st _ stk@(x `HappyStk` _) = let i = GET_ERROR_TOKEN(x) in -- trace "failing" $ - happyError_ explist i tk - -{- We don't need state discarding for our restricted implementation of - "error". 
In fact, it can cause some bogus parses, so I've disabled it - for now --SDM - + happyError_ explist i tk noResumption_ +{- -- discard a state -happyFail ERROR_TOK tk old_st (HappyCons action sts) +happyFail explist ERROR_TOK tk old_st (HappyCons action sts) (saved_tok `HappyStk` _ `HappyStk` stk) = -- trace ("discarding state, depth " ++ show (length stk)) $ happyDoAction ERROR_TOK tk action sts (saved_tok`HappyStk`stk) @@ -247,6 +272,8 @@ happyFail ERROR_TOK tk old_st (HappyCons action sts) -- Enter error recovery: generate an error token, -- save the old token and carry on. +-- When a `happyShift` accepts, we will pop off the error +-- token to resume parsing with the current lookahead `i`. happyFail explist i tk action sts stk = -- trace "entering error recovery" $ happyDoAction ERROR_TOK tk action sts (MK_ERROR_TOKEN(i) `HappyStk` stk) diff --git a/packages/backend-lalr/src/Happy/Backend/LALR/ProduceCode.lhs b/packages/backend-lalr/src/Happy/Backend/LALR/ProduceCode.lhs index b05f26b1..6369a768 100644 --- a/packages/backend-lalr/src/Happy/Backend/LALR/ProduceCode.lhs +++ b/packages/backend-lalr/src/Happy/Backend/LALR/ProduceCode.lhs @@ -61,6 +61,7 @@ Produce the complete output file. > , token_type = token_type' > , error_handler = error_handler' > , error_sig = error_sig' +> , error_resumptive = error_resumptive' > }) > action goto lang_exts module_header module_trailer > coerce strict @@ -84,20 +85,21 @@ Produce the complete output file. > where > n_starts = length starts' > token = brack token_type' -> + > nowarn_opts = str "{-# OPTIONS_GHC -w #-}" . nl > -- XXX Happy-generated code is full of warnings. Some are easy to > -- fix, others not so easy, and others would require GHC version > -- #ifdefs. For now I'm just disabling all of them. -> + > partTySigs_opts = ifGeGhc710 (str "{-# LANGUAGE PartialTypeSignatures #-}" . 
nl) > intMaybeHash = str "Happy_GHC_Exts.Int#" > -- Parsing monad and its constraints -> pty = str monad_tycon -> pcont = str monad_context -> +> pty = str monad_tycon -- str "P" +> ptyAt a = brack' (pty . str " " . a) -- \(str "a") -> str "(P a)" +> pcont = str monad_context -- str "Read a", some constraint for "P" to be a monad + > -- If GHC is enabled, wrap the content in a CPP ifdef that includes the > -- content and tests whether the GHC version is >= 7.10.3 > ifGeGhc710 :: (String -> String) -> String -> String @@ -108,13 +110,13 @@ Produce the complete output file. > n_missing_types = length (filter isNothing (elems nt_types)) > happyAbsSyn = str "(HappyAbsSyn " . str wild_tyvars . str ")" > where wild_tyvars = unwords (replicate n_missing_types "_") -> + > -- This decides how to include (if at all) a type signature > -- See > filterTypeSig :: (String -> String) -> String -> String > filterTypeSig content | n_missing_types == 0 = content > | otherwise = ifGeGhc710 content -> + > top_opts = > nowarn_opts > . (str $ unlines @@ -152,7 +154,7 @@ If we're using coercions, we need to generate the injections etc. > = let > happy_item = str "HappyAbsSyn " . str_tyvars > bhappy_item = brack' happy_item -> + > inject n ty > = (case ty of > Nothing -> id @@ -163,7 +165,7 @@ If we're using coercions, we need to generate the injections etc. > . mkHappyWrapCon ty n (str "x") > . nl > . str "{-# INLINE " . mkHappyIn n . str " #-}" -> + > extract n ty > = mkHappyOut n . str " :: " . bhappy_item > . str " -> " . typeParamOut n ty . char '\n' @@ -308,18 +310,18 @@ happyMonadReduce to get polymorphic recursion. Sigh. > . str " -> " . intMaybeHash > . str " -> Happy_IntList -> HappyStk " > . happyAbsSyn . str " -> " -> . pty . str " " . happyAbsSyn . str "\n" +> . ptyAt happyAbsSyn . str "\n" > in filterTypeSig tysig . mkReduceFun i . str " = " > . str s . strspace . lt' . strspace . showInt adjusted_nt > . strspace . reductionFun . nl > . reductionFun . 
strspace -> + > reductionFun = str "happyReduction_" . shows i -> + > tokPatterns > | coerce = reverse (map mkDummyVar [1 .. length toks]) > | otherwise = reverse (zipWith tokPattern [1..] toks) -> + > tokPattern n _ | n `notElem` vars_used = char '_' > tokPattern n t | t >= firstStartTok && t < fst_term > = if coerce @@ -333,21 +335,21 @@ happyMonadReduce to get polymorphic recursion. Sigh. > else str "(HappyTerminal " > . mkHappyTerminalVar n t > . char ')' -> + > tokLets code'' > | coerce && not (null cases) > = interleave "\n\t" cases > . code'' . str (replicate (length cases) '}') > | otherwise = code'' -> + > cases = [ str "case " . extract t . strspace . mkDummyVar n > . str " of { " . tokPattern n t . str " -> " > | (n,t) <- zip [1..] toks, > n `elem` vars_used ] -> + > extract t | t >= firstStartTok && t < fst_term = mkHappyOut t > | otherwise = str "happyOutTok" -> + > lt = length toks > this_absSynCon | coerce = mkHappyIn nt @@ -358,7 +360,7 @@ The token conversion function. > produceTokenConverter > = case lexer' of { -> + > Nothing -> > str "happyNewToken action sts stk [] =\n\t" > . eofAction "notHappyAtAll" @@ -367,10 +369,11 @@ The token conversion function. > . str "let cont i = " . doAction . str " sts stk tks in\n\t" > . str "case tk of {\n\t" > . interleave ";\n\t" (map doToken token_rep) -> . str "_ -> happyError' ((tk:tks), [])\n\t" +> . str "_ -> happyError' (tk:tks) [] noResumption_\n\t" > . str "}\n\n" -> . str "happyError_ explist " . eofTok . str " tk tks = happyError' (tks, explist)\n" -> . str "happyError_ explist _ tk tks = happyError' ((tk:tks), explist)\n"; +> . str "happyError_ explist " . eofTok . str " tk resume tks = happyError' tks explist resume\n" +> . str "happyError_ explist _ tk resume tks = happyError' (tk:tks) explist resume\n"; + > -- when the token is EOF, tk == _|_ (notHappyAtAll) > -- so we must not pass it to happyError' @@ -385,10 +388,10 @@ The token conversion function. > . str (eof' ++ " -> ") > . 
eofAction "tk" . str ";\n\t" > . interleave ";\n\t" (map doToken token_rep) -> . str "_ -> happyError' (tk, [])\n\t" +> . str "_ -> happyError' tk [] noResumption_\n\t" > . str "})\n\n" -> . str "happyError_ explist " . eofTok . str " tk = happyError' (tk, explist)\n" -> . str "happyError_ explist _ tk = happyError' (tk, explist)\n"; +> . str "happyError_ explist " . eofTok . str " tk resume = happyError' tk explist resume\n" +> . str "happyError_ explist _ tk resume = happyError' tk explist resume\n"; > -- superfluous pattern match needed to force happyError_ to > -- have the correct type. > } @@ -508,12 +511,12 @@ action array indexed by (terminal * last_state) + state > . str "happyActOffsets = HappyA# \"" --" > . hexChars act_offs > . str "\"#\n\n" --" -> + > . str "happyGotoOffsets :: HappyAddr\n" > . str "happyGotoOffsets = HappyA# \"" --" > . hexChars goto_offs > . str "\"#\n\n" --" -> + > . str "happyAdjustOffset :: Happy_GHC_Exts.Int# -> Happy_GHC_Exts.Int#\n" > . str "happyAdjustOffset off = " > . (if length table < 32768 @@ -522,17 +525,17 @@ action array indexed by (terminal * last_state) + state > . str " then off Happy_GHC_Exts.+# 65536#" > . str " else off") > . str "\n\n" --" -> + > . str "happyDefActions :: HappyAddr\n" > . str "happyDefActions = HappyA# \"" --" > . hexChars defaults > . str "\"#\n\n" --" -> + > . str "happyCheck :: HappyAddr\n" > . str "happyCheck = HappyA# \"" --" > . hexChars check > . str "\"#\n\n" --" -> + > . str "happyTable :: HappyAddr\n" > . str "happyTable = HappyA# \"" --" > . 
hexChars table @@ -547,11 +550,11 @@ action array indexed by (terminal * last_state) + state > n_terminals = length terms > n_nonterminals = length nonterms - n_starts -- lose %starts -> + > (act_offs,goto_offs,table,defaults,check,explist,min_off) > = mkTables action goto first_nonterm' fst_term > n_terminals n_nonterminals n_starts (bounds token_names') -> + > produceReduceArray > = {- str "happyReduceArr :: Array Int a\n" -} > str "happyReduceArr = Happy_Data_Array.array (" @@ -641,27 +644,30 @@ MonadStuff: > produceMonadStuff = -> str "happyThen :: " . pcont . str " => " . pty -> . str " a -> (a -> " . pty -> . str " b) -> " . pty . str " b\n" +> str "happyThen :: " . pcont . str " => " . ptyAt (str "a") +> . str " -> (a -> " . ptyAt (str "b") +> . str ") -> " . ptyAt (str "b") . str "\n" > . str "happyThen = " . brack monad_then . nl -> . str "happyReturn :: " . pcont . str " => a -> " . pty . str " a\n" +> . str "happyReturn :: " . pcont . str " => a -> " . ptyAt (str "a") . str "\n" > . str "happyReturn = " . brack monad_return . nl > . case lexer' of > Nothing -> > str "happyThen1 m k tks = (" . str monad_then > . str ") m (\\a -> k a tks)\n" -> . str "happyReturn1 :: " . pcont . str " => a -> b -> " . pty . str " a\n" +> . str "happyReturn1 :: " . pcont . str " => a -> b -> " . ptyAt (str "a") . str "\n" > . str "happyReturn1 = \\a tks -> " . brack monad_return > . str " a\n" -> . str "happyError' :: " . str monad_context . str " => ([" -> . token -> . str "], [Prelude.String]) -> " -> . str monad_tycon -> . str " a\n" -> . str "happyError' = " -> . str (if use_monad then "" else "HappyIdentity Prelude.. ") -> . errorHandler . str "\n" +> . str "happyError' :: " . pcont . str " => " +> . str "[" . token . str "] -> " +> . str "[Prelude.String] -> " +> . ptyAt (str "(Maybe a)") . str " -> " +> . ptyAt (str "a") +> . str "\n" +> . str "happyError' = " . errorHandler . str "\n" +> . str "noResumption_ :: " . pcont . str " => " +> . 
ptyAt (str "(Maybe a)") +> . str "\n" +> . str "noResumption_ = " . noResumption . str "\n" > _ -> > let > happyParseSig = @@ -671,35 +677,39 @@ MonadStuff: > newTokenSig = > str "happyNewToken :: " . pcont . str " => " . intMaybeHash > . str " -> Happy_IntList -> HappyStk " . happyAbsSyn -> . str " -> " . pty . str " " . happyAbsSyn . str"\n" +> . str " -> " . ptyAt happyAbsSyn . str"\n" > . str "\n" > doActionSig = > str "happyDoAction :: " . pcont . str " => " . intMaybeHash > . str " -> " . str token_type' . str " -> " . intMaybeHash > . str " -> Happy_IntList -> HappyStk " . happyAbsSyn -> . str " -> " . pty . str " " . happyAbsSyn . str "\n" +> . str " -> " . ptyAt happyAbsSyn . str"\n" > . str "\n" > reduceArrSig = > str "happyReduceArr :: " . pcont > . str " => Happy_Data_Array.Array Prelude.Int (" . intMaybeHash > . str " -> " . str token_type' . str " -> " . intMaybeHash > . str " -> Happy_IntList -> HappyStk " . happyAbsSyn -> . str " -> " . pty . str " " . happyAbsSyn . str ")\n" +> . str " -> " . ptyAt happyAbsSyn . str ")\n" > . str "\n" > in filterTypeSig (happyParseSig . newTokenSig . doActionSig . reduceArrSig) > . str "happyThen1 :: " . pcont . str " => " . pty > . str " a -> (a -> " . pty > . str " b) -> " . pty . str " b\n" > . str "happyThen1 = happyThen\n" -> . str "happyReturn1 :: " . pcont . str " => a -> " . pty . str " a\n" +> . str "happyReturn1 :: " . pcont . str " => a -> " . ptyAt (str "a") . str "\n" > . str "happyReturn1 = happyReturn\n" -> . str "happyError' :: " . str monad_context . str " => (" -> . token . str ", [Prelude.String]) -> " -> . str monad_tycon -> . str " a\n" -> . str "happyError' tk = " -> . str (if use_monad then "" else "HappyIdentity ") -> . errorHandler . str " tk\n" +> . str "happyError' :: " . pcont . str " => " +> . token . str " -> " +> . str "[Prelude.String] -> " +> . ptyAt (str "(Maybe a)") . str " -> " +> . ptyAt (str "a") +> . str "\n" +> . str "happyError' = " . errorHandler . str "\n" +> . 
str "noResumption_ :: " . pcont . str " => " +> . ptyAt (str "(Maybe a)") +> . str "\n" +> . str "noResumption_ = " . noResumption . str "\n" An error handler specified with %error is passed the current token when used with %lexer, but happyError (the old way but kept for @@ -707,13 +717,19 @@ compatibility) is not passed the current token. Also, the %errorhandlertype directive determines the API of the provided function. > errorHandler = -> case error_handler' of -> Just h -> case error_sig' of -> ErrorHandlerTypeExpList -> str h -> ErrorHandlerTypeDefault -> str "(\\(tokens, _) -> " . str h . str " tokens)" -> Nothing -> case lexer' of -> Nothing -> str "(\\(tokens, _) -> happyError tokens)" -> Just _ -> str "(\\(tokens, explist) -> happyError)" +> str "(\\tokens explist resume -> " . +> (if use_monad then str "" +> else str "HappyIdentity Prelude.$ ") . +> str (case error_handler' of Just h -> h; Nothing -> "happyError") . str " " . +> str (case (error_handler', lexer') of (Nothing, Just _) -> "" +> _ -> "tokens ") . +> (case error_sig' of ErrorHandlerTypeExpList -> str "explist " +> ErrorHandlerTypeDefault -> str "") . +> (if error_resumptive' then str "resume " +> else str "") . +> str ")" +> noResumption = if use_monad then brack monad_return . str " Nothing" +> else str "HappyIdentity Nothing" > reduceArrElem n > = str "\t(" . shows n . str " , " @@ -758,9 +774,9 @@ directive determines the API of the provided function. > (True,Nothing) -> \(name,_,_,_) -> monadAE name > (False,Just _) -> error "attribute grammars not supported for non-monadic parsers with %lexer" > (False,Nothing)-> \(name,_,_,_) -> regularAE name -> + > defaultAttr = fst (head attributes') -> + > monadAndLexerAE name > = str name . str " = " > . str "do { " @@ -828,20 +844,20 @@ See notes under "Action Tables" above for some subtleties in this function. 
> getDefault actions = > -- pick out the action for the error token, if any > case [ act | (e, act) <- actions, e == errorTok ] of -> + > -- use error reduction as the default action, if there is one. > act@(LR'Reduce _ _) : _ -> act > act@(LR'Multiple _ (LR'Reduce _ _)) : _ -> act -> + > -- if the error token is shifted or otherwise, don't generate > -- a default action. This is *important*! > (act : _) | act /= LR'Fail -> LR'Fail -> + > -- no error actions, pick a reduce to be the default. > _ -> case reduces of > [] -> LR'Fail > (act:_) -> act -- pick the first one we see for now -> + > where reduces > = [ act | (_, act@(LR'Reduce _ _)) <- actions ] > ++ [ act | (_, LR'Multiple _ act@(LR'Reduce _ _)) <- actions ] @@ -902,11 +918,11 @@ See notes under "Action Tables" above for some subtleties in this function. > , [Int] -- happyExpList > , Int -- happyMinOffset > ) -> + > mkTables action goto first_nonterm' fst_term > n_terminals n_nonterminals n_starts > token_names_bound -> + > = ( elems act_offs > , elems goto_offs > , take max_off (elems table) @@ -916,17 +932,17 @@ See notes under "Action Tables" above for some subtleties in this function. > , min_off > ) > where -> + > (table,check,act_offs,goto_offs,explist,min_off,max_off) > = runST (genTables (length actions) > max_token token_names_bound > sorted_actions explist_actions) -> + > -- the maximum token number used in the parser > max_token = max n_terminals (n_starts+n_nonterminals) - 1 -> + > def_actions = map (\(_,_,def,_,_,_) -> def) actions -> + > actions :: [TableEntry] > actions = > [ (ActionEntry, @@ -943,24 +959,24 @@ See notes under "Action Tables" above for some subtleties in this function. 
> default_act = getDefault acts' > acts'' = mkActVals acts' default_act > ] -> + > explist_actions :: [(Int, [Int])] > explist_actions = [ (state, concatMap f $ assocs acts) > | (state, acts) <- assocs action ] > where > f (t, LR'Shift _ _ ) = [t - fst token_names_bound] > f (_, _) = [] -> + > -- adjust terminals by -(fst_term+1), so they start at 1 (error is 0). > -- (see ARRAY_NOTES) > adjust token | token == errorTok = 0 > | otherwise = token - fst_term + 1 -> + > mkActVals assocs' default_act = > [ (adjust token, actionVal act) > | (token, act) <- assocs' > , act /= default_act ] -> + > gotos :: [TableEntry] > gotos = [ (GotoEntry, > state, 0, @@ -972,12 +988,12 @@ See notes under "Action Tables" above for some subtleties in this function. > | (state, goto_arr) <- assocs goto, > let goto_vals = mkGotoVals (assocs goto_arr) > ] -> + > -- adjust nonterminals by -first_nonterm', so they start at zero > -- (see ARRAY_NOTES) > mkGotoVals assocs' = > [ (token - first_nonterm', i) | (token, Goto i) <- assocs' ] -> + > sorted_actions = sortBy (flip cmp_state) (actions ++ gotos) > cmp_state (_,_,_,width1,tally1,_) (_,_,_,width2,tally2,_) > | width1 < width2 = LT @@ -1007,19 +1023,19 @@ See notes under "Action Tables" above for some subtleties in this function. 
> , Int -- lowest offset in table > , Int -- highest offset in table > ) -> + > genTables n_actions max_token token_names_bound entries explist = do -> + > table <- newArray (0, mAX_TABLE_SIZE) 0 > check <- newArray (0, mAX_TABLE_SIZE) (-1) > act_offs <- newArray (0, n_actions) 0 > goto_offs <- newArray (0, n_actions) 0 > off_arr <- newArray (-max_token, mAX_TABLE_SIZE) 0 > exp_array <- newArray (0, (n_actions * n_token_names + 31) `div` 32) 0 -- 32 bits per entry -> + > (min_off,max_off) <- genTables' table check act_offs goto_offs off_arr exp_array entries > explist max_token n_token_names -> + > table' <- freeze table > check' <- freeze check > act_offs' <- freeze act_offs @@ -1046,19 +1062,19 @@ See notes under "Action Tables" above for some subtleties in this function. > -> Int -- maximum token no. > -> Int -- number of token names > -> ST s (Int,Int) -- lowest and highest offsets in table -> + > genTables' table check act_offs goto_offs off_arr exp_array entries > explist max_token n_token_names > = fill_exp_array >> fit_all entries 0 0 1 > where -> + > fit_all [] min_off max_off _ = return (min_off, max_off) > fit_all (s:ss) min_off max_off fst_zero = do > (off, new_min_off, new_max_off, new_fst_zero) <- fit s min_off max_off fst_zero > ss' <- same_states s ss off > writeArray off_arr off 1 > fit_all ss' new_min_off new_max_off new_fst_zero -> + > fill_exp_array = > forM_ explist $ \(state, tokens) -> > forM_ tokens $ \token -> do @@ -1067,7 +1083,7 @@ See notes under "Action Tables" above for some subtleties in this function. > let word_offset = bit_nr `mod` 32 > x <- readArray exp_array word_nr > writeArray exp_array word_nr (setBit x word_offset) -> + > -- try to merge identical states. We only try the next state(s) > -- in the list, but the list is kind-of sorted so we shouldn't > -- miss too many. @@ -1076,15 +1092,15 @@ See notes under "Action Tables" above for some subtleties in this function. 
> | acts == acts' = do writeArray (which_off e) no off > same_states s ss' off > | otherwise = return ss -> + > which_off ActionEntry = act_offs > which_off GotoEntry = goto_offs -> + > -- fit a vector into the table. Return the offset of the vector, > -- the maximum offset used in the table, and the offset of the first > -- entry in the table (used to speed up the lookups a bit). > fit (_,_,_,_,_,[]) min_off max_off fst_zero = return (0,min_off,max_off,fst_zero) -> + > fit (act_or_goto, state_no, _deflt, _, _, state@((t,_):_)) > min_off max_off fst_zero = do > -- start at offset 1 in the table: all the empty states @@ -1097,9 +1113,9 @@ See notes under "Action Tables" above for some subtleties in this function. > | otherwise = max_off > furthest_left = off > furthest_right = off + max_token -> + > -- trace ("fit: state " ++ show state_no ++ ", off " ++ show off ++ ", elems " ++ show state) $ do -> + > writeArray (which_off act_or_goto) state_no off > addState off table check state > new_fst_zero <- findFstFreeSlot check fst_zero @@ -1119,11 +1135,11 @@ slot is free or not. > findFreeOffset off table off_arr state = do > -- offset 0 isn't allowed > if off == 0 then try_next else do -> + > -- don't use an offset we've used before > b <- readArray off_arr off > if b /= 0 then try_next else do -> + > -- check whether the actions for this state fit in the table > ok <- fits off state table > if not ok then try_next else return off diff --git a/packages/codegen-common/src/Happy/CodeGen/Common/Options.lhs b/packages/codegen-common/src/Happy/CodeGen/Common/Options.lhs index 66239991..ef925f91 100644 --- a/packages/codegen-common/src/Happy/CodeGen/Common/Options.lhs +++ b/packages/codegen-common/src/Happy/CodeGen/Common/Options.lhs @@ -21,5 +21,11 @@ The CommonOptions data type. 
> expect :: Maybe Int, > lexer :: Maybe (String,String), > error_handler :: Maybe String, -> error_sig :: ErrorHandlerType +> error_sig :: ErrorHandlerType, +> -- ^ ErrorHandlerTypExpList: error handler expects a +> -- `[String]` as first arg with the pretty-printed expected +> -- tokens +> error_resumptive :: Bool +> -- ^ `True` => The error handler expects a `resume` +> -- continuation as last argument. > } diff --git a/packages/frontend/boot-src/Parser.ly b/packages/frontend/boot-src/Parser.ly index a18fabdf..780dd02b 100644 --- a/packages/frontend/boot-src/Parser.ly +++ b/packages/frontend/boot-src/Parser.ly @@ -34,6 +34,7 @@ The parser. > spec_expect { TokenKW TokSpecId_Expect } > spec_error { TokenKW TokSpecId_Error } > spec_errorhandlertype { TokenKW TokSpecId_ErrorHandlerType } +> spec_errorresumptive { TokenKW TokSpecId_ErrorResumptive } > spec_attribute { TokenKW TokSpecId_Attribute } > spec_attributetype { TokenKW TokSpecId_Attributetype } > code { TokenInfo $$ TokCodeQuote } @@ -104,11 +105,11 @@ The parser. > | spec_shift { PrecShift } > | { PrecNone } -> tokInfos :: { [Directive String] } +> tokInfos :: { [Directive String] } > : tokInfos tokInfo { $2 : $1 } > | tokInfo { [$1] } -> tokInfo :: { Directive String } +> tokInfo :: { Directive String } > : spec_tokentype code { TokenType $2 } > | spec_token tokenSpecs { TokenSpec $2 } > | spec_name id optStart { TokenName $2 $3 False } @@ -124,7 +125,8 @@ The parser. 
> | spec_left ids { TokenLeft $2 } > | spec_expect int { TokenExpect $2 } > | spec_error code { TokenError $2 } -> | spec_errorhandlertype id { TokenErrorHandlerType $2 } +> | spec_errorhandlertype id { TokenErrorHandlerType $2 } +> | spec_errorresumptive { TokenErrorResumptive } > | spec_attributetype code { TokenAttributetype $2 } > | spec_attribute id code { TokenAttribute $2 $3 } diff --git a/packages/frontend/src/Happy/Frontend/AbsSyn.lhs b/packages/frontend/src/Happy/Frontend/AbsSyn.lhs index 3b195a7f..32c8f0ad 100644 --- a/packages/frontend/src/Happy/Frontend/AbsSyn.lhs +++ b/packages/frontend/src/Happy/Frontend/AbsSyn.lhs @@ -11,7 +11,7 @@ Here is the abstract syntax of the language we parse. > AbsSyn(..), Directive(..), > getTokenType, getTokenSpec, getParserNames, getLexer, > getImportedIdentity, getMonad, getError, -> getPrios, getPrioNames, getExpect, getErrorHandlerType, +> getPrios, getPrioNames, getExpect, getErrorHandlerType, getErrorResumptive, > getAttributes, getAttributetype, > Rule(..), Prod(..), Term(..), Prec(..) > ) where @@ -60,13 +60,12 @@ Parser Generator Directives. ToDo: find a consistent way to analyse all the directives together and generate some error messages. -> + > data Directive a > = TokenType String -- %tokentype > | TokenSpec [(a,String)] -- %token > | TokenName String (Maybe String) Bool -- %name/%partial (True <=> %partial) > | TokenLexer String String -- %lexer -> | TokenErrorHandlerType String -- %errorhandlertype > | TokenImportedIdentity -- %importedidentity > | TokenMonad String String String String -- %monad > | TokenNonassoc [String] -- %nonassoc @@ -74,6 +73,8 @@ generate some error messages. 
> | TokenLeft [String] -- %left > | TokenExpect Int -- %expect > | TokenError String -- %error +> | TokenErrorHandlerType String -- %errorhandlertype +> | TokenErrorResumptive -- %errorresumptive > | TokenAttributetype String -- %attributetype > | TokenAttribute String String -- %attribute > deriving Show @@ -151,6 +152,9 @@ generate some error messages. > [] -> ErrorHandlerTypeDefault > _ -> error "multiple errorhandlertype directives" +> getErrorResumptive :: [Directive t] -> Bool +> getErrorResumptive ds = not (null [ () | TokenErrorResumptive <- ds ]) + > getAttributes :: [Directive t] -> [(String, String)] > getAttributes ds > = [ (ident,typ) | (TokenAttribute ident typ) <- ds ] diff --git a/packages/frontend/src/Happy/Frontend/Lexer.lhs b/packages/frontend/src/Happy/Frontend/Lexer.lhs index 8bfe65bd..1fe5ee53 100644 --- a/packages/frontend/src/Happy/Frontend/Lexer.lhs +++ b/packages/frontend/src/Happy/Frontend/Lexer.lhs @@ -37,7 +37,6 @@ The lexer. > | TokSpecId_Token -- %token > | TokSpecId_Name -- %name > | TokSpecId_Partial -- %partial -> | TokSpecId_ErrorHandlerType -- %errorhandlertype > | TokSpecId_Lexer -- %lexer > | TokSpecId_ImportedIdentity -- %importedidentity > | TokSpecId_Monad -- %monad @@ -48,6 +47,8 @@ The lexer. > | TokSpecId_Shift -- %shift > | TokSpecId_Expect -- %expect > | TokSpecId_Error -- %error +> | TokSpecId_ErrorHandlerType -- %errorhandlertype +> | TokSpecId_ErrorResumptive -- %errorresumptive > | TokSpecId_Attributetype -- %attributetype > | TokSpecId_Attribute -- %attribute > | TokCodeQuote -- stuff inside { .. } @@ -131,6 +132,8 @@ followed by a special identifier. 
> cont (TokenKW TokSpecId_Expect) rest > 'e':'r':'r':'o':'r':'h':'a':'n':'d':'l':'e':'r':'t':'y':'p':'e':rest -> > cont (TokenKW TokSpecId_ErrorHandlerType) rest +> 'e':'r':'r':'o':'r':'r':'e':'s':'u':'m':'p':'t':'i':'v':'e':rest -> +> cont (TokenKW TokSpecId_ErrorResumptive) rest > 'e':'r':'r':'o':'r':rest -> > cont (TokenKW TokSpecId_Error) rest > 'a':'t':'t':'r':'i':'b':'u':'t':'e':'t':'y':'p':'e':rest -> @@ -177,24 +180,24 @@ here is a bit tricky, but should work in most cases. > -> ParseResult b > lexReadCode s n c = case s of > '\n':r -> \cont l -> lexReadCode r n ('\n':c) cont (l+1) -> + > '{' :r -> lexReadCode r (n+1) ('{':c) -> + > '}' :r > | n == 0 -> \cont -> cont (TokenInfo ( > cleanupCode (reverse c)) TokCodeQuote) r > | otherwise -> lexReadCode r (n-1) ('}':c) -> + > '"'{-"-}:r -> lexReadString r (\ str r' -> > lexReadCode r' n ('"' : (reverse str) ++ '"' : c)) -> + > a: '\'':r | isAlphaNum a -> lexReadCode r n ('\'':a:c) -> + > '\'' :r -> lexReadSingleChar r (\ str r' -> > lexReadCode r' n ((reverse str) ++ '\'' : c)) -> + > ch:r -> lexReadCode r n (ch:c) -> + > [] -> \_cont -> lexError "No closing '}' in code segment" [] ---------------------------------------------------------------------------- diff --git a/packages/frontend/src/Happy/Frontend/Mangler.lhs b/packages/frontend/src/Happy/Frontend/Mangler.lhs index c1108d2a..ba227607 100644 --- a/packages/frontend/src/Happy/Frontend/Mangler.lhs +++ b/packages/frontend/src/Happy/Frontend/Mangler.lhs @@ -65,7 +65,8 @@ This bit is a real mess, mainly because of the error message support. > starts' = case getParserNames dirs of > [] -> [TokenName "happyParse" Nothing False] > ns -> ns -> +> error_resumptive' = getErrorResumptive dirs + > start_strs = [ startName++'_':p | (TokenName p _ _) <- starts' ] Build up a mapping from name values to strings. @@ -83,7 +84,7 @@ Build up a mapping from name values to strings. 
> case lookupName str' of > [a] -> return a > [] -> do addErr ("unknown identifier '" ++ str' ++ "'") -> return errorTok +> return errorTok -- SG: What a confusing use of errorTok.. Use dummyTok? > (a:_) -> do addErr ("multiple use of '" ++ str' ++ "'") > return a @@ -106,7 +107,7 @@ Start symbols... Deal with priorities... > priodir = zip [1..] (getPrios dirs) -> + > mkPrio :: Int -> Directive a -> Priority > mkPrio i (TokenNonassoc _) = Prio None i > mkPrio i (TokenRight _) = Prio RightAssoc i @@ -129,13 +130,13 @@ Translate the rules from string to name-based. > convNT (Rule1 nt prods ty) > = do nt' <- mapToName nt > return (nt', prods, ty) -> + > attrs = getAttributes dirs > attrType = fromMaybe "HappyAttrs" (getAttributetype dirs) -> + > transRule (nt, prods, _ty) > = mapM (finishRule nt) prods -> + > finishRule :: Name -> Prod1 -> Writer [ErrMsg] Production > finishRule nt (Prod1 lhs code line prec) > = mapWriter (\(a,e) -> (a, map (addLine line) e)) $ do @@ -145,7 +146,7 @@ Translate the rules from string to name-based. > Left s -> do addErr ("Undeclared precedence token: " ++ s) > return (Production nt lhs' code' No) > Right p -> return (Production nt lhs' code' p) -> + > mkPrec :: [Name] -> Prec -> Either String Priority > mkPrec lhs PrecNone = > case filter (flip elem terminal_names) lhs of @@ -157,9 +158,9 @@ Translate the rules from string to name-based. > case lookup s prioByString of > Nothing -> Left s > Just p -> Right p -> + > mkPrec _ PrecShift = Right PrioLowest -> + > -- in > rules1 <- mapM convNT rules @@ -168,7 +169,7 @@ Translate the rules from string to name-based. > let > type_env = [(nt, t) | Rule1 nt _ (Just (t,[])) <- rules] ++ > [(nt, getTokenType dirs) | nt <- terminal_strs] -- XXX: Doesn't handle $$ type! 
-> + > fixType (ty,s) = go "" ty > where go acc [] = return (reverse acc) > go acc (c:r) | isLower c = -- look for a run of alphanumerics starting with a lower case letter @@ -182,14 +183,14 @@ Translate the rules from string to name-based. > go1 (c:cs) > Just t -> go1 $ "(" ++ t ++ ")" > | otherwise = go (c:acc) r -> + > convType (nm, t) > = do t' <- fixType t > return (nm, t') -> + > -- in > tys <- mapM convType [ (nm, t) | (nm, _, Just t) <- rules1 ] -> + > let > type_array :: Array Int (Maybe String) @@ -215,7 +216,7 @@ Get the token specs in terms of Names. > lookup_prods :: Name -> [Int] > lookup_prods x | x >= firstStartTok && x < first_t = arr ! x > lookup_prods _ = error "lookup_prods" -> + > productions' = start_prods ++ concat rules2 > prod_array = listArray (0,length productions' - 1) productions' > -- in @@ -245,6 +246,7 @@ Get the token specs in terms of Names. > lexer = getLexer dirs, > error_handler = getError dirs, > error_sig = getErrorHandlerType dirs, +> error_resumptive = error_resumptive', > token_type = getTokenType dirs, > expect = getExpect dirs > }) @@ -258,7 +260,7 @@ Gofer-like stuff: > combine [] = [] > combine ((a,b):(c,d):r) | a == c = combine ((a,b++d) : r) > combine (a:r) = a : combine r -> + For combining actions with possible error messages. @@ -300,7 +302,7 @@ So is this. > where go code acc used = > case code of > [] -> return (reverse acc, used) -> + > '"' :r -> case reads code :: [(String,String)] of > [] -> go r ('"':acc) used > (s,r'):_ -> go r' (reverse (show s) ++ acc) used @@ -309,13 +311,13 @@ So is this. > [] -> go r ('\'':acc) used > (c,r'):_ -> go r' (reverse (show c) ++ acc) used > '\\':'$':r -> go r ('$':acc) used -> + > '$':'>':r -- the "rightmost token" > | arity == 0 -> do addErr "$> in empty rule" > go r acc used > | otherwise -> go r (reverse (mkHappyVar arity) ++ acc) > (arity : used) -> + > '$':r@(i:_) | isDigit i -> > case reads r :: [(Int,String)] of > (j,r'):_ ->