From 80126af45fb7fa11b9a21cdee4a93145dacfe8af Mon Sep 17 00:00:00 2001 From: Karim Taha Date: Sat, 22 Jun 2024 23:24:29 +0300 Subject: [PATCH] Use 32-bit integers instead of 16-bit integers for automaton representation --- packages/backend-lalr/data/HappyTemplate.hs | 43 ++++++----- .../src/Happy/Backend/LALR/ProduceCode.lhs | 76 +++++++++++-------- 2 files changed, 69 insertions(+), 50 deletions(-) diff --git a/packages/backend-lalr/data/HappyTemplate.hs b/packages/backend-lalr/data/HappyTemplate.hs index 179abee9..e9e31422 100644 --- a/packages/backend-lalr/data/HappyTemplate.hs +++ b/packages/backend-lalr/data/HappyTemplate.hs @@ -4,6 +4,9 @@ # error This code isn't being built with GHC. #endif +-- Get WORDS_BIGENDIAN (if defined) +#include "MachDeps.h" + -- Do not remove this comment. Required to fix CPP parsing when using GCC and a clang-compiled alex. #if __GLASGOW_HASKELL__ > 706 # define LT(n,m) ((Happy_GHC_Exts.tagToEnum# (n Happy_GHC_Exts.<# m)) :: Prelude.Bool) @@ -82,16 +85,16 @@ happyDoAction i tk st = {-# INLINE happyNextAction #-} happyNextAction i st = case happyIndexActionTable i st of Just (Happy_GHC_Exts.I# act) -> act - Nothing -> indexShortOffAddr happyDefActions st + Nothing -> happyIndexOffAddr happyDefActions st {-# INLINE happyIndexActionTable #-} happyIndexActionTable i st - | GTE(off, 0#), EQ(indexShortOffAddr happyCheck off, i) - = Prelude.Just (Happy_GHC_Exts.I# (indexShortOffAddr happyTable off)) + | GTE(off, 0#), EQ(happyIndexOffAddr happyCheck off, i) + = Prelude.Just (Happy_GHC_Exts.I# (happyIndexOffAddr happyTable off)) | otherwise = Prelude.Nothing where - off = PLUS(happyAdjustOffset (indexShortOffAddr happyActOffsets st), i) + off = PLUS(happyIndexOffAddr happyActOffsets st, i) data HappyAction = HappyFail @@ -107,24 +110,30 @@ happyDecodeAction action | LT(action, 0#) = HappyReduce NEGATE(PLUS(action, 1 | otherwise = HappyShift MINUS(action, 1#) {-# INLINE happyIndexGotoTable #-} -happyIndexGotoTable nt st = indexShortOffAddr happyTable off +happyIndexGotoTable nt st = happyIndexOffAddr happyTable off where - off = PLUS(happyAdjustOffset (indexShortOffAddr happyGotoOffsets st), nt) + off = PLUS(happyIndexOffAddr happyGotoOffsets st, nt) -indexShortOffAddr (HappyA# arr) off = - Happy_GHC_Exts.narrow16Int# i - where - i = Happy_GHC_Exts.word2Int# (Happy_GHC_Exts.or# (Happy_GHC_Exts.uncheckedShiftL# high 8#) low) - high = Happy_GHC_Exts.int2Word# (Happy_GHC_Exts.ord# (Happy_GHC_Exts.indexCharOffAddr# arr (off' Happy_GHC_Exts.+# 1#))) - low = Happy_GHC_Exts.int2Word# (Happy_GHC_Exts.ord# (Happy_GHC_Exts.indexCharOffAddr# arr off')) - off' = off Happy_GHC_Exts.*# 2# +{-# INLINE happyIndexOffAddr #-} +happyIndexOffAddr :: HappyAddr -> Happy_Int -> Happy_Int +happyIndexOffAddr (HappyA# arr) off = +#if __GLASGOW_HASKELL__ >= 901 + Happy_GHC_Exts.int32ToInt# -- qualified import because it doesn't exist on older GHC's +#endif +#ifdef WORDS_BIGENDIAN + -- The CI of `alex` tests this code path + (Happy_GHC_Exts.word32ToInt32# (Happy_GHC_Exts.wordToWord32# (Happy_GHC_Exts.byteSwap32# (Happy_GHC_Exts.word32ToWord# (Happy_GHC_Exts.int32ToWord32# +#endif + (Happy_GHC_Exts.indexInt32OffAddr# arr off) +#ifdef WORDS_BIGENDIAN + ))))) +#endif {-# INLINE happyLt #-} happyLt x y = LT(x,y) readArrayBit arr bit = - Bits.testBit (Happy_GHC_Exts.I# (indexShortOffAddr arr ((unbox_int bit) `Happy_GHC_Exts.iShiftRA#` 4#))) - (bit `Prelude.mod` 16) + Bits.testBit (Happy_GHC_Exts.I# (happyIndexOffAddr arr ((unbox_int bit) `Happy_GHC_Exts.iShiftRA#` 5#))) (bit `Prelude.mod` 32) where unbox_int (Happy_GHC_Exts.I# x) = x data HappyAddr = HappyA# Happy_GHC_Exts.Addr# @@ -192,9 +201,9 @@ happyMonad2Reduce k nt fn j tk st sts stk = case happyDrop k (HappyCons st sts) of sts1@(HappyCons st1 _) -> let drop_stk = happyDropStk k stk - off = happyAdjustOffset (indexShortOffAddr happyGotoOffsets st1) + off = happyAdjustOffset (happyIndexOffAddr happyGotoOffsets st1) off_i = PLUS(off, nt) - new_state = indexShortOffAddr happyTable off_i + new_state = happyIndexOffAddr happyTable off_i in happyThen1 (fn stk tk) (\r -> happyNewToken new_state sts1 (r `HappyStk` drop_stk)) diff --git a/packages/backend-lalr/src/Happy/Backend/LALR/ProduceCode.lhs b/packages/backend-lalr/src/Happy/Backend/LALR/ProduceCode.lhs index 838b012e..f5ea3b4c 100644 --- a/packages/backend-lalr/src/Happy/Backend/LALR/ProduceCode.lhs +++ b/packages/backend-lalr/src/Happy/Backend/LALR/ProduceCode.lhs @@ -18,7 +18,9 @@ The code generator. > import Control.Monad ( forM_ ) > import Control.Monad.ST ( ST, runST ) -> import Data.Bits ( setBit ) +> import Data.Word +> import Data.Int +> import Data.Bits > import Data.Array.ST ( STUArray ) > import Data.Array.Unboxed ( UArray ) > import Data.Array.MArray ( MArray(..), freeze, readArray, writeArray ) @@ -486,8 +488,8 @@ machinery to discard states in the parser... > . str "happyExpListPerState st =\n" > . str " token_strs_expected\n" > . str " where token_strs = " . str (show $ elems token_names') . str "\n" -> . str " bit_start = st Prelude.* " . str (show nr_tokens) . str "\n" -> . str " bit_end = (st Prelude.+ 1) Prelude.* " . str (show nr_tokens) . str "\n" +> . str " bit_start = st Prelude.* " . str (show nr_tokens) . str "\n" +> . str " bit_end = (st Prelude.+ 1) Prelude.* " . str (show nr_tokens) . str "\n" > . str " read_bit = readArrayBit happyExpList\n" > . str " bits = Prelude.map read_bit [bit_start..bit_end Prelude.- 1]\n" > . str " bits_indexed = Prelude.zip bits [0.." @@ -504,12 +506,12 @@ action array indexed by (terminal * last_state) + state > produceActionArray > = str "happyActOffsets :: HappyAddr\n" > . str "happyActOffsets = HappyA# \"" --" -> . str (checkedHexChars min_off act_offs) +> . hexChars act_offs > . str "\"#\n\n" --" > > . str "happyGotoOffsets :: HappyAddr\n" > . str "happyGotoOffsets = HappyA# \"" --" -> . str (checkedHexChars min_off goto_offs) +> . hexChars goto_offs > . str "\"#\n\n" --" > > . str "happyAdjustOffset :: Happy_GHC_Exts.Int# -> Happy_GHC_Exts.Int#\n" @@ -523,24 +525,24 @@ action array indexed by (terminal * last_state) + state > > . str "happyDefActions :: HappyAddr\n" > . str "happyDefActions = HappyA# \"" --" -> . str (hexChars defaults) +> . hexChars defaults > . str "\"#\n\n" --" > > . str "happyCheck :: HappyAddr\n" > . str "happyCheck = HappyA# \"" --" -> . str (hexChars check) +> . hexChars check > . str "\"#\n\n" --" > > . str "happyTable :: HappyAddr\n" > . str "happyTable = HappyA# \"" --" -> . str (hexChars table) +> . hexChars table > . str "\"#\n\n" --" > produceExpListArray > = str "happyExpList :: HappyAddr\n" > . str "happyExpList = HappyA# \"" --" -> . str (hexChars explist) +> . hexCharsForBits explist > . str "\"#\n\n" --" > n_terminals = length terms @@ -1013,7 +1015,7 @@ See notes under "Action Tables" above for some subtleties in this function. > act_offs <- newArray (0, n_actions) 0 > goto_offs <- newArray (0, n_actions) 0 > off_arr <- newArray (-max_token, mAX_TABLE_SIZE) 0 -> exp_array <- newArray (0, (n_actions * n_token_names + 15) `div` 16) 0 +> exp_array <- newArray (0, (n_actions * n_token_names + 31) `div` 32) 0 -- 32 bits per entry > > (min_off,max_off) <- genTables' table check act_offs goto_offs off_arr exp_array entries > explist max_token n_token_names @@ -1061,8 +1063,8 @@ See notes under "Action Tables" above for some subtleties in this function. > forM_ explist $ \(state, tokens) -> > forM_ tokens $ \token -> do > let bit_nr = state * n_token_names + token -> let word_nr = bit_nr `div` 16 -> let word_offset = bit_nr `mod` 16 +> let word_nr = bit_nr `div` 32 +> let word_offset = bit_nr `mod` 32 > x <- readArray exp_array word_nr > writeArray exp_array word_nr (setBit x word_offset) > @@ -1216,31 +1218,39 @@ slot is free or not. > brack' s = char '(' . s . char ')' ----------------------------------------------------------------------------- --- Convert an integer to a 16-bit number encoded in \xNN\xNN format suitable --- for placing in a string. +-- Convert an integer to a 32-bit number encoded in little-endian +-- \xNN\xNN\xNN\xNN format suitable for placing in a string. -> hexChars :: [Int] -> String -> hexChars = concatMap hexChar +> hexChars :: [Int] -> String -> String +> hexChars is s = foldr (hexChar . toInt32) s is -> hexChar :: Int -> String -> hexChar i | i < 0 = hexChar (i + 65536) -> hexChar i = toHex (i `mod` 256) ++ toHex (i `div` 256) +The following function is used for generating happyExpList, which is an array of +bits encoded as [Int] for legacy reasons; we don't want to check for overflow +here. -> toHex :: Int -> String -> toHex i = ['\\','x', hexDig (i `div` 16), hexDig (i `mod` 16)] +> hexCharsForBits :: [Int] -> String -> String +> hexCharsForBits is s = foldr (hexChar . fromIntegral) s is -> hexDig :: Int -> Char -> hexDig i | i <= 9 = chr (i + ord '0') -> | otherwise = chr (i - 10 + ord 'a') +The following definition of @hexChar@ chooses a little endian encoding for `Int32` . +Ergo, the compiled parser must use the same endianness when decoding array entries. +On big endian architectures, this means users will have to compile with `WORDS_BIGENDIAN`, +which is defined in the GHC provided C header `MachDeps.h`. -This guards against integers that are so large as to (when converted using -'hexChar') wrap around the maximum value of 16-bit numbers and then end up -larger than an expected minimum value. +> hexChar :: Int32 -> String -> String +> hexChar i s = foldr (toHex . byte i) s [0,1,2,3] -> checkedHexChars :: Int -> [Int] -> String -> checkedHexChars minValue = concatMap hexChar' -> where hexChar' i | checkHexChar minValue i = hexChar i -> | otherwise = error "grammar does not fit in 16-bit representation that is used with '--ghc'" +> byte :: Int32 -> Int -> Word8 +> byte n i = fromIntegral (0xFF .&. shiftR n (i*8)) -> checkHexChar :: Int -> Int -> Bool -> checkHexChar minValue i = i <= 32767 || i - 65536 < minValue +> toHex :: Word8 -> String -> String +> toHex i s = '\\':'x':hexDig (0xF .&. shiftR i 4):hexDig (0xF .&. i):s + +> hexDig :: Word8 -> Char +> hexDig i | i <= 9 = chr (fromIntegral i + ord '0') +> | otherwise = chr (fromIntegral i - 10 + ord 'a') + +> toInt32 :: Int -> Int32 +> toInt32 i +> | i == fromIntegral i32 = i32 +> | otherwise = error ("offset was too large for Int32: " ++ show i) +> where i32 = fromIntegral i