Skip to content

Commit

Permalink
Use 32-bit integers instead of 16-bit integers for automaton represen…
Browse files Browse the repository at this point in the history
…tation
  • Loading branch information
Kariiem authored and sgraf812 committed Jul 22, 2024
1 parent fe480e3 commit 80126af
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 50 deletions.
43 changes: 26 additions & 17 deletions packages/backend-lalr/data/HappyTemplate.hs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
# error This code isn't being built with GHC.
#endif

-- Get WORDS_BIGENDIAN (if defined)
#include "MachDeps.h"

-- Do not remove this comment. Required to fix CPP parsing when using GCC and a clang-compiled alex.
#if __GLASGOW_HASKELL__ > 706
# define LT(n,m) ((Happy_GHC_Exts.tagToEnum# (n Happy_GHC_Exts.<# m)) :: Prelude.Bool)
Expand Down Expand Up @@ -82,16 +85,16 @@ happyDoAction i tk st =
{-# INLINE happyNextAction #-}
happyNextAction i st = case happyIndexActionTable i st of
Just (Happy_GHC_Exts.I# act) -> act
Nothing -> indexShortOffAddr happyDefActions st
Nothing -> happyIndexOffAddr happyDefActions st

{-# INLINE happyIndexActionTable #-}
happyIndexActionTable i st
| GTE(off, 0#), EQ(indexShortOffAddr happyCheck off, i)
= Prelude.Just (Happy_GHC_Exts.I# (indexShortOffAddr happyTable off))
| GTE(off, 0#), EQ(happyIndexOffAddr happyCheck off, i)
= Prelude.Just (Happy_GHC_Exts.I# (happyIndexOffAddr happyTable off))
| otherwise
= Prelude.Nothing
where
off = PLUS(happyAdjustOffset (indexShortOffAddr happyActOffsets st), i)
off = PLUS(happyIndexOffAddr happyActOffsets st, i)

data HappyAction
= HappyFail
Expand All @@ -107,24 +110,30 @@ happyDecodeAction action | LT(action, 0#) = HappyReduce NEGATE(PLUS(action, 1
| otherwise = HappyShift MINUS(action, 1#)

{-# INLINE happyIndexGotoTable #-}
happyIndexGotoTable nt st = indexShortOffAddr happyTable off
happyIndexGotoTable nt st = happyIndexOffAddr happyTable off
where
off = PLUS(happyAdjustOffset (indexShortOffAddr happyGotoOffsets st), nt)
off = PLUS(happyIndexOffAddr happyGotoOffsets st, nt)

indexShortOffAddr (HappyA# arr) off =
Happy_GHC_Exts.narrow16Int# i
where
i = Happy_GHC_Exts.word2Int# (Happy_GHC_Exts.or# (Happy_GHC_Exts.uncheckedShiftL# high 8#) low)
high = Happy_GHC_Exts.int2Word# (Happy_GHC_Exts.ord# (Happy_GHC_Exts.indexCharOffAddr# arr (off' Happy_GHC_Exts.+# 1#)))
low = Happy_GHC_Exts.int2Word# (Happy_GHC_Exts.ord# (Happy_GHC_Exts.indexCharOffAddr# arr off'))
off' = off Happy_GHC_Exts.*# 2#
{-# INLINE happyIndexOffAddr #-}
happyIndexOffAddr :: HappyAddr -> Happy_Int -> Happy_Int
happyIndexOffAddr (HappyA# arr) off =
#if __GLASGOW_HASKELL__ >= 901
Happy_GHC_Exts.int32ToInt# -- qualified import because it doesn't exist on older GHC's
#endif
#ifdef WORDS_BIGENDIAN
-- The CI of `alex` tests this code path
(Happy_GHC_Exts.word32ToInt32# (Happy_GHC_Exts.wordToWord32# (Happy_GHC_Exts.byteSwap32# (Happy_GHC_Exts.word32ToWord# (Happy_GHC_Exts.int32ToWord32#
#endif
(Happy_GHC_Exts.indexInt32OffAddr# arr off)
#ifdef WORDS_BIGENDIAN
)))))
#endif

{-# INLINE happyLt #-}
happyLt x y = LT(x,y)

readArrayBit arr bit =
Bits.testBit (Happy_GHC_Exts.I# (indexShortOffAddr arr ((unbox_int bit) `Happy_GHC_Exts.iShiftRA#` 4#)))
(bit `Prelude.mod` 16)
Bits.testBit (Happy_GHC_Exts.I# (happyIndexOffAddr arr ((unbox_int bit) `Happy_GHC_Exts.iShiftRA#` 5#))) (bit `Prelude.mod` 32)
where unbox_int (Happy_GHC_Exts.I# x) = x

data HappyAddr = HappyA# Happy_GHC_Exts.Addr#
Expand Down Expand Up @@ -192,9 +201,9 @@ happyMonad2Reduce k nt fn j tk st sts stk =
case happyDrop k (HappyCons st sts) of
sts1@(HappyCons st1 _) ->
let drop_stk = happyDropStk k stk
off = happyAdjustOffset (indexShortOffAddr happyGotoOffsets st1)
off = happyAdjustOffset (happyIndexOffAddr happyGotoOffsets st1)
off_i = PLUS(off, nt)
new_state = indexShortOffAddr happyTable off_i
new_state = happyIndexOffAddr happyTable off_i
in
happyThen1 (fn stk tk)
(\r -> happyNewToken new_state sts1 (r `HappyStk` drop_stk))
Expand Down
76 changes: 43 additions & 33 deletions packages/backend-lalr/src/Happy/Backend/LALR/ProduceCode.lhs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@ The code generator.

> import Control.Monad ( forM_ )
> import Control.Monad.ST ( ST, runST )
> import Data.Bits ( setBit )
> import Data.Word
> import Data.Int
> import Data.Bits
> import Data.Array.ST ( STUArray )
> import Data.Array.Unboxed ( UArray )
> import Data.Array.MArray ( MArray(..), freeze, readArray, writeArray )
Expand Down Expand Up @@ -486,8 +488,8 @@ machinery to discard states in the parser...
> . str "happyExpListPerState st =\n"
> . str " token_strs_expected\n"
> . str " where token_strs = " . str (show $ elems token_names') . str "\n"
> . str " bit_start = st Prelude.* " . str (show nr_tokens) . str "\n"
> . str " bit_end = (st Prelude.+ 1) Prelude.* " . str (show nr_tokens) . str "\n"
> . str " bit_start = st Prelude.* " . str (show nr_tokens) . str "\n"
> . str " bit_end = (st Prelude.+ 1) Prelude.* " . str (show nr_tokens) . str "\n"
> . str " read_bit = readArrayBit happyExpList\n"
> . str " bits = Prelude.map read_bit [bit_start..bit_end Prelude.- 1]\n"
> . str " bits_indexed = Prelude.zip bits [0.."
Expand All @@ -504,12 +506,12 @@ action array indexed by (terminal * last_state) + state
> produceActionArray
> = str "happyActOffsets :: HappyAddr\n"
> . str "happyActOffsets = HappyA# \"" --"
> . str (checkedHexChars min_off act_offs)
> . hexChars act_offs
> . str "\"#\n\n" --"
>
> . str "happyGotoOffsets :: HappyAddr\n"
> . str "happyGotoOffsets = HappyA# \"" --"
> . str (checkedHexChars min_off goto_offs)
> . hexChars goto_offs
> . str "\"#\n\n" --"
>
> . str "happyAdjustOffset :: Happy_GHC_Exts.Int# -> Happy_GHC_Exts.Int#\n"
Expand All @@ -523,24 +525,24 @@ action array indexed by (terminal * last_state) + state
>
> . str "happyDefActions :: HappyAddr\n"
> . str "happyDefActions = HappyA# \"" --"
> . str (hexChars defaults)
> . hexChars defaults
> . str "\"#\n\n" --"
>
> . str "happyCheck :: HappyAddr\n"
> . str "happyCheck = HappyA# \"" --"
> . str (hexChars check)
> . hexChars check
> . str "\"#\n\n" --"
>
> . str "happyTable :: HappyAddr\n"
> . str "happyTable = HappyA# \"" --"
> . str (hexChars table)
> . hexChars table
> . str "\"#\n\n" --"


> produceExpListArray
> = str "happyExpList :: HappyAddr\n"
> . str "happyExpList = HappyA# \"" --"
> . str (hexChars explist)
> . hexCharsForBits explist
> . str "\"#\n\n" --"

> n_terminals = length terms
Expand Down Expand Up @@ -1013,7 +1015,7 @@ See notes under "Action Tables" above for some subtleties in this function.
> act_offs <- newArray (0, n_actions) 0
> goto_offs <- newArray (0, n_actions) 0
> off_arr <- newArray (-max_token, mAX_TABLE_SIZE) 0
> exp_array <- newArray (0, (n_actions * n_token_names + 15) `div` 16) 0
> exp_array <- newArray (0, (n_actions * n_token_names + 31) `div` 32) 0 -- 32 bits per entry
>
> (min_off,max_off) <- genTables' table check act_offs goto_offs off_arr exp_array entries
> explist max_token n_token_names
Expand Down Expand Up @@ -1061,8 +1063,8 @@ See notes under "Action Tables" above for some subtleties in this function.
> forM_ explist $ \(state, tokens) ->
> forM_ tokens $ \token -> do
> let bit_nr = state * n_token_names + token
> let word_nr = bit_nr `div` 16
> let word_offset = bit_nr `mod` 16
> let word_nr = bit_nr `div` 32
> let word_offset = bit_nr `mod` 32
> x <- readArray exp_array word_nr
> writeArray exp_array word_nr (setBit x word_offset)
>
Expand Down Expand Up @@ -1216,31 +1218,39 @@ slot is free or not.
> brack' s = char '(' . s . char ')'

-----------------------------------------------------------------------------
-- Convert an integer to a 16-bit number encoded in \xNN\xNN format suitable
-- for placing in a string.
-- Convert an integer to a 32-bit number encoded in little-endian
-- \xNN\xNN\xNN\xNN format suitable for placing in a string.

> hexChars :: [Int] -> String
> hexChars = concatMap hexChar
> hexChars :: [Int] -> String -> String
> hexChars is s = foldr (hexChar . toInt32) s is

> hexChar :: Int -> String
> hexChar i | i < 0 = hexChar (i + 65536)
> hexChar i = toHex (i `mod` 256) ++ toHex (i `div` 256)
The following function is used for generating happyExpList, which is an array of
bits encoded as [Int] for legacy reasons; we don't want to check for overflow
here.

> toHex :: Int -> String
> toHex i = ['\\','x', hexDig (i `div` 16), hexDig (i `mod` 16)]
> hexCharsForBits :: [Int] -> String -> String
> hexCharsForBits is s = foldr (hexChar . fromIntegral) s is

> hexDig :: Int -> Char
> hexDig i | i <= 9 = chr (i + ord '0')
> | otherwise = chr (i - 10 + ord 'a')
The following definition of @hexChar@ chooses a little endian encoding for `Int32` .
Ergo, the compiled parser must use the same endianness when decoding array entries.
On big endian architectures, this means users will have to compile with `WORDS_BIGENDIAN`,
which is defined in the GHC provided C header `MachDeps.h`.

This guards against integers that are so large as to (when converted using
'hexChar') wrap around the maximum value of 16-bit numbers and then end up
larger than an expected minimum value.
> hexChar :: Int32 -> String -> String
> hexChar i s = foldr (toHex . byte i) s [0,1,2,3]

> checkedHexChars :: Int -> [Int] -> String
> checkedHexChars minValue = concatMap hexChar'
> where hexChar' i | checkHexChar minValue i = hexChar i
> | otherwise = error "grammar does not fit in 16-bit representation that is used with '--ghc'"
> byte :: Int32 -> Int -> Word8
> byte n i = fromIntegral (0xFF .&. shiftR n (i*8))

> checkHexChar :: Int -> Int -> Bool
> checkHexChar minValue i = i <= 32767 || i - 65536 < minValue
> toHex :: Word8 -> String -> String
> toHex i s = '\\':'x':hexDig (0xF .&. shiftR i 4):hexDig (0xF .&. i):s

> hexDig :: Word8 -> Char
> hexDig i | i <= 9 = chr (fromIntegral i + ord '0')
> | otherwise = chr (fromIntegral i - 10 + ord 'a')

> toInt32 :: Int -> Int32
> toInt32 i
> | i == fromIntegral i32 = i32
> | otherwise = error ("offset was too large for Int32: " ++ show i)
> where i32 = fromIntegral i

0 comments on commit 80126af

Please sign in to comment.