Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add end position information to BNFC'Position #463

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 24 additions & 4 deletions source/src/BNFC/Backend/Haskell/CFtoAbstract.hs
Original file line number Diff line number Diff line change
Expand Up @@ -92,15 +92,34 @@ cf2Abstract Options{ lang, tokenText, generic, functor } name cf = vsep . concat
-- regardless whether it is used in the abstract syntax.
-- It may be used in the parser.
, [ vcat
[ "-- | Start position (line, column) of something."
[ "-- | Position range ((startLine, startColumn), (endLine, endColumn)) of something."
, ""
, "type" <+> posType <+> "=" <+> "C.Maybe (C.Int, C.Int)"
, "type" <+> posType <+> "=" <+> "C.Maybe ((C.Int, C.Int), (C.Int, C.Int))"
, ""
, "pattern" <+> noPosConstr <+> "::" <+> posType
, "pattern" <+> noPosConstr <+> "=" <+> "C.Nothing"
, ""
, "pattern" <+> posConstr <+> ":: C.Int -> C.Int ->" <+> posType
, "pattern" <+> posConstr <+> "line col =" <+> "C.Just (line, col)"
, "pattern" <+> posConstr <+> ":: (C.Int, C.Int) -> (C.Int, C.Int) ->" <+> posType
, "pattern" <+> posConstr <+> "start end =" <+> "C.Just (start, end)"
, ""
, "{-# COMPLETE" <+> posConstr <> "," <+> noPosConstr <+> "#-}"
, ""
, "startLineCol" <> posConstr <+> "::" <+> posType <+> "-> C.Maybe (C.Int, C.Int)"
, "startLineCol" <> posConstr <+> "= C.fmap C.fst"
, ""
, "endLineCol" <> posConstr <+> "::" <+> posType <+> "-> C.Maybe (C.Int, C.Int)"
, "endLineCol" <> posConstr <+> "= C.fmap C.snd"
, ""
, "span" <> posConstr <+> "::" <+> posType <+> "->" <+> posType <+> "->" <+> posType
, "span" <> posConstr
<+> "(" <+> posConstr <+> "start _end" <+> ")"
<+> "(" <+> posConstr <+> "_start end" <+> ") =" <+> posConstr <+> "start end"
, "span" <> posConstr
<+> "(" <+> posConstr <+> "start end" <+> ") _ =" <+> posConstr <+> "start end"
, "span" <> posConstr
<+> "_ (" <+> posConstr <+> "start end" <+> ") =" <+> posConstr <+> "start end"
, "span" <> posConstr
<+> noPosConstr <+> noPosConstr <+> "=" <+> noPosConstr
]
| defPosition
]
Expand Down Expand Up @@ -159,6 +178,7 @@ cf2Abstract Options{ lang, tokenText, generic, functor } name cf = vsep . concat
[ [ text $ List.intercalate ", " stdClasses | hasTextualToks || hasData ]
, [ text $ List.intercalate ", " funClasses | fun ]
, [ text $ "Int, Maybe(..)" | defPosition ]
, [ text $ "fmap, fst, snd"]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These functions should only be imported when they are used (currently, that would be under the condition `functor`); otherwise, compilation generates warnings with -Wall.

]

-- |
Expand Down
48 changes: 32 additions & 16 deletions source/src/BNFC/Backend/Haskell/CFtoAlex3.hs
Original file line number Diff line number Diff line change
Expand Up @@ -136,8 +136,8 @@ restOfAlex tokenText cf = concat
]
, [ "{"
, "-- | Create a token with position."
, "tok :: (" ++ stringType ++ " -> Tok) -> (Posn -> " ++ stringType ++ " -> Token)"
, "tok f p = PT p . f"
, "tok :: (" ++ stringType ++ " -> Tok) -> (Posn -> Int -> " ++ stringType ++ " -> Token)"
, "tok f p l = PT p l . f"
, ""
, "-- | Token without position."
, "data Tok"
Expand Down Expand Up @@ -173,7 +173,7 @@ restOfAlex tokenText cf = concat
, ""
, "-- | Token with position."
, "data Token"
, " = PT Posn Tok"
, " = PT Posn Int Tok"
, " | Err Posn"
, " deriving (Eq, Show, Ord)"
, ""
Expand All @@ -188,33 +188,49 @@ restOfAlex tokenText cf = concat
, ""
, "-- | Get the position of a token."
, "tokenPosn :: Token -> Posn"
, "tokenPosn (PT p _) = p"
, "tokenPosn (Err p) = p"
, "tokenPosn (PT posn _len _tok) = posn"
, "tokenPosn (Err posn) = posn"
, ""
, "-- | Get line and column of a token."
, "-- | Get the length of a token."
, "tokenLen :: Token -> Int"
, "tokenLen (PT _posn len _tok) = len"
, "tokenLen (Err _) = 0"
, ""
, "-- | Get start line and column of a token."
, "tokenLineCol :: Token -> (Int, Int)"
, "tokenLineCol = posLineCol . tokenPosn"
, ""
, "-- | Get end line and column of a token."
, "tokenLineColEnd :: Token -> (Int, Int)"
, "tokenLineColEnd t = (l, c + n)"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What if a token spans several lines? Note that the user can define their own tokens that may contain newline characters.

, " where"
, " (l, c) = tokenLineCol t"
, " n = tokenLen t"
, ""
, "-- | Get line and column for both start and end of a token."
, "tokenSpan :: Token -> ((Int, Int), (Int, Int))"
, "tokenSpan t = (tokenLineCol t, tokenLineColEnd t)"
, ""
, "-- | Get line and column of a position."
, "posLineCol :: Posn -> (Int, Int)"
, "posLineCol (Pn _ l c) = (l,c)"
, ""
, "-- | Convert a token into \"position token\" form."
, "mkPosToken :: Token -> ((Int, Int), " ++ stringType ++ ")"
, "mkPosToken t = (tokenLineCol t, tokenText t)"
, "mkPosToken :: Token -> (((Int, Int), Int), " ++ stringType ++ ")"
, "mkPosToken t = ((tokenLineCol t, tokenLen t), tokenText t)"
, ""
, "-- | Convert a token to its text."
, "tokenText :: Token -> " ++ stringType
, "tokenText t = case t of"
, " PT _ (TS s _) -> s"
, " PT _ (TL s) -> " ++ applyP stringPack "show s"
, " PT _ (TI s) -> s"
, " PT _ (TV s) -> s"
, " PT _ (TD s) -> s"
, " PT _ (TC s) -> s"
, " PT _ _ (TS s _) -> s"
, " PT _ _ (TL s) -> " ++ applyP stringPack "show s"
, " PT _ _ (TI s) -> s"
, " PT _ _ (TV s) -> s"
, " PT _ _ (TD s) -> s"
, " PT _ _ (TC s) -> s"
, " Err _ -> " ++ apply stringPack "\"#error\""
]
, [ " PT _ (T_" ++ name ++ " s) -> s" | name <- tokenNames cf ]
, [ " PT _ _ (T_" ++ name ++ " s) -> s" | name <- tokenNames cf ]
, [ ""
, "-- | Convert a token to a string."
, "prToken :: Token -> String"
Expand Down Expand Up @@ -295,7 +311,7 @@ restOfAlex tokenText cf = concat
, " AlexEOF -> []"
, " AlexError (pos, _, _, _) -> [Err pos]"
, " AlexSkip inp' len -> go inp'"
, " AlexToken inp' len act -> act pos (" ++ stringTake ++ " len str) : (go inp')"
, " AlexToken inp' len act -> act pos len (" ++ stringTake ++ " len str) : (go inp')"
, ""
, "alexGetByte :: AlexInput -> Maybe (Byte,AlexInput)"
, "alexGetByte (p, c, (b:bs), s) = Just (b, (p, c, bs, s))"
Expand Down
48 changes: 32 additions & 16 deletions source/src/BNFC/Backend/Haskell/CFtoHappy.hs
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ tokens cf functor
| otherwise = "%token" $$ (nest 2 $ vcat $ map text $ table " " ts)
where
ts = map prToken (cfTokens cf) ++ specialToks cf functor
prToken (t,k) = [ render (convert t), "{ PT _ (TS _ " ++ show k ++ ")", "}" ]
prToken (t,k) = [ render (convert t), "{ PT _ _ (TS _ " ++ show k ++ ")", "}" ]

-- Happy doesn't allow characters such as åäö to occur in the happy file. This
-- is however not a restriction, just a naming paradigm in the happy source file.
Expand All @@ -141,10 +141,10 @@ rulesForHappy absM functor cf = for (ruleGroups cf) $ \ (cat, rules) ->
-- >>> constructRule "Foo" False (npRule "EPlus" (Cat "Exp") [Left (Cat "Exp"), Right "+", Left (Cat "Exp")] Parsable)
-- ("Exp '+' Exp","Foo.EPlus $1 $3")
--
-- If we're using functors, it adds position value:
-- If we're using functors, it adds a position range value:
--
-- >>> constructRule "Foo" True (npRule "EPlus" (Cat "Exp") [Left (Cat "Exp"), Right "+", Left (Cat "Exp")] Parsable)
-- ("Exp '+' Exp","(fst $1, Foo.EPlus (fst $1) (snd $1) (snd $3))")
-- ("Exp '+' Exp","(Foo.spanBNFC'Position (fst $1) (fst $3), Foo.EPlus (Foo.spanBNFC'Position (fst $1) (fst $3)) (snd $1) (snd $3))")
--
-- List constructors should not be prefixed by the abstract module name:
--
Expand All @@ -157,7 +157,7 @@ rulesForHappy absM functor cf = for (ruleGroups cf) $ \ (cat, rules) ->
-- Coercions are much simpler:
--
-- >>> constructRule "Foo" True (npRule "_" (Cat "Exp") [Right "(", Left (Cat "Exp"), Right ")"] Parsable)
-- ("'(' Exp ')'","(uncurry Foo.BNFC'Position (tokenLineCol $1), (snd $2))")
-- ("'(' Exp ')'","(Foo.spanBNFC'Position (uncurry Foo.BNFC'Position (tokenSpan $1)) (uncurry Foo.BNFC'Position (tokenSpan $3)), (snd $2))")
--
constructRule :: IsFun f => String -> Bool -> Rul f -> (Pattern, Action)
constructRule absName functor (Rule fun0 _cat rhs Parsable) = (pat, action)
Expand All @@ -167,10 +167,26 @@ constructRule absName functor (Rule fun0 _cat rhs Parsable) = (pat, action)
action
| functor = "(" ++ actionPos id ++ ", " ++ actionValue ++ ")"
| otherwise = actionValue
actionPos paren = case rhs of
[] -> qualify noPosConstr
(Left _:_) -> paren "fst $1"
(Right _:_) -> paren $ unwords [ "uncurry", qualify posConstr , "(tokenLineCol $1)" ]
actionPos paren = case headAndLast rhs of
Nothing -> qualify noPosConstr
Just (startTok, endTok) -> paren $ unwords
[ qualify ("span" ++ posConstr)
, startOf startTok
, endOf endTok
]
where
startOf :: Either a b -> String
startOf Left{} = "(fst $1)"
startOf Right{} = unwords [ "(uncurry", qualify posConstr , "(tokenSpan $1))" ]
endOf :: Either a b -> String
endOf Left{} = "(fst $" ++ show (length rhs) ++ ")"
endOf Right{} = unwords [ "(uncurry", qualify posConstr , "(tokenSpan $" ++ show (length rhs) ++"))" ]

-- | Pair up the first and the last element of a list.
--
-- Yields 'Nothing' for the empty list. For a singleton list both
-- components are that single element.
headAndLast :: [a] -> Maybe (a, a)
headAndLast xs
  | (firstElem : _) <- xs
  , (lastElem : _) <- reverse xs = Just (firstElem, lastElem)
  | otherwise = Nothing
actionValue
| isCoercion fun = unwords metavars
| isNilCons fun = unwords (qualify fun : metavars)
Expand Down Expand Up @@ -297,12 +313,12 @@ footer absName tokenText functor eps _cf = unlines $ concat
-- | GF literals.
specialToks :: CF -> Bool -> [[String]] -- ^ A table with three columns (last is "}").
specialToks cf functor = (`map` literals cf) $ \t -> case t of
"Ident" -> [ "L_Ident" , "{ PT _ (TV " ++ posn t ++ ")", "}" ]
"String" -> [ "L_quoted", "{ PT _ (TL " ++ posn t ++ ")", "}" ]
"Integer" -> [ "L_integ ", "{ PT _ (TI " ++ posn t ++ ")", "}" ]
"Double" -> [ "L_doubl ", "{ PT _ (TD " ++ posn t ++ ")", "}" ]
"Char" -> [ "L_charac", "{ PT _ (TC " ++ posn t ++ ")", "}" ]
own -> [ "L_" ++ own,"{ PT _ (T_" ++ own ++ " " ++ posn own ++ ")", "}" ]
"Ident" -> [ "L_Ident" , "{ PT _ _ (TV " ++ posn t ++ ")", "}" ]
"String" -> [ "L_quoted", "{ PT _ _ (TL " ++ posn t ++ ")", "}" ]
"Integer" -> [ "L_integ ", "{ PT _ _ (TI " ++ posn t ++ ")", "}" ]
"Double" -> [ "L_doubl ", "{ PT _ _ (TD " ++ posn t ++ ")", "}" ]
"Char" -> [ "L_charac", "{ PT _ _ (TC " ++ posn t ++ ")", "}" ]
own -> [ "L_" ++ own,"{ PT _ _ (T_" ++ own ++ " " ++ posn own ++ ")", "}" ]
where
posn tokenCat = if isPositionCat cf tokenCat || functor then "_" else "$$"

Expand All @@ -323,11 +339,11 @@ specialRules absName functor tokenText cf = unlines . intersperse "" . (`map` li
where
mkTypePart tokenCat = if functor then concat [ "(", qualify posType, ", ", tokenCat, ")" ] else tokenCat
mkBodyPart tokenCat
| functor = "(" ++ unwords ["uncurry", qualify posConstr, "(tokenLineCol $1)"] ++ ", " ++ mkValPart tokenCat ++ ")"
| functor = "(" ++ unwords ["uncurry", qualify posConstr, "(tokenSpan $1)"] ++ ", " ++ mkValPart tokenCat ++ ")"
| otherwise = mkValPart tokenCat
mkValPart tokenCat =
case tokenCat of
"String" -> if functor then stringUnpack "((\\(PT _ (TL s)) -> s) $1)"
"String" -> if functor then stringUnpack "((\\(PT _ _ (TL s)) -> s) $1)"
else stringUnpack "$1" -- String never has pos
"Integer" -> if functor then "(read " ++ stringUnpack "(tokenText $1)" ++ ") :: Integer"
else "(read " ++ stringUnpack "$1" ++ ") :: Integer" -- Integer never has pos
Expand Down
6 changes: 3 additions & 3 deletions source/src/BNFC/Backend/Haskell/CFtoLayout.hs
Original file line number Diff line number Diff line change
Expand Up @@ -317,7 +317,7 @@ cf2Layout layName lexName cf = unlines $ concat
, ""
, "-- | Create a position symbol token."
, "sToken :: Position -> TokSymbol -> Token"
, "sToken p t = PT p $ TK t"
, "sToken p t@(TokSymbol s _) = PT p (length s) $ TK t"
, ""
, "-- | Get the line number of a token."
, "line :: Token -> Line"
Expand All @@ -334,13 +334,13 @@ cf2Layout layName lexName cf = unlines $ concat
, "-- | Check if a word is a layout start token."
, "isLayout :: Token -> Maybe LayoutDelimiters"
, "isLayout = \\case"
, " PT _ (TK t) -> lookup t layoutWords"
, " PT _ _ (TK t) -> lookup t layoutWords"
, " _ -> Nothing"
, ""
, "-- | Check if a token is one of the given symbols."
, "isTokenIn :: [TokSymbol] -> Token -> Bool"
, "isTokenIn ts = \\case"
, " PT _ (TK t) -> t `elem` ts"
, " PT _ _ (TK t) -> t `elem` ts"
, " _ -> False"
, ""
, "-- | Check if a token is a layout stop token."
Expand Down