BNFC · aabounegm · Oct 25, 2023 · Oct 25, 2023 · Nov 7, 2023 · andreasabel
diff --git a/source/src/BNFC/Backend/Haskell/CFtoAbstract.hs b/source/src/BNFC/Backend/Haskell/CFtoAbstract.hs
@@ -92,15 +92,34 @@ cf2Abstract Options{ lang, tokenText, generic, functor } name cf = vsep . concat
       -- regardless whether it is used in the abstract syntax.
       -- It may be used in the parser.
     , [ vcat
-        [ "-- | Start position (line, column) of something."
+        [ "-- | Position range ((startLine, startColumn), (endLine, endColumn)) of something."
         , ""
-        , "type" <+> posType <+> "=" <+> "C.Maybe (C.Int, C.Int)"
+        , "type" <+> posType <+> "=" <+> "C.Maybe ((C.Int, C.Int), (C.Int, C.Int))"
         , ""
         , "pattern" <+> noPosConstr <+> "::" <+> posType
         , "pattern" <+> noPosConstr <+> "=" <+> "C.Nothing"
         , ""
-        , "pattern" <+> posConstr <+> ":: C.Int -> C.Int ->" <+> posType
-        , "pattern" <+> posConstr <+> "line col =" <+> "C.Just (line, col)"
+        , "pattern" <+> posConstr <+> ":: (C.Int, C.Int) -> (C.Int, C.Int) ->" <+> posType
+        , "pattern" <+> posConstr <+> "start end =" <+> "C.Just (start, end)"
+        , ""
+        , "{-# COMPLETE" <+> posConstr <> "," <+> noPosConstr <+> "#-}"
+        , ""
+        , "startLineCol" <> posConstr <+> "::" <+> posType <+> "-> C.Maybe (C.Int, C.Int)"
+        , "startLineCol" <> posConstr <+> "= C.fmap C.fst"
+        , ""
+        , "endLineCol" <> posConstr <+> "::" <+> posType <+> "-> C.Maybe (C.Int, C.Int)"
+        , "endLineCol" <> posConstr <+> "= C.fmap C.snd"
+        , ""
+        , "span" <> posConstr <+> "::" <+> posType <+> "->" <+> posType <+> "->" <+> posType
+        , "span" <> posConstr
+            <+> "(" <+> posConstr <+> "start _end" <+> ")"
+            <+> "(" <+> posConstr <+> "_start end" <+> ") =" <+> posConstr <+> "start end"
+        , "span" <> posConstr
+            <+> "(" <+> posConstr <+> "start end" <+> ") _ =" <+> posConstr <+> "start end"
+        , "span" <> posConstr
+            <+> "_ (" <+> posConstr <+> "start end" <+> ") =" <+> posConstr <+> "start end"
+        , "span" <> posConstr
+            <+> noPosConstr <+> noPosConstr <+> "=" <+> noPosConstr
         ]
       | defPosition
       ]
@@ -159,6 +178,7 @@ cf2Abstract Options{ lang, tokenText, generic, functor } name cf = vsep . concat
       [ [ text $ List.intercalate ", " stdClasses | hasTextualToks || hasData ]
       , [ text $ List.intercalate ", " funClasses | fun ]
       , [ text $ "Int, Maybe(..)" | defPosition ]
+      , [ text $ "fmap, fst, snd"]
       ]
 
 -- |

diff --git a/source/src/BNFC/Backend/Haskell/CFtoAlex3.hs b/source/src/BNFC/Backend/Haskell/CFtoAlex3.hs
@@ -136,8 +136,8 @@ restOfAlex tokenText cf = concat
     ]
   , [ "{"
     , "-- | Create a token with position."
-    , "tok :: (" ++ stringType ++ " -> Tok) -> (Posn -> " ++ stringType ++ " -> Token)"
-    , "tok f p = PT p . f"
+    , "tok :: (" ++ stringType ++ " -> Tok) -> (Posn -> Int -> " ++ stringType ++ " -> Token)"
+    , "tok f p l = PT p l . f"
     , ""
     , "-- | Token without position."
     , "data Tok"
@@ -173,7 +173,7 @@ restOfAlex tokenText cf = concat
     , ""
     , "-- | Token with position."
     , "data Token"
-    , "  = PT  Posn Tok"
+    , "  = PT  Posn Int Tok"
     , "  | Err Posn"
     , "  deriving (Eq, Show, Ord)"
     , ""
@@ -188,33 +188,49 @@ restOfAlex tokenText cf = concat
     , ""
     , "-- | Get the position of a token."
     , "tokenPosn :: Token -> Posn"
-    , "tokenPosn (PT p _) = p"
-    , "tokenPosn (Err p)  = p"
+    , "tokenPosn (PT posn _len _tok) = posn"
+    , "tokenPosn (Err posn)  = posn"
     , ""
-    , "-- | Get line and column of a token."
+    , "-- | Get the length of a token."
+    , "tokenLen :: Token -> Int"
+    , "tokenLen (PT _posn len _tok) = len"
+    , "tokenLen (Err _)  = 0"
+    , ""
+    , "-- | Get start line and column of a token."
     , "tokenLineCol :: Token -> (Int, Int)"
     , "tokenLineCol = posLineCol . tokenPosn"
     , ""
+    , "-- | Get end line and column of a token."
+    , "tokenLineColEnd :: Token -> (Int, Int)"
+    , "tokenLineColEnd t = (l, c + n)"
+    , "  where"
+    , "    (l, c) = tokenLineCol t"
+    , "    n = tokenLen t"
+    , ""
+    , "-- | Get line and column for both start and end of a token."
+    , "tokenSpan :: Token -> ((Int, Int), (Int, Int))"
+    , "tokenSpan t = (tokenLineCol t, tokenLineColEnd t)"
+    , ""
     , "-- | Get line and column of a position."
     , "posLineCol :: Posn -> (Int, Int)"
     , "posLineCol (Pn _ l c) = (l,c)"
     , ""
     , "-- | Convert a token into \"position token\" form."
-    , "mkPosToken :: Token -> ((Int, Int), " ++ stringType ++ ")"
-    , "mkPosToken t = (tokenLineCol t, tokenText t)"
+    , "mkPosToken :: Token -> (((Int, Int), Int), " ++ stringType ++ ")"
+    , "mkPosToken t = ((tokenLineCol t, tokenLen t), tokenText t)"
     , ""
     , "-- | Convert a token to its text."
     , "tokenText :: Token -> " ++ stringType
     , "tokenText t = case t of"
-    , "  PT _ (TS s _) -> s"
-    , "  PT _ (TL s)   -> " ++ applyP stringPack "show s"
-    , "  PT _ (TI s)   -> s"
-    , "  PT _ (TV s)   -> s"
-    , "  PT _ (TD s)   -> s"
-    , "  PT _ (TC s)   -> s"
+    , "  PT _ _ (TS s _) -> s"
+    , "  PT _ _ (TL s)   -> " ++ applyP stringPack "show s"
+    , "  PT _ _ (TI s)   -> s"
+    , "  PT _ _ (TV s)   -> s"
+    , "  PT _ _ (TD s)   -> s"
+    , "  PT _ _ (TC s)   -> s"
     , "  Err _         -> " ++ apply stringPack "\"#error\""
     ]
-  , [ "  PT _ (T_" ++ name ++ " s) -> s" | name <- tokenNames cf ]
+  , [ "  PT _ _ (T_" ++ name ++ " s) -> s" | name <- tokenNames cf ]
   , [ ""
     , "-- | Convert a token to a string."
     , "prToken :: Token -> String"
@@ -295,7 +311,7 @@ restOfAlex tokenText cf = concat
     , "                AlexEOF                   -> []"
     , "                AlexError (pos, _, _, _)  -> [Err pos]"
     , "                AlexSkip  inp' len        -> go inp'"
-    , "                AlexToken inp' len act    -> act pos (" ++ stringTake ++ " len str) : (go inp')"
+    , "                AlexToken inp' len act    -> act pos len (" ++ stringTake ++ " len str) : (go inp')"
     , ""
     , "alexGetByte :: AlexInput -> Maybe (Byte,AlexInput)"
     , "alexGetByte (p, c, (b:bs), s) = Just (b, (p, c, bs, s))"

diff --git a/source/src/BNFC/Backend/Haskell/CFtoHappy.hs b/source/src/BNFC/Backend/Haskell/CFtoHappy.hs
@@ -124,7 +124,7 @@ tokens cf functor
   | otherwise = "%token" $$ (nest 2 $ vcat $ map text $ table " " ts)
   where
     ts            = map prToken (cfTokens cf) ++ specialToks cf functor
-    prToken (t,k) = [ render (convert t), "{ PT _ (TS _ " ++ show k ++ ")", "}" ]
+    prToken (t,k) = [ render (convert t), "{ PT _ _ (TS _ " ++ show k ++ ")", "}" ]
 
 -- Happy doesn't allow characters such as åäö to occur in the happy file. This
 -- is however not a restriction, just a naming paradigm in the happy source file.
@@ -141,10 +141,10 @@ rulesForHappy absM functor cf = for (ruleGroups cf) $ \ (cat, rules) ->
 -- >>> constructRule "Foo" False (npRule "EPlus" (Cat "Exp") [Left (Cat "Exp"), Right "+", Left (Cat "Exp")] Parsable)
 -- ("Exp '+' Exp","Foo.EPlus $1 $3")
 --
--- If we're using functors, it adds position value:
+-- If we're using functors, it adds position range value:
 --
 -- >>> constructRule "Foo" True (npRule "EPlus" (Cat "Exp") [Left (Cat "Exp"), Right "+", Left (Cat "Exp")] Parsable)
--- ("Exp '+' Exp","(fst $1, Foo.EPlus (fst $1) (snd $1) (snd $3))")
+-- ("Exp '+' Exp","(Foo.spanBNFC'Position (fst $1) (fst $3), Foo.EPlus (Foo.spanBNFC'Position (fst $1) (fst $3)) (snd $1) (snd $3))")
 --
 -- List constructors should not be prefixed by the abstract module name:
 --
@@ -157,7 +157,7 @@ rulesForHappy absM functor cf = for (ruleGroups cf) $ \ (cat, rules) ->
 -- Coercion are much simpler:
 --
 -- >>> constructRule "Foo" True (npRule "_" (Cat "Exp") [Right "(", Left (Cat "Exp"), Right ")"] Parsable)
--- ("'(' Exp ')'","(uncurry Foo.BNFC'Position (tokenLineCol $1), (snd $2))")
+-- ("'(' Exp ')'","(Foo.spanBNFC'Position (uncurry Foo.BNFC'Position (tokenSpan $1)) (uncurry Foo.BNFC'Position (tokenSpan $3)), (snd $2))")
 --
 constructRule :: IsFun f => String -> Bool -> Rul f -> (Pattern, Action)
 constructRule absName functor (Rule fun0 _cat rhs Parsable) = (pat, action)
@@ -167,10 +167,26 @@ constructRule absName functor (Rule fun0 _cat rhs Parsable) = (pat, action)
     action
       | functor   = "(" ++ actionPos id ++ ", " ++ actionValue ++ ")"
       | otherwise = actionValue
-    actionPos paren = case rhs of
-      []          -> qualify noPosConstr
-      (Left _:_)  -> paren "fst $1"
-      (Right _:_) -> paren $ unwords [ "uncurry", qualify posConstr , "(tokenLineCol $1)" ]
+    actionPos paren = case headAndLast rhs of
+      Nothing                 -> qualify noPosConstr
+      Just (startTok, endTok) -> paren $ unwords
+        [ qualify ("span" ++ posConstr)
+        , startOf startTok
+        , endOf endTok
+        ]
+      where
+        startOf :: Either a b -> String
+        startOf Left{} = "(fst $1)"
+        startOf Right{} = unwords [ "(uncurry", qualify posConstr , "(tokenSpan $1))" ]
+        endOf :: Either a b -> String
+        endOf Left{} = "(fst $" ++ show (length rhs) ++ ")"
+        endOf Right{} = unwords [ "(uncurry", qualify posConstr , "(tokenSpan $" ++ show (length rhs) ++"))" ]
+
+        headAndLast :: [a] -> Maybe (a, a)
+        headAndLast xs =
+          case (xs, reverse xs) of
+            (x:_, z:_) -> Just (x, z)
+            _          -> Nothing
     actionValue
       | isCoercion fun = unwords metavars
       | isNilCons  fun = unwords (qualify fun : metavars)
@@ -297,12 +313,12 @@ footer absName tokenText functor eps _cf = unlines $ concat
 -- | GF literals.
 specialToks :: CF -> Bool -> [[String]]  -- ^ A table with three columns (last is "}").
 specialToks cf functor = (`map` literals cf) $ \t -> case t of
-  "Ident"   -> [ "L_Ident" , "{ PT _ (TV " ++ posn t ++ ")", "}" ]
-  "String"  -> [ "L_quoted", "{ PT _ (TL " ++ posn t ++ ")", "}" ]
-  "Integer" -> [ "L_integ ", "{ PT _ (TI " ++ posn t ++ ")", "}" ]
-  "Double"  -> [ "L_doubl ", "{ PT _ (TD " ++ posn t ++ ")", "}" ]
-  "Char"    -> [ "L_charac", "{ PT _ (TC " ++ posn t ++ ")", "}" ]
-  own       -> [ "L_" ++ own,"{ PT _ (T_" ++ own ++ " " ++ posn own ++ ")", "}" ]
+  "Ident"   -> [ "L_Ident" , "{ PT _ _ (TV " ++ posn t ++ ")", "}" ]
+  "String"  -> [ "L_quoted", "{ PT _ _ (TL " ++ posn t ++ ")", "}" ]
+  "Integer" -> [ "L_integ ", "{ PT _ _ (TI " ++ posn t ++ ")", "}" ]
+  "Double"  -> [ "L_doubl ", "{ PT _ _ (TD " ++ posn t ++ ")", "}" ]
+  "Char"    -> [ "L_charac", "{ PT _ _ (TC " ++ posn t ++ ")", "}" ]
+  own       -> [ "L_" ++ own,"{ PT _ _ (T_" ++ own ++ " " ++ posn own ++ ")", "}" ]
   where
     posn tokenCat = if isPositionCat cf tokenCat || functor then "_" else "$$"
 
@@ -323,11 +339,11 @@ specialRules absName functor tokenText cf = unlines . intersperse "" . (`map` li
   where
     mkTypePart tokenCat = if functor then concat [ "(", qualify posType, ", ", tokenCat, ")" ] else tokenCat
     mkBodyPart tokenCat
-      | functor   = "(" ++ unwords ["uncurry", qualify posConstr, "(tokenLineCol $1)"] ++ ", " ++ mkValPart tokenCat ++ ")"
+      | functor   = "(" ++ unwords ["uncurry", qualify posConstr, "(tokenSpan $1)"] ++ ", " ++ mkValPart tokenCat ++ ")"
       | otherwise = mkValPart tokenCat
     mkValPart tokenCat =
       case tokenCat of
-        "String"  -> if functor then stringUnpack "((\\(PT _ (TL s)) -> s) $1)"
+        "String"  -> if functor then stringUnpack "((\\(PT _ _ (TL s)) -> s) $1)"
                                 else stringUnpack "$1"                                 -- String never has pos
         "Integer" -> if functor then "(read " ++ stringUnpack "(tokenText $1)" ++ ") :: Integer"
                                 else "(read " ++ stringUnpack "$1" ++ ") :: Integer" -- Integer never has pos

diff --git a/source/src/BNFC/Backend/Haskell/CFtoLayout.hs b/source/src/BNFC/Backend/Haskell/CFtoLayout.hs
@@ -317,7 +317,7 @@ cf2Layout layName lexName cf = unlines $ concat
     , ""
     , "-- | Create a position symbol token."
     , "sToken :: Position -> TokSymbol -> Token"
-    , "sToken p t = PT p $ TK t"
+    , "sToken p t@(TokSymbol s _) = PT p (length s) $ TK t"
     , ""
     , "-- | Get the line number of a token."
     , "line :: Token -> Line"
@@ -334,13 +334,13 @@ cf2Layout layName lexName cf = unlines $ concat
     , "-- | Check if a word is a layout start token."
     , "isLayout :: Token -> Maybe LayoutDelimiters"
     , "isLayout = \\case"
-    , "  PT _ (TK t) -> lookup t layoutWords"
+    , "  PT _ _ (TK t) -> lookup t layoutWords"
     , "  _ -> Nothing"
     , ""
     , "-- | Check if a token is one of the given symbols."
     , "isTokenIn :: [TokSymbol] -> Token -> Bool"
     , "isTokenIn ts = \\case"
-    , "  PT _ (TK t) -> t `elem` ts"
+    , "  PT _ _ (TK t) -> t `elem` ts"
     , "  _ -> False"
     , ""
     , "-- | Check if a token is a layout stop token."