{-

以下のモジュールに基づく。
（ただし hugs -98 オプションは  使用しないように変更した。）

 -}

-----------------------------------------------------------------------------
-- |
-- Module      :  Text.ParserCombinators.Parsec.Token
-- Copyright   :  (c) Daan Leijen 1999-2001
-- License     :  BSD-style (see the file libraries/parsec/LICENSE)
-- 
-- Maintainer  :  daan@cs.uu.nl
-- Stability   :  provisional
-- Portability :  non-portable (uses existentially quantified data constructors)
--
-- A helper module to parse lexical elements (tokens).
-- 
-----------------------------------------------------------------------------

module Token where 
import Data.Char (isAlpha,toLower,toUpper,isSpace,digitToInt)
import Data.List (foldl',nub,sort)
import Text.ParserCombinators.Parsec

-- type Numeric = Rational -- use this to get arbitrary-precision rationals
type Numeric = Double      -- use this to get double-precision floating point

-- | First character of an identifier: a letter or an underscore.
identStart :: GenParser Char a Char
identStart = letter <|> oneOf "_"

-- | Any later identifier character: alphanumeric, underscore or prime.
identLetter :: GenParser Char a Char
identLetter = alphaNum <|> oneOf "_'"

-- | A character that may occur inside an operator symbol.
opLetter :: GenParser Char a Char
opLetter = oneOf ":!#$%&*+./<=>?@\\^|-~"

-- | First character of an operator; the same set as 'opLetter'.
opStart :: GenParser Char a Char
opStart = opLetter

-- | Opening delimiter of a block comment.
commentStart :: String
commentStart = "{-"

-- | Closing delimiter of a block comment.
commentEnd :: String
commentEnd = "-}"

-- | Prefix that starts a comment running to the end of the line.
commentLine :: String
commentLine = "--"

-- | Keywords of the language; 'identifier' refuses to return any of these.
reservedNames :: [String]
reservedNames =
  [ "let", "letrec", "fn", "in", "if", "then", "else"
  , "class", "method"
  , "begin", "end", "while", "do"
  , "getX", "setX", "getY", "setY", "getZ", "setZ"
  , "write", "read"
  , "try", "catch", "fail"
  , "break", "continue", "abort", "goto", "callcc"
  , "amb", "or", "uniq"
  ]

-- | Operator symbols with built-in meaning; 'operator' refuses to return
-- any of these.
reservedOpNames :: [String]
reservedOpNames =
  [ "=", "\\", "->", "@", ":", "+", "-", "*", "/", "%"
  , "==", "/=", ">", ">=", "<", "<="
  , "++", "&&", "||"
  ]

----------------------------------------------------------------------
-- | Run a parser between round parentheses.
parens = between (symbol "(") (symbol ")")

-- | Run a parser between curly braces.
braces = between (symbol "{") (symbol "}")

-- | Run a parser between angle brackets.
angles = between (symbol "<") (symbol ">")

-- | Run a parser between square brackets.
brackets = between (symbol "[") (symbol "]")

-- Punctuation tokens. Each one is the literal string followed by any
-- trailing white space (this is exactly what 'symbol' does).
semi  = lexeme (string ";")
comma = lexeme (string ",")
dot   = lexeme (string ".")
colon = lexeme (string ":")

-- | Zero or more occurrences of @p@ separated by commas.
commaSep :: GenParser Char a b -> GenParser Char a [b]
commaSep p = p `sepBy` comma

-- | Zero or more occurrences of @p@ separated by semicolons.
semiSep :: GenParser Char a b -> GenParser Char a [b]
semiSep p = p `sepBy` semi

-- | One or more occurrences of @p@ separated by commas.
commaSep1 p = p `sepBy1` comma

-- | One or more occurrences of @p@ separated by semicolons.
semiSep1 p = p `sepBy1` semi


-- | Parse a double-quoted string literal and skip trailing white space.
-- Escapes that stand for no character ('stringChar' yields 'Nothing' for
-- them) are dropped from the result.
stringLiteral :: CharParser st String
stringLiteral = lexeme (quoted <?> "literal string")
  where
    quoted = do
      pieces <- between openQuote closeQuote (many stringChar)
      return [c | Just c <- pieces]
    openQuote  = char '"'
    closeQuote = char '"' <?> "end of string"


-- | One element of a string body: an ordinary character (wrapped in
-- 'Just') or whatever 'stringEscape' produces.
stringChar :: CharParser st (Maybe Char)
stringChar = (literal <|> stringEscape) <?> "string character"
  where
    literal = stringLetter >>= return . Just

 
-- | A character that may appear unescaped inside a string literal: not a
-- double quote, not a backslash, and with a code above 26 (the escape
-- @\\026@ is decimal).
stringLetter :: GenParser Char a Char
stringLetter = satisfy unescaped
  where
    unescaped c = not (c == '"' || c == '\\' || c <= '\026')


-- | Parse a backslash escape. A string gap and the empty escape produce
-- 'Nothing'; an escape code produces the character it denotes.
stringEscape :: GenParser Char a (Maybe Char)
stringEscape = char '\\' >> (gap <|> emptyEscape <|> coded)
  where
    gap         = escapeGap   >> return Nothing
    emptyEscape = escapeEmpty >> return Nothing
    coded       = escapeCode  >>= return . Just

-- | The empty escape: an ampersand following the backslash.
escapeEmpty :: GenParser Char a Char
escapeEmpty = char '&'

-- | A string gap: one or more white-space characters closed by a backslash.
escapeGap :: GenParser Char a Char
escapeGap = many1 space >> (char '\\' <?> "end of string gap")

-- | Any escape that denotes a character, tried in this order:
-- single-letter escape, numeric code, ASCII mnemonic, control escape.
escapeCode :: GenParser Char a Char
escapeCode =
  (charEsc <|> charNum <|> charAscii <|> charControl) <?> "escape code"


-- | Parse a control-character escape: a caret followed by an upper-case
-- letter (the leading backslash has already been consumed by the caller).
--
-- The result is the control character for that letter: @^A@ is code 1
-- ('\SOH'), @^B@ is code 2, and so on.  The previous implementation
-- omitted the @+ 1@, so every control escape came out one code too low
-- (@^A@ produced '\NUL').
charControl :: CharParser st Char
charControl = do
  char '^'
  code <- upper
  -- 'A' must map to 1, not 0, hence the explicit offset.
  return (toEnum (fromEnum code - fromEnum 'A' + 1))

-- | Parse a numeric character escape: decimal digits, @o@ followed by
-- octal digits, or @x@ followed by hexadecimal digits.
--
-- Codes above 0x10FFFF (the largest 'Char') are rejected with an ordinary
-- parse failure.  Without this check 'toEnum' would raise a runtime
-- exception on such input instead of reporting a syntax error.
charNum :: CharParser st Char
charNum = do
  code <- decimal
            <|> (char 'o' >> number 8 octDigit)
            <|> (char 'x' >> number 16 hexDigit)
  -- The digit parsers never produce a negative value, so only the upper
  -- bound needs checking.
  if code > 0x10FFFF
    then fail "invalid escape sequence"
    else return (toEnum (fromInteger code))

-- | Single-letter escapes: accept any key of 'escMap' and return the
-- character paired with it.
charEsc = choice [ char c >> return code | (c, code) <- escMap ]

                  
-- | ASCII mnemonic escapes: accept any key of 'asciiMap' and return the
-- character paired with it. Each attempt is wrapped in 'try' so that a
-- partial match does not consume input.
charAscii =
  choice [ try (string name >> return code) | (name, code) <- asciiMap ]


-- escape code tables
-- | Single-letter escapes paired with the characters they denote.
escMap :: [(Char, Char)]
escMap =
  [ ('a', '\a'), ('b', '\b'), ('f', '\f'), ('n', '\n'), ('r', '\r')
  , ('t', '\t'), ('v', '\v'), ('\\', '\\'), ('"', '"'), ('\'', '\'')
  ]

-- | ASCII mnemonics paired with their characters. The three-letter names
-- are listed before the two-letter ones.
asciiMap :: [(String, Char)]
asciiMap = zip names chars
  where
    names = ascii3codes ++ ascii2codes
    chars = ascii3 ++ ascii2


-- | Two-letter ASCII mnemonics, in the same order as 'ascii2'.
ascii2codes :: [String]
ascii2codes = words "BS HT LF VT FF CR SO SI EM FS GS RS US SP"

-- | Three-letter ASCII mnemonics, in the same order as 'ascii3'.
ascii3codes :: [String]
ascii3codes =
  words
    "NUL SOH STX ETX EOT ENQ ACK BEL DLE DC1 DC2 DC3 DC4 NAK SYN ETB CAN SUB ESC DEL"


-- | Characters named by 'ascii2codes', in matching order
-- (codes 8-15, 25 and 28-32).
ascii2 :: [Char]
ascii2 = ['\BS' .. '\SI'] ++ ['\EM'] ++ ['\FS' .. '\SP']

-- | Characters named by 'ascii3codes', in matching order
-- (codes 0-7, 16-24, 26, 27 and 127).
ascii3 :: [Char]
ascii3 = ['\NUL' .. '\BEL'] ++ ['\DLE' .. '\CAN'] ++ ['\SUB', '\ESC', '\DEL']


-- | Parse a number token and skip trailing white space. The result is
-- 'Left' for an integer and 'Right' for a value with a fraction and/or
-- exponent.
naturalOrFloat :: GenParser Char a (Either Integer Numeric)
naturalOrFloat = numberToken <?> "number"
  where
    numberToken = lexeme natFloat


-- | Number parser without white-space handling. A leading @0@ is handed to
-- 'zeroNumFloat'; anything else must be a decimal number.
natFloat :: GenParser Char a (Either Integer Numeric)
natFloat = (char '0' >> zeroNumFloat) <|> decimalFloat


-- | What may follow a leading @0@, tried in order: a hexadecimal or octal
-- literal, more decimal digits, a fraction/exponent on zero, or nothing
-- (the plain integer zero).
zeroNumFloat :: GenParser Char a (Either Integer Numeric)
zeroNumFloat =
      prefixed
  <|> decimalFloat
  <|> fractFloat 0
  <|> return (Left 0)
  where
    prefixed = (hexadecimal <|> octal) >>= return . Left
                  

-- | Decimal digits, optionally followed by a fraction and/or exponent.
-- Without such a suffix the integer is returned as 'Left'.
decimalFloat = decimal >>= \n -> option (Left n) (fractFloat n)

-- | Parse the fraction/exponent suffix for integer part @n@ and wrap the
-- resulting value in 'Right'.
fractFloat n = fractExponent n >>= return . Right

-- | Combine integer part @n@ with what follows it: either a fraction with
-- an optional exponent, or an exponent alone.
fractExponent :: Integer -> GenParser Char a Numeric
fractExponent n = withFraction <|> exponentOnly
  where
    withFraction = do
      frac  <- fraction
      scale <- option 1.0 exponent'
      return ((fromInteger n + frac) * scale)
    exponentOnly = do
      scale <- exponent'
      return (fromInteger n * scale)

-- | Parse a dot followed by at least one digit and return the value of
-- that fractional part.
fraction :: GenParser Char a Numeric
fraction = fractionalPart <?> "fraction"
  where
    fractionalPart = do
      char '.'
      ds <- many1 digit <?> "fraction"
      return (foldr shiftIn 0.0 ds)
    -- Fold from the rightmost digit: add the digit, then move everything
    -- one decimal place to the right.
    shiftIn d acc = (acc + fromIntegral (digitToInt d)) / 10.0


-- | Parse @e@ or @E@, an optional sign and decimal digits; return the
-- corresponding power of ten as a multiplier.
exponent' :: GenParser Char a Numeric
exponent' = suffix <?> "exponent"
  where
    suffix = do
      oneOf "eE"
      applySign <- sign
      magnitude <- decimal <?> "exponent"
      return (scale (applySign magnitude))
    -- A negative exponent is the reciprocal of the positive power.
    scale k
      | k >= 0    = fromInteger (10 ^ k)
      | otherwise = 1.0 / scale (negate k)


-- | Parse an optional sign and return the function to apply to the number
-- that follows: 'negate' after @-@, 'id' after @+@ or when no sign is
-- present.
sign :: CharParser st (Integer -> Integer)
sign = minus <|> plus <|> return id
  where
    minus = char '-' >> return negate
    plus  = char '+' >> return id


-- | One or more decimal digits, read as a base-10 integer.
decimal :: GenParser Char a Integer
decimal = number 10 digit

-- | @x@ or @X@ followed by hexadecimal digits.
hexadecimal :: GenParser Char a Integer
hexadecimal = oneOf "xX" >> number 16 hexDigit

-- | @o@ or @O@ followed by octal digits.
octal :: GenParser Char a Integer
octal = oneOf "oO" >> number 8 octDigit


-- | Parse one or more digits with @baseDigit@ and interpret them as an
-- integer written in @base@, most significant digit first.
--
-- The accumulation uses the strict left fold so that no chain of
-- suspended additions is built up for long digit sequences; the result is
-- still forced before it is returned, as before.
number base baseDigit = do
  digits <- many1 baseDigit
  let n = foldl' (\acc d -> base * acc + toInteger (digitToInt d)) 0 digits
  seq n (return n)

-- | Parse the reserved operator @name@ as a token. The match is rejected,
-- without consuming input, when another operator character follows
-- directly.
reservedOp name = lexeme (try exactOp)
  where
    exactOp = do
      string name
      notFollowedBy opLetter <?> ("end of " ++ show name)

-- | Parse a user operator as a token. Reserved operators are rejected
-- without consuming input.
operator = lexeme (try candidate)
  where
    candidate = do
      name <- oper
      if isReservedOp name
        then unexpected ("reserved operator " ++ show name)
        else return name


-- | Raw operator lexeme: an 'opStart' character followed by any number of
-- 'opLetter' characters. No white-space handling or reserved check.
oper :: GenParser Char a [Char]
oper = symbolChars <?> "operator"
  where
    symbolChars = do
      first <- opStart
      rest  <- many opLetter
      return (first : rest)


-- | Is the given operator one of 'reservedOpNames'?
--
-- 'isReserved' needs a sorted list.  The sorted copy is bound outside the
-- argument so it is shared between calls instead of being rebuilt for
-- every operator checked, in the same way 'isReservedName' relies on a
-- pre-sorted list.
isReservedOp :: String -> Bool
isReservedOp = isReserved sortedOpNames
  where
    sortedOpNames = sort reservedOpNames


-- | Parse the keyword @name@ as a token. The match is rejected, without
-- consuming input, when an identifier character follows directly.
reserved name = lexeme (try keyword)
  where
    keyword = do
      string name
      notFollowedBy identLetter <?> ("end of " ++ show name)

-- | Parse an identifier as a token. Reserved words are rejected without
-- consuming input.
identifier = lexeme (try candidate)
  where
    candidate = do
      name <- ident
      if isReservedName name
        then unexpected ("reserved word " ++ show name)
        else return name

-- | Raw identifier lexeme: an 'identStart' character followed by any
-- number of 'identLetter' characters. No white-space handling or reserved
-- check.
ident :: GenParser Char a [Char]
ident = nameChars <?> "identifier"
  where
    nameChars = do
      first <- identStart
      rest  <- many identLetter
      return (first : rest)


-- | Is the given name a reserved word? Looks it up in the pre-sorted
-- 'theReservedNames'.
isReservedName :: String -> Bool
isReservedName = isReserved theReservedNames

    
-- | Membership test for a list that is expected to be sorted in ascending
-- order. Entries smaller than @name@ are skipped; the first entry that is
-- not smaller decides the answer, so the rest of the list is never
-- inspected.
isReserved :: [String] -> String -> Bool
isReserved names name =
  case dropWhile (< name) names of
    (candidate : _) -> candidate == name
    []              -> False

-- | 'reservedNames' in sorted order, as required by 'isReserved'.
theReservedNames :: [String]
theReservedNames = sort reservedNames


-- | Parse the exact string given and then skip trailing white space.
symbol = lexeme . string

-- | Run @p@, skip any white space and comments after it, and return the
-- result of @p@.
lexeme p = do
  result <- p
  whiteSpace
  return result


-- | Skip any mixture of blank characters, line comments and block
-- comments. The alternatives are labelled with the empty string.
whiteSpace, simpleSpace, oneLineComment, multiLineComment, inCommentMulti :: GenParser Char a ()
whiteSpace = skipMany skippable
  where
    skippable = (simpleSpace <|> oneLineComment <|> multiLineComment) <?> ""


-- | Skip one or more characters for which 'isSpace' holds.
simpleSpace = skipMany1 blank
  where
    blank = satisfy isSpace


-- | Skip a line comment: the 'commentLine' prefix and everything up to,
-- but not including, the next newline.
oneLineComment = try (string commentLine) >> skipMany (satisfy notNewline)
  where
    notNewline = (/= '\n')

-- | Skip a block comment: the 'commentStart' delimiter followed by the
-- comment body, which 'inCommentMulti' consumes through the closing
-- delimiter.
multiLineComment = try (string commentStart) >> inCommentMulti


-- | Consume the body of a block comment up to and including its closing
-- delimiter. Nested block comments are skipped recursively.
inCommentMulti =
  (closing <|> nested <|> ordinary <|> loneDelimiterChar) <?> "end of comment"
  where
    -- The closing delimiter ends this comment.
    closing           = try (string commentEnd) >> return ()
    -- A nested comment is skipped as a whole before carrying on.
    nested            = multiLineComment >> inCommentMulti
    -- A run of characters that cannot begin or end a delimiter.
    ordinary          = skipMany1 (noneOf delimiterChars) >> inCommentMulti
    -- A delimiter character that did not form a complete delimiter.
    loneDelimiterChar = oneOf delimiterChars >> inCommentMulti
    delimiterChars    = nub (commentEnd ++ commentStart)