{- 以下のモジュールに基づく。 (ただし hugs -98 オプションは 使用しないように変更した。) -} ----------------------------------------------------------------------------- -- | -- Module : Text.ParserCombinators.Parsec.Token -- Copyright : (c) Daan Leijen 1999-2001 -- License : BSD-style (see the file libraries/parsec/LICENSE) -- -- Maintainer : daan@cs.uu.nl -- Stability : provisional -- Portability : non-portable (uses existentially quantified data constructors) -- -- A helper module to parse lexical elements (tokens). -- ----------------------------------------------------------------------------- module Token where import Data.Char (isAlpha,toLower,toUpper,isSpace,digitToInt) import Data.List (nub,sort) import Text.ParserCombinators.Parsec type Numeric = Rational -- 無限精度有理数を使用する場合 -- type Numeric = Double -- 倍精度浮動小数点数を使用する場合 identStart, identLetter, opLetter, opStart :: GenParser Char a Char identStart = letter <|> oneOf "_" identLetter = alphaNum <|> oneOf "_'" opLetter = oneOf ":!#$%&*+./<=>?@\\^|-~" opStart = opLetter commentStart, commentEnd, commentLine :: String commentStart = "{-" commentEnd = "-}" commentLine = "--" reservedNames, reservedOpNames :: [String] reservedNames = ["let", "letrec", "val", "fn", "in", "if", "then", "else", "class", "method", "begin", "end", "while", "do", -- "getX", "setX", "getY", "setY", "getZ", "setZ", -- "write", "read", "try", "catch", -- "fail", "break", "continue", "goto", -- "abort", "callcc", "amb", "or", "uniq"] reservedOpNames = ["=", "\\", "->", "@", ":", "+", "-", "*", "/", "%", "==", "/=", ">", ">=", "<", "<=", "++", "&&", "||"] ---------------------------------------------------------------------- parens p = between (symbol "(") (symbol ")") p braces p = between (symbol "{") (symbol "}") p angles p = between (symbol "<") (symbol ">") p brackets p = between (symbol "[") (symbol "]") p semi = symbol ";" comma = symbol "," dot = symbol "." colon = symbol ":" commaSep, semiSep :: GenParser Char a b -> GenParser Char a [b] commaSep p = sepBy p comma semiSep p = sepBy p semi commaSep1 p = sepBy1 p comma semiSep1 p = sepBy1 p semi stringLiteral :: CharParser st String stringLiteral = lexeme ( do str <- between (char '"') (char '"' "end of string") (many stringChar) return (foldr (maybe id (:)) "" str) "literal string") stringChar :: CharParser st (Maybe Char) stringChar = do c <- stringLetter; return (Just c) <|> stringEscape "string character" stringLetter :: GenParser Char a Char; stringLetter = satisfy (\c -> (c /= '"') && (c /= '\\') && (c > '\026')) stringEscape :: GenParser Char a (Maybe Char) stringEscape = do char '\\' do { escapeGap; return Nothing } <|> do { escapeEmpty; return Nothing } <|> do { esc <- escapeCode; return (Just esc) } escapeEmpty, escapeGap, escapeCode :: GenParser Char a Char escapeEmpty = char '&' escapeGap = do many1 space char '\\' "end of string gap" -- escape codes escapeCode = charEsc <|> charNum <|> charAscii <|> charControl "escape code" charControl :: CharParser st Char charControl = do char '^' code <- upper return (toEnum (fromEnum code - fromEnum 'A')) charNum :: CharParser st Char charNum = do code <- decimal <|> do { char 'o'; number 8 octDigit } <|> do { char 'x'; number 16 hexDigit } return (toEnum (fromInteger code)) charEsc = choice (map parseEsc escMap) where parseEsc (c,code) = do char c; return code charAscii = choice (map parseAscii asciiMap) where parseAscii (asc,code) = try (do string asc; return code) -- escape code tables escMap :: [(Char, Char)] escMap = zip ("abfnrtv\\\"\'") ("\a\b\f\n\r\t\v\\\"\'") asciiMap :: [(String, Char)] asciiMap = zip (ascii3codes ++ ascii2codes) (ascii3 ++ ascii2) ascii2codes, ascii3codes :: [String] ascii2codes = ["BS","HT","LF","VT","FF","CR","SO","SI","EM", "FS","GS","RS","US","SP"] ascii3codes = ["NUL","SOH","STX","ETX","EOT","ENQ","ACK","BEL", "DLE","DC1","DC2","DC3","DC4","NAK","SYN","ETB", "CAN","SUB","ESC","DEL"] ascii2, ascii3 :: [Char] ascii2 = ['\BS','\HT','\LF','\VT','\FF','\CR','\SO','\SI', '\EM','\FS','\GS','\RS','\US','\SP'] ascii3 = ['\NUL','\SOH','\STX','\ETX','\EOT','\ENQ','\ACK', '\BEL','\DLE','\DC1','\DC2','\DC3','\DC4','\NAK', '\SYN','\ETB','\CAN','\SUB','\ESC','\DEL'] naturalOrFloat :: GenParser Char a (Either Integer Numeric) naturalOrFloat = lexeme (natFloat) "number" -- floats natFloat :: GenParser Char a (Either Integer Numeric) natFloat = do char '0' zeroNumFloat <|> decimalFloat zeroNumFloat :: GenParser Char a (Either Integer Numeric) zeroNumFloat = do n <- hexadecimal <|> octal return (Left n) <|> decimalFloat <|> fractFloat 0 <|> return (Left 0) decimalFloat = do n <- decimal option (Left n) (fractFloat n) fractFloat n = do f <- fractExponent n return (Right f) fractExponent :: Integer -> GenParser Char a Numeric fractExponent n = do fract <- fraction expo <- option 1.0 exponent' return ((fromInteger n + fract)*expo) <|> do expo <- exponent' return ((fromInteger n)*expo) fraction :: GenParser Char a Numeric fraction = do char '.' digits <- many1 digit "fraction" return (foldr op 0.0 digits) "fraction" where op d f = (f + fromIntegral (digitToInt d))/10.0 exponent' :: GenParser Char a Numeric exponent' = do oneOf "eE" f <- sign e <- decimal "exponent" return (power (f e)) "exponent" where power e | e < 0 = 1.0/power(-e) | otherwise = fromInteger (10^e) sign :: CharParser st (Integer -> Integer) sign = (char '-' >> return negate) <|> (char '+' >> return id) <|> return id decimal = number 10 digit hexadecimal, octal :: GenParser Char a Integer hexadecimal = do oneOf "xX"; number 16 hexDigit octal = do oneOf "oO"; number 8 octDigit number base baseDigit = do digits <- many1 baseDigit let n = foldl (\x d -> base*x + toInteger (digitToInt d)) 0 digits seq n (return n) reservedOp name = lexeme $ try $ do string name notFollowedBy opLetter ("end of " ++ show name) operator = lexeme $ try $ do name <- oper if (isReservedOp name) then unexpected ("reserved operator " ++ show name) else return name oper :: GenParser Char a [Char] oper = do c <- opStart cs <- many opLetter return (c:cs) "operator" isReservedOp :: String -> Bool isReservedOp name = isReserved (sort reservedOpNames) name reserved name = lexeme $ try $ do string name notFollowedBy identLetter ("end of " ++ show name) identifier = lexeme $ try $ do name <- ident if (isReservedName name) then unexpected ("reserved word " ++ show name) else return name ident :: GenParser Char a [Char] ident = do c <- identStart cs <- many identLetter return (c:cs) "identifier" isReservedName :: String -> Bool isReservedName name = isReserved theReservedNames name isReserved :: [String] -> String -> Bool isReserved names name = scan names where scan [] = False scan (r:rs) = case (compare r name) of LT -> scan rs EQ -> True GT -> False theReservedNames :: [String] theReservedNames = sort reservedNames symbol name = lexeme (string name) lexeme p = do x <- p; whiteSpace; return x whiteSpace, simpleSpace, oneLineComment, multiLineComment, inCommentMulti :: GenParser Char a () whiteSpace = skipMany (simpleSpace <|> oneLineComment <|> multiLineComment "") simpleSpace = skipMany1 (satisfy isSpace) oneLineComment = do try (string commentLine) skipMany (satisfy (/= '\n')) return () multiLineComment = do try (string commentStart) inCommentMulti inCommentMulti = do try (string commentEnd) ; return () <|> do multiLineComment ; inCommentMulti <|> do skipMany1 (noneOf startEnd) ; inCommentMulti <|> do oneOf startEnd ; inCommentMulti "end of comment" where startEnd = nub (commentEnd ++ commentStart)