{- 以下のモジュールに基づく。 (ただし hugs -98 オプションは 使用しないように変更した。) -} ----------------------------------------------------------------------------- -- | -- Module : Text.ParserCombinators.Parsec.Token -- Copyright : (c) Daan Leijen 1999-2001 -- License : BSD-style (see the file libraries/parsec/LICENSE) -- -- Maintainer : daan@cs.uu.nl -- Stability : provisional -- Portability : non-portable (uses existentially quantified data constructors) -- -- A helper module to parse lexical elements (tokens). -- ----------------------------------------------------------------------------- module Token where import Data.Char (isAlpha,toLower,toUpper,isSpace,digitToInt) import Data.List (nub,sort) import Text.ParserCombinators.Parsec -- type Numeric = Rational -- 無限精度有理数を使用する場合 type Numeric = Double -- 倍精度浮動小数点数を使用する場合 identStart, identLetter, opLetter, opStart :: GenParser Char a Char identStart = letter <|> oneOf "_" identLetter = alphaNum <|> oneOf "_'" opLetter = oneOf ":!#$%&*+./<=>?@\\^|-~" opStart = opLetter commentStart, commentEnd, commentLine :: String commentStart = "{-" commentEnd = "-}" commentLine = "--" reservedNames, reservedOpNames :: [String] reservedNames = ["let", "letrec", "fn", "in", "if", "then", "else", "class", "method", "begin", "end", "while", "do", "getX", "setX", "getY", "setY", "getZ", "setZ", "write", "read", "try", "catch", "fail", "break", "continue", "abort", "goto", "callcc", "amb", "or", "uniq"] reservedOpNames = ["=", "\\", "->", "@", ":", "+", "-", "*", "/", "%", "==", "/=", ">", ">=", "<", "<=", "++", "&&", "||"] ---------------------------------------------------------------------- parens p = between (symbol "(") (symbol ")") p braces p = between (symbol "{") (symbol "}") p angles p = between (symbol "<") (symbol ">") p brackets p = between (symbol "[") (symbol "]") p semi = symbol ";" comma = symbol "," dot = symbol "." colon = symbol ":" commaSep, semiSep :: GenParser Char a b -> GenParser Char a [b] commaSep p = sepBy p comma semiSep p = sepBy p semi commaSep1 p = sepBy1 p comma semiSep1 p = sepBy1 p semi stringLiteral :: CharParser st String stringLiteral = lexeme ( do{ str <- between (char '"') (char '"' "end of string") (many stringChar) ; return (foldr (maybe id (:)) "" str) } "literal string") stringChar :: CharParser st (Maybe Char) stringChar = do{ c <- stringLetter; return (Just c) } <|> stringEscape "string character" stringLetter :: GenParser Char a Char stringLetter = satisfy (\c -> (c /= '"') && (c /= '\\') && (c > '\026')) stringEscape :: GenParser Char a (Maybe Char) stringEscape = do{ char '\\' ; do{ escapeGap ; return Nothing } <|> do{ escapeEmpty; return Nothing } <|> do{ esc <- escapeCode; return (Just esc) } } escapeEmpty, escapeGap, escapeCode :: GenParser Char a Char escapeEmpty = char '&' escapeGap = do{ many1 space ; char '\\' "end of string gap" } -- escape codes escapeCode = charEsc <|> charNum <|> charAscii <|> charControl "escape code" charControl :: CharParser st Char charControl = do{ char '^' ; code <- upper ; return (toEnum (fromEnum code - fromEnum 'A')) } charNum :: CharParser st Char charNum = do{ code <- decimal <|> do{ char 'o'; number 8 octDigit } <|> do{ char 'x'; number 16 hexDigit } ; return (toEnum (fromInteger code)) } charEsc = choice (map parseEsc escMap) where parseEsc (c,code) = do{ char c; return code } charAscii = choice (map parseAscii asciiMap) where parseAscii (asc,code) = try (do{ string asc; return code }) -- escape code tables escMap :: [(Char, Char)] escMap = zip ("abfnrtv\\\"\'") ("\a\b\f\n\r\t\v\\\"\'") asciiMap :: [(String, Char)] asciiMap = zip (ascii3codes ++ ascii2codes) (ascii3 ++ ascii2) ascii2codes, ascii3codes :: [String] ascii2codes = ["BS","HT","LF","VT","FF","CR","SO","SI","EM", "FS","GS","RS","US","SP"] ascii3codes = ["NUL","SOH","STX","ETX","EOT","ENQ","ACK","BEL", "DLE","DC1","DC2","DC3","DC4","NAK","SYN","ETB", "CAN","SUB","ESC","DEL"] ascii2, ascii3 :: [Char] ascii2 = ['\BS','\HT','\LF','\VT','\FF','\CR','\SO','\SI', '\EM','\FS','\GS','\RS','\US','\SP'] ascii3 = ['\NUL','\SOH','\STX','\ETX','\EOT','\ENQ','\ACK', '\BEL','\DLE','\DC1','\DC2','\DC3','\DC4','\NAK', '\SYN','\ETB','\CAN','\SUB','\ESC','\DEL'] naturalOrFloat :: GenParser Char a (Either Integer Numeric) naturalOrFloat = lexeme (natFloat) "number" -- floats natFloat :: GenParser Char a (Either Integer Numeric) natFloat = do{ char '0' ; zeroNumFloat } <|> decimalFloat zeroNumFloat :: GenParser Char a (Either Integer Numeric) zeroNumFloat = do{ n <- hexadecimal <|> octal ; return (Left n) } <|> decimalFloat <|> fractFloat 0 <|> return (Left 0) decimalFloat = do{ n <- decimal ; option (Left n) (fractFloat n) } fractFloat n = do{ f <- fractExponent n ; return (Right f) } fractExponent :: Integer -> GenParser Char a Numeric fractExponent n = do{ fract <- fraction ; expo <- option 1.0 exponent' ; return ((fromInteger n + fract)*expo) } <|> do{ expo <- exponent' ; return ((fromInteger n)*expo) } fraction :: GenParser Char a Numeric fraction = do{ char '.' ; digits <- many1 digit "fraction" ; return (foldr op 0.0 digits) } "fraction" where op d f = (f + fromIntegral (digitToInt d))/10.0 exponent' :: GenParser Char a Numeric exponent' = do{ oneOf "eE" ; f <- sign ; e <- decimal "exponent" ; return (power (f e)) } "exponent" where power e | e < 0 = 1.0/power(-e) | otherwise = fromInteger (10^e) sign :: CharParser st (Integer -> Integer) sign = (char '-' >> return negate) <|> (char '+' >> return id) <|> return id decimal = number 10 digit hexadecimal, octal :: GenParser Char a Integer hexadecimal = do{ oneOf "xX"; number 16 hexDigit } octal = do{ oneOf "oO"; number 8 octDigit } number base baseDigit = do{ digits <- many1 baseDigit ; let n = foldl (\x d -> base*x + toInteger (digitToInt d)) 0 digits ; seq n (return n) } reservedOp name = lexeme $ try $ do{ string name ; notFollowedBy opLetter ("end of " ++ show name) } operator = lexeme $ try $ do{ name <- oper ; if (isReservedOp name) then unexpected ("reserved operator " ++ show name) else return name } oper :: GenParser Char a [Char] oper = do{ c <- opStart ; cs <- many opLetter ; return (c:cs) } "operator" isReservedOp :: String -> Bool isReservedOp name = isReserved (sort reservedOpNames) name reserved name = lexeme $ try $ do{ string name ; notFollowedBy identLetter ("end of " ++ show name) } identifier = lexeme $ try $ do{ name <- ident ; if (isReservedName name) then unexpected ("reserved word " ++ show name) else return name } ident :: GenParser Char a [Char] ident = do{ c <- identStart ; cs <- many identLetter ; return (c:cs) } "identifier" isReservedName :: String -> Bool isReservedName name = isReserved theReservedNames name isReserved :: [String] -> String -> Bool isReserved names name = scan names where scan [] = False scan (r:rs) = case (compare r name) of LT -> scan rs EQ -> True GT -> False theReservedNames :: [String] theReservedNames = sort reservedNames symbol name = lexeme (string name) lexeme p = do{ x <- p; whiteSpace; return x } whiteSpace, simpleSpace, oneLineComment, multiLineComment, inCommentMulti :: GenParser Char a () whiteSpace = skipMany (simpleSpace <|> oneLineComment <|> multiLineComment "") simpleSpace = skipMany1 (satisfy isSpace) oneLineComment = do{ try (string commentLine) ; skipMany (satisfy (/= '\n')) ; return () } multiLineComment = do{ try (string commentStart) ; inCommentMulti } inCommentMulti = do{ try (string commentEnd) ; return () } <|> do{ multiLineComment ; inCommentMulti } <|> do{ skipMany1 (noneOf startEnd) ; inCommentMulti } <|> do{ oneOf startEnd ; inCommentMulti } "end of comment" where startEnd = nub (commentEnd ++ commentStart)