{- 以下のモジュールに基づく。 (ただし hugs -98 オプションは 使用しないように変更した。) -} ----------------------------------------------------------------------------- -- | -- Module : Text.ParserCombinators.Parsec.Token -- Copyright : (c) Daan Leijen 1999-2001 -- License : BSD-style (see the file libraries/parsec/LICENSE) -- -- Maintainer : daan@cs.uu.nl -- Stability : provisional -- Portability : non-portable (uses existentially quantified data constructors) -- -- A helper module to parse lexical elements (tokens). -- ----------------------------------------------------------------------------- module Token where { import Data.Char (isAlpha,toLower,toUpper,isSpace,digitToInt); import Data.List (nub,sort); import Text.ParserCombinators.Parsec; -- type Numeric = Rational; -- 無限精度有理数を使用する場合 type Numeric = Double; -- 倍精度浮動小数点数を使用する場合 identStart, identLetter, opLetter, opStart :: GenParser Char a Char; identStart = letter <|> oneOf "_"; identLetter = alphaNum <|> oneOf "_'"; opLetter = oneOf ":!#$%&*+./<=>?@\\^|-~"; opStart = opLetter; commentStart, commentEnd, commentLine :: String; commentStart = "{-"; commentEnd = "-}"; commentLine = "--"; reservedNames, reservedOpNames :: [String]; reservedNames = ["let", "letrec", "fn", "in", "if", "then", "else", "class", "method", "begin", "end", "while", "do", "getX", "setX", "getY", "setY", "getZ", "setZ", "write", "read", "try", "catch", "fail", "break", "continue", "abort", "goto", "callcc", "amb", "or", "uniq"]; reservedOpNames = ["=", "\\", "->", "@", ":", "+", "-", "*", "/", "%", "==", "/=", ">", ">=", "<", "<=", "++", "&&", "||"]; ---------------------------------------------------------------------- parens p = between (symbol "(") (symbol ")") p; braces p = between (symbol "{") (symbol "}") p; angles p = between (symbol "<") (symbol ">") p; brackets p = between (symbol "[") (symbol "]") p; semi = symbol ";"; comma = symbol ","; dot = symbol "."; colon = symbol ":"; commaSep, semiSep :: GenParser Char a b -> GenParser Char a [b]; commaSep p = sepBy p comma; semiSep p = sepBy p semi; commaSep1 p = sepBy1 p comma; semiSep1 p = sepBy1 p semi; stringLiteral :: CharParser st String; stringLiteral = lexeme ( do{ str <- between (char '"') (char '"' "end of string") (many stringChar) ; return (foldr (maybe id (:)) "" str) } "literal string"); stringChar :: CharParser st (Maybe Char); stringChar = do{ c <- stringLetter; return (Just c) } <|> stringEscape "string character"; stringLetter :: GenParser Char a Char; stringLetter = satisfy (\c -> (c /= '"') && (c /= '\\') && (c > '\026')); stringEscape :: GenParser Char a (Maybe Char); stringEscape = do{ char '\\' ; do{ escapeGap ; return Nothing } <|> do{ escapeEmpty; return Nothing } <|> do{ esc <- escapeCode; return (Just esc) } }; escapeEmpty, escapeGap, escapeCode :: GenParser Char a Char; escapeEmpty = char '&'; escapeGap = do{ many1 space ; char '\\' "end of string gap" }; -- escape codes escapeCode = charEsc <|> charNum <|> charAscii <|> charControl "escape code"; charControl :: CharParser st Char; charControl = do{ char '^' ; code <- upper ; return (toEnum (fromEnum code - fromEnum 'A')) }; charNum :: CharParser st Char; charNum = do{ code <- decimal <|> do{ char 'o'; number 8 octDigit } <|> do{ char 'x'; number 16 hexDigit } ; return (toEnum (fromInteger code)) }; charEsc = choice (map parseEsc escMap) where { parseEsc (c,code) = do{ char c; return code }; }; charAscii = choice (map parseAscii asciiMap) where { parseAscii (asc,code) = try (do{ string asc; return code }); }; -- escape code tables escMap :: [(Char, Char)]; escMap = zip ("abfnrtv\\\"\'") ("\a\b\f\n\r\t\v\\\"\'"); asciiMap :: [(String, Char)]; asciiMap = zip (ascii3codes ++ ascii2codes) (ascii3 ++ ascii2); ascii2codes, ascii3codes :: [String]; ascii2codes = ["BS","HT","LF","VT","FF","CR","SO","SI","EM", "FS","GS","RS","US","SP"]; ascii3codes = ["NUL","SOH","STX","ETX","EOT","ENQ","ACK","BEL", "DLE","DC1","DC2","DC3","DC4","NAK","SYN","ETB", "CAN","SUB","ESC","DEL"]; ascii2, ascii3 :: [Char]; ascii2 = ['\BS','\HT','\LF','\VT','\FF','\CR','\SO','\SI', '\EM','\FS','\GS','\RS','\US','\SP']; ascii3 = ['\NUL','\SOH','\STX','\ETX','\EOT','\ENQ','\ACK', '\BEL','\DLE','\DC1','\DC2','\DC3','\DC4','\NAK', '\SYN','\ETB','\CAN','\SUB','\ESC','\DEL']; naturalOrFloat :: GenParser Char a (Either Integer Numeric); naturalOrFloat = lexeme (natFloat) "number"; -- floats natFloat :: GenParser Char a (Either Integer Numeric); natFloat = do{ char '0' ; zeroNumFloat } <|> decimalFloat; zeroNumFloat :: GenParser Char a (Either Integer Numeric); zeroNumFloat = do{ n <- hexadecimal <|> octal ; return (Left n) } <|> decimalFloat <|> fractFloat 0 <|> return (Left 0); decimalFloat = do{ n <- decimal ; option (Left n) (fractFloat n) }; fractFloat n = do{ f <- fractExponent n ; return (Right f) }; fractExponent :: Integer -> GenParser Char a Numeric; fractExponent n = do{ fract <- fraction ; expo <- option 1.0 exponent' ; return ((fromInteger n + fract)*expo) } <|> do{ expo <- exponent' ; return ((fromInteger n)*expo) }; fraction :: GenParser Char a Numeric; fraction = do{ char '.' ; digits <- many1 digit "fraction" ; return (foldr op 0.0 digits) } "fraction" where { op d f = (f + fromIntegral (digitToInt d))/10.0; }; exponent' :: GenParser Char a Numeric; exponent' = do{ oneOf "eE" ; f <- sign ; e <- decimal "exponent" ; return (power (f e)) } "exponent" where { power e | e < 0 = 1.0/power(-e) | otherwise = fromInteger (10^e) }; sign :: CharParser st (Integer -> Integer); sign = (char '-' >> return negate) <|> (char '+' >> return id) <|> return id; decimal = number 10 digit; hexadecimal, octal :: GenParser Char a Integer; hexadecimal = do{ oneOf "xX"; number 16 hexDigit }; octal = do{ oneOf "oO"; number 8 octDigit }; number base baseDigit = do{ digits <- many1 baseDigit ; let n = foldl (\x d -> base*x + toInteger (digitToInt d)) 0 digits ; seq n (return n) }; reservedOp name = lexeme $ try $ do{ string name ; notFollowedBy opLetter ("end of " ++ show name) }; operator = lexeme $ try $ do{ name <- oper ; if (isReservedOp name) then unexpected ("reserved operator " ++ show name) else return name }; oper :: GenParser Char a [Char]; oper = do{ c <- opStart ; cs <- many opLetter ; return (c:cs) } "operator"; isReservedOp :: String -> Bool; isReservedOp name = isReserved (sort reservedOpNames) name; reserved name = lexeme $ try $ do{ string name ; notFollowedBy identLetter ("end of " ++ show name) }; identifier = lexeme $ try $ do{ name <- ident ; if (isReservedName name) then unexpected ("reserved word " ++ show name) else return name }; ident :: GenParser Char a [Char]; ident = do{ c <- identStart ; cs <- many identLetter ; return (c:cs) } "identifier"; isReservedName :: String -> Bool; isReservedName name = isReserved theReservedNames name; isReserved :: [String] -> String -> Bool; isReserved names name = scan names where { scan [] = False; scan (r:rs) = case (compare r name) of { LT -> scan rs; EQ -> True; GT -> False } }; theReservedNames :: [String]; theReservedNames = sort reservedNames; symbol name = lexeme (string name); lexeme p = do{ x <- p; whiteSpace; return x }; whiteSpace, simpleSpace, oneLineComment, multiLineComment, inCommentMulti :: GenParser Char a (); whiteSpace = skipMany (simpleSpace <|> oneLineComment <|> multiLineComment ""); simpleSpace = skipMany1 (satisfy isSpace); oneLineComment = do{ try (string commentLine) ; skipMany (satisfy (/= '\n')) ; return () }; multiLineComment = do{ try (string commentStart) ; inCommentMulti }; inCommentMulti = do{ try (string commentEnd) ; return () } <|> do{ multiLineComment ; inCommentMulti } <|> do{ skipMany1 (noneOf startEnd) ; inCommentMulti } <|> do{ oneOf startEnd ; inCommentMulti } "end of comment" where { startEnd = nub (commentEnd ++ commentStart) }; }