{- 以下のモジュールに基づく。 (ただし hugs -98 オプションは 使用しないように変更した。) -} ----------------------------------------------------------------------------- -- | -- Module : Text.ParserCombinators.Parsec.Token -- Copyright : (c) Daan Leijen 1999-2001 -- License : BSD-style (see the file libraries/parsec/LICENSE) -- -- Maintainer : daan@cs.uu.nl -- Stability : provisional -- Portability : non-portable (uses existentially quantified data constructors) -- -- A helper module to parse lexical elements (tokens). -- ----------------------------------------------------------------------------- module Token where { import Data.Char (isAlpha,toLower,toUpper,isSpace,digitToInt); import Data.List (nub,sort); import Text.ParserCombinators.Parsec; -- type Numeric = Rational; -- 無限精度有理数を使用する場合 type Numeric = Double; -- 倍精度浮動小数点数を使用する場合 identStart, identLetter, opLetter, opStart :: GenParser Char a Char; identStart = letter <|> oneOf "_"; identLetter = alphaNum <|> oneOf "_'"; opLetter = oneOf ":!#$%&*+./<=>?@\\^|-~"; opStart = opLetter; commentStart, commentEnd, commentLine :: String; commentStart = "{-"; commentEnd = "-}"; commentLine = "--"; reservedNames, reservedOpNames :: [String]; reservedNames = ["let", "letrec", "fn", "in", "if", "then", "else", "class", "method", "begin", "end", "while", "do", "getX", "setX", "getY", "setY", "getZ", "setZ", "write", "read", "try", "catch", "fail", "break", "continue", "abort", "goto", "callcc", "amb", "or", "uniq"]; reservedOpNames = ["=", "\\", "->", "@", ":", "+", "-", "*", "/", "%", "==", "/=", ">", ">=", "<", "<=", "++", "&&", "||"]; ---------------------------------------------------------------------- parens p = between (symbol "(") (symbol ")") p; braces p = between (symbol "{") (symbol "}") p; angles p = between (symbol "<") (symbol ">") p; brackets p = between (symbol "[") (symbol "]") p; semi = symbol ";"; comma = symbol ","; dot = symbol "."; colon = symbol ":"; commaSep, semiSep :: GenParser Char a b -> GenParser Char a [b]; commaSep p = sepBy p comma; semiSep p = sepBy p semi; commaSep1 p = sepBy1 p comma; semiSep1 p = sepBy1 p semi; stringLiteral :: CharParser st String; stringLiteral = lexeme ( do{ str <- between (char '"') (char '"' "end of string") (many stringChar) ; return (foldr (maybe id (:)) "" str) } "literal string") ; stringChar :: CharParser st (Maybe Char); stringChar = do{ c <- stringLetter; return (Just c) } <|> stringEscape "string character" ; stringLetter :: GenParser Char a Char; stringLetter = satisfy (\c -> (c /= '"') && (c /= '\\') && (c > '\026')) ; stringEscape :: GenParser Char a (Maybe Char); stringEscape = do{ char '\\' ; do{ escapeGap ; return Nothing } <|> do{ escapeEmpty; return Nothing } <|> do{ esc <- escapeCode; return (Just esc) } } ; escapeEmpty, escapeGap, escapeCode :: GenParser Char a Char; escapeEmpty = char '&'; escapeGap = do{ many1 space ; char '\\' "end of string gap" } ; -- escape codes escapeCode = charEsc <|> charNum <|> charAscii <|> charControl "escape code" ; charControl :: CharParser st Char; charControl = do{ char '^' ; code <- upper ; return (toEnum (fromEnum code - fromEnum 'A')) } ; charNum :: CharParser st Char; charNum = do{ code <- decimal <|> do{ char 'o'; number 8 octDigit } <|> do{ char 'x'; number 16 hexDigit } ; return (toEnum (fromInteger code)) } ; charEsc = choice (map parseEsc escMap) where { parseEsc (c,code) = do{ char c; return code } } ; charAscii = choice (map parseAscii asciiMap) where { parseAscii (asc,code) = try (do{ string asc; return code }) } ; -- escape code tables escMap :: [(Char, Char)]; escMap = zip ("abfnrtv\\\"\'") ("\a\b\f\n\r\t\v\\\"\'") ; asciiMap :: [(String, Char)]; asciiMap = zip (ascii3codes ++ ascii2codes) (ascii3 ++ ascii2) ; ascii2codes, ascii3codes :: [String]; ascii2codes = ["BS","HT","LF","VT","FF","CR","SO","SI","EM", "FS","GS","RS","US","SP"] ; ascii3codes = ["NUL","SOH","STX","ETX","EOT","ENQ","ACK","BEL", "DLE","DC1","DC2","DC3","DC4","NAK","SYN","ETB", "CAN","SUB","ESC","DEL"] ; ascii2, ascii3 :: [Char]; ascii2 = ['\BS','\HT','\LF','\VT','\FF','\CR','\SO','\SI', '\EM','\FS','\GS','\RS','\US','\SP'] ; ascii3 = ['\NUL','\SOH','\STX','\ETX','\EOT','\ENQ','\ACK', '\BEL','\DLE','\DC1','\DC2','\DC3','\DC4','\NAK', '\SYN','\ETB','\CAN','\SUB','\ESC','\DEL'] ; naturalOrFloat :: GenParser Char a (Either Integer Numeric); naturalOrFloat = lexeme (natFloat) "number" ; -- floats natFloat :: GenParser Char a (Either Integer Numeric); natFloat = do{ char '0' ; zeroNumFloat } <|> decimalFloat ; zeroNumFloat :: GenParser Char a (Either Integer Numeric); zeroNumFloat = do{ n <- hexadecimal <|> octal ; return (Left n) } <|> decimalFloat <|> fractFloat 0 <|> return (Left 0) ; decimalFloat = do{ n <- decimal ; option (Left n) (fractFloat n) } ; fractFloat n = do{ f <- fractExponent n ; return (Right f) } ; fractExponent :: Integer -> GenParser Char a Numeric; fractExponent n = do{ fract <- fraction ; expo <- option 1.0 exponent' ; return ((fromInteger n + fract)*expo) } <|> do{ expo <- exponent' ; return ((fromInteger n)*expo) } ; fraction :: GenParser Char a Numeric; fraction = do{ char '.' ; digits <- many1 digit "fraction" ; return (foldr op 0.0 digits) } "fraction" where op d f = (f + fromIntegral (digitToInt d))/10.0 ; exponent' :: GenParser Char a Numeric; exponent' = do{ oneOf "eE" ; f <- sign ; e <- decimal "exponent" ; return (power (f e)) } "exponent" where power e | e < 0 = 1.0/power(-e) | otherwise = fromInteger (10^e) ; sign :: CharParser st (Integer -> Integer); sign = (char '-' >> return negate) <|> (char '+' >> return id) <|> return id ; decimal = number 10 digit ; hexadecimal, octal :: GenParser Char a Integer; hexadecimal = do{ oneOf "xX"; number 16 hexDigit } ; octal = do{ oneOf "oO"; number 8 octDigit } ; number base baseDigit = do{ digits <- many1 baseDigit ; let n = foldl (\x d -> base*x + toInteger (digitToInt d)) 0 digits ; seq n (return n) } ; reservedOp name = lexeme $ try $ do{ string name ; notFollowedBy opLetter ("end of " ++ show name) } ; operator = lexeme $ try $ do{ name <- oper ; if (isReservedOp name) then unexpected ("reserved operator " ++ show name) else return name } ; oper :: GenParser Char a [Char]; oper = do{ c <- opStart ; cs <- many opLetter ; return (c:cs) } "operator" ; isReservedOp :: String -> Bool; isReservedOp name = isReserved (sort reservedOpNames) name ; reserved name = lexeme $ try $ do{ string name ; notFollowedBy identLetter ("end of " ++ show name) } ; identifier = lexeme $ try $ do{ name <- ident ; if (isReservedName name) then unexpected ("reserved word " ++ show name) else return name } ; ident :: GenParser Char a [Char]; ident = do{ c <- identStart ; cs <- many identLetter ; return (c:cs) } "identifier" ; isReservedName :: String -> Bool; isReservedName name = isReserved theReservedNames name ; isReserved :: [String] -> String -> Bool; isReserved names name = scan names where scan [] = False scan (r:rs) = case (compare r name) of LT -> scan rs EQ -> True GT -> False ; theReservedNames :: [String]; theReservedNames = sort reservedNames ; symbol name = lexeme (string name); lexeme p = do{ x <- p; whiteSpace; return x } ; whiteSpace, simpleSpace, oneLineComment, multiLineComment, inCommentMulti :: GenParser Char a (); whiteSpace = skipMany (simpleSpace <|> oneLineComment <|> multiLineComment "") ; simpleSpace = skipMany1 (satisfy isSpace) ; oneLineComment = do{ try (string commentLine) ; skipMany (satisfy (/= '\n')) ; return () } ; multiLineComment = do { try (string commentStart) ; inCommentMulti } ; inCommentMulti = do{ try (string commentEnd) ; return () } <|> do{ multiLineComment ; inCommentMulti } <|> do{ skipMany1 (noneOf startEnd) ; inCommentMulti } <|> do{ oneOf startEnd ; inCommentMulti } "end of comment" where startEnd = nub (commentEnd ++ commentStart) ; }