{-

Based on the module below.
(Modified, however, so that the hugs -98 option is not used.)

 -}

-----------------------------------------------------------------------------
-- |
-- Module      :  Text.ParserCombinators.Parsec.Token
-- Copyright   :  (c) Daan Leijen 1999-2001
-- License     :  BSD-style (see the file libraries/parsec/LICENSE)
-- 
-- Maintainer  :  daan@cs.uu.nl
-- Stability   :  provisional
-- Portability :  non-portable (uses existentially quantified data constructors)
--
-- A helper module to parse lexical elements (tokens).
-- 
-----------------------------------------------------------------------------

module Token where {

  import Data.Char (isAlpha,toLower,toUpper,isSpace,digitToInt);
  import Data.List (nub,sort);
  import Text.ParserCombinators.Parsec;

  -- Numeric is the type of the non-integer values produced by the
  -- float parsers in this module (fraction, exponent', fractExponent).
  -- Exactly one of the two alternatives below should be active.
  -- type Numeric = Rational; -- use this for arbitrary-precision rationals
  type Numeric = Double;      -- use this for double-precision floating point

  -- Character classes used by the identifier and operator lexers.

  -- First character of an identifier: a letter or an underscore.
  identStart :: GenParser Char a Char;
  identStart  = letter <|> oneOf "_";

  -- Any later character of an identifier: alphanumeric, underscore or prime.
  identLetter :: GenParser Char a Char;
  identLetter = alphaNum <|> oneOf "_'";

  -- Any character that may appear inside an operator.
  opLetter :: GenParser Char a Char;
  opLetter    = oneOf ":!#$%&*+./<=>?@\\^|-~";

  -- First character of an operator: the same set as opLetter.
  opStart :: GenParser Char a Char;
  opStart     = opLetter;

  -- Comment delimiters recognised by whiteSpace.

  -- Opening delimiter of a (nestable) block comment.
  commentStart :: String;
  commentStart = "{-";

  -- Closing delimiter of a block comment.
  commentEnd :: String;
  commentEnd = "-}";

  -- Prefix that starts a comment running to the end of the line.
  commentLine :: String;
  commentLine = "--";

  -- Words that identifier refuses to accept as ordinary identifiers.
  -- The sub-lists are only a visual grouping; concat flattens them in order.
  reservedNames :: [String];
  reservedNames = concat
    [ ["let", "letrec", "fn", "in", "if", "then", "else"]
    , ["class", "method"]
    , ["begin", "end", "while", "do"]
    , ["getX", "setX", "getY", "setY", "getZ", "setZ"]
    , ["write", "read"]
    , ["try", "catch", "fail"]
    , ["break", "continue", "abort", "goto", "callcc"]
    , ["amb", "or", "uniq"]
    ];

  -- Operator spellings that operator refuses to accept as ordinary operators.
  reservedOpNames :: [String];
  reservedOpNames = concat
    [ ["=", "\\", "->", "@", ":"]
    , ["+", "-", "*", "/", "%"]
    , ["==", "/=", ">", ">=", "<", "<="]
    , ["++", "&&", "||"]
    ];

  ----------------------------------------------------------------------
  -- Bracketing combinators: each runs p between an opening and a closing
  -- symbol (both are lexemes, so trailing white space is skipped) and
  -- returns the result of p.
  parens p        = do{ symbol "("; inner <- p; symbol ")"; return inner };
  braces p        = do{ symbol "{"; inner <- p; symbol "}"; return inner };
  angles p        = do{ symbol "<"; inner <- p; symbol ">"; return inner };
  brackets p      = do{ symbol "["; inner <- p; symbol "]"; return inner };

  -- Single-character punctuation tokens; each skips trailing white space
  -- and returns the matched text.
  semi            = lexeme (string ";");
  comma           = lexeme (string ",");
  dot             = lexeme (string ".");
  colon           = lexeme (string ":");

  -- Zero or more occurrences of p separated by commas.
  commaSep :: GenParser Char a b -> GenParser Char a [b];
  commaSep p      = p `sepBy` comma;

  -- Zero or more occurrences of p separated by semicolons.
  semiSep :: GenParser Char a b -> GenParser Char a [b];
  semiSep p       = p `sepBy` semi;

  -- One or more occurrences of p separated by commas.
  commaSep1 p     = p `sepBy1` comma;

  -- One or more occurrences of p separated by semicolons.
  semiSep1 p      = p `sepBy1` semi;


  -- Parses a double-quoted string literal as a lexeme and returns its
  -- contents.  Elements for which stringChar yields Nothing (string gaps
  -- and the empty escape) contribute no character to the result.
  stringLiteral :: CharParser st String;
  stringLiteral   = lexeme (quoted <?> "literal string")
                  where {
                    quoted = do{ char '"'
                               ; pieces <- many stringChar
                               ; char '"' <?> "end of string"
                               ; return [ c | Just c <- pieces ]
                               };
                  };


  -- One element of a string literal body: either an ordinary character
  -- (wrapped in Just) or an escape sequence as parsed by stringEscape.
  stringChar :: CharParser st (Maybe Char);
  stringChar      = ((stringLetter >>= \c -> return (Just c)) <|> stringEscape)
                    <?> "string character";

   
  -- A character that may appear unescaped inside a string literal: not a
  -- double quote, not a backslash, and greater than '\026' (a decimal
  -- escape, i.e. character code 26).
  stringLetter :: GenParser Char a Char;
  stringLetter    = satisfy ordinary
                  where {
                    ordinary c = c `notElem` "\"\\" && c > '\026';
                  };


  -- Parses a backslash escape inside a string literal.  A string gap and
  -- the empty escape produce Nothing; any other escape code produces the
  -- character it denotes.
  stringEscape :: GenParser Char a (Maybe Char);
  stringEscape    = char '\\' >> (skipGap <|> skipEmpty <|> decoded)
                  where {
                    skipGap   = escapeGap   >> return Nothing;
                    skipEmpty = escapeEmpty >> return Nothing;
                    decoded   = escapeCode >>= \esc -> return (Just esc);
                  };

  -- The empty escape: an ampersand directly after the backslash.
  escapeEmpty :: GenParser Char a Char;
  escapeEmpty     = char '&';

  -- A string gap: one or more white-space characters closed by a backslash.
  escapeGap :: GenParser Char a Char;
  escapeGap       = many1 space >> (char '\\' <?> "end of string gap");

  -- Escape codes.  The alternatives are tried in the listed order and are
  -- combined right-nested, exactly as a chain of <|> would be.
  escapeCode :: GenParser Char a Char;
  escapeCode      = foldr1 (<|>) [charEsc, charNum, charAscii, charControl]
                  <?> "escape code";


  -- Parses a control-character escape: a caret followed by an upper-case
  -- letter, returning the corresponding control character.
  --
  -- In Haskell's lexical syntax a caret followed by A denotes control-A,
  -- which is character code 1 (SOH), B denotes code 2, and so on.  The
  -- offset from 'A' therefore has to be incremented by one; without the
  -- increment every control escape came out one code point too low
  -- (caret-A gave NUL instead of SOH).
  charControl :: CharParser st Char;
  charControl     = do{ char '^'
                      ; code <- upper
                      ; return (toEnum (fromEnum code - fromEnum 'A' + 1))
                      };

  -- Parses a numeric character escape: a decimal number, or 'o' followed
  -- by octal digits, or 'x' followed by hexadecimal digits, and returns
  -- the character with that code.
  --
  -- Codes above 0x10FFFF do not denote a Char; converting them with
  -- toEnum would raise a runtime exception instead of a parse error, so
  -- they are rejected through the parser's own failure mechanism.
  charNum :: CharParser st Char;
  charNum         = do{ code <- decimal 
                                <|> do{ char 'o'; number 8 octDigit }
                                <|> do{ char 'x'; number 16 hexDigit }
                      ; if code > 0x10FFFF
                          then fail "invalid escape sequence"
                          else return (toEnum (fromInteger code))
                      };

  -- Single-letter escapes: for every (letter, character) pair in escMap,
  -- accept the letter and return the character it stands for.
  charEsc         = choice [ char c >> return code | (c, code) <- escMap ];

                    
  -- Mnemonic ASCII escapes: for every (mnemonic, character) pair in
  -- asciiMap, accept the mnemonic and return its character.  Each
  -- alternative is wrapped in try so that a partial match of one mnemonic
  -- does not prevent the following ones from being attempted.
  charAscii       = choice [ try (string asc >> return code)
                           | (asc, code) <- asciiMap ];
        

  -- Escape code tables.

  -- Pairs each single-letter escape with the character it denotes.
  escMap :: [(Char, Char)];
  escMap          = zip escLetters escMeanings
                  where {
                    escLetters  = "abfnrtv\\\"\'";
                    escMeanings = "\a\b\f\n\r\t\v\\\"\'";
                  };

  -- Pairs each ASCII mnemonic with its character.  The three-letter
  -- mnemonics are listed before the two-letter ones, so charAscii tries
  -- them first.
  asciiMap :: [(String, Char)];
  asciiMap        = zip mnemonics characters
                  where {
                    mnemonics  = ascii3codes ++ ascii2codes;
                    characters = ascii3 ++ ascii2;
                  };


  -- Two-letter ASCII mnemonics, in the same order as ascii2.
  ascii2codes :: [String];
  ascii2codes     = words "BS HT LF VT FF CR SO SI EM FS GS RS US SP";

  -- Three-letter ASCII mnemonics, in the same order as ascii3.
  ascii3codes :: [String];
  ascii3codes     = words ("NUL SOH STX ETX EOT ENQ ACK BEL "
                        ++ "DLE DC1 DC2 DC3 DC4 NAK SYN ETB "
                        ++ "CAN SUB ESC DEL");


  -- Characters denoted by the two-letter mnemonics in ascii2codes.
  ascii2 :: [Char];
  ascii2          = "\BS\HT\LF\VT\FF\CR\SO\SI\EM\FS\GS\RS\US\SP";

  -- Characters denoted by the three-letter mnemonics in ascii3codes.
  ascii3 :: [Char];
  ascii3          = "\NUL\SOH\STX\ETX\EOT\ENQ\ACK\BEL\DLE\DC1\DC2\DC3\DC4\NAK\SYN\ETB\CAN\SUB\ESC\DEL";


  -- Parses a number as a lexeme: Left for a natural number, Right for a
  -- value with a fraction and/or exponent.  Trailing white space is skipped.
  naturalOrFloat :: GenParser Char a (Either Integer Numeric);
  naturalOrFloat  = do{ value <- natFloat; whiteSpace; return value }
                    <?> "number";


  -- Floats.

  -- A number without trailing white-space handling.  A leading '0' is
  -- consumed first and the remainder is handled by zeroNumFloat; anything
  -- else is parsed by decimalFloat.
  natFloat :: GenParser Char a (Either Integer Numeric);
  natFloat        = (char '0' >> zeroNumFloat) <|> decimalFloat;


  -- Continues a number after its leading '0' has been consumed.  Tried in
  -- order: a hexadecimal or octal literal, further decimal digits
  -- (optionally with fraction/exponent), a fraction/exponent applied to
  -- zero, and finally the plain integer zero.
  zeroNumFloat :: GenParser Char a (Either Integer Numeric);
  zeroNumFloat    = foldr1 (<|>)
                      [ (hexadecimal <|> octal) >>= \n -> return (Left n)
                      , decimalFloat
                      , fractFloat 0
                      , return (Left 0)
                      ];
                    

  -- Decimal digits optionally followed by a fraction and/or exponent.
  -- Without the optional part the digits are returned as Left.
  decimalFloat    = decimal >>= \n -> fractFloat n <|> return (Left n);

  -- Fraction/exponent part for the integer part n, tagged as Right.
  fractFloat n    = fractExponent n >>= \f -> return (Right f);

  -- Given the integer part n, parses either a fraction optionally followed
  -- by an exponent, or an exponent alone, and returns the combined value.
  -- A missing exponent after a fraction counts as a scale factor of 1.0.
  fractExponent :: Integer -> GenParser Char a Numeric;
  fractExponent n = withFraction <|> exponentOnly
                  where {
                    whole        = fromInteger n;
                    withFraction = do{ frac  <- fraction
                                     ; scale <- exponent' <|> return 1.0
                                     ; return ((whole + frac) * scale) };
                    exponentOnly = exponent' >>= \scale -> return (whole * scale);
                  };

-- Parses a '.' followed by one or more digits and returns the value of
-- that fractional part.  The digits are folded from the right, each step
-- adding a digit and dividing by ten.
fraction :: GenParser Char a Numeric;
fraction        = do{ char '.'
                    ; ds <- many1 digit <?> "fraction"
                    ; return (foldr (\d acc -> (acc + fromIntegral (digitToInt d)) / 10.0)
                                    0.0
                                    ds) }
                  <?> "fraction";

-- Parses an exponent: 'e' or 'E', an optional sign, and decimal digits.
-- Returns ten raised to that (possibly negative) power as a scale factor.
exponent' :: GenParser Char a Numeric;
exponent'       = do{ oneOf "eE"
                    ; applySign <- sign
                    ; magnitude <- decimal <?> "exponent"
                    ; return (tenTo (applySign magnitude)) }
                  <?> "exponent"
                where {
                   -- Negative powers are the reciprocal of the positive power.
                   tenTo k = if k < 0
                               then 1.0 / tenTo (negate k)
                               else fromInteger (10 ^ k);
                }; 


-- Parses an optional sign and returns the function to apply to the
-- following number: negate for '-', id for '+' or for no sign at all.
sign            :: CharParser st (Integer -> Integer);
sign            = foldr1 (<|>)
                    [ char '-' >> return negate
                    , char '+' >> return id
                    , return id
                    ];

-- One or more decimal digits, read as a base-10 number.
decimal         = number 10 digit;

-- 'x' or 'X' followed by one or more hexadecimal digits.
hexadecimal :: GenParser Char a Integer;
hexadecimal     = oneOf "xX" >> number 16 hexDigit;

-- 'o' or 'O' followed by one or more octal digits.
octal :: GenParser Char a Integer;
octal           = oneOf "oO" >> number 8 octDigit;


-- Parses one or more digits accepted by baseDigit and interprets them as
-- a number in the given base, most significant digit first.  The result
-- is forced before it is returned.
number base baseDigit
    = many1 baseDigit >>= \digits ->
      return $! foldl shiftIn 0 digits
    where {
      shiftIn acc d = base * acc + toInteger (digitToInt d);
    };

-- Parses the reserved operator name as a lexeme.  The match only succeeds
-- when name is not immediately followed by another operator character;
-- try makes the whole attempt backtrack on failure.
reservedOp name =
    lexeme (try (string name >>
                 (notFollowedBy opLetter <?> ("end of " ++ show name))));

-- Parses an operator as a lexeme and returns its spelling.  Reserved
-- operators are rejected with an "unexpected" error; try makes the
-- attempt backtrack so nothing is consumed in that case.
operator =
    lexeme (try (oper >>= \name ->
        case isReservedOp name of {
          True  -> unexpected ("reserved operator " ++ show name);
          False -> return name }));


-- Raw operator text: one opStart character followed by any number of
-- opLetter characters.  No reserved-operator check and no white-space
-- skipping happen here.
oper :: GenParser Char a [Char];
oper =
    (opStart >>= \c -> many opLetter >>= \cs -> return (c:cs))
    <?> "operator";


-- Tells whether the given operator spelling is listed in reservedOpNames.
--
-- isReserved needs its list in ascending order.  The sorted list is bound
-- once under this argument-free definition, so the sort is shared across
-- calls rather than being repeated for every operator that is checked
-- (isReservedName gets the same effect through theReservedNames).
isReservedOp :: String -> Bool;
isReservedOp = isReserved sortedOpNames
    where {
      sortedOpNames = sort reservedOpNames;
    };


-- Parses the reserved word name as a lexeme.  The match only succeeds
-- when name is not immediately followed by another identifier character;
-- try makes the whole attempt backtrack on failure.
reserved name =
     lexeme (try (string name >>
                  (notFollowedBy identLetter <?> ("end of " ++ show name))));

-- Parses an identifier as a lexeme and returns its name.  Reserved words
-- are rejected with an "unexpected" error; try makes the attempt
-- backtrack so nothing is consumed in that case.
identifier =
    lexeme (try (ident >>= \name ->
        case isReservedName name of {
          True  -> unexpected ("reserved word " ++ show name);
          False -> return name }));

-- Raw identifier text: one identStart character followed by any number
-- of identLetter characters.  No reserved-word check and no white-space
-- skipping happen here.
ident :: GenParser Char a [Char];
ident
    = (identStart >>= \c -> many identLetter >>= \cs -> return (c:cs))
    <?> "identifier";

-- Tells whether the given word is one of the reserved names, by looking
-- it up in the pre-sorted list theReservedNames.
isReservedName :: String -> Bool;
isReservedName = isReserved theReservedNames;

-- Membership test on a list that is scanned front to back: entries
-- smaller than name are skipped, and the first entry that is not smaller
-- decides the answer (True only when it equals name).  The scan therefore
-- stops at the first larger entry, which gives the expected result when
-- names is in ascending order.
isReserved :: [String] -> String -> Bool;
isReserved names name
    = case dropWhile (< name) names of {
        (candidate:_) -> candidate == name;
        []            -> False
      };

-- reservedNames in ascending order, as needed by the early-terminating
-- scan in isReserved; used by isReservedName.
theReservedNames :: [String];
theReservedNames = sort reservedNames;

-- Parses the literal text name, skips any white space after it, and
-- returns the matched text.
symbol name = do{ matched <- string name; whiteSpace; return matched };

-- Runs p, then skips trailing white space and comments, and returns the
-- result of p.
lexeme p = p >>= \x -> whiteSpace >> return x;


whiteSpace, simpleSpace, oneLineComment, multiLineComment, inCommentMulti :: GenParser Char a ();
-- Skips any mixture of white space, line comments and block comments.
-- The empty label keeps these alternatives out of "expecting" messages.
whiteSpace = skipMany (skippable <?> "")
    where {
      skippable = foldr1 (<|>) [simpleSpace, oneLineComment, multiLineComment];
    };


-- Skips one or more characters for which isSpace holds.
simpleSpace = skipMany1 (satisfy isSpace);

-- Skips a line comment: the commentLine prefix and everything up to, but
-- not including, the next newline character.
oneLineComment =
    try (string commentLine) >> (skipMany (satisfy notNewline) >> return ())
    where {
      notNewline c = c /= '\n';
    };

-- Skips a block comment: the commentStart delimiter followed by the
-- comment body, which inCommentMulti consumes through the closing
-- delimiter.
multiLineComment =
    try (string commentStart) >> inCommentMulti;

-- Consumes the remainder of a block comment, including nested block
-- comments.  Alternatives, tried in order:
--   1. the closing delimiter, which ends the comment;
--   2. a nested block comment, after which scanning continues;
--   3. a run of characters that occur in neither delimiter;
--   4. a single character that occurs in a delimiter but did not start one.
inCommentMulti
    = foldr1 (<|>)
        [ try (string commentEnd) >> return ()
        , multiLineComment >> inCommentMulti
        , skipMany1 (noneOf delimiterChars) >> inCommentMulti
        , oneOf delimiterChars >> inCommentMulti
        ]
    <?> "end of comment"
    where {
      delimiterChars = nub (commentEnd ++ commentStart)
    };
}