{-

以下のモジュールに基づく。
（ただし hugs -98 オプションは  使用しないように変更した。）

 -}

-----------------------------------------------------------------------------
-- |
-- Module      :  Text.ParserCombinators.Parsec.Token
-- Copyright   :  (c) Daan Leijen 1999-2001
-- License     :  BSD-style (see the file libraries/parsec/LICENSE)
-- 
-- Maintainer  :  daan@cs.uu.nl
-- Stability   :  provisional
-- Portability :  non-portable (uses existentially quantified data constructors)
--
-- A helper module to parse lexical elements (tokens).
-- 
-----------------------------------------------------------------------------

module Token where {
  import Data.Char (isAlpha,toLower,toUpper,isSpace,digitToInt);
  import Data.List (nub,sort);
  import Text.ParserCombinators.Parsec;

  -- type Numeric = Rational; -- use this instead for arbitrary-precision rationals
  type Numeric = Double;      -- use double-precision floating point numbers

  -- First character of an identifier: a letter or an underscore.
  identStart :: GenParser Char a Char;
  identStart = letter <|> oneOf "_";

  -- Any later character of an identifier: alphanumeric, underscore or prime.
  identLetter :: GenParser Char a Char;
  identLetter = alphaNum <|> oneOf "_'";

  -- A character that may appear inside an operator symbol.
  opLetter :: GenParser Char a Char;
  opLetter = oneOf ":!#$%&*+./<=>?@\\^|-~";

  -- Operators may start with any operator character.
  opStart :: GenParser Char a Char;
  opStart = opLetter;

  -- Opening delimiter of a block comment (used by multiLineComment).
  commentStart :: String;
  commentStart = "{-";

  -- Closing delimiter of a block comment (used by inCommentMulti).
  commentEnd :: String;
  commentEnd = "-}";

  -- Prefix that starts a comment running to the end of the line
  -- (used by oneLineComment).
  commentLine :: String;
  commentLine = "--";

  -- Words that the identifier parser refuses to accept as identifiers.
  -- Each row is split on whitespace; the order of the entries is preserved.
  reservedNames :: [String];
  reservedNames  = concatMap words
                     [ "let letrec fn in if then else"
                     , "class method"
                     , "begin end while do"
                     , "getX setX getY setY getZ setZ"
                     , "write read"
                     , "try catch fail"
                     , "break continue abort goto callcc"
                     , "amb or uniq"
                     ];

  -- Operator symbols that the operator parser refuses to accept as
  -- user-level operators.
  reservedOpNames :: [String];
  reservedOpNames = concatMap words
                      [ "= \\ -> @ :"
                      , "+ - * / %"
                      , "== /= > >= < <="
                      , "++ && ||"
                      ];

  ----------------------------------------------------------------------
  -- Bracketing combinators: each one runs the opening symbol, then the given
  -- parser, then the closing symbol, and yields the inner parser's result.
  -- The delimiters go through symbol, so trailing white space is skipped.
  parens p        = do{ symbol "("; inner <- p; symbol ")"; return inner };
  braces p        = do{ symbol "{"; inner <- p; symbol "}"; return inner };
  angles p        = do{ symbol "<"; inner <- p; symbol ">"; return inner };
  brackets p      = do{ symbol "["; inner <- p; symbol "]"; return inner };

  -- Punctuation tokens; each consumes the literal text and then any
  -- white space or comments that follow it.
  semi            = lexeme (string ";");
  comma           = lexeme (string ",");
  dot             = lexeme (string ".");
  colon           = lexeme (string ":");

  -- Zero or more occurrences of p separated by commas / semicolons.
  commaSep, semiSep :: GenParser Char a b -> GenParser Char a [b];
  commaSep p      = commaSep1 p <|> return [];
  semiSep p       = semiSep1 p <|> return [];

  -- One or more occurrences of p separated by commas / semicolons.
  commaSep1 p     = p `sepBy1` comma;
  semiSep1 p      = p `sepBy1` semi;


  -- Parses a double-quoted string literal as a lexeme and returns its
  -- contents. Escapes that denote no character (gaps and the empty escape)
  -- are reported by stringChar as Nothing and are dropped here.
  stringLiteral :: CharParser st String;
  stringLiteral   = lexeme (quoted <?> "literal string")
                  where {
                    quoted = do{ char '"'
                               ; pieces <- many stringChar
                               ; char '"' <?> "end of string"
                               ; return [ c | Just c <- pieces ]
                               }
                  }
  ;

  -- One element of a string body: either an ordinary character (Just c) or
  -- an escape sequence, which may yield no character at all (Nothing).
  stringChar :: CharParser st (Maybe Char);
  stringChar      =   fmap Just stringLetter
                  <|> stringEscape
                  <?> "string character"
  ;
   
  -- An unescaped character inside a string: anything except the closing
  -- quote, a backslash, or a character at or below code point 26.
  stringLetter :: GenParser Char a Char;
  stringLetter    = satisfy (\c -> not (c == '"' || c == '\\' || c <= '\026'))
  ;

  -- Parses a backslash escape inside a string. String gaps and the empty
  -- escape produce Nothing; every other escape produces Just the character.
  stringEscape :: GenParser Char a (Maybe Char);
  stringEscape    = char '\\'
                 >> (   (escapeGap   >> return Nothing)
                    <|> (escapeEmpty >> return Nothing)
                    <|> fmap Just escapeCode
                    )
  ;

  escapeEmpty, escapeGap, escapeCode :: GenParser Char a Char;

  -- The empty escape: an ampersand directly after the backslash.
  escapeEmpty     = char '&';

  -- A string gap: one or more white space characters closed by a backslash.
  escapeGap       = many1 space >> (char '\\' <?> "end of string gap");

  -- Escape codes that denote an actual character; the alternatives are
  -- tried in the order listed.
  escapeCode      =   charEsc
                  <|> charNum
                  <|> charAscii
                  <|> charControl
                  <?> "escape code"
  ;

  -- Parses a caret-style control escape (the text following the backslash):
  -- a '^' followed by an upper-case letter, yielding the control character.
  charControl :: CharParser st Char;
  charControl     = do{ char '^'
                      ; code <- upper
                      -- In Haskell, '\^A' is code point 1 (SOH), '\^B' is 2,
                      -- and so on, so the distance from 'A' is shifted by
                      -- one. Without the shift '\^A' decoded to NUL.
                      ; return (toEnum (fromEnum code - fromEnum 'A' + 1))
                      }
  ;

  -- Parses a numeric character escape (the text following the backslash):
  -- decimal digits, 'o' followed by octal digits, or 'x' followed by
  -- hexadecimal digits. Fails as a parse error when the value is not a
  -- valid code point.
  charNum :: CharParser st Char;
  charNum         = do{ code <- decimal 
                                <|> do{ char 'o'; number 8 octDigit }
                                <|> do{ char 'x'; number 16 hexDigit }
                      -- toEnum throws a runtime exception for values above
                      -- maxBound :: Char, so reject those inside the parser
                      -- instead of crashing on malformed input.
                      ; if code > toInteger (fromEnum (maxBound :: Char))
                          then fail "invalid escape sequence"
                          else return (toEnum (fromInteger code))
                      }
  ;

  -- Single-letter escapes: accepts any key character of escMap and yields
  -- the character it is mapped to.
  charEsc         = choice [ char c >> return code | (c, code) <- escMap ]
  ;
                    
  -- Named ASCII escapes: accepts any name listed in asciiMap and yields the
  -- character it is mapped to. Each name is wrapped in try so that a partial
  -- match does not consume input before the next name is attempted.
  charAscii       = choice [ try (string asc >> return code)
                           | (asc, code) <- asciiMap ]
  ;

  -- escape code tables
  -- Pairs each single-letter escape with the character it denotes.
  escMap :: [(Char, Char)];
  escMap          = [ ('a',  '\a'), ('b',  '\b'), ('f',  '\f'), ('n',  '\n')
                    , ('r',  '\r'), ('t',  '\t'), ('v',  '\v'), ('\\', '\\')
                    , ('"',  '"'),  ('\'', '\'')
                    ]
  ;
  -- Pairs each ASCII control-code name with its character. The three-letter
  -- names are listed before the two-letter ones, so charAscii attempts
  -- "SOH" before "SO".
  asciiMap :: [(String, Char)];
  asciiMap        = zip ascii3codes ascii3 ++ zip ascii2codes ascii2
  ;

  -- Two-letter ASCII control-code names, in the same order as ascii2.
  ascii2codes :: [String];
  ascii2codes     = words "BS HT LF VT FF CR SO SI EM FS GS RS US SP"
  ;

  -- Three-letter ASCII control-code names, in the same order as ascii3.
  ascii3codes :: [String];
  ascii3codes     = words (  "NUL SOH STX ETX EOT ENQ ACK BEL "
                          ++ "DLE DC1 DC2 DC3 DC4 NAK SYN ETB "
                          ++ "CAN SUB ESC DEL" )
  ;

  -- Characters denoted by the names in ascii2codes, in the same order.
  ascii2 :: [Char];
  ascii2          = "\BS\HT\LF\VT\FF\CR\SO\SI\EM\FS\GS\RS\US\SP"
  ;

  -- Characters denoted by the names in ascii3codes, in the same order.
  ascii3 :: [Char];
  ascii3          = "\NUL\SOH\STX\ETX\EOT\ENQ\ACK\BEL\DLE\DC1\DC2\DC3\DC4\NAK\SYN\ETB\CAN\SUB\ESC\DEL"
  ;

  -- Parses a number as a lexeme: Left for an integer, Right for a value
  -- with a fraction and/or exponent. Trailing white space is skipped.
  naturalOrFloat :: GenParser Char a (Either Integer Numeric);
  naturalOrFloat  = do{ value <- natFloat; whiteSpace; return value }
                    <?> "number"
  ;

  -- floats
  -- Number without lexeme handling. A leading '0' is consumed first and
  -- the remainder is handed to zeroNumFloat; otherwise the number is a
  -- decimal one.
  natFloat :: GenParser Char a (Either Integer Numeric);
  natFloat        =   (char '0' >> zeroNumFloat)
                  <|> decimalFloat
  ;

  -- Continuation after a leading '0' has been consumed: a hexadecimal or
  -- octal integer, further decimal digits, a fraction/exponent applied to
  -- zero, or the plain integer 0.
  zeroNumFloat :: GenParser Char a (Either Integer Numeric);
  zeroNumFloat    =   fmap Left (hexadecimal <|> octal)
                  <|> decimalFloat
                  <|> fractFloat 0
                  <|> return (Left 0)
  ;

  -- Decimal digits optionally followed by a fraction and/or exponent.
  -- Yields Left n when only the integer part is present.
  decimalFloat    = decimal >>= \n -> option (Left n) (fractFloat n)
  ;
  -- Parses the fraction/exponent part that follows integer part n and
  -- wraps the resulting value in Right.
  fractFloat n    = fmap Right (fractExponent n)
  ;

  -- Given the integer part n, parses either a fraction with an optional
  -- exponent, or an exponent alone, and returns the combined value.
  fractExponent :: Integer -> GenParser Char a Numeric;
  fractExponent n = withFraction <|> exponentOnly
                  where {
                    withFraction = fraction >>= \fract ->
                                   option 1.0 exponent' >>= \expo ->
                                   return ((fromInteger n + fract) * expo);
                    exponentOnly = exponent' >>= \expo ->
                                   return ((fromInteger n) * expo)
                  }
  ;

  -- Parses a '.' followed by at least one digit and returns the value of
  -- those digits as a fraction.
  fraction :: GenParser Char a Numeric;
  fraction        = (char '.' >> fmap toFraction (many1 digit <?> "fraction"))
                    <?> "fraction"
                  where {
                    -- Fold from the right so every digit is divided by ten
                    -- once for each position after the decimal point.
                    toFraction ds  = foldr shiftIn 0.0 ds;
                    shiftIn d acc  = (acc + fromIntegral (digitToInt d)) / 10.0
                  }
  ;

  -- Parses an exponent marker ('e' or 'E'), an optional sign and decimal
  -- digits, and returns ten raised to that (possibly negative) power.
  exponent' :: GenParser Char a Numeric;
  exponent'       = scientific <?> "exponent"
                  where {
                    scientific = do{ oneOf "eE"
                                   ; applySign <- sign
                                   ; digits    <- decimal <?> "exponent"
                                   ; return (power (applySign digits))
                                   };
                    -- Negative exponents are the reciprocal of the
                    -- corresponding positive power.
                    power e    = if e < 0
                                   then 1.0 / power (negate e)
                                   else fromInteger (10 ^ e)
                  }
  ;

  -- Parses an optional sign and returns the function to apply to the
  -- number that follows: negate for '-', id for '+' or for no sign.
  sign            :: CharParser st (Integer -> Integer);
  sign            =   do{ char '-'; return negate }
                  <|> do{ char '+'; return id }
                  <|> return id
  ;

  -- One or more decimal digits, read as an Integer.
  decimal         = number 10 digit
  ;

  -- Radix-prefixed integers: the prefix letter (either case) followed by
  -- one or more digits of that base.
  hexadecimal, octal :: GenParser Char a Integer;
  hexadecimal     = oneOf "xX" >> number 16 hexDigit
  ;
  octal           = oneOf "oO" >> number 8 octDigit
  ;

  -- Parses one or more digits with baseDigit and interprets them as a
  -- number in the given base. The value is forced before it is returned.
  number base baseDigit
      = many1 baseDigit >>= \digits ->
          let { step acc d = base * acc + toInteger (digitToInt d)
              ; value      = foldl step 0 digits
              }
          in value `seq` return value
  ;

  -- Parses the reserved operator name as a lexeme. It fails without
  -- consuming input when name is directly followed by another operator
  -- character, i.e. when it is only the prefix of a longer operator.
  reservedOp name = lexeme (try exactOp)
      where {
        exactOp = string name
               >> (notFollowedBy opLetter <?> ("end of " ++ show name))
      }
  ;

  -- Parses a user-level operator as a lexeme and returns its text.
  -- Reserved operators are rejected without consuming input.
  operator = lexeme (try nonReserved)
      where {
        nonReserved = oper >>= \name ->
          if isReservedOp name
            then unexpected ("reserved operator " ++ show name)
            else return name
      }
  ;

  -- Raw operator text: a start character followed by any number of
  -- operator characters. No reserved-operator check, no white space skip.
  oper :: GenParser Char a [Char];
  oper =
      (opStart >>= \c -> many opLetter >>= \cs -> return (c:cs))
      <?> "operator"
  ;

  -- Tests whether the given operator text is one of reservedOpNames.
  -- The sorted table lives in a where binding of a point-free definition,
  -- so it is built once and shared by all calls rather than being
  -- re-sorted for every operator parsed (mirroring theReservedNames).
  isReservedOp :: String -> Bool;
  isReservedOp = isReserved sortedReservedOps
      where {
        sortedReservedOps = sort reservedOpNames
      }
  ;

  -- Parses the reserved word name as a lexeme. It fails without consuming
  -- input when name is directly followed by another identifier character,
  -- i.e. when it is only the prefix of a longer identifier.
  reserved name = lexeme (try exactWord)
      where {
        exactWord = string name
                 >> (notFollowedBy identLetter <?> ("end of " ++ show name))
      }
  ;

  -- Parses an identifier as a lexeme and returns its text.
  -- Reserved words are rejected without consuming input.
  identifier = lexeme (try nonReserved)
      where {
        nonReserved = ident >>= \name ->
          if isReservedName name
            then unexpected ("reserved word " ++ show name)
            else return name
      }
  ;

  -- Raw identifier text: a start character followed by any number of
  -- identifier characters. No reserved-word check, no white space skip.
  ident :: GenParser Char a [Char];
  ident
      = (identStart >>= \c -> many identLetter >>= \cs -> return (c:cs))
        <?> "identifier"
  ;

  -- Tests whether the given word is one of the reserved names, using the
  -- pre-sorted table theReservedNames.
  isReservedName :: String -> Bool;
  isReservedName = isReserved theReservedNames
  ;
      
  -- Membership test for a list that is expected to be sorted in ascending
  -- order: entries smaller than name are skipped, and the first entry that
  -- is not smaller decides the answer, so the scan stops early.
  isReserved :: [String] -> String -> Bool;
  isReserved names name
      = case dropWhile (< name) names of
          { (candidate : _) -> candidate == name
          ; []              -> False
          }
  ;
  -- reservedNames in ascending order. isReserved stops scanning at the
  -- first entry greater than the word being looked up, so the table it
  -- receives has to be sorted.
  theReservedNames :: [String];
  theReservedNames = sort reservedNames
  ;

  -- Parses the literal text name, then skips trailing white space and
  -- comments; returns the matched text.
  symbol name = do{ matched <- string name; whiteSpace; return matched };

  -- Runs p, then skips any white space and comments that follow, and
  -- returns the result of p.
  lexeme p = p >>= \result -> whiteSpace >> return result
  ;

  -- Skips zero or more runs of blanks, line comments and block comments.
  -- The empty label keeps white space out of "expecting" error messages.
  whiteSpace, simpleSpace, oneLineComment, multiLineComment, inCommentMulti :: GenParser Char a ();
  whiteSpace = skipMany skippable
             where {
               skippable = simpleSpace <|> oneLineComment <|> multiLineComment <?> ""
             }
  ;

  -- Skips one or more white space characters (as classified by isSpace).
  simpleSpace = blank >> skipMany blank
              where {
                blank = satisfy isSpace
              }
  ;

  -- Skips a line comment: the commentLine prefix and everything up to,
  -- but not including, the next newline.
  oneLineComment =
      try (string commentLine) >> skipMany (satisfy (/= '\n'))
  ;

  -- Skips a block comment: the opening delimiter followed by the comment
  -- body, which inCommentMulti consumes through the closing delimiter.
  multiLineComment =
      try (string commentStart) >> inCommentMulti
  ;

  -- Consumes the body of a block comment up to and including its closing
  -- delimiter. Nested block comments are skipped recursively; a lone
  -- delimiter character that starts neither delimiter is skipped by itself.
  inCommentMulti 
      =   (try (string commentEnd) >> return ())
      <|> (multiLineComment >> inCommentMulti)
      <|> (skipMany1 (noneOf delimiterChars) >> inCommentMulti)
      <|> (oneOf delimiterChars >> inCommentMulti)
      <?> "end of comment"  
      where {
        -- every character that occurs in either comment delimiter
        delimiterChars = nub (commentEnd ++ commentStart)
      }
  ; 
}