jsonparser.hs

-- start snippet imports
{-# LANGUAGE DeriveGeneric, TupleSections, LambdaCase #-}
module JSONParser where

import Control.Applicative (Alternative(..), optional)
import Control.Monad (replicateM)
import Data.Bits (shiftL)
import Data.Char (isDigit, isHexDigit, isSpace, chr, ord, digitToInt)
import Data.Functor (($>))
import Data.List (intercalate)
import GHC.Generics (Generic)
import Numeric (showHex)
import Test.QuickCheck hiding (Positive, Negative)
-- end snippet imports

-- start snippet type
data JValue = JNull
            | JBool Bool
            | JString String
            | JNumber { int :: Integer, frac :: [Int], exponent :: Integer }
            | JArray [JValue]
            | JObject [(String, JValue)]
            deriving (Eq, Generic)
-- end snippet type

-- start snippet show-instance
instance Show JValue where
  show value = case value of
    JNull          -> "null"
    JBool True     -> "true"
    JBool False    -> "false"
    JString s      -> showJSONString s
    JNumber s [] 0 -> show s
    JNumber s f 0  -> show s ++ "." ++ concatMap show f
    JNumber s [] e -> show s ++ "e" ++ show e
    JNumber s f e  -> show s ++ "." ++ concatMap show f ++ "e" ++ show e
    JArray a       -> "[" ++ intercalate ", " (map show a) ++ "]"
    JObject o      -> "{" ++ intercalate ", " (map showKV o) ++ "}"
    where
      showKV (k, v) = showJSONString k ++ ": " ++ show v

showJSONString :: String -> String
showJSONString s = "\"" ++ concatMap showJSONChar s ++ "\""

isControl :: Char -> Bool
isControl c = c `elem` ['\0' .. '\31']

showJSONChar :: Char -> String
showJSONChar c = case c of
  '\'' -> "'"
  '\"' -> "\\\""
  '\\' -> "\\\\"
  '/'  -> "\\/"
  '\b' -> "\\b"
  '\f' -> "\\f"
  '\n' -> "\\n"
  '\r' -> "\\r"
  '\t' -> "\\t"
  _ | isControl c -> "\\u" ++ showJSONNonASCIIChar c
  _ -> [c]
  where
    showJSONNonASCIIChar c =
      let a = "0000" ++ showHex (ord c) "" in drop (length a - 4) a
-- end snippet show-instance

-- start snippet scalargens
jNullGen :: Gen JValue
jNullGen = pure JNull

jBoolGen :: Gen JValue
jBoolGen = JBool <$> arbitrary

jNumberGen :: Gen JValue
jNumberGen = JNumber <$> arbitrary <*> listOf (choose (0, 9)) <*> arbitrary
-- end snippet scalargens

-- start snippet stringgen
jsonStringGen :: Gen String
jsonStringGen =
  concat <$> listOf (oneof [ vectorOf 1 arbitraryUnicodeChar
                           , escapedUnicodeChar ])
  where
    escapedUnicodeChar = ("\\u" ++) <$> vectorOf 4 (elements hexDigitLetters)
    hexDigitLetters    = ['0'..'9'] ++ ['a'..'f'] ++ ['A'..'F']

jStringGen :: Gen JValue
jStringGen = JString <$> jsonStringGen
-- end snippet stringgen

-- start snippet compositegens
jArrayGen :: Int -> Gen JValue
jArrayGen = fmap JArray . scale (`div` 2) . listOf . jValueGen . (`div` 2)

jObjectGen :: Int -> Gen JValue
jObjectGen = fmap JObject . scale (`div` 2) . listOf . objKV . (`div` 2)
  where
    objKV n = (,) <$> jsonStringGen <*> jValueGen n
-- end snippet compositegens

-- start snippet valuegen
jValueGen :: Int -> Gen JValue
jValueGen n = if n < 5
  then frequency [(4, oneof scalarGens), (1, oneof (compositeGens n))]
  else frequency [(1, oneof scalarGens), (4, oneof (compositeGens n))]
  where
    scalarGens      = [jNullGen , jBoolGen , jNumberGen , jStringGen]
    compositeGens n = [jArrayGen n, jObjectGen n]
-- end snippet valuegen

-- start snippet value-arbitrary
instance Arbitrary JValue where
  arbitrary = sized jValueGen
  shrink    = genericShrink
-- end snippet value-arbitrary

-- start snippet stringify
jsonWhitespaceGen :: Gen String
jsonWhitespaceGen =
  scale (round . sqrt . fromIntegral)
  . listOf
  . elements
  $ [' ' , '\n' , '\r' , '\t']

stringify :: JValue -> Gen String
stringify = pad . go
  where
    surround l r j = l ++ j ++ r
    pad gen = surround <$> jsonWhitespaceGen <*> jsonWhitespaceGen <*> gen
    commaSeparated = pad . pure . intercalate ","

    go value = case value of
      JArray elements ->
        mapM (pad . stringify) elements
          >>= fmap (surround "[" "]") . commaSeparated
      JObject kvs ->
        mapM stringifyKV kvs >>= fmap (surround "{" "}") . commaSeparated
      _           -> return $ show value

    stringifyKV (k, v) =
      surround <$> pad (pure $ showJSONString k) <*> stringify v <*> pure ":"
-- end snippet stringify

-- start snippet parser
newtype Parser i o =
  Parser { runParser :: i -> Maybe (i, o) }
-- end snippet parser

-- start snippet char-parser-1
char1 :: Char -> Parser String Char
char1 c = Parser $ \case
  (x:xs) | x == c -> Just (xs, x)
  _               -> Nothing
-- end snippet char-parser-1

-- start snippet char-parser-2
satisfy :: (a -> Bool) -> Parser [a] a
satisfy predicate = Parser $ \case
  (x:xs) | predicate x -> Just (xs, x)
  _                    -> Nothing

char :: Char -> Parser String Char
char c = satisfy (== c)
-- end snippet char-parser-2

-- start snippet digit-parser1
digit1 :: Parser String Int
digit1 = Parser $ \i -> case runParser (satisfy isDigit) i of
  Nothing      -> Nothing
  Just (i', o) -> Just (i', digitToInt o)
-- end snippet digit-parser1

-- start snippet digit-parser2
digit2 :: Parser String Int
digit2 = Parser $ \i -> case runParser (satisfy isDigit) i of
  Nothing      -> Nothing
  Just (i', o) -> Just . fmap digitToInt $ (i', o)

digit3 :: Parser String Int
digit3 = Parser $ \i -> fmap (fmap digitToInt) . runParser (satisfy isDigit) $ i
-- end snippet digit-parser2

-- start snippet functor
instance Functor (Parser i) where
  fmap f parser = Parser $ fmap (fmap f) . runParser parser
-- end snippet functor

-- start snippet digit-parser3
digit :: Parser String Int
digit = digitToInt <$> satisfy isDigit
-- end snippet digit-parser3

-- start snippet string-parser1
string1 :: String -> Parser String String
string1 s = case s of
  ""     -> Parser $ \i -> Just (i, "")
  (c:cs) -> Parser $ \i -> case runParser (char c) i of
    Nothing        -> Nothing
    Just (rest, _) -> case runParser (string1 cs) rest of
      Nothing         -> Nothing
      Just (rest', _) -> Just (rest', c:cs)
-- end snippet string-parser1

-- start snippet string-parser2
string2 :: String -> Parser String String
string2 s = case s of
  ""     -> Parser $ pure . (, "")
  (c:cs) -> Parser $ \i -> case runParser (char c) i of
    Nothing        -> Nothing
    Just (rest, c) -> fmap (c:) <$> runParser (string2 cs) rest
-- end snippet string-parser2

-- start snippet applicative
instance Applicative (Parser i) where
  pure x    = Parser $ pure . (, x)
  pf <*> po = Parser $ \input -> case runParser pf input of
    Nothing        -> Nothing
    Just (rest, f) -> fmap f <$> runParser po rest
-- end snippet applicative

-- start snippet string-parser3
string :: String -> Parser String String
string ""     = pure ""
string (c:cs) = (:) <$> char c <*> string cs
-- end snippet string-parser3

-- start snippet null-parser
jNull :: Parser String JValue
jNull = string "null" $> JNull
-- end snippet null-parser

-- start snippet alternative
instance Alternative (Parser i) where
  empty = Parser $ const empty
  p1 <|> p2 = Parser $ \input -> runParser p1 input <|> runParser p2 input
-- end snippet alternative

-- start snippet bool-parser
jBool :: Parser String JValue
jBool =   string "true"  $> JBool True
      <|> string "false" $> JBool False
-- end snippet bool-parser

-- start snippet bool-parser-alt
lookahead :: Parser String Char
lookahead = Parser $ \case
  i@(x:_) -> Just (i, x)
  _       -> Nothing

jBoolAlt :: Parser String JValue
jBoolAlt = do
  c <- lookahead
  JBool <$> case c of
    't' -> string "true"  $> True
    'f' -> string "false" $> False
    _   -> empty
-- end snippet bool-parser-alt

-- start snippet jchar-parser
jsonChar :: Parser String Char
jsonChar =   string "\\\"" $> '"'
         <|> string "\\\\" $> '\\'
         <|> string "\\/"  $> '/'
         <|> string "\\b"  $> '\b'
         <|> string "\\f"  $> '\f'
         <|> string "\\n"  $> '\n'
         <|> string "\\r"  $> '\r'
         <|> string "\\t"  $> '\t'
         <|> unicodeChar
         <|> satisfy (\c -> not (c == '\"' || c == '\\' || isControl c))
  where
    unicodeChar =
      chr . fromIntegral . digitsToNumber 16 0
        <$> (string "\\u" *> replicateM 4 hexDigit)

    hexDigit = digitToInt <$> satisfy isHexDigit

digitsToNumber :: Int -> Integer -> [Int] -> Integer
digitsToNumber base =
  foldl (\num d -> num * fromIntegral base + fromIntegral d)
-- end snippet jchar-parser

-- start snippet monad
instance Monad (Parser i) where
  p >>= f = Parser $ \input -> case runParser p input of
    Nothing        -> Nothing
    Just (rest, o) -> runParser (f o) rest
-- end snippet monad

-- start snippet string-parser
jString :: Parser String JValue
jString = JString <$> (char '"' *> jString')                   -- 1
  where
    jString' = do
      optFirst <- optional jsonChar                            -- 2
      case optFirst of
        Nothing -> "" <$ char '"'                              -- 3
        Just first | not (isSurrogate first) ->                -- 4
          (first:) <$> jString'                                -- 5
        Just first -> do                                       -- 6
          second <- jsonChar                                   -- 7
          if isHighSurrogate first && isLowSurrogate second    -- 8
          then (combineSurrogates first second :) <$> jString' -- 9
          else empty                                           -- 10
-- end snippet string-parser

-- start snippet string-parser-helper
highSurrogateLowerBound, highSurrogateUpperBound :: Int
highSurrogateLowerBound = 0xD800
highSurrogateUpperBound = 0xDBFF

lowSurrogateLowerBound, lowSurrogateUpperBound :: Int
lowSurrogateLowerBound  = 0xDC00
lowSurrogateUpperBound  = 0xDFFF

isHighSurrogate, isLowSurrogate, isSurrogate :: Char -> Bool
isHighSurrogate a =
  ord a >= highSurrogateLowerBound && ord a <= highSurrogateUpperBound
isLowSurrogate a  =
  ord a >= lowSurrogateLowerBound && ord a <= lowSurrogateUpperBound
isSurrogate a     = isHighSurrogate a || isLowSurrogate a

combineSurrogates :: Char -> Char -> Char
combineSurrogates a b = chr $
  ((ord a - highSurrogateLowerBound) `shiftL` 10)
  + (ord b - lowSurrogateLowerBound) + 0x10000
-- end snippet string-parser-helper

-- 1 consume the starting quote
-- 2 get first (optional) char
-- 3 no first char => empty string ending with quote
-- 4 first char is not surrogate
-- 5 process rest
-- 6 first char is surrogate
-- 7 get second char
-- 8 valid surrogate pair
-- 9 combine surrogates, process rest
-- 10 invalid surrogate, fail

-- start snippet string-parser-test
prop_genParseJString :: Property
prop_genParseJString =
  forAllShrink jStringGen shrink $ \js ->
    case runParser jString (show js) of
      Nothing     -> False
      Just (_, o) -> o == js
-- end snippet string-parser-test

-- start snippet int-parser
jUInt :: Parser String Integer
jUInt =   (\d ds -> digitsToNumber 10 0 (d:ds)) <$> digit19 <*> digits
      <|> fromIntegral <$> digit

digit19 :: Parser String Int
digit19 = digitToInt <$> satisfy (\x -> isDigit x && x /= '0')

digits :: Parser String [Int]
digits = some digit

jInt' :: Parser String Integer
jInt' = signInt <$> optional (char '-') <*> jUInt

signInt :: Maybe Char -> Integer -> Integer
signInt (Just '-') i = negate i
signInt _          i = i
-- end snippet int-parser

-- start snippet fracexp-parser
jFrac :: Parser String [Int]
jFrac = char '.' *> digits

jExp :: Parser String Integer
jExp = (char 'e' <|> char 'E')
  *> (signInt <$> optional (char '+' <|> char '-') <*> jUInt)
-- end snippet fracexp-parser

-- start snippet intfracexp-parser
jInt :: Parser String JValue
jInt = JNumber <$> jInt' <*> pure [] <*> pure 0

jIntExp :: Parser String JValue
jIntExp = JNumber <$> jInt' <*> pure [] <*> jExp

jIntFrac :: Parser String JValue
jIntFrac = (\i f -> JNumber i f 0) <$> jInt' <*> jFrac

jIntFracExp :: Parser String JValue
jIntFracExp = (\ ~(JNumber i f _) e -> JNumber i f e) <$> jIntFrac <*> jExp
-- end snippet intfracexp-parser

-- start snippet number-parser
jNumber :: Parser String JValue
jNumber = jIntFracExp <|> jIntExp <|> jIntFrac <|> jInt
-- end snippet number-parser

-- start snippet number-parser-test
prop_genParseJNumber :: Property
prop_genParseJNumber =
  forAllShrink jNumberGen shrink $ \jn ->
    case runParser jNumber (show jn) of
      Nothing     -> False
      Just (_, o) -> o == jn
-- end snippet number-parser-test

-- start snippet array-parser-helper
surroundedBy ::
  Parser String a -> Parser String b -> Parser String a
surroundedBy p1 p2 = p2 *> p1 <* p2

separatedBy :: Parser i v -> Parser i s -> Parser i [v]
separatedBy v s =   (:) <$> v <*> many (s *> v)
                <|> pure []

spaces :: Parser String String
spaces = many (char ' ' <|> char '\n' <|> char '\r' <|> char '\t')
-- end snippet array-parser-helper

-- start snippet array-parser
jArray :: Parser String JValue
jArray = JArray <$>
  (char '['
   *> (jValue `separatedBy` char ',' `surroundedBy` spaces)
   <* char ']')
-- end snippet array-parser

-- start snippet array-parser-test
prop_genParseJArray :: Property
prop_genParseJArray =
  forAllShrink (sized jArrayGen) shrink $ \ja -> do
    jas <- dropWhile isSpace <$> stringify ja
    return . counterexample (show jas) $ case runParser jArray jas of
      Nothing     -> False
      Just (_, o) -> o == ja
-- end snippet array-parser-test

-- start snippet object-parser
jObject :: Parser String JValue
jObject = JObject <$>
  (char '{' *> pair `separatedBy` char ',' `surroundedBy` spaces <* char '}')
  where
    pair = (\ ~(JString s) j -> (s, j))
      <$> (jString `surroundedBy` spaces)
      <*  char ':'
      <*> jValue
-- end snippet object-parser

-- start snippet object-parser-test
prop_genParseJObject :: Property
prop_genParseJObject =
  forAllShrink (sized jObjectGen) shrink $ \jo -> do
    jos <- dropWhile isSpace <$> stringify jo
    return . counterexample (show jos) $ case runParser jObject jos of
      Nothing     -> False
      Just (_, o) -> o == jo
-- end snippet object-parser-test

-- start snippet value-parser
jValue :: Parser String JValue
jValue = jValue' `surroundedBy` spaces
  where
    jValue' =   jNull
            <|> jBool
            <|> jString
            <|> jNumber
            <|> jArray
            <|> jObject
-- end snippet value-parser

-- start snippet value-parser-alt
jValueAlt ::
  Parser String JValue
jValueAlt =
  jValue' `surroundedBy` spaces
  where
    jValue' = do
      c <- lookahead
      case c of
        'n'  -> jNull
        't'  -> jBool
        'f'  -> jBool
        '\"' -> jString
        '['  -> jArray
        '{'  -> jObject
        _    -> jNumber
-- end snippet value-parser-alt

-- start snippet parsejson
parseJSON :: String -> Maybe JValue
parseJSON s = case runParser jValue s of
  Just ("", j) -> Just j
  _            -> Nothing
-- end snippet parsejson

-- start snippet parsejson-test
prop_genParseJSON :: Property
prop_genParseJSON = forAllShrink (sized jValueGen) shrink $ \value -> do
  json <- stringify value
  return . counterexample (show json) . (== Just value) . parseJSON $ json
-- end snippet parsejson-test

-- start snippet run-test
runTests :: IO ()
runTests = do
  putStrLn "== prop_genParseJString =="
  quickCheck prop_genParseJString

  putStrLn "== prop_genParseJNumber =="
  quickCheck prop_genParseJNumber

  putStrLn "== prop_genParseJArray =="
  quickCheck prop_genParseJArray

  putStrLn "== prop_genParseJObject =="
  quickCheck prop_genParseJObject

  putStrLn "== prop_genParseJSON =="
  quickCheck prop_genParseJSON
-- end snippet run-test