Parsing Stuff in Haskell

London HUG, 23rd January 2013

Ben Clifford, benc@cqx.ltd.uk

TURN OFF YOUR PHONES!!! NOW!!

Parsing: taking a string of symbols and analysing it so we can do something interesting with the content.
"57" :: String 
== ['5', '7'] :: [Char]
"57" + 1 :: ?
 (read "57") + 1 
58
CSV:   2,37,"ben clifford",10,888738A
HTTP: GET / 
404 Page Not Found
 {
    "rsvp_limit": 90,
    "status": "upcoming",
    "visibility": "public",
    "venue": {
        "lon": -0.10435,
        "repinned": true,
        "name": "City University",
        "address_1": "College Building, St John Street",
        "lat": 51.52725,
        "country": "gb",
        "city": "London. EC1V 4PB"
    },
    "id": "88494702",
    "time": 1358965800000,
    "yes_rsvp_count": 55,
    "event_url": "http://www.meetup.com/London-HUG/events/88494702/",
    "name": "Parsing Stuff in Haskell",
    "group": {
        "id": 3866232,
        "name": "London HUG",
        "join_mode": "open",
        "urlname": "London-HUG",
    }


}
 
 { "name":"ben",
    "beer":true, 
    "likes": ["puppies", "computers"],
    "weight": 78
 }
 
import Text.ParserCombinators.Parsec hiding ((<|>), many)
import Control.Applicative
import Control.Monad
 true
 
  matchTrue :: Parser String
  matchTrue = string "true"
 
 parse matchTrue "a test parser" "true"
 Right "true"
 
 parse matchTrue "a test parser" "lemon"
 Left (line 1, column 1):
 unexpected "l"
 expecting "true"
 
 alwaysTrue :: Parser Bool
 alwaysTrue = pure True
 
 parse alwaysTrue "test" "true"
 Right True

 parse alwaysTrue "test" "lemon"
 Right True

 parse alwaysTrue "test" "false"
 Right True
 
 matchTrue :: Parser String
 matchTrue = string "true"

 alwaysTrue :: Parser Bool
 alwaysTrue = pure True
 
 boolTrue :: Parser Bool
 boolTrue = matchTrue *> alwaysTrue
 boolTrue = (string "true") *> (pure True)

 *> :: Applicative f => f a -> f b -> f b
 *> :: Parser a -> Parser b -> Parser b
 
 parse boolTrue "test" "true"
 Right True
 
 parse boolTrue "test" "lemon"
 Left (line 1, column 1):
 unexpected "l"
 expecting "true"
 
 boolFalse :: Parser Bool
 boolFalse = (string "false") *> (pure False)
 
 boolTrue :: Parser Bool
 boolFalse :: Parser Bool
 
 bool :: Parser Bool
 bool = boolTrue <|> boolFalse

 (<|>) :: Alternative f => f a -> f a -> f a
 (<|>) :: Parser a -> Parser a -> Parser a
 
 parse bool "test" "true"
 Right True

 parse bool "test" "false"
 Right False
 
 parse bool "test" "lemon"
 Left (line 1, column 1):
 unexpected "l"
 expecting "true" or "false"
 
 "City University"

 stringLiteral :: Parser String
 stringLiteral =
  char '"' *> (many (noneOf ['"'])) <* char '"'
 
 char :: Parser Char
 noneOf :: [Char] -> Parser Char
 many :: Parser p -> Parser [p]
 <* :: Parser a -> Parser b -> Parser a
 *> :: Parser a -> Parser b -> Parser b
 
 parse stringLiteral "test" "\"hello\""
 Parser returned: Right "hello"

 parse stringLiteral "test" "true"
 Parser returned: Left (line 1, column 1):
 unexpected "t"
 expecting "\""

 parse stringLiteral "test" "\"true\""
 Right "true"
 
 value = bool <|> stringLiteral
 
 value = bool <|> stringLiteral :: Parser ?


 Couldn't match expected type `Bool' with actual type `[Char]'
 Expected type: Text.Parsec.Prim.ParsecT
       String () Data.Functor.Identity.Identity Bool
 Actual type: Parser [Char]
  In the second argument of `(<|>)', namely `stringLiteral'
  In the expression: bool <|> stringLiteral
 
 (<|>) :: Parser a -> Parser a -> Parser a 
 
  data JSONValue = 
      B Bool
    | S String

  jsonValue :: Parser JSONValue
 
  jsonValue :: Parser JSONValue
  jsonValue = bool <|> stringLiteral
 
    Couldn't match expected type `JSONValue' with actual type `Bool'
    Expected type: Text.Parsec.Prim.ParsecT
                     String () Data.Functor.Identity.Identity JSONValue
      Actual type: Parser Bool
    In the first argument of `(<|>)', namely `bool'
    In the expression: bool <|> stringLiteral
 
 bool :: Parser Bool
 stringLiteral :: Parser String
 
 jsonBool :: Parser JSONValue
 jsonStringLiteral :: Parser JSONValue

 jsonValue :: Parser JSONValue
 jsonValue = jsonBool <|> jsonStringLiteral
 
 bool :: Parser Bool

 data JSONValue = B Bool | ...
 jsonBool :: Parser JSONValue
 
 jsonBool = B <$> bool
 jsonBool = fmap B bool
 
 (<$>) :: Functor f => (a -> b) -> f a -> f b
 (<$>) :: (a -> b) -> Parser a -> Parser b

 map :: (a -> b) -> [a] -> [b]
 
 parse jsonBool "test" "true"
 Right (B True)

 parse jsonBool "test" "false"
 Right (B False)

 parse jsonBool "test" "lemon"
 Left (line 1, column 1):
 unexpected "l"
 expecting "true" or "false"
 
 jsonStringLiteral :: Parser JSONValue
 jsonStringLiteral = S <$> stringLiteral
 
 jsonBool :: Parser JSONValue
 jsonStringLiteral :: Parser JSONValue

 jsonValue :: Parser JSONValue
 jsonValue = jsonBool <|> jsonStringLiteral
 
 parse jsonValue "test" "\"hello\""
 Right (S "hello")
 parse jsonValue "test" "true"
 Right (B True)
 
 ["Hello","Goodbye",true,false,true] 
 
 array :: Parser [JSONValue]
 array =
   (char '[')
   *>
   ( jsonValue `sepBy` (char ',') )
   <*
   (char ']')
 
 data JSONValue = ... | A [JSONValue]
  jsonArray :: Parser JSONValue
  jsonArray = A <$> array
  jsonValue = ... <|> jsonArray
 
 {"name":"Ben","beer":true}
 
  data JSONValue = ... | O [(String, JSONValue)] | ...
  jsonObject =
    O <$> ((char '{') *>
           (objectEntry `sepBy` comma)
           <* (char '}'))
 
 "beer":true
 
  objectEntry :: Parser (String, JSONValue)
  objectEntry = do
    key <- stringLiteral
    char ':'
    value <- jsonValue
    return (key, value)
 
 parse objectEntry "test" ""beer":true"
 Right ("beer",B True)
 
 parse jsonValue "test" "{"beer":true}"
 Right (O [("beer",B True)])
 
 parse jsonValue "test" "[true,true,true]"
 Right (A [B True,B True,B True])
 
 parse jsonValue "test" "[true, true, true]"
                               ^
 Left (line 1, column 7):
 unexpected " "
 expecting "true", "false", "\"", "[" or "{"
 
 ws :: Parser String
 ws = many (oneOf " \t\n")
 
 lexeme p = p <* ws
 
 jsonBool' = B <$> bool

 jsonBool = lexeme jsonBool'
 
 jsonBool = lexeme jsonBool'
 jsonStringLiteral = lexeme (S <$> stringLiteral)

 jsonArray = A <$> array
 array = (lexeme $ char '[') *>
           (  jsonValue `sepBy` (lexeme $ char ',') )
         <* (lexeme $ char ']')
 

before:

 parse jsonValue "test" "[true, true, true]"
                               ^
 Left (line 1, column 7):
 unexpected " "
 expecting "true", "false", "\"", "[" or "{"
 

after:

 parse jsonValue "test" "[true, true, true]"
 Right (A [B True,B True,B True])
 

data.txt:

 {
    "rsvp_limit": 90,
    "status": "upcoming",
    "visibility": "public",
    "venue": {
        "lon": -0.10435,
        "repinned": true,
        "name": "City University",
        "address_1": "College Building, St John Street",
        "lat": 51.52725,
        "country": "gb",
        "city": "London. EC1V 4PB"
    },
    "id": "88494702",
    "time": 1358965800000,
    "yes_rsvp_count": 55,
    "event_url": "http://www.meetup.com/London-HUG/events/88494702/",
    "name": "Parsing Stuff in Haskell",
    "group": {
        "id": 3866232,
        "name": "London HUG",
        "join_mode": "open",
        "urlname": "London-HUG",
    }
}
 
 x <- parseFromFile jsonValue "data.txt"
 
 Right (O [
   ("rsvp_limit",N 90.0),
   ("status",S "upcoming"),
   ("visibility",S "public"),
   ("venue",O [
     ("lon",N (-0.10435)),
     ("repinned",B True),
     ("name",S "City University"),
     ("address_1",S "College Building, St John Street"),
     ("lat",N 51.52725),
     ("country",S "gb"),
     ("city",S "London. EC1V 4PB")]),
   ...
 
 do
    x <- parseFromFile jsonValue "data.txt"
    case x of
      Right (O x') -> do
        print $ lookup "status" x'
        print $ lookup "rsvp_limit" x'
 Just (S "upcoming")
 Just (N 90.0)
 
  day =  (string "Monday" *> pure 1)
     <|> (string "Tuesday" *> pure 2)
     <|> (string "Wednesday" *> pure 3)
     <|> (string "Thursday" *> pure 4)
 ...
 
 parse day "test" "Monday"
 Right 1
 
 parse day "test" "Tuesday"
 Right 2
 
 parse day "test" "Wednesday"
 Right 3
 
 parse day "test" "Thursday"
                    ^
 Left (line 1, column 1):
 unexpected "h"
 expecting "Tuesday"
 
  day = (string "Monday" *> pure 1)
     <|> try (string "Tuesday" *> pure 2)
     <|>(string "Wednesday" *> pure 3)
     <|>(string "Thursday" *> pure 4)
 ...
 
  <||> :: Parser a -> Parser a -> Parser a
  p <||> q = (try p) <|> q
 
  day =   (string "Monday" *> pure 1)
     <||> (string "Tuesday" *> pure 2)
     <||> (string "Wednesday" *> pure 3)
     <||> (string "Thursday" *> pure 4)
 ...
 
 parse jsonValue "test" "lemon"
 Left (line 1, column 1):
 unexpected "l"
 expecting "true", "false", "\"", "[" or "{"
 
 <?> :: Parser p -> String -> Parser p
 
  jsonValue = jsonBool
          <|> (jsonStringLiteral  "string literal")
          <|> jsonArray
          <|> jsonObject
          <|> jsonNumber
 
 parse jsonValue "test" "lemon"
 Left (line 1, column 1):
 unexpected "l"
 expecting "true", "false", string literal, "[" or "{" 
 
 parse jsonValue "test" "lemon"
 Left (line 1, column 1):
 unexpected "l"
 expecting boolean, string literal, array or object
 
 parse jsonValue "test" "lemon"
 Left (line 1, column 1):
 unexpected "l"
 expecting JSON value
 
 parse jsonBool "test" "false"
 Right (B False)
 
 parse jsonBool "test" "falsehood"
 Using jsonBool to parse string: falsehood
 Right (B False)
 
 parse (jsonBool <* eof) "test" "falsehood"
                                    ^
 Left (line 1, column 6):
 unexpected 'h'
 expecting end of input
 
 parse parseFromFile
 string
 char, noneOf, oneOf
 eof
 pure
 *> and <*
 <|>
 <$> fmap
 do
 <?>
 try
 

plenty of other stuff in haddock...