Other examples

This section has some further example parsers that you can study. There are also examples in the Tutorial and in Generating a parser.

SQL SELECT statement parser

This shows a very simplified parser for a SQL SELECT statement, using custom data structures, and the convenient keyword argument syntax for seq(), followed by Parser.combine_dict().

# A very limited parser for SQL SELECT statements,
# for demo purposes. Supports:
# 1. A simple list of columns (or number/string literals)
# 2. A simple table name
# 3. An optional where condition,
#    which has the form of 'A op B' where A and B are columns, strings or numbers,
#    and op is a comparison operator
# We demonstrate the use of `map` to create AST nodes with a single arg,
# and `seq` for AST nodes with more than one arg.

import enum
from dataclasses import dataclass
from typing import List, Optional, Union

from parsy import from_enum, regex, seq, string

# -- AST nodes:

class Operator(enum.Enum):
    """Comparison operators accepted in a WHERE condition.

    Each member's value is the operator exactly as it is spelled in the
    query text.
    """

    EQ = "="
    LT = "<"
    GT = ">"
    LTE = "<="
    GTE = ">="

@dataclass
class Number:
    """AST node for an integer literal."""

    value: int

@dataclass
class String:
    """AST node for a string literal (the text between the quotes)."""

    value: str

@dataclass
class Field:
    """AST node for a column reference."""

    name: str

@dataclass
class Table:
    """AST node for the table named in the FROM clause."""

    name: str

# Anything allowed in the column list or on either side of a comparison.
ColumnExpression = Union[Field, String, Number]

@dataclass
class Comparison:
    """AST node for a WHERE condition of the form ``left operator right``."""

    left: ColumnExpression
    operator: Operator
    right: ColumnExpression

@dataclass
class Select:
    """AST node for a whole SELECT statement."""

    columns: List[ColumnExpression]
    table: Table
    where: Optional[Comparison]  # None when the statement has no WHERE clause

# -- Parsers:

# Integer literal with an optional leading minus sign; the matched text is
# converted with int() and the result is then wrapped in a Number node.
number_literal = regex(r"-?[0-9]+").map(int).map(Number)

# We don't support ' in strings or escaping for simplicity.
# The lambda strips the surrounding quote characters before building the node.
string_literal = regex(r"'[^']*'").map(lambda s: String(s[1:-1]))

# A name: an ASCII letter followed by any number of letters, digits or underscores.
identifier = regex("[a-zA-Z][a-zA-Z0-9_]*")

# Columns and tables share the identifier syntax; only the AST node that is
# built from the matched text differs.
field = identifier.map(Field)

table = identifier.map(Table)

space = regex(r"\s+")  # non-optional whitespace
padding = regex(r"\s*")  # optional whitespace

# Alternatives are tried from left to right.
column_expr = field | string_literal | number_literal

# Parses any of the Operator values and returns the matching enum member.
operator = from_enum(Operator)

# `A op B`, with optional whitespace on either side of the operator.
# The keyword names match the fields of Comparison, so the named results can
# be handed straight to its constructor by combine_dict.
comparison = seq(
    left=column_expr << padding,
    operator=operator,
    right=padding >> column_expr,
).combine_dict(Comparison)

# SQL keywords. string() matches the exact text given, so the keywords must
# be written in upper case in the input.
SELECT = string("SELECT")
FROM = string("FROM")
WHERE = string("WHERE")

# Here we demonstrate use of leading underscore to discard parts we don't want,
# which is more readable and convenient than `<<` and `>>` sometimes.
select = seq(
    _select=SELECT + space,
    # At least one column expression, separated by commas with optional padding.
    columns=column_expr.sep_by(padding + string(",") + padding, min=1),
    _from=space + FROM + space,
    table=table,
    # The WHERE clause may be absent, in which case `where` is None.
    where=(space >> WHERE >> space >> comparison).optional(),
    _end=padding + string(";"),
    # Keys starting with an underscore are dropped; the rest become the
    # keyword arguments of Select.
).combine_dict(Select)

# Run these tests with pytest:

def test_select():
    """A full statement: mixed column expressions, a table and a WHERE clause."""
    assert select.parse("SELECT thing, stuff, 123, 'hello' FROM my_table WHERE id = 1;") == Select(
        columns=[
            Field("thing"),
            Field("stuff"),
            Number(123),
            String("hello"),
        ],
        table=Table("my_table"),
        where=Comparison(
            left=Field("id"),
            operator=Operator.EQ,
            right=Number(1),
        ),
    )

def test_optional_where():
    """Without a WHERE clause the `where` field of the result is None."""
    assert select.parse("SELECT 1 FROM x;") == Select(
        columns=[
            Number(1),
        ],
        table=Table("x"),
        where=None,
    )

JSON parser

A full parser for JSON. (This will not be competitive in terms of performance with other implementations!)

This demonstrates the use of forward_declaration, needed due to the circular definition of json_value.

from parsy import forward_declaration, regex, seq, string

# Utilities
whitespace = regex(r"\s*")  # zero or more whitespace characters


def lexeme(p):
    """Return a parser that runs ``p`` and then skips any trailing whitespace.

    The value produced is that of ``p``; the whitespace is discarded.
    """
    return p << whitespace

# Punctuation
# Every punctuation token is a single character wrapped in lexeme(), so the
# six parsers are built in one pass over the symbols, in the order named.
lbrace, rbrace, lbrack, rbrack, colon, comma = (
    lexeme(string(symbol)) for symbol in ("{", "}", "[", "]", ":", ",")
)

# Primitives
# Each keyword is replaced by the Python value it stands for.
true = lexeme(string("true")).result(True)
false = lexeme(string("false")).result(False)
null = lexeme(string("null")).result(None)
# Optional sign, integer part without leading zeros, optional fraction and
# optional exponent. Every number is converted with float(), integers included.
number = lexeme(regex(r"-?(0|[1-9][0-9]*)([.][0-9]+)?([eE][+-]?[0-9]+)?")).map(float)
# A run of characters that are neither a double quote nor a backslash.
string_part = regex(r'[^"\\]+')
# A backslash followed by one of the escape characters JSON allows.
# The backslash itself is discarded; the alternative yields the real character.
string_esc = string("\\") >> (
    string("\\")
    | string("/")
    | string('"')
    | string("b").result("\b")
    | string("f").result("\f")
    | string("n").result("\n")
    | string("r").result("\r")
    | string("t").result("\t")
    # \uXXXX: each escape is converted on its own with chr(), so a surrogate
    # pair is not combined into a single character.
    | regex(r"u[0-9a-fA-F]{4}").map(lambda s: chr(int(s[1:], 16)))
)
# A double-quoted string: any mix of plain runs and escapes, joined into one str.
quoted = lexeme(string('"') >> (string_part | string_esc).many().concat() << string('"'))

# Data structures
# json_value refers to itself through objects and arrays, so it is declared
# first and only given its definition further down with become().
json_value = forward_declaration()
# `"key": value`, produced as a (key, value) tuple so dict() can consume the pairs.
object_pair = seq(quoted << colon, json_value).map(tuple)
json_object = lbrace >> object_pair.sep_by(comma).map(dict) << rbrace
array = lbrack >> json_value.sep_by(comma) << rbrack

# Everything
json_value.become(quoted | number | json_object | array | true | false | null)
# Leading whitespace is skipped here; whitespace after each token is consumed
# by lexeme().
json_doc = whitespace >> json_value

def test():
    """Parse a document that exercises every JSON value type and check the result."""
    # The input is a raw string so that \n and \u24D2 reach the parser as JSON
    # escape sequences instead of being interpreted by Python first.
    assert (
        json_doc.parse(
            r"""
    {
        "int": 1,
        "string": "hello",
        "a list": [1, 2, 3],
        "escapes": "\n \u24D2",
        "nested": {"x": "y"},
        "other": [true, false, null]
    }
"""
        )
        == {
            "int": 1,
            "string": "hello",
            "a list": [1, 2, 3],
            "escapes": "\n ⓒ",
            "nested": {"x": "y"},
            "other": [True, False, None],
        }
    )

if __name__ == "__main__":
    from sys import stdin
