I am putting together a little module for an OSS release that lets you parse a boolean expression consisting of `and`/`AND`/`or`/`OR` operators (no brackets yet) and outputs a complete Elasticsearch query.
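Boolean expression logic:
Right now it uses `OR` as the basis and puts everything on top of that as `AND`s: the expression is first split on `OR`, and each resulting group is then split on `AND`. This means that `AND` binds left to right (and more tightly than `OR`).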
I would like feedback on:
- The quality of the generated Elasticsearch query: can it be simplified? Are there better approaches?
- The way I interpret the boolean expression.
def _match_either_field(term):
    """Return the two ``match`` clauses that look for *term* in a review.

    One clause targets ``Review.Text`` and the other ``Review.Title``; both
    use ``"operator": "and"`` for the words of *term*.
    """
    return [
        {"match": {"Review.Text": {"query": term, "operator": "and"}}},
        {"match": {"Review.Title": {"query": term, "operator": "and"}}},
    ]


def string_to_query(s):
    """Translate a flat ``and``/``or`` expression into an Elasticsearch query.

    The expression is lower-cased, split into OR groups, and every group is
    split into AND terms.  Keywords are only recognised as whole words, so
    search terms that merely contain the letters ("doctor", "brand", "for")
    are left intact.  Brackets are not supported.

    Args:
        s: The expression, e.g. ``'dog and dog food or cat and cat food'``.

    Returns:
        A dict of the form ``{"query": {"bool": {"should": [...]}}}``.
        A group with a single term contributes its two ``match`` clauses
        directly to the top-level ``should`` list; a group with several
        terms contributes one ``bool``/``must`` clause holding a
        ``bool``/``should`` pair of ``match`` clauses per term.  Groups are
        emitted from last to first.

    Note:
        Terms inside an AND group keep the space that bordered the ``and``
        keyword (e.g. ``"dog "``, ``" dog food"``).  This matches the
        previously produced output and is kept for backward compatibility.
    """
    # Local import keeps the function self-contained; ``re`` caches the
    # compiled patterns, so repeated calls do not recompile them.
    import re

    # Whole-word split on "or", then collapse runs of whitespace per group.
    or_groups = [' '.join(group.split())
                 for group in re.split(r'\bor\b', s.lower())]

    or_terms = []
    # Walk the groups back to front to keep the established output order.
    for group in reversed(or_groups):
        and_terms = re.split(r'\band\b', group)
        if len(and_terms) < 2:
            # No AND in this group: its clauses join the outer "should".
            or_terms.extend(_match_either_field(and_terms[0]))
        else:
            # Every AND term must match in at least one of the two fields.
            or_terms.append({"bool": {"must": [
                {"bool": {"should": _match_either_field(term)}}
                for term in and_terms
            ]}})

    return {"query": {"bool": {"should": or_terms}}}
# Worked example: two OR groups, each made of two AND terms.
query = string_to_query('dog and dog food or cat and cat food')

# The expected query lists the OR groups in reverse input order ("cat ..."
# before "dog ..."), and each AND term keeps the space that bordered "and".
assert query == {"query": {"bool": {"should": [
    # Group "cat and cat food": both terms are required ("must"), and each
    # term may match in either the review text or the review title.
    {"bool": {"must": [
        {"bool": {"should": [
            {"match": {"Review.Text": {"operator": "and", "query": "cat "}}},
            {"match": {"Review.Title": {"operator": "and", "query": "cat "}}},
        ]}},
        {"bool": {"should": [
            {"match": {"Review.Text": {"operator": "and", "query": " cat food"}}},
            {"match": {"Review.Title": {"operator": "and", "query": " cat food"}}},
        ]}},
    ]}},
    # Group "dog and dog food": same shape as above.
    {"bool": {"must": [
        {"bool": {"should": [
            {"match": {"Review.Text": {"operator": "and", "query": "dog "}}},
            {"match": {"Review.Title": {"operator": "and", "query": "dog "}}},
        ]}},
        {"bool": {"should": [
            {"match": {"Review.Text": {"operator": "and", "query": " dog food"}}},
            {"match": {"Review.Title": {"operator": "and", "query": " dog food"}}},
        ]}},
    ]}},
]}}}