2008-01-14 01:01:11 +01:00
|
|
|
/*
|
2012-12-02 23:32:30 +01:00
|
|
|
QueryUtil - support for working with terms and queries
|
2010-12-17 23:28:49 +01:00
|
|
|
|
2008-03-10 08:38:59 +01:00
|
|
|
Copyright (C) Lumiera.org
|
2012-12-02 23:32:30 +01:00
|
|
|
2008, 2012 Hermann Vosseler <Ichthyostega@web.de>
|
2010-12-17 23:28:49 +01:00
|
|
|
|
2008-01-14 01:01:11 +01:00
|
|
|
This program is free software; you can redistribute it and/or
|
|
|
|
|
modify it under the terms of the GNU General Public License as
|
2010-12-17 23:28:49 +01:00
|
|
|
published by the Free Software Foundation; either version 2 of
|
|
|
|
|
the License, or (at your option) any later version.
|
|
|
|
|
|
2008-01-14 01:01:11 +01:00
|
|
|
This program is distributed in the hope that it will be useful,
|
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
|
GNU General Public License for more details.
|
2010-12-17 23:28:49 +01:00
|
|
|
|
2008-01-14 01:01:11 +01:00
|
|
|
You should have received a copy of the GNU General Public License
|
|
|
|
|
along with this program; if not, write to the Free Software
|
|
|
|
|
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
2010-12-17 23:28:49 +01:00
|
|
|
|
2008-01-14 01:01:11 +01:00
|
|
|
* *****************************************************/
|
|
|
|
|
|
|
|
|
|
|
2016-11-03 18:22:31 +01:00
|
|
|
/** @file query-util.cpp
|
2016-11-08 13:18:05 +01:00
|
|
|
** Implementation of helpers for working with predicate queries.
|
2016-11-03 18:20:10 +01:00
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
2012-12-03 00:18:18 +01:00
|
|
|
#include "lib/error.hpp"
|
2012-12-02 23:32:30 +01:00
|
|
|
#include "lib/query-util.hpp"
|
2012-12-03 00:18:18 +01:00
|
|
|
#include "lib/util.hpp"
|
2008-01-14 01:01:11 +01:00
|
|
|
|
|
|
|
|
#include <boost/algorithm/string.hpp>
|
2019-06-24 02:41:02 +02:00
|
|
|
#include <functional>
|
|
|
|
|
#include <regex>
|
2008-02-18 04:16:53 +01:00
|
|
|
#include <map>
|
2008-01-14 01:01:11 +01:00
|
|
|
|
2008-02-18 04:16:53 +01:00
|
|
|
using std::map;
|
2019-06-24 02:41:02 +02:00
|
|
|
using std::regex;
|
|
|
|
|
using std::smatch;
|
|
|
|
|
using std::regex_search;
|
|
|
|
|
using std::sregex_iterator;
|
2008-01-14 01:01:11 +01:00
|
|
|
|
2008-02-18 04:16:53 +01:00
|
|
|
using util::contains;
|
2008-04-06 05:36:16 +02:00
|
|
|
using util::isnil;
|
2008-02-18 04:16:53 +01:00
|
|
|
|
2008-01-14 01:01:11 +01:00
|
|
|
|
2012-12-03 00:18:18 +01:00
|
|
|
namespace lib {
|
2008-12-31 05:05:34 +01:00
|
|
|
namespace query {
|
|
|
|
|
|
|
|
|
|
namespace { // local definitions
|
|
|
|
|
|
2019-06-24 02:41:02 +02:00
|
|
|
using ChPredicate = std::function<bool(string::value_type)> ;
|
2008-04-06 05:36:16 +02:00
|
|
|
|
|
|
|
|
ChPredicate is_alpha = boost::algorithm::is_alpha();
|
|
|
|
|
ChPredicate is_upper = boost::algorithm::is_upper();
|
|
|
|
|
} // local defs
|
|
|
|
|
|
2008-01-14 01:01:11 +01:00
|
|
|
|
|
|
|
|
void
|
2009-07-13 01:16:40 +02:00
|
|
|
normaliseID (string& id)
|
2008-01-14 01:01:11 +01:00
|
|
|
{
|
2009-07-19 05:47:36 +02:00
|
|
|
id = util::sanitise(id);
|
2008-04-06 05:36:16 +02:00
|
|
|
if (isnil(id) || !is_alpha (id[0]))
|
|
|
|
|
id.insert(0, "o");
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
REQUIRE (!isnil(id));
|
|
|
|
|
REQUIRE (is_alpha (id[0]));
|
2008-01-14 01:01:11 +01:00
|
|
|
|
|
|
|
|
char first = id[0];
|
2008-04-06 05:36:16 +02:00
|
|
|
if (is_upper (first))
|
2008-01-14 01:01:11 +01:00
|
|
|
id[0] = std::tolower (first);
|
|
|
|
|
}
|
2008-02-18 04:16:53 +01:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2010-04-14 07:02:40 +02:00
|
|
|
//////////////////////TICKET #613 : centralise generally useful RegExps
|
2010-04-13 06:37:21 +02:00
|
|
|
namespace{ // Implementation details
|
|
|
|
|
|
2008-02-18 04:16:53 +01:00
|
|
|
map<Symbol, regex> regexTable;
|
2008-03-31 03:21:28 +02:00
|
|
|
|
2019-06-24 02:41:02 +02:00
|
|
|
Literal MATCH_ARGUMENT = R"~(\(\s*([\w_\.\-]+)\s*\),?\s*)~";
|
|
|
|
|
const regex FIND_PREDICATE{string{"(\\w+)"} + MATCH_ARGUMENT};
|
2008-04-06 08:56:18 +02:00
|
|
|
|
|
|
|
|
inline regex&
|
|
|
|
|
getTermRegex (Symbol sym)
|
|
|
|
|
{
|
|
|
|
|
if (!contains (regexTable, sym))
|
2019-06-24 02:41:02 +02:00
|
|
|
regexTable[sym] = regex (string(sym)+MATCH_ARGUMENT);
|
2008-04-06 08:56:18 +02:00
|
|
|
return regexTable[sym];
|
|
|
|
|
}
|
2008-02-18 04:16:53 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/** (preliminary) helper: instead of really parsing and evaluating the terms,
|
|
|
|
|
* just do a regular expression match to extract the literal argument
|
|
|
|
|
* behind the given predicate symbol. e.g calling
|
2013-10-25 06:34:38 +02:00
|
|
|
* `extractID ("stream", "id(abc), stream(mpeg)")` yields \c "mpeg"
|
2008-02-18 04:16:53 +01:00
|
|
|
*/
|
2012-12-25 01:16:19 +01:00
|
|
|
string
|
2008-02-18 04:16:53 +01:00
|
|
|
extractID (Symbol sym, const string& termString)
|
|
|
|
|
{
|
|
|
|
|
smatch match;
|
2008-04-06 08:56:18 +02:00
|
|
|
if (regex_search (termString, match, getTermRegex (sym)))
|
|
|
|
|
return (match[1]);
|
2008-02-18 04:16:53 +01:00
|
|
|
else
|
|
|
|
|
return "";
|
2008-04-06 08:56:18 +02:00
|
|
|
}
|
|
|
|
|
|
2008-02-18 04:16:53 +01:00
|
|
|
|
2008-04-06 08:56:18 +02:00
|
|
|
/** (preliminary) helper: cut a term with the given symbol.
|
|
|
|
|
* The term is matched, removed from the original string and returned
|
|
|
|
|
* @note parameter termString will be modified!
|
2012-12-25 01:16:19 +01:00
|
|
|
* @todo as it seems we're not using the extracted term anymore,
|
|
|
|
|
* we could save the effort of rebuilding that term.
|
2008-04-06 08:56:18 +02:00
|
|
|
*/
|
2012-12-25 01:16:19 +01:00
|
|
|
string
|
|
|
|
|
removeTerm (Symbol sym, string& queryString)
|
2008-04-06 08:56:18 +02:00
|
|
|
{
|
|
|
|
|
smatch match;
|
2012-12-25 01:16:19 +01:00
|
|
|
if (regex_search (queryString, match, getTermRegex (sym)))
|
2008-04-06 08:56:18 +02:00
|
|
|
{
|
|
|
|
|
string res (sym); res += "("+match[1]+")";
|
2012-12-25 01:16:19 +01:00
|
|
|
queryString.erase (match.position(), match[0].length());
|
2008-04-06 08:56:18 +02:00
|
|
|
return res;
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
return "";
|
2012-12-25 01:16:19 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
bool
|
|
|
|
|
hasTerm (Symbol sym, string const& queryString)
|
|
|
|
|
{
|
|
|
|
|
smatch match;
|
|
|
|
|
return regex_search (queryString, match, getTermRegex (sym));
|
|
|
|
|
}
|
2008-01-14 01:01:11 +01:00
|
|
|
|
2008-03-31 03:21:28 +02:00
|
|
|
|
|
|
|
|
/** @note this is a very hackish preliminary implementation.
|
|
|
|
|
* The regex used will flounder when applied to nested terms.
|
|
|
|
|
* We need a real parser for predicate logic terms (which we
|
|
|
|
|
* probably get for free when we embed a prolog system)...
|
|
|
|
|
*/
|
|
|
|
|
uint
|
2010-04-13 06:37:21 +02:00
|
|
|
countPred (const string& q)
|
2008-03-31 03:21:28 +02:00
|
|
|
{
|
|
|
|
|
uint cnt (0);
|
|
|
|
|
sregex_iterator end;
|
2019-06-24 02:41:02 +02:00
|
|
|
for (sregex_iterator i (q.begin(),q.end(), FIND_PREDICATE);
|
2008-03-31 03:21:28 +02:00
|
|
|
i != end; ++i)
|
|
|
|
|
++cnt;
|
|
|
|
|
return cnt;
|
|
|
|
|
}
|
|
|
|
|
|
2012-12-17 23:17:32 +01:00
|
|
|
|
|
|
|
|
/** @note preliminary implementation without any syntax checks
|
|
|
|
|
* @return a conjunction of the predicates
|
|
|
|
|
*/
|
|
|
|
|
string
|
|
|
|
|
appendTerms (string const& pred1, string const& pred2)
|
|
|
|
|
{
|
2012-12-24 03:20:52 +01:00
|
|
|
return isnil(pred1)? pred2
|
2012-12-25 01:16:19 +01:00
|
|
|
: isnil(pred2)? pred1
|
2012-12-24 03:20:52 +01:00
|
|
|
: pred1 + ", " + pred2;
|
2012-12-17 23:17:32 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
2008-01-14 01:01:11 +01:00
|
|
|
} // namespace query
|
2012-11-26 01:22:01 +01:00
|
|
|
|
2012-12-03 00:18:18 +01:00
|
|
|
} // namespace lib
|