util for sanitizing a string, making it usable as identifier
This commit is contained in:
parent
814f6fc734
commit
c746629312
7 changed files with 123 additions and 16 deletions
|
|
@ -62,7 +62,7 @@ def setupBasicEnvironment():
|
|||
, BINDIR=BINDIR
|
||||
, CPPPATH=["#"+SRCDIR] # used to find includes, "#" means always absolute to build-root
|
||||
, CPPDEFINES=['-DCINELERRA_VERSION='+VERSION ] # note: it's a list to append further defines
|
||||
, CCFLAGS='-Wall'
|
||||
, CCFLAGS='-Wall ' # -fdiagnostics-show-option
|
||||
)
|
||||
|
||||
handleNoBugSwitches(env)
|
||||
|
|
|
|||
|
|
@ -75,7 +75,7 @@ namespace cinelerra
|
|||
{
|
||||
if (!pInstance_)
|
||||
{
|
||||
ThreadLock guard;
|
||||
ThreadLock guard SIDEEFFECT;
|
||||
if (!pInstance_)
|
||||
{
|
||||
if (isDead_)
|
||||
|
|
|
|||
72
src/common/util.cpp
Normal file
72
src/common/util.cpp
Normal file
|
|
@ -0,0 +1,72 @@
|
|||
/*
|
||||
util.cpp - helper functions implementation
|
||||
|
||||
Copyright (C) CinelerraCV
|
||||
2007, Christian Thaeter <ct@pipapo.org>
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License as
|
||||
published by the Free Software Foundation; either version 2 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
* *****************************************************/
|
||||
|
||||
|
||||
#include "common/util.hpp"
|
||||
|
||||
#include <boost/algorithm/string.hpp>
|
||||
#include <boost/function.hpp>
|
||||
#include <boost/bind.hpp>
|
||||
|
||||
using boost::algorithm::trim_right_copy_if;
|
||||
using boost::algorithm::is_any_of;
|
||||
using boost::algorithm::is_alnum;
|
||||
using boost::algorithm::is_space;
|
||||
|
||||
|
||||
namespace util
|
||||
{
|
||||
|
||||
// type-erased predicate, applicable to a single character of a string
typedef boost::function<bool(string::value_type)> ChPredicate;
|
||||
// negation of a character predicate: the result accepts exactly those characters which p rejects
ChPredicate operator! (ChPredicate p) { return ! boost::bind(p,_1); }
|
||||
|
||||
// character classes used for sanitizing strings
|
||||
// isValid: characters taken over unaltered (alphanumerics plus the listed symbols)
ChPredicate isValid (is_alnum() || is_any_of("-_.:+$'()@"));
|
||||
// isPunct: whitespace and the listed punctuation characters
// NOTE(review): the acute accent in this list is not an ASCII character; with a multibyte
//               source encoding is_any_of would match its individual bytes -- confirm this is intended
ChPredicate isPunct (is_space() || is_any_of(",;#*~´`?\\=/&%![]{}"));
|
||||
|
||||
|
||||
/* Derive an identifier-like string from the given text.
 * Invalid characters at the end are cut off, runs of valid characters are
 * taken over verbatim, punctuation or whitespace immediately following a
 * valid character is condensed into one underscore, and every other
 * invalid character is dropped. Yields an empty string when the input
 * holds no valid character at all.
 */
string
sanitize (const string& org)
{
  // the right-trimmed copy fixes the scan range and doubles as the output buffer
  string ident (trim_right_copy_if(org, !isValid ));
  string::const_iterator       src  = org.begin();
  const string::const_iterator stop = src + ident.length();
  string::iterator             dst  = ident.begin();

  while (src != stop)
    {
      // phase 1: discard a run of invalid characters
      for ( ; src != stop && !isValid (*src); ++src)
        { }

      // phase 2: take over a run of valid characters unaltered
      for ( ; src != stop && isValid (*src); ++src, ++dst)
        *dst = *src;

      // phase 3: punctuation directly behind valid characters collapses into a single '_'
      if (src == stop || !isPunct (*src))
        continue;

      *dst = '_';
      ++dst;
      for (++src; src != stop && isPunct (*src); ++src)
        { }
    }

  // never more characters written than read, thus dst marks the end of the result
  ident.erase(dst, ident.end());
  return ident;
}
|
||||
|
||||
|
||||
|
||||
} // namespace util
|
||||
|
||||
|
|
@ -89,14 +89,25 @@ namespace util
|
|||
|
||||
|
||||
/** produce an identifier based on the given string.
|
||||
* remove non-standard-chars, reduce punctuation to underscores
|
||||
* remove non-standard-chars, reduce sequences of punctuation
|
||||
* and whitespace to single underscores. The sanitized string
|
||||
* will start with a valid character (alphanumeric or one of -_.:+$'()@), unless it is empty.
|
||||
*
|
||||
* @par Example Conversions
|
||||
\verbatim
|
||||
"Word" --> 'Word'
|
||||
"a Sentence" --> 'a_Sentence'
|
||||
"trailing Withespace \t \n" --> 'trailing_Withespace'
|
||||
"with a lot \nof Whitespace" --> 'with_a_lot_of_Whitespace'
|
||||
"with\"much (punctuation)[]!" --> 'withmuch_(punctuation)'
|
||||
"§&Ω%€ leading garbarge" --> 'leading_garbarge'
|
||||
"mixed Ω garbarge" --> 'mixed_garbarge'
|
||||
"Bääääh!!" --> 'Bh'
|
||||
\endverbatim
|
||||
*/
|
||||
inline string
|
||||
sanitize (const string& org)
|
||||
{
|
||||
UNIMPLEMENTED ("sanitize String");
|
||||
return org; ///////////////////////////TODO
|
||||
}
|
||||
string sanitize (const string& org);
|
||||
|
||||
|
||||
|
||||
|
||||
/** convenience shortcut: conversion to c-String via string.
|
||||
|
|
@ -113,6 +124,11 @@ namespace util
|
|||
|
||||
/* some common macro definitions */
|
||||
|
||||
/** suppress "warning: unused variable" on vars, which
|
||||
* are introduced into a scope because of some side effect, e.g. locking
|
||||
*/
|
||||
// Marks a variable as intentionally unused (GCC-style attribute).
// NOTE: deliberately no trailing semicolon here; the declaration using the macro
//       supplies its own, and the macro may then also be followed by an initialiser.
#define SIDEEFFECT __attribute__ ((unused))
|
||||
|
||||
/** this macro wraps its parameter into a cstring literal */
|
||||
#define STRINGIFY(TOKEN) __STRNGFY(TOKEN)
|
||||
#define __STRNGFY(TOKEN) #TOKEN
|
||||
|
|
|
|||
|
|
@ -41,8 +41,12 @@ out: 2|ä|
|
|||
out: 3|+|
|
||||
out: 4|€|
|
||||
out: -->oo _O()O_ ä + €
|
||||
out: wrapping cmdline:...
|
||||
out: -->
|
||||
out: wrapping cmdline:Ω ooΩ oΩo Ωoo...
|
||||
out: 0|Ω|
|
||||
out: 1|ooΩ|
|
||||
out: 2|oΩo|
|
||||
out: 3|Ωoo|
|
||||
out: -->Ω ooΩ oΩo Ωoo
|
||||
out: Standard Cmdlineformat:one two
|
||||
END
|
||||
|
||||
|
|
@ -135,7 +139,18 @@ out: --> remaining=SingleTestID spam --eggs
|
|||
END
|
||||
|
||||
|
||||
PLANNED "SanitizedIdentifier_test" SanitizedIdentifier_test <<END
|
||||
TEST "SanitizedIdentifier_test" SanitizedIdentifier_test <<END
|
||||
out: 'Word' --> 'Word'
|
||||
out: 'a Sentence' --> 'a_Sentence'
|
||||
out: 'trailing Withespace
|
||||
out: ' --> 'trailing_Withespace'
|
||||
out: 'with a lot
|
||||
out: of Whitespace' --> 'with_a_lot_of_Whitespace'
|
||||
out: 'with"much (punctuation)[]!' --> 'withmuch_(punctuation)'
|
||||
out: '§&Ω%€ leading garbarge' --> 'leading_garbarge'
|
||||
out: 'mixed Ω garbarge' --> 'mixed_garbarge'
|
||||
out: 'Bääääh!!' --> 'Bh'
|
||||
out: '§&Ω%€' --> ''
|
||||
END
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -44,13 +44,17 @@ namespace util
|
|||
{
|
||||
print_clean ("Word");
|
||||
print_clean ("a Sentence");
|
||||
print_clean ("with a lot \nof Whitespace");
|
||||
print_clean ("with\"much (punctuation)!");
|
||||
print_clean ("trailing Withespace\n \t");
|
||||
print_clean ("with a \t lot\n of Whitespace");
|
||||
print_clean ("with\"much (punctuation)[]!");
|
||||
print_clean ("§&Ω%€ leading garbarge");
|
||||
print_clean ("mixed Ω garbarge");
|
||||
print_clean ("Bääääh!!");
|
||||
print_clean ("§&Ω%€");
|
||||
}
|
||||
|
||||
/** @test print the original and the sanitized string */
|
||||
void print_clean (const string& org)
|
||||
void print_clean (const string org)
|
||||
{
|
||||
cout << "'" << org << "' --> '" << sanitize(org) << "'\n";
|
||||
}
|
||||
|
|
|
|||
|
|
@ -54,7 +54,7 @@ namespace util
|
|||
testLine("\nspam");
|
||||
testLine("eat more spam");
|
||||
testLine(" oo _O()O_ ä + €");
|
||||
testLine("\0\too\0\to\0o\t\0oo");
|
||||
testLine("Ω\tooΩ\toΩo\tΩoo");
|
||||
|
||||
testStandardCmdlineformat();
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue