util for sanitizing a string, making it usable as identifier

This commit is contained in:
Fischlurch 2007-09-14 19:18:11 +02:00
parent 814f6fc734
commit c746629312
7 changed files with 123 additions and 16 deletions

View file

@ -62,7 +62,7 @@ def setupBasicEnvironment():
, BINDIR=BINDIR
, CPPPATH=["#"+SRCDIR] # used to find includes, "#" means always absolute to build-root
, CPPDEFINES=['-DCINELERRA_VERSION='+VERSION ] # note: it's a list to append further defines
, CCFLAGS='-Wall'
, CCFLAGS='-Wall ' # -fdiagnostics-show-option
)
handleNoBugSwitches(env)

View file

@ -75,7 +75,7 @@ namespace cinelerra
{
if (!pInstance_)
{
ThreadLock guard;
ThreadLock guard SIDEEFFECT;
if (!pInstance_)
{
if (isDead_)

72
src/common/util.cpp Normal file
View file

@ -0,0 +1,72 @@
/*
util.cpp - helper functions implementation
Copyright (C) CinelerraCV
2007, Christian Thaeter <ct@pipapo.org>
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
* *****************************************************/
#include "common/util.hpp"
#include <boost/algorithm/string.hpp>
#include <boost/function.hpp>
#include <boost/bind.hpp>
using boost::algorithm::trim_right_copy_if;
using boost::algorithm::is_any_of;
using boost::algorithm::is_alnum;
using boost::algorithm::is_space;
namespace util
{
  namespace { // implementation details, confined to this translation unit

    /** predicate classifying a single character of a string */
    typedef boost::function<bool(string::value_type)> ChPredicate;

    /** build the logical negation of a character predicate.
     *  This overload is kept inside the unnamed namespace on purpose:
     *  it gives `!predicate` the meaning "inverted predicate" and thus
     *  must not leak into other code using boost::function objects.
     *  @param p the predicate to invert
     *  @return a predicate yielding true whenever p yields false
     */
    ChPredicate
    operator! (ChPredicate p)
    {
      return ! boost::bind (p, _1);
    }

    // character classes used for sanitizing strings

    /** characters taken over into the sanitized string unaltered */
    const ChPredicate isValid (is_alnum() || is_any_of("-_.:+$'()@"));

    /** characters collapsed into a single underscore when following valid characters.
     *  NOTE(review): is_any_of works on single chars. If this file is stored as UTF-8,
     *  the '´' contributes two separate bytes to the set -- confirm this is intended.
     */
    const ChPredicate isPunct (is_space() || is_any_of(",;#*~´`?\\=/&%![]{}"));

  } // (end) implementation details


  /** produce an identifier based on the given string.
   *  Characters not accepted by isValid are removed; a sequence of
   *  punctuation and whitespace directly following valid characters
   *  is reduced to one single underscore.
   *  @param org the string to be sanitized (not modified)
   *  @return the sanitized copy; empty when org contains no valid character
   */
  string
  sanitize (const string& org)
  {
    // Trailing invalid characters are cut off up front. Thus the range processed
    // below always ends with a valid character and never with an underscore.
    string res (trim_right_copy_if (org, !isValid));

    string::iterator       out = res.begin();             // write position within the result
    string::const_iterator in  = org.begin();             // read position within the original
    string::const_iterator const end = in + res.length(); // stop where the trimmed copy ends

    // the write position never overtakes the read position,
    // because each step consumes at least as many characters as it emits
    while (in != end)
      {
        // drop anything invalid; punctuation found here yields no underscore
        while (in != end && !isValid (*in))
          ++in;

        // take over a run of valid characters verbatim
        while (in != end && isValid (*in))
          *out++ = *in++;

        // punctuation immediately following the valid run: emit one underscore
        // and swallow the rest of the punctuation sequence
        if (in != end && isPunct (*in))
          {
            *out++ = '_';
            do ++in;
            while (in != end && isPunct (*in));
          }
      }

    res.erase (out, res.end());   // discard the leftover tail of the trimmed copy
    return res;
  }

} // namespace util

View file

@ -89,14 +89,25 @@ namespace util
/** produce an identifier based on the given string.
* remove non-standard-chars, reduce punctuation to underscores
* remove non-standard-chars, reduce sequences of punctuation
* and whitespace to single underscores. The sanitized string
* will either be empty or start with a valid identifier character,
* i.e. an alphanumeric character or one of -_.:+$'()@
*
* @par Example Conversions
\verbatim
"Word" --> 'Word'
"a Sentence" --> 'a_Sentence'
"trailing Withespace \t \n" --> 'trailing_Withespace'
"with a lot \nof Whitespace" --> 'with_a_lot_of_Whitespace'
"with\"much (punctuation)[]!" --> 'withmuch_(punctuation)'
"§&Ω%€ leading garbarge" --> 'leading_garbarge'
"mixed Ω garbarge" --> 'mixed_garbarge'
"Bääääh!!" --> 'Bh'
\endverbatim
*/
inline string
sanitize (const string& org)
{
UNIMPLEMENTED ("sanitize String");
return org; ///////////////////////////TODO
}
string sanitize (const string& org);
/** convenience shortcut: conversion to c-String via string.
@ -113,6 +124,11 @@ namespace util
/* some common macro definitions */
/** suppress "warning: unused variable" on vars, which
 *  are introduced into a scope because of some sideeffect, i.e. Locking.
 *  To be placed right after the variable name: `ThreadLock guard SIDEEFFECT;`
 *  The macro carries no semicolon of its own, so the declaration is terminated
 *  by the semicolon written at the usage site and may have an initialiser too.
 */
#define SIDEEFFECT __attribute__ ((unused))
/** this macro wraps its parameter into a cstring literal.
 *  The indirection through __STRNGFY ensures that a TOKEN which is itself
 *  a macro gets expanded first, before the # operator turns it into a literal.
 *  NOTE(review): identifiers containing a double underscore are reserved
 *  for the implementation; consider renaming the helper macro.
 */
#define STRINGIFY(TOKEN) __STRNGFY(TOKEN)
#define __STRNGFY(TOKEN) #TOKEN

View file

@ -41,8 +41,12 @@ out: 2|ä|
out: 3|+|
out: 4|€|
out: -->oo _O()O_ ä + €
out: wrapping cmdline:...
out: -->
out: wrapping cmdline:Ω ooΩ oΩo Ωoo...
out: 0|Ω|
out: 1|ooΩ|
out: 2|oΩo|
out: 3|Ωoo|
out: -->Ω ooΩ oΩo Ωoo
out: Standard Cmdlineformat:one two
END
@ -135,7 +139,18 @@ out: --> remaining=SingleTestID spam --eggs
END
PLANNED "SanitizedIdentifier_test" SanitizedIdentifier_test <<END
TEST "SanitizedIdentifier_test" SanitizedIdentifier_test <<END
out: 'Word' --> 'Word'
out: 'a Sentence' --> 'a_Sentence'
out: 'trailing Withespace
out: ' --> 'trailing_Withespace'
out: 'with a lot
out: of Whitespace' --> 'with_a_lot_of_Whitespace'
out: 'with"much (punctuation)[]!' --> 'withmuch_(punctuation)'
out: '§&Ω%€ leading garbarge' --> 'leading_garbarge'
out: 'mixed Ω garbarge' --> 'mixed_garbarge'
out: 'Bääääh!!' --> 'Bh'
out: '§&Ω%€' --> ''
END

View file

@ -44,13 +44,17 @@ namespace util
{
print_clean ("Word");
print_clean ("a Sentence");
print_clean ("with a lot \nof Whitespace");
print_clean ("with\"much (punctuation)!");
print_clean ("trailing Withespace\n \t");
print_clean ("with a \t lot\n of Whitespace");
print_clean ("with\"much (punctuation)[]!");
print_clean ("§&Ω%€ leading garbarge");
print_clean ("mixed Ω garbarge");
print_clean ("Bääääh!!");
print_clean ("§&Ω%€");
}
/** @test print the original and the sanitized string */
void print_clean (const string& org)
void print_clean (const string org)
{
cout << "'" << org << "' --> '" << sanitize(org) << "'\n";
}

View file

@ -54,7 +54,7 @@ namespace util
testLine("\nspam");
testLine("eat more spam");
testLine(" oo _O()O_ ä + €");
testLine("\0\too\0\to\0o\t\0oo");
testLine("Ω\tooΩ\toΩo\tΩoo");
testStandardCmdlineformat();
}