SymbolTable: needs proper locking (#158)
this might turn into lock contention problem, but better optimise a correct implementation than fix a fast yet broken one. Hint: SessionCommandFunction_test demonstrates that the symbol table can be corrupted by creating Symbol instances in parallel without proper locking. So yes, this is for real.
This commit is contained in:
parent
3f17e6558e
commit
32a76a5703
2 changed files with 98 additions and 8 deletions
|
|
@ -35,15 +35,14 @@
|
|||
|
||||
|
||||
#include "lib/symbol.hpp"
|
||||
#include "lib/symbol-table.hpp"
|
||||
#include "include/limits.h"
|
||||
extern "C" {
|
||||
#include "lib/safeclib.h"
|
||||
}
|
||||
|
||||
#include <boost/functional/hash.hpp>
|
||||
#include <unordered_set>
|
||||
#include <cstddef>
|
||||
#include <utility>
|
||||
#include <string>
|
||||
|
||||
using std::size_t;
|
||||
|
|
@ -59,20 +58,19 @@ namespace lib {
|
|||
const size_t STRING_MAX_RELEVANT = LUMIERA_IDSTRING_MAX_RELEVANT;
|
||||
|
||||
|
||||
namespace { // symbol table implementation
|
||||
namespace { // global symbol table
|
||||
|
||||
/** @warning grows eternally, never shrinks */
|
||||
std::unordered_set<string> symbolTable;
|
||||
SymbolTable symbolTable;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* create Symbol by symbol table lookup.
|
||||
* @note identical strings will be mapped to
|
||||
* the same Symbol (embedded pointer)
|
||||
* @note identical strings will be mapped to the same Symbol (embedded pointer)
|
||||
* @warning potential lock contention, since each ctor call needs to do a lookup
|
||||
*/
|
||||
Symbol::Symbol (string&& definition)
|
||||
: Literal{symbolTable.insert(forward<string> (definition)).first->c_str()}
|
||||
: Literal{symbolTable.internedString (forward<string> (definition))}
|
||||
{ }
|
||||
|
||||
|
||||
|
|
|
|||
92
src/lib/symbol-table.hpp
Normal file
92
src/lib/symbol-table.hpp
Normal file
|
|
@ -0,0 +1,92 @@
|
|||
/*
|
||||
SYMBOL-TABLE.hpp - registry for automatically interned symbol string tokens
|
||||
|
||||
Copyright (C) Lumiera.org
|
||||
2017, Hermann Vosseler <Ichthyostega@web.de>
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License as
|
||||
published by the Free Software Foundation; either version 2 of
|
||||
the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
*/
|
||||
|
||||
/** @file symbol-table.hpp
|
||||
** Registry table for automatically _interned strings_.
|
||||
** The implementation of the lib::Symbol token relies on unique string pointers,
|
||||
** such as to create one distinct identity for each distinct "symbol string". The idea
|
||||
** is to generate unique and distinct numeric token IDs, while still holding onto a human readable
|
||||
** string. Which in turn requires us to manage a registry of already known symbol strings; when
|
||||
** a Symbol object with such an already known string is created, it will thus connect internally
|
||||
** to the known token ID.
|
||||
**
|
||||
** @todo as of this writing (4/2017), it is neither clear if we really need such a facility, nor
|
||||
** do we know enough to judge the implementation approach taken here. It might well turn out
|
||||
** that a mere hash over the symbol string would be sufficient. Just the general vision for
|
||||
** Lumiera indicates that we're going to rely heavily on some kind of symbolic or meta processing,
|
||||
** involving a rules based query system, and in the light of such future developments, introducing
|
||||
** a distinct lib::Symbol datatype seemed like a good idea. When we started to generate command IDs
|
||||
** systematically, just literal c-string constants weren't sufficient anymore, leading to this
|
||||
** very preliminary table based implementation.
|
||||
**
|
||||
** @see symbol-impl.cpp
|
||||
** @see Symbol_test
|
||||
** @see Symbol_HashtableTest
|
||||
** @see SessionCommandFunction_test multithreaded access
|
||||
*/
|
||||
|
||||
|
||||
#ifndef LIB_SYMBOL_TABLE_H
|
||||
#define LIB_SYMBOL_TABLE_H
|
||||
|
||||
#include "lib/sync.hpp"
|
||||
#include "lib/symbol.hpp"
|
||||
|
||||
#include <boost/noncopyable.hpp>
|
||||
#include <unordered_set>
|
||||
#include <utility>
|
||||
#include <string>
|
||||
|
||||
|
||||
namespace lib {
|
||||
|
||||
using std::string;
|
||||
using std::forward;
|
||||
|
||||
|
||||
/**
|
||||
* Table for automatically _interned strings_.
|
||||
* This table is used to back the lib::Symbol token type,
|
||||
* which is implemented by a pointer into this registration table
|
||||
* for each new distinct "symbol string" created.
|
||||
* @warning grows eternally, never shrinks
|
||||
*/
|
||||
class SymbolTable
|
||||
: public Sync<>
|
||||
, boost::noncopyable
|
||||
{
|
||||
std::unordered_set<string> table_;
|
||||
|
||||
public:
|
||||
Literal
|
||||
internedString (string && symbolString)
|
||||
{
|
||||
Lock sync(this);
|
||||
auto res = table_.insert (forward<string> (symbolString));
|
||||
return res.first->c_str();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
|
||||
} // namespace lib
|
||||
#endif /*LIB_SYMBOL_TABLE_H*/
|
||||
Loading…
Reference in a new issue