SymbolTable: needs proper locking (#158)

this might turn into lock contention problem, but better optimise
a correct implementation than fix a fast yet broken one.

Hint: SessionCommandFunction_test demonstrates that the
symbol table can be corrupted by creating Symbol instances
in parallel without proper locking. So yes, this is for real.
This commit is contained in:
Fischlurch 2017-04-08 02:27:51 +02:00
parent 3f17e6558e
commit 32a76a5703
2 changed files with 98 additions and 8 deletions

View file

@ -35,15 +35,14 @@
#include "lib/symbol.hpp"
#include "lib/symbol-table.hpp"
#include "include/limits.h"
extern "C" {
#include "lib/safeclib.h"
}
#include <boost/functional/hash.hpp>
#include <unordered_set>
#include <cstddef>
#include <utility>
#include <string>
using std::size_t;
@ -59,20 +58,19 @@ namespace lib {
const size_t STRING_MAX_RELEVANT = LUMIERA_IDSTRING_MAX_RELEVANT;
namespace { // symbol table implementation
namespace { // global symbol table
/** @warning grows eternally, never shrinks */
std::unordered_set<string> symbolTable;
SymbolTable symbolTable;
}
/**
* create Symbol by symbol table lookup.
* @note identical strings will be mapped to
* the same Symbol (embedded pointer)
* @note identical strings will be mapped to the same Symbol (embedded pointer)
* @warning potential lock contention, since each ctor call needs to do a lookup
*/
Symbol::Symbol (string&& definition)
: Literal{symbolTable.insert(forward<string> (definition)).first->c_str()}
: Literal{symbolTable.internedString (forward<string> (definition))}
{ }

92
src/lib/symbol-table.hpp Normal file
View file

@ -0,0 +1,92 @@
/*
SYMBOL-TABLE.hpp - registry for automatically interned symbol string tokens
Copyright (C) Lumiera.org
2017, Hermann Vosseler <Ichthyostega@web.de>
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of
the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/** @file symbol-table.hpp
** Registry table for automatically _interned strings_.
** The implementation of the lib::Symbol token relies on unique string pointers,
** such as to create one distinct identity for each distinct "symbol string". The idea
** is to generate unique and distinct numeric token IDs, while still holding onto a human readable
** string. Which in turn requires us to manage a registry of already known symbol strings; when
** a Symbol object with such an already known string is created, it will thus connect internally
** to the known token ID.
**
** @todo as of this writing (4/2017), it is neither clear if we really need such a facility, nor
** do we know enough to judge the implementation approach taken here. It might well turn out
** that a mere hash over the symbol string would be sufficient. Just the general vision for
** Lumiera indicates that we're going to rely heavily on some kind of symbolic or meta processing,
** involving a rules based query system, and in the light of such future developments, introducing
** a distinct lib::Symbol datatype seemed like a good idea. When we started to generate command IDs
** systematically, just literal c-string constants weren't sufficient anymore, leading to this
** very preliminary table based implementation.
**
** @see symbol-impl.cpp
** @see Symbol_test
** @see Symbol_HashtableTest
** @see SessionCommandFunction_test multithreaded access
*/
#ifndef LIB_SYMBOL_TABLE_H
#define LIB_SYMBOL_TABLE_H
#include "lib/sync.hpp"
#include "lib/symbol.hpp"
#include <boost/noncopyable.hpp>
#include <unordered_set>
#include <utility>
#include <string>
namespace lib {
using std::string;
using std::forward;
/**
* Table for automatically _interned strings_.
* This table is used to back the lib::Symbol token type,
* which is implemented by a pointer into this registration table
* for each new distinct "symbol string" created.
* @warning grows eternally, never shrinks
*/
class SymbolTable
: public Sync<>
, boost::noncopyable
{
std::unordered_set<string> table_;
public:
Literal
internedString (string && symbolString)
{
Lock sync(this);
auto res = table_.insert (forward<string> (symbolString));
return res.first->c_str();
}
};
} // namespace lib
#endif /*LIB_SYMBOL_TABLE_H*/