Login
7 branches 0 tags
Ben (X13/Arch) Slightly improved http client support 10fe1cc 2 years ago 1085 Commits
nujel / lib / symbol.c
/* Nujel - Copyright (C) 2020-2022 - Benjamin Vincent Schulenburg
 * This project uses the MIT license, a copy should be included under /LICENSE */
#ifndef NUJEL_AMALGAMATION
#include "nujel-private.h"
#endif

#include <stdlib.h>
#include <string.h>

lSymbol  lSymbolList[SYM_MAX];
uint     lSymbolActive = 0;
uint     lSymbolMax    = 0;
lSymbol *lSymbolFFree = NULL;

// a hash index over the symbol array.  It is essentially a sparse map such that
// for a string S, lHashSymStr(S) is a slot in the hash table (with linear
// probing if there's a hash collision).  Then lSymbolIndex[slot] is the
// position in lSymbolList where the actual symbol is stored, and
// lSymbolBackIndex[pos] is the opposite mapping from a position in lSymbolList
// to the corresponding slot in the hash table.
// lSymbolIndex stores all indices 1-based, because it allows us to use 0 to
// mean that the slot is empty which is more convenient.  negative values denote
// deleted slots, and positive values denote used slots.
i16      lSymbolIndex[SYM_MAX];
i16      lSymbolBackIndex[SYM_MAX];
#define SYMBOL_SLOT_IS_EMPTY(X) (X == 0)
#define SYMBOL_SLOT_IS_TOMB(X)  (X < 0)
#define SYMBOL_SLOT_IS_USED(X)  (X > 0)

lSymbol *symNull;
lSymbol *symQuote;
lSymbol *symQuasiquote;
lSymbol *symUnquote;
lSymbol *symUnquoteSplicing;
lSymbol *symArr;
lSymbol *symTreeNew;
lSymbol *symDocumentation;
lSymbol *symPure;
lSymbol *symFold;

lSymbol *symType;
lSymbol *symArguments;
lSymbol *symCode;
lSymbol *symData;
lSymbol *symName;

lSymbol *lSymLTNil;
lSymbol *lSymLTBool;
lSymbol *lSymLTPair;
lSymbol *lSymLTLambda;
lSymbol *lSymLTInt;
lSymbol *lSymLTFloat;
lSymbol *lSymLTString;
lSymbol *lSymLTSymbol;
lSymbol *lSymLTKeyword;
lSymbol *lSymLTNativeFunction;
lSymbol *lSymLTEnvironment;
lSymbol *lSymLTMacro;
lSymbol *lSymLTArray;
lSymbol *lSymLTTree;
lSymbol *lSymLTBytecodeArray;
lSymbol *lSymLTBuffer;
lSymbol *lSymLTBufferView;
lSymbol *lSymLTFileHandle;
lSymbol *lSymLTUnknownType;
lSymbol *lSymLTType;
lSymbol *lSymPrototype;

lSymbol *lSymFloatNaN;
lSymbol *lSymFloatInf;
lSymbol *lSymVMError;
lSymbol *lSymTypeError;
lSymbol *lSymOutOfBounds;
lSymbol *lSymIOError;
lSymbol *lSymArityError;
lSymbol *lSymDivisionByZero;
lSymbol *lSymReadError;
lSymbol *lSymOOM;
lSymbol *lSymUnmatchedOpeningBracket;
lSymbol *lSymUnboundVariable;

uint symbolLookups = 0;
uint tombLookups = 0;

void lSymbolInit(){
	lSymbolActive = 0;
	lSymbolMax    = 0;
	lSymbolFFree  = NULL;

	lSymbolList[0].nextFree = NULL;
	lSymbolList[0].c[sizeof(lSymbolList[0].c) - 1] = 0xFF;
	if(lSymbolList[0].nextFree != NULL){
		exit(110);
	}

	symNull              = lSymSM("");
	symQuote             = lSymSM("quote");
	symQuasiquote        = lSymSM("quasiquote");
	symUnquote           = lSymSM("unquote");
	symUnquoteSplicing   = lSymSM("unquote-splicing");
	symArr               = lSymSM("array/new");
	symTreeNew           = lSymSM("tree/new");
	symDocumentation     = lSymSM("documentation");
	symPure              = lSymSM("pure");
	symFold              = lSymSM("fold");

	symType              = lSymSM("type");
	symArguments         = lSymSM("arguments");
	symCode              = lSymSM("code");
	symData              = lSymSM("data");
	symName              = lSymSM("name");

	lSymLTNil            = lSymSM("nil");
	lSymLTBool           = lSymSM("bool");
	lSymLTPair           = lSymSM("pair");
	lSymLTEnvironment    = lSymSM("environment");
	lSymLTLambda         = lSymSM("lambda");
	lSymLTInt            = lSymSM("int");
	lSymLTFloat          = lSymSM("float");
	lSymLTString         = lSymSM("string");
	lSymLTSymbol         = lSymSM("symbol");
	lSymLTKeyword        = lSymSM("keyword");
	lSymLTNativeFunction = lSymSM("native-function");
	lSymLTArray          = lSymSM("array");
	lSymLTMacro          = lSymSM("macro");
	lSymLTTree           = lSymSM("tree");
	lSymLTBytecodeArray  = lSymSM("bytecode-array");
	lSymLTBuffer         = lSymSM("buffer");
	lSymLTBufferView     = lSymSM("buffer-view");
	lSymLTFileHandle     = lSymSM("file-handle");
	lSymLTUnknownType    = lSymSM("unknown-type");
	lSymLTType           = lSymSM("type");

	lSymPrototype        = lSymSM("prototype*");

	lSymFloatNaN         = lSymSM("float-nan");
	lSymFloatInf         = lSymSM("float-inf");
	lSymVMError          = lSymSM("vm-error");
	lSymTypeError        = lSymSM("type-error");
	lSymOutOfBounds      = lSymSM("out-of-bounds");
	lSymIOError          = lSymSM("io-error");
	lSymArityError       = lSymSM("arity-error");
	lSymDivisionByZero   = lSymSM("division-by-zero");
	lSymReadError        = lSymSM("read-error");
	lSymOOM              = lSymSM("out-of-memory");
	lSymUnboundVariable  = lSymSM("unbound-variable");
	lSymUnmatchedOpeningBracket = lSymSM("unmatched-opening-bracket");
}

static inline int lSymIndex(const lSymbol *s){
	return s - lSymbolList;
}

void lSymbolFree(lSymbol *s){
	s->nextFree = lSymbolFFree;
	s->c[sizeof(s->c)-1] = 0xFF;
	lSymbolFFree = s;
	lSymbolActive--;
	const int symIndex = lSymIndex(s);
	const int slot = lSymbolBackIndex[symIndex];
	lSymbolIndex[slot] = -symIndex;
}

lSymbol *lSymSL(const char *str, uint len){
	char buf[32];
	len = MIN(sizeof(buf)-1,len);
	memcpy(buf,str,len);
	buf[len] = 0;
	return lSymS(buf);
}

lSymbol *lSymSM(const char *str){
	return lRootsSymbolPush(lSymS(str));
}

const u32 hashLookupTable[8] = {
	0x2e003dc5,
	0x8b27c03c,
	0x4d9b3063,
	0xbd3e8d7e,
	0x52568b75,
	0x1d528623,
	0xf0a5bd1d,
	0x76c15bf8
};

static u32 lHashSymStr(const char *str){
	u32 res = 0xf3b06b3b;
	while (*str) {
		res = (res << 4) | ((res & 0xF0000000) >> 28);
		res ^= *str++;
#ifdef _MSC_VER
		res ^= hashLookupTable[__popcnt(res) & 0x7];
#else
		res ^= hashLookupTable[__builtin_popcount(res)&0x7];
#endif
	}
	return res;
}

// Probes the symbol index and returns the slot where STR is stored.  If STR is
// not in the map, returns a slot where it could be inserted, which could be
// either an empty slot or a tomb slot from when a different symbol with a hash
// collision was deleted.
uint lSymbolIndexSlot(const char *str){
	uint firstTomb = 0xffffffff;
	uint h = lHashSymStr(str) % SYM_MAX;
	uint hInitial = h;
	//symbolLookups++;
	do {
		//tombLookups++;
		int idx = lSymbolIndex[h];
		if(SYMBOL_SLOT_IS_EMPTY(idx)){
			return firstTomb == 0xffffffff ? h : firstTomb;
		}
		if(SYMBOL_SLOT_IS_TOMB(idx) && firstTomb == 0xffffffff){
			firstTomb = h;
		}
		if(SYMBOL_SLOT_IS_USED(idx)){
			--idx;
			if (0 == strncmp(str,lSymbolList[idx].c,sizeof(lSymbolList[idx].c)-1)
				&& 0 == lSymbolList[idx].c[sizeof(lSymbolList[idx].c)-1])
			{
				return h;
			}
		}
		if (++h == SYM_MAX) {
			h = 0;
		}
	} while (h != hInitial);
	exit(111);
	return 0;
}

lSymbol *lSymS(const char *str){
	uint slot = lSymbolIndexSlot(str);
	int symIndex = lSymbolIndex[slot];
	if(SYMBOL_SLOT_IS_USED(symIndex)){
		return &lSymbolList[symIndex-1];
	}
	lSymbol *ret;
	if(lSymbolFFree){
		ret = lSymbolFFree;
		lSymbolFFree = lSymbolFFree->nextFree;
	}else{
		if(unlikely(lSymbolMax >= SYM_MAX)){
			exit(112);
		}else{
			ret = &lSymbolList[lSymbolMax++];
		}
	}
	lSymbolActive++;
	strncpy(ret->c, str, sizeof(ret->c));
	ret->c[sizeof(ret->c)-1] = 0;
	symIndex = lSymIndex(ret);
	lSymbolIndex[slot] = symIndex + 1;
	lSymbolBackIndex[symIndex] = slot;
	return ret;
}

lSymbol *getTypeSymbolT(const lType T){
	switch(T){
		default:            return lSymLTUnknownType;
		case ltNil:         return lSymLTNil;
		case ltBool:        return lSymLTBool;
		case ltPair:        return lSymLTPair;
		case ltEnvironment: return lSymLTEnvironment;
		case ltLambda:      return lSymLTLambda;
		case ltInt:         return lSymLTInt;
		case ltFloat:       return lSymLTFloat;
		case ltString:      return lSymLTString;
		case ltSymbol:      return lSymLTSymbol;
		case ltKeyword:     return lSymLTKeyword;
		case ltNativeFunc:  return lSymLTNativeFunction;
		case ltArray:       return lSymLTArray;
		case ltMacro:       return lSymLTMacro;
		case ltTree:        return lSymLTTree;
		case ltBytecodeArr: return lSymLTBytecodeArray;
		case ltBuffer:      return lSymLTBuffer;
		case ltBufferView:  return lSymLTBufferView;
		case ltFileHandle:  return lSymLTFileHandle;
	}
}

lSymbol *getTypeSymbol(const lVal v){
	return getTypeSymbolT(v.type);
}