/* Part of SWI-Prolog Author: Jan Wielemaker E-mail: J.Wielemaker@vu.nl WWW: http://www.swi-prolog.org Copyright (c) 1985-2020, University of Amsterdam VU University Amsterdam All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "pl-index.h" #include "pl-comp.h" #include "pl-rsort.h" #include "pl-util.h" #include "pl-proc.h" #include "pl-fli.h" #include "pl-wam.h" #include /******************************* * PARAMETERS * *******************************/ /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - MAX_LOOKAHEAD Maximum number of clauses we look ahead on indexed clauses for an alternative clause. If the choice is committed this is lost effort, if it reaches the end of the clause list without finding one the call is deterministic. - MIN_SPEEDUP Do not create an index if the speedup is less - MAX_VAR_FRAC Do not create an index if the fraction of clauses with a variable in the target position exceeds this threshold. - MIN_CLAUSES_FOR_INDEX Create an index if there are more than this number of clauses - MIN_SPEEDUP_RATIO Need at least this ratio of #clauses/speedup for creating an index - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ #define MIN_SPEEDUP (GD->clause_index.min_speedup) #define MAX_VAR_FRAC (GD->clause_index.max_var_fraction) #define MIN_SPEEDUP_RATIO (GD->clause_index.min_speedup_ratio) #define MAX_LOOKAHEAD (GD->clause_index.max_lookahead) #define MIN_CLAUSES_FOR_INDEX (GD->clause_index.min_clauses) /******************************* * TYPES * *******************************/ #define DEAD_INDEX ((ClauseIndex)1) #define ISDEADCI(ci) ((ci) == DEAD_INDEX) typedef struct hash_hints { iarg_t args[MAX_MULTI_INDEX]; /* Hash these arguments */ float speedup; /* Expected speedup */ unsigned int ln_buckets; /* Lg2 of #buckets to use */ unsigned list : 1; /* Use a list per key */ } hash_hints; typedef struct index_context { gen_t generation; /* Current generation */ Definition predicate; /* Current predicate */ ClauseChoice chp; /* Clause choice point */ int depth; /* current depth (0..) */ iarg_t position[MAXINDEXDEPTH+1]; /* Keep track of argument position */ } index_context, *IndexContext; #if USE_LD_MACROS #define bestHash(av, ac, clist, min_speedup, hints, ctx) LDFUNC(bestHash, av, ac, clist, min_speedup, hints, ctx) #define setClauseChoice(chp, cref, generation) LDFUNC(setClauseChoice, chp, cref, generation) #define first_clause_guarded(argv, argc, clist, ctx) LDFUNC(first_clause_guarded, argv, argc, clist, ctx) #endif /*USE_LD_MACROS*/ #define LDFUNC_DECLARATIONS static int bestHash(Word av, size_t ac, ClauseList clist, float min_speedup, hash_hints *hints, IndexContext ctx); static ClauseIndex hashDefinition(ClauseList clist, hash_hints *h, IndexContext ctx); static void replaceIndex(Definition def, ClauseList cl, ClauseIndex *cip, ClauseIndex ci); static void deleteIndexP(Definition def, ClauseList cl, ClauseIndex *cip); static void deleteIndex(Definition def, ClauseList cl, ClauseIndex ci); static void insertIndex(Definition def, ClauseList clist, ClauseIndex ci); static void setClauseChoice(ClauseChoice chp, ClauseRef cref, gen_t generation); static int addClauseToIndex(ClauseIndex ci, Clause cl, ClauseRef where); static void addClauseToListIndexes(Definition def, ClauseList cl, Clause clause, ClauseRef where); static void insertIntoSparseList(ClauseRef cref, ClauseRef *headp, ClauseRef *tailp, ClauseRef where); static ClauseRef first_clause_guarded(Word argv, size_t argc, ClauseList clist, IndexContext ctx); static Code skipToTerm(Clause clause, const iarg_t *position); static void unalloc_index_array(void *p); static void wait_for_index(const ClauseIndex ci); static void completed_index(ClauseIndex ci); #undef LDFUNC_DECLARATIONS /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Compute the index in the hash-array from a machine word and the number of buckets. This used to be simple, but now that our tag bits are on the left side, simply masking will put most things on the same hash-entry as it is very common for all clauses of a predicate to have the same type of object. Hence, we now use exclusive or of the real value part and the tag-bits. NOTE: this function must be kept consistent with argKey() in pl-comp.c! NOTE: This function returns 0 on non-indexable fields, which is why we guarantee that the value is non-0 for indexable values. NOTE: Indirects should not collide with functor_t to allow for deep indexing. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ static inline int hashIndex(word key, int buckets) { unsigned int k = MurmurHashIntptr(key, MURMUR_SEED); return k & (buckets-1); } #define canIndex(w) LDFUNC(canIndex, w) static inline int canIndex(DECL_LD word w) { for(;;) { switch(tag(w)) { case TAG_VAR: case TAG_ATTVAR: return FALSE; case TAG_REFERENCE: w = *unRef(w); continue; default: return TRUE; } } } #define indexOfWord(w) LDFUNC(indexOfWord, w) static inline word indexOfWord(DECL_LD word w) { for(;;) { switch(tag(w)) { case TAG_VAR: case TAG_ATTVAR: return 0; case TAG_ATOM: break; /* atom_t */ case TAG_INTEGER: if ( storage(w) == STG_INLINE ) break; #if O_BIGNUM return bignum_index(addressIndirect(w)); #endif /*FALLTHROUGH for int64 and no BIGNUM */ case TAG_STRING: case TAG_FLOAT: { Word p = addressIndirect(w); size_t n = wsizeofInd(*p); return murmur_key(p+1, n*sizeof(*p)); } case TAG_COMPOUND: w = *valPtr(w); /* functor_t */ break; case TAG_REFERENCE: w = *unRef(w); continue; } return w; } } word getIndexOfTerm(term_t t) { GET_LD word w = *valTermRef(t); return indexOfWord(w); } #define nextClauseArg1(chp, generation) \ LDFUNC(nextClauseArg1, chp, generation) static inline ClauseRef nextClauseArg1(DECL_LD ClauseChoice chp, gen_t generation) { ClauseRef cref = chp->cref; word key = chp->key; for( ; cref; cref = cref->next) { if ( (!cref->d.key || key == cref->d.key) && visibleClauseCNT(cref->value.clause, generation)) { ClauseRef result = cref; int maxsearch = MAX_LOOKAHEAD; for( cref = cref->next; cref; cref = cref->next ) { if ( (!cref->d.key || key == cref->d.key) ) { if ( visibleClauseCNT(cref->value.clause, generation) ) { chp->cref = cref; return result; } } if ( --maxsearch == 0 ) { setClauseChoice(chp, cref, generation); return result; } } chp->cref = NULL; return result; } } return NULL; } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - nextClauseFromBucket() If we search for a functor there are two options: we have a list for this functor, in which case we can use this or we don't. In the latter case we must still perform the traditional search as clauses without a key may match. (*) We gave up indexed search due to exceeding depth. We should set key to 0 to avoid using the first argument key in nextClause(). Alternatively we'd have to reset the argv to the appropriate deep argument inside nextClause, but unfortunately we do not know which argument we are processing. TBD: Keep a flag telling whether there are non-indexable clauses. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ #define nextClauseFromBucket(ci, argv, ctx) LDFUNC(nextClauseFromBucket, ci, argv, ctx) static ClauseRef nextClauseFromBucket(DECL_LD ClauseIndex ci, Word argv, IndexContext ctx) { ClauseRef cref; word key = ctx->chp->key; if ( ci->is_list ) { DEBUG(MSG_INDEX_FIND, Sdprintf("Searching for %s\n", keyName(key))); assert(ci->args[1] == 0); non_indexed: for(cref = ctx->chp->cref; cref; cref = cref->next) { if ( cref->d.key == key ) { ClauseList cl = &cref->value.clauses; ClauseRef cr; DEBUG(MSG_INDEX_DEEP, Sdprintf("Deep index for %s\n", keyName(key))); if ( isFunctor(cref->d.key) && ctx->depth < MAXINDEXDEPTH ) { int an = ci->args[0]-1; Word a = argv+an; Functor at; size_t argc; deRef(a); assert(isTerm(*a)); at = valueTerm(*a); argv = at->arguments; argc = arityFunctor(at->definition); ctx->position[ctx->depth++] = an; ctx->position[ctx->depth] = END_INDEX_POS; DEBUG(MSG_INDEX_DEEP, Sdprintf("Recursive index for %s at level %d\n", keyName(cref->d.key), ctx->depth)); return first_clause_guarded(argv, argc, cl, ctx); } ctx->chp->key = 0; /* See (*) */ for(cr=cl->first_clause; cr; cr=cr->next) { if ( visibleClauseCNT(cr->value.clause, ctx->generation) ) { setClauseChoice(ctx->chp, cr->next, ctx->generation); return cr; } } return NULL; } } if ( key ) { key = 0; DEBUG(MSG_INDEX_FIND, Sdprintf("Not found; trying non-indexed\n")); goto non_indexed; } else { DEBUG(MSG_INDEX_FIND, Sdprintf("Not found\n")); } return NULL; } for(cref = ctx->chp->cref; cref; cref = cref->next) { if ( (!cref->d.key || key == cref->d.key) && visibleClauseCNT(cref->value.clause, ctx->generation)) { ClauseRef result = cref; int maxsearch = MAX_LOOKAHEAD; for( cref = cref->next; cref; cref = cref->next ) { if ( ((!cref->d.key || key == cref->d.key) && visibleClauseCNT(cref->value.clause, ctx->generation)) || --maxsearch == 0 ) { setClauseChoice(ctx->chp, cref, ctx->generation); return result; } } ctx->chp->cref = NULL; return result; } } return NULL; } /* Make sure the ClauseChoice contains a pointer to a clause that is still visible in generation. This garantees that the clause will not be destroyed. Note that we do not have to perform the full visibility test, just avoid we end up at a clause reference that is free for CGC. */ static void setClauseChoice(DECL_LD ClauseChoice chp, ClauseRef cref, gen_t generation) { while ( cref && !visibleClauseCNT(cref->value.clause, generation) ) cref = cref->next; chp->cref = cref; } #define indexKeyFromArgv(ci, argv) LDFUNC(indexKeyFromArgv, ci, argv) static inline word indexKeyFromArgv(DECL_LD ClauseIndex ci, Word argv) { if ( likely(ci->args[1] == 0) ) { return indexOfWord(argv[ci->args[0]-1]); } else { word key[MAX_MULTI_INDEX]; int harg; for(harg=0; ci->args[harg]; harg++) { if ( !(key[harg] = indexOfWord(argv[ci->args[harg]-1])) ) return 0; } return murmur_key(key, sizeof(word)*harg); } } #if defined(O_DEBUG) || defined(O_MAINTENANCE) static char * iargsName(const iarg_t args[MAX_MULTI_INDEX], char *buf) { static char sbuf[64]; char *s; int i; if ( !buf ) buf = sbuf; s = buf; *s++ = '['; for(i=0; args[i]; i++) { if ( i > 0 ) *s++ = ','; Ssprintf(s, "%d", args[i]); s += strlen(s); } *s++ = ']'; *s = EOS; return buf; } #endif /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - firstClause() finds the first applicable clause and leave information for finding the next clause in chp. TBD: - non-indexable predicates must use a different supervisor - Predicates needing reindexing should use a different supervisor - When to select best table? - When to ignore the best and try again? - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ static ClauseRef first_clause_guarded(DECL_LD Word argv, size_t argc, ClauseList clist, IndexContext ctx) { ClauseRef cref; ClauseIndex *cip; hash_hints hints; ClauseChoice chp = ctx->chp; #define STATIC_RELOADING() (LD->reload.generation && false(ctx->predicate, P_DYNAMIC)) if ( unlikely(argc == 0) ) goto simple; /* TBD: alt supervisor */ if ( unlikely(argc > MAXINDEXARG) ) argc = MAXINDEXARG; retry: if ( (cip=clist->clause_indexes) ) { ClauseIndex best_index = NULL; for(; *cip; cip++) { ClauseIndex ci = *cip; word k; if ( ISDEADCI(ci) ) continue; if ( (k=indexKeyFromArgv(ci, argv)) ) { best_index = ci; chp->key = k; break; } } if ( best_index ) { int hi; if ( clist->number_of_clauses > MIN_CLAUSES_FOR_INDEX && (float)clist->number_of_clauses/best_index->speedup > MIN_SPEEDUP_RATIO && !STATIC_RELOADING() ) { DEBUG(MSG_JIT_POOR, Sdprintf("Poor index %s of %s (trying to find better)\n", iargsName(best_index->args, NULL), predicateName(ctx->predicate))); if ( bestHash(argv, argc, clist, best_index->speedup, &hints, ctx) ) { ClauseIndex ci; DEBUG(MSG_JIT, Sdprintf("[%d] Found better at args %s\n", PL_thread_self(), iargsName(hints.args, NULL))); if ( (ci=hashDefinition(clist, &hints, ctx)) ) { chp->key = indexKeyFromArgv(ci, argv); assert(chp->key); best_index = ci; } else { goto retry; } } } if ( best_index->incomplete ) { wait_for_index(best_index); goto retry; } hi = hashIndex(chp->key, best_index->buckets); chp->cref = best_index->entries[hi].head; return nextClauseFromBucket(best_index, argv, ctx); } } if ( unlikely(clist->number_of_clauses == 0) ) return NULL; /* Try first argument indexing if the first argument can be indexed and * we have less than MIN_CLAUSES_FOR_INDEX clauses. Accept if we have * no clause or the next candidate has a different key. If the next * candidate has the same key, deep indexing may help us, so we will * search for other indexes. */ if ( (chp->key = indexOfWord(argv[0])) && (clist->number_of_clauses <= MIN_CLAUSES_FOR_INDEX || STATIC_RELOADING()) ) { chp->cref = clist->first_clause; cref = nextClauseArg1(chp, ctx->generation); if ( !cref || !(chp->cref && chp->cref->d.key == chp->key && cref->d.key == chp->key) ) return cref; /* else duplicate; see whether we can create a deep index */ /* TBD: Avoid trying this every goal */ } if ( !STATIC_RELOADING() && bestHash(argv, argc, clist, 0.0, &hints, ctx) ) { ClauseIndex ci; if ( (ci=hashDefinition(clist, &hints, ctx)) ) { int hi; while ( ci->incomplete ) wait_for_index(ci); if ( ci->invalid ) goto retry; chp->key = indexKeyFromArgv(ci, argv); assert(chp->key); hi = hashIndex(chp->key, ci->buckets); chp->cref = ci->entries[hi].head; return nextClauseFromBucket(ci, argv, ctx); } else { goto retry; } } if ( chp->key ) { chp->cref = clist->first_clause; return nextClauseArg1(chp, ctx->generation); } simple: for(cref = clist->first_clause; cref; cref = cref->next) { if ( visibleClauseCNT(cref->value.clause, ctx->generation) ) { chp->key = 0; setClauseChoice(chp, cref->next, ctx->generation); break; } } return cref; } ClauseRef firstClause(DECL_LD Word argv, LocalFrame fr, Definition def, ClauseChoice chp) { ClauseRef cref; index_context ctx; ctx.generation = generationFrame(fr); ctx.predicate = def; ctx.chp = chp; ctx.depth = 0; ctx.position[0] = END_INDEX_POS; MEMORY_ACQUIRE(); /* sync with retract_clause() */ acquire_def(def); cref = first_clause_guarded(argv, def->functor->arity, &def->impl.clauses, &ctx); #define CHK_STATIC_RELOADING() (LD->reload.generation && false(def, P_DYNAMIC)) DEBUG(CHK_SECURE, assert(!cref || !chp->cref || visibleClause(chp->cref->value.clause, generationFrame(fr)) || CHK_STATIC_RELOADING())); release_def(def); return cref; } ClauseRef nextClause(DECL_LD ClauseChoice chp, Word argv, LocalFrame fr, Definition def) { gen_t generation = generationFrame(fr); ClauseRef cref; (void)argv; /* we want to use these later */ (void)def; /* to create secondary indexes */ MEMORY_ACQUIRE(); acquire_def(def); if ( !chp->key ) /* not indexed */ { for(cref=chp->cref; cref; cref = cref->next) { if ( visibleClauseCNT(cref->value.clause, generation) ) { setClauseChoice(chp, cref->next, generation); break; } } } else { cref = nextClauseArg1(chp, generation); } release_def(def); DEBUG(CHK_SECURE, assert(!cref || !chp->cref || visibleClause(chp->cref->value.clause, generation))); return cref; } /******************************* * HASH SUPPORT * *******************************/ static int cmp_iarg(const void *p1, const void *p2) { const iarg_t *u1 = p1; const iarg_t *u2 = p2; return *u1 < *u2 ? -1 : *u1 > *u2 ? 1 : 0; } static void canonicalHap(iarg_t *hap) { int i; for(i=0; iln_buckets; bytes = sizeof(struct clause_bucket) * buckets; memset(ci, 0, sizeof(*ci)); memcpy(ci->args, hap, sizeof(ci->args)); ci->buckets = buckets; ci->is_list = hints->list; ci->incomplete = TRUE; ci->speedup = hints->speedup; ci->entries = allocHeapOrHalt(bytes); copytpos(ci->position, ctx->position); memset(ci->entries, 0, bytes); ATOMIC_INC(&GD->statistics.indexes.created); return ci; } static void freeClauseListRef(ClauseRef cref) { ClauseList cl = &cref->value.clauses; ClauseRef cr, next; deleteIndexes(cl, TRUE); for(cr=cl->first_clause; cr; cr=next) { next = cr->next; lingerClauseRef(cr); } if ( cl->args ) { assert(isFunctor(cref->d.key)); freeHeap(cl->args, arityFunctor(cref->d.key)*sizeof(*cl->args)); } freeHeap(cref, SIZEOF_CREF_LIST); } static void vfree_clause_list_ref(void *cref) { freeClauseListRef(cref); } static void lingerClauseListRef(Definition def, ClauseRef cref) { linger_always(&def->lingering, vfree_clause_list_ref, cref); } static void unallocClauseIndexTableEntries(ClauseIndex ci) { ClauseBucket cb; int i; for(cb=ci->entries,i=ci->buckets; --i>=0; cb++) { ClauseRef cr, next; for(cr = cb->head; cr; cr = next) { next = cr->next; if ( ci->is_list ) freeClauseListRef(cr); else lingerClauseRef(cr); } } freeHeap(ci->entries, ci->buckets * sizeof(struct clause_bucket)); } void unallocClauseIndexTable(ClauseIndex ci) { unallocClauseIndexTableEntries(ci); freeHeap(ci, sizeof(struct clause_index)); ATOMIC_INC(&GD->statistics.indexes.destroyed); } static ClauseRef newClauseListRef(word key) { ClauseRef cref = allocHeapOrHalt(SIZEOF_CREF_LIST); memset(cref, 0, SIZEOF_CREF_LIST); cref->d.key = key; return cref; } static void addToClauseList(ClauseRef cref, Clause clause, word key, ClauseRef where) { Definition def = clause->predicate; ClauseList cl = &cref->value.clauses; ClauseRef cr = newClauseRef(clause, key); if ( cl->first_clause ) { if ( where == CL_END ) { cl->last_clause->next = cr; cl->last_clause = cr; } else if ( where == CL_START ) { cr->next = cl->first_clause; cl->first_clause = cr; } else { insertIntoSparseList(cr, &cl->first_clause, &cl->last_clause, where); } cl->number_of_clauses++; } else { cl->first_clause = cl->last_clause = cr; cl->number_of_clauses = 1; } addClauseToListIndexes(def, cl, clause, where); } static void addClauseToListIndexes(Definition def, ClauseList cl, Clause clause, ClauseRef where) { ClauseIndex *cip; if ( (cip=cl->clause_indexes) ) { for(; *cip; cip++) { ClauseIndex ci = *cip; if ( ISDEADCI(ci) ) continue; while ( ci->incomplete ) wait_for_index(ci); if ( ci->invalid ) return; if ( ci->size >= ci->resize_above || !addClauseToIndex(ci, clause, where) ) deleteIndexP(def, cl, cip); } } } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - This linked list *headp *tailp is a sparse (filtered) list of the clauses. Insert cref into this list. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ static void insertIntoSparseList(ClauseRef cref, ClauseRef *headp, ClauseRef *tailp, ClauseRef where) { if ( !(*headp) ) { *headp = *tailp = cref; } else if ( where == CL_END ) { (*tailp)->next = cref; *tailp = cref; } else if ( where == CL_START ) { cref->next = *headp; *headp = cref; } else { Clause clause = cref->value.clause; ClauseRef pred_cref = clause->predicate->impl.clauses.first_clause; ClauseRef ci_cref = *headp; ClauseRef ci_prev = NULL; for(; pred_cref; pred_cref=pred_cref->next) { if ( pred_cref == where || ci_prev == *tailp ) { if ( ci_cref == *headp ) { cref->next = *headp; *headp = cref; } else if ( ci_prev == *tailp ) { (*tailp)->next = cref; *tailp = cref; } else { cref->next = ci_cref; ci_prev->next = cref; } return; } if ( pred_cref->value.clause == ci_cref->value.clause ) { ci_prev = ci_cref; ci_cref = ci_cref->next; } } } } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Add a clause to a bucket. There are some special cases: - If the key denotes a functor, create or extend a clause-list for this functor. - If the key is non-indexable, add the clause both to the bucket chain and to all functor clause-lists (*). The latter also implies that if we create a functor clause-list we must add all non-indexable clauses to it (**). Return how many indexable entries have been added to the bucket. The non-indexable clauses are added to an entry with key=0. This entry must be used if none of the indexes matches. Where is one of - CL_START (asserta) - CL_END (assertz) - The clause reference before which the clause must be inserted. This is used by reconsult. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ static int addClauseBucket(ClauseBucket ch, Clause cl, word key, word arg1key, ClauseRef where, int is_list) { ClauseRef cr; if ( is_list ) { ClauseRef cref; ClauseList vars = NULL; if ( key ) { for(cref=ch->head; cref; cref=cref->next) { if ( cref->d.key == key ) { addToClauseList(cref, cl, arg1key, where); DEBUG(MSG_INDEX_UPDATE, Sdprintf("Adding to existing %s\n", keyName(key))); return 0; } else if ( !cref->d.key ) { vars = &cref->value.clauses; } } } else { for(cref=ch->head; cref; cref=cref->next) { if ( !cref->d.key ) vars = &cref->value.clauses; addToClauseList(cref, cl, arg1key, where); } if ( vars ) return 0; } DEBUG(MSG_INDEX_UPDATE, Sdprintf("Adding new %s\n", keyName(key))); cr = newClauseListRef(key); if ( vars ) /* (**) */ { for(cref=vars->first_clause; cref; cref=cref->next) { addToClauseList(cr, cref->value.clause, arg1key, CL_END); if ( true(cref->value.clause, CL_ERASED) ) /* or do not add? */ { cr->value.clauses.number_of_clauses--; cr->value.clauses.erased_clauses++; } DEBUG(MSG_INDEX_UPDATE, Sdprintf("Preparing var to clause-list for %s\n", keyName(key))); } if ( cr->value.clauses.erased_clauses ) ch->dirty++; } addToClauseList(cr, cl, arg1key, where); } else { cr = newClauseRef(cl, key); } if ( is_list ) where = CL_START; /* doesn't matter where we insert */ insertIntoSparseList(cr, &ch->head, &ch->tail, where); return key ? 1 : 0; } static void deleteClauseList(ClauseList cl, Clause clause) { ClauseRef cr, prev=NULL; for(cr=cl->first_clause; cr; prev=cr, cr=cr->next) { if ( cr->value.clause == clause ) { if ( !prev ) { cl->first_clause = cr->next; if ( !cr->next ) cl->last_clause = NULL; } else { prev->next = cr->next; if ( !cr->next ) cl->last_clause = prev; } cl->number_of_clauses--; return; } } assert(0); } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - deleteClauseBucket() returns number of deleted indexable values. Note that deleting with key=0 can still delete indexable values if the var-key is the only clause of an indexed clause-list. TBD: We can delete clause-lists that are indexable but only contain non-indexed clauses. This probably requires us to keep track of the number of such clauses. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ static int deleteClauseBucket(ClauseBucket ch, Clause clause, word key, int is_list) { ClauseRef prev = NULL; ClauseRef c; if ( is_list ) { if ( key ) { for(c = ch->head; c; prev = c, c = c->next) { if ( c->d.key == key ) { ClauseList cl = &c->value.clauses; deleteClauseList(cl, clause); if ( !cl->first_clause ) goto delete; /* will return 1 */ } } return 0; } else { int deleted = 0; for(c = ch->head; c;) { ClauseList cl = &c->value.clauses; deleteClauseList(cl, clause); if ( !cl->first_clause ) { ClauseRef d; d = c; c = c->next; if ( !prev ) { ch->head = d->next; if ( !d->next ) ch->tail = NULL; } else { prev->next = d->next; if ( !d->next) ch->tail = prev; } if ( d->d.key ) deleted++; freeClauseListRef(d); continue; } prev = c; c = c->next; } return deleted; } } else { for(c = ch->head; c; prev = c, c = c->next) { if ( c->value.clause == clause ) { delete: if ( !prev ) { ch->head = c->next; if ( !c->next ) ch->tail = NULL; } else { prev->next = c->next; if ( !c->next) ch->tail = prev; } if ( is_list ) freeClauseListRef(c); else lingerClauseRef(c); return 1; } } assert(0); return 0; } } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - (*) The clause list may have been marked as "dirty" by deleteActiveClauseFromBucket() even though it does not contain the clause being deleted. We reset the count when we have finished scanning the list to the number of erased clauses remaining in the list. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ static void gcClauseList(ClauseList clist, DirtyDefInfo ddi, gen_t start, Buffer tr_starts) { ClauseRef cref=clist->first_clause, prev = NULL; size_t left = 0; while(cref && clist->erased_clauses) { Clause cl = cref->value.clause; if ( true(cl, CL_ERASED) ) { if ( ddi_is_garbage(ddi, start, tr_starts, cl) ) { ClauseRef c = cref; clist->erased_clauses--; cref = cref->next; if ( !prev ) { clist->first_clause = c->next; if ( !c->next ) clist->last_clause = NULL; } else { prev->next = c->next; if ( c->next == NULL) clist->last_clause = prev; } lingerClauseRef(c); continue; } else { left++; } } prev = cref; cref = cref->next; } clist->erased_clauses = left; /* see (*) */ } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - gcClauseBucket() removes all erased clauses from the bucket and returns the number of indexable entries that have been removed from the bucket. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ static int gcClauseBucket(Definition def, ClauseBucket ch, unsigned int dirty, int is_list, DirtyDefInfo ddi, gen_t start, Buffer tr_starts) { ClauseRef cref = ch->head, prev = NULL; int deleted = 0; while( cref && dirty ) { if ( is_list ) { ClauseList cl = &cref->value.clauses; if ( cl->erased_clauses ) { gcClauseList(cl, ddi, start, tr_starts); if ( !cl->erased_clauses ) dirty--; if ( cl->first_clause == NULL ) goto delete; } } else { Clause cl = cref->value.clause; if ( true(cl, CL_ERASED) && ddi_is_garbage(ddi, start, tr_starts, cl) ) { ClauseRef c; dirty--; delete: c = cref; if ( cref->d.key ) deleted++; /* only reduce size by indexed */ cref = cref->next; if ( !prev ) { ch->head = c->next; if ( !c->next ) ch->tail = NULL; } else { prev->next = c->next; if ( c->next == NULL) ch->tail = prev; } if ( is_list ) lingerClauseListRef(def, c); else lingerClauseRef(c); continue; } } prev = cref; cref = cref->next; } DEBUG(CHK_SECURE, { if ( !is_list ) { for(cref=ch->head; cref; cref=cref->next) { Clause cl = cref->value.clause; assert( false(cl, CL_ERASED) || !ddi_is_garbage(ddi, start, tr_starts, cl) ); } } }); ch->dirty = dirty; return deleted; } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - See also deleteActiveClauseFromIndexes() comment - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ static void cleanClauseIndex(Definition def, ClauseList cl, ClauseIndex ci, DirtyDefInfo ddi, gen_t start, Buffer tr_starts) { if ( cl->number_of_clauses < ci->resize_below ) { deleteIndex(def, cl, ci); } else { if ( ci->dirty ) { ClauseBucket ch = ci->entries; int n = ci->buckets; for(; n; n--, ch++) { if ( ch->dirty ) { ci->size -= gcClauseBucket(def, ch, ch->dirty, ci->is_list, ddi, start, tr_starts); if ( !ch->dirty && --ci->dirty == 0 ) break; } } } assert((int)ci->size >= 0); } } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - cleanClauseIndexes() is called from cleanDefinition() to remove clause references erased before generation `active` from the indexes. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ void cleanClauseIndexes(Definition def, ClauseList cl, DirtyDefInfo ddi, gen_t start, Buffer tr_starts) { ClauseIndex *cip; if ( (cip=cl->clause_indexes) ) { for(; *cip; cip++) { ClauseIndex ci = *cip; if ( ISDEADCI(ci) ) continue; cleanClauseIndex(def, cl, ci, ddi, start, tr_starts); } } } void clearTriedIndexes(Definition def) { int arity = def->functor->arity; int i; for(i=0; iimpl.clauses.args[i]; ainfo->assessed = FALSE; } def->impl.clauses.jiti_tried = 0; } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Clear the ->tried_index vector occasionally for dynamic predicates because evaluation may change. We do this if the number of clauses reaches the next power of two. We use P_SHRUNKPOW2 to inttroduce histerases into the system, such that asserting/retracting around a pow2 boundery does not lead to repeated re-evaluation of the index capabilities. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ static int has_pow2_clauses(Definition def) { unsigned int nc = def->impl.clauses.number_of_clauses; return nc > 0 && 1<head; cref; cref=cref->next) { ClauseList cl = &cref->value.clauses; if ( cl->erased_clauses++ == 0 ) cb->dirty++; cl->number_of_clauses--; } } else { ClauseRef cref; for(cref=cb->head; cref; cref=cref->next) { if ( cref->d.key == key ) { ClauseList cl = &cref->value.clauses; if ( cl->erased_clauses++ == 0 ) cb->dirty++; cl->number_of_clauses--; #ifdef O_DEBUG if ( DEBUGGING(CHK_SECURE) ) { ClauseRef cr; unsigned int erased = 0; unsigned int count = 0; for(cr=cl->first_clause; cr; cr=cr->next) { if ( true(cr->value.clause, CL_ERASED) ) erased++; else count++; } assert(erased == cl->erased_clauses); assert(count == cl->number_of_clauses); } #endif return; } } assert(0); } } static inline word indexKeyFromClause(ClauseIndex ci, Clause cl, Code *end) { Code PC = skipToTerm(cl, ci->position); if ( likely(ci->args[1] == 0) ) { int arg = ci->args[0] - 1; word key; if ( arg > 0 ) PC = skipArgs(PC, arg); if ( end ) *end = PC; if ( argKey(PC, 0, &key) ) return key; return 0; } else { word key[MAX_MULTI_INDEX]; /* TBD: special case for 1 arg */ int pcarg = 1; int harg; DEBUG(CHK_SECURE, if ( end ) *end = NULL); for(harg=0; ci->args[harg]; harg++) { if ( ci->args[harg] > pcarg ) PC = skipArgs(PC, ci->args[harg]-pcarg); pcarg = ci->args[harg]; if ( !argKey(PC, 0, &key[harg]) ) return 0; } return murmur_key(key, sizeof(word)*harg); } } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Deal with deletion of an active clause from the indexes. This clause cannot really be deleted as it might still be alive for goals of an older generation. The task of this deleteActiveClauseFromIndex() is to maintain administration that makes it easy to actually clean up the index if this is need. The actual cleanup is done by cleanClauseIndex(). On the clause index, it maintains a `dirty' that indicates how many of the buckets contain erased clauses. Each bucket maintains a dirty count that indicates the number of references to erased clauses in that bucket. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ static void deleteActiveClauseFromIndex(ClauseIndex ci, Clause cl) { word key = indexKeyFromClause(ci, cl, NULL); if ( key == 0 ) /* not indexed */ { int i; ClauseBucket cb; for(i=ci->buckets, cb = ci->entries; --i>=0; cb++) { if ( cb->dirty == 0 ) ci->dirty++; if ( ci->is_list ) deleteActiveClauseFromBucket(cb, key); else cb->dirty++; } assert(ci->dirty == ci->buckets); } else { int hi = hashIndex(key, ci->buckets); ClauseBucket cb = &ci->entries[hi]; if ( cb->dirty == 0 ) ci->dirty++; if ( ci->is_list ) deleteActiveClauseFromBucket(cb, key); else cb->dirty++; assert(cb->dirty>0); } } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - deleteActiveClauseFromIndexes() is called on a retract from a dynamic predicate that is referenced or has too many clauses to justify a costly update of its clause lists. It is also called from removeClausesPredicate(), which is called when reloading a source file. For dynamic predicates, the predicate is locked. L_PREDICATE is held if def is static. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ void deleteActiveClauseFromIndexes(Definition def, Clause cl) { ClauseIndex *cip; shrunkpow2(def); if ( (cip=def->impl.clauses.clause_indexes) ) { for(; *cip; cip++) { ClauseIndex ci = *cip; if ( ISDEADCI(ci) ) continue; while( ci->incomplete ) wait_for_index(ci); if ( ci->invalid ) return; if ( true(def, P_DYNAMIC) ) { if ( def->impl.clauses.number_of_clauses < ci->resize_below ) { DEBUG(MSG_JIT_DELINDEX, Sdprintf("Deleted index %d from %s (shrunk too much)\n", (int)ci->args[0], predicateName(def))); deleteIndexP(def, &def->impl.clauses, cip); } else { deleteActiveClauseFromIndex(ci, cl); } } else { deleteIndexP(def, &def->impl.clauses, cip); } } } } void deleteIndexes(ClauseList clist, int isnew) { ClauseIndex *cip0; assert(isnew); /* TBD for non-new */ if ( (cip0=clist->clause_indexes) ) { ClauseIndex *cip; for(cip = cip0; *cip; cip++) { ClauseIndex ci = *cip; if ( ISDEADCI(ci) ) continue; unallocClauseIndexTable(ci); } unalloc_index_array(cip0); clist->clause_indexes = NULL; } } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Only called from destroyDefinition(), which is only called when removing a temporary module or during final cleanup. So, there are no more references. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ void deleteIndexesDefinition(Definition def) { ClauseList clist = &def->impl.clauses; ClauseIndex *cip0; if ( (cip0=clist->clause_indexes) ) { ClauseIndex *cip; for(cip = cip0; *cip; cip++) { ClauseIndex ci = *cip; if ( ISDEADCI(ci) ) continue; deleteIndexP(def, clist, cip); } freeHeap(cip0, 0); clist->clause_indexes = NULL; } } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Add a clause to an index. If the clause cannot be indexed (typically because it has a variable at the argument location), the clause must be added to all indexes. ClauseIndex->size maintains the number of elements in the list that are indexed. This is needed for resizing the index. TBD: Merge compound detection with skipToTerm() - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ static int addClauseToIndex(ClauseIndex ci, Clause cl, ClauseRef where) { ClauseBucket ch = ci->entries; Code pc = NULL; word key = indexKeyFromClause(ci, cl, &pc); word arg1key = 0; if ( ci->is_list ) /* find first argument key for term */ { if ( key == 0 ) return FALSE; switch(decode(*pc)) { case H_FUNCTOR: case H_LIST: case H_RFUNCTOR: case H_RLIST: pc = stepPC(pc); argKey(pc, 0, &arg1key); } } if ( key == 0 ) /* a non-indexable field */ { int n = ci->buckets; for(; n; n--, ch++) addClauseBucket(ch, cl, key, arg1key, where, ci->is_list); } else { int hi = hashIndex(key, ci->buckets); DEBUG(MSG_INDEX_UPDATE, Sdprintf("Storing in bucket %d\n", hi)); ci->size += addClauseBucket(&ch[hi], cl, key, arg1key, where, ci->is_list); } return TRUE; } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - addClauseToIndexes() is called (only) by assertProcedure(), which has the definition locked. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ int addClauseToIndexes(Definition def, Clause clause, ClauseRef where) { addClauseToListIndexes(def, &def->impl.clauses, clause, where); reconsider_index(def); DEBUG(CHK_SECURE, checkDefinition(def)); return TRUE; } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Called from unlinkClause(), which is called for retracting a clause from a dynamic predicate which is not referenced and has few clauses. In other cases, deleteActiveClauseFromIndex() is called. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ void delClauseFromIndex(Definition def, Clause cl) { ClauseIndex *cip; shrunkpow2(def); for(cip=def->impl.clauses.clause_indexes; *cip; cip++) { ClauseIndex ci = *cip; ClauseBucket ch; word key; if ( ISDEADCI(ci) ) continue; ch = ci->entries; key = indexKeyFromClause(ci, cl, NULL); if ( key == 0 ) /* a non-indexable field */ { int n = ci->buckets; for(; n; n--, ch++) deleteClauseBucket(ch, cl, key, ci->is_list); } else { int hi = hashIndex(key, ci->buckets); ci->size -= deleteClauseBucket(&ch[hi], cl, key, ci->is_list); } } } static void wait_for_index(const ClauseIndex ci) { DEBUG(MSG_JIT, Sdprintf("[%d] waiting for index %p ...\n", PL_thread_self(), ci)); #ifdef O_PLMT pthread_mutex_lock(&GD->thread.index.mutex); if ( ci->incomplete ) pthread_cond_wait(&GD->thread.index.cond, &GD->thread.index.mutex); pthread_mutex_unlock(&GD->thread.index.mutex); DEBUG(MSG_JIT, Sdprintf("[%d] index %p %sready\n", PL_thread_self(), ci, ci->incomplete ? "not " : "")); #endif } static void completed_index(ClauseIndex ci) { #ifdef O_PLMT pthread_mutex_lock(&GD->thread.index.mutex); ci->incomplete = FALSE; pthread_cond_broadcast(&GD->thread.index.cond); pthread_mutex_unlock(&GD->thread.index.mutex); DEBUG(MSG_JIT, Sdprintf("[%d] index %p completed\n", PL_thread_self(), ci)); #else ci->incomplete = FALSE; #endif } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Create a hash-index on def for arg. We compute the hash unlocked, checking at the end that nobody messed with the clause list. If that happened anyway, we retry. At the end, we lock the definition and add the new index to the indexes of the predicate. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ static ClauseIndex hashDefinition(ClauseList clist, hash_hints *hints, IndexContext ctx) { ClauseRef cref; ClauseIndex ci; ClauseIndex *cip; DEBUG(MSG_JIT, Sdprintf("[%d] hashDefinition(%s, %s, %d) (%s)\n", PL_thread_self(), predicateName(ctx->predicate), iargsName(hints->args, NULL), 2<ln_buckets, hints->list ? "lists" : "clauses")); #if defined(O_PLMT) && !defined(NDEBUG) { GET_LD assert(LD->thread.info->access.predicate == ctx->predicate); } #endif canonicalHap(hints->args); LOCKDEF(ctx->predicate); if ( (cip=clist->clause_indexes) ) { for(; *cip; cip++) { ClauseIndex cio = *cip; if ( ISDEADCI(cio) ) continue; if ( memcmp(cio->args, hints->args, sizeof(cio->args)) == 0 ) { UNLOCKDEF(ctx->predicate); DEBUG(MSG_JIT, Sdprintf("[%d] already created\n", PL_thread_self())); return cio; } } } ci = newClauseIndexTable(hints->args, hints, ctx); insertIndex(ctx->predicate, clist, ci); UNLOCKDEF(ctx->predicate); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Adding a pause here creates a window where other threads may mark clauses as erased (CL_ERASED). This prevents them being added to the just-created index. See the test_cgc_1 test case in src/Tests/GC/test_cgc_1.pl usleep(1000); - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ for(cref = clist->first_clause; cref; cref = cref->next) { if ( false(cref->value.clause, CL_ERASED) ) { if ( !addClauseToIndex(ci, cref->value.clause, CL_END) ) { ci->invalid = TRUE; completed_index(ci); deleteIndex(ctx->predicate, clist, ci); return NULL; } } } ci->resize_above = ci->size*2; ci->resize_below = ci->size/4; completed_index(ci); return ci; } static ClauseIndex * copyIndex(ClauseIndex *org, int extra) { ClauseIndex *ncip; int size = extra; if ( org ) { ClauseIndex *cip; for(cip=org; *cip; cip++) { ClauseIndex ci = *cip; if ( !ISDEADCI(ci) ) size++; } } if ( size ) { ClauseIndex *ncipo; int i; ncipo = ncip = allocHeapOrHalt((size+1)*sizeof(*ncip)); if ( org ) { ClauseIndex *cip; for(cip=org; *cip; cip++) { ClauseIndex ci = *cip; if ( !ISDEADCI(ci) ) *ncipo++ = ci; } } for(i=0; ispeedup < ci2->speedup ? 1 : ci1->speedup > ci2->speedup ? -1 : 0; } static void sortIndexes(ClauseIndex *cip) { if ( cip ) { int i; for(i=0; cip[i]; i++) ; qsort(cip, i, sizeof(*cip), cmp_indexes); } } static int isSortedIndexes(ClauseIndex *cip) { if ( cip ) { float speedup = (float)PLMAXINT; for( ; *cip; cip++) { ClauseIndex ci = *cip; if ( ISDEADCI(ci) ) continue; if ( speedup < ci->speedup ) return FALSE; speedup = ci->speedup; } } return TRUE; } static void unalloc_index_array(void *p) { freeHeap(p, 0); } static void setIndexes(Definition def, ClauseList cl, ClauseIndex *cip) { ClauseIndex *cipo = cl->clause_indexes; MEMORY_BARRIER(); cl->clause_indexes = cip; if ( cipo ) linger_always(&def->lingering, unalloc_index_array, cipo); } /* Caller must have the predicate locked */ static void unalloc_ci(void *p) { DEBUG(MSG_JIT, Sdprintf("unallocClauseIndexTable(%p)\n", p)); unallocClauseIndexTable(p); } static void /* definition must be locked */ replaceIndex(Definition def, ClauseList cl, ClauseIndex *cip, ClauseIndex ci) { ClauseIndex old = *cip; *cip = ci; DEBUG(MSG_JIT, Sdprintf("[%d] replaceIndex(%s) %p-->%p (gen=%lld)\n", PL_thread_self(), predicateName(def), old, ci, global_generation())); if ( !ISDEADCI(old) ) { int i; /* delete corresponding assessments */ for(i=0; iargs[i]) ) { arg_info *ai = &cl->args[a-1]; ai->assessed = 0; } } linger_always(&def->lingering, unalloc_ci, old); } if ( !isSortedIndexes(cl->clause_indexes) ) { cip = copyIndex(cl->clause_indexes, 0); sortIndexes(cip); setIndexes(def, cl, cip); } } static void deleteIndexP(Definition def, ClauseList cl, ClauseIndex *cip) { replaceIndex(def, cl, cip, DEAD_INDEX); } static void deleteIndex(Definition def, ClauseList clist, ClauseIndex ci) { ClauseIndex *cip; if ( (cip=clist->clause_indexes) ) { for(; *cip; cip++) { if ( *cip == ci ) { deleteIndexP(def, clist, cip); return; } } } assert(0); } static void insertIndex(Definition def, ClauseList clist, ClauseIndex ci) { ClauseIndex *ocip; if ( (ocip=clist->clause_indexes) ) { ClauseIndex *cip = ocip; ClauseIndex *ncip; for(; *cip; cip++) { if ( ISDEADCI(*cip) ) { *cip = ci; if ( isSortedIndexes(ocip) ) return; *cip = DEAD_INDEX; } } ncip = copyIndex(ocip, 1); for(cip=ncip; *cip; cip++) { if ( ISDEADCI(*cip) ) *cip = ci; } sortIndexes(ncip); setIndexes(def, clist, ncip); } else { ClauseIndex *cip = allocHeapOrHalt(2*sizeof(*cip)); cip[0] = ci; cip[1] = NULL; clist->clause_indexes = cip; } } #if defined(O_MAINTENANCE) || defined(O_DEBUG) /******************************* * CHECKING * *******************************/ int checkClauseIndexSizes(Definition def, int nindexable) { ClauseIndex *cip; int rc = TRUE; if ( (cip=def->impl.clauses.clause_indexes) ) { for( ; *cip; cip++ ) { ClauseIndex ci = *cip; if ( ISDEADCI(ci) ) continue; if ( ci->size != nindexable ) { Sdprintf("%s has inconsistent clause index->size", predicateName(def)); rc = FALSE; } } } return rc; } void checkClauseIndexes(Definition def) { ClauseIndex *cip; if ( (cip=def->impl.clauses.clause_indexes) ) { GET_LD for(; *cip; cip++ ) { ClauseIndex ci = *cip; unsigned int i; ClauseBucket cb; unsigned int ci_dirty = 0; /* # dirty buckets */ unsigned int ci_size = 0; /* # indexable values in table */ if ( ISDEADCI(ci) ) continue; if ( ci->incomplete ) /* building async; stats may be wrong */ continue; for(i=0,cb=ci->entries; ibuckets; i++,cb++) { ClauseRef cref; unsigned int dirty = 0; Definition old; acquire_def2(def, old); for(cref=cb->head; cref; cref=cref->next) { if ( cref->d.key ) ci_size++; if ( ci->is_list ) { ClauseList cl = &cref->value.clauses; ClauseRef cr; unsigned int erased = 0; unsigned int count = 0; for(cr=cl->first_clause; cr; cr=cr->next) { if ( true(cr->value.clause, CL_ERASED) ) erased++; else count++; } assert(erased == cl->erased_clauses); assert(count == cl->number_of_clauses); if ( erased ) dirty++; } else { Clause clause = cref->value.clause; if ( true(clause, CL_ERASED) ) dirty++; } } release_def2(def, old); assert(cb->dirty == dirty); if ( cb->dirty ) ci_dirty++; } assert(ci->dirty == ci_dirty); assert(ci->size == ci_size); } } } void listIndexGenerations(Definition def, gen_t gen) { ClauseIndex *cip; if ( (cip=def->impl.clauses.clause_indexes) ) { GET_LD for(; *cip; cip++) { ClauseIndex ci = *cip; unsigned int i; if ( ISDEADCI(ci) ) continue; Sdprintf("\nHash %sindex for %s (%d dirty)\n", ci->is_list ? "list-" : "", iargsName(ci->args, NULL), ci->dirty); for(i=0; ibuckets; i++) { ClauseRef cref; if ( !ci->entries[i].head && !ci->entries[i].dirty ) continue; Sdprintf("\nEntries at i = %d, dirty = %d:\n", i, ci->entries[i].dirty); acquire_def(def); for(cref=ci->entries[i].head; cref; cref=cref->next) { if ( ci->is_list ) { ClauseList cl = &cref->value.clauses; ClauseRef cr; Sdprintf("List count=%d, erased=%d (%s)\n", cl->number_of_clauses, cl->erased_clauses, keyName(cref->d.key)); for(cr=cl->first_clause; cr; cr=cr->next) { Clause clause = cr->value.clause; Sdprintf(" %p: [%2d] %8s-%10s%s%s\n", clause, clauseNo(clause, 0), generationName(clause->generation.created), generationName(clause->generation.erased), true(clause, CL_ERASED) ? " erased" : "", visibleClause(clause, gen) ? " v" : " X"); } } else { Clause clause = cref->value.clause; Sdprintf("%p: [%2d] %8s-%10s%s%s%s\n", clause, clauseNo(clause, 0), generationName(clause->generation.created), generationName(clause->generation.erased), true(clause, CL_ERASED) ? " erased" : "", visibleClause(clause, gen) ? " v " : " X ", keyName(cref->d.key)); } } release_def(def); } } } } #endif /*O_MAINTENANCE||O_DEBUG*/ /******************************* * ASSESSMENT * *******************************/ /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - These functions access the index capabilities for a given key-set. It establishes whether a hash makes sense, how big (#buckets) the hash should be and whether to use a list of clauses or a list of clause-lists as units in the hash. Lists of clauses are more compact and suitable for arguments that have a good distribution (i.e. few clauses match a key). Lists of clause-lists are suitable if individual keys refer to many clauses. In such cases we can create secondary index to improve. There are two cases for this: 1. The keys are functors. In that case we might be able to hash on one or more of the compound arguments. 2. It can be profitable to create a secondary hash on another argument. Note that this can always be used, whether the key is a functor or not. Even without secondary indices, lists can be profitable if a rare key and a popular key collide on the same hash-bucket. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ #define ASSESS_BUFSIZE 10 typedef struct key_asm { word key; unsigned int count; unsigned int nvcomp; /* compound with arguments */ } key_asm; typedef struct hash_assessment { iarg_t args[MAX_MULTI_INDEX]; /* arg for which to assess */ size_t allocated; /* allocated size of array */ size_t size; /* keys in array */ size_t var_count; /* # non-indexable cases */ size_t funct_count; /* # functor cases */ float stdev; /* Standard deviation */ float speedup; /* Expected speedup */ unsigned list; /* Put lists in the buckets */ size_t space; /* Space indication */ key_asm *keys; /* tmp key-set */ } hash_assessment; typedef struct assessment_set { hash_assessment *assessments; int count; int allocated; hash_assessment buf[ASSESS_BUFSIZE]; } assessment_set; static void init_assessment_set(assessment_set *as) { as->assessments = as->buf; as->count = 0; as->allocated = ASSESS_BUFSIZE; } static void free_keys_in_assessment_set(assessment_set *as) { int i; hash_assessment *a; for (i=0, a=as->assessments; icount; i++, a++) if ( a->keys ) free(a->keys); } static void free_assessment_set(assessment_set *as) { if ( as->assessments != as->buf ) free(as->assessments); } static hash_assessment * /* TBD: resource error */ alloc_assessment(assessment_set *as, iarg_t *ia) { hash_assessment *a; if ( as->count >= as->allocated ) { size_t newbytes = sizeof(*as->assessments)*2*as->allocated; if ( as->assessments == as->buf ) { as->assessments = malloc(newbytes); memcpy(as->assessments, as->buf, sizeof(as->buf)); } else { as->assessments = realloc(as->assessments, newbytes); } as->allocated *= 2; } a = &as->assessments[as->count++]; memset(a, 0, sizeof(*a)); memcpy(a->args, ia, sizeof(a->args)); canonicalHap(a->args); return a; } static int best_hash_assessment(const void *p1, const void *p2, void *ctx) { ClauseList clist = ctx; const iarg_t *a1 = p1; const iarg_t *a2 = p2; const arg_info *i1 = &clist->args[*a1]; const arg_info *i2 = &clist->args[*a2]; return i1->speedup - i2->speedup > 0 ? -1 : i1->speedup - i2->speedup < 0 ? 1 : 0; } static void sort_assessments(ClauseList clist, iarg_t *instantiated, int ninstantiated) { sort_r(instantiated, ninstantiated, sizeof(*instantiated), best_hash_assessment, clist); } static int compar_keys(const void *p1, const void *p2) { const key_asm *k1 = p1; const key_asm *k2 = p2; intptr_t d = (k1->key - k2->key); return d < 0 ? -1 : d > 0 ? 1 : 0; } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Only the final call gets a clause_count > 0. Here we do the remainder of the assessment. We could consider for a separate function to merely reduce the set. (*) Currently we cannot combine variables with functor indexes as firstClause() deterministically goes into the matching functor and after going into the recursive indexes we loose the context to find the unbound clause. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ static int assess_remove_duplicates(hash_assessment *a, size_t clause_count) { key_asm *s = a->keys; key_asm *o = a->keys-1; key_asm *e = &s[a->size]; word c = 0; /* invalid key */ size_t fc = 0; size_t i = 0; float A=0.0, Q=0.0; a->speedup = 0.0; if ( !a->keys ) return FALSE; qsort(a->keys, a->size, sizeof(key_asm), compar_keys); for( ; skey != c ) { if ( i++ > 0 && clause_count ) { float A0 = A; A = A+((float)o->count-A)/(float)(i-1); Q = Q+((float)o->count-A0)*((float)o->count-A); if ( o->nvcomp ) fc += o->nvcomp - 1; /* no point if there is just one */ } c = s->key; *++o = *s; } else { o->count += s->count; s->nvcomp += s->nvcomp; } } if ( i > 0 && clause_count ) { float A0 = A; A = A+((float)o->count-A)/(float)i; Q = Q+((float)o->count-A0)*((float)o->count-A); if ( o->nvcomp ) fc += o->nvcomp - 1; a->funct_count = fc; } a->size = i; /* assess quality */ if ( clause_count ) { a->stdev = (float)sqrt(Q/(float)i); a->list = FALSE; if ( a->size == 1 ) /* Single value that is not compound */ { if ( !isFunctor(a->keys[0].key) ) return FALSE; } a->speedup = (float)(clause_count*a->size) / (float)(clause_count - a->var_count + a->var_count*a->size); /* punish bad distributions */ a->speedup /= (float)1.0 + a->stdev*(float)a->size/(float)clause_count; a->space = ( a->size * sizeof(struct clause_bucket) + clause_count * SIZEOF_CREF_CLAUSE + a->size * a->var_count * SIZEOF_CREF_CLAUSE ); if ( a->speedup < 0.8*(float)clause_count && a->funct_count > 0 && a->var_count == 0 ) /* See (*) */ { a->list = TRUE; a->space += a->size * SIZEOF_CREF_LIST; } if ( (float)a->var_count/(float)a->size > MAX_VAR_FRAC ) { a->speedup = 0.0; return FALSE; /* not indexable */ } } return TRUE; } static int assessAddKey(hash_assessment *a, word key, int nvcomp) { if ( a->size > 0 && a->keys[a->size-1].key == key ) { a->keys[a->size-1].count++; /* TBD: Keep last-key? */ if ( nvcomp ) a->keys[a->size-1].nvcomp++; } else if ( a->size < a->allocated ) { put_key: a->keys[a->size].key = key; a->keys[a->size].count = 1; a->keys[a->size].nvcomp = nvcomp; a->size++; } else { if ( a->allocated == 0 ) { a->allocated = 512; if ( !(a->keys = malloc(a->allocated*sizeof(*a->keys))) ) return FALSE; } else { assess_remove_duplicates(a, 0); if ( a->size*2 > a->allocated ) { key_asm *new = realloc(a->keys, a->allocated*2*sizeof(*a->keys)); if ( !new ) return FALSE; a->keys = new; a->allocated *= 2; } } goto put_key; } return TRUE; } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Given an array of assessments, scan through all (life) clauses of the predicate and add their key to the assessment. To do this: - We first find all arguments involved in some index - Then, for each clause - We decompile adding placing the key of each argument involved in some index in the array `keys`. - Walk through the assessments and update them. TBD: if some argument has too many non-indexable values we could stop trying. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ static Code skipToTerm(Clause clause, const iarg_t *position) { int an; Code pc = clause->codes; for(; (an = *position) != END_INDEX_POS; position++) { code c; DEBUG(MSG_INDEX_DEEP, Sdprintf("Skipping to arg %d for clause %d of %s\n", an, clauseNo(clause, 0), predicateName(clause->predicate))); if ( an > 0 ) pc = skipArgs(pc, an); again: c = decode(*pc); switch(c) { case H_FUNCTOR: case H_LIST: case H_RFUNCTOR: case H_RLIST: break; case H_LIST_FF: /* FF1, FF2 */ { static code var[2]; /* Create dummy code */ var[0] = encode(H_VOID); var[1] = encode(H_VOID); return var; } case I_CHP: pc = stepPC(pc); goto again; #if defined(O_DEBUG) || defined(O_MAINTENANCE) case H_FIRSTVAR: case H_VAR: case H_VOID: case H_VOID_N: case H_POP: case I_EXITCATCH: case I_EXITRESET: case I_EXITFACT: case I_EXIT: /* fact */ case I_ENTER: /* fix H_VOID, H_VOID, I_ENTER */ return pc; default: Sdprintf("Unexpected VM code %" PRIuPTR " at %p\n", c, pc); Sdprintf("\topcode=%s\n", codeTable[c].name); assert(0); #else default: return pc; #endif } pc = stepPC(pc); } return pc; } static int indexableCompound(Code pc) { pc = stepPC(pc); /* skip functor */ for(;; pc = stepPC(pc)) { switch(decode(*pc)) { case H_FIRSTVAR: case H_VAR: continue; case H_POP: return FALSE; default: return TRUE; } } } static void assess_scan_clauses(ClauseList clist, size_t arity, hash_assessment *assessments, int assess_count, IndexContext ctx) { hash_assessment *a; ClauseRef cref; int i; bit_vector *ai = alloca(sizeof_bitvector(arity)); int kp[MAXINDEXARG+1]; /* key-arg positions */ int nk = 0; /* number of key args */ int *kpp; word keys[MAXINDEXARG]; char nvcomp[MAXINDEXARG]; init_bitvector(ai, arity); for(i=0, a=assessments; iargs[j]; j++) { if ( !true_bit(ai, a->args[j]-1) ) { set_bit(ai, a->args[j]-1); } } } for(i=0; ifirst_clause; cref; cref=cref->next) { Clause cl = cref->value.clause; Code pc; int carg = 0; if ( true(cl, CL_ERASED) ) continue; pc = skipToTerm(cref->value.clause, ctx->position); for(kpp=kp; kpp[0] >= 0; kpp++) { if ( kpp[0] > carg ) pc = skipArgs(pc, kpp[0]-carg); carg = kpp[0]; argKey(pc, 0, &keys[kpp[0]]); nvcomp[kpp[0]] = FALSE; /* see whether this a compound */ if ( isFunctor(keys[kpp[0]]) ) /* with nonvar args */ { if ( indexableCompound(pc) ) nvcomp[kpp[0]] = TRUE; } } for(i=0, a=assessments; iargs[1] ) /* single argument index */ { word key; int an = a->args[0]-1; if ( (key=keys[an]) ) { assessAddKey(a, key, nvcomp[an]); } else { a->var_count++; goto next_assessment; } } else /* multi-argument index */ { word key[MAX_MULTI_INDEX]; int harg; for(harg=0; a->args[harg]; harg++) { if ( !(key[harg] = keys[a->args[harg]-1]) ) { a->var_count++; goto next_assessment; } } assessAddKey(a, murmur_key(key, sizeof(word)*harg), FALSE); } next_assessment: ; } } } static hash_assessment * best_assessment(hash_assessment *assessments, int count, size_t clause_count) { int i; hash_assessment *a, *best = NULL; float minbest = MIN_SPEEDUP; for(i=0, a=assessments; ispeedup > minbest ) { best = a; minbest = a->speedup; } } return best; } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - bestHash() finds the best argument for creating a hash, given a concrete argument vector and a list of clauses. To do so, it establishes the following figures: - Total number of non-erased clauses in cref - For each indexable argument - The count of distinct values - The count of non-indexable clauses (i.e. clauses with a var at that argument) Now, the hash-table has a space that is #clauses*(nvars+1), while the expected speedup is #clauses * #distinct ---------------------------------- #clauses - #var + #var * #distinct @returns TRUE if a best hash was found. Details on the best hash are in *hints. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ static int bestHash(DECL_LD Word av, size_t ac, ClauseList clist, float min_speedup, hash_hints *hints, IndexContext ctx) { int i; assessment_set aset; hash_assessment *a; int best = -1; float best_speedup = 0.0; iarg_t ia[MAX_MULTI_INDEX] = {0}; iarg_t *instantiated; int ninstantiated = 0; instantiated = alloca(ac*sizeof(*instantiated)); init_assessment_set(&aset); if ( !clist->args ) { arg_info *ai = allocHeapOrHalt(ac*sizeof(*ai)); memset(ai, 0, ac*sizeof(*ai)); if ( !COMPARE_AND_SWAP_PTR(&clist->args, NULL, ai) ) freeHeap(ai, ac*sizeof(*ai)); } /* Step 1: find instantiated args */ for(i=0; iargs[arg].assessed ) { ia[0] = arg+1; alloc_assessment(&aset, ia); } } if ( aset.count ) /* Step 3: assess them */ { assess_scan_clauses(clist, ac, aset.assessments, aset.count, ctx); for(i=0, a=aset.assessments; iargs[a->args[0]-1]; if ( assess_remove_duplicates(a, clist->number_of_clauses) ) { DEBUG(MSG_JIT, Sdprintf("Assess index %s of %s: speedup %f, stdev=%f\n", iargsName(a->args, NULL), predicateName(ctx->predicate), a->speedup, a->stdev)); ainfo->speedup = a->speedup; ainfo->list = a->list; ainfo->ln_buckets = MSB(a->size); } else { ainfo->speedup = 0.0; ainfo->list = FALSE; ainfo->ln_buckets = 0; DEBUG(MSG_JIT, Sdprintf("Assess index %s of %s: not indexable\n", iargsName(a->args, NULL), predicateName(ctx->predicate))); } ainfo->assessed = TRUE; if ( a->keys ) free(a->keys); } free_assessment_set(&aset); } /* Step 4: find the best (single) arg */ for(i=0; iargs[arg]; if ( ainfo->speedup > best_speedup ) { best = arg; best_speedup = ainfo->speedup; } } if ( best >= 0 && (float)clist->number_of_clauses/best_speedup > 3 ) { int ok, m, n; sort_assessments(clist, instantiated, ninstantiated); for( ok=0; okargs[instantiated[ok]].speedup > MIN_SPEEDUP; ok++ ) ; if ( ok >= 2 && ++clist->jiti_tried <= ac ) { hash_assessment *nbest; DEBUG(MSG_JIT, Sdprintf("%s: %zd clauses, index [%d]: speedup = %f" "; %d promising arguments\n", predicateName(ctx->predicate), clist->number_of_clauses, best+1, best_speedup, ok)); init_assessment_set(&aset); for(m=1; mnumber_of_clauses); if ( nbest && nbest->speedup > best_speedup*MIN_SPEEDUP ) { DEBUG(MSG_JIT, Sdprintf("%s: using index %s, speedup = %f\n", predicateName(ctx->predicate), iargsName(nbest->args, NULL), nbest->speedup)); memset(hints, 0, sizeof(*hints)); memcpy(hints->args, nbest->args, sizeof(nbest->args)); hints->ln_buckets = MSB(nbest->size); hints->speedup = nbest->speedup; free_keys_in_assessment_set(&aset); free_assessment_set(&aset); return TRUE; } free_keys_in_assessment_set(&aset); free_assessment_set(&aset); } } if ( best >= 0 && best_speedup > min_speedup ) { arg_info *ainfo = &clist->args[best]; memset(hints, 0, sizeof(*hints)); hints->args[0] = best+1; hints->ln_buckets = ainfo->ln_buckets; hints->speedup = ainfo->speedup; hints->list = ainfo->list; return TRUE; } return FALSE; } /******************************* * PREDICATE PROPERTY SUPPORT * *******************************/ /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Index info is of the form Where - hash(Buckets, Speedup, SizeInBytes, IsList) Where is one of - Simple index single(ArgN) - Multi-argument index multi([Arg1,Arg2,...]) - Deep index deep([Arg1,Arg2,...]) - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ static size_t sizeofClauseIndex(ClauseIndex ci) { size_t size = sizeof(*ci); size_t vars = 0; ClauseRef cref; size_t usize = ci->is_list ? SIZEOF_CREF_LIST : SIZEOF_CREF_CLAUSE; size += ci->buckets * sizeof(*ci->entries); size += ci->size * usize; for(cref=ci->entries[0].head; cref; cref=cref->next) { if ( cref->d.key == 0 ) vars++; } size += vars * ci->buckets * usize; return size; } size_t sizeofClauseIndexes(Definition def) { GET_LD ClauseIndex *cip; size_t size = 0; if ( (cip=def->impl.clauses.clause_indexes) ) { acquire_def(def); for(; *cip; cip++) { ClauseIndex ci = *cip; if ( ISDEADCI(ci) ) continue; size += sizeofClauseIndex(ci); } release_def(def); } return size; } static int unify_clause_index(term_t t, ClauseIndex ci) { GET_LD term_t where = PL_new_term_ref(); term_t tmp = PL_new_term_ref(); if ( !(where=PL_new_term_ref()) || !(tmp =PL_new_term_ref()) ) return FALSE; if ( ci->args[1] ) { int i; PL_put_nil(where); for(i=MAX_MULTI_INDEX-1; i>= 0; i--) { if ( ci->args[i] ) { if ( !PL_put_integer(tmp, ci->args[i]) || !PL_cons_list(where, tmp, where) ) return FALSE; } } if ( !PL_cons_functor(where, FUNCTOR_multi1, where) ) return FALSE; } else { if ( !PL_put_integer(where, ci->args[0]) || !PL_cons_functor(where, FUNCTOR_single1, where) ) return FALSE; } if ( ci->position[0] != END_INDEX_POS ) { iarg_t *ap = ci->position; term_t nil; if ( !(nil=PL_new_term_ref()) || !PL_put_nil(nil) || !PL_cons_functor(where, FUNCTOR_dot2, where, nil) ) return FALSE; while(*ap != END_INDEX_POS) ap++; for(--ap; ap >= ci->position; ap--) { if ( !PL_put_integer(tmp, (*ap)+1) || !PL_cons_list(where, tmp, where) ) return FALSE; } if ( !PL_cons_functor(where, FUNCTOR_deep1, where) ) return FALSE; } return PL_unify_term(t, PL_FUNCTOR, FUNCTOR_minus2, PL_TERM, where, PL_FUNCTOR, FUNCTOR_hash4, PL_INT, (int)ci->buckets, PL_DOUBLE, (double)ci->speedup, PL_INT64, (int64_t)sizeofClauseIndex(ci), PL_BOOL, ci->is_list); } #define add_deep_indexes(ci, head, tail) LDFUNC(add_deep_indexes, ci, head, tail) static int add_deep_indexes(DECL_LD ClauseIndex ci, term_t head, term_t tail) { size_t i; for(i=0; ibuckets; i++) { ClauseRef cref = ci->entries[i].head; for(; cref; cref = cref->next) { if ( isFunctor(cref->d.key) ) { ClauseList cl = &cref->value.clauses; ClauseIndex *cip; if ( (cip = cl->clause_indexes) ) { for(; *cip; cip++) { ClauseIndex ci = *cip; if ( ISDEADCI(ci) ) continue; if ( !PL_unify_list(tail, head, tail) || !unify_clause_index(head, ci) ) return FALSE; if ( ci->is_list && !add_deep_indexes(ci, head, tail) ) return FALSE; } } } } } return TRUE; } bool unify_index_pattern(Procedure proc, term_t value) { GET_LD Definition def = getProcDefinition(proc); ClauseIndex *cip; int rc = FALSE; int found = 0; acquire_def(def); if ( (cip=def->impl.clauses.clause_indexes) ) { term_t tail = PL_copy_term_ref(value); term_t head = PL_new_term_ref(); for(; *cip; cip++) { ClauseIndex ci = *cip; if ( ISDEADCI(ci) ) continue; found++; if ( !PL_unify_list(tail, head, tail) || !unify_clause_index(head, ci) ) goto out; if ( ci->is_list ) { if ( !add_deep_indexes(ci, head, tail) ) goto out; } } rc = found && PL_unify_nil(tail); } out: release_def(def); return rc; } /******************************* * INIT * *******************************/ void initClauseIndexing(void) { GD->clause_index.min_speedup = 1.5f; GD->clause_index.max_var_fraction = 0.1f; GD->clause_index.min_speedup_ratio = 10.0f; GD->clause_index.max_lookahead = 100; GD->clause_index.min_clauses = 10; } /******************************* * PUBLISH PREDICATES * *******************************/ BeginPredDefs(index) EndPredDefs