/* * The MIT License * * Copyright 2020 The OpenNARS authors. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ #include "Narsese.h" #include "NAR.h" //Atomic term names: char Narsese_atomNames[ATOMS_MAX][ATOMIC_TERM_LEN_MAX]; char Narsese_operatorNames[OPERATIONS_MAX][ATOMIC_TERM_LEN_MAX]; //upper bound of multplier 3 given by [ becoming "(' " replacement #define REPLACEMENT_LEN 3*NARSESE_LEN_MAX //size for the expanded array with spaces for tokenization, has at most 3 times the amount of chars as the replacement array #define EXPANSION_LEN REPLACEMENT_LEN*3 //whether the package is initialized static bool initialized = false; //SELF atom, avoids strcmp for checking operator format Atom SELF; //Replace copulas with canonical single-char copulas, including sets and set elements! char* replaceWithCanonicalCopulas(char *narsese, int n) { static char narsese_replaced[REPLACEMENT_LEN]; memset(narsese_replaced, ' ', REPLACEMENT_LEN); //upper bound of 3 increment given by max. look-ahead of --> becoming :, plus 0 at the end assert(n+3 <= NARSESE_LEN_MAX, "NARSESE_LEN_MAX too small, consider increasing or split input into multiple statements! \n"); int j=0; for(int i=0; i') // }, ], > becomes ) { narsese_replaced[j] = ')'; i++; j++; } else if(narsese[i] == '<' && narsese[i+1] != '-') // < becomes ( { narsese_replaced[j] = '('; i++; j++; } else if(i+1 < n) { if(narsese[i] == '&' && narsese[i+1] == '/') // &/ becomes + { narsese_replaced[j] = '+'; i+=2; j++; } else if(narsese[i] == '&' && narsese[i+1] == '|') // &| becomes ; { narsese_replaced[j] = ';'; i+=2; j++; } else if(narsese[i] == '&' && narsese[i+1] == '&') // && becomes ; { narsese_replaced[j] = ';'; i+=2; j++; } else if(narsese[i] == '/' && narsese[i+1] == '1') // /1 becomes / { narsese_replaced[j] = '/'; i+=2; j++; } else if(narsese[i] == '/' && narsese[i+1] == '2') // /2 becomes % { narsese_replaced[j] = '%'; i+=2; j++; } else if(narsese[i] == '\\' && narsese[i+1] == '1') // \1 becomes backslash { narsese_replaced[j] = '\\'; i+=2; j++; } else if(narsese[i] == '\\' && narsese[i+1] == '2') // \2 becomes # { narsese_replaced[j] = '#'; i+=2; j++; } else if(i+2 < n) { if(narsese[i] == '-' && narsese[i+1] == '-' && narsese[i+2] == '>') // --> becomes : { narsese_replaced[j] = ':'; i+=3; j++; } else if(narsese[i] == '<' && narsese[i+1] == '-' && narsese[i+2] == '>') // -<-> becomes : { narsese_replaced[j] = '='; i+=3; j++; } else if(narsese[i] == '=' && narsese[i+1] == '/' && narsese[i+2] == '>') // =/> becomes $ { narsese_replaced[j] = '$'; i+=3; j++; } else if(narsese[i] == '=' && narsese[i+1] == '=' && narsese[i+2] == '>') // ==> becomes $ { narsese_replaced[j] = '$'; i+=3; j++; } else if(narsese[i] == '-' && narsese[i+1] == '-' && narsese[i+2] != '>') // -- becomes ! { narsese_replaced[j] = '!'; i+=3; j++; } else { goto DEFAULT; } } else { goto DEFAULT; } } else { goto DEFAULT; } continue; DEFAULT: narsese_replaced[j] = narsese[i]; //default i++; j++; } narsese_replaced[j] = 0; return narsese_replaced; } char* Narsese_Expand(char *narsese) { //upper bound being 3* the multiplier of the previous upper bound static char narsese_expanded[EXPANSION_LEN]; memset(narsese_expanded, ' ', EXPANSION_LEN); char *narsese_replaced = replaceWithCanonicalCopulas(narsese, strlen(narsese)); int k = 0, n = strlen(narsese_replaced); for(int i=0; i=i+2; j--) { char *temp = tokens[j]; tokens[j] = tokens[j-1]; tokens[j-1] = temp; } tokens[i+1][0] = copula; tokens[i+1][1] = 0; } } Continue:; } return tokens; } int operator_index = 0; int Narsese_OperatorIndex(char *name) { int ret_index = -1; for(int i=0; iatoms[tree_index-1] = Narsese_AtomicTermIndex(tokens_prefix[icop]); if(i1atoms[tree_index-1] = Narsese_AtomicTermIndex(tokens_prefix[i1]); } else { bintree->atoms[tree_index-1] = Narsese_AtomicTermIndex("@"); //just use "@" for second element as terminator, while "." acts for "deeper" sets than 2 } } } Term Narsese_Term(char *narsese) { assert(initialized, "Narsese not initialized, call Narsese_INIT first!"); //parse Narsese by expanding it, bringing into prefix form, then building a binary tree, and normalizing variables Term ret = {0}; char *narsese_expanded = Narsese_Expand(narsese); char** tokens_prefix = Narsese_PrefixTransform(narsese_expanded); int nt = 0; for(;tokens_prefix[nt] != NULL; nt++){} buildBinaryTree(&ret, tokens_prefix, 0, 1, nt); Variable_Normalize(&ret); Term_Hash(&ret); return ret; } void Narsese_Sentence(char *narsese, Term *destTerm, char *punctuation, bool *isEvent, bool *isUserKnowledge, Truth *destTv) { assert(initialized, "Narsese not initialized, call Narsese_INIT first!"); char narseseInplace[NARSESE_LEN_MAX] = {0}; destTv->frequency = NAR_DEFAULT_FREQUENCY; destTv->confidence = NAR_DEFAULT_CONFIDENCE; int len = strlen(narsese); assert(len > 1, "Parsing error: Narsese string too short!"); assert(len < NARSESE_LEN_MAX, "Parsing error: Narsese string too long!"); //< because of '0' terminated strings memcpy(narseseInplace, narsese, len); //tv is present if last letter is '}' if(len>=2 && narseseInplace[len-1] == '}') { //scan for opening '{' int openingIdx; for(openingIdx=len-2; openingIdx>=0 && narseseInplace[openingIdx] != '{'; openingIdx--); assert(narseseInplace[openingIdx] == '{', "Parsing error: Truth value opener not found!"); double conf, freq; sscanf(&narseseInplace[openingIdx], "{%lf %lf}", &freq, &conf); destTv->frequency = freq; destTv->confidence = conf; assert(narseseInplace[openingIdx-1] == ' ', "Parsing error: Space before truth value required!"); narseseInplace[openingIdx-1] = 0; //cut it away for further parsing of term } //parse event marker, punctuation, and finally the term: int str_len = strlen(narseseInplace); *isEvent = str_len >= 3 && narseseInplace[str_len-1] == ':' && narseseInplace[str_len-2] == '|' && narseseInplace[str_len-3] == ':'; *isUserKnowledge = str_len >= 3 && narseseInplace[str_len-1] == ':' && narseseInplace[str_len-2] == '@' && narseseInplace[str_len-3] == ':'; int punctuation_offset = (*isEvent || *isUserKnowledge) ? 5 : 1; *punctuation = narseseInplace[str_len-punctuation_offset]; assert(*punctuation == '!' || *punctuation == '?' || *punctuation == '.', "Parsing error: Punctuation has to be belief . goal ! or question ?"); narseseInplace[str_len-punctuation_offset] = 0; //we will only parse the term before it *destTerm = Narsese_Term(narseseInplace); } Term Narsese_Sequence(Term *a, Term *b, bool *success) { Term ret = {0}; ret.atoms[0] = Narsese_AtomicTermIndex("+"); *success = Term_OverrideSubterm(&ret,1,a) && Term_OverrideSubterm(&ret,2,b); return *success ? ret : (Term) {0}; } Term Narsese_AtomicTerm(char *name) { int number = Narsese_AtomicTermIndex(name); Term ret = {0}; ret.atoms[0] = number; return ret; } void Narsese_PrintAtom(Atom atom) { if(atom) { if(Narsese_copulaEquals(atom, ':')) { fputs("-->", stdout); } else if(Narsese_copulaEquals(atom, '$')) { fputs("=/>", stdout); } else if(Narsese_copulaEquals(atom, '+')) { fputs("&/", stdout); } else if(Narsese_copulaEquals(atom, ';')) { fputs("&|", stdout); } else if(Narsese_copulaEquals(atom, '=')) { fputs("<->", stdout); } else if(Narsese_copulaEquals(atom, '/')) { fputs("/1", stdout); } else if(Narsese_copulaEquals(atom, '%')) { fputs("/2", stdout); } else if(Narsese_copulaEquals(atom, '\\')) { fputs("\\1", stdout); } else if(Narsese_copulaEquals(atom, '#')) { fputs("\\2", stdout); } else { fputs(Narsese_atomNames[atom-1], stdout); } } else { fputs("@", stdout); } } void Narsese_PrintTermPrettyRecursive(Term *term, int index) //start with index=1! { Atom atom = term->atoms[index-1]; if(!atom) { return; } int child1 = index*2; int child2 = index*2+1; bool hasLeftChild = child1 < COMPOUND_TERM_SIZE_MAX && term->atoms[child1-1]; bool hasRightChild = child2 < COMPOUND_TERM_SIZE_MAX && term->atoms[child2-1] && !Narsese_copulaEquals(term->atoms[child2-1], '@'); bool isNegation = Narsese_copulaEquals(atom, '!'); bool isExtSet = Narsese_copulaEquals(atom, '"'); bool isIntSet = Narsese_copulaEquals(atom, '\''); bool isStatement = Narsese_copulaEquals(atom, '$') || Narsese_copulaEquals(atom, ':') || Narsese_copulaEquals(atom, '='); if(isExtSet) { fputs(hasLeftChild ? "{" : "", stdout); } else if(isIntSet) { fputs(hasLeftChild ? "[" : "", stdout); } else if(isStatement) { fputs(hasLeftChild ? "<" : "", stdout); } else { fputs(hasLeftChild ? "(" : "", stdout); if(isNegation) { Narsese_PrintAtom(atom); fputs(" ", stdout); } } if(child1 < COMPOUND_TERM_SIZE_MAX) { Narsese_PrintTermPrettyRecursive(term, child1); } if(hasRightChild) { fputs(hasLeftChild ? " " : "", stdout); } if(!isExtSet && !isIntSet && !Narsese_copulaEquals(atom, '@')) { if(!isNegation) { Narsese_PrintAtom(atom); fputs(hasLeftChild ? " " : "", stdout); } } if(child2 < COMPOUND_TERM_SIZE_MAX) { Narsese_PrintTermPrettyRecursive(term, child2); } if(isExtSet) { fputs(hasLeftChild ? "}" : "", stdout); } else if(isIntSet) { fputs(hasLeftChild ? "]" : "", stdout); } else if(isStatement) { fputs(hasLeftChild ? ">" : "", stdout); } else { fputs(hasLeftChild ? ")" : "", stdout); } } void Narsese_PrintTerm(Term *term) { Narsese_PrintTermPrettyRecursive(term, 1); } HASH_TYPE Narsese_StringHash(char *name) { assert(name != NULL, "NULL ptr in Narsese_StringHash"); char buffer[ATOMIC_TERM_LEN_MAX] = {0}; strncpy(buffer, name, ATOMIC_TERM_LEN_MAX-1); int pieces = ATOMIC_TERM_LEN_MAX / HASH_TYPE_SIZE; assert(HASH_TYPE_SIZE*pieces == ATOMIC_TERM_LEN_MAX, "Not a multiple, issue in hash calculation (StringHash)"); return Globals_Hash((HASH_TYPE*) buffer, pieces); } bool Narsese_StringEqual(char *name1, char *name2) { assert(name1 != NULL && name2 != NULL, "NULL ptr in Narsese_StringEqual"); return !strcmp(name1, name2); } void Narsese_INIT() { HashTable_INIT(&HTatoms, HTatoms_storage, HTatoms_storageptrs, HTatoms_HT, ATOMS_HASHTABLE_BUCKETS, ATOMS_MAX, (Equal) Narsese_StringEqual, (Hash) Narsese_StringHash); operator_index = term_index = 0; for(int i=0; i0 && Narsese_atomNames[(int) atom-1][0] == name && Narsese_atomNames[(int) atom-1][1] == 0; } bool Narsese_isOperator(Atom atom) { return atom>0 && Narsese_atomNames[(int) atom-1][0] == '^'; } bool Narsese_isOperation(Term *term) //<(*,{SELF},x) --> ^op> -> [: * ^op " x _ _ SELF] or simply ^op { return Narsese_isOperator(term->atoms[0]) || (Narsese_copulaEquals(term->atoms[0], ':') && Narsese_copulaEquals(term->atoms[1], '*') && //(_ * _) --> Narsese_isOperator(term->atoms[2]) && //^op Narsese_copulaEquals(term->atoms[3], '"') && term->atoms[7] == SELF); // { SELF } } int Narsese_getOperationID(Term *term) { if(Narsese_copulaEquals(term->atoms[0], '+')) //sequence { Term potential_operator = Term_ExtractSubterm(term, 2); //(a &/ ^op) assert(!Narsese_copulaEquals(potential_operator.atoms[0], '+'), "Sequences should be left-nested encoded, never right-nested!!"); return Narsese_getOperationID(&potential_operator); } if(Narsese_isOperator(term->atoms[0])) //atomic operator { return Narsese_OperatorIndex(Narsese_atomNames[(int) term->atoms[0]-1]); } if(Narsese_isOperation(term)) //an operation, we use the operator atom's index on the right side of the inheritance { return Narsese_OperatorIndex(Narsese_atomNames[(int) term->atoms[2]-1]); } return 0; //not an operation term } Term Narsese_GetPreconditionWithoutOp(Term *precondition) { if(Narsese_copulaEquals(precondition->atoms[0], '+')) { Term potential_op = Term_ExtractSubterm(precondition, 2); if(Narsese_isOperation(&potential_op)) { return Term_ExtractSubterm(precondition, 1); } } return *precondition; } bool Narsese_IsNonCopulaAtom(Atom atom) { return atom > 0 && (Narsese_atomNames[(int) atom - 1][0] == '^' || (Narsese_atomNames[(int) atom - 1][0] >= 'a' && Narsese_atomNames[(int) atom - 1][0] <= 'z') || (Narsese_atomNames[(int) atom - 1][0] >= 'A' && Narsese_atomNames[(int) atom - 1][0] <= 'Z') || (Narsese_atomNames[(int) atom - 1][0] >= '0' && Narsese_atomNames[(int) atom - 1][0] <= '9')); } bool Narsese_IsSimpleAtom(Atom atom) { return Narsese_IsNonCopulaAtom(atom) && !Variable_isVariable(atom); }