sl@0: /* sl@0: ** 2001 September 15 sl@0: ** sl@0: ** The author disclaims copyright to this source code. In place of sl@0: ** a legal notice, here is a blessing: sl@0: ** sl@0: ** May you do good and not evil. sl@0: ** May you find forgiveness for yourself and forgive others. sl@0: ** May you share freely, never taking more than you give. sl@0: ** sl@0: ************************************************************************* sl@0: ** An tokenizer for SQL sl@0: ** sl@0: ** This file contains C code that splits an SQL input string up into sl@0: ** individual tokens and sends those tokens one-by-one over to the sl@0: ** parser for analysis. sl@0: ** sl@0: ** $Id: tokenize.c,v 1.152 2008/09/01 15:52:11 drh Exp $ sl@0: */ sl@0: #include "sqliteInt.h" sl@0: #include sl@0: #include sl@0: sl@0: /* sl@0: ** The charMap() macro maps alphabetic characters into their sl@0: ** lower-case ASCII equivalent. On ASCII machines, this is just sl@0: ** an upper-to-lower case map. On EBCDIC machines we also need sl@0: ** to adjust the encoding. Only alphabetic characters and underscores sl@0: ** need to be translated. sl@0: */ sl@0: #ifdef SQLITE_ASCII sl@0: # define charMap(X) sqlite3UpperToLower[(unsigned char)X] sl@0: #endif sl@0: #ifdef SQLITE_EBCDIC sl@0: # define charMap(X) ebcdicToAscii[(unsigned char)X] sl@0: const unsigned char ebcdicToAscii[] = { sl@0: /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */ sl@0: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x */ sl@0: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1x */ sl@0: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 2x */ sl@0: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 3x */ sl@0: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 4x */ sl@0: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 5x */ sl@0: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 95, 0, 0, /* 6x */ sl@0: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 7x */ sl@0: 0, 97, 98, 99,100,101,102,103,104,105, 0, 0, 0, 0, 0, 0, /* 8x */ sl@0: 0,106,107,108,109,110,111,112,113,114, 0, 0, 0, 0, 0, 0, /* 9x */ sl@0: 0, 0,115,116,117,118,119,120,121,122, 0, 0, 0, 0, 0, 0, /* Ax */ sl@0: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Bx */ sl@0: 0, 97, 98, 99,100,101,102,103,104,105, 0, 0, 0, 0, 0, 0, /* Cx */ sl@0: 0,106,107,108,109,110,111,112,113,114, 0, 0, 0, 0, 0, 0, /* Dx */ sl@0: 0, 0,115,116,117,118,119,120,121,122, 0, 0, 0, 0, 0, 0, /* Ex */ sl@0: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Fx */ sl@0: }; sl@0: #endif sl@0: sl@0: /* sl@0: ** The sqlite3KeywordCode function looks up an identifier to determine if sl@0: ** it is a keyword. If it is a keyword, the token code of that keyword is sl@0: ** returned. If the input is not a keyword, TK_ID is returned. sl@0: ** sl@0: ** The implementation of this routine was generated by a program, sl@0: ** mkkeywordhash.h, located in the tool subdirectory of the distribution. sl@0: ** The output of the mkkeywordhash.c program is written into a file sl@0: ** named keywordhash.h and then included into this source file by sl@0: ** the #include below. sl@0: */ sl@0: #include "keywordhash.h" sl@0: sl@0: sl@0: /* sl@0: ** If X is a character that can be used in an identifier then sl@0: ** IdChar(X) will be true. Otherwise it is false. sl@0: ** sl@0: ** For ASCII, any character with the high-order bit set is sl@0: ** allowed in an identifier. For 7-bit characters, sl@0: ** sqlite3IsIdChar[X] must be 1. sl@0: ** sl@0: ** For EBCDIC, the rules are more complex but have the same sl@0: ** end result. sl@0: ** sl@0: ** Ticket #1066. the SQL standard does not allow '$' in the sl@0: ** middle of identfiers. But many SQL implementations do. sl@0: ** SQLite will allow '$' in identifiers for compatibility. sl@0: ** But the feature is undocumented. sl@0: */ sl@0: #ifdef SQLITE_ASCII sl@0: const char sqlite3IsAsciiIdChar[] = { sl@0: /* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */ sl@0: 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 2x */ sl@0: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 3x */ sl@0: 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 4x */ sl@0: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 5x */ sl@0: 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6x */ sl@0: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 7x */ sl@0: }; sl@0: #define IdChar(C) (((c=C)&0x80)!=0 || (c>0x1f && sqlite3IsAsciiIdChar[c-0x20])) sl@0: #endif sl@0: #ifdef SQLITE_EBCDIC sl@0: const char sqlite3IsEbcdicIdChar[] = { sl@0: /* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */ sl@0: 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 4x */ sl@0: 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, /* 5x */ sl@0: 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, /* 6x */ sl@0: 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, /* 7x */ sl@0: 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, /* 8x */ sl@0: 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, /* 9x */ sl@0: 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, /* Ax */ sl@0: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Bx */ sl@0: 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, /* Cx */ sl@0: 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, /* Dx */ sl@0: 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, /* Ex */ sl@0: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, /* Fx */ sl@0: }; sl@0: #define IdChar(C) (((c=C)>=0x42 && sqlite3IsEbcdicIdChar[c-0x40])) sl@0: #endif sl@0: sl@0: sl@0: /* sl@0: ** Return the length of the token that begins at z[0]. sl@0: ** Store the token type in *tokenType before returning. sl@0: */ sl@0: int sqlite3GetToken(const unsigned char *z, int *tokenType){ sl@0: int i, c; sl@0: switch( *z ){ sl@0: case ' ': case '\t': case '\n': case '\f': case '\r': { sl@0: for(i=1; isspace(z[i]); i++){} sl@0: *tokenType = TK_SPACE; sl@0: return i; sl@0: } sl@0: case '-': { sl@0: if( z[1]=='-' ){ sl@0: for(i=2; (c=z[i])!=0 && c!='\n'; i++){} sl@0: *tokenType = TK_SPACE; sl@0: return i; sl@0: } sl@0: *tokenType = TK_MINUS; sl@0: return 1; sl@0: } sl@0: case '(': { sl@0: *tokenType = TK_LP; sl@0: return 1; sl@0: } sl@0: case ')': { sl@0: *tokenType = TK_RP; sl@0: return 1; sl@0: } sl@0: case ';': { sl@0: *tokenType = TK_SEMI; sl@0: return 1; sl@0: } sl@0: case '+': { sl@0: *tokenType = TK_PLUS; sl@0: return 1; sl@0: } sl@0: case '*': { sl@0: *tokenType = TK_STAR; sl@0: return 1; sl@0: } sl@0: case '/': { sl@0: if( z[1]!='*' || z[2]==0 ){ sl@0: *tokenType = TK_SLASH; sl@0: return 1; sl@0: } sl@0: for(i=3, c=z[2]; (c!='*' || z[i]!='/') && (c=z[i])!=0; i++){} sl@0: if( c ) i++; sl@0: *tokenType = TK_SPACE; sl@0: return i; sl@0: } sl@0: case '%': { sl@0: *tokenType = TK_REM; sl@0: return 1; sl@0: } sl@0: case '=': { sl@0: *tokenType = TK_EQ; sl@0: return 1 + (z[1]=='='); sl@0: } sl@0: case '<': { sl@0: if( (c=z[1])=='=' ){ sl@0: *tokenType = TK_LE; sl@0: return 2; sl@0: }else if( c=='>' ){ sl@0: *tokenType = TK_NE; sl@0: return 2; sl@0: }else if( c=='<' ){ sl@0: *tokenType = TK_LSHIFT; sl@0: return 2; sl@0: }else{ sl@0: *tokenType = TK_LT; sl@0: return 1; sl@0: } sl@0: } sl@0: case '>': { sl@0: if( (c=z[1])=='=' ){ sl@0: *tokenType = TK_GE; sl@0: return 2; sl@0: }else if( c=='>' ){ sl@0: *tokenType = TK_RSHIFT; sl@0: return 2; sl@0: }else{ sl@0: *tokenType = TK_GT; sl@0: return 1; sl@0: } sl@0: } sl@0: case '!': { sl@0: if( z[1]!='=' ){ sl@0: *tokenType = TK_ILLEGAL; sl@0: return 2; sl@0: }else{ sl@0: *tokenType = TK_NE; sl@0: return 2; sl@0: } sl@0: } sl@0: case '|': { sl@0: if( z[1]!='|' ){ sl@0: *tokenType = TK_BITOR; sl@0: return 1; sl@0: }else{ sl@0: *tokenType = TK_CONCAT; sl@0: return 2; sl@0: } sl@0: } sl@0: case ',': { sl@0: *tokenType = TK_COMMA; sl@0: return 1; sl@0: } sl@0: case '&': { sl@0: *tokenType = TK_BITAND; sl@0: return 1; sl@0: } sl@0: case '~': { sl@0: *tokenType = TK_BITNOT; sl@0: return 1; sl@0: } sl@0: case '`': sl@0: case '\'': sl@0: case '"': { sl@0: int delim = z[0]; sl@0: for(i=1; (c=z[i])!=0; i++){ sl@0: if( c==delim ){ sl@0: if( z[i+1]==delim ){ sl@0: i++; sl@0: }else{ sl@0: break; sl@0: } sl@0: } sl@0: } sl@0: if( c=='\'' ){ sl@0: *tokenType = TK_STRING; sl@0: return i+1; sl@0: }else if( c!=0 ){ sl@0: *tokenType = TK_ID; sl@0: return i+1; sl@0: }else{ sl@0: *tokenType = TK_ILLEGAL; sl@0: return i; sl@0: } sl@0: } sl@0: case '.': { sl@0: #ifndef SQLITE_OMIT_FLOATING_POINT sl@0: if( !isdigit(z[1]) ) sl@0: #endif sl@0: { sl@0: *tokenType = TK_DOT; sl@0: return 1; sl@0: } sl@0: /* If the next character is a digit, this is a floating point sl@0: ** number that begins with ".". Fall thru into the next case */ sl@0: } sl@0: case '0': case '1': case '2': case '3': case '4': sl@0: case '5': case '6': case '7': case '8': case '9': { sl@0: *tokenType = TK_INTEGER; sl@0: for(i=0; isdigit(z[i]); i++){} sl@0: #ifndef SQLITE_OMIT_FLOATING_POINT sl@0: if( z[i]=='.' ){ sl@0: i++; sl@0: while( isdigit(z[i]) ){ i++; } sl@0: *tokenType = TK_FLOAT; sl@0: } sl@0: if( (z[i]=='e' || z[i]=='E') && sl@0: ( isdigit(z[i+1]) sl@0: || ((z[i+1]=='+' || z[i+1]=='-') && isdigit(z[i+2])) sl@0: ) sl@0: ){ sl@0: i += 2; sl@0: while( isdigit(z[i]) ){ i++; } sl@0: *tokenType = TK_FLOAT; sl@0: } sl@0: #endif sl@0: while( IdChar(z[i]) ){ sl@0: *tokenType = TK_ILLEGAL; sl@0: i++; sl@0: } sl@0: return i; sl@0: } sl@0: case '[': { sl@0: for(i=1, c=z[0]; c!=']' && (c=z[i])!=0; i++){} sl@0: *tokenType = c==']' ? TK_ID : TK_ILLEGAL; sl@0: return i; sl@0: } sl@0: case '?': { sl@0: *tokenType = TK_VARIABLE; sl@0: for(i=1; isdigit(z[i]); i++){} sl@0: return i; sl@0: } sl@0: case '#': { sl@0: for(i=1; isdigit(z[i]); i++){} sl@0: if( i>1 ){ sl@0: /* Parameters of the form #NNN (where NNN is a number) are used sl@0: ** internally by sqlite3NestedParse. */ sl@0: *tokenType = TK_REGISTER; sl@0: return i; sl@0: } sl@0: /* Fall through into the next case if the '#' is not followed by sl@0: ** a digit. Try to match #AAAA where AAAA is a parameter name. */ sl@0: } sl@0: #ifndef SQLITE_OMIT_TCL_VARIABLE sl@0: case '$': sl@0: #endif sl@0: case '@': /* For compatibility with MS SQL Server */ sl@0: case ':': { sl@0: int n = 0; sl@0: *tokenType = TK_VARIABLE; sl@0: for(i=1; (c=z[i])!=0; i++){ sl@0: if( IdChar(c) ){ sl@0: n++; sl@0: #ifndef SQLITE_OMIT_TCL_VARIABLE sl@0: }else if( c=='(' && n>0 ){ sl@0: do{ sl@0: i++; sl@0: }while( (c=z[i])!=0 && !isspace(c) && c!=')' ); sl@0: if( c==')' ){ sl@0: i++; sl@0: }else{ sl@0: *tokenType = TK_ILLEGAL; sl@0: } sl@0: break; sl@0: }else if( c==':' && z[i+1]==':' ){ sl@0: i++; sl@0: #endif sl@0: }else{ sl@0: break; sl@0: } sl@0: } sl@0: if( n==0 ) *tokenType = TK_ILLEGAL; sl@0: return i; sl@0: } sl@0: #ifndef SQLITE_OMIT_BLOB_LITERAL sl@0: case 'x': case 'X': { sl@0: if( z[1]=='\'' ){ sl@0: *tokenType = TK_BLOB; sl@0: for(i=2; (c=z[i])!=0 && c!='\''; i++){ sl@0: if( !isxdigit(c) ){ sl@0: *tokenType = TK_ILLEGAL; sl@0: } sl@0: } sl@0: if( i%2 || !c ) *tokenType = TK_ILLEGAL; sl@0: if( c ) i++; sl@0: return i; sl@0: } sl@0: /* Otherwise fall through to the next case */ sl@0: } sl@0: #endif sl@0: default: { sl@0: if( !IdChar(*z) ){ sl@0: break; sl@0: } sl@0: for(i=1; IdChar(z[i]); i++){} sl@0: *tokenType = keywordCode((char*)z, i); sl@0: return i; sl@0: } sl@0: } sl@0: *tokenType = TK_ILLEGAL; sl@0: return 1; sl@0: } sl@0: sl@0: /* sl@0: ** Run the parser on the given SQL string. The parser structure is sl@0: ** passed in. An SQLITE_ status code is returned. If an error occurs sl@0: ** then an and attempt is made to write an error message into sl@0: ** memory obtained from sqlite3_malloc() and to make *pzErrMsg point to that sl@0: ** error message. sl@0: */ sl@0: int sqlite3RunParser(Parse *pParse, const char *zSql, char **pzErrMsg){ sl@0: int nErr = 0; sl@0: int i; sl@0: void *pEngine; sl@0: int tokenType; sl@0: int lastTokenParsed = -1; sl@0: sqlite3 *db = pParse->db; sl@0: int mxSqlLen = db->aLimit[SQLITE_LIMIT_SQL_LENGTH]; sl@0: sl@0: if( db->activeVdbeCnt==0 ){ sl@0: db->u1.isInterrupted = 0; sl@0: } sl@0: pParse->rc = SQLITE_OK; sl@0: pParse->zTail = pParse->zSql = zSql; sl@0: i = 0; sl@0: assert( pzErrMsg!=0 ); sl@0: pEngine = sqlite3ParserAlloc((void*(*)(size_t))sqlite3Malloc); sl@0: if( pEngine==0 ){ sl@0: db->mallocFailed = 1; sl@0: return SQLITE_NOMEM; sl@0: } sl@0: assert( pParse->sLastToken.dyn==0 ); sl@0: assert( pParse->pNewTable==0 ); sl@0: assert( pParse->pNewTrigger==0 ); sl@0: assert( pParse->nVar==0 ); sl@0: assert( pParse->nVarExpr==0 ); sl@0: assert( pParse->nVarExprAlloc==0 ); sl@0: assert( pParse->apVarExpr==0 ); sl@0: while( !db->mallocFailed && zSql[i]!=0 ){ sl@0: assert( i>=0 ); sl@0: pParse->sLastToken.z = (u8*)&zSql[i]; sl@0: assert( pParse->sLastToken.dyn==0 ); sl@0: pParse->sLastToken.n = sqlite3GetToken((unsigned char*)&zSql[i],&tokenType); sl@0: i += pParse->sLastToken.n; sl@0: if( i>mxSqlLen ){ sl@0: pParse->rc = SQLITE_TOOBIG; sl@0: break; sl@0: } sl@0: switch( tokenType ){ sl@0: case TK_SPACE: { sl@0: if( db->u1.isInterrupted ){ sl@0: pParse->rc = SQLITE_INTERRUPT; sl@0: sqlite3SetString(pzErrMsg, db, "interrupt"); sl@0: goto abort_parse; sl@0: } sl@0: break; sl@0: } sl@0: case TK_ILLEGAL: { sl@0: sqlite3DbFree(db, *pzErrMsg); sl@0: *pzErrMsg = sqlite3MPrintf(db, "unrecognized token: \"%T\"", sl@0: &pParse->sLastToken); sl@0: nErr++; sl@0: goto abort_parse; sl@0: } sl@0: case TK_SEMI: { sl@0: pParse->zTail = &zSql[i]; sl@0: /* Fall thru into the default case */ sl@0: } sl@0: default: { sl@0: sqlite3Parser(pEngine, tokenType, pParse->sLastToken, pParse); sl@0: lastTokenParsed = tokenType; sl@0: if( pParse->rc!=SQLITE_OK ){ sl@0: goto abort_parse; sl@0: } sl@0: break; sl@0: } sl@0: } sl@0: } sl@0: abort_parse: sl@0: if( zSql[i]==0 && nErr==0 && pParse->rc==SQLITE_OK ){ sl@0: if( lastTokenParsed!=TK_SEMI ){ sl@0: sqlite3Parser(pEngine, TK_SEMI, pParse->sLastToken, pParse); sl@0: pParse->zTail = &zSql[i]; sl@0: } sl@0: sqlite3Parser(pEngine, 0, pParse->sLastToken, pParse); sl@0: } sl@0: #ifdef YYTRACKMAXSTACKDEPTH sl@0: sqlite3StatusSet(SQLITE_STATUS_PARSER_STACK, sl@0: sqlite3ParserStackPeak(pEngine) sl@0: ); sl@0: #endif /* YYDEBUG */ sl@0: sqlite3ParserFree(pEngine, sqlite3_free); sl@0: if( db->mallocFailed ){ sl@0: pParse->rc = SQLITE_NOMEM; sl@0: } sl@0: if( pParse->rc!=SQLITE_OK && pParse->rc!=SQLITE_DONE && pParse->zErrMsg==0 ){ sl@0: sqlite3SetString(&pParse->zErrMsg, db, "%s", sqlite3ErrStr(pParse->rc)); sl@0: } sl@0: if( pParse->zErrMsg ){ sl@0: if( *pzErrMsg==0 ){ sl@0: *pzErrMsg = pParse->zErrMsg; sl@0: }else{ sl@0: sqlite3DbFree(db, pParse->zErrMsg); sl@0: } sl@0: pParse->zErrMsg = 0; sl@0: nErr++; sl@0: } sl@0: if( pParse->pVdbe && pParse->nErr>0 && pParse->nested==0 ){ sl@0: sqlite3VdbeDelete(pParse->pVdbe); sl@0: pParse->pVdbe = 0; sl@0: } sl@0: #ifndef SQLITE_OMIT_SHARED_CACHE sl@0: if( pParse->nested==0 ){ sl@0: sqlite3DbFree(db, pParse->aTableLock); sl@0: pParse->aTableLock = 0; sl@0: pParse->nTableLock = 0; sl@0: } sl@0: #endif sl@0: #ifndef SQLITE_OMIT_VIRTUALTABLE sl@0: sqlite3DbFree(db, pParse->apVtabLock); sl@0: #endif sl@0: sl@0: if( !IN_DECLARE_VTAB ){ sl@0: /* If the pParse->declareVtab flag is set, do not delete any table sl@0: ** structure built up in pParse->pNewTable. The calling code (see vtab.c) sl@0: ** will take responsibility for freeing the Table structure. sl@0: */ sl@0: sqlite3DeleteTable(pParse->pNewTable); sl@0: } sl@0: sl@0: sqlite3DeleteTrigger(db, pParse->pNewTrigger); sl@0: sqlite3DbFree(db, pParse->apVarExpr); sl@0: sqlite3DbFree(db, pParse->aAlias); sl@0: while( pParse->pZombieTab ){ sl@0: Table *p = pParse->pZombieTab; sl@0: pParse->pZombieTab = p->pNextZombie; sl@0: sqlite3DeleteTable(p); sl@0: } sl@0: if( nErr>0 && (pParse->rc==SQLITE_OK || pParse->rc==SQLITE_DONE) ){ sl@0: pParse->rc = SQLITE_ERROR; sl@0: } sl@0: return nErr; sl@0: }