1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/os/persistentdata/persistentstorage/sql/SQLite/btree.c Fri Jun 15 03:10:57 2012 +0200
1.3 @@ -0,0 +1,7326 @@
1.4 +/*
1.5 +** 2004 April 6
1.6 +**
1.7 +** The author disclaims copyright to this source code. In place of
1.8 +** a legal notice, here is a blessing:
1.9 +**
1.10 +** May you do good and not evil.
1.11 +** May you find forgiveness for yourself and forgive others.
1.12 +** May you share freely, never taking more than you give.
1.13 +**
1.14 +*************************************************************************
1.15 +** $Id: btree.c,v 1.495 2008/08/02 17:36:46 danielk1977 Exp $
1.16 +**
1.17 +** This file implements an external (disk-based) database using BTrees.
1.18 +** See the header comment on "btreeInt.h" for additional information,
1.19 +** including a description of file format and an overview of operation.
1.20 +*/
1.21 +#include "btreeInt.h"
1.22 +
1.23 +/*
1.24 +** The header string that appears at the beginning of every
1.25 +** SQLite database. Its contents come from SQLITE_FILE_HEADER, defined outside this section.
1.26 +*/
1.27 +static const char zMagicHeader[] = SQLITE_FILE_HEADER;
1.28 +
1.29 +/*
1.30 +** Tracing support, compiled out by the "#if 0" below so that TRACE(X) expands to nothing.
1.31 +** If enabled, TRACE(X) runs "printf X" (X = parenthesized argument list) when sqlite3BtreeTrace!=0.
1.32 +*/
1.33 +#if 0
1.34 +int sqlite3BtreeTrace=0; /* True to enable tracing */
1.35 +# define TRACE(X) if(sqlite3BtreeTrace){printf X;fflush(stdout);}
1.36 +#else
1.37 +# define TRACE(X)
1.38 +#endif
1.39 +
1.40 +
1.41 +
1.42 +#ifndef SQLITE_OMIT_SHARED_CACHE
1.43 +/*
1.44 +** A flag to indicate whether or not shared cache is enabled. Also,
1.45 +** a list of BtShared objects that are eligible for participation
1.46 +** in shared cache. The variables have file scope during normal builds,
1.47 +** but the test harness needs to access these variables so we make them
1.48 +** global (non-static) when SQLITE_TEST is defined. Both start out as zero.
1.49 +*/
1.50 +#ifdef SQLITE_TEST
1.51 +BtShared *sqlite3SharedCacheList = 0;
1.52 +int sqlite3SharedCacheEnabled = 0;
1.53 +#else
1.54 +static BtShared *sqlite3SharedCacheList = 0;
1.55 +static int sqlite3SharedCacheEnabled = 0;
1.56 +#endif /* SQLITE_TEST */
1.57 +#endif /* !SQLITE_OMIT_SHARED_CACHE */
1.58 +
1.59 +#ifndef SQLITE_OMIT_SHARED_CACHE
1.60 +/*
1.61 +** Enable or disable the shared pager and schema features. The "enable"
1.62 +** argument is stored in sqlite3SharedCacheEnabled and SQLITE_OK is always returned.
1.63 +** This routine has no effect on existing database connections.
1.64 +** The shared cache setting affects only future calls to
1.65 +** sqlite3_open(), sqlite3_open16(), or sqlite3_open_v2().
1.66 +*/
1.67 +int sqlite3_enable_shared_cache(int enable){
1.68 + sqlite3SharedCacheEnabled = enable;
1.69 + return SQLITE_OK;
1.70 +}
1.71 +#endif /* !SQLITE_OMIT_SHARED_CACHE */
1.72 +
1.73 +
1.74 +/*
1.75 +** Forward declaration of checkReadLocks(); its definition is not part of this section.
1.76 +*/
1.77 +static int checkReadLocks(Btree*, Pgno, BtCursor*, i64);
1.78 +
1.79 +
1.80 +#ifdef SQLITE_OMIT_SHARED_CACHE
1.81 + /*
1.82 + ** The functions queryTableLock(), lockTable() and unlockAllTables()
1.83 + ** manipulate entries in the BtShared.pLock linked list used to store
1.84 + ** shared-cache table level locks. If the library is compiled with the
1.85 + ** shared-cache feature disabled, then there is only ever one user
1.86 + ** of each BtShared structure and so this locking is not necessary.
1.87 + ** So define them as no-op macros: two evaluate to SQLITE_OK, the third to nothing.
1.88 + */
1.89 + #define queryTableLock(a,b,c) SQLITE_OK
1.90 + #define lockTable(a,b,c) SQLITE_OK
1.91 + #define unlockAllTables(a)
1.92 +#endif /* SQLITE_OMIT_SHARED_CACHE */
1.93 +
1.94 +#ifndef SQLITE_OMIT_SHARED_CACHE
1.95 +/*
1.96 +** Query to see if btree handle p may obtain a lock of type eLock
1.97 +** (READ_LOCK or WRITE_LOCK) on the table with root-page iTab. Return
1.98 +** SQLITE_OK if the lock may be obtained (by calling lockTable()), or
1.99 +** SQLITE_LOCKED if not. This routine only inspects the lock list; it takes no lock.
1.100 +*/
1.101 +static int queryTableLock(Btree *p, Pgno iTab, u8 eLock){
1.102 + BtShared *pBt = p->pBt;
1.103 + BtLock *pIter;
1.104 +
1.105 + assert( sqlite3BtreeHoldsMutex(p) );
1.106 + assert( eLock==READ_LOCK || eLock==WRITE_LOCK );
1.107 + assert( p->db!=0 );
1.108 +
1.109 + /* This is a no-op if the handle is not sharable (shared-cache not in use) */
1.110 + if( !p->sharable ){
1.111 + return SQLITE_OK;
1.112 + }
1.113 +
1.114 + /* If some other connection is holding an exclusive lock, the
1.115 + ** requested lock may not be obtained.
1.116 + */
1.117 + if( pBt->pExclusive && pBt->pExclusive!=p ){
1.118 + return SQLITE_LOCKED;
1.119 + }
1.120 +
1.121 + /* This (along with lockTable()) is where the ReadUncommitted flag is
1.122 + ** dealt with. If the caller is querying for a read-lock and the flag is
1.123 + ** set, it is unconditionally granted - even if there are write-locks
1.124 + ** on the table. If a write-lock is requested, the ReadUncommitted flag
1.125 + ** is not considered. Nor is it considered when iTab==MASTER_ROOT.
1.126 + **
1.127 + ** In function lockTable(), if a read-lock is demanded and the
1.128 + ** ReadUncommitted flag is set, no entry is added to the locks list
1.129 + ** (BtShared.pLock).
1.130 + **
1.131 + ** To summarize: If the ReadUncommitted flag is set, then read cursors do
1.132 + ** not create or respect table locks (except on MASTER_ROOT). The locking
1.133 + ** procedure for a write-cursor does not change.
1.134 + */
1.135 + if(
1.136 + 0==(p->db->flags&SQLITE_ReadUncommitted) ||
1.137 + eLock==WRITE_LOCK ||
1.138 + iTab==MASTER_ROOT
1.139 + ){
1.140 + for(pIter=pBt->pLock; pIter; pIter=pIter->pNext){
1.141 + if( pIter->pBtree!=p && pIter->iTable==iTab &&
1.142 + (pIter->eLock!=eLock || eLock!=READ_LOCK) ){ /* only READ vs READ is compatible */
1.143 + return SQLITE_LOCKED;
1.144 + }
1.145 + }
1.146 + }
1.147 + return SQLITE_OK;
1.148 +}
1.149 +#endif /* !SQLITE_OMIT_SHARED_CACHE */
1.150 +
1.151 +#ifndef SQLITE_OMIT_SHARED_CACHE
1.152 +/*
1.153 +** Add a lock on the table with root-page iTable to the shared-btree used
1.154 +** by Btree handle p. Parameter eLock must be either READ_LOCK or
1.155 +** WRITE_LOCK. The caller must already have verified, using queryTableLock(),
1.156 +** that the lock can be granted (this is asserted below).
1.157 +** SQLITE_OK is returned if the lock is added successfully, or SQLITE_NOMEM
1.158 +** if a new BtLock entry is needed and cannot be allocated.
1.159 +*/
1.160 +static int lockTable(Btree *p, Pgno iTable, u8 eLock){
1.161 + BtShared *pBt = p->pBt;
1.162 + BtLock *pLock = 0;
1.163 + BtLock *pIter;
1.164 +
1.165 + assert( sqlite3BtreeHoldsMutex(p) );
1.166 + assert( eLock==READ_LOCK || eLock==WRITE_LOCK );
1.167 + assert( p->db!=0 );
1.168 +
1.169 + /* This is a no-op if the handle is not sharable (shared-cache not in use) */
1.170 + if( !p->sharable ){
1.171 + return SQLITE_OK;
1.172 + }
1.173 +
1.174 + assert( SQLITE_OK==queryTableLock(p, iTable, eLock) );
1.175 +
1.176 + /* If the read-uncommitted flag is set and a read-lock is requested,
1.177 + ** return early without adding an entry to the BtShared.pLock list. See
1.178 + ** comment in function queryTableLock() for more info on handling
1.179 + ** the ReadUncommitted flag. Read-locks on MASTER_ROOT are still recorded.
1.180 + */
1.181 + if(
1.182 + (p->db->flags&SQLITE_ReadUncommitted) &&
1.183 + (eLock==READ_LOCK) &&
1.184 + iTable!=MASTER_ROOT
1.185 + ){
1.186 + return SQLITE_OK;
1.187 + }
1.188 +
1.189 + /* First search the list for an existing lock held by p on this table. */
1.190 + for(pIter=pBt->pLock; pIter; pIter=pIter->pNext){
1.191 + if( pIter->iTable==iTable && pIter->pBtree==p ){
1.192 + pLock = pIter;
1.193 + break;
1.194 + }
1.195 + }
1.196 +
1.197 + /* If the above search did not find a BtLock struct associating Btree p
1.198 + ** with table iTable, allocate a zeroed one and link it at the head of the list.
1.199 + */
1.200 + if( !pLock ){
1.201 + pLock = (BtLock *)sqlite3MallocZero(sizeof(BtLock));
1.202 + if( !pLock ){
1.203 + return SQLITE_NOMEM;
1.204 + }
1.205 + pLock->iTable = iTable;
1.206 + pLock->pBtree = p;
1.207 + pLock->pNext = pBt->pLock;
1.208 + pBt->pLock = pLock;
1.209 + }
1.210 +
1.211 + /* Set the BtLock.eLock variable to the maximum of the current lock
1.212 + ** and the requested lock. This means if a write-lock was already held
1.213 + ** and a read-lock requested, we don't incorrectly downgrade the lock.
1.214 + */
1.215 + assert( WRITE_LOCK>READ_LOCK );
1.216 + if( eLock>pLock->eLock ){
1.217 + pLock->eLock = eLock;
1.218 + }
1.219 +
1.220 + return SQLITE_OK;
1.221 +}
1.222 +#endif /* !SQLITE_OMIT_SHARED_CACHE */
1.223 +
1.224 +#ifndef SQLITE_OMIT_SHARED_CACHE
1.225 +/*
1.226 +** Release all the table locks (locks obtained via calls to the lockTable()
1.227 +** procedure) held by Btree handle p. Also clears BtShared.pExclusive if it is p.
1.228 +*/
1.229 +static void unlockAllTables(Btree *p){
1.230 + BtShared *pBt = p->pBt;
1.231 + BtLock **ppIter = &pBt->pLock; /* Points at the link that references the current entry */
1.232 +
1.233 + assert( sqlite3BtreeHoldsMutex(p) );
1.234 + assert( p->sharable || 0==*ppIter );
1.235 +
1.236 + while( *ppIter ){
1.237 + BtLock *pLock = *ppIter;
1.238 + assert( pBt->pExclusive==0 || pBt->pExclusive==pLock->pBtree );
1.239 + if( pLock->pBtree==p ){
1.240 + *ppIter = pLock->pNext; /* Unlink the entry, then free it */
1.241 + sqlite3_free(pLock);
1.242 + }else{
1.243 + ppIter = &pLock->pNext; /* Entry belongs to another handle: keep it */
1.244 + }
1.245 + }
1.246 +
1.247 + if( pBt->pExclusive==p ){
1.248 + pBt->pExclusive = 0;
1.249 + }
1.250 +}
1.251 +#endif /* !SQLITE_OMIT_SHARED_CACHE */
1.252 +
1.253 +static void releasePage(MemPage *pPage); /* Forward reference; called by saveCursorPosition() below */
1.254 +
1.255 +/*
1.256 +** Verify that the cursor holds a mutex on the BtShared: returns sqlite3_mutex_held() for p->pBt->mutex. Built only without NDEBUG, for use in assert().
1.257 +*/
1.258 +#ifndef NDEBUG
1.259 +static int cursorHoldsMutex(BtCursor *p){
1.260 + return sqlite3_mutex_held(p->pBt->mutex);
1.261 +}
1.262 +#endif /* !NDEBUG */
1.263 +
1.264 +
1.265 +#ifndef SQLITE_OMIT_INCRBLOB
1.266 +/*
1.267 +** Invalidate the overflow page-list cache for cursor pCur, if any: pCur->aOverflow is freed and set to 0.
1.268 +*/
1.269 +static void invalidateOverflowCache(BtCursor *pCur){
1.270 + assert( cursorHoldsMutex(pCur) );
1.271 + sqlite3_free(pCur->aOverflow);
1.272 + pCur->aOverflow = 0;
1.273 +}
1.274 +
1.275 +/*
1.276 +** Invalidate the overflow page-list cache for all cursors opened
1.277 +** on the shared btree structure pBt (every cursor on the pBt->pCursor list).
1.278 +*/
1.279 +static void invalidateAllOverflowCache(BtShared *pBt){
1.280 + BtCursor *p;
1.281 + assert( sqlite3_mutex_held(pBt->mutex) );
1.282 + for(p=pBt->pCursor; p; p=p->pNext){
1.283 + invalidateOverflowCache(p);
1.284 + }
1.285 +}
1.286 +#else /* SQLITE_OMIT_INCRBLOB defined: both routines become empty macros */
1.287 + #define invalidateOverflowCache(x)
1.288 + #define invalidateAllOverflowCache(x)
1.289 +#endif /* SQLITE_OMIT_INCRBLOB */
1.290 +
1.291 +/*
1.292 +** Save the current cursor position in the variables BtCursor.nKey
1.293 +** and BtCursor.pKey. On success the cursor's page is released, its state is set to
1.294 +** CURSOR_REQUIRESEEK and SQLITE_OK is returned. On failure (SQLITE_NOMEM or an error from the key routines) the page and state are left alone. */
1.295 +static int saveCursorPosition(BtCursor *pCur){
1.296 + int rc;
1.297 +
1.298 + assert( CURSOR_VALID==pCur->eState );
1.299 + assert( 0==pCur->pKey );
1.300 + assert( cursorHoldsMutex(pCur) );
1.301 +
1.302 + rc = sqlite3BtreeKeySize(pCur, &pCur->nKey);
1.303 +
1.304 + /* If this is an intKey table, then the above call to BtreeKeySize()
1.305 + ** stores the integer key in pCur->nKey. In this case this value is
1.306 + ** all that is required. Otherwise, if pCur is not open on an intKey
1.307 + ** table, then malloc space for and store the pCur->nKey bytes of key
1.308 + ** data. Ownership of the buffer passes to pCur->pKey only on success.
1.309 + */
1.310 + if( rc==SQLITE_OK && 0==pCur->pPage->intKey){
1.311 + void *pKey = sqlite3Malloc(pCur->nKey);
1.312 + if( pKey ){
1.313 + rc = sqlite3BtreeKey(pCur, 0, pCur->nKey, pKey);
1.314 + if( rc==SQLITE_OK ){
1.315 + pCur->pKey = pKey;
1.316 + }else{
1.317 + sqlite3_free(pKey);
1.318 + }
1.319 + }else{
1.320 + rc = SQLITE_NOMEM;
1.321 + }
1.322 + }
1.323 + assert( !pCur->pPage->intKey || !pCur->pKey );
1.324 +
1.325 + if( rc==SQLITE_OK ){
1.326 + releasePage(pCur->pPage);
1.327 + pCur->pPage = 0;
1.328 + pCur->eState = CURSOR_REQUIRESEEK;
1.329 + }
1.330 +
1.331 + invalidateOverflowCache(pCur); /* Done whether or not the save succeeded */
1.332 + return rc;
1.333 +}
1.334 +
1.335 +/*
1.336 +** Save the positions of all CURSOR_VALID cursors except pExcept open on the table
1.337 +** with root-page iRoot (iRoot==0 matches every table). Usually, this is called just before cursor
1.338 +** pExcept is used to modify the table (BtreeDelete() or BtreeInsert()). Stops at, and returns, the first error; otherwise SQLITE_OK.
1.339 +*/
1.340 +static int saveAllCursors(BtShared *pBt, Pgno iRoot, BtCursor *pExcept){
1.341 + BtCursor *p;
1.342 + assert( sqlite3_mutex_held(pBt->mutex) );
1.343 + assert( pExcept==0 || pExcept->pBt==pBt );
1.344 + for(p=pBt->pCursor; p; p=p->pNext){
1.345 + if( p!=pExcept && (0==iRoot || p->pgnoRoot==iRoot) &&
1.346 + p->eState==CURSOR_VALID ){
1.347 + int rc = saveCursorPosition(p);
1.348 + if( SQLITE_OK!=rc ){
1.349 + return rc;
1.350 + }
1.351 + }
1.352 + }
1.353 + return SQLITE_OK;
1.354 +}
1.355 +
1.356 +/*
1.357 +** Clear the current cursor position: free any saved key (pCur->pKey) and set the state to CURSOR_INVALID.
1.358 +*/
1.359 +static void clearCursorPosition(BtCursor *pCur){
1.360 + assert( cursorHoldsMutex(pCur) );
1.361 + sqlite3_free(pCur->pKey);
1.362 + pCur->pKey = 0;
1.363 + pCur->eState = CURSOR_INVALID;
1.364 +}
1.365 +
1.366 +/*
1.367 +** Restore the cursor to the position it was in (or as close to as possible)
1.368 +** when saveCursorPosition() was called. Note that this call deletes the
1.369 +** saved position info stored by saveCursorPosition(), so there can be
1.370 +** at most one effective restoreCursorPosition() call after each
1.371 +** saveCursorPosition(). A cursor in CURSOR_FAULT state is not moved; pCur->skip is returned instead.
1.372 +*/
1.373 +int sqlite3BtreeRestoreCursorPosition(BtCursor *pCur){
1.374 + int rc;
1.375 + assert( cursorHoldsMutex(pCur) );
1.376 + assert( pCur->eState>=CURSOR_REQUIRESEEK );
1.377 + if( pCur->eState==CURSOR_FAULT ){
1.378 + return pCur->skip;
1.379 + }
1.380 + pCur->eState = CURSOR_INVALID;
1.381 + rc = sqlite3BtreeMoveto(pCur, pCur->pKey, 0, pCur->nKey, 0, &pCur->skip);
1.382 + if( rc==SQLITE_OK ){
1.383 + sqlite3_free(pCur->pKey); /* The saved key is kept if the seek failed */
1.384 + pCur->pKey = 0;
1.385 + assert( pCur->eState==CURSOR_VALID || pCur->eState==CURSOR_INVALID );
1.386 + }
1.387 + return rc;
1.388 +}
1.389 +/* Restore cursor p only when p->eState>=CURSOR_REQUIRESEEK; otherwise evaluate to SQLITE_OK. */
1.390 +#define restoreCursorPosition(p) \
1.391 + ((p)->eState>=CURSOR_REQUIRESEEK ? \
1.392 + sqlite3BtreeRestoreCursorPosition(p) : \
1.393 + SQLITE_OK)
1.394 +
1.395 +/*
1.396 +** Determine whether or not a cursor has moved from the position it
1.397 +** was last placed at. Cursors can move when the row they are pointing
1.398 +** at is deleted out from under them.
1.399 +**
1.400 +** This routine returns an error code if something goes wrong (with *pHasMoved set to 1). The
1.401 +** integer *pHasMoved is set to one if the cursor has moved and 0 if not.
1.402 +*/
1.403 +int sqlite3BtreeCursorHasMoved(BtCursor *pCur, int *pHasMoved){
1.404 + int rc;
1.405 +
1.406 + rc = restoreCursorPosition(pCur);
1.407 + if( rc ){
1.408 + *pHasMoved = 1;
1.409 + return rc;
1.410 + }
1.411 + if( pCur->eState!=CURSOR_VALID || pCur->skip!=0 ){ /* Not valid, or skip is non-zero */
1.412 + *pHasMoved = 1;
1.413 + }else{
1.414 + *pHasMoved = 0;
1.415 + }
1.416 + return SQLITE_OK;
1.417 +}
1.418 +
1.419 +#ifndef SQLITE_OMIT_AUTOVACUUM
1.420 +/*
1.421 +** Given a page number of a regular database page, return the page
1.422 +** number for the pointer-map page that contains the entry for the
1.423 +** input page number. Each map page starts a group of (usableSize/5)+1 pages, the first group starting at page 2.
1.424 +*/
1.425 +static Pgno ptrmapPageno(BtShared *pBt, Pgno pgno){
1.426 + int nPagesPerMapPage, iPtrMap, ret;
1.427 + assert( sqlite3_mutex_held(pBt->mutex) );
1.428 + nPagesPerMapPage = (pBt->usableSize/5)+1; /* 5 bytes per entry, plus the map page itself */
1.429 + iPtrMap = (pgno-2)/nPagesPerMapPage;
1.430 + ret = (iPtrMap*nPagesPerMapPage) + 2;
1.431 + if( ret==PENDING_BYTE_PAGE(pBt) ){ /* Step past the pending-byte page */
1.432 + ret++;
1.433 + }
1.434 + return ret;
1.435 +}
1.436 +
1.437 +/*
1.438 +** Write an entry into the pointer map.
1.439 +**
1.440 +** This routine updates the pointer map entry for page number 'key'
1.441 +** so that it maps to type 'eType' and parent page number 'parent'.
1.442 +** An error code is returned if something goes wrong (SQLITE_CORRUPT_BKPT when key==0), otherwise SQLITE_OK.
1.443 +*/
1.444 +static int ptrmapPut(BtShared *pBt, Pgno key, u8 eType, Pgno parent){
1.445 + DbPage *pDbPage; /* The pointer map page */
1.446 + u8 *pPtrmap; /* The pointer map data */
1.447 + Pgno iPtrmap; /* The pointer map page number */
1.448 + int offset; /* Offset in pointer map page */
1.449 + int rc;
1.450 +
1.451 + assert( sqlite3_mutex_held(pBt->mutex) );
1.452 + /* The pending-byte page (PENDING_BYTE_PAGE) must never be used as a pointer map page */
1.453 + assert( 0==PTRMAP_ISPAGE(pBt, PENDING_BYTE_PAGE(pBt)) );
1.454 +
1.455 + assert( pBt->autoVacuum );
1.456 + if( key==0 ){
1.457 + return SQLITE_CORRUPT_BKPT;
1.458 + }
1.459 + iPtrmap = PTRMAP_PAGENO(pBt, key);
1.460 + rc = sqlite3PagerGet(pBt->pPager, iPtrmap, &pDbPage);
1.461 + if( rc!=SQLITE_OK ){
1.462 + return rc;
1.463 + }
1.464 + offset = PTRMAP_PTROFFSET(iPtrmap, key);
1.465 + pPtrmap = (u8 *)sqlite3PagerGetData(pDbPage);
1.466 +
1.467 + if( eType!=pPtrmap[offset] || get4byte(&pPtrmap[offset+1])!=parent ){ /* Write only if the entry changes */
1.468 + TRACE(("PTRMAP_UPDATE: %d->(%d,%d)\n", key, eType, parent));
1.469 + rc = sqlite3PagerWrite(pDbPage);
1.470 + if( rc==SQLITE_OK ){
1.471 + pPtrmap[offset] = eType; /* Entry layout: 1 type byte, then 4-byte parent page number */
1.472 + put4byte(&pPtrmap[offset+1], parent);
1.473 + }
1.474 + }
1.475 +
1.476 + sqlite3PagerUnref(pDbPage);
1.477 + return rc;
1.478 +}
1.479 +
1.480 +/*
1.481 +** Read an entry from the pointer map.
1.482 +**
1.483 +** This routine retrieves the pointer map entry for page 'key', writing
1.484 +** the type and parent page number to *pEType and *pPgno respectively (pPgno may be NULL; pEType may not).
1.485 +** An error code is returned if something goes wrong (SQLITE_CORRUPT_BKPT if the type is outside 1..5), otherwise SQLITE_OK.
1.486 +*/
1.487 +static int ptrmapGet(BtShared *pBt, Pgno key, u8 *pEType, Pgno *pPgno){
1.488 + DbPage *pDbPage; /* The pointer map page */
1.489 + int iPtrmap; /* Pointer map page index */
1.490 + u8 *pPtrmap; /* Pointer map page data */
1.491 + int offset; /* Offset of entry in pointer map */
1.492 + int rc;
1.493 +
1.494 + assert( sqlite3_mutex_held(pBt->mutex) );
1.495 +
1.496 + iPtrmap = PTRMAP_PAGENO(pBt, key);
1.497 + rc = sqlite3PagerGet(pBt->pPager, iPtrmap, &pDbPage);
1.498 + if( rc!=0 ){
1.499 + return rc;
1.500 + }
1.501 + pPtrmap = (u8 *)sqlite3PagerGetData(pDbPage);
1.502 +
1.503 + offset = PTRMAP_PTROFFSET(iPtrmap, key);
1.504 + assert( pEType!=0 );
1.505 + *pEType = pPtrmap[offset];
1.506 + if( pPgno ) *pPgno = get4byte(&pPtrmap[offset+1]);
1.507 +
1.508 + sqlite3PagerUnref(pDbPage); /* Outputs were copied out above, so the page can be released */
1.509 + if( *pEType<1 || *pEType>5 ) return SQLITE_CORRUPT_BKPT;
1.510 + return SQLITE_OK;
1.511 +}
1.512 +
1.513 +#else /* if defined SQLITE_OMIT_AUTOVACUUM: pointer-map operations become no-ops yielding SQLITE_OK */
1.514 + #define ptrmapPut(w,x,y,z) SQLITE_OK
1.515 + #define ptrmapGet(w,x,y,z) SQLITE_OK
1.516 + #define ptrmapPutOvfl(y,z) SQLITE_OK
1.517 +#endif /* SQLITE_OMIT_AUTOVACUUM */
1.518 +
1.519 +/*
1.520 +** Given a btree page and a cell index (0 means the first cell on
1.521 +** the page, 1 means the second cell, and so forth) return a pointer
1.522 +** to the cell content. The 2-byte entry I of the cell pointer array (at P->cellOffset)
1.523 +** is read, masked with P->maskPage, and added to P->aData. P is evaluated more than once.
1.524 +** This routine works only for pages that do not contain overflow cells.
1.525 +*/
1.526 +#define findCell(P,I) \
1.527 + ((P)->aData + ((P)->maskPage & get2byte(&(P)->aData[(P)->cellOffset+2*(I)])))
1.528 +
1.529 +/*
1.530 +** This is a more complex version of findCell() that works for
1.531 +** pages that do contain overflow cells (entries of pPage->aOvfl[]). See insert
1.532 +*/
1.533 +static u8 *findOverflowCell(MemPage *pPage, int iCell){
1.534 + int i;
1.535 + assert( sqlite3_mutex_held(pPage->pBt->mutex) );
1.536 + for(i=pPage->nOverflow-1; i>=0; i--){ /* Scan overflow cells from last to first */
1.537 + int k;
1.538 + struct _OvflCell *pOvfl;
1.539 + pOvfl = &pPage->aOvfl[i];
1.540 + k = pOvfl->idx;
1.541 + if( k<=iCell ){
1.542 + if( k==iCell ){
1.543 + return pOvfl->pCell; /* The requested cell is this overflow cell */
1.544 + }
1.545 + iCell--; /* An overflow cell sits before iCell: shift the on-page index down */
1.546 + }
1.547 + }
1.548 + return findCell(pPage, iCell);
1.549 +}
1.550 +
1.551 +/*
1.552 +** Parse a cell content block and fill in the CellInfo structure. There
1.553 +** are two versions of this function. sqlite3BtreeParseCell() takes a
1.554 +** cell index as the second argument and sqlite3BtreeParseCellPtr()
1.555 +** takes a pointer to the body of the cell as its second argument.
1.556 +** Fields set in *pInfo: pCell, nKey, nData, nPayload, nHeader, nLocal, iOverflow and nSize.
1.557 +** Within this file, the parseCell() macro can be called instead of
1.558 +** sqlite3BtreeParseCell(). Using some compilers, this will be faster.
1.559 +*/
1.560 +void sqlite3BtreeParseCellPtr(
1.561 + MemPage *pPage, /* Page containing the cell */
1.562 + u8 *pCell, /* Pointer to the cell text. */
1.563 + CellInfo *pInfo /* Fill in this structure */
1.564 +){
1.565 + int n; /* Number bytes in cell content header */
1.566 + u32 nPayload; /* Number of bytes of cell payload */
1.567 +
1.568 + assert( sqlite3_mutex_held(pPage->pBt->mutex) );
1.569 +
1.570 + pInfo->pCell = pCell;
1.571 + assert( pPage->leaf==0 || pPage->leaf==1 );
1.572 + n = pPage->childPtrSize; /* Skip the child pointer (4 bytes on interior pages, 0 on leaves) */
1.573 + assert( n==4-4*pPage->leaf );
1.574 + if( pPage->intKey ){
1.575 + if( pPage->hasData ){
1.576 + n += getVarint32(&pCell[n], nPayload);
1.577 + }else{
1.578 + nPayload = 0;
1.579 + }
1.580 + n += getVarint(&pCell[n], (u64*)&pInfo->nKey); /* nKey holds the integer key itself */
1.581 + pInfo->nData = nPayload;
1.582 + }else{
1.583 + pInfo->nData = 0;
1.584 + n += getVarint32(&pCell[n], nPayload);
1.585 + pInfo->nKey = nPayload; /* nKey is the key length in bytes; the payload is the key */
1.586 + }
1.587 + pInfo->nPayload = nPayload;
1.588 + pInfo->nHeader = n;
1.589 + if( likely(nPayload<=pPage->maxLocal) ){
1.590 + /* This is the (easy) common case where the entire payload fits
1.591 + ** on the local page. No overflow is required.
1.592 + */
1.593 + int nSize; /* Total size of cell content in bytes */
1.594 + nSize = nPayload + n;
1.595 + pInfo->nLocal = nPayload;
1.596 + pInfo->iOverflow = 0;
1.597 + if( (nSize & ~3)==0 ){ /* True exactly when nSize is 0..3 */
1.598 + nSize = 4; /* Minimum cell size is 4 */
1.599 + }
1.600 + pInfo->nSize = nSize;
1.601 + }else{
1.602 + /* If the payload will not fit completely on the local page, we have
1.603 + ** to decide how much to store locally and how much to spill onto
1.604 + ** overflow pages. The strategy is to minimize the amount of unused
1.605 + ** space on overflow pages while keeping the amount of local storage
1.606 + ** in between minLocal and maxLocal.
1.607 + **
1.608 + ** Warning: changing the way overflow payload is distributed in any
1.609 + ** way will result in an incompatible file format.
1.610 + */
1.611 + int minLocal; /* Minimum amount of payload held locally */
1.612 + int maxLocal; /* Maximum amount of payload held locally */
1.613 + int surplus; /* Overflow payload available for local storage */
1.614 +
1.615 + minLocal = pPage->minLocal;
1.616 + maxLocal = pPage->maxLocal;
1.617 + surplus = minLocal + (nPayload - minLocal)%(pPage->pBt->usableSize - 4);
1.618 + if( surplus <= maxLocal ){
1.619 + pInfo->nLocal = surplus;
1.620 + }else{
1.621 + pInfo->nLocal = minLocal;
1.622 + }
1.623 + pInfo->iOverflow = pInfo->nLocal + n; /* Offset within the cell of the overflow page number */
1.624 + pInfo->nSize = pInfo->iOverflow + 4; /* The overflow page number occupies 4 bytes */
1.625 + }
1.626 +}
1.627 +#define parseCell(pPage, iCell, pInfo) \
1.628 + sqlite3BtreeParseCellPtr((pPage), findCell((pPage), (iCell)), (pInfo)) /* Locate cell iCell with findCell(), then parse it */
1.629 +void sqlite3BtreeParseCell( /* Function form of parseCell(): parse the cell with index iCell */
1.630 + MemPage *pPage, /* Page containing the cell */
1.631 + int iCell, /* The cell index. First cell is 0 */
1.632 + CellInfo *pInfo /* Fill in this structure */
1.633 +){
1.634 + parseCell(pPage, iCell, pInfo);
1.635 +}
1.636 +
1.637 +/*
1.638 +** Compute the total number of bytes that a Cell needs in the cell
1.639 +** data area of the btree-page. The return number includes the cell
1.640 +** data header and the local payload, but not any overflow page or
1.641 +** the space used by the cell pointer. The value is CellInfo.nSize as computed by sqlite3BtreeParseCell().
1.642 +*/
1.643 +#ifndef NDEBUG
1.644 +static u16 cellSize(MemPage *pPage, int iCell){
1.645 + CellInfo info;
1.646 + sqlite3BtreeParseCell(pPage, iCell, &info);
1.647 + return info.nSize;
1.648 +}
1.649 +#endif /* !NDEBUG */
1.650 +static u16 cellSizePtr(MemPage *pPage, u8 *pCell){ /* Like cellSize(), but takes a pointer to the cell */
1.651 + CellInfo info;
1.652 + sqlite3BtreeParseCellPtr(pPage, pCell, &info);
1.653 + return info.nSize;
1.654 +}
1.655 +
1.656 +#ifndef SQLITE_OMIT_AUTOVACUUM
1.657 +/*
1.658 +** If the cell pCell, part of page pPage contains a pointer
1.659 +** to an overflow page, insert an entry into the pointer-map
1.660 +** for the overflow page (type PTRMAP_OVERFLOW1, parent pPage->pgno). Returns the ptrmapPut() result, or SQLITE_OK if there is no overflow.
1.661 +*/
1.662 +static int ptrmapPutOvflPtr(MemPage *pPage, u8 *pCell){
1.663 + CellInfo info;
1.664 + assert( pCell!=0 );
1.665 + sqlite3BtreeParseCellPtr(pPage, pCell, &info);
1.666 + assert( (info.nData+(pPage->intKey?0:info.nKey))==info.nPayload );
1.667 + if( (info.nData+(pPage->intKey?0:info.nKey))>info.nLocal ){ /* Payload exceeds what is stored locally */
1.668 + Pgno ovfl = get4byte(&pCell[info.iOverflow]);
1.669 + return ptrmapPut(pPage->pBt, ovfl, PTRMAP_OVERFLOW1, pPage->pgno);
1.670 + }
1.671 + return SQLITE_OK;
1.672 +}
1.673 +/*
1.674 +** If the cell with index iCell on page pPage contains a pointer
1.675 +** to an overflow page, insert an entry into the pointer-map
1.676 +** for the overflow page. The cell is located with findOverflowCell(), then passed to ptrmapPutOvflPtr().
1.677 +*/
1.678 +static int ptrmapPutOvfl(MemPage *pPage, int iCell){
1.679 + u8 *pCell;
1.680 + assert( sqlite3_mutex_held(pPage->pBt->mutex) );
1.681 + pCell = findOverflowCell(pPage, iCell);
1.682 + return ptrmapPutOvflPtr(pPage, pCell);
1.683 +}
1.684 +#endif /* !SQLITE_OMIT_AUTOVACUUM */
1.685 +
1.686 +
1.687 +/*
1.688 +** Defragment the page given. All Cells are moved to the
1.689 +** end of the page and all free space is collected into one
1.690 +** big zeroed gap that occurs in between the header and cell
1.691 +** pointer array and the cell content area. The freeblock list and fragment count are reset to 0.
1.692 +*/
1.693 +static void defragmentPage(MemPage *pPage){
1.694 + int i; /* Loop counter */
1.695 + int pc; /* Address of a i-th cell */
1.696 + int addr; /* Offset of first byte after cell pointer array */
1.697 + int hdr; /* Offset to the page header */
1.698 + int size; /* Size of a cell */
1.699 + int usableSize; /* Number of usable bytes on a page */
1.700 + int cellOffset; /* Offset to the cell pointer array */
1.701 + int brk; /* Offset to the cell content area */
1.702 + int nCell; /* Number of cells on the page */
1.703 + unsigned char *data; /* The page data */
1.704 + unsigned char *temp; /* Temp area for cell content */
1.705 +
1.706 + assert( sqlite3PagerIswriteable(pPage->pDbPage) );
1.707 + assert( pPage->pBt!=0 );
1.708 + assert( pPage->pBt->usableSize <= SQLITE_MAX_PAGE_SIZE );
1.709 + assert( pPage->nOverflow==0 );
1.710 + assert( sqlite3_mutex_held(pPage->pBt->mutex) );
1.711 + temp = sqlite3PagerTempSpace(pPage->pBt->pPager);
1.712 + data = pPage->aData;
1.713 + hdr = pPage->hdrOffset;
1.714 + cellOffset = pPage->cellOffset;
1.715 + nCell = pPage->nCell;
1.716 + assert( nCell==get2byte(&data[hdr+3]) );
1.717 + usableSize = pPage->pBt->usableSize;
1.718 + brk = get2byte(&data[hdr+5]);
1.719 + memcpy(&temp[brk], &data[brk], usableSize - brk); /* Snapshot the content area at the same offsets */
1.720 + brk = usableSize; /* Re-pack downward from the end of the usable area */
1.721 + for(i=0; i<nCell; i++){
1.722 + u8 *pAddr; /* The i-th cell pointer */
1.723 + pAddr = &data[cellOffset + i*2];
1.724 + pc = get2byte(pAddr);
1.725 + assert( pc<pPage->pBt->usableSize );
1.726 + size = cellSizePtr(pPage, &temp[pc]);
1.727 + brk -= size;
1.728 + memcpy(&data[brk], &temp[pc], size);
1.729 + put2byte(pAddr, brk); /* Point the cell pointer at the cell's new location */
1.730 + }
1.731 + assert( brk>=cellOffset+2*nCell );
1.732 + put2byte(&data[hdr+5], brk); /* New start of the cell content area */
1.733 + data[hdr+1] = 0; /* Bytes hdr+1..hdr+2: first-freeblock pointer, now 0 */
1.734 + data[hdr+2] = 0;
1.735 + data[hdr+7] = 0; /* Byte hdr+7: fragment count */
1.736 + addr = cellOffset+2*nCell;
1.737 + memset(&data[addr], 0, brk-addr); /* Zero the gap between pointer array and content */
1.738 +}
1.739 +
1.740 +/*
1.741 +** Allocate nByte bytes of space on a page.
1.742 +**
1.743 +** Return the index into pPage->aData[] of the first byte of
1.744 +** the new allocation. The caller guarantees that there is enough
1.745 +** space. This routine will never fail. pPage->nFree is reduced by nByte.
1.746 +**
1.747 +** If the page contains nByte bytes of free space but does not contain
1.748 +** nByte bytes of contiguous free space, then this routine automatically
1.749 +** calls defragmentPage() to consolidate all free space before
1.750 +** allocating the new chunk. It also defragments when the fragment count is 60 or more.
1.751 +*/
1.752 +static int allocateSpace(MemPage *pPage, int nByte){
1.753 + int addr, pc, hdr;
1.754 + int size;
1.755 + int nFrag;
1.756 + int top;
1.757 + int nCell;
1.758 + int cellOffset;
1.759 + unsigned char *data;
1.760 +
1.761 + data = pPage->aData;
1.762 + assert( sqlite3PagerIswriteable(pPage->pDbPage) );
1.763 + assert( pPage->pBt );
1.764 + assert( sqlite3_mutex_held(pPage->pBt->mutex) );
1.765 + assert( nByte>=0 ); /* Minimum cell size is 4 */
1.766 + assert( pPage->nFree>=nByte );
1.767 + assert( pPage->nOverflow==0 );
1.768 + pPage->nFree -= nByte;
1.769 + hdr = pPage->hdrOffset;
1.770 +
1.771 + nFrag = data[hdr+7];
1.772 + if( nFrag<60 ){
1.773 + /* Search the freelist looking for a slot big enough to satisfy the
1.774 + ** space request. Each freeblock holds a 2-byte next pointer then a 2-byte size. */
1.775 + addr = hdr+1;
1.776 + while( (pc = get2byte(&data[addr]))>0 ){
1.777 + size = get2byte(&data[pc+2]);
1.778 + if( size>=nByte ){
1.779 + if( size<nByte+4 ){ /* Leftover too small for a freeblock: unlink it, count the rest as fragments */
1.780 + memcpy(&data[addr], &data[pc], 2);
1.781 + data[hdr+7] = nFrag + size - nByte;
1.782 + return pc;
1.783 + }else{ /* Shrink the freeblock and allocate from its tail */
1.784 + put2byte(&data[pc+2], size-nByte);
1.785 + return pc + size - nByte;
1.786 + }
1.787 + }
1.788 + addr = pc;
1.789 + }
1.790 + }
1.791 +
1.792 + /* Allocate memory from the gap in between the cell pointer array
1.793 + ** and the cell content area.
1.794 + */
1.795 + top = get2byte(&data[hdr+5]);
1.796 + nCell = get2byte(&data[hdr+3]);
1.797 + cellOffset = pPage->cellOffset;
1.798 + if( nFrag>=60 || cellOffset + 2*nCell > top - nByte ){
1.799 + defragmentPage(pPage);
1.800 + top = get2byte(&data[hdr+5]); /* defragmentPage() rewrote the content-area offset */
1.801 + }
1.802 + top -= nByte;
1.803 + assert( cellOffset + 2*nCell <= top );
1.804 + put2byte(&data[hdr+5], top);
1.805 + return top;
1.806 +}
1.807 +
1.808 +/*
1.809 +** Return a section of the pPage->aData to the freelist.
1.810 +** The first byte of the new free block is pPage->aData[start]
1.811 +** and the size of the block is "size" bytes. pPage->nFree grows by "size".
1.812 +**
1.813 +** Most of the effort here is involved in coalescing adjacent
1.814 +** free blocks into a single big free block.
1.815 +*/
1.816 +static void freeSpace(MemPage *pPage, int start, int size){
1.817 + int addr, pbegin, hdr;
1.818 + unsigned char *data = pPage->aData;
1.819 +
1.820 + assert( pPage->pBt!=0 );
1.821 + assert( sqlite3PagerIswriteable(pPage->pDbPage) );
1.822 + assert( start>=pPage->hdrOffset+6+(pPage->leaf?0:4) );
1.823 + assert( (start + size)<=pPage->pBt->usableSize );
1.824 + assert( sqlite3_mutex_held(pPage->pBt->mutex) );
1.825 + assert( size>=0 ); /* Minimum cell size is 4 */
1.826 +
1.827 +#ifdef SQLITE_SECURE_DELETE
1.828 + /* Overwrite deleted information with zeros when the SECURE_DELETE
1.829 + ** option is enabled at compile-time */
1.830 + memset(&data[start], 0, size);
1.831 +#endif
1.832 +
1.833 + /* Add the space back into the linked list of freeblocks, which is kept in ascending address order */
1.834 + hdr = pPage->hdrOffset;
1.835 + addr = hdr + 1;
1.836 + while( (pbegin = get2byte(&data[addr]))<start && pbegin>0 ){
1.837 + assert( pbegin<=pPage->pBt->usableSize-4 );
1.838 + assert( pbegin>addr );
1.839 + addr = pbegin;
1.840 + }
1.841 + assert( pbegin<=pPage->pBt->usableSize-4 );
1.842 + assert( pbegin>addr || pbegin==0 );
1.843 + put2byte(&data[addr], start); /* Predecessor now points at the new block */
1.844 + put2byte(&data[start], pbegin); /* New block: next pointer ... */
1.845 + put2byte(&data[start+2], size); /* ... followed by its size */
1.846 + pPage->nFree += size;
1.847 +
1.848 + /* Coalesce adjacent free blocks, including those separated by a gap of at most 3 bytes */
1.849 + addr = pPage->hdrOffset + 1;
1.850 + while( (pbegin = get2byte(&data[addr]))>0 ){
1.851 + int pnext, psize;
1.852 + assert( pbegin>addr );
1.853 + assert( pbegin<=pPage->pBt->usableSize-4 );
1.854 + pnext = get2byte(&data[pbegin]);
1.855 + psize = get2byte(&data[pbegin+2]);
1.856 + if( pbegin + psize + 3 >= pnext && pnext>0 ){
1.857 + int frag = pnext - (pbegin+psize); /* Bytes between the two blocks, absorbed by the merge */
1.858 + assert( frag<=data[pPage->hdrOffset+7] );
1.859 + data[pPage->hdrOffset+7] -= frag;
1.860 + put2byte(&data[pbegin], get2byte(&data[pnext]));
1.861 + put2byte(&data[pbegin+2], pnext+get2byte(&data[pnext+2])-pbegin);
1.862 + }else{
1.863 + addr = pbegin;
1.864 + }
1.865 + }
1.866 +
1.867 + /* If the cell content area begins with a freeblock, remove it and move the content-area start past it. */
1.868 + if( data[hdr+1]==data[hdr+5] && data[hdr+2]==data[hdr+6] ){
1.869 + int top;
1.870 + pbegin = get2byte(&data[hdr+1]);
1.871 + memcpy(&data[hdr+1], &data[pbegin], 2);
1.872 + top = get2byte(&data[hdr+5]);
1.873 + put2byte(&data[hdr+5], top + get2byte(&data[pbegin+2]));
1.874 + }
1.875 +}
1.876 +
1.877 +/*
1.878 +** Decode the flags byte (the first byte of the header) for a page
1.879 +** and initialize fields of the MemPage structure accordingly
1.880 +** (leaf, childPtrSize, intKey, hasData, maxLocal and minLocal).
1.881 +** Only the following combinations are supported. Anything different
1.882 +** indicates a corrupt database file, reported by returning SQLITE_CORRUPT_BKPT:
1.883 +**
1.884 +** PTF_ZERODATA
1.885 +** PTF_ZERODATA | PTF_LEAF
1.886 +** PTF_LEAFDATA | PTF_INTKEY
1.887 +** PTF_LEAFDATA | PTF_INTKEY | PTF_LEAF
1.888 +*/
1.889 +static int decodeFlags(MemPage *pPage, int flagByte){
1.890 + BtShared *pBt; /* A copy of pPage->pBt */
1.891 +
1.892 + assert( pPage->hdrOffset==(pPage->pgno==1 ? 100 : 0) );
1.893 + assert( sqlite3_mutex_held(pPage->pBt->mutex) );
1.894 + pPage->leaf = flagByte>>3; assert( PTF_LEAF == 1<<3 );
1.895 + flagByte &= ~PTF_LEAF; /* The leaf bit is recorded above; compare the remaining bits */
1.896 + pPage->childPtrSize = 4-4*pPage->leaf;
1.897 + pBt = pPage->pBt;
1.898 + if( flagByte==(PTF_LEAFDATA | PTF_INTKEY) ){
1.899 + pPage->intKey = 1;
1.900 + pPage->hasData = pPage->leaf;
1.901 + pPage->maxLocal = pBt->maxLeaf;
1.902 + pPage->minLocal = pBt->minLeaf;
1.903 + }else if( flagByte==PTF_ZERODATA ){
1.904 + pPage->intKey = 0;
1.905 + pPage->hasData = 0;
1.906 + pPage->maxLocal = pBt->maxLocal;
1.907 + pPage->minLocal = pBt->minLocal;
1.908 + }else{
1.909 + return SQLITE_CORRUPT_BKPT;
1.910 + }
1.911 + return SQLITE_OK;
1.912 +}
1.913 +
1.914 +/*
1.915 +** Initialize the auxiliary information for a disk block.
1.916 +**
1.917 +** The pParent parameter must be a pointer to the MemPage which
1.918 +** is the parent of the page being initialized. The root of a
1.919 +** BTree has no parent and so for that page, pParent==NULL.
1.920 +**
1.921 +** Return SQLITE_OK on success. If we see that the page does
1.922 +** not contain a well-formed database page, then return
1.923 +** SQLITE_CORRUPT. Note that a return of SQLITE_OK does not
1.924 +** guarantee that the page is well-formed. It only shows that
1.925 +** we failed to detect any corruption.
1.926 +*/
1.927 +int sqlite3BtreeInitPage(
1.928 + MemPage *pPage, /* The page to be initialized */
1.929 + MemPage *pParent /* The parent. Might be NULL */
1.930 +){
1.931 + int pc; /* Address of a freeblock within pPage->aData[] */
1.932 + int hdr; /* Offset to beginning of page header */
1.933 + u8 *data; /* Equal to pPage->aData */
1.934 + BtShared *pBt; /* The main btree structure */
1.935 + int usableSize; /* Amount of usable space on each page */
1.936 + int cellOffset; /* Offset from start of page to first cell pointer */
1.937 + int nFree; /* Number of unused bytes on the page */
1.938 + int top; /* First byte of the cell content area */
1.939 +
1.940 + pBt = pPage->pBt;
1.941 + assert( pBt!=0 );
1.942 + assert( pParent==0 || pParent->pBt==pBt );
1.943 + assert( sqlite3_mutex_held(pBt->mutex) );
1.944 + assert( pPage->pgno==sqlite3PagerPagenumber(pPage->pDbPage) );
1.945 + assert( pPage == sqlite3PagerGetExtra(pPage->pDbPage) );
1.946 + assert( pPage->aData == sqlite3PagerGetData(pPage->pDbPage) );
1.947 + if( pPage->pParent!=pParent && (pPage->pParent!=0 || pPage->isInit) ){
1.948 + /* The parent page should never change unless the file is corrupt */
1.949 + return SQLITE_CORRUPT_BKPT;
1.950 + }
1.951 + if( pPage->isInit ) return SQLITE_OK;
1.952 + if( pPage->pParent==0 && pParent!=0 ){
1.953 + pPage->pParent = pParent;
1.954 + sqlite3PagerRef(pParent->pDbPage);
1.955 + }
1.956 + hdr = pPage->hdrOffset;
1.957 + data = pPage->aData;
1.958 + if( decodeFlags(pPage, data[hdr]) ) return SQLITE_CORRUPT_BKPT;
1.959 + assert( pBt->pageSize>=512 && pBt->pageSize<=32768 );
1.960 + pPage->maskPage = pBt->pageSize - 1;
1.961 + pPage->nOverflow = 0;
1.962 + pPage->idxShift = 0;
1.963 + usableSize = pBt->usableSize;
1.964 + pPage->cellOffset = cellOffset = hdr + 12 - 4*pPage->leaf;
1.965 + top = get2byte(&data[hdr+5]);
1.966 + pPage->nCell = get2byte(&data[hdr+3]);
1.967 + if( pPage->nCell>MX_CELL(pBt) ){
1.968 + /* To many cells for a single page. The page must be corrupt */
1.969 + return SQLITE_CORRUPT_BKPT;
1.970 + }
1.971 + if( pPage->nCell==0 && pParent!=0 && pParent->pgno!=1 ){
1.972 + /* All pages must have at least one cell, except for root pages */
1.973 + return SQLITE_CORRUPT_BKPT;
1.974 + }
1.975 +
1.976 + /* Compute the total free space on the page */
1.977 + pc = get2byte(&data[hdr+1]);
1.978 + nFree = data[hdr+7] + top - (cellOffset + 2*pPage->nCell);
1.979 + while( pc>0 ){
1.980 + int next, size;
1.981 + if( pc>usableSize-4 ){
1.982 + /* Free block is off the page */
1.983 + return SQLITE_CORRUPT_BKPT;
1.984 + }
1.985 + next = get2byte(&data[pc]);
1.986 + size = get2byte(&data[pc+2]);
1.987 + if( next>0 && next<=pc+size+3 ){
1.988 + /* Free blocks must be in accending order */
1.989 + return SQLITE_CORRUPT_BKPT;
1.990 + }
1.991 + nFree += size;
1.992 + pc = next;
1.993 + }
1.994 + pPage->nFree = nFree;
1.995 + if( nFree>=usableSize ){
1.996 + /* Free space cannot exceed total page size */
1.997 + return SQLITE_CORRUPT_BKPT;
1.998 + }
1.999 +
1.1000 +#if 0
1.1001 + /* Check that all the offsets in the cell offset array are within range.
1.1002 + **
1.1003 + ** Omitting this consistency check and using the pPage->maskPage mask
1.1004 + ** to prevent overrunning the page buffer in findCell() results in a
1.1005 + ** 2.5% performance gain.
1.1006 + */
1.1007 + {
1.1008 + u8 *pOff; /* Iterator used to check all cell offsets are in range */
1.1009 + u8 *pEnd; /* Pointer to end of cell offset array */
1.1010 + u8 mask; /* Mask of bits that must be zero in MSB of cell offsets */
1.1011 + mask = ~(((u8)(pBt->pageSize>>8))-1);
1.1012 + pEnd = &data[cellOffset + pPage->nCell*2];
1.1013 + for(pOff=&data[cellOffset]; pOff!=pEnd && !((*pOff)&mask); pOff+=2);
1.1014 + if( pOff!=pEnd ){
1.1015 + return SQLITE_CORRUPT_BKPT;
1.1016 + }
1.1017 + }
1.1018 +#endif
1.1019 +
1.1020 + pPage->isInit = 1;
1.1021 + return SQLITE_OK;
1.1022 +}
1.1023 +
1.1024 +/*
1.1025 +** Set up a raw page so that it looks like a database page holding
1.1026 +** no entries.
1.1027 +*/
1.1028 +static void zeroPage(MemPage *pPage, int flags){
1.1029 + unsigned char *data = pPage->aData;
1.1030 + BtShared *pBt = pPage->pBt;
1.1031 + int hdr = pPage->hdrOffset;
1.1032 + int first;
1.1033 +
1.1034 + assert( sqlite3PagerPagenumber(pPage->pDbPage)==pPage->pgno );
1.1035 + assert( sqlite3PagerGetExtra(pPage->pDbPage) == (void*)pPage );
1.1036 + assert( sqlite3PagerGetData(pPage->pDbPage) == data );
1.1037 + assert( sqlite3PagerIswriteable(pPage->pDbPage) );
1.1038 + assert( sqlite3_mutex_held(pBt->mutex) );
1.1039 + /*memset(&data[hdr], 0, pBt->usableSize - hdr);*/
1.1040 + data[hdr] = flags;
1.1041 + first = hdr + 8 + 4*((flags&PTF_LEAF)==0);
1.1042 + memset(&data[hdr+1], 0, 4);
1.1043 + data[hdr+7] = 0;
1.1044 + put2byte(&data[hdr+5], pBt->usableSize);
1.1045 + pPage->nFree = pBt->usableSize - first;
1.1046 + decodeFlags(pPage, flags);
1.1047 + pPage->hdrOffset = hdr;
1.1048 + pPage->cellOffset = first;
1.1049 + pPage->nOverflow = 0;
1.1050 + assert( pBt->pageSize>=512 && pBt->pageSize<=32768 );
1.1051 + pPage->maskPage = pBt->pageSize - 1;
1.1052 + pPage->idxShift = 0;
1.1053 + pPage->nCell = 0;
1.1054 + pPage->isInit = 1;
1.1055 +}
1.1056 +
1.1057 +/*
1.1058 +** Get a page from the pager. Initialize the MemPage.pBt and
1.1059 +** MemPage.aData elements if needed.
1.1060 +**
1.1061 +** If the noContent flag is set, it means that we do not care about
1.1062 +** the content of the page at this time. So do not go to the disk
1.1063 +** to fetch the content. Just fill in the content with zeros for now.
1.1064 +** If in the future we call sqlite3PagerWrite() on this page, that
1.1065 +** means we have started to be concerned about content and the disk
1.1066 +** read should occur at that point.
1.1067 +*/
1.1068 +int sqlite3BtreeGetPage(
1.1069 + BtShared *pBt, /* The btree */
1.1070 + Pgno pgno, /* Number of the page to fetch */
1.1071 + MemPage **ppPage, /* Return the page in this parameter */
1.1072 + int noContent /* Do not load page content if true */
1.1073 +){
1.1074 + int rc;
1.1075 + MemPage *pPage;
1.1076 + DbPage *pDbPage;
1.1077 +
1.1078 + assert( sqlite3_mutex_held(pBt->mutex) );
1.1079 + rc = sqlite3PagerAcquire(pBt->pPager, pgno, (DbPage**)&pDbPage, noContent);
1.1080 + if( rc ) return rc;
1.1081 + pPage = (MemPage *)sqlite3PagerGetExtra(pDbPage);
1.1082 + pPage->aData = sqlite3PagerGetData(pDbPage);
1.1083 + pPage->pDbPage = pDbPage;
1.1084 + pPage->pBt = pBt;
1.1085 + pPage->pgno = pgno;
1.1086 + pPage->hdrOffset = pPage->pgno==1 ? 100 : 0;
1.1087 + *ppPage = pPage;
1.1088 + return SQLITE_OK;
1.1089 +}
1.1090 +
1.1091 +/*
1.1092 +** Get a page from the pager and initialize it. This routine
1.1093 +** is just a convenience wrapper around separate calls to
1.1094 +** sqlite3BtreeGetPage() and sqlite3BtreeInitPage().
1.1095 +*/
1.1096 +static int getAndInitPage(
1.1097 + BtShared *pBt, /* The database file */
1.1098 + Pgno pgno, /* Number of the page to get */
1.1099 + MemPage **ppPage, /* Write the page pointer here */
1.1100 + MemPage *pParent /* Parent of the page */
1.1101 +){
1.1102 + int rc;
1.1103 + assert( sqlite3_mutex_held(pBt->mutex) );
1.1104 + if( pgno==0 ){
1.1105 + return SQLITE_CORRUPT_BKPT;
1.1106 + }
1.1107 + rc = sqlite3BtreeGetPage(pBt, pgno, ppPage, 0);
1.1108 + if( rc==SQLITE_OK && (*ppPage)->isInit==0 ){
1.1109 + rc = sqlite3BtreeInitPage(*ppPage, pParent);
1.1110 + if( rc!=SQLITE_OK ){
1.1111 + releasePage(*ppPage);
1.1112 + *ppPage = 0;
1.1113 + }
1.1114 + }
1.1115 + return rc;
1.1116 +}
1.1117 +
1.1118 +/*
1.1119 +** Release a MemPage. This should be called once for each prior
1.1120 +** call to sqlite3BtreeGetPage.
1.1121 +*/
1.1122 +static void releasePage(MemPage *pPage){
1.1123 + if( pPage ){
1.1124 + assert( pPage->aData );
1.1125 + assert( pPage->pBt );
1.1126 + assert( sqlite3PagerGetExtra(pPage->pDbPage) == (void*)pPage );
1.1127 + assert( sqlite3PagerGetData(pPage->pDbPage)==pPage->aData );
1.1128 + assert( sqlite3_mutex_held(pPage->pBt->mutex) );
1.1129 + sqlite3PagerUnref(pPage->pDbPage);
1.1130 + }
1.1131 +}
1.1132 +
1.1133 +/*
1.1134 +** This routine is called when the reference count for a page
1.1135 +** reaches zero. We need to unref the pParent pointer when that
1.1136 +** happens.
1.1137 +*/
1.1138 +static void pageDestructor(DbPage *pData, int pageSize){
1.1139 + MemPage *pPage;
1.1140 + assert( (pageSize & 7)==0 );
1.1141 + pPage = (MemPage *)sqlite3PagerGetExtra(pData);
1.1142 + assert( pPage->isInit==0 || sqlite3_mutex_held(pPage->pBt->mutex) );
1.1143 + if( pPage->pParent ){
1.1144 + MemPage *pParent = pPage->pParent;
1.1145 + assert( pParent->pBt==pPage->pBt );
1.1146 + pPage->pParent = 0;
1.1147 + releasePage(pParent);
1.1148 + }
1.1149 + pPage->isInit = 0;
1.1150 +}
1.1151 +
1.1152 +/*
1.1153 +** During a rollback, when the pager reloads information into the cache
1.1154 +** so that the cache is restored to its original state at the start of
1.1155 +** the transaction, for each page restored this routine is called.
1.1156 +**
1.1157 +** This routine needs to reset the extra data section at the end of the
1.1158 +** page to agree with the restored data.
1.1159 +*/
1.1160 +static void pageReinit(DbPage *pData, int pageSize){
1.1161 + MemPage *pPage;
1.1162 + assert( (pageSize & 7)==0 );
1.1163 + pPage = (MemPage *)sqlite3PagerGetExtra(pData);
1.1164 + if( pPage->isInit ){
1.1165 + assert( sqlite3_mutex_held(pPage->pBt->mutex) );
1.1166 + pPage->isInit = 0;
1.1167 + sqlite3BtreeInitPage(pPage, pPage->pParent);
1.1168 + }
1.1169 +}
1.1170 +
1.1171 +/*
1.1172 +** Invoke the busy handler for a btree.
1.1173 +*/
1.1174 +static int sqlite3BtreeInvokeBusyHandler(void *pArg, int n){
1.1175 + BtShared *pBt = (BtShared*)pArg;
1.1176 + assert( pBt->db );
1.1177 + assert( sqlite3_mutex_held(pBt->db->mutex) );
1.1178 + return sqlite3InvokeBusyHandler(&pBt->db->busyHandler);
1.1179 +}
1.1180 +
1.1181 +/*
1.1182 +** Open a database file.
1.1183 +**
1.1184 +** zFilename is the name of the database file. If zFilename is NULL
1.1185 +** a new database with a random name is created. This randomly named
1.1186 +** database file will be deleted when sqlite3BtreeClose() is called.
1.1187 +** If zFilename is ":memory:" then an in-memory database is created
1.1188 +** that is automatically destroyed when it is closed.
1.1189 +*/
1.1190 +int sqlite3BtreeOpen(
1.1191 + const char *zFilename, /* Name of the file containing the BTree database */
1.1192 + sqlite3 *db, /* Associated database handle */
1.1193 + Btree **ppBtree, /* Pointer to new Btree object written here */
1.1194 + int flags, /* Options */
1.1195 + int vfsFlags /* Flags passed through to sqlite3_vfs.xOpen() */
1.1196 +){
1.1197 + sqlite3_vfs *pVfs; /* The VFS to use for this btree */
1.1198 + BtShared *pBt = 0; /* Shared part of btree structure */
1.1199 + Btree *p; /* Handle to return */
1.1200 + int rc = SQLITE_OK;
1.1201 + int nReserve;
1.1202 + unsigned char zDbHeader[100];
1.1203 +
1.1204 + /* Set the variable isMemdb to true for an in-memory database, or
1.1205 + ** false for a file-based database. This symbol is only required if
1.1206 + ** either of the shared-data or autovacuum features are compiled
1.1207 + ** into the library.
1.1208 + */
1.1209 +#if !defined(SQLITE_OMIT_SHARED_CACHE) || !defined(SQLITE_OMIT_AUTOVACUUM)
1.1210 + #ifdef SQLITE_OMIT_MEMORYDB
1.1211 + const int isMemdb = 0;
1.1212 + #else
1.1213 + const int isMemdb = zFilename && !strcmp(zFilename, ":memory:");
1.1214 + #endif
1.1215 +#endif
1.1216 +
1.1217 + assert( db!=0 );
1.1218 + assert( sqlite3_mutex_held(db->mutex) );
1.1219 +
1.1220 + pVfs = db->pVfs;
1.1221 + p = sqlite3MallocZero(sizeof(Btree));
1.1222 + if( !p ){
1.1223 + return SQLITE_NOMEM;
1.1224 + }
1.1225 + p->inTrans = TRANS_NONE;
1.1226 + p->db = db;
1.1227 +
1.1228 +#if !defined(SQLITE_OMIT_SHARED_CACHE) && !defined(SQLITE_OMIT_DISKIO)
1.1229 + /*
1.1230 + ** If this Btree is a candidate for shared cache, try to find an
1.1231 + ** existing BtShared object that we can share with
1.1232 + */
1.1233 + if( isMemdb==0
1.1234 + && (db->flags & SQLITE_Vtab)==0
1.1235 + && zFilename && zFilename[0]
1.1236 + ){
1.1237 + if( sqlite3SharedCacheEnabled ){
1.1238 + int nFullPathname = pVfs->mxPathname+1;
1.1239 + char *zFullPathname = sqlite3Malloc(nFullPathname);
1.1240 + sqlite3_mutex *mutexShared;
1.1241 + p->sharable = 1;
1.1242 + db->flags |= SQLITE_SharedCache;
1.1243 + if( !zFullPathname ){
1.1244 + sqlite3_free(p);
1.1245 + return SQLITE_NOMEM;
1.1246 + }
1.1247 + sqlite3OsFullPathname(pVfs, zFilename, nFullPathname, zFullPathname);
1.1248 + mutexShared = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER);
1.1249 + sqlite3_mutex_enter(mutexShared);
1.1250 + for(pBt=sqlite3SharedCacheList; pBt; pBt=pBt->pNext){
1.1251 + assert( pBt->nRef>0 );
1.1252 + if( 0==strcmp(zFullPathname, sqlite3PagerFilename(pBt->pPager))
1.1253 + && sqlite3PagerVfs(pBt->pPager)==pVfs ){
1.1254 + p->pBt = pBt;
1.1255 + pBt->nRef++;
1.1256 + break;
1.1257 + }
1.1258 + }
1.1259 + sqlite3_mutex_leave(mutexShared);
1.1260 + sqlite3_free(zFullPathname);
1.1261 + }
1.1262 +#ifdef SQLITE_DEBUG
1.1263 + else{
1.1264 + /* In debug mode, we mark all persistent databases as sharable
1.1265 + ** even when they are not. This exercises the locking code and
1.1266 + ** gives more opportunity for asserts(sqlite3_mutex_held())
1.1267 + ** statements to find locking problems.
1.1268 + */
1.1269 + p->sharable = 1;
1.1270 + }
1.1271 +#endif
1.1272 + }
1.1273 +#endif
1.1274 + if( pBt==0 ){
1.1275 + /*
1.1276 + ** The following asserts make sure that structures used by the btree are
1.1277 + ** the right size. This is to guard against size changes that result
1.1278 + ** when compiling on a different architecture.
1.1279 + */
1.1280 + assert( sizeof(i64)==8 || sizeof(i64)==4 );
1.1281 + assert( sizeof(u64)==8 || sizeof(u64)==4 );
1.1282 + assert( sizeof(u32)==4 );
1.1283 + assert( sizeof(u16)==2 );
1.1284 + assert( sizeof(Pgno)==4 );
1.1285 +
1.1286 + pBt = sqlite3MallocZero( sizeof(*pBt) );
1.1287 + if( pBt==0 ){
1.1288 + rc = SQLITE_NOMEM;
1.1289 + goto btree_open_out;
1.1290 + }
1.1291 + pBt->busyHdr.xFunc = sqlite3BtreeInvokeBusyHandler;
1.1292 + pBt->busyHdr.pArg = pBt;
1.1293 + rc = sqlite3PagerOpen(pVfs, &pBt->pPager, zFilename,
1.1294 + EXTRA_SIZE, flags, vfsFlags);
1.1295 + if( rc==SQLITE_OK ){
1.1296 + rc = sqlite3PagerReadFileheader(pBt->pPager,sizeof(zDbHeader),zDbHeader);
1.1297 + }
1.1298 + if( rc!=SQLITE_OK ){
1.1299 + goto btree_open_out;
1.1300 + }
1.1301 + sqlite3PagerSetBusyhandler(pBt->pPager, &pBt->busyHdr);
1.1302 + p->pBt = pBt;
1.1303 +
1.1304 + sqlite3PagerSetDestructor(pBt->pPager, pageDestructor);
1.1305 + sqlite3PagerSetReiniter(pBt->pPager, pageReinit);
1.1306 + pBt->pCursor = 0;
1.1307 + pBt->pPage1 = 0;
1.1308 + pBt->readOnly = sqlite3PagerIsreadonly(pBt->pPager);
1.1309 + pBt->pageSize = get2byte(&zDbHeader[16]);
1.1310 + if( pBt->pageSize<512 || pBt->pageSize>SQLITE_MAX_PAGE_SIZE
1.1311 + || ((pBt->pageSize-1)&pBt->pageSize)!=0 ){
1.1312 + pBt->pageSize = 0;
1.1313 + sqlite3PagerSetPagesize(pBt->pPager, &pBt->pageSize);
1.1314 +#ifndef SQLITE_OMIT_AUTOVACUUM
1.1315 + /* If the magic name ":memory:" will create an in-memory database, then
1.1316 + ** leave the autoVacuum mode at 0 (do not auto-vacuum), even if
1.1317 + ** SQLITE_DEFAULT_AUTOVACUUM is true. On the other hand, if
1.1318 + ** SQLITE_OMIT_MEMORYDB has been defined, then ":memory:" is just a
1.1319 + ** regular file-name. In this case the auto-vacuum applies as per normal.
1.1320 + */
1.1321 + if( zFilename && !isMemdb ){
1.1322 + pBt->autoVacuum = (SQLITE_DEFAULT_AUTOVACUUM ? 1 : 0);
1.1323 + pBt->incrVacuum = (SQLITE_DEFAULT_AUTOVACUUM==2 ? 1 : 0);
1.1324 + }
1.1325 +#endif
1.1326 + nReserve = 0;
1.1327 + }else{
1.1328 + nReserve = zDbHeader[20];
1.1329 + pBt->pageSizeFixed = 1;
1.1330 +#ifndef SQLITE_OMIT_AUTOVACUUM
1.1331 + pBt->autoVacuum = (get4byte(&zDbHeader[36 + 4*4])?1:0);
1.1332 + pBt->incrVacuum = (get4byte(&zDbHeader[36 + 7*4])?1:0);
1.1333 +#endif
1.1334 + }
1.1335 + pBt->usableSize = pBt->pageSize - nReserve;
1.1336 + assert( (pBt->pageSize & 7)==0 ); /* 8-byte alignment of pageSize */
1.1337 + sqlite3PagerSetPagesize(pBt->pPager, &pBt->pageSize);
1.1338 +
1.1339 +#if !defined(SQLITE_OMIT_SHARED_CACHE) && !defined(SQLITE_OMIT_DISKIO)
1.1340 + /* Add the new BtShared object to the linked list sharable BtShareds.
1.1341 + */
1.1342 + if( p->sharable ){
1.1343 + sqlite3_mutex *mutexShared;
1.1344 + pBt->nRef = 1;
1.1345 + mutexShared = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER);
1.1346 + if( SQLITE_THREADSAFE && sqlite3Config.bCoreMutex ){
1.1347 + pBt->mutex = sqlite3MutexAlloc(SQLITE_MUTEX_FAST);
1.1348 + if( pBt->mutex==0 ){
1.1349 + rc = SQLITE_NOMEM;
1.1350 + db->mallocFailed = 0;
1.1351 + goto btree_open_out;
1.1352 + }
1.1353 + }
1.1354 + sqlite3_mutex_enter(mutexShared);
1.1355 + pBt->pNext = sqlite3SharedCacheList;
1.1356 + sqlite3SharedCacheList = pBt;
1.1357 + sqlite3_mutex_leave(mutexShared);
1.1358 + }
1.1359 +#endif
1.1360 + }
1.1361 +
1.1362 +#if !defined(SQLITE_OMIT_SHARED_CACHE) && !defined(SQLITE_OMIT_DISKIO)
1.1363 + /* If the new Btree uses a sharable pBtShared, then link the new
1.1364 + ** Btree into the list of all sharable Btrees for the same connection.
1.1365 + ** The list is kept in ascending order by pBt address.
1.1366 + */
1.1367 + if( p->sharable ){
1.1368 + int i;
1.1369 + Btree *pSib;
1.1370 + for(i=0; i<db->nDb; i++){
1.1371 + if( (pSib = db->aDb[i].pBt)!=0 && pSib->sharable ){
1.1372 + while( pSib->pPrev ){ pSib = pSib->pPrev; }
1.1373 + if( p->pBt<pSib->pBt ){
1.1374 + p->pNext = pSib;
1.1375 + p->pPrev = 0;
1.1376 + pSib->pPrev = p;
1.1377 + }else{
1.1378 + while( pSib->pNext && pSib->pNext->pBt<p->pBt ){
1.1379 + pSib = pSib->pNext;
1.1380 + }
1.1381 + p->pNext = pSib->pNext;
1.1382 + p->pPrev = pSib;
1.1383 + if( p->pNext ){
1.1384 + p->pNext->pPrev = p;
1.1385 + }
1.1386 + pSib->pNext = p;
1.1387 + }
1.1388 + break;
1.1389 + }
1.1390 + }
1.1391 + }
1.1392 +#endif
1.1393 + *ppBtree = p;
1.1394 +
1.1395 +btree_open_out:
1.1396 + if( rc!=SQLITE_OK ){
1.1397 + if( pBt && pBt->pPager ){
1.1398 + sqlite3PagerClose(pBt->pPager);
1.1399 + }
1.1400 + sqlite3_free(pBt);
1.1401 + sqlite3_free(p);
1.1402 + *ppBtree = 0;
1.1403 + }
1.1404 + return rc;
1.1405 +}
1.1406 +
1.1407 +/*
1.1408 +** Decrement the BtShared.nRef counter. When it reaches zero,
1.1409 +** remove the BtShared structure from the sharing list. Return
1.1410 +** true if the BtShared.nRef counter reaches zero and return
1.1411 +** false if it is still positive.
1.1412 +*/
1.1413 +static int removeFromSharingList(BtShared *pBt){
1.1414 +#ifndef SQLITE_OMIT_SHARED_CACHE
1.1415 + sqlite3_mutex *pMaster;
1.1416 + BtShared *pList;
1.1417 + int removed = 0;
1.1418 +
1.1419 + assert( sqlite3_mutex_notheld(pBt->mutex) );
1.1420 + pMaster = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER);
1.1421 + sqlite3_mutex_enter(pMaster);
1.1422 + pBt->nRef--;
1.1423 + if( pBt->nRef<=0 ){
1.1424 + if( sqlite3SharedCacheList==pBt ){
1.1425 + sqlite3SharedCacheList = pBt->pNext;
1.1426 + }else{
1.1427 + pList = sqlite3SharedCacheList;
1.1428 + while( ALWAYS(pList) && pList->pNext!=pBt ){
1.1429 + pList=pList->pNext;
1.1430 + }
1.1431 + if( ALWAYS(pList) ){
1.1432 + pList->pNext = pBt->pNext;
1.1433 + }
1.1434 + }
1.1435 + if( SQLITE_THREADSAFE ){
1.1436 + sqlite3_mutex_free(pBt->mutex);
1.1437 + }
1.1438 + removed = 1;
1.1439 + }
1.1440 + sqlite3_mutex_leave(pMaster);
1.1441 + return removed;
1.1442 +#else
1.1443 + return 1;
1.1444 +#endif
1.1445 +}
1.1446 +
1.1447 +/*
1.1448 +** Make sure pBt->pTmpSpace points to an allocation of
1.1449 +** MX_CELL_SIZE(pBt) bytes.
1.1450 +*/
1.1451 +static void allocateTempSpace(BtShared *pBt){
1.1452 + if( !pBt->pTmpSpace ){
1.1453 + pBt->pTmpSpace = sqlite3PageMalloc( pBt->pageSize );
1.1454 + }
1.1455 +}
1.1456 +
1.1457 +/*
1.1458 +** Free the pBt->pTmpSpace allocation
1.1459 +*/
1.1460 +static void freeTempSpace(BtShared *pBt){
1.1461 + sqlite3PageFree( pBt->pTmpSpace);
1.1462 + pBt->pTmpSpace = 0;
1.1463 +}
1.1464 +
1.1465 +/*
1.1466 +** Close an open database and invalidate all cursors.
1.1467 +*/
1.1468 +int sqlite3BtreeClose(Btree *p){
1.1469 + BtShared *pBt = p->pBt;
1.1470 + BtCursor *pCur;
1.1471 +
1.1472 + /* Close all cursors opened via this handle. */
1.1473 + assert( sqlite3_mutex_held(p->db->mutex) );
1.1474 + sqlite3BtreeEnter(p);
1.1475 + pBt->db = p->db;
1.1476 + pCur = pBt->pCursor;
1.1477 + while( pCur ){
1.1478 + BtCursor *pTmp = pCur;
1.1479 + pCur = pCur->pNext;
1.1480 + if( pTmp->pBtree==p ){
1.1481 + sqlite3BtreeCloseCursor(pTmp);
1.1482 + }
1.1483 + }
1.1484 +
1.1485 + /* Rollback any active transaction and free the handle structure.
1.1486 + ** The call to sqlite3BtreeRollback() drops any table-locks held by
1.1487 + ** this handle.
1.1488 + */
1.1489 + sqlite3BtreeRollback(p);
1.1490 + sqlite3BtreeLeave(p);
1.1491 +
1.1492 + /* If there are still other outstanding references to the shared-btree
1.1493 + ** structure, return now. The remainder of this procedure cleans
1.1494 + ** up the shared-btree.
1.1495 + */
1.1496 + assert( p->wantToLock==0 && p->locked==0 );
1.1497 + if( !p->sharable || removeFromSharingList(pBt) ){
1.1498 + /* The pBt is no longer on the sharing list, so we can access
1.1499 + ** it without having to hold the mutex.
1.1500 + **
1.1501 + ** Clean out and delete the BtShared object.
1.1502 + */
1.1503 + assert( !pBt->pCursor );
1.1504 + sqlite3PagerClose(pBt->pPager);
1.1505 + if( pBt->xFreeSchema && pBt->pSchema ){
1.1506 + pBt->xFreeSchema(pBt->pSchema);
1.1507 + }
1.1508 + sqlite3_free(pBt->pSchema);
1.1509 + freeTempSpace(pBt);
1.1510 + sqlite3_free(pBt);
1.1511 + }
1.1512 +
1.1513 +#ifndef SQLITE_OMIT_SHARED_CACHE
1.1514 + assert( p->wantToLock==0 );
1.1515 + assert( p->locked==0 );
1.1516 + if( p->pPrev ) p->pPrev->pNext = p->pNext;
1.1517 + if( p->pNext ) p->pNext->pPrev = p->pPrev;
1.1518 +#endif
1.1519 +
1.1520 + sqlite3_free(p);
1.1521 + return SQLITE_OK;
1.1522 +}
1.1523 +
1.1524 +/*
1.1525 +** Change the limit on the number of pages allowed in the cache.
1.1526 +**
1.1527 +** The maximum number of cache pages is set to the absolute
1.1528 +** value of mxPage. If mxPage is negative, the pager will
1.1529 +** operate asynchronously - it will not stop to do fsync()s
1.1530 +** to insure data is written to the disk surface before
1.1531 +** continuing. Transactions still work if synchronous is off,
1.1532 +** and the database cannot be corrupted if this program
1.1533 +** crashes. But if the operating system crashes or there is
1.1534 +** an abrupt power failure when synchronous is off, the database
1.1535 +** could be left in an inconsistent and unrecoverable state.
1.1536 +** Synchronous is on by default so database corruption is not
1.1537 +** normally a worry.
1.1538 +*/
1.1539 +int sqlite3BtreeSetCacheSize(Btree *p, int mxPage){
1.1540 + BtShared *pBt = p->pBt;
1.1541 + assert( sqlite3_mutex_held(p->db->mutex) );
1.1542 + sqlite3BtreeEnter(p);
1.1543 + sqlite3PagerSetCachesize(pBt->pPager, mxPage);
1.1544 + sqlite3BtreeLeave(p);
1.1545 + return SQLITE_OK;
1.1546 +}
1.1547 +
1.1548 +/*
1.1549 +** Change the way data is synced to disk in order to increase or decrease
1.1550 +** how well the database resists damage due to OS crashes and power
1.1551 +** failures. Level 1 is the same as asynchronous (no syncs() occur and
1.1552 +** there is a high probability of damage) Level 2 is the default. There
1.1553 +** is a very low but non-zero probability of damage. Level 3 reduces the
1.1554 +** probability of damage to near zero but with a write performance reduction.
1.1555 +*/
1.1556 +#ifndef SQLITE_OMIT_PAGER_PRAGMAS
1.1557 +int sqlite3BtreeSetSafetyLevel(Btree *p, int level, int fullSync){
1.1558 + BtShared *pBt = p->pBt;
1.1559 + assert( sqlite3_mutex_held(p->db->mutex) );
1.1560 + sqlite3BtreeEnter(p);
1.1561 + sqlite3PagerSetSafetyLevel(pBt->pPager, level, fullSync);
1.1562 + sqlite3BtreeLeave(p);
1.1563 + return SQLITE_OK;
1.1564 +}
1.1565 +#endif
1.1566 +
1.1567 +/*
1.1568 +** Return TRUE if the given btree is set to safety level 1. In other
1.1569 +** words, return TRUE if no sync() occurs on the disk files.
1.1570 +*/
1.1571 +int sqlite3BtreeSyncDisabled(Btree *p){
1.1572 + BtShared *pBt = p->pBt;
1.1573 + int rc;
1.1574 + assert( sqlite3_mutex_held(p->db->mutex) );
1.1575 + sqlite3BtreeEnter(p);
1.1576 + assert( pBt && pBt->pPager );
1.1577 + rc = sqlite3PagerNosync(pBt->pPager);
1.1578 + sqlite3BtreeLeave(p);
1.1579 + return rc;
1.1580 +}
1.1581 +
1.1582 +#if !defined(SQLITE_OMIT_PAGER_PRAGMAS) || !defined(SQLITE_OMIT_VACUUM)
1.1583 +/*
1.1584 +** Change the default pages size and the number of reserved bytes per page.
1.1585 +**
1.1586 +** The page size must be a power of 2 between 512 and 65536. If the page
1.1587 +** size supplied does not meet this constraint then the page size is not
1.1588 +** changed.
1.1589 +**
1.1590 +** Page sizes are constrained to be a power of two so that the region
1.1591 +** of the database file used for locking (beginning at PENDING_BYTE,
1.1592 +** the first byte past the 1GB boundary, 0x40000000) needs to occur
1.1593 +** at the beginning of a page.
1.1594 +**
1.1595 +** If parameter nReserve is less than zero, then the number of reserved
1.1596 +** bytes per page is left unchanged.
1.1597 +*/
1.1598 +int sqlite3BtreeSetPageSize(Btree *p, int pageSize, int nReserve){
1.1599 + int rc = SQLITE_OK;
1.1600 + BtShared *pBt = p->pBt;
1.1601 + sqlite3BtreeEnter(p);
1.1602 + if( pBt->pageSizeFixed ){
1.1603 + sqlite3BtreeLeave(p);
1.1604 + return SQLITE_READONLY;
1.1605 + }
1.1606 + if( nReserve<0 ){
1.1607 + nReserve = pBt->pageSize - pBt->usableSize;
1.1608 + }
1.1609 + if( pageSize>=512 && pageSize<=SQLITE_MAX_PAGE_SIZE &&
1.1610 + ((pageSize-1)&pageSize)==0 ){
1.1611 + assert( (pageSize & 7)==0 );
1.1612 + assert( !pBt->pPage1 && !pBt->pCursor );
1.1613 + pBt->pageSize = pageSize;
1.1614 + freeTempSpace(pBt);
1.1615 + rc = sqlite3PagerSetPagesize(pBt->pPager, &pBt->pageSize);
1.1616 + }
1.1617 + pBt->usableSize = pBt->pageSize - nReserve;
1.1618 + sqlite3BtreeLeave(p);
1.1619 + return rc;
1.1620 +}
1.1621 +
1.1622 +/*
1.1623 +** Return the currently defined page size
1.1624 +*/
1.1625 +int sqlite3BtreeGetPageSize(Btree *p){
1.1626 + return p->pBt->pageSize;
1.1627 +}
1.1628 +int sqlite3BtreeGetReserve(Btree *p){
1.1629 + int n;
1.1630 + sqlite3BtreeEnter(p);
1.1631 + n = p->pBt->pageSize - p->pBt->usableSize;
1.1632 + sqlite3BtreeLeave(p);
1.1633 + return n;
1.1634 +}
1.1635 +
1.1636 +/*
1.1637 +** Set the maximum page count for a database if mxPage is positive.
1.1638 +** No changes are made if mxPage is 0 or negative.
1.1639 +** Regardless of the value of mxPage, return the maximum page count.
1.1640 +*/
1.1641 +int sqlite3BtreeMaxPageCount(Btree *p, int mxPage){
1.1642 + int n;
1.1643 + sqlite3BtreeEnter(p);
1.1644 + n = sqlite3PagerMaxPageCount(p->pBt->pPager, mxPage);
1.1645 + sqlite3BtreeLeave(p);
1.1646 + return n;
1.1647 +}
1.1648 +#endif /* !defined(SQLITE_OMIT_PAGER_PRAGMAS) || !defined(SQLITE_OMIT_VACUUM) */
1.1649 +
1.1650 +/*
1.1651 +** Change the 'auto-vacuum' property of the database. If the 'autoVacuum'
1.1652 +** parameter is non-zero, then auto-vacuum mode is enabled. If zero, it
1.1653 +** is disabled. The default value for the auto-vacuum property is
1.1654 +** determined by the SQLITE_DEFAULT_AUTOVACUUM macro.
1.1655 +*/
1.1656 +int sqlite3BtreeSetAutoVacuum(Btree *p, int autoVacuum){
1.1657 +#ifdef SQLITE_OMIT_AUTOVACUUM
1.1658 + return SQLITE_READONLY;
1.1659 +#else
1.1660 + BtShared *pBt = p->pBt;
1.1661 + int rc = SQLITE_OK;
1.1662 + int av = (autoVacuum?1:0);
1.1663 +
1.1664 + sqlite3BtreeEnter(p);
1.1665 + if( pBt->pageSizeFixed && av!=pBt->autoVacuum ){
1.1666 + rc = SQLITE_READONLY;
1.1667 + }else{
1.1668 + pBt->autoVacuum = av;
1.1669 + }
1.1670 + sqlite3BtreeLeave(p);
1.1671 + return rc;
1.1672 +#endif
1.1673 +}
1.1674 +
1.1675 +/*
1.1676 +** Return the value of the 'auto-vacuum' property. If auto-vacuum is
1.1677 +** enabled 1 is returned. Otherwise 0.
1.1678 +*/
1.1679 +int sqlite3BtreeGetAutoVacuum(Btree *p){
1.1680 +#ifdef SQLITE_OMIT_AUTOVACUUM
1.1681 + return BTREE_AUTOVACUUM_NONE;
1.1682 +#else
1.1683 + int rc;
1.1684 + sqlite3BtreeEnter(p);
1.1685 + rc = (
1.1686 + (!p->pBt->autoVacuum)?BTREE_AUTOVACUUM_NONE:
1.1687 + (!p->pBt->incrVacuum)?BTREE_AUTOVACUUM_FULL:
1.1688 + BTREE_AUTOVACUUM_INCR
1.1689 + );
1.1690 + sqlite3BtreeLeave(p);
1.1691 + return rc;
1.1692 +#endif
1.1693 +}
1.1694 +
1.1695 +
1.1696 +/*
1.1697 +** Get a reference to pPage1 of the database file. This will
1.1698 +** also acquire a readlock on that file.
1.1699 +**
1.1700 +** SQLITE_OK is returned on success. If the file is not a
1.1701 +** well-formed database file, then SQLITE_CORRUPT is returned.
1.1702 +** SQLITE_BUSY is returned if the database is locked. SQLITE_NOMEM
1.1703 +** is returned if we run out of memory.
1.1704 +*/
1.1705 +static int lockBtree(BtShared *pBt){
1.1706 + int rc;
1.1707 + MemPage *pPage1;
1.1708 + int nPage;
1.1709 +
1.1710 + assert( sqlite3_mutex_held(pBt->mutex) );
1.1711 + if( pBt->pPage1 ) return SQLITE_OK;
1.1712 + rc = sqlite3BtreeGetPage(pBt, 1, &pPage1, 0);
1.1713 + if( rc!=SQLITE_OK ) return rc;
1.1714 +
1.1715 + /* Do some checking to help insure the file we opened really is
1.1716 + ** a valid database file.
1.1717 + */
1.1718 + rc = sqlite3PagerPagecount(pBt->pPager, &nPage);
1.1719 + if( rc!=SQLITE_OK ){
1.1720 + goto page1_init_failed;
1.1721 + }else if( nPage>0 ){
1.1722 + int pageSize;
1.1723 + int usableSize;
1.1724 + u8 *page1 = pPage1->aData;
1.1725 + rc = SQLITE_NOTADB;
1.1726 + if( memcmp(page1, zMagicHeader, 16)!=0 ){
1.1727 + goto page1_init_failed;
1.1728 + }
1.1729 + if( page1[18]>1 ){
1.1730 + pBt->readOnly = 1;
1.1731 + }
1.1732 + if( page1[19]>1 ){
1.1733 + goto page1_init_failed;
1.1734 + }
1.1735 +
1.1736 + /* The maximum embedded fraction must be exactly 25%. And the minimum
1.1737 + ** embedded fraction must be 12.5% for both leaf-data and non-leaf-data.
1.1738 + ** The original design allowed these amounts to vary, but as of
1.1739 + ** version 3.6.0, we require them to be fixed.
1.1740 + */
1.1741 + if( memcmp(&page1[21], "\100\040\040",3)!=0 ){
1.1742 + goto page1_init_failed;
1.1743 + }
1.1744 + pageSize = get2byte(&page1[16]);
1.1745 + if( ((pageSize-1)&pageSize)!=0 || pageSize<512 ||
1.1746 + (SQLITE_MAX_PAGE_SIZE<32768 && pageSize>SQLITE_MAX_PAGE_SIZE)
1.1747 + ){
1.1748 + goto page1_init_failed;
1.1749 + }
1.1750 + assert( (pageSize & 7)==0 );
1.1751 + usableSize = pageSize - page1[20];
1.1752 + if( pageSize!=pBt->pageSize ){
1.1753 + /* After reading the first page of the database assuming a page size
1.1754 + ** of BtShared.pageSize, we have discovered that the page-size is
1.1755 + ** actually pageSize. Unlock the database, leave pBt->pPage1 at
1.1756 + ** zero and return SQLITE_OK. The caller will call this function
1.1757 + ** again with the correct page-size.
1.1758 + */
1.1759 + releasePage(pPage1);
1.1760 + pBt->usableSize = usableSize;
1.1761 + pBt->pageSize = pageSize;
1.1762 + freeTempSpace(pBt);
1.1763 + sqlite3PagerSetPagesize(pBt->pPager, &pBt->pageSize);
1.1764 + return SQLITE_OK;
1.1765 + }
1.1766 + if( usableSize<500 ){
1.1767 + goto page1_init_failed;
1.1768 + }
1.1769 + pBt->pageSize = pageSize;
1.1770 + pBt->usableSize = usableSize;
1.1771 +#ifndef SQLITE_OMIT_AUTOVACUUM
1.1772 + pBt->autoVacuum = (get4byte(&page1[36 + 4*4])?1:0);
1.1773 + pBt->incrVacuum = (get4byte(&page1[36 + 7*4])?1:0);
1.1774 +#endif
1.1775 + }
1.1776 +
1.1777 + /* maxLocal is the maximum amount of payload to store locally for
1.1778 + ** a cell. Make sure it is small enough so that at least minFanout
1.1779 + ** cells can will fit on one page. We assume a 10-byte page header.
1.1780 + ** Besides the payload, the cell must store:
1.1781 + ** 2-byte pointer to the cell
1.1782 + ** 4-byte child pointer
1.1783 + ** 9-byte nKey value
1.1784 + ** 4-byte nData value
1.1785 + ** 4-byte overflow page pointer
1.1786 + ** So a cell consists of a 2-byte poiner, a header which is as much as
1.1787 + ** 17 bytes long, 0 to N bytes of payload, and an optional 4 byte overflow
1.1788 + ** page pointer.
1.1789 + */
1.1790 + pBt->maxLocal = (pBt->usableSize-12)*64/255 - 23;
1.1791 + pBt->minLocal = (pBt->usableSize-12)*32/255 - 23;
1.1792 + pBt->maxLeaf = pBt->usableSize - 35;
1.1793 + pBt->minLeaf = (pBt->usableSize-12)*32/255 - 23;
1.1794 + assert( pBt->maxLeaf + 23 <= MX_CELL_SIZE(pBt) );
1.1795 + pBt->pPage1 = pPage1;
1.1796 + return SQLITE_OK;
1.1797 +
1.1798 +page1_init_failed:
1.1799 + releasePage(pPage1);
1.1800 + pBt->pPage1 = 0;
1.1801 + return rc;
1.1802 +}
1.1803 +
1.1804 +/*
1.1805 +** This routine works like lockBtree() except that it also invokes the
1.1806 +** busy callback if there is lock contention.
1.1807 +*/
1.1808 +static int lockBtreeWithRetry(Btree *pRef){
1.1809 + int rc = SQLITE_OK;
1.1810 +
1.1811 + assert( sqlite3BtreeHoldsMutex(pRef) );
1.1812 + if( pRef->inTrans==TRANS_NONE ){
1.1813 + u8 inTransaction = pRef->pBt->inTransaction;
1.1814 + btreeIntegrity(pRef);
1.1815 + rc = sqlite3BtreeBeginTrans(pRef, 0);
1.1816 + pRef->pBt->inTransaction = inTransaction;
1.1817 + pRef->inTrans = TRANS_NONE;
1.1818 + if( rc==SQLITE_OK ){
1.1819 + pRef->pBt->nTransaction--;
1.1820 + }
1.1821 + btreeIntegrity(pRef);
1.1822 + }
1.1823 + return rc;
1.1824 +}
1.1825 +
1.1826 +
1.1827 +/*
1.1828 +** If there are no outstanding cursors and we are not in the middle
1.1829 +** of a transaction but there is a read lock on the database, then
1.1830 +** this routine unrefs the first page of the database file which
1.1831 +** has the effect of releasing the read lock.
1.1832 +**
1.1833 +** If there are any outstanding cursors, this routine is a no-op.
1.1834 +**
1.1835 +** If there is a transaction in progress, this routine is a no-op.
1.1836 +*/
1.1837 +static void unlockBtreeIfUnused(BtShared *pBt){
1.1838 + assert( sqlite3_mutex_held(pBt->mutex) );
1.1839 + if( pBt->inTransaction==TRANS_NONE && pBt->pCursor==0 && pBt->pPage1!=0 ){
1.1840 + if( sqlite3PagerRefcount(pBt->pPager)>=1 ){
1.1841 + assert( pBt->pPage1->aData );
1.1842 +#if 0
1.1843 + if( pBt->pPage1->aData==0 ){
1.1844 + MemPage *pPage = pBt->pPage1;
1.1845 + pPage->aData = sqlite3PagerGetData(pPage->pDbPage);
1.1846 + pPage->pBt = pBt;
1.1847 + pPage->pgno = 1;
1.1848 + }
1.1849 +#endif
1.1850 + releasePage(pBt->pPage1);
1.1851 + }
1.1852 + pBt->pPage1 = 0;
1.1853 + pBt->inStmt = 0;
1.1854 + }
1.1855 +}
1.1856 +
1.1857 +/*
1.1858 +** Create a new database by initializing the first page of the
1.1859 +** file.
1.1860 +*/
1.1861 +static int newDatabase(BtShared *pBt){
1.1862 + MemPage *pP1;
1.1863 + unsigned char *data;
1.1864 + int rc;
1.1865 + int nPage;
1.1866 +
1.1867 + assert( sqlite3_mutex_held(pBt->mutex) );
1.1868 + rc = sqlite3PagerPagecount(pBt->pPager, &nPage);
1.1869 + if( rc!=SQLITE_OK || nPage>0 ){
1.1870 + return rc;
1.1871 + }
1.1872 + pP1 = pBt->pPage1;
1.1873 + assert( pP1!=0 );
1.1874 + data = pP1->aData;
1.1875 + rc = sqlite3PagerWrite(pP1->pDbPage);
1.1876 + if( rc ) return rc;
1.1877 + memcpy(data, zMagicHeader, sizeof(zMagicHeader));
1.1878 + assert( sizeof(zMagicHeader)==16 );
1.1879 + put2byte(&data[16], pBt->pageSize);
1.1880 + data[18] = 1;
1.1881 + data[19] = 1;
1.1882 + data[20] = pBt->pageSize - pBt->usableSize;
1.1883 + data[21] = 64;
1.1884 + data[22] = 32;
1.1885 + data[23] = 32;
1.1886 + memset(&data[24], 0, 100-24);
1.1887 + zeroPage(pP1, PTF_INTKEY|PTF_LEAF|PTF_LEAFDATA );
1.1888 + pBt->pageSizeFixed = 1;
1.1889 +#ifndef SQLITE_OMIT_AUTOVACUUM
1.1890 + assert( pBt->autoVacuum==1 || pBt->autoVacuum==0 );
1.1891 + assert( pBt->incrVacuum==1 || pBt->incrVacuum==0 );
1.1892 + put4byte(&data[36 + 4*4], pBt->autoVacuum);
1.1893 + put4byte(&data[36 + 7*4], pBt->incrVacuum);
1.1894 +#endif
1.1895 + return SQLITE_OK;
1.1896 +}
1.1897 +
1.1898 +/*
1.1899 +** Attempt to start a new transaction. A write-transaction
1.1900 +** is started if the second argument is nonzero, otherwise a read-
1.1901 +** transaction. If the second argument is 2 or more and exclusive
1.1902 +** transaction is started, meaning that no other process is allowed
1.1903 +** to access the database. A preexisting transaction may not be
1.1904 +** upgraded to exclusive by calling this routine a second time - the
1.1905 +** exclusivity flag only works for a new transaction.
1.1906 +**
1.1907 +** A write-transaction must be started before attempting any
1.1908 +** changes to the database. None of the following routines
1.1909 +** will work unless a transaction is started first:
1.1910 +**
1.1911 +** sqlite3BtreeCreateTable()
1.1912 +** sqlite3BtreeCreateIndex()
1.1913 +** sqlite3BtreeClearTable()
1.1914 +** sqlite3BtreeDropTable()
1.1915 +** sqlite3BtreeInsert()
1.1916 +** sqlite3BtreeDelete()
1.1917 +** sqlite3BtreeUpdateMeta()
1.1918 +**
1.1919 +** If an initial attempt to acquire the lock fails because of lock contention
1.1920 +** and the database was previously unlocked, then invoke the busy handler
1.1921 +** if there is one. But if there was previously a read-lock, do not
1.1922 +** invoke the busy handler - just return SQLITE_BUSY. SQLITE_BUSY is
1.1923 +** returned when there is already a read-lock in order to avoid a deadlock.
1.1924 +**
1.1925 +** Suppose there are two processes A and B. A has a read lock and B has
1.1926 +** a reserved lock. B tries to promote to exclusive but is blocked because
1.1927 +** of A's read lock. A tries to promote to reserved but is blocked by B.
1.1928 +** One or the other of the two processes must give way or there can be
1.1929 +** no progress. By returning SQLITE_BUSY and not invoking the busy callback
1.1930 +** when A already has a read lock, we encourage A to give up and let B
1.1931 +** proceed.
1.1932 +*/
1.1933 +int sqlite3BtreeBeginTrans(Btree *p, int wrflag){
1.1934 + BtShared *pBt = p->pBt;
1.1935 + int rc = SQLITE_OK;
1.1936 +
1.1937 + sqlite3BtreeEnter(p);
1.1938 + pBt->db = p->db;
1.1939 + btreeIntegrity(p);
1.1940 +
1.1941 + /* If the btree is already in a write-transaction, or it
1.1942 + ** is already in a read-transaction and a read-transaction
1.1943 + ** is requested, this is a no-op.
1.1944 + */
1.1945 + if( p->inTrans==TRANS_WRITE || (p->inTrans==TRANS_READ && !wrflag) ){
1.1946 + goto trans_begun;
1.1947 + }
1.1948 +
1.1949 + /* Write transactions are not possible on a read-only database */
1.1950 + if( pBt->readOnly && wrflag ){
1.1951 + rc = SQLITE_READONLY;
1.1952 + goto trans_begun;
1.1953 + }
1.1954 +
1.1955 + /* If another database handle has already opened a write transaction
1.1956 + ** on this shared-btree structure and a second write transaction is
1.1957 + ** requested, return SQLITE_BUSY.
1.1958 + */
1.1959 + if( pBt->inTransaction==TRANS_WRITE && wrflag ){
1.1960 + rc = SQLITE_BUSY;
1.1961 + goto trans_begun;
1.1962 + }
1.1963 +
1.1964 +#ifndef SQLITE_OMIT_SHARED_CACHE
1.1965 + if( wrflag>1 ){
1.1966 + BtLock *pIter;
1.1967 + for(pIter=pBt->pLock; pIter; pIter=pIter->pNext){
1.1968 + if( pIter->pBtree!=p ){
1.1969 + rc = SQLITE_BUSY;
1.1970 + goto trans_begun;
1.1971 + }
1.1972 + }
1.1973 + }
1.1974 +#endif
1.1975 +
1.1976 + do {
1.1977 + if( pBt->pPage1==0 ){
1.1978 + do{
1.1979 + rc = lockBtree(pBt);
1.1980 + }while( pBt->pPage1==0 && rc==SQLITE_OK );
1.1981 + }
1.1982 +
1.1983 + if( rc==SQLITE_OK && wrflag ){
1.1984 + if( pBt->readOnly ){
1.1985 + rc = SQLITE_READONLY;
1.1986 + }else{
1.1987 + rc = sqlite3PagerBegin(pBt->pPage1->pDbPage, wrflag>1);
1.1988 + if( rc==SQLITE_OK ){
1.1989 + rc = newDatabase(pBt);
1.1990 + }
1.1991 + }
1.1992 + }
1.1993 +
1.1994 + if( rc==SQLITE_OK ){
1.1995 + if( wrflag ) pBt->inStmt = 0;
1.1996 + }else{
1.1997 + unlockBtreeIfUnused(pBt);
1.1998 + }
1.1999 + }while( rc==SQLITE_BUSY && pBt->inTransaction==TRANS_NONE &&
1.2000 + sqlite3BtreeInvokeBusyHandler(pBt, 0) );
1.2001 +
1.2002 + if( rc==SQLITE_OK ){
1.2003 + if( p->inTrans==TRANS_NONE ){
1.2004 + pBt->nTransaction++;
1.2005 + }
1.2006 + p->inTrans = (wrflag?TRANS_WRITE:TRANS_READ);
1.2007 + if( p->inTrans>pBt->inTransaction ){
1.2008 + pBt->inTransaction = p->inTrans;
1.2009 + }
1.2010 +#ifndef SQLITE_OMIT_SHARED_CACHE
1.2011 + if( wrflag>1 ){
1.2012 + assert( !pBt->pExclusive );
1.2013 + pBt->pExclusive = p;
1.2014 + }
1.2015 +#endif
1.2016 + }
1.2017 +
1.2018 +
1.2019 +trans_begun:
1.2020 + btreeIntegrity(p);
1.2021 + sqlite3BtreeLeave(p);
1.2022 + return rc;
1.2023 +}
1.2024 +
1.2025 +/*
1.2026 +** Return the size of the database file in pages. Or return -1 if
1.2027 +** there is any kind of error.
1.2028 +*/
1.2029 +static int pagerPagecount(Pager *pPager){
1.2030 + int rc;
1.2031 + int nPage;
1.2032 + rc = sqlite3PagerPagecount(pPager, &nPage);
1.2033 + return (rc==SQLITE_OK?nPage:-1);
1.2034 +}
1.2035 +
1.2036 +
1.2037 +#ifndef SQLITE_OMIT_AUTOVACUUM
1.2038 +
1.2039 +/*
1.2040 +** Set the pointer-map entries for all children of page pPage. Also, if
1.2041 +** pPage contains cells that point to overflow pages, set the pointer
1.2042 +** map entries for the overflow pages as well.
1.2043 +*/
1.2044 +static int setChildPtrmaps(MemPage *pPage){
1.2045 + int i; /* Counter variable */
1.2046 + int nCell; /* Number of cells in page pPage */
1.2047 + int rc; /* Return code */
1.2048 + BtShared *pBt = pPage->pBt;
1.2049 + int isInitOrig = pPage->isInit;
1.2050 + Pgno pgno = pPage->pgno;
1.2051 +
1.2052 + assert( sqlite3_mutex_held(pPage->pBt->mutex) );
1.2053 + rc = sqlite3BtreeInitPage(pPage, pPage->pParent);
1.2054 + if( rc!=SQLITE_OK ){
1.2055 + goto set_child_ptrmaps_out;
1.2056 + }
1.2057 + nCell = pPage->nCell;
1.2058 +
1.2059 + for(i=0; i<nCell; i++){
1.2060 + u8 *pCell = findCell(pPage, i);
1.2061 +
1.2062 + rc = ptrmapPutOvflPtr(pPage, pCell);
1.2063 + if( rc!=SQLITE_OK ){
1.2064 + goto set_child_ptrmaps_out;
1.2065 + }
1.2066 +
1.2067 + if( !pPage->leaf ){
1.2068 + Pgno childPgno = get4byte(pCell);
1.2069 + rc = ptrmapPut(pBt, childPgno, PTRMAP_BTREE, pgno);
1.2070 + if( rc!=SQLITE_OK ) goto set_child_ptrmaps_out;
1.2071 + }
1.2072 + }
1.2073 +
1.2074 + if( !pPage->leaf ){
1.2075 + Pgno childPgno = get4byte(&pPage->aData[pPage->hdrOffset+8]);
1.2076 + rc = ptrmapPut(pBt, childPgno, PTRMAP_BTREE, pgno);
1.2077 + }
1.2078 +
1.2079 +set_child_ptrmaps_out:
1.2080 + pPage->isInit = isInitOrig;
1.2081 + return rc;
1.2082 +}
1.2083 +
1.2084 +/*
1.2085 +** Somewhere on pPage, which is guarenteed to be a btree page, not an overflow
1.2086 +** page, is a pointer to page iFrom. Modify this pointer so that it points to
1.2087 +** iTo. Parameter eType describes the type of pointer to be modified, as
1.2088 +** follows:
1.2089 +**
1.2090 +** PTRMAP_BTREE: pPage is a btree-page. The pointer points at a child
1.2091 +** page of pPage.
1.2092 +**
1.2093 +** PTRMAP_OVERFLOW1: pPage is a btree-page. The pointer points at an overflow
1.2094 +** page pointed to by one of the cells on pPage.
1.2095 +**
1.2096 +** PTRMAP_OVERFLOW2: pPage is an overflow-page. The pointer points at the next
1.2097 +** overflow page in the list.
1.2098 +*/
1.2099 +static int modifyPagePointer(MemPage *pPage, Pgno iFrom, Pgno iTo, u8 eType){
1.2100 + assert( sqlite3_mutex_held(pPage->pBt->mutex) );
1.2101 + if( eType==PTRMAP_OVERFLOW2 ){
1.2102 + /* The pointer is always the first 4 bytes of the page in this case. */
1.2103 + if( get4byte(pPage->aData)!=iFrom ){
1.2104 + return SQLITE_CORRUPT_BKPT;
1.2105 + }
1.2106 + put4byte(pPage->aData, iTo);
1.2107 + }else{
1.2108 + int isInitOrig = pPage->isInit;
1.2109 + int i;
1.2110 + int nCell;
1.2111 +
1.2112 + sqlite3BtreeInitPage(pPage, 0);
1.2113 + nCell = pPage->nCell;
1.2114 +
1.2115 + for(i=0; i<nCell; i++){
1.2116 + u8 *pCell = findCell(pPage, i);
1.2117 + if( eType==PTRMAP_OVERFLOW1 ){
1.2118 + CellInfo info;
1.2119 + sqlite3BtreeParseCellPtr(pPage, pCell, &info);
1.2120 + if( info.iOverflow ){
1.2121 + if( iFrom==get4byte(&pCell[info.iOverflow]) ){
1.2122 + put4byte(&pCell[info.iOverflow], iTo);
1.2123 + break;
1.2124 + }
1.2125 + }
1.2126 + }else{
1.2127 + if( get4byte(pCell)==iFrom ){
1.2128 + put4byte(pCell, iTo);
1.2129 + break;
1.2130 + }
1.2131 + }
1.2132 + }
1.2133 +
1.2134 + if( i==nCell ){
1.2135 + if( eType!=PTRMAP_BTREE ||
1.2136 + get4byte(&pPage->aData[pPage->hdrOffset+8])!=iFrom ){
1.2137 + return SQLITE_CORRUPT_BKPT;
1.2138 + }
1.2139 + put4byte(&pPage->aData[pPage->hdrOffset+8], iTo);
1.2140 + }
1.2141 +
1.2142 + pPage->isInit = isInitOrig;
1.2143 + }
1.2144 + return SQLITE_OK;
1.2145 +}
1.2146 +
1.2147 +
1.2148 +/*
1.2149 +** Move the open database page pDbPage to location iFreePage in the
1.2150 +** database. The pDbPage reference remains valid.
1.2151 +*/
1.2152 +static int relocatePage(
1.2153 + BtShared *pBt, /* Btree */
1.2154 + MemPage *pDbPage, /* Open page to move */
1.2155 + u8 eType, /* Pointer map 'type' entry for pDbPage */
1.2156 + Pgno iPtrPage, /* Pointer map 'page-no' entry for pDbPage */
1.2157 + Pgno iFreePage, /* The location to move pDbPage to */
1.2158 + int isCommit
1.2159 +){
1.2160 + MemPage *pPtrPage; /* The page that contains a pointer to pDbPage */
1.2161 + Pgno iDbPage = pDbPage->pgno;
1.2162 + Pager *pPager = pBt->pPager;
1.2163 + int rc;
1.2164 +
1.2165 + assert( eType==PTRMAP_OVERFLOW2 || eType==PTRMAP_OVERFLOW1 ||
1.2166 + eType==PTRMAP_BTREE || eType==PTRMAP_ROOTPAGE );
1.2167 + assert( sqlite3_mutex_held(pBt->mutex) );
1.2168 + assert( pDbPage->pBt==pBt );
1.2169 +
1.2170 + /* Move page iDbPage from its current location to page number iFreePage */
1.2171 + TRACE(("AUTOVACUUM: Moving %d to free page %d (ptr page %d type %d)\n",
1.2172 + iDbPage, iFreePage, iPtrPage, eType));
1.2173 + rc = sqlite3PagerMovepage(pPager, pDbPage->pDbPage, iFreePage, isCommit);
1.2174 + if( rc!=SQLITE_OK ){
1.2175 + return rc;
1.2176 + }
1.2177 + pDbPage->pgno = iFreePage;
1.2178 +
1.2179 + /* If pDbPage was a btree-page, then it may have child pages and/or cells
1.2180 + ** that point to overflow pages. The pointer map entries for all these
1.2181 + ** pages need to be changed.
1.2182 + **
1.2183 + ** If pDbPage is an overflow page, then the first 4 bytes may store a
1.2184 + ** pointer to a subsequent overflow page. If this is the case, then
1.2185 + ** the pointer map needs to be updated for the subsequent overflow page.
1.2186 + */
1.2187 + if( eType==PTRMAP_BTREE || eType==PTRMAP_ROOTPAGE ){
1.2188 + rc = setChildPtrmaps(pDbPage);
1.2189 + if( rc!=SQLITE_OK ){
1.2190 + return rc;
1.2191 + }
1.2192 + }else{
1.2193 + Pgno nextOvfl = get4byte(pDbPage->aData);
1.2194 + if( nextOvfl!=0 ){
1.2195 + rc = ptrmapPut(pBt, nextOvfl, PTRMAP_OVERFLOW2, iFreePage);
1.2196 + if( rc!=SQLITE_OK ){
1.2197 + return rc;
1.2198 + }
1.2199 + }
1.2200 + }
1.2201 +
1.2202 + /* Fix the database pointer on page iPtrPage that pointed at iDbPage so
1.2203 + ** that it points at iFreePage. Also fix the pointer map entry for
1.2204 + ** iPtrPage.
1.2205 + */
1.2206 + if( eType!=PTRMAP_ROOTPAGE ){
1.2207 + rc = sqlite3BtreeGetPage(pBt, iPtrPage, &pPtrPage, 0);
1.2208 + if( rc!=SQLITE_OK ){
1.2209 + return rc;
1.2210 + }
1.2211 + rc = sqlite3PagerWrite(pPtrPage->pDbPage);
1.2212 + if( rc!=SQLITE_OK ){
1.2213 + releasePage(pPtrPage);
1.2214 + return rc;
1.2215 + }
1.2216 + rc = modifyPagePointer(pPtrPage, iDbPage, iFreePage, eType);
1.2217 + releasePage(pPtrPage);
1.2218 + if( rc==SQLITE_OK ){
1.2219 + rc = ptrmapPut(pBt, iFreePage, eType, iPtrPage);
1.2220 + }
1.2221 + }
1.2222 + return rc;
1.2223 +}
1.2224 +
1.2225 +/* Forward declaration required by incrVacuumStep(). */
1.2226 +static int allocateBtreePage(BtShared *, MemPage **, Pgno *, Pgno, u8);
1.2227 +
1.2228 +/*
1.2229 +** Perform a single step of an incremental-vacuum. If successful,
1.2230 +** return SQLITE_OK. If there is no work to do (and therefore no
1.2231 +** point in calling this function again), return SQLITE_DONE.
1.2232 +**
1.2233 +** More specificly, this function attempts to re-organize the
1.2234 +** database so that the last page of the file currently in use
1.2235 +** is no longer in use.
1.2236 +**
1.2237 +** If the nFin parameter is non-zero, the implementation assumes
1.2238 +** that the caller will keep calling incrVacuumStep() until
1.2239 +** it returns SQLITE_DONE or an error, and that nFin is the
1.2240 +** number of pages the database file will contain after this
1.2241 +** process is complete.
1.2242 +*/
1.2243 +static int incrVacuumStep(BtShared *pBt, Pgno nFin){
1.2244 + Pgno iLastPg; /* Last page in the database */
1.2245 + Pgno nFreeList; /* Number of pages still on the free-list */
1.2246 +
1.2247 + assert( sqlite3_mutex_held(pBt->mutex) );
1.2248 + iLastPg = pBt->nTrunc;
1.2249 + if( iLastPg==0 ){
1.2250 + iLastPg = pagerPagecount(pBt->pPager);
1.2251 + }
1.2252 +
1.2253 + if( !PTRMAP_ISPAGE(pBt, iLastPg) && iLastPg!=PENDING_BYTE_PAGE(pBt) ){
1.2254 + int rc;
1.2255 + u8 eType;
1.2256 + Pgno iPtrPage;
1.2257 +
1.2258 + nFreeList = get4byte(&pBt->pPage1->aData[36]);
1.2259 + if( nFreeList==0 || nFin==iLastPg ){
1.2260 + return SQLITE_DONE;
1.2261 + }
1.2262 +
1.2263 + rc = ptrmapGet(pBt, iLastPg, &eType, &iPtrPage);
1.2264 + if( rc!=SQLITE_OK ){
1.2265 + return rc;
1.2266 + }
1.2267 + if( eType==PTRMAP_ROOTPAGE ){
1.2268 + return SQLITE_CORRUPT_BKPT;
1.2269 + }
1.2270 +
1.2271 + if( eType==PTRMAP_FREEPAGE ){
1.2272 + if( nFin==0 ){
1.2273 + /* Remove the page from the files free-list. This is not required
1.2274 + ** if nFin is non-zero. In that case, the free-list will be
1.2275 + ** truncated to zero after this function returns, so it doesn't
1.2276 + ** matter if it still contains some garbage entries.
1.2277 + */
1.2278 + Pgno iFreePg;
1.2279 + MemPage *pFreePg;
1.2280 + rc = allocateBtreePage(pBt, &pFreePg, &iFreePg, iLastPg, 1);
1.2281 + if( rc!=SQLITE_OK ){
1.2282 + return rc;
1.2283 + }
1.2284 + assert( iFreePg==iLastPg );
1.2285 + releasePage(pFreePg);
1.2286 + }
1.2287 + } else {
1.2288 + Pgno iFreePg; /* Index of free page to move pLastPg to */
1.2289 + MemPage *pLastPg;
1.2290 +
1.2291 + rc = sqlite3BtreeGetPage(pBt, iLastPg, &pLastPg, 0);
1.2292 + if( rc!=SQLITE_OK ){
1.2293 + return rc;
1.2294 + }
1.2295 +
1.2296 + /* If nFin is zero, this loop runs exactly once and page pLastPg
1.2297 + ** is swapped with the first free page pulled off the free list.
1.2298 + **
1.2299 + ** On the other hand, if nFin is greater than zero, then keep
1.2300 + ** looping until a free-page located within the first nFin pages
1.2301 + ** of the file is found.
1.2302 + */
1.2303 + do {
1.2304 + MemPage *pFreePg;
1.2305 + rc = allocateBtreePage(pBt, &pFreePg, &iFreePg, 0, 0);
1.2306 + if( rc!=SQLITE_OK ){
1.2307 + releasePage(pLastPg);
1.2308 + return rc;
1.2309 + }
1.2310 + releasePage(pFreePg);
1.2311 + }while( nFin!=0 && iFreePg>nFin );
1.2312 + assert( iFreePg<iLastPg );
1.2313 +
1.2314 + rc = sqlite3PagerWrite(pLastPg->pDbPage);
1.2315 + if( rc==SQLITE_OK ){
1.2316 + rc = relocatePage(pBt, pLastPg, eType, iPtrPage, iFreePg, nFin!=0);
1.2317 + }
1.2318 + releasePage(pLastPg);
1.2319 + if( rc!=SQLITE_OK ){
1.2320 + return rc;
1.2321 + }
1.2322 + }
1.2323 + }
1.2324 +
1.2325 + pBt->nTrunc = iLastPg - 1;
1.2326 + while( pBt->nTrunc==PENDING_BYTE_PAGE(pBt)||PTRMAP_ISPAGE(pBt, pBt->nTrunc) ){
1.2327 + pBt->nTrunc--;
1.2328 + }
1.2329 + return SQLITE_OK;
1.2330 +}
1.2331 +
1.2332 +/*
1.2333 +** A write-transaction must be opened before calling this function.
1.2334 +** It performs a single unit of work towards an incremental vacuum.
1.2335 +**
1.2336 +** If the incremental vacuum is finished after this function has run,
1.2337 +** SQLITE_DONE is returned. If it is not finished, but no error occured,
1.2338 +** SQLITE_OK is returned. Otherwise an SQLite error code.
1.2339 +*/
1.2340 +int sqlite3BtreeIncrVacuum(Btree *p){
1.2341 + int rc;
1.2342 + BtShared *pBt = p->pBt;
1.2343 +
1.2344 + sqlite3BtreeEnter(p);
1.2345 + pBt->db = p->db;
1.2346 + assert( pBt->inTransaction==TRANS_WRITE && p->inTrans==TRANS_WRITE );
1.2347 + if( !pBt->autoVacuum ){
1.2348 + rc = SQLITE_DONE;
1.2349 + }else{
1.2350 + invalidateAllOverflowCache(pBt);
1.2351 + rc = incrVacuumStep(pBt, 0);
1.2352 + }
1.2353 + sqlite3BtreeLeave(p);
1.2354 + return rc;
1.2355 +}
1.2356 +
1.2357 +/*
1.2358 +** This routine is called prior to sqlite3PagerCommit when a transaction
1.2359 +** is commited for an auto-vacuum database.
1.2360 +**
1.2361 +** If SQLITE_OK is returned, then *pnTrunc is set to the number of pages
1.2362 +** the database file should be truncated to during the commit process.
1.2363 +** i.e. the database has been reorganized so that only the first *pnTrunc
1.2364 +** pages are in use.
1.2365 +*/
1.2366 +static int autoVacuumCommit(BtShared *pBt, Pgno *pnTrunc){
1.2367 + int rc = SQLITE_OK;
1.2368 + Pager *pPager = pBt->pPager;
1.2369 +#ifndef NDEBUG
1.2370 + int nRef = sqlite3PagerRefcount(pPager);
1.2371 +#endif
1.2372 +
1.2373 + assert( sqlite3_mutex_held(pBt->mutex) );
1.2374 + invalidateAllOverflowCache(pBt);
1.2375 + assert(pBt->autoVacuum);
1.2376 + if( !pBt->incrVacuum ){
1.2377 + Pgno nFin = 0;
1.2378 +
1.2379 + if( pBt->nTrunc==0 ){
1.2380 + Pgno nFree;
1.2381 + Pgno nPtrmap;
1.2382 + const int pgsz = pBt->pageSize;
1.2383 + int nOrig = pagerPagecount(pBt->pPager);
1.2384 +
1.2385 + if( PTRMAP_ISPAGE(pBt, nOrig) ){
1.2386 + return SQLITE_CORRUPT_BKPT;
1.2387 + }
1.2388 + if( nOrig==PENDING_BYTE_PAGE(pBt) ){
1.2389 + nOrig--;
1.2390 + }
1.2391 + nFree = get4byte(&pBt->pPage1->aData[36]);
1.2392 + nPtrmap = (nFree-nOrig+PTRMAP_PAGENO(pBt, nOrig)+pgsz/5)/(pgsz/5);
1.2393 + nFin = nOrig - nFree - nPtrmap;
1.2394 + if( nOrig>PENDING_BYTE_PAGE(pBt) && nFin<=PENDING_BYTE_PAGE(pBt) ){
1.2395 + nFin--;
1.2396 + }
1.2397 + while( PTRMAP_ISPAGE(pBt, nFin) || nFin==PENDING_BYTE_PAGE(pBt) ){
1.2398 + nFin--;
1.2399 + }
1.2400 + }
1.2401 +
1.2402 + while( rc==SQLITE_OK ){
1.2403 + rc = incrVacuumStep(pBt, nFin);
1.2404 + }
1.2405 + if( rc==SQLITE_DONE ){
1.2406 + assert(nFin==0 || pBt->nTrunc==0 || nFin<=pBt->nTrunc);
1.2407 + rc = SQLITE_OK;
1.2408 + if( pBt->nTrunc && nFin ){
1.2409 + rc = sqlite3PagerWrite(pBt->pPage1->pDbPage);
1.2410 + put4byte(&pBt->pPage1->aData[32], 0);
1.2411 + put4byte(&pBt->pPage1->aData[36], 0);
1.2412 + pBt->nTrunc = nFin;
1.2413 + }
1.2414 + }
1.2415 + if( rc!=SQLITE_OK ){
1.2416 + sqlite3PagerRollback(pPager);
1.2417 + }
1.2418 + }
1.2419 +
1.2420 + if( rc==SQLITE_OK ){
1.2421 + *pnTrunc = pBt->nTrunc;
1.2422 + pBt->nTrunc = 0;
1.2423 + }
1.2424 + assert( nRef==sqlite3PagerRefcount(pPager) );
1.2425 + return rc;
1.2426 +}
1.2427 +
1.2428 +#endif
1.2429 +
1.2430 +/*
1.2431 +** This routine does the first phase of a two-phase commit. This routine
1.2432 +** causes a rollback journal to be created (if it does not already exist)
1.2433 +** and populated with enough information so that if a power loss occurs
1.2434 +** the database can be restored to its original state by playing back
1.2435 +** the journal. Then the contents of the journal are flushed out to
1.2436 +** the disk. After the journal is safely on oxide, the changes to the
1.2437 +** database are written into the database file and flushed to oxide.
1.2438 +** At the end of this call, the rollback journal still exists on the
1.2439 +** disk and we are still holding all locks, so the transaction has not
1.2440 +** committed. See sqlite3BtreeCommit() for the second phase of the
1.2441 +** commit process.
1.2442 +**
1.2443 +** This call is a no-op if no write-transaction is currently active on pBt.
1.2444 +**
1.2445 +** Otherwise, sync the database file for the btree pBt. zMaster points to
1.2446 +** the name of a master journal file that should be written into the
1.2447 +** individual journal file, or is NULL, indicating no master journal file
1.2448 +** (single database transaction).
1.2449 +**
1.2450 +** When this is called, the master journal should already have been
1.2451 +** created, populated with this journal pointer and synced to disk.
1.2452 +**
1.2453 +** Once this is routine has returned, the only thing required to commit
1.2454 +** the write-transaction for this database file is to delete the journal.
1.2455 +*/
1.2456 +int sqlite3BtreeCommitPhaseOne(Btree *p, const char *zMaster){
1.2457 + int rc = SQLITE_OK;
1.2458 + if( p->inTrans==TRANS_WRITE ){
1.2459 + BtShared *pBt = p->pBt;
1.2460 + Pgno nTrunc = 0;
1.2461 + sqlite3BtreeEnter(p);
1.2462 + pBt->db = p->db;
1.2463 +#ifndef SQLITE_OMIT_AUTOVACUUM
1.2464 + if( pBt->autoVacuum ){
1.2465 + rc = autoVacuumCommit(pBt, &nTrunc);
1.2466 + if( rc!=SQLITE_OK ){
1.2467 + sqlite3BtreeLeave(p);
1.2468 + return rc;
1.2469 + }
1.2470 + }
1.2471 +#endif
1.2472 + rc = sqlite3PagerCommitPhaseOne(pBt->pPager, zMaster, nTrunc, 0);
1.2473 + sqlite3BtreeLeave(p);
1.2474 + }
1.2475 + return rc;
1.2476 +}
1.2477 +
1.2478 +/*
1.2479 +** Commit the transaction currently in progress.
1.2480 +**
1.2481 +** This routine implements the second phase of a 2-phase commit. The
1.2482 +** sqlite3BtreeSync() routine does the first phase and should be invoked
1.2483 +** prior to calling this routine. The sqlite3BtreeSync() routine did
1.2484 +** all the work of writing information out to disk and flushing the
1.2485 +** contents so that they are written onto the disk platter. All this
1.2486 +** routine has to do is delete or truncate the rollback journal
1.2487 +** (which causes the transaction to commit) and drop locks.
1.2488 +**
1.2489 +** This will release the write lock on the database file. If there
1.2490 +** are no active cursors, it also releases the read lock.
1.2491 +*/
1.2492 +int sqlite3BtreeCommitPhaseTwo(Btree *p){
1.2493 + BtShared *pBt = p->pBt;
1.2494 +
1.2495 + sqlite3BtreeEnter(p);
1.2496 + pBt->db = p->db;
1.2497 + btreeIntegrity(p);
1.2498 +
1.2499 + /* If the handle has a write-transaction open, commit the shared-btrees
1.2500 + ** transaction and set the shared state to TRANS_READ.
1.2501 + */
1.2502 + if( p->inTrans==TRANS_WRITE ){
1.2503 + int rc;
1.2504 + assert( pBt->inTransaction==TRANS_WRITE );
1.2505 + assert( pBt->nTransaction>0 );
1.2506 + rc = sqlite3PagerCommitPhaseTwo(pBt->pPager);
1.2507 + if( rc!=SQLITE_OK ){
1.2508 + sqlite3BtreeLeave(p);
1.2509 + return rc;
1.2510 + }
1.2511 + pBt->inTransaction = TRANS_READ;
1.2512 + pBt->inStmt = 0;
1.2513 + }
1.2514 + unlockAllTables(p);
1.2515 +
1.2516 + /* If the handle has any kind of transaction open, decrement the transaction
1.2517 + ** count of the shared btree. If the transaction count reaches 0, set
1.2518 + ** the shared state to TRANS_NONE. The unlockBtreeIfUnused() call below
1.2519 + ** will unlock the pager.
1.2520 + */
1.2521 + if( p->inTrans!=TRANS_NONE ){
1.2522 + pBt->nTransaction--;
1.2523 + if( 0==pBt->nTransaction ){
1.2524 + pBt->inTransaction = TRANS_NONE;
1.2525 + }
1.2526 + }
1.2527 +
1.2528 + /* Set the handles current transaction state to TRANS_NONE and unlock
1.2529 + ** the pager if this call closed the only read or write transaction.
1.2530 + */
1.2531 + p->inTrans = TRANS_NONE;
1.2532 + unlockBtreeIfUnused(pBt);
1.2533 +
1.2534 + btreeIntegrity(p);
1.2535 + sqlite3BtreeLeave(p);
1.2536 + return SQLITE_OK;
1.2537 +}
1.2538 +
1.2539 +/*
1.2540 +** Do both phases of a commit.
1.2541 +*/
1.2542 +int sqlite3BtreeCommit(Btree *p){
1.2543 + int rc;
1.2544 + sqlite3BtreeEnter(p);
1.2545 + rc = sqlite3BtreeCommitPhaseOne(p, 0);
1.2546 + if( rc==SQLITE_OK ){
1.2547 + rc = sqlite3BtreeCommitPhaseTwo(p);
1.2548 + }
1.2549 + sqlite3BtreeLeave(p);
1.2550 + return rc;
1.2551 +}
1.2552 +
1.2553 +#ifndef NDEBUG
1.2554 +/*
1.2555 +** Return the number of write-cursors open on this handle. This is for use
1.2556 +** in assert() expressions, so it is only compiled if NDEBUG is not
1.2557 +** defined.
1.2558 +**
1.2559 +** For the purposes of this routine, a write-cursor is any cursor that
1.2560 +** is capable of writing to the databse. That means the cursor was
1.2561 +** originally opened for writing and the cursor has not be disabled
1.2562 +** by having its state changed to CURSOR_FAULT.
1.2563 +*/
1.2564 +static int countWriteCursors(BtShared *pBt){
1.2565 + BtCursor *pCur;
1.2566 + int r = 0;
1.2567 + for(pCur=pBt->pCursor; pCur; pCur=pCur->pNext){
1.2568 + if( pCur->wrFlag && pCur->eState!=CURSOR_FAULT ) r++;
1.2569 + }
1.2570 + return r;
1.2571 +}
1.2572 +#endif
1.2573 +
1.2574 +/*
1.2575 +** This routine sets the state to CURSOR_FAULT and the error
1.2576 +** code to errCode for every cursor on BtShared that pBtree
1.2577 +** references.
1.2578 +**
1.2579 +** Every cursor is tripped, including cursors that belong
1.2580 +** to other database connections that happen to be sharing
1.2581 +** the cache with pBtree.
1.2582 +**
1.2583 +** This routine gets called when a rollback occurs.
1.2584 +** All cursors using the same cache must be tripped
1.2585 +** to prevent them from trying to use the btree after
1.2586 +** the rollback. The rollback may have deleted tables
1.2587 +** or moved root pages, so it is not sufficient to
1.2588 +** save the state of the cursor. The cursor must be
1.2589 +** invalidated.
1.2590 +*/
1.2591 +void sqlite3BtreeTripAllCursors(Btree *pBtree, int errCode){
1.2592 + BtCursor *p;
1.2593 + sqlite3BtreeEnter(pBtree);
1.2594 + for(p=pBtree->pBt->pCursor; p; p=p->pNext){
1.2595 + clearCursorPosition(p);
1.2596 + p->eState = CURSOR_FAULT;
1.2597 + p->skip = errCode;
1.2598 + }
1.2599 + sqlite3BtreeLeave(pBtree);
1.2600 +}
1.2601 +
1.2602 +/*
1.2603 +** Rollback the transaction in progress. All cursors will be
1.2604 +** invalidated by this operation. Any attempt to use a cursor
1.2605 +** that was open at the beginning of this operation will result
1.2606 +** in an error. This will release the write lock on the database file.
1.2607 +** If there are no active cursors, it also releases the read lock.
1.2608 +**
1.2609 +** Returns the sqlite3PagerRollback() error if that call is made and fails, otherwise the saveAllCursors() result.
1.2610 +*/
1.2611 +int sqlite3BtreeRollback(Btree *p){
1.2612 + int rc;
1.2613 + BtShared *pBt = p->pBt;
1.2614 + MemPage *pPage1;
1.2615 +
1.2616 + sqlite3BtreeEnter(p);
1.2617 + pBt->db = p->db;
1.2618 + rc = saveAllCursors(pBt, 0, 0);
1.2619 +#ifndef SQLITE_OMIT_SHARED_CACHE
1.2620 + if( rc!=SQLITE_OK ){
1.2621 + /* This is a horrible situation. An IO or malloc() error occurred whilst
1.2622 + ** trying to save cursor positions. If this is an automatic rollback (as
1.2623 + ** the result of a constraint, malloc() failure or IO error) then
1.2624 + ** the cache may be internally inconsistent (not contain valid trees) so
1.2625 + ** we cannot simply return the error to the caller. Instead, abort
1.2626 + ** all queries that may be using any of the cursors that failed to save.
1.2627 + */
1.2628 + sqlite3BtreeTripAllCursors(p, rc);
1.2629 + }
1.2630 +#endif
1.2631 + btreeIntegrity(p);
1.2632 + unlockAllTables(p);
1.2633 +
1.2634 + if( p->inTrans==TRANS_WRITE ){
1.2635 + int rc2;
1.2636 +
1.2637 +#ifndef SQLITE_OMIT_AUTOVACUUM
1.2638 + pBt->nTrunc = 0;
1.2639 +#endif
1.2640 +
1.2641 + assert( TRANS_WRITE==pBt->inTransaction );
1.2642 + rc2 = sqlite3PagerRollback(pBt->pPager);
1.2643 + if( rc2!=SQLITE_OK ){
1.2644 + rc = rc2; /* a pager failure overrides any earlier saveAllCursors() error */
1.2645 + }
1.2646 +
1.2647 + /* The rollback may have destroyed the pPage1->aData value. So
1.2648 + ** call sqlite3BtreeGetPage() on page 1 again to make
1.2649 + ** sure pPage1->aData is set correctly. */
1.2650 + if( sqlite3BtreeGetPage(pBt, 1, &pPage1, 0)==SQLITE_OK ){
1.2651 + releasePage(pPage1);
1.2652 + }
1.2653 + assert( countWriteCursors(pBt)==0 );
1.2654 + pBt->inTransaction = TRANS_READ; /* downgraded here; may drop to TRANS_NONE just below */
1.2655 + }
1.2656 +
1.2657 + if( p->inTrans!=TRANS_NONE ){
1.2658 + assert( pBt->nTransaction>0 );
1.2659 + pBt->nTransaction--;
1.2660 + if( 0==pBt->nTransaction ){
1.2661 + pBt->inTransaction = TRANS_NONE;
1.2662 + }
1.2663 + }
1.2664 +
1.2665 + p->inTrans = TRANS_NONE;
1.2666 + pBt->inStmt = 0; /* any open statement subtransaction ends with the main transaction */
1.2667 + unlockBtreeIfUnused(pBt);
1.2668 +
1.2669 + btreeIntegrity(p);
1.2670 + sqlite3BtreeLeave(p);
1.2671 + return rc;
1.2672 +}
1.2673 +
1.2674 +/*
1.2675 +** Start a statement subtransaction. The subtransaction can
1.2676 +** be rolled back independently of the main transaction.
1.2677 +** You must start a transaction before starting a subtransaction.
1.2678 +** The subtransaction is ended automatically if the main transaction
1.2679 +** commits or rolls back.
1.2680 +**
1.2681 +** Only one subtransaction may be active at a time. It is an error to try
1.2682 +** to start a new subtransaction if another subtransaction is already active.
1.2683 +**
1.2684 +** Statement subtransactions are used around individual SQL statements
1.2685 +** that are contained within a BEGIN...COMMIT block. If a constraint
1.2686 +** error occurs within the statement, the effect of that one statement
1.2687 +** can be rolled back without having to rollback the entire transaction.
1.2688 +*/
1.2689 +int sqlite3BtreeBeginStmt(Btree *p){
1.2690 + int rc;
1.2691 + BtShared *pBt = p->pBt;
1.2692 + sqlite3BtreeEnter(p);
1.2693 + pBt->db = p->db;
1.2694 + if( (p->inTrans!=TRANS_WRITE) || pBt->inStmt ){ /* no write transaction, or a statement is already open */
1.2695 + rc = pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR;
1.2696 + }else{
1.2697 + assert( pBt->inTransaction==TRANS_WRITE );
1.2698 + rc = pBt->readOnly ? SQLITE_OK : sqlite3PagerStmtBegin(pBt->pPager);
1.2699 + pBt->inStmt = 1; /* NOTE(review): set even when sqlite3PagerStmtBegin() returned an error */
1.2700 + }
1.2701 + sqlite3BtreeLeave(p);
1.2702 + return rc;
1.2703 +}
1.2704 +
1.2705 +
1.2706 +/*
1.2707 +** Commit the statement subtransaction currently in progress. If no
1.2708 +** subtransaction is active, this is a no-op that returns SQLITE_OK.
1.2709 +*/
1.2710 +int sqlite3BtreeCommitStmt(Btree *p){
1.2711 + int rc;
1.2712 + BtShared *pBt = p->pBt;
1.2713 + sqlite3BtreeEnter(p);
1.2714 + pBt->db = p->db;
1.2715 + if( pBt->inStmt && !pBt->readOnly ){
1.2716 + rc = sqlite3PagerStmtCommit(pBt->pPager);
1.2717 + }else{
1.2718 + rc = SQLITE_OK;
1.2719 + }
1.2720 + pBt->inStmt = 0; /* cleared whatever the pager returned */
1.2721 + sqlite3BtreeLeave(p);
1.2722 + return rc;
1.2723 +}
1.2724 +
1.2725 +/*
1.2726 +** Rollback the active statement subtransaction. If no subtransaction
1.2727 +** is active this routine is a no-op that returns SQLITE_OK.
1.2728 +**
1.2729 +** All cursors will be invalidated by this operation. Any attempt
1.2730 +** to use a cursor that was open at the beginning of this operation
1.2731 +** will result in an error.
1.2732 +*/
1.2733 +int sqlite3BtreeRollbackStmt(Btree *p){
1.2734 + int rc = SQLITE_OK;
1.2735 + BtShared *pBt = p->pBt;
1.2736 + sqlite3BtreeEnter(p);
1.2737 + pBt->db = p->db;
1.2738 + if( pBt->inStmt && !pBt->readOnly ){
1.2739 + rc = sqlite3PagerStmtRollback(pBt->pPager);
1.2740 + pBt->inStmt = 0; /* unlike sqlite3BtreeCommitStmt(), inStmt is only cleared inside this branch */
1.2741 + }
1.2742 + sqlite3BtreeLeave(p);
1.2743 + return rc;
1.2744 +}
1.2745 +
1.2746 +/*
1.2747 +** Create a new cursor for the BTree whose root is on the page
1.2748 +** iTable. The act of acquiring a cursor gets a read lock on
1.2749 +** the database file.
1.2750 +**
1.2751 +** If wrFlag==0, then the cursor can only be used for reading.
1.2752 +** If wrFlag==1, then the cursor can be used for reading or for
1.2753 +** writing if other conditions for writing are also met. These
1.2754 +** are the conditions that must be met in order for writing to
1.2755 +** be allowed:
1.2756 +**
1.2757 +** 1: The cursor must have been opened with wrFlag==1
1.2758 +**
1.2759 +** 2: Other database connections that share the same pager cache
1.2760 +** but which are not in the READ_UNCOMMITTED state may not have
1.2761 +** cursors open with wrFlag==0 on the same table. Otherwise
1.2762 +** the changes made by this write cursor would be visible to
1.2763 +** the read cursors in the other database connection.
1.2764 +**
1.2765 +** 3: The database must be writable (not on read-only media)
1.2766 +**
1.2767 +** 4: There must be an active transaction.
1.2768 +**
1.2769 +** No checking is done to make sure that page iTable really is the
1.2770 +** root page of a b-tree. If it is not, then the cursor acquired
1.2771 +** will not work correctly. Returns SQLITE_OK, SQLITE_READONLY, SQLITE_LOCKED, SQLITE_EMPTY, or an error from lockBtreeWithRetry()/getAndInitPage().
1.2772 +*/
1.2773 +static int btreeCursor(
1.2774 + Btree *p, /* The btree */
1.2775 + int iTable, /* Root page of table to open */
1.2776 + int wrFlag, /* 1 to write. 0 read-only */
1.2777 + struct KeyInfo *pKeyInfo, /* First arg to comparison function */
1.2778 + BtCursor *pCur /* Space for new cursor */
1.2779 +){
1.2780 + int rc;
1.2781 + BtShared *pBt = p->pBt;
1.2782 +
1.2783 + assert( sqlite3BtreeHoldsMutex(p) );
1.2784 + if( wrFlag ){
1.2785 + if( pBt->readOnly ){
1.2786 + return SQLITE_READONLY;
1.2787 + }
1.2788 + if( checkReadLocks(p, iTable, 0, 0) ){
1.2789 + return SQLITE_LOCKED;
1.2790 + }
1.2791 + }
1.2792 +
1.2793 + if( pBt->pPage1==0 ){
1.2794 + rc = lockBtreeWithRetry(p);
1.2795 + if( rc!=SQLITE_OK ){
1.2796 + return rc;
1.2797 + }
1.2798 + if( pBt->readOnly && wrFlag ){ /* re-checked after lockBtreeWithRetry(); presumably readOnly can change there - confirm */
1.2799 + return SQLITE_READONLY;
1.2800 + }
1.2801 + }
1.2802 + pCur->pgnoRoot = (Pgno)iTable;
1.2803 + if( iTable==1 && pagerPagecount(pBt->pPager)==0 ){
1.2804 + rc = SQLITE_EMPTY;
1.2805 + goto create_cursor_exception;
1.2806 + }
1.2807 + rc = getAndInitPage(pBt, pCur->pgnoRoot, &pCur->pPage, 0);
1.2808 + if( rc!=SQLITE_OK ){
1.2809 + goto create_cursor_exception;
1.2810 + }
1.2811 +
1.2812 + /* Now that no other errors can occur, finish filling in the BtCursor
1.2813 + ** variables and link the cursor at the head of the BtShared cursor
1.2814 + ** list. The caller-supplied pCur structure is this function's output.
1.2815 + */
1.2816 + pCur->pKeyInfo = pKeyInfo;
1.2817 + pCur->pBtree = p;
1.2818 + pCur->pBt = pBt;
1.2819 + pCur->wrFlag = wrFlag;
1.2820 + pCur->pNext = pBt->pCursor;
1.2821 + if( pCur->pNext ){
1.2822 + pCur->pNext->pPrev = pCur;
1.2823 + }
1.2824 + pBt->pCursor = pCur;
1.2825 + pCur->eState = CURSOR_INVALID;
1.2826 +
1.2827 + return SQLITE_OK;
1.2828 +
1.2829 +create_cursor_exception:
1.2830 + releasePage(pCur->pPage); /* NOTE(review): on the SQLITE_EMPTY path pCur->pPage was never assigned in this function; presumably the caller zero-fills *pCur - confirm */
1.2831 + unlockBtreeIfUnused(pBt);
1.2832 + return rc;
1.2833 +}
1.2834 +int sqlite3BtreeCursor( /* Public entry point: runs btreeCursor() between sqlite3BtreeEnter() and sqlite3BtreeLeave() */
1.2835 + Btree *p, /* The btree */
1.2836 + int iTable, /* Root page of table to open */
1.2837 + int wrFlag, /* 1 to write. 0 read-only */
1.2838 + struct KeyInfo *pKeyInfo, /* Stored in the cursor as pCur->pKeyInfo */
1.2839 + BtCursor *pCur /* Write new cursor here */
1.2840 +){
1.2841 + int rc;
1.2842 + sqlite3BtreeEnter(p);
1.2843 + p->pBt->db = p->db;
1.2844 + rc = btreeCursor(p, iTable, wrFlag, pKeyInfo, pCur);
1.2845 + sqlite3BtreeLeave(p);
1.2846 + return rc; /* whatever btreeCursor() returned */
1.2847 +}
1.2848 +int sqlite3BtreeCursorSize(void){ /* Size in bytes of a BtCursor, i.e. of the space passed as pCur to sqlite3BtreeCursor() */
1.2849 + return (int)sizeof(BtCursor); /* explicit size_t -> int narrowing to match the declared return type */
1.2850 +}
1.2851 +
1.2852 +
1.2853 +
1.2854 +/*
1.2855 +** Close a cursor. The read lock on the database file is released
1.2856 +** when the last cursor is closed. Does nothing if pCur->pBtree is NULL; always returns SQLITE_OK.
1.2857 +*/
1.2858 +int sqlite3BtreeCloseCursor(BtCursor *pCur){
1.2859 + Btree *pBtree = pCur->pBtree;
1.2860 + if( pBtree ){
1.2861 + BtShared *pBt = pCur->pBt;
1.2862 + sqlite3BtreeEnter(pBtree);
1.2863 + pBt->db = pBtree->db;
1.2864 + clearCursorPosition(pCur);
1.2865 + if( pCur->pPrev ){ /* unlink pCur from the doubly-linked BtShared cursor list */
1.2866 + pCur->pPrev->pNext = pCur->pNext;
1.2867 + }else{
1.2868 + pBt->pCursor = pCur->pNext;
1.2869 + }
1.2870 + if( pCur->pNext ){
1.2871 + pCur->pNext->pPrev = pCur->pPrev;
1.2872 + }
1.2873 + releasePage(pCur->pPage);
1.2874 + unlockBtreeIfUnused(pBt);
1.2875 + invalidateOverflowCache(pCur);
1.2876 + /* pCur itself is not freed here: the sqlite3_free(pCur) call is disabled. NOTE(review): pCur->pBtree is left set, so a second close would re-run the unlink - confirm callers never do that. */
1.2877 + sqlite3BtreeLeave(pBtree);
1.2878 + }
1.2879 + return SQLITE_OK;
1.2880 +}
1.2881 +
1.2882 +/*
1.2883 +** Make a temporary cursor by copying pCur into pTempCur field for field.
1.2884 +** The temporary cursor is not on the cursor list for the Btree.
1.2885 +*/
1.2886 +void sqlite3BtreeGetTempCursor(BtCursor *pCur, BtCursor *pTempCur){
1.2887 + assert( cursorHoldsMutex(pCur) );
1.2888 + memcpy(pTempCur, pCur, sizeof(*pCur));
1.2889 + pTempCur->pNext = 0; /* detach the copy from the cursor list links */
1.2890 + pTempCur->pPrev = 0;
1.2891 + if( pTempCur->pPage ){
1.2892 + sqlite3PagerRef(pTempCur->pPage->pDbPage); /* the copy takes its own reference; dropped by sqlite3BtreeReleaseTempCursor() */
1.2893 + }
1.2894 +}
1.2895 +
1.2896 +/*
1.2897 +** Delete a temporary cursor such as was made by the sqlite3BtreeGetTempCursor()
1.2898 +** function above. Only the page reference is dropped; no memory is freed.
1.2899 +*/
1.2900 +void sqlite3BtreeReleaseTempCursor(BtCursor *pCur){
1.2901 + assert( cursorHoldsMutex(pCur) );
1.2902 + if( pCur->pPage ){
1.2903 + sqlite3PagerUnref(pCur->pPage->pDbPage);
1.2904 + }
1.2905 +}
1.2906 +
1.2907 +/*
1.2908 +** Make sure the BtCursor* given in the argument has a valid
1.2909 +** BtCursor.info structure. If it is not already valid, call
1.2910 +** sqlite3BtreeParseCell() to fill it in.
1.2911 +**
1.2912 +** BtCursor.info is a cache of the information in the current cell.
1.2913 +** Using this cache reduces the number of calls to sqlite3BtreeParseCell().
1.2914 +** The cache counts as not-yet-valid while BtCursor.info.nSize==0.
1.2915 +** 2007-06-25: There is a bug in some versions of MSVC that causes the
1.2916 +** compiler to crash when getCellInfo() is implemented as a macro.
1.2917 +** But there is a measurable speed advantage to using the macro on gcc
1.2918 +** (when fewer compiler optimizations like -Os or -O0 are used and the
1.2919 +** compiler is not doing aggressive inlining.) So we use a real function
1.2920 +** for MSVC and a macro for everything else. Ticket #2457.
1.2921 +*/
1.2922 +#ifndef NDEBUG
1.2923 + static void assertCellInfo(BtCursor *pCur){ /* debug check: re-parse the cell and require a byte-for-byte match with the cached info */
1.2924 + CellInfo info;
1.2925 + memset(&info, 0, sizeof(info));
1.2926 + sqlite3BtreeParseCell(pCur->pPage, pCur->idx, &info);
1.2927 + assert( memcmp(&info, &pCur->info, sizeof(info))==0 );
1.2928 + }
1.2929 +#else
1.2930 + #define assertCellInfo(x)
1.2931 +#endif
1.2932 +#ifdef _MSC_VER
1.2933 + /* Use a real function in MSVC to work around bugs in that compiler. */
1.2934 + static void getCellInfo(BtCursor *pCur){
1.2935 + if( pCur->info.nSize==0 ){
1.2936 + sqlite3BtreeParseCell(pCur->pPage, pCur->idx, &pCur->info);
1.2937 + pCur->validNKey = 1;
1.2938 + }else{
1.2939 + assertCellInfo(pCur);
1.2940 + }
1.2941 + }
1.2942 +#else /* if not _MSC_VER */
1.2943 + /* Use a macro in all other compilers so that the function is inlined. NOTE: it expands to a bare if/else (no do-while wrapper, argument not parenthesized), so invoke it only as a complete statement inside braces, with a plain identifier. */
1.2944 +#define getCellInfo(pCur) \
1.2945 + if( pCur->info.nSize==0 ){ \
1.2946 + sqlite3BtreeParseCell(pCur->pPage, pCur->idx, &pCur->info); \
1.2947 + pCur->validNKey = 1; \
1.2948 + }else{ \
1.2949 + assertCellInfo(pCur); \
1.2950 + }
1.2951 +#endif /* _MSC_VER */
1.2952 +
1.2953 +/*
1.2954 +** Set *pSize to the size of the buffer needed to hold the value of
1.2955 +** the key for the current entry. If the cursor is not pointing
1.2956 +** to a valid entry, *pSize is set to 0.
1.2957 +**
1.2958 +** For a table with the INTKEY flag set, this routine returns the key
1.2959 +** itself, not the number of bytes in the key. If restoreCursorPosition() fails, its error code is returned and *pSize is not written.
1.2960 +*/
1.2961 +int sqlite3BtreeKeySize(BtCursor *pCur, i64 *pSize){
1.2962 + int rc;
1.2963 +
1.2964 + assert( cursorHoldsMutex(pCur) );
1.2965 + rc = restoreCursorPosition(pCur);
1.2966 + if( rc==SQLITE_OK ){
1.2967 + assert( pCur->eState==CURSOR_INVALID || pCur->eState==CURSOR_VALID );
1.2968 + if( pCur->eState==CURSOR_INVALID ){
1.2969 + *pSize = 0;
1.2970 + }else{
1.2971 + getCellInfo(pCur);
1.2972 + *pSize = pCur->info.nKey; /* taken from the cached cell info that getCellInfo() just ensured */
1.2973 + }
1.2974 + }
1.2975 + return rc;
1.2976 +}
1.2977 +
1.2978 +/*
1.2979 +** Set *pSize to the number of bytes of data in the entry the
1.2980 +** cursor currently points to. Returns SQLITE_OK, or the error code from
1.2981 +** restoreCursorPosition(), in which case *pSize is left unwritten.
1.2982 +** If the cursor is not currently pointing to an entry (which can happen,
1.2983 +** for example, if the database is empty) then *pSize is set to 0.
1.2984 +*/
1.2985 +int sqlite3BtreeDataSize(BtCursor *pCur, u32 *pSize){
1.2986 + int rc;
1.2987 +
1.2988 + assert( cursorHoldsMutex(pCur) );
1.2989 + rc = restoreCursorPosition(pCur);
1.2990 + if( rc==SQLITE_OK ){
1.2991 + assert( pCur->eState==CURSOR_INVALID || pCur->eState==CURSOR_VALID );
1.2992 + if( pCur->eState==CURSOR_INVALID ){
1.2993 + /* Not pointing at a valid entry - set *pSize to 0. */
1.2994 + *pSize = 0;
1.2995 + }else{
1.2996 + getCellInfo(pCur);
1.2997 + *pSize = pCur->info.nData; /* taken from the cached cell info that getCellInfo() just ensured */
1.2998 + }
1.2999 + }
1.3000 + return rc;
1.3001 +}
1.3002 +
1.3003 +/*
1.3004 +** Given the page number of an overflow page in the database (parameter
1.3005 +** ovfl), this function finds the page number of the next page in the
1.3006 +** linked list of overflow pages. If possible, it uses the auto-vacuum
1.3007 +** pointer-map data instead of reading the content of page ovfl to do so.
1.3008 +**
1.3009 +** If an error occurs an SQLite error code is returned. Otherwise:
1.3010 +**
1.3011 +** Unless pPgnoNext is NULL, the page number of the next overflow
1.3012 +** page in the linked list is written to *pPgnoNext. If page ovfl
1.3013 +** is the last page in its linked list, *pPgnoNext is set to zero.
1.3014 +**
1.3015 +** If ppPage is not NULL, *ppPage is set to the MemPage* handle
1.3016 +** for page ovfl. The underlying pager page may have been requested
1.3017 +** with the noContent flag set, so the page data accessible via
1.3018 +** this handle may not be trusted.
1.3019 +*/
1.3020 +static int getOverflowPage(
1.3021 + BtShared *pBt,
1.3022 + Pgno ovfl, /* Overflow page */
1.3023 + MemPage **ppPage, /* OUT: MemPage handle */
1.3024 + Pgno *pPgnoNext /* OUT: Next overflow page number */
1.3025 +){
1.3026 + Pgno next = 0;
1.3027 + int rc = SQLITE_OK; /* Initialized to placate warning */
1.3028 +
1.3029 + assert( sqlite3_mutex_held(pBt->mutex) );
1.3030 + /* One of these must not be NULL. Otherwise, why call this function? */
1.3031 + assert(ppPage || pPgnoNext);
1.3032 +
1.3033 + /* If pPgnoNext is NULL, then this function is being called to obtain
1.3034 + ** a MemPage* reference only. No page-data is required in this case.
1.3035 + */
1.3036 + if( !pPgnoNext ){
1.3037 + return sqlite3BtreeGetPage(pBt, ovfl, ppPage, 1);
1.3038 + }
1.3039 +
1.3040 +#ifndef SQLITE_OMIT_AUTOVACUUM
1.3041 + /* Try to find the next page in the overflow list using the
1.3042 + ** autovacuum pointer-map pages. Guess that the next page in
1.3043 + ** the overflow list is page number (ovfl+1). If that guess turns
1.3044 + ** out to be wrong, fall back to loading the data of page
1.3045 + ** number ovfl to determine the next page number.
1.3046 + */
1.3047 + if( pBt->autoVacuum ){
1.3048 + Pgno pgno;
1.3049 + Pgno iGuess = ovfl+1;
1.3050 + u8 eType;
1.3051 +
1.3052 + while( PTRMAP_ISPAGE(pBt, iGuess) || iGuess==PENDING_BYTE_PAGE(pBt) ){ /* step past pointer-map pages and the pending-byte page */
1.3053 + iGuess++;
1.3054 + }
1.3055 +
1.3056 + if( iGuess<=pagerPagecount(pBt->pPager) ){
1.3057 + rc = ptrmapGet(pBt, iGuess, &eType, &pgno);
1.3058 + if( rc!=SQLITE_OK ){
1.3059 + return rc;
1.3060 + }
1.3061 + if( eType==PTRMAP_OVERFLOW2 && pgno==ovfl ){ /* the guess is accepted only when its pointer-map entry names ovfl */
1.3062 + next = iGuess;
1.3063 + }
1.3064 + }
1.3065 + }
1.3066 +#endif
1.3067 +
1.3068 + if( next==0 || ppPage ){ /* page ovfl is fetched when "next" is still unknown or the caller wants the handle */
1.3069 + MemPage *pPage = 0;
1.3070 +
1.3071 + rc = sqlite3BtreeGetPage(pBt, ovfl, &pPage, next!=0); /* last argument is presumably the noContent flag: content is skipped when "next" is already known */
1.3072 + assert(rc==SQLITE_OK || pPage==0);
1.3073 + if( next==0 && rc==SQLITE_OK ){
1.3074 + next = get4byte(pPage->aData); /* the first 4 bytes of the overflow page give the next page number */
1.3075 + }
1.3076 +
1.3077 + if( ppPage ){
1.3078 + *ppPage = pPage;
1.3079 + }else{
1.3080 + releasePage(pPage);
1.3081 + }
1.3082 + }
1.3083 + *pPgnoNext = next; /* written on this exit path even when rc is an error */
1.3084 +
1.3085 + return rc;
1.3086 +}
1.3087 +
1.3088 +/*
1.3089 +** Copy data from a buffer to a page, or from a page to a buffer.
1.3090 +**
1.3091 +** pPayload is a pointer to data stored on database page pDbPage.
1.3092 +** If argument eOp is false, then nByte bytes of data are copied
1.3093 +** from pPayload to the buffer pointed at by pBuf. If eOp is true,
1.3094 +** then sqlite3PagerWrite() is called on pDbPage and nByte bytes
1.3095 +** of data are copied from the buffer pBuf to pPayload.
1.3096 +**
1.3097 +** SQLITE_OK is returned on success, otherwise the sqlite3PagerWrite() error code (nothing is copied in that case).
1.3098 +*/
1.3099 +static int copyPayload(
1.3100 + void *pPayload, /* Pointer to page data */
1.3101 + void *pBuf, /* Pointer to buffer */
1.3102 + int nByte, /* Number of bytes to copy */
1.3103 + int eOp, /* 0 -> copy from page, 1 -> copy to page */
1.3104 + DbPage *pDbPage /* Page containing pPayload */
1.3105 +){
1.3106 + if( eOp ){
1.3107 + /* Copy data from buffer to page (a write operation) */
1.3108 + int rc = sqlite3PagerWrite(pDbPage);
1.3109 + if( rc!=SQLITE_OK ){
1.3110 + return rc;
1.3111 + }
1.3112 + memcpy(pPayload, pBuf, nByte);
1.3113 + }else{
1.3114 + /* Copy data from page to buffer (a read operation) */
1.3115 + memcpy(pBuf, pPayload, nByte);
1.3116 + }
1.3117 + return SQLITE_OK;
1.3118 +}
1.3119 +
1.3120 +/*
1.3121 +** This function is used to read or overwrite payload information
1.3122 +** for the entry that the pCur cursor is pointing to. If the eOp
1.3123 +** parameter is 0, this is a read operation (data copied into
1.3124 +** buffer pBuf). If it is non-zero, a write (data copied from
1.3125 +** buffer pBuf).
1.3126 +**
1.3127 +** A total of "amt" bytes are read or written beginning at "offset".
1.3128 +** Data is read to or from the buffer pBuf.
1.3129 +**
1.3130 +** This routine does not make a distinction between key and data.
1.3131 +** It just reads or writes bytes from the payload area. Data might
1.3132 +** appear on the main page or be scattered out on multiple overflow
1.3133 +** pages.
1.3134 +**
1.3135 +** If the BtCursor.isIncrblobHandle flag is set, and the current
1.3136 +** cursor entry uses one or more overflow pages, this function
1.3137 +** allocates space for and lazily populates the overflow page-list
1.3138 +** cache array (BtCursor.aOverflow). Subsequent calls use this
1.3139 +** cache to make seeking to the supplied offset more efficient.
1.3140 +**
1.3141 +** Once an overflow page-list cache has been allocated, it may be
1.3142 +** invalidated if some other cursor writes to the same table, or if
1.3143 +** the cursor is moved to a different row. Additionally, in auto-vacuum
1.3144 +** mode, the following events may invalidate an overflow page-list cache.
1.3145 +**
1.3146 +** * An incremental vacuum,
1.3147 +** * A commit in auto_vacuum="full" mode,
1.3148 +** * Creating a table (may require moving an overflow page).
1.3149 +*/
1.3150 +static int accessPayload(
1.3151 + BtCursor *pCur, /* Cursor pointing to entry to read from */
1.3152 + int offset, /* Begin reading this far into payload */
1.3153 + int amt, /* Read this many bytes */
1.3154 + unsigned char *pBuf, /* Write the bytes into this buffer */
1.3155 + int skipKey, /* offset begins at data if this is true */
1.3156 + int eOp /* zero to read. non-zero to write. */
1.3157 +){
1.3158 + unsigned char *aPayload;
1.3159 + int rc = SQLITE_OK;
1.3160 + u32 nKey;
1.3161 + int iIdx = 0;
1.3162 + MemPage *pPage = pCur->pPage; /* Btree page of current cursor entry */
1.3163 + BtShared *pBt; /* Btree this cursor belongs to */
1.3164 +
1.3165 + assert( pPage );
1.3166 + assert( pCur->eState==CURSOR_VALID );
1.3167 + assert( pCur->idx>=0 && pCur->idx<pPage->nCell );
1.3168 + assert( offset>=0 );
1.3169 + assert( cursorHoldsMutex(pCur) );
1.3170 +
1.3171 + getCellInfo(pCur);
1.3172 + aPayload = pCur->info.pCell + pCur->info.nHeader;
1.3173 + nKey = (pPage->intKey ? 0 : pCur->info.nKey); /* on an intKey page the key takes no payload bytes */
1.3174 +
1.3175 + if( skipKey ){
1.3176 + offset += nKey;
1.3177 + }
1.3178 + if( offset+amt > nKey+pCur->info.nData ){ /* NOTE(review): nKey is u32, so the int sum offset+amt is presumably compared as unsigned - confirm the sum cannot overflow */
1.3179 + /* Trying to read or write past the end of the data is an error */
1.3180 + return SQLITE_ERROR;
1.3181 + }
1.3182 +
1.3183 + /* Check if data must be read/written to/from the btree page itself. */
1.3184 + if( offset<pCur->info.nLocal ){
1.3185 + int a = amt;
1.3186 + if( a+offset>pCur->info.nLocal ){
1.3187 + a = pCur->info.nLocal - offset; /* clamp to the bytes stored on the btree page */
1.3188 + }
1.3189 + rc = copyPayload(&aPayload[offset], pBuf, a, eOp, pPage->pDbPage);
1.3190 + offset = 0;
1.3191 + pBuf += a;
1.3192 + amt -= a;
1.3193 + }else{
1.3194 + offset -= pCur->info.nLocal; /* from here on, offset is relative to the start of the overflow chain */
1.3195 + }
1.3196 +
1.3197 + pBt = pCur->pBt;
1.3198 + if( rc==SQLITE_OK && amt>0 ){
1.3199 + const int ovflSize = pBt->usableSize - 4; /* Bytes content per ovfl page */
1.3200 + Pgno nextPage;
1.3201 +
1.3202 + nextPage = get4byte(&aPayload[pCur->info.nLocal]); /* first overflow page number follows the local payload */
1.3203 +
1.3204 +#ifndef SQLITE_OMIT_INCRBLOB
1.3205 + /* If the isIncrblobHandle flag is set and the BtCursor.aOverflow[]
1.3206 + ** has not been allocated, allocate it now. The array is sized at
1.3207 + ** one entry for each overflow page in the overflow chain. The
1.3208 + ** page number of the first overflow page is stored in aOverflow[0],
1.3209 + ** etc. A value of 0 in the aOverflow[] array means "not yet known"
1.3210 + ** (the cache is lazily populated).
1.3211 + */
1.3212 + if( pCur->isIncrblobHandle && !pCur->aOverflow ){
1.3213 + int nOvfl = (pCur->info.nPayload-pCur->info.nLocal+ovflSize-1)/ovflSize;
1.3214 + pCur->aOverflow = (Pgno *)sqlite3MallocZero(sizeof(Pgno)*nOvfl);
1.3215 + if( nOvfl && !pCur->aOverflow ){
1.3216 + rc = SQLITE_NOMEM; /* the copy loop below is skipped because it tests rc==SQLITE_OK */
1.3217 + }
1.3218 + }
1.3219 +
1.3220 + /* If the overflow page-list cache has been allocated and the
1.3221 + ** entry for the first required overflow page is valid, skip
1.3222 + ** directly to it.
1.3223 + */
1.3224 + if( pCur->aOverflow && pCur->aOverflow[offset/ovflSize] ){
1.3225 + iIdx = (offset/ovflSize);
1.3226 + nextPage = pCur->aOverflow[iIdx];
1.3227 + offset = (offset%ovflSize);
1.3228 + }
1.3229 +#endif
1.3230 +
1.3231 + for( ; rc==SQLITE_OK && amt>0 && nextPage; iIdx++){
1.3232 +
1.3233 +#ifndef SQLITE_OMIT_INCRBLOB
1.3234 + /* If required, populate the overflow page-list cache. */
1.3235 + if( pCur->aOverflow ){
1.3236 + assert(!pCur->aOverflow[iIdx] || pCur->aOverflow[iIdx]==nextPage);
1.3237 + pCur->aOverflow[iIdx] = nextPage;
1.3238 + }
1.3239 +#endif
1.3240 +
1.3241 + if( offset>=ovflSize ){
1.3242 + /* The only reason to read this page is to obtain the page
1.3243 + ** number for the next page in the overflow chain. The page
1.3244 + ** data is not required. So first try to lookup the overflow
1.3245 + ** page-list cache, if any, then fall back to the getOverflowPage()
1.3246 + ** function.
1.3247 + */
1.3248 +#ifndef SQLITE_OMIT_INCRBLOB
1.3249 + if( pCur->aOverflow && pCur->aOverflow[iIdx+1] ){
1.3250 + nextPage = pCur->aOverflow[iIdx+1];
1.3251 + } else
1.3252 +#endif
1.3253 + rc = getOverflowPage(pBt, nextPage, 0, &nextPage); /* this statement is the body of the dangling "else" above when INCRBLOB is compiled in */
1.3254 + offset -= ovflSize;
1.3255 + }else{
1.3256 + /* Need to read this page properly. It contains some of the
1.3257 + ** range of data that is being read (eOp==0) or written (eOp!=0).
1.3258 + */
1.3259 + DbPage *pDbPage;
1.3260 + int a = amt;
1.3261 + rc = sqlite3PagerGet(pBt->pPager, nextPage, &pDbPage);
1.3262 + if( rc==SQLITE_OK ){
1.3263 + aPayload = sqlite3PagerGetData(pDbPage);
1.3264 + nextPage = get4byte(aPayload);
1.3265 + if( a + offset > ovflSize ){
1.3266 + a = ovflSize - offset; /* clamp to what remains on this overflow page */
1.3267 + }
1.3268 + rc = copyPayload(&aPayload[offset+4], pBuf, a, eOp, pDbPage); /* +4 skips the next-page pointer at the start of the page */
1.3269 + sqlite3PagerUnref(pDbPage);
1.3270 + offset = 0;
1.3271 + amt -= a;
1.3272 + pBuf += a;
1.3273 + }
1.3274 + }
1.3275 + }
1.3276 + }
1.3277 +
1.3278 + if( rc==SQLITE_OK && amt>0 ){ /* bytes still owed with no error means the overflow chain ended early */
1.3279 + return SQLITE_CORRUPT_BKPT;
1.3280 + }
1.3281 + return rc;
1.3282 +}
1.3283 +
1.3284 +/*
1.3285 +** Read part of the key associated with cursor pCur. Exactly
1.3286 +** "amt" bytes will be transferred into pBuf[]. The transfer
1.3287 +** begins at "offset".
1.3288 +**
1.3289 +** Return SQLITE_OK on success or an error code if anything goes
1.3290 +** wrong. An error is returned if "offset+amt" is larger than
1.3291 +** the available payload. SQLITE_CORRUPT_BKPT is returned if the cursor's page is an intKey page.
1.3292 +*/
1.3293 +int sqlite3BtreeKey(BtCursor *pCur, u32 offset, u32 amt, void *pBuf){
1.3294 + int rc;
1.3295 +
1.3296 + assert( cursorHoldsMutex(pCur) );
1.3297 + rc = restoreCursorPosition(pCur);
1.3298 + if( rc==SQLITE_OK ){
1.3299 + assert( pCur->eState==CURSOR_VALID );
1.3300 + assert( pCur->pPage!=0 );
1.3301 + if( pCur->pPage->intKey ){
1.3302 + return SQLITE_CORRUPT_BKPT;
1.3303 + }
1.3304 + assert( pCur->pPage->intKey==0 );
1.3305 + assert( pCur->idx>=0 && pCur->idx<pCur->pPage->nCell );
1.3306 + rc = accessPayload(pCur, offset, amt, (unsigned char*)pBuf, 0, 0); /* skipKey=0, eOp=0: read starting at the key; u32 offset/amt convert to int parameters */
1.3307 + }
1.3308 + return rc;
1.3309 +}
1.3310 +
1.3311 +/*
1.3312 +** Read part of the data associated with cursor pCur. Exactly
1.3313 +** "amt" bytes will be transferred into pBuf[]. The transfer
1.3314 +** begins at "offset".
1.3315 +**
1.3316 +** Return SQLITE_OK on success or an error code if anything goes
1.3317 +** wrong. An error is returned if "offset+amt" is larger than
1.3318 +** the available payload. Unless SQLITE_OMIT_INCRBLOB is defined, SQLITE_ABORT is returned for a CURSOR_INVALID cursor.
1.3319 +*/
1.3320 +int sqlite3BtreeData(BtCursor *pCur, u32 offset, u32 amt, void *pBuf){
1.3321 + int rc;
1.3322 +
1.3323 +#ifndef SQLITE_OMIT_INCRBLOB
1.3324 + if ( pCur->eState==CURSOR_INVALID ){ /* checked before the mutex assert and before any restore attempt */
1.3325 + return SQLITE_ABORT;
1.3326 + }
1.3327 +#endif
1.3328 +
1.3329 + assert( cursorHoldsMutex(pCur) );
1.3330 + rc = restoreCursorPosition(pCur);
1.3331 + if( rc==SQLITE_OK ){
1.3332 + assert( pCur->eState==CURSOR_VALID );
1.3333 + assert( pCur->pPage!=0 );
1.3334 + assert( pCur->idx>=0 && pCur->idx<pCur->pPage->nCell );
1.3335 + rc = accessPayload(pCur, offset, amt, pBuf, 1, 0); /* skipKey=1, eOp=0: read starting at the data */
1.3336 + }
1.3337 + return rc;
1.3338 +}
1.3339 +
1.3340 +/*
1.3341 +** Return a pointer to payload information from the entry that the
1.3342 +** pCur cursor is pointing to. The pointer is to the beginning of
1.3343 +** the key if skipKey==0 and it points to the beginning of data if
1.3344 +** skipKey==1. The number of bytes of available key/data is written
1.3345 +** into *pAmt. If *pAmt==0, then the value returned will not be
1.3346 +** a valid pointer.
1.3347 +**
1.3348 +** This routine is an optimization. It is common for the entire key
1.3349 +** and data to fit on the local page and for there to be no overflow
1.3350 +** pages. When that is so, this routine can be used to access the
1.3351 +** key and data without making a copy. If the key and/or data spills
1.3352 +** onto overflow pages, then accessPayload() must be used to reassemble
1.3353 +** the key/data and copy it into a preallocated buffer.
1.3354 +**
1.3355 +** The pointer returned by this routine looks directly into the cached
1.3356 +** page of the database. The data might change or move the next time
1.3357 +** any btree routine is called.
1.3358 +*/
1.3359 +static const unsigned char *fetchPayload(
1.3360 + BtCursor *pCur, /* Cursor pointing to entry to read from */
1.3361 + int *pAmt, /* Write the number of available bytes here */
1.3362 + int skipKey /* read beginning at data if this is true */
1.3363 +){
1.3364 + unsigned char *aPayload;
1.3365 + MemPage *pPage;
1.3366 + u32 nKey;
1.3367 + int nLocal;
1.3368 +
1.3369 + assert( pCur!=0 && pCur->pPage!=0 );
1.3370 + assert( pCur->eState==CURSOR_VALID );
1.3371 + assert( cursorHoldsMutex(pCur) );
1.3372 + pPage = pCur->pPage;
1.3373 + assert( pCur->idx>=0 && pCur->idx<pPage->nCell );
1.3374 + getCellInfo(pCur);
1.3375 + aPayload = pCur->info.pCell;
1.3376 + aPayload += pCur->info.nHeader; /* payload starts right after the cell header */
1.3377 + if( pPage->intKey ){
1.3378 + nKey = 0; /* on an intKey page the key takes no payload bytes */
1.3379 + }else{
1.3380 + nKey = pCur->info.nKey;
1.3381 + }
1.3382 + if( skipKey ){
1.3383 + aPayload += nKey;
1.3384 + nLocal = pCur->info.nLocal - nKey; /* NOTE(review): if nKey exceeds info.nLocal this presumably ends up negative in *pAmt - confirm callers cope */
1.3385 + }else{
1.3386 + nLocal = pCur->info.nLocal;
1.3387 + if( nLocal>nKey ){ /* report at most the key bytes, not the data that follows */
1.3388 + nLocal = nKey;
1.3389 + }
1.3390 + }
1.3391 + *pAmt = nLocal;
1.3392 + return aPayload;
1.3393 +}
1.3394 +
1.3395 +
1.3396 +/*
1.3397 +** For the entry that cursor pCur is pointing to, return as
1.3398 +** many bytes of the key or data as are available on the local
1.3399 +** b-tree page. Write the number of available bytes into *pAmt.
1.3400 +**
1.3401 +** The pointer returned is ephemeral. The key/data may move
1.3402 +** or be destroyed on the next call to any Btree routine,
1.3403 +** including calls from other threads against the same cache.
1.3404 +** Hence, a mutex on the BtShared should be held prior to calling
1.3405 +** this routine.
1.3406 +**
1.3407 +** These routines are used to get quick access to key and data
1.3408 +** in the common case where no overflow pages are used. Both return 0, leaving *pAmt unwritten, unless the cursor is CURSOR_VALID.
1.3409 +*/
1.3410 +const void *sqlite3BtreeKeyFetch(BtCursor *pCur, int *pAmt){
1.3411 + assert( cursorHoldsMutex(pCur) );
1.3412 + if( pCur->eState==CURSOR_VALID ){
1.3413 + return (const void*)fetchPayload(pCur, pAmt, 0); /* skipKey=0: pointer to the key */
1.3414 + }
1.3415 + return 0;
1.3416 +}
1.3417 +const void *sqlite3BtreeDataFetch(BtCursor *pCur, int *pAmt){
1.3418 + assert( cursorHoldsMutex(pCur) );
1.3419 + if( pCur->eState==CURSOR_VALID ){
1.3420 + return (const void*)fetchPayload(pCur, pAmt, 1); /* skipKey=1: pointer to the data */
1.3421 + }
1.3422 + return 0;
1.3423 +}
1.3424 +
1.3425 +
1.3426 +/*
1.3427 +** Move the cursor down to a new child page. The newPgno argument is the
1.3428 +** page number of the child page to move to. Returns the getAndInitPage() error (cursor unchanged), SQLITE_CORRUPT_BKPT if the child has no cells, else SQLITE_OK.
1.3429 +*/
1.3430 +static int moveToChild(BtCursor *pCur, u32 newPgno){
1.3431 + int rc;
1.3432 + MemPage *pNewPage;
1.3433 + MemPage *pOldPage;
1.3434 + BtShared *pBt = pCur->pBt;
1.3435 +
1.3436 + assert( cursorHoldsMutex(pCur) );
1.3437 + assert( pCur->eState==CURSOR_VALID );
1.3438 + rc = getAndInitPage(pBt, newPgno, &pNewPage, pCur->pPage); /* last argument is presumably the parent page - confirm */
1.3439 + if( rc ) return rc;
1.3440 + pNewPage->idxParent = pCur->idx; /* sqlite3BtreeMoveToParent() restores pCur->idx from this */
1.3441 + pOldPage = pCur->pPage;
1.3442 + pOldPage->idxShift = 0;
1.3443 + releasePage(pOldPage);
1.3444 + pCur->pPage = pNewPage;
1.3445 + pCur->idx = 0;
1.3446 + pCur->info.nSize = 0; /* invalidates the cached cell info (see getCellInfo) */
1.3447 + pCur->validNKey = 0;
1.3448 + if( pNewPage->nCell<1 ){ /* note: the cursor has already been moved onto the child at this point */
1.3449 + return SQLITE_CORRUPT_BKPT;
1.3450 + }
1.3451 + return SQLITE_OK;
1.3452 +}
1.3453 +
1.3454 +/*
1.3455 +** Return true if the page is the virtual root of its table.
1.3456 +**
1.3457 +** The virtual root page is the root page for most tables. But
1.3458 +** for the table rooted on page 1, sometimes the real root page
1.3459 +** is empty except for the right-pointer. In such cases the
1.3460 +** virtual root page is the page that the right-pointer of page
1.3461 +** 1 is pointing to.
1.3462 +*/
1.3463 +int sqlite3BtreeIsRootPage(MemPage *pPage){
1.3464 + MemPage *pParent;
1.3465 +
1.3466 + assert( sqlite3_mutex_held(pPage->pBt->mutex) );
1.3467 + pParent = pPage->pParent;
1.3468 + if( pParent==0 ) return 1; /* no parent: a real root page */
1.3469 + if( pParent->pgno>1 ) return 0; /* only page 1 can sit above a virtual root */
1.3470 + if( get2byte(&pParent->aData[pParent->hdrOffset+3])==0 ) return 1; /* presumably the cell-count field of the parent's header: zero means page 1 is empty - confirm offset */
1.3471 + return 0;
1.3472 +}
1.3473 +
1.3474 +/*
1.3475 +** Move the cursor up to the parent page. The current page must not be a (virtual) root; this is only checked by assert().
1.3476 +**
1.3477 +** pCur->idx is set to the cell index that contains the pointer
1.3478 +** to the page we are coming from. If we are coming from the
1.3479 +** right-most child page then pCur->idx is set to one more than
1.3480 +** the largest cell index.
1.3481 +*/
1.3482 +void sqlite3BtreeMoveToParent(BtCursor *pCur){
1.3483 + MemPage *pParent;
1.3484 + MemPage *pPage;
1.3485 + int idxParent;
1.3486 +
1.3487 + assert( cursorHoldsMutex(pCur) );
1.3488 + assert( pCur->eState==CURSOR_VALID );
1.3489 + pPage = pCur->pPage;
1.3490 + assert( pPage!=0 );
1.3491 + assert( !sqlite3BtreeIsRootPage(pPage) );
1.3492 + pParent = pPage->pParent;
1.3493 + assert( pParent!=0 );
1.3494 + idxParent = pPage->idxParent; /* read before the child page is released */
1.3495 + sqlite3PagerRef(pParent->pDbPage); /* take the cursor's reference on the parent before releasing the child */
1.3496 + releasePage(pPage);
1.3497 + pCur->pPage = pParent;
1.3498 + pCur->info.nSize = 0; /* invalidates the cached cell info (see getCellInfo) */
1.3499 + pCur->validNKey = 0;
1.3500 + assert( pParent->idxShift==0 );
1.3501 + pCur->idx = idxParent;
1.3502 +}
1.3503 +
1.3504 +/*
1.3505 +** Move the cursor to the root page of its table (pCur->pgnoRoot). On the normal exit path the cursor is left CURSOR_VALID if the page it ends on has at least one cell, else CURSOR_INVALID. A CURSOR_FAULT cursor returns its stored error (pCur->skip) unchanged.
1.3506 +*/
1.3507 +static int moveToRoot(BtCursor *pCur){
1.3508 + MemPage *pRoot;
1.3509 + int rc = SQLITE_OK;
1.3510 + Btree *p = pCur->pBtree;
1.3511 + BtShared *pBt = p->pBt;
1.3512 +
1.3513 + assert( cursorHoldsMutex(pCur) );
1.3514 + assert( CURSOR_INVALID < CURSOR_REQUIRESEEK ); /* these three asserts pin the ordering that the ">=" test below relies on */
1.3515 + assert( CURSOR_VALID < CURSOR_REQUIRESEEK );
1.3516 + assert( CURSOR_FAULT > CURSOR_REQUIRESEEK );
1.3517 + if( pCur->eState>=CURSOR_REQUIRESEEK ){
1.3518 + if( pCur->eState==CURSOR_FAULT ){
1.3519 + return pCur->skip; /* error code stored by sqlite3BtreeTripAllCursors() */
1.3520 + }
1.3521 + clearCursorPosition(pCur);
1.3522 + }
1.3523 + pRoot = pCur->pPage;
1.3524 + if( pRoot && pRoot->pgno==pCur->pgnoRoot ){ /* already on the root page: nothing to fetch */
1.3525 + assert( pRoot->isInit );
1.3526 + }else{
1.3527 + if(
1.3528 + SQLITE_OK!=(rc = getAndInitPage(pBt, pCur->pgnoRoot, &pRoot, 0))
1.3529 + ){
1.3530 + pCur->eState = CURSOR_INVALID;
1.3531 + return rc;
1.3532 + }
1.3533 + releasePage(pCur->pPage);
1.3534 + pCur->pPage = pRoot;
1.3535 + }
1.3536 + pCur->idx = 0;
1.3537 + pCur->info.nSize = 0; /* invalidates the cached cell info (see getCellInfo) */
1.3538 + pCur->atLast = 0;
1.3539 + pCur->validNKey = 0;
1.3540 + if( pRoot->nCell==0 && !pRoot->leaf ){ /* empty interior root: descend through its right-pointer */
1.3541 + Pgno subpage;
1.3542 + assert( pRoot->pgno==1 );
1.3543 + subpage = get4byte(&pRoot->aData[pRoot->hdrOffset+8]);
1.3544 + assert( subpage>0 );
1.3545 + pCur->eState = CURSOR_VALID; /* moveToChild() asserts CURSOR_VALID on entry */
1.3546 + rc = moveToChild(pCur, subpage);
1.3547 + }
1.3548 + pCur->eState = ((pCur->pPage->nCell>0)?CURSOR_VALID:CURSOR_INVALID); /* recomputed from the final page whatever rc moveToChild() returned */
1.3549 + return rc;
1.3550 +}
1.3551 +
1.3552 +/*
1.3553 +** Move the cursor down to the left-most leaf entry beneath the
1.3554 +** entry to which it is currently pointing.
1.3555 +**
1.3556 +** The left-most leaf is the one with the smallest key - the first
1.3557 +** in ascending order.
1.3558 +*/
1.3559 +static int moveToLeftmost(BtCursor *pCur){
1.3560 + Pgno pgno;
1.3561 + int rc = SQLITE_OK;
1.3562 + MemPage *pPage;
1.3563 +
1.3564 + assert( cursorHoldsMutex(pCur) );
1.3565 + assert( pCur->eState==CURSOR_VALID );
1.3566 + while( rc==SQLITE_OK && !(pPage = pCur->pPage)->leaf ){
1.3567 + assert( pCur->idx>=0 && pCur->idx<pPage->nCell );
1.3568 + pgno = get4byte(findCell(pPage, pCur->idx));
1.3569 + rc = moveToChild(pCur, pgno);
1.3570 + }
1.3571 + return rc;
1.3572 +}
1.3573 +
1.3574 +/*
1.3575 +** Move the cursor down to the right-most leaf entry beneath the
1.3576 +** page to which it is currently pointing. Notice the difference
1.3577 +** between moveToLeftmost() and moveToRightmost(). moveToLeftmost()
1.3578 +** finds the left-most entry beneath the *entry* whereas moveToRightmost()
1.3579 +** finds the right-most entry beneath the *page*.
1.3580 +**
1.3581 +** The right-most entry is the one with the largest key - the last
1.3582 +** key in ascending order.
1.3583 +*/
1.3584 +static int moveToRightmost(BtCursor *pCur){
1.3585 + Pgno pgno;
1.3586 + int rc = SQLITE_OK;
1.3587 + MemPage *pPage;
1.3588 +
1.3589 + assert( cursorHoldsMutex(pCur) );
1.3590 + assert( pCur->eState==CURSOR_VALID );
1.3591 + while( rc==SQLITE_OK && !(pPage = pCur->pPage)->leaf ){
1.3592 + pgno = get4byte(&pPage->aData[pPage->hdrOffset+8]);
1.3593 + pCur->idx = pPage->nCell;
1.3594 + rc = moveToChild(pCur, pgno);
1.3595 + }
1.3596 + if( rc==SQLITE_OK ){
1.3597 + pCur->idx = pPage->nCell - 1;
1.3598 + pCur->info.nSize = 0;
1.3599 + pCur->validNKey = 0;
1.3600 + }
1.3601 + return SQLITE_OK;
1.3602 +}
1.3603 +
1.3604 +/* Move the cursor to the first entry in the table. Return SQLITE_OK
1.3605 +** on success. Set *pRes to 0 if the cursor actually points to something
1.3606 +** or set *pRes to 1 if the table is empty.
1.3607 +*/
1.3608 +int sqlite3BtreeFirst(BtCursor *pCur, int *pRes){
1.3609 + int rc;
1.3610 +
1.3611 + assert( cursorHoldsMutex(pCur) );
1.3612 + assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) );
1.3613 + rc = moveToRoot(pCur);
1.3614 + if( rc==SQLITE_OK ){
1.3615 + if( pCur->eState==CURSOR_INVALID ){
1.3616 + assert( pCur->pPage->nCell==0 );
1.3617 + *pRes = 1;
1.3618 + rc = SQLITE_OK;
1.3619 + }else{
1.3620 + assert( pCur->pPage->nCell>0 );
1.3621 + *pRes = 0;
1.3622 + rc = moveToLeftmost(pCur);
1.3623 + }
1.3624 + }
1.3625 + return rc;
1.3626 +}
1.3627 +
1.3628 +/* Move the cursor to the last entry in the table. Return SQLITE_OK
1.3629 +** on success. Set *pRes to 0 if the cursor actually points to something
1.3630 +** or set *pRes to 1 if the table is empty.
1.3631 +*/
1.3632 +int sqlite3BtreeLast(BtCursor *pCur, int *pRes){
1.3633 + int rc;
1.3634 +
1.3635 + assert( cursorHoldsMutex(pCur) );
1.3636 + assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) );
1.3637 + rc = moveToRoot(pCur);
1.3638 + if( rc==SQLITE_OK ){
1.3639 + if( CURSOR_INVALID==pCur->eState ){
1.3640 + assert( pCur->pPage->nCell==0 );
1.3641 + *pRes = 1;
1.3642 + }else{
1.3643 + assert( pCur->eState==CURSOR_VALID );
1.3644 + *pRes = 0;
1.3645 + rc = moveToRightmost(pCur);
1.3646 + getCellInfo(pCur);
1.3647 + pCur->atLast = rc==SQLITE_OK;
1.3648 + }
1.3649 + }
1.3650 + return rc;
1.3651 +}
1.3652 +
1.3653 +/* Move the cursor so that it points to an entry near the key
1.3654 +** specified by pKey/nKey/pUnKey. Return a success code.
1.3655 +**
1.3656 +** For INTKEY tables, only the nKey parameter is used. pKey
1.3657 +** and pUnKey must be NULL. For index tables, either pUnKey
1.3658 +** must point to a key that has already been unpacked, or else
1.3659 +** pKey/nKey describes a blob containing the key.
1.3660 +**
1.3661 +** If an exact match is not found, then the cursor is always
1.3662 +** left pointing at a leaf page which would hold the entry if it
1.3663 +** were present. The cursor might point to an entry that comes
1.3664 +** before or after the key.
1.3665 +**
1.3666 +** The result of comparing the key with the entry to which the
1.3667 +** cursor is written to *pRes if pRes!=NULL. The meaning of
1.3668 +** this value is as follows:
1.3669 +**
1.3670 +** *pRes<0 The cursor is left pointing at an entry that
1.3671 +** is smaller than pKey or if the table is empty
1.3672 +** and the cursor is therefore left point to nothing.
1.3673 +**
1.3674 +** *pRes==0 The cursor is left pointing at an entry that
1.3675 +** exactly matches pKey.
1.3676 +**
1.3677 +** *pRes>0 The cursor is left pointing at an entry that
1.3678 +** is larger than pKey.
1.3679 +**
1.3680 +*/
1.3681 +int sqlite3BtreeMoveto(
1.3682 + BtCursor *pCur, /* The cursor to be moved */
1.3683 + const void *pKey, /* The key content for indices. Not used by tables */
1.3684 + UnpackedRecord *pUnKey,/* Unpacked version of pKey */
1.3685 + i64 nKey, /* Size of pKey. Or the key for tables */
1.3686 + int biasRight, /* If true, bias the search to the high end */
1.3687 + int *pRes /* Search result flag */
1.3688 +){
1.3689 + int rc;
1.3690 + char aSpace[200];
1.3691 +
1.3692 + assert( cursorHoldsMutex(pCur) );
1.3693 + assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) );
1.3694 +
1.3695 + /* If the cursor is already positioned at the point we are trying
1.3696 + ** to move to, then just return without doing any work */
1.3697 + if( pCur->eState==CURSOR_VALID && pCur->validNKey && pCur->pPage->intKey ){
1.3698 + if( pCur->info.nKey==nKey ){
1.3699 + *pRes = 0;
1.3700 + return SQLITE_OK;
1.3701 + }
1.3702 + if( pCur->atLast && pCur->info.nKey<nKey ){
1.3703 + *pRes = -1;
1.3704 + return SQLITE_OK;
1.3705 + }
1.3706 + }
1.3707 +
1.3708 +
1.3709 + rc = moveToRoot(pCur);
1.3710 + if( rc ){
1.3711 + return rc;
1.3712 + }
1.3713 + assert( pCur->pPage );
1.3714 + assert( pCur->pPage->isInit );
1.3715 + if( pCur->eState==CURSOR_INVALID ){
1.3716 + *pRes = -1;
1.3717 + assert( pCur->pPage->nCell==0 );
1.3718 + return SQLITE_OK;
1.3719 + }
1.3720 + if( pCur->pPage->intKey ){
1.3721 + /* We are given an SQL table to search. The key is the integer
1.3722 + ** rowid contained in nKey. pKey and pUnKey should both be NULL */
1.3723 + assert( pUnKey==0 );
1.3724 + assert( pKey==0 );
1.3725 + }else if( pUnKey==0 ){
1.3726 + /* We are to search an SQL index using a key encoded as a blob.
1.3727 + ** The blob is found at pKey and is nKey bytes in length. Unpack
1.3728 + ** this key so that we can use it. */
1.3729 + assert( pKey!=0 );
1.3730 + pUnKey = sqlite3VdbeRecordUnpack(pCur->pKeyInfo, nKey, pKey,
1.3731 + aSpace, sizeof(aSpace));
1.3732 + if( pUnKey==0 ) return SQLITE_NOMEM;
1.3733 + }else{
1.3734 + /* We are to search an SQL index using a key that is already unpacked
1.3735 + ** and handed to us in pUnKey. */
1.3736 + assert( pKey==0 );
1.3737 + }
1.3738 + for(;;){
1.3739 + int lwr, upr;
1.3740 + Pgno chldPg;
1.3741 + MemPage *pPage = pCur->pPage;
1.3742 + int c = -1; /* pRes return if table is empty must be -1 */
1.3743 + lwr = 0;
1.3744 + upr = pPage->nCell-1;
1.3745 + if( !pPage->intKey && pUnKey==0 ){
1.3746 + rc = SQLITE_CORRUPT_BKPT;
1.3747 + goto moveto_finish;
1.3748 + }
1.3749 + if( biasRight ){
1.3750 + pCur->idx = upr;
1.3751 + }else{
1.3752 + pCur->idx = (upr+lwr)/2;
1.3753 + }
1.3754 + if( lwr<=upr ) for(;;){
1.3755 + void *pCellKey;
1.3756 + i64 nCellKey;
1.3757 + pCur->info.nSize = 0;
1.3758 + pCur->validNKey = 1;
1.3759 + if( pPage->intKey ){
1.3760 + u8 *pCell;
1.3761 + pCell = findCell(pPage, pCur->idx) + pPage->childPtrSize;
1.3762 + if( pPage->hasData ){
1.3763 + u32 dummy;
1.3764 + pCell += getVarint32(pCell, dummy);
1.3765 + }
1.3766 + getVarint(pCell, (u64*)&nCellKey);
1.3767 + if( nCellKey==nKey ){
1.3768 + c = 0;
1.3769 + }else if( nCellKey<nKey ){
1.3770 + c = -1;
1.3771 + }else{
1.3772 + assert( nCellKey>nKey );
1.3773 + c = +1;
1.3774 + }
1.3775 + }else{
1.3776 + int available;
1.3777 + pCellKey = (void *)fetchPayload(pCur, &available, 0);
1.3778 + nCellKey = pCur->info.nKey;
1.3779 + if( available>=nCellKey ){
1.3780 + c = sqlite3VdbeRecordCompare(nCellKey, pCellKey, pUnKey);
1.3781 + }else{
1.3782 + pCellKey = sqlite3Malloc( nCellKey );
1.3783 + if( pCellKey==0 ){
1.3784 + rc = SQLITE_NOMEM;
1.3785 + goto moveto_finish;
1.3786 + }
1.3787 + rc = sqlite3BtreeKey(pCur, 0, nCellKey, (void *)pCellKey);
1.3788 + c = sqlite3VdbeRecordCompare(nCellKey, pCellKey, pUnKey);
1.3789 + sqlite3_free(pCellKey);
1.3790 + if( rc ) goto moveto_finish;
1.3791 + }
1.3792 + }
1.3793 + if( c==0 ){
1.3794 + pCur->info.nKey = nCellKey;
1.3795 + if( pPage->intKey && !pPage->leaf ){
1.3796 + lwr = pCur->idx;
1.3797 + upr = lwr - 1;
1.3798 + break;
1.3799 + }else{
1.3800 + if( pRes ) *pRes = 0;
1.3801 + rc = SQLITE_OK;
1.3802 + goto moveto_finish;
1.3803 + }
1.3804 + }
1.3805 + if( c<0 ){
1.3806 + lwr = pCur->idx+1;
1.3807 + }else{
1.3808 + upr = pCur->idx-1;
1.3809 + }
1.3810 + if( lwr>upr ){
1.3811 + pCur->info.nKey = nCellKey;
1.3812 + break;
1.3813 + }
1.3814 + pCur->idx = (lwr+upr)/2;
1.3815 + }
1.3816 + assert( lwr==upr+1 );
1.3817 + assert( pPage->isInit );
1.3818 + if( pPage->leaf ){
1.3819 + chldPg = 0;
1.3820 + }else if( lwr>=pPage->nCell ){
1.3821 + chldPg = get4byte(&pPage->aData[pPage->hdrOffset+8]);
1.3822 + }else{
1.3823 + chldPg = get4byte(findCell(pPage, lwr));
1.3824 + }
1.3825 + if( chldPg==0 ){
1.3826 + assert( pCur->idx>=0 && pCur->idx<pCur->pPage->nCell );
1.3827 + if( pRes ) *pRes = c;
1.3828 + rc = SQLITE_OK;
1.3829 + goto moveto_finish;
1.3830 + }
1.3831 + pCur->idx = lwr;
1.3832 + pCur->info.nSize = 0;
1.3833 + pCur->validNKey = 0;
1.3834 + rc = moveToChild(pCur, chldPg);
1.3835 + if( rc ) goto moveto_finish;
1.3836 + }
1.3837 +moveto_finish:
1.3838 + if( pKey ){
1.3839 + /* If we created our own unpacked key at the top of this
1.3840 + ** procedure, then destroy that key before returning. */
1.3841 + sqlite3VdbeDeleteUnpackedRecord(pUnKey);
1.3842 + }
1.3843 + return rc;
1.3844 +}
1.3845 +
1.3846 +
1.3847 +/*
1.3848 +** Return TRUE if the cursor is not pointing at an entry of the table.
1.3849 +**
1.3850 +** TRUE will be returned after a call to sqlite3BtreeNext() moves
1.3851 +** past the last entry in the table or sqlite3BtreePrev() moves past
1.3852 +** the first entry. TRUE is also returned if the table is empty.
1.3853 +*/
1.3854 +int sqlite3BtreeEof(BtCursor *pCur){
1.3855 + /* TODO: What if the cursor is in CURSOR_REQUIRESEEK but all table entries
1.3856 + ** have been deleted? This API will need to change to return an error code
1.3857 + ** as well as the boolean result value.
1.3858 + */
1.3859 + return (CURSOR_VALID!=pCur->eState);
1.3860 +}
1.3861 +
1.3862 +/*
1.3863 +** Return the database connection handle for a cursor.
1.3864 +*/
1.3865 +sqlite3 *sqlite3BtreeCursorDb(const BtCursor *pCur){
1.3866 + assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) );
1.3867 + return pCur->pBtree->db;
1.3868 +}
1.3869 +
1.3870 +/*
1.3871 +** Advance the cursor to the next entry in the database. If
1.3872 +** successful then set *pRes=0. If the cursor
1.3873 +** was already pointing to the last entry in the database before
1.3874 +** this routine was called, then set *pRes=1.
1.3875 +*/
1.3876 +int sqlite3BtreeNext(BtCursor *pCur, int *pRes){
1.3877 + int rc;
1.3878 + MemPage *pPage;
1.3879 +
1.3880 + assert( cursorHoldsMutex(pCur) );
1.3881 + rc = restoreCursorPosition(pCur);
1.3882 + if( rc!=SQLITE_OK ){
1.3883 + return rc;
1.3884 + }
1.3885 + assert( pRes!=0 );
1.3886 + pPage = pCur->pPage;
1.3887 + if( CURSOR_INVALID==pCur->eState ){
1.3888 + *pRes = 1;
1.3889 + return SQLITE_OK;
1.3890 + }
1.3891 + if( pCur->skip>0 ){
1.3892 + pCur->skip = 0;
1.3893 + *pRes = 0;
1.3894 + return SQLITE_OK;
1.3895 + }
1.3896 + pCur->skip = 0;
1.3897 +
1.3898 + assert( pPage->isInit );
1.3899 + assert( pCur->idx<pPage->nCell );
1.3900 +
1.3901 + pCur->idx++;
1.3902 + pCur->info.nSize = 0;
1.3903 + pCur->validNKey = 0;
1.3904 + if( pCur->idx>=pPage->nCell ){
1.3905 + if( !pPage->leaf ){
1.3906 + rc = moveToChild(pCur, get4byte(&pPage->aData[pPage->hdrOffset+8]));
1.3907 + if( rc ) return rc;
1.3908 + rc = moveToLeftmost(pCur);
1.3909 + *pRes = 0;
1.3910 + return rc;
1.3911 + }
1.3912 + do{
1.3913 + if( sqlite3BtreeIsRootPage(pPage) ){
1.3914 + *pRes = 1;
1.3915 + pCur->eState = CURSOR_INVALID;
1.3916 + return SQLITE_OK;
1.3917 + }
1.3918 + sqlite3BtreeMoveToParent(pCur);
1.3919 + pPage = pCur->pPage;
1.3920 + }while( pCur->idx>=pPage->nCell );
1.3921 + *pRes = 0;
1.3922 + if( pPage->intKey ){
1.3923 + rc = sqlite3BtreeNext(pCur, pRes);
1.3924 + }else{
1.3925 + rc = SQLITE_OK;
1.3926 + }
1.3927 + return rc;
1.3928 + }
1.3929 + *pRes = 0;
1.3930 + if( pPage->leaf ){
1.3931 + return SQLITE_OK;
1.3932 + }
1.3933 + rc = moveToLeftmost(pCur);
1.3934 + return rc;
1.3935 +}
1.3936 +
1.3937 +
1.3938 +/*
1.3939 +** Step the cursor to the back to the previous entry in the database. If
1.3940 +** successful then set *pRes=0. If the cursor
1.3941 +** was already pointing to the first entry in the database before
1.3942 +** this routine was called, then set *pRes=1.
1.3943 +*/
1.3944 +int sqlite3BtreePrevious(BtCursor *pCur, int *pRes){
1.3945 + int rc;
1.3946 + Pgno pgno;
1.3947 + MemPage *pPage;
1.3948 +
1.3949 + assert( cursorHoldsMutex(pCur) );
1.3950 + rc = restoreCursorPosition(pCur);
1.3951 + if( rc!=SQLITE_OK ){
1.3952 + return rc;
1.3953 + }
1.3954 + pCur->atLast = 0;
1.3955 + if( CURSOR_INVALID==pCur->eState ){
1.3956 + *pRes = 1;
1.3957 + return SQLITE_OK;
1.3958 + }
1.3959 + if( pCur->skip<0 ){
1.3960 + pCur->skip = 0;
1.3961 + *pRes = 0;
1.3962 + return SQLITE_OK;
1.3963 + }
1.3964 + pCur->skip = 0;
1.3965 +
1.3966 + pPage = pCur->pPage;
1.3967 + assert( pPage->isInit );
1.3968 + assert( pCur->idx>=0 );
1.3969 + if( !pPage->leaf ){
1.3970 + pgno = get4byte( findCell(pPage, pCur->idx) );
1.3971 + rc = moveToChild(pCur, pgno);
1.3972 + if( rc ){
1.3973 + return rc;
1.3974 + }
1.3975 + rc = moveToRightmost(pCur);
1.3976 + }else{
1.3977 + while( pCur->idx==0 ){
1.3978 + if( sqlite3BtreeIsRootPage(pPage) ){
1.3979 + pCur->eState = CURSOR_INVALID;
1.3980 + *pRes = 1;
1.3981 + return SQLITE_OK;
1.3982 + }
1.3983 + sqlite3BtreeMoveToParent(pCur);
1.3984 + pPage = pCur->pPage;
1.3985 + }
1.3986 + pCur->idx--;
1.3987 + pCur->info.nSize = 0;
1.3988 + pCur->validNKey = 0;
1.3989 + if( pPage->intKey && !pPage->leaf ){
1.3990 + rc = sqlite3BtreePrevious(pCur, pRes);
1.3991 + }else{
1.3992 + rc = SQLITE_OK;
1.3993 + }
1.3994 + }
1.3995 + *pRes = 0;
1.3996 + return rc;
1.3997 +}
1.3998 +
1.3999 +/*
1.4000 +** Allocate a new page from the database file.
1.4001 +**
1.4002 +** The new page is marked as dirty. (In other words, sqlite3PagerWrite()
1.4003 +** has already been called on the new page.) The new page has also
1.4004 +** been referenced and the calling routine is responsible for calling
1.4005 +** sqlite3PagerUnref() on the new page when it is done.
1.4006 +**
1.4007 +** SQLITE_OK is returned on success. Any other return value indicates
1.4008 +** an error. *ppPage and *pPgno are undefined in the event of an error.
1.4009 +** Do not invoke sqlite3PagerUnref() on *ppPage if an error is returned.
1.4010 +**
1.4011 +** If the "nearby" parameter is not 0, then a (feeble) effort is made to
1.4012 +** locate a page close to the page number "nearby". This can be used in an
1.4013 +** attempt to keep related pages close to each other in the database file,
1.4014 +** which in turn can make database access faster.
1.4015 +**
1.4016 +** If the "exact" parameter is not 0, and the page-number nearby exists
1.4017 +** anywhere on the free-list, then it is guarenteed to be returned. This
1.4018 +** is only used by auto-vacuum databases when allocating a new table.
1.4019 +*/
1.4020 +static int allocateBtreePage(
1.4021 + BtShared *pBt,
1.4022 + MemPage **ppPage,
1.4023 + Pgno *pPgno,
1.4024 + Pgno nearby,
1.4025 + u8 exact
1.4026 +){
1.4027 + MemPage *pPage1;
1.4028 + int rc;
1.4029 + int n; /* Number of pages on the freelist */
1.4030 + int k; /* Number of leaves on the trunk of the freelist */
1.4031 + MemPage *pTrunk = 0;
1.4032 + MemPage *pPrevTrunk = 0;
1.4033 +
1.4034 + assert( sqlite3_mutex_held(pBt->mutex) );
1.4035 + pPage1 = pBt->pPage1;
1.4036 + n = get4byte(&pPage1->aData[36]);
1.4037 + if( n>0 ){
1.4038 + /* There are pages on the freelist. Reuse one of those pages. */
1.4039 + Pgno iTrunk;
1.4040 + u8 searchList = 0; /* If the free-list must be searched for 'nearby' */
1.4041 +
1.4042 + /* If the 'exact' parameter was true and a query of the pointer-map
1.4043 + ** shows that the page 'nearby' is somewhere on the free-list, then
1.4044 + ** the entire-list will be searched for that page.
1.4045 + */
1.4046 +#ifndef SQLITE_OMIT_AUTOVACUUM
1.4047 + if( exact && nearby<=pagerPagecount(pBt->pPager) ){
1.4048 + u8 eType;
1.4049 + assert( nearby>0 );
1.4050 + assert( pBt->autoVacuum );
1.4051 + rc = ptrmapGet(pBt, nearby, &eType, 0);
1.4052 + if( rc ) return rc;
1.4053 + if( eType==PTRMAP_FREEPAGE ){
1.4054 + searchList = 1;
1.4055 + }
1.4056 + *pPgno = nearby;
1.4057 + }
1.4058 +#endif
1.4059 +
1.4060 + /* Decrement the free-list count by 1. Set iTrunk to the index of the
1.4061 + ** first free-list trunk page. iPrevTrunk is initially 1.
1.4062 + */
1.4063 + rc = sqlite3PagerWrite(pPage1->pDbPage);
1.4064 + if( rc ) return rc;
1.4065 + put4byte(&pPage1->aData[36], n-1);
1.4066 +
1.4067 + /* The code within this loop is run only once if the 'searchList' variable
1.4068 + ** is not true. Otherwise, it runs once for each trunk-page on the
1.4069 + ** free-list until the page 'nearby' is located.
1.4070 + */
1.4071 + do {
1.4072 + pPrevTrunk = pTrunk;
1.4073 + if( pPrevTrunk ){
1.4074 + iTrunk = get4byte(&pPrevTrunk->aData[0]);
1.4075 + }else{
1.4076 + iTrunk = get4byte(&pPage1->aData[32]);
1.4077 + }
1.4078 + rc = sqlite3BtreeGetPage(pBt, iTrunk, &pTrunk, 0);
1.4079 + if( rc ){
1.4080 + pTrunk = 0;
1.4081 + goto end_allocate_page;
1.4082 + }
1.4083 +
1.4084 + k = get4byte(&pTrunk->aData[4]);
1.4085 + if( k==0 && !searchList ){
1.4086 + /* The trunk has no leaves and the list is not being searched.
1.4087 + ** So extract the trunk page itself and use it as the newly
1.4088 + ** allocated page */
1.4089 + assert( pPrevTrunk==0 );
1.4090 + rc = sqlite3PagerWrite(pTrunk->pDbPage);
1.4091 + if( rc ){
1.4092 + goto end_allocate_page;
1.4093 + }
1.4094 + *pPgno = iTrunk;
1.4095 + memcpy(&pPage1->aData[32], &pTrunk->aData[0], 4);
1.4096 + *ppPage = pTrunk;
1.4097 + pTrunk = 0;
1.4098 + TRACE(("ALLOCATE: %d trunk - %d free pages left\n", *pPgno, n-1));
1.4099 + }else if( k>pBt->usableSize/4 - 2 ){
1.4100 + /* Value of k is out of range. Database corruption */
1.4101 + rc = SQLITE_CORRUPT_BKPT;
1.4102 + goto end_allocate_page;
1.4103 +#ifndef SQLITE_OMIT_AUTOVACUUM
1.4104 + }else if( searchList && nearby==iTrunk ){
1.4105 + /* The list is being searched and this trunk page is the page
1.4106 + ** to allocate, regardless of whether it has leaves.
1.4107 + */
1.4108 + assert( *pPgno==iTrunk );
1.4109 + *ppPage = pTrunk;
1.4110 + searchList = 0;
1.4111 + rc = sqlite3PagerWrite(pTrunk->pDbPage);
1.4112 + if( rc ){
1.4113 + goto end_allocate_page;
1.4114 + }
1.4115 + if( k==0 ){
1.4116 + if( !pPrevTrunk ){
1.4117 + memcpy(&pPage1->aData[32], &pTrunk->aData[0], 4);
1.4118 + }else{
1.4119 + memcpy(&pPrevTrunk->aData[0], &pTrunk->aData[0], 4);
1.4120 + }
1.4121 + }else{
1.4122 + /* The trunk page is required by the caller but it contains
1.4123 + ** pointers to free-list leaves. The first leaf becomes a trunk
1.4124 + ** page in this case.
1.4125 + */
1.4126 + MemPage *pNewTrunk;
1.4127 + Pgno iNewTrunk = get4byte(&pTrunk->aData[8]);
1.4128 + rc = sqlite3BtreeGetPage(pBt, iNewTrunk, &pNewTrunk, 0);
1.4129 + if( rc!=SQLITE_OK ){
1.4130 + goto end_allocate_page;
1.4131 + }
1.4132 + rc = sqlite3PagerWrite(pNewTrunk->pDbPage);
1.4133 + if( rc!=SQLITE_OK ){
1.4134 + releasePage(pNewTrunk);
1.4135 + goto end_allocate_page;
1.4136 + }
1.4137 + memcpy(&pNewTrunk->aData[0], &pTrunk->aData[0], 4);
1.4138 + put4byte(&pNewTrunk->aData[4], k-1);
1.4139 + memcpy(&pNewTrunk->aData[8], &pTrunk->aData[12], (k-1)*4);
1.4140 + releasePage(pNewTrunk);
1.4141 + if( !pPrevTrunk ){
1.4142 + put4byte(&pPage1->aData[32], iNewTrunk);
1.4143 + }else{
1.4144 + rc = sqlite3PagerWrite(pPrevTrunk->pDbPage);
1.4145 + if( rc ){
1.4146 + goto end_allocate_page;
1.4147 + }
1.4148 + put4byte(&pPrevTrunk->aData[0], iNewTrunk);
1.4149 + }
1.4150 + }
1.4151 + pTrunk = 0;
1.4152 + TRACE(("ALLOCATE: %d trunk - %d free pages left\n", *pPgno, n-1));
1.4153 +#endif
1.4154 + }else{
1.4155 + /* Extract a leaf from the trunk */
1.4156 + int closest;
1.4157 + Pgno iPage;
1.4158 + unsigned char *aData = pTrunk->aData;
1.4159 + rc = sqlite3PagerWrite(pTrunk->pDbPage);
1.4160 + if( rc ){
1.4161 + goto end_allocate_page;
1.4162 + }
1.4163 + if( nearby>0 ){
1.4164 + int i, dist;
1.4165 + closest = 0;
1.4166 + dist = get4byte(&aData[8]) - nearby;
1.4167 + if( dist<0 ) dist = -dist;
1.4168 + for(i=1; i<k; i++){
1.4169 + int d2 = get4byte(&aData[8+i*4]) - nearby;
1.4170 + if( d2<0 ) d2 = -d2;
1.4171 + if( d2<dist ){
1.4172 + closest = i;
1.4173 + dist = d2;
1.4174 + }
1.4175 + }
1.4176 + }else{
1.4177 + closest = 0;
1.4178 + }
1.4179 +
1.4180 + iPage = get4byte(&aData[8+closest*4]);
1.4181 + if( !searchList || iPage==nearby ){
1.4182 + int nPage;
1.4183 + *pPgno = iPage;
1.4184 + nPage = pagerPagecount(pBt->pPager);
1.4185 + if( *pPgno>nPage ){
1.4186 + /* Free page off the end of the file */
1.4187 + rc = SQLITE_CORRUPT_BKPT;
1.4188 + goto end_allocate_page;
1.4189 + }
1.4190 + TRACE(("ALLOCATE: %d was leaf %d of %d on trunk %d"
1.4191 + ": %d more free pages\n",
1.4192 + *pPgno, closest+1, k, pTrunk->pgno, n-1));
1.4193 + if( closest<k-1 ){
1.4194 + memcpy(&aData[8+closest*4], &aData[4+k*4], 4);
1.4195 + }
1.4196 + put4byte(&aData[4], k-1);
1.4197 + rc = sqlite3BtreeGetPage(pBt, *pPgno, ppPage, 1);
1.4198 + if( rc==SQLITE_OK ){
1.4199 + sqlite3PagerDontRollback((*ppPage)->pDbPage);
1.4200 + rc = sqlite3PagerWrite((*ppPage)->pDbPage);
1.4201 + if( rc!=SQLITE_OK ){
1.4202 + releasePage(*ppPage);
1.4203 + }
1.4204 + }
1.4205 + searchList = 0;
1.4206 + }
1.4207 + }
1.4208 + releasePage(pPrevTrunk);
1.4209 + pPrevTrunk = 0;
1.4210 + }while( searchList );
1.4211 + }else{
1.4212 + /* There are no pages on the freelist, so create a new page at the
1.4213 + ** end of the file */
1.4214 + int nPage = pagerPagecount(pBt->pPager);
1.4215 + *pPgno = nPage + 1;
1.4216 +
1.4217 +#ifndef SQLITE_OMIT_AUTOVACUUM
1.4218 + if( pBt->nTrunc ){
1.4219 + /* An incr-vacuum has already run within this transaction. So the
1.4220 + ** page to allocate is not from the physical end of the file, but
1.4221 + ** at pBt->nTrunc.
1.4222 + */
1.4223 + *pPgno = pBt->nTrunc+1;
1.4224 + if( *pPgno==PENDING_BYTE_PAGE(pBt) ){
1.4225 + (*pPgno)++;
1.4226 + }
1.4227 + }
1.4228 + if( pBt->autoVacuum && PTRMAP_ISPAGE(pBt, *pPgno) ){
1.4229 + /* If *pPgno refers to a pointer-map page, allocate two new pages
1.4230 + ** at the end of the file instead of one. The first allocated page
1.4231 + ** becomes a new pointer-map page, the second is used by the caller.
1.4232 + */
1.4233 + TRACE(("ALLOCATE: %d from end of file (pointer-map page)\n", *pPgno));
1.4234 + assert( *pPgno!=PENDING_BYTE_PAGE(pBt) );
1.4235 + (*pPgno)++;
1.4236 + if( *pPgno==PENDING_BYTE_PAGE(pBt) ){ (*pPgno)++; }
1.4237 + }
1.4238 + if( pBt->nTrunc ){
1.4239 + pBt->nTrunc = *pPgno;
1.4240 + }
1.4241 +#endif
1.4242 +
1.4243 + assert( *pPgno!=PENDING_BYTE_PAGE(pBt) );
1.4244 + rc = sqlite3BtreeGetPage(pBt, *pPgno, ppPage, 0);
1.4245 + if( rc ) return rc;
1.4246 + rc = sqlite3PagerWrite((*ppPage)->pDbPage);
1.4247 + if( rc!=SQLITE_OK ){
1.4248 + releasePage(*ppPage);
1.4249 + }
1.4250 + TRACE(("ALLOCATE: %d from end of file\n", *pPgno));
1.4251 + }
1.4252 +
1.4253 + assert( *pPgno!=PENDING_BYTE_PAGE(pBt) );
1.4254 +
1.4255 +end_allocate_page:
1.4256 + releasePage(pTrunk);
1.4257 + releasePage(pPrevTrunk);
1.4258 + return rc;
1.4259 +}
1.4260 +
1.4261 +/*
1.4262 +** Add a page of the database file to the freelist.
1.4263 +**
1.4264 +** sqlite3PagerUnref() is NOT called for pPage.
1.4265 +*/
1.4266 +static int freePage(MemPage *pPage){
1.4267 + BtShared *pBt = pPage->pBt;
1.4268 + MemPage *pPage1 = pBt->pPage1;
1.4269 + int rc, n, k;
1.4270 +
1.4271 + /* Prepare the page for freeing */
1.4272 + assert( sqlite3_mutex_held(pPage->pBt->mutex) );
1.4273 + assert( pPage->pgno>1 );
1.4274 + pPage->isInit = 0;
1.4275 + releasePage(pPage->pParent);
1.4276 + pPage->pParent = 0;
1.4277 +
1.4278 + /* Increment the free page count on pPage1 */
1.4279 + rc = sqlite3PagerWrite(pPage1->pDbPage);
1.4280 + if( rc ) return rc;
1.4281 + n = get4byte(&pPage1->aData[36]);
1.4282 + put4byte(&pPage1->aData[36], n+1);
1.4283 +
1.4284 +#ifdef SQLITE_SECURE_DELETE
1.4285 + /* If the SQLITE_SECURE_DELETE compile-time option is enabled, then
1.4286 + ** always fully overwrite deleted information with zeros.
1.4287 + */
1.4288 + rc = sqlite3PagerWrite(pPage->pDbPage);
1.4289 + if( rc ) return rc;
1.4290 + memset(pPage->aData, 0, pPage->pBt->pageSize);
1.4291 +#endif
1.4292 +
1.4293 + /* If the database supports auto-vacuum, write an entry in the pointer-map
1.4294 + ** to indicate that the page is free.
1.4295 + */
1.4296 + if( ISAUTOVACUUM ){
1.4297 + rc = ptrmapPut(pBt, pPage->pgno, PTRMAP_FREEPAGE, 0);
1.4298 + if( rc ) return rc;
1.4299 + }
1.4300 +
1.4301 + if( n==0 ){
1.4302 + /* This is the first free page */
1.4303 + rc = sqlite3PagerWrite(pPage->pDbPage);
1.4304 + if( rc ) return rc;
1.4305 + memset(pPage->aData, 0, 8);
1.4306 + put4byte(&pPage1->aData[32], pPage->pgno);
1.4307 + TRACE(("FREE-PAGE: %d first\n", pPage->pgno));
1.4308 + }else{
1.4309 + /* Other free pages already exist. Retrive the first trunk page
1.4310 + ** of the freelist and find out how many leaves it has. */
1.4311 + MemPage *pTrunk;
1.4312 + rc = sqlite3BtreeGetPage(pBt, get4byte(&pPage1->aData[32]), &pTrunk, 0);
1.4313 + if( rc ) return rc;
1.4314 + k = get4byte(&pTrunk->aData[4]);
1.4315 + if( k>=pBt->usableSize/4 - 8 ){
1.4316 + /* The trunk is full. Turn the page being freed into a new
1.4317 + ** trunk page with no leaves.
1.4318 + **
1.4319 + ** Note that the trunk page is not really full until it contains
1.4320 + ** usableSize/4 - 2 entries, not usableSize/4 - 8 entries as we have
1.4321 + ** coded. But due to a coding error in versions of SQLite prior to
1.4322 + ** 3.6.0, databases with freelist trunk pages holding more than
1.4323 + ** usableSize/4 - 8 entries will be reported as corrupt. In order
1.4324 + ** to maintain backwards compatibility with older versions of SQLite,
1.4325 + ** we will contain to restrict the number of entries to usableSize/4 - 8
1.4326 + ** for now. At some point in the future (once everyone has upgraded
1.4327 + ** to 3.6.0 or later) we should consider fixing the conditional above
1.4328 + ** to read "usableSize/4-2" instead of "usableSize/4-8".
1.4329 + */
1.4330 + rc = sqlite3PagerWrite(pPage->pDbPage);
1.4331 + if( rc==SQLITE_OK ){
1.4332 + put4byte(pPage->aData, pTrunk->pgno);
1.4333 + put4byte(&pPage->aData[4], 0);
1.4334 + put4byte(&pPage1->aData[32], pPage->pgno);
1.4335 + TRACE(("FREE-PAGE: %d new trunk page replacing %d\n",
1.4336 + pPage->pgno, pTrunk->pgno));
1.4337 + }
1.4338 + }else if( k<0 ){
1.4339 + rc = SQLITE_CORRUPT;
1.4340 + }else{
1.4341 + /* Add the newly freed page as a leaf on the current trunk */
1.4342 + rc = sqlite3PagerWrite(pTrunk->pDbPage);
1.4343 + if( rc==SQLITE_OK ){
1.4344 + put4byte(&pTrunk->aData[4], k+1);
1.4345 + put4byte(&pTrunk->aData[8+k*4], pPage->pgno);
1.4346 +#ifndef SQLITE_SECURE_DELETE
1.4347 + sqlite3PagerDontWrite(pPage->pDbPage);
1.4348 +#endif
1.4349 + }
1.4350 + TRACE(("FREE-PAGE: %d leaf on trunk page %d\n",pPage->pgno,pTrunk->pgno));
1.4351 + }
1.4352 + releasePage(pTrunk);
1.4353 + }
1.4354 + return rc;
1.4355 +}
1.4356 +
1.4357 +/*
1.4358 +** Free any overflow pages associated with the given Cell.
1.4359 +*/
1.4360 +static int clearCell(MemPage *pPage, unsigned char *pCell){
1.4361 + BtShared *pBt = pPage->pBt;
1.4362 + CellInfo info;
1.4363 + Pgno ovflPgno;
1.4364 + int rc;
1.4365 + int nOvfl;
1.4366 + int ovflPageSize;
1.4367 +
1.4368 + assert( sqlite3_mutex_held(pPage->pBt->mutex) );
1.4369 + sqlite3BtreeParseCellPtr(pPage, pCell, &info);
1.4370 + if( info.iOverflow==0 ){
1.4371 + return SQLITE_OK; /* No overflow pages. Return without doing anything */
1.4372 + }
1.4373 + ovflPgno = get4byte(&pCell[info.iOverflow]);
1.4374 + ovflPageSize = pBt->usableSize - 4;
1.4375 + nOvfl = (info.nPayload - info.nLocal + ovflPageSize - 1)/ovflPageSize;
1.4376 + assert( ovflPgno==0 || nOvfl>0 );
1.4377 + while( nOvfl-- ){
1.4378 + MemPage *pOvfl;
1.4379 + if( ovflPgno==0 || ovflPgno>pagerPagecount(pBt->pPager) ){
1.4380 + return SQLITE_CORRUPT_BKPT;
1.4381 + }
1.4382 +
1.4383 + rc = getOverflowPage(pBt, ovflPgno, &pOvfl, (nOvfl==0)?0:&ovflPgno);
1.4384 + if( rc ) return rc;
1.4385 + rc = freePage(pOvfl);
1.4386 + sqlite3PagerUnref(pOvfl->pDbPage);
1.4387 + if( rc ) return rc;
1.4388 + }
1.4389 + return SQLITE_OK;
1.4390 +}
1.4391 +
1.4392 +/*
1.4393 +** Create the byte sequence used to represent a cell on page pPage
1.4394 +** and write that byte sequence into pCell[]. Overflow pages are
1.4395 +** allocated and filled in as necessary. The calling procedure
1.4396 +** is responsible for making sure sufficient space has been allocated
1.4397 +** for pCell[].
1.4398 +**
1.4399 +** Note that pCell does not necessarily need to point to the pPage->aData
1.4400 +** area. pCell might point to some temporary storage. The cell will
1.4401 +** be constructed in this temporary area then copied into pPage->aData
1.4402 +** later.
1.4403 +*/
1.4404 +static int fillInCell(
1.4405 + MemPage *pPage, /* The page that contains the cell */
1.4406 + unsigned char *pCell, /* Complete text of the cell */
1.4407 + const void *pKey, i64 nKey, /* The key */
1.4408 + const void *pData,int nData, /* The data */
1.4409 + int nZero, /* Extra zero bytes to append to pData */
1.4410 + int *pnSize /* Write cell size here */
1.4411 +){
1.4412 + int nPayload;
1.4413 + const u8 *pSrc;
1.4414 + int nSrc, n, rc;
1.4415 + int spaceLeft;
1.4416 + MemPage *pOvfl = 0;
1.4417 + MemPage *pToRelease = 0;
1.4418 + unsigned char *pPrior;
1.4419 + unsigned char *pPayload;
1.4420 + BtShared *pBt = pPage->pBt;
1.4421 + Pgno pgnoOvfl = 0;
1.4422 + int nHeader;
1.4423 + CellInfo info;
1.4424 +
1.4425 + assert( sqlite3_mutex_held(pPage->pBt->mutex) );
1.4426 +
1.4427 + /* Fill in the header. */
1.4428 + nHeader = 0;
1.4429 + if( !pPage->leaf ){
1.4430 + nHeader += 4;
1.4431 + }
1.4432 + if( pPage->hasData ){
1.4433 + nHeader += putVarint(&pCell[nHeader], nData+nZero);
1.4434 + }else{
1.4435 + nData = nZero = 0;
1.4436 + }
1.4437 + nHeader += putVarint(&pCell[nHeader], *(u64*)&nKey);
1.4438 + sqlite3BtreeParseCellPtr(pPage, pCell, &info);
1.4439 + assert( info.nHeader==nHeader );
1.4440 + assert( info.nKey==nKey );
1.4441 + assert( info.nData==nData+nZero );
1.4442 +
1.4443 + /* Fill in the payload */
1.4444 + nPayload = nData + nZero;
1.4445 + if( pPage->intKey ){
1.4446 + pSrc = pData;
1.4447 + nSrc = nData;
1.4448 + nData = 0;
1.4449 + }else{
1.4450 + nPayload += nKey;
1.4451 + pSrc = pKey;
1.4452 + nSrc = nKey;
1.4453 + }
1.4454 + *pnSize = info.nSize;
1.4455 + spaceLeft = info.nLocal;
1.4456 + pPayload = &pCell[nHeader];
1.4457 + pPrior = &pCell[info.iOverflow];
1.4458 +
1.4459 + while( nPayload>0 ){
1.4460 + if( spaceLeft==0 ){
1.4461 + int isExact = 0;
1.4462 +#ifndef SQLITE_OMIT_AUTOVACUUM
1.4463 + Pgno pgnoPtrmap = pgnoOvfl; /* Overflow page pointer-map entry page */
1.4464 + if( pBt->autoVacuum ){
1.4465 + do{
1.4466 + pgnoOvfl++;
1.4467 + } while(
1.4468 + PTRMAP_ISPAGE(pBt, pgnoOvfl) || pgnoOvfl==PENDING_BYTE_PAGE(pBt)
1.4469 + );
1.4470 + if( pgnoOvfl>1 ){
1.4471 + /* isExact = 1; */
1.4472 + }
1.4473 + }
1.4474 +#endif
1.4475 + rc = allocateBtreePage(pBt, &pOvfl, &pgnoOvfl, pgnoOvfl, isExact);
1.4476 +#ifndef SQLITE_OMIT_AUTOVACUUM
1.4477 + /* If the database supports auto-vacuum, and the second or subsequent
1.4478 + ** overflow page is being allocated, add an entry to the pointer-map
1.4479 + ** for that page now.
1.4480 + **
1.4481 + ** If this is the first overflow page, then write a partial entry
1.4482 + ** to the pointer-map. If we write nothing to this pointer-map slot,
1.4483 + ** then the optimistic overflow chain processing in clearCell()
1.4484 + ** may misinterpret the uninitialised values and delete the
1.4485 + ** wrong pages from the database.
1.4486 + */
1.4487 + if( pBt->autoVacuum && rc==SQLITE_OK ){
1.4488 + u8 eType = (pgnoPtrmap?PTRMAP_OVERFLOW2:PTRMAP_OVERFLOW1);
1.4489 + rc = ptrmapPut(pBt, pgnoOvfl, eType, pgnoPtrmap);
1.4490 + if( rc ){
1.4491 + releasePage(pOvfl);
1.4492 + }
1.4493 + }
1.4494 +#endif
1.4495 + if( rc ){
1.4496 + releasePage(pToRelease);
1.4497 + return rc;
1.4498 + }
1.4499 + put4byte(pPrior, pgnoOvfl);
1.4500 + releasePage(pToRelease);
1.4501 + pToRelease = pOvfl;
1.4502 + pPrior = pOvfl->aData;
1.4503 + put4byte(pPrior, 0);
1.4504 + pPayload = &pOvfl->aData[4];
1.4505 + spaceLeft = pBt->usableSize - 4;
1.4506 + }
1.4507 + n = nPayload;
1.4508 + if( n>spaceLeft ) n = spaceLeft;
1.4509 + if( nSrc>0 ){
1.4510 + if( n>nSrc ) n = nSrc;
1.4511 + assert( pSrc );
1.4512 + memcpy(pPayload, pSrc, n);
1.4513 + }else{
1.4514 + memset(pPayload, 0, n);
1.4515 + }
1.4516 + nPayload -= n;
1.4517 + pPayload += n;
1.4518 + pSrc += n;
1.4519 + nSrc -= n;
1.4520 + spaceLeft -= n;
1.4521 + if( nSrc==0 ){
1.4522 + nSrc = nData;
1.4523 + pSrc = pData;
1.4524 + }
1.4525 + }
1.4526 + releasePage(pToRelease);
1.4527 + return SQLITE_OK;
1.4528 +}
1.4529 +
1.4530 +
1.4531 +/*
1.4532 +** Change the MemPage.pParent pointer on the page whose number is
1.4533 +** given in the second argument so that MemPage.pParent holds the
1.4534 +** pointer in the third argument.
1.4535 +**
1.4536 +** If the final argument, updatePtrmap, is non-zero and the database
1.4537 +** is an auto-vacuum database, then the pointer-map entry for pgno
1.4538 +** is updated. Returns SQLITE_OK, or the result of ptrmapPut().
1.4539 +*/
1.4540 +static int reparentPage(
1.4541 + BtShared *pBt, /* B-Tree structure */
1.4542 + Pgno pgno, /* Page number of child being adopted */
1.4543 + MemPage *pNewParent, /* New parent of pgno */
1.4544 + int idx, /* Index of child page pgno in pNewParent */
1.4545 + int updatePtrmap /* If true, update pointer-map for pgno */
1.4546 +){
1.4547 + MemPage *pThis; /* In-memory page object for pgno */
1.4548 + DbPage *pDbPage; /* Pager handle returned by sqlite3PagerLookup() */
1.4549 +
1.4550 + assert( sqlite3_mutex_held(pBt->mutex) );
1.4551 + assert( pNewParent!=0 );
1.4552 + if( pgno==0 ) return SQLITE_OK; /* No child page: nothing to do */
1.4553 + assert( pBt->pPager!=0 );
1.4554 + pDbPage = sqlite3PagerLookup(pBt->pPager, pgno); /* A NULL result skips the in-memory update */
1.4555 + if( pDbPage ){
1.4556 + pThis = (MemPage *)sqlite3PagerGetExtra(pDbPage);
1.4557 + if( pThis->isInit ){
1.4558 + assert( pThis->aData==sqlite3PagerGetData(pDbPage) );
1.4559 + if( pThis->pParent!=pNewParent ){
1.4560 + if( pThis->pParent ) sqlite3PagerUnref(pThis->pParent->pDbPage); /* Drop the reference on the old parent */
1.4561 + pThis->pParent = pNewParent;
1.4562 + sqlite3PagerRef(pNewParent->pDbPage); /* ...and take one on the new parent */
1.4563 + }
1.4564 + pThis->idxParent = idx;
1.4565 + }
1.4566 + sqlite3PagerUnref(pDbPage); /* Balances the sqlite3PagerLookup() above */
1.4567 + }
1.4568 +
1.4569 + if( ISAUTOVACUUM && updatePtrmap ){
1.4570 + return ptrmapPut(pBt, pgno, PTRMAP_BTREE, pNewParent->pgno);
1.4571 + }
1.4572 +
1.4573 +#ifndef NDEBUG
1.4574 + /* If the updatePtrmap flag was clear, assert that the entry in the
1.4575 + ** pointer-map is already correct. The check is made only when the
1.4576 + ** pointer-map page is found by sqlite3PagerLookup(). */
1.4577 + if( ISAUTOVACUUM ){
1.4578 + pDbPage = sqlite3PagerLookup(pBt->pPager,PTRMAP_PAGENO(pBt,pgno));
1.4579 + if( pDbPage ){
1.4580 + u8 eType;
1.4581 + Pgno ii;
1.4582 + int rc = ptrmapGet(pBt, pgno, &eType, &ii);
1.4583 + assert( rc==SQLITE_OK && ii==pNewParent->pgno && eType==PTRMAP_BTREE );
1.4584 + sqlite3PagerUnref(pDbPage);
1.4585 + }
1.4586 + }
1.4587 +#endif
1.4588 +
1.4589 + return SQLITE_OK;
1.4590 +}
1.4591 +
1.4592 +
1.4593 +
1.4594 +/*
1.4595 +** Change the pParent pointer of all children of pPage to point back
1.4596 +** to pPage.
1.4597 +**
1.4598 +** In other words, for every child of pPage, invoke reparentPage()
1.4599 +** to make sure that each child knows that pPage is its parent.
1.4600 +**
1.4601 +** This routine gets called after you memcpy() one page into
1.4602 +** another. Leaf pages have no children, so nothing is done for them.
1.4603 +**
1.4604 +** If updatePtrmap is true, then the pointer-map entries for all child
1.4605 +** pages of pPage are updated. Returns SQLITE_OK or a reparentPage() error.
1.4606 +*/
1.4607 +static int reparentChildPages(MemPage *pPage, int updatePtrmap){
1.4608 + int rc = SQLITE_OK;
1.4609 + assert( sqlite3_mutex_held(pPage->pBt->mutex) );
1.4610 + if( !pPage->leaf ){
1.4611 + int i;
1.4612 + BtShared *pBt = pPage->pBt;
1.4613 + Pgno iRight = get4byte(&pPage->aData[pPage->hdrOffset+8]); /* Right-most child, read from the page header */
1.4614 +
1.4615 + for(i=0; i<pPage->nCell; i++){
1.4616 + u8 *pCell = findCell(pPage, i);
1.4617 + rc = reparentPage(pBt, get4byte(pCell), pPage, i, updatePtrmap); /* First 4 bytes of the cell give the child page number */
1.4618 + if( rc!=SQLITE_OK ) return rc;
1.4619 + }
1.4620 + rc = reparentPage(pBt, iRight, pPage, i, updatePtrmap); /* i==nCell here: the index used for the right-most child */
1.4621 + pPage->idxShift = 0; /* Reset even if the last reparentPage() call failed */
1.4622 + }
1.4623 + return rc;
1.4624 +}
1.4625 +
1.4626 +/*
1.4627 +** Remove the i-th cell from pPage. This routine affects pPage only.
1.4628 +** The cell content is not freed or deallocated. It is assumed that
1.4629 +** the cell content has been copied someplace else. This routine just
1.4630 +** removes the reference to the cell from pPage.
1.4631 +**
1.4632 +** "sz" must be the number of bytes in the cell.
1.4633 +*/
1.4634 +static void dropCell(MemPage *pPage, int idx, int sz){
1.4635 + int i; /* Loop counter */
1.4636 + int pc; /* Offset to cell content of cell being deleted */
1.4637 + u8 *data; /* pPage->aData */
1.4638 + u8 *ptr; /* Used to move bytes around within data[] */
1.4639 +
1.4640 + assert( idx>=0 && idx<pPage->nCell );
1.4641 + assert( sz==cellSize(pPage, idx) );
1.4642 + assert( sqlite3PagerIswriteable(pPage->pDbPage) );
1.4643 + assert( sqlite3_mutex_held(pPage->pBt->mutex) );
1.4644 + data = pPage->aData;
1.4645 + ptr = &data[pPage->cellOffset + 2*idx]; /* Slot idx of the 2-byte cell pointer array */
1.4646 + pc = get2byte(ptr);
1.4647 + assert( pc>10 && pc+sz<=pPage->pBt->usableSize );
1.4648 + freeSpace(pPage, pc, sz); /* NOTE(review): header says content is "not freed"; presumably that refers to overflow pages - confirm */
1.4649 + for(i=idx+1; i<pPage->nCell; i++, ptr+=2){ /* Shift later cell pointers down one slot */
1.4650 + ptr[0] = ptr[2];
1.4651 + ptr[1] = ptr[3];
1.4652 + }
1.4653 + pPage->nCell--;
1.4654 + put2byte(&data[pPage->hdrOffset+3], pPage->nCell); /* Keep the on-page cell count in step */
1.4655 + pPage->nFree += 2; /* The 2-byte pointer slot is free again */
1.4656 + pPage->idxShift = 1; /* Cell indices on this page have changed */
1.4657 +}
1.4658 +
1.4659 +/*
1.4660 +** Insert a new cell on pPage at cell index "i". pCell points to the
1.4661 +** content of the cell.
1.4662 +**
1.4663 +** If the cell content will fit on the page, then put it there. If it
1.4664 +** will not fit, then make a copy of the cell content into pTemp if
1.4665 +** pTemp is not null. Regardless of pTemp, allocate a new entry
1.4666 +** in pPage->aOvfl[] and make it point to the cell content (either
1.4667 +** in pTemp or the original pCell) and also record its index.
1.4668 +** Allocating a new entry in pPage->aOvfl[] implies that
1.4669 +** pPage->nOverflow is incremented.
1.4670 +**
1.4671 +** If nSkip is non-zero, then do not copy the first nSkip bytes of the
1.4672 +** cell. The caller will overwrite them after this function returns. If
1.4673 +** nSkip is non-zero, then pCell might not point to a valid memory location
1.4674 +** (but pCell+nSkip is always valid). Returns SQLITE_OK or an error code.
1.4675 +*/
1.4676 +static int insertCell(
1.4677 + MemPage *pPage, /* Page into which we are copying */
1.4678 + int i, /* New cell becomes the i-th cell of the page */
1.4679 + u8 *pCell, /* Content of the new cell */
1.4680 + int sz, /* Bytes of content in pCell */
1.4681 + u8 *pTemp, /* Temp storage space for pCell, if needed */
1.4682 + u8 nSkip /* Do not write the first nSkip bytes of the cell */
1.4683 +){
1.4684 + int idx; /* Where to write new cell content in data[] */
1.4685 + int j; /* Loop counter */
1.4686 + int top; /* First byte of content for any cell in data[] */
1.4687 + int end; /* First byte past the last cell pointer in data[] */
1.4688 + int ins; /* Index in data[] where new cell pointer is inserted */
1.4689 + int hdr; /* Offset into data[] of the page header */
1.4690 + int cellOffset; /* Address of first cell pointer in data[] */
1.4691 + u8 *data; /* The content of the whole page */
1.4692 + u8 *ptr; /* Used for moving information around in data[] */
1.4693 +
1.4694 + assert( i>=0 && i<=pPage->nCell+pPage->nOverflow );
1.4695 + assert( sz==cellSizePtr(pPage, pCell) );
1.4696 + assert( sqlite3_mutex_held(pPage->pBt->mutex) );
1.4697 + if( pPage->nOverflow || sz+2>pPage->nFree ){ /* Already overflowing, or no room for the cell plus its 2-byte pointer */
1.4698 + if( pTemp ){
1.4699 + memcpy(pTemp+nSkip, pCell+nSkip, sz-nSkip);
1.4700 + pCell = pTemp;
1.4701 + }
1.4702 + j = pPage->nOverflow++;
1.4703 + assert( j<sizeof(pPage->aOvfl)/sizeof(pPage->aOvfl[0]) );
1.4704 + pPage->aOvfl[j].pCell = pCell;
1.4705 + pPage->aOvfl[j].idx = i;
1.4706 + pPage->nFree = 0;
1.4707 + }else{
1.4708 + int rc = sqlite3PagerWrite(pPage->pDbPage); /* The page itself is modified only on this branch */
1.4709 + if( rc!=SQLITE_OK ){
1.4710 + return rc;
1.4711 + }
1.4712 + assert( sqlite3PagerIswriteable(pPage->pDbPage) );
1.4713 + data = pPage->aData;
1.4714 + hdr = pPage->hdrOffset;
1.4715 + top = get2byte(&data[hdr+5]);
1.4716 + cellOffset = pPage->cellOffset;
1.4717 + end = cellOffset + 2*pPage->nCell + 2; /* Already counts the 2 bytes of the new cell pointer */
1.4718 + ins = cellOffset + 2*i;
1.4719 + if( end > top - sz ){ /* Not enough room between pointer array and content: defragment first */
1.4720 + defragmentPage(pPage);
1.4721 + top = get2byte(&data[hdr+5]);
1.4722 + assert( end + sz <= top );
1.4723 + }
1.4724 + idx = allocateSpace(pPage, sz);
1.4725 + assert( idx>0 );
1.4726 + assert( end <= get2byte(&data[hdr+5]) );
1.4727 + pPage->nCell++;
1.4728 + pPage->nFree -= 2; /* Space taken by the new cell pointer */
1.4729 + memcpy(&data[idx+nSkip], pCell+nSkip, sz-nSkip);
1.4730 + for(j=end-2, ptr=&data[j]; j>ins; j-=2, ptr-=2){ /* Shift cell pointers from slot i onward up by one */
1.4731 + ptr[0] = ptr[-2];
1.4732 + ptr[1] = ptr[-1];
1.4733 + }
1.4734 + put2byte(&data[ins], idx);
1.4735 + put2byte(&data[hdr+3], pPage->nCell);
1.4736 + pPage->idxShift = 1;
1.4737 +#ifndef SQLITE_OMIT_AUTOVACUUM
1.4738 + if( pPage->pBt->autoVacuum ){
1.4739 + /* The cell may contain a pointer to an overflow page. If so, write
1.4740 + ** the entry for the overflow page into the pointer map.
1.4741 + */
1.4742 + CellInfo info;
1.4743 + sqlite3BtreeParseCellPtr(pPage, pCell, &info);
1.4744 + assert( (info.nData+(pPage->intKey?0:info.nKey))==info.nPayload );
1.4745 + if( (info.nData+(pPage->intKey?0:info.nKey))>info.nLocal ){
1.4746 + Pgno pgnoOvfl = get4byte(&pCell[info.iOverflow]);
1.4747 + rc = ptrmapPut(pPage->pBt, pgnoOvfl, PTRMAP_OVERFLOW1, pPage->pgno);
1.4748 + if( rc!=SQLITE_OK ) return rc;
1.4749 + }
1.4750 + }
1.4751 +#endif
1.4752 + }
1.4753 +
1.4754 + return SQLITE_OK;
1.4755 +}
1.4756 +
1.4757 +/*
1.4758 +** Add a list of cells to a page. The page should be initially empty.
1.4759 +** The cells are guaranteed to fit on the page.
1.4760 +*/
1.4761 +static void assemblePage(
1.4762 + MemPage *pPage, /* The page to be assembled */
1.4763 + int nCell, /* The number of cells to add to this page */
1.4764 + u8 **apCell, /* Pointers to cell bodies */
1.4765 + u16 *aSize /* Sizes of the cells */
1.4766 +){
1.4767 + int i; /* Loop counter */
1.4768 + int totalSize; /* Total size of all cells */
1.4769 + int hdr; /* Index of page header */
1.4770 + int cellptr; /* Address of next cell pointer */
1.4771 + int cellbody; /* Address of next cell body */
1.4772 + u8 *data; /* Data for the page */
1.4773 +
1.4774 + assert( pPage->nOverflow==0 );
1.4775 + assert( sqlite3_mutex_held(pPage->pBt->mutex) );
1.4776 + totalSize = 0;
1.4777 + for(i=0; i<nCell; i++){
1.4778 + totalSize += aSize[i];
1.4779 + }
1.4780 + assert( totalSize+2*nCell<=pPage->nFree ); /* Bodies plus one 2-byte pointer per cell must fit */
1.4781 + assert( pPage->nCell==0 );
1.4782 + cellptr = pPage->cellOffset;
1.4783 + data = pPage->aData;
1.4784 + hdr = pPage->hdrOffset;
1.4785 + put2byte(&data[hdr+3], nCell); /* Store the cell count in the page header */
1.4786 + if( nCell ){
1.4787 + cellbody = allocateSpace(pPage, totalSize); /* One region for all cell bodies */
1.4788 + assert( cellbody>0 );
1.4789 + assert( pPage->nFree >= 2*nCell );
1.4790 + pPage->nFree -= 2*nCell; /* Account for the cell pointer array */
1.4791 + for(i=0; i<nCell; i++){
1.4792 + put2byte(&data[cellptr], cellbody);
1.4793 + memcpy(&data[cellbody], apCell[i], aSize[i]);
1.4794 + cellptr += 2;
1.4795 + cellbody += aSize[i];
1.4796 + }
1.4797 + assert( cellbody==pPage->pBt->usableSize ); /* Bodies are expected to end exactly at the end of the usable area */
1.4798 + }
1.4799 + pPage->nCell = nCell;
1.4800 +}
1.4801 +
1.4802 +/*
1.4803 +** The following parameters determine how many adjacent pages get involved
1.4804 +** in a balancing operation. NN is the number of neighbors on either side
1.4805 +** of the page that participate in the balancing operation. NB is the
1.4806 +** total number of pages that participate, including the target page and
1.4807 +** NN neighbors on either side.
1.4808 +**
1.4809 +** The minimum value of NN is 1 (of course). Increasing NN above 1
1.4810 +** (to 2 or 3) gives a modest improvement in SELECT and DELETE performance
1.4811 +** in exchange for a larger degradation in INSERT and UPDATE performance.
1.4812 +** A value of 1 for NN appears to give the best results overall.
1.4813 +*/
1.4814 +#define NN 1 /* Number of neighbors on either side of pPage */
1.4815 +#define NB (NN*2+1) /* Total pages involved in the balance */
1.4816 +
1.4817 +/* Forward reference */
1.4818 +static int balance(MemPage*, int);
1.4819 +
1.4820 +#ifndef SQLITE_OMIT_QUICKBALANCE
1.4821 +/*
1.4822 +** This version of balance() handles the common special case where
1.4823 +** a new entry is being inserted on the extreme right-end of the
1.4824 +** tree, in other words, when the new entry will become the largest
1.4825 +** entry in the tree.
1.4826 +**
1.4827 +** Instead of trying to balance the 3 right-most leaf pages, just add
1.4828 +** a new page to the right-hand side and put the one new entry in
1.4829 +** that page. This leaves the right side of the tree somewhat
1.4830 +** unbalanced. But odds are that we will be inserting new entries
1.4831 +** at the end soon afterwards so the nearly empty page will quickly
1.4832 +** fill up. On average.
1.4833 +**
1.4834 +** pPage is the leaf page which is the right-most page in the tree.
1.4835 +** pParent is its parent. pPage must have a single overflow entry
1.4836 +** which is also the right-most entry on the page. Returns an error
1.4837 +** code from page allocation or ptrmap updates, else balance(pParent,0). */
1.4838 +static int balance_quick(MemPage *pPage, MemPage *pParent){
1.4839 + int rc;
1.4840 + MemPage *pNew; /* The newly allocated right-most leaf */
1.4841 + Pgno pgnoNew; /* Page number of pNew */
1.4842 + u8 *pCell; /* The overflow cell, then the last cell stored on pPage */
1.4843 + u16 szCell; /* Size of the overflow cell */
1.4844 + CellInfo info; /* Parsed form of the last cell on pPage */
1.4845 + BtShared *pBt = pPage->pBt;
1.4846 + int parentIdx = pParent->nCell; /* pParent new divider cell index */
1.4847 + int parentSize; /* Size of new divider cell */
1.4848 + u8 parentCell[64]; /* Space for the new divider cell */
1.4849 +
1.4850 + assert( sqlite3_mutex_held(pPage->pBt->mutex) );
1.4851 +
1.4852 + /* Allocate a new page. Insert the overflow cell from pPage
1.4853 + ** into it. Then remove the overflow cell from pPage.
1.4854 + */
1.4855 + rc = allocateBtreePage(pBt, &pNew, &pgnoNew, 0, 0);
1.4856 + if( rc!=SQLITE_OK ){
1.4857 + return rc;
1.4858 + }
1.4859 + pCell = pPage->aOvfl[0].pCell;
1.4860 + szCell = cellSizePtr(pPage, pCell);
1.4861 + zeroPage(pNew, pPage->aData[0]); /* Initialize pNew using the first byte of pPage's data */
1.4862 + assemblePage(pNew, 1, &pCell, &szCell);
1.4863 + pPage->nOverflow = 0; /* The overflow cell now lives on pNew */
1.4864 +
1.4865 + /* Set the parent of the newly allocated page to pParent. */
1.4866 + pNew->pParent = pParent;
1.4867 + sqlite3PagerRef(pParent->pDbPage); /* The pParent link holds a reference on the parent page */
1.4868 +
1.4869 + /* pPage is currently the right-child of pParent. Change this
1.4870 + ** so that the right-child is the new page allocated above and
1.4871 + ** pPage is the next-to-right child.
1.4872 + **
1.4873 + ** Ignore the return value of the call to fillInCell(). fillInCell()
1.4874 + ** may only return other than SQLITE_OK if it is required to allocate
1.4875 + ** one or more overflow pages. Since an internal table B-Tree cell
1.4876 + ** may never spill over onto an overflow page (it is a maximum of
1.4877 + ** 13 bytes in size), it is not necessary to check the return code.
1.4878 + **
1.4879 + ** Similarly, the insertCell() function cannot fail if the page
1.4880 + ** being inserted into is already writable and the cell does not
1.4881 + ** contain an overflow pointer. So ignore this return code too.
1.4882 + */
1.4883 + assert( pPage->nCell>0 );
1.4884 + pCell = findCell(pPage, pPage->nCell-1);
1.4885 + sqlite3BtreeParseCellPtr(pPage, pCell, &info);
1.4886 + fillInCell(pParent, parentCell, 0, info.nKey, 0, 0, 0, &parentSize); /* Divider carries only the key of pPage's last cell */
1.4887 + assert( parentSize<64 );
1.4888 + assert( sqlite3PagerIswriteable(pParent->pDbPage) );
1.4889 + insertCell(pParent, parentIdx, parentCell, parentSize, 0, 4); /* nSkip=4: the child pointer is written on the next line */
1.4890 + put4byte(findOverflowCell(pParent,parentIdx), pPage->pgno); /* pPage becomes the divider's left child */
1.4891 + put4byte(&pParent->aData[pParent->hdrOffset+8], pgnoNew); /* The new page becomes the right-most child */
1.4892 +
1.4893 + /* If this is an auto-vacuum database, update the pointer map
1.4894 + ** with entries for the new page, and any pointer from the
1.4895 + ** cell on the page to an overflow page.
1.4896 + */
1.4897 + if( ISAUTOVACUUM ){
1.4898 + rc = ptrmapPut(pBt, pgnoNew, PTRMAP_BTREE, pParent->pgno);
1.4899 + if( rc==SQLITE_OK ){
1.4900 + rc = ptrmapPutOvfl(pNew, 0);
1.4901 + }
1.4902 + if( rc!=SQLITE_OK ){
1.4903 + releasePage(pNew);
1.4904 + return rc;
1.4905 + }
1.4906 + }
1.4907 +
1.4908 + /* Release the reference to the new page and balance the parent page,
1.4909 + ** in case the divider cell inserted caused it to become overfull.
1.4910 + */
1.4911 + releasePage(pNew);
1.4912 + return balance(pParent, 0);
1.4913 +}
1.4914 +#endif /* SQLITE_OMIT_QUICKBALANCE */
1.4915 +
1.4916 +/*
1.4917 +** This routine redistributes Cells on pPage and up to NN*2 siblings
1.4918 +** of pPage so that all pages have about the same amount of free space.
1.4919 +** Usually NN siblings on either side of pPage are used in the balancing,
1.4920 +** though more siblings might come from one side if pPage is the first
1.4921 +** or last child of its parent. If pPage has fewer than 2*NN siblings
1.4922 +** (something which can only happen if pPage is the root page or a
1.4923 +** child of root) then all available siblings participate in the balancing.
1.4924 +**
1.4925 +** The number of siblings of pPage might be increased or decreased by one or
1.4926 +** two in an effort to keep pages nearly full but not over full. The root page
1.4927 +** is special and is allowed to be nearly empty. If pPage is
1.4928 +** the root page, then the depth of the tree might be increased
1.4929 +** or decreased by one, as necessary, to keep the root page from being
1.4930 +** overfull or completely empty.
1.4931 +**
1.4932 +** Note that when this routine is called, some of the Cells on pPage
1.4933 +** might not actually be stored in pPage->aData[]. This can happen
1.4934 +** if the page is overfull. Part of the job of this routine is to
1.4935 +** make sure all Cells for pPage once again fit in pPage->aData[].
1.4936 +**
1.4937 +** In the course of balancing the siblings of pPage, the parent of pPage
1.4938 +** might become overfull or underfull. If that happens, then this routine
1.4939 +** is called recursively on the parent.
1.4940 +**
1.4941 +** If this routine fails for any reason, it might leave the database
1.4942 +** in a corrupted state. So if this routine fails, the database should
1.4943 +** be rolled back.
1.4944 +*/
1.4945 +static int balance_nonroot(MemPage *pPage){
1.4946 + MemPage *pParent; /* The parent of pPage */
1.4947 + BtShared *pBt; /* The whole database */
1.4948 + int nCell = 0; /* Number of cells in apCell[] */
1.4949 + int nMaxCells = 0; /* Allocated size of apCell, szCell, aFrom. */
1.4950 + int nOld; /* Number of pages in apOld[] */
1.4951 + int nNew; /* Number of pages in apNew[] */
1.4952 + int nDiv; /* Number of cells in apDiv[] */
1.4953 + int i, j, k; /* Loop counters */
1.4954 + int idx; /* Index of pPage in pParent->aCell[] */
1.4955 + int nxDiv; /* Next divider slot in pParent->aCell[] */
1.4956 + int rc; /* The return code */
1.4957 + int leafCorrection; /* 4 if pPage is a leaf. 0 if not */
1.4958 + int leafData; /* True if pPage is a leaf of a LEAFDATA tree */
1.4959 + int usableSpace; /* Bytes in pPage beyond the header */
1.4960 + int pageFlags; /* Value of pPage->aData[0] */
1.4961 + int subtotal; /* Subtotal of bytes in cells on one page */
1.4962 + int iSpace1 = 0; /* First unused byte of aSpace1[] */
1.4963 + int iSpace2 = 0; /* First unused byte of aSpace2[] */
1.4964 + int szScratch; /* Size of scratch memory requested */
1.4965 + MemPage *apOld[NB]; /* pPage and up to two siblings */
1.4966 + Pgno pgnoOld[NB]; /* Page numbers for each page in apOld[] */
1.4967 + MemPage *apCopy[NB]; /* Private copies of apOld[] pages */
1.4968 + MemPage *apNew[NB+2]; /* pPage and up to NB siblings after balancing */
1.4969 + Pgno pgnoNew[NB+2]; /* Page numbers for each page in apNew[] */
1.4970 + u8 *apDiv[NB]; /* Divider cells in pParent */
1.4971 + int cntNew[NB+2]; /* Index in aCell[] of cell after i-th page */
1.4972 + int szNew[NB+2]; /* Combined size of cells place on i-th page */
1.4973 + u8 **apCell = 0; /* All cells begin balanced */
1.4974 + u16 *szCell; /* Local size of all cells in apCell[] */
1.4975 + u8 *aCopy[NB]; /* Space for holding data of apCopy[] */
1.4976 + u8 *aSpace1; /* Space for copies of dividers cells before balance */
1.4977 + u8 *aSpace2 = 0; /* Space for overflow dividers cells after balance */
1.4978 + u8 *aFrom = 0;
1.4979 +
1.4980 + assert( sqlite3_mutex_held(pPage->pBt->mutex) );
1.4981 +
1.4982 + /*
1.4983 + ** Find the parent page.
1.4984 + */
1.4985 + assert( pPage->isInit );
1.4986 + assert( sqlite3PagerIswriteable(pPage->pDbPage) || pPage->nOverflow==1 );
1.4987 + pBt = pPage->pBt;
1.4988 + pParent = pPage->pParent;
1.4989 + assert( pParent );
1.4990 + if( SQLITE_OK!=(rc = sqlite3PagerWrite(pParent->pDbPage)) ){
1.4991 + return rc;
1.4992 + }
1.4993 +
1.4994 + TRACE(("BALANCE: begin page %d child of %d\n", pPage->pgno, pParent->pgno));
1.4995 +
1.4996 +#ifndef SQLITE_OMIT_QUICKBALANCE
1.4997 + /*
1.4998 + ** A special case: If a new entry has just been inserted into a
1.4999 + ** table (that is, a btree with integer keys and all data at the leaves)
1.5000 + ** and the new entry is the right-most entry in the tree (it has the
1.5001 + ** largest key) then use the special balance_quick() routine for
1.5002 + ** balancing. balance_quick() is much faster and results in a tighter
1.5003 + ** packing of data in the common case.
1.5004 + */
1.5005 + if( pPage->leaf &&
1.5006 + pPage->intKey &&
1.5007 + pPage->nOverflow==1 &&
1.5008 + pPage->aOvfl[0].idx==pPage->nCell &&
1.5009 + pPage->pParent->pgno!=1 &&
1.5010 + get4byte(&pParent->aData[pParent->hdrOffset+8])==pPage->pgno
1.5011 + ){
1.5012 + assert( pPage->intKey );
1.5013 + /*
1.5014 + ** TODO: Check the siblings to the left of pPage. It may be that
1.5015 + ** they are not full and no new page is required.
1.5016 + */
1.5017 + return balance_quick(pPage, pParent);
1.5018 + }
1.5019 +#endif
1.5020 +
1.5021 + if( SQLITE_OK!=(rc = sqlite3PagerWrite(pPage->pDbPage)) ){
1.5022 + return rc;
1.5023 + }
1.5024 +
1.5025 + /*
1.5026 + ** Find the cell in the parent page whose left child points back
1.5027 + ** to pPage. The "idx" variable is the index of that cell. If pPage
1.5028 + ** is the rightmost child of pParent then set idx to pParent->nCell
1.5029 + */
1.5030 + if( pParent->idxShift ){
1.5031 + Pgno pgno;
1.5032 + pgno = pPage->pgno;
1.5033 + assert( pgno==sqlite3PagerPagenumber(pPage->pDbPage) );
1.5034 + for(idx=0; idx<pParent->nCell; idx++){
1.5035 + if( get4byte(findCell(pParent, idx))==pgno ){
1.5036 + break;
1.5037 + }
1.5038 + }
1.5039 + assert( idx<pParent->nCell
1.5040 + || get4byte(&pParent->aData[pParent->hdrOffset+8])==pgno );
1.5041 + }else{
1.5042 + idx = pPage->idxParent;
1.5043 + }
1.5044 +
1.5045 + /*
1.5046 + ** Initialize variables so that it will be safe to jump
1.5047 + ** directly to balance_cleanup at any moment.
1.5048 + */
1.5049 + nOld = nNew = 0;
1.5050 + sqlite3PagerRef(pParent->pDbPage);
1.5051 +
1.5052 + /*
1.5053 + ** Find sibling pages to pPage and the cells in pParent that divide
1.5054 + ** the siblings. An attempt is made to find NN siblings on either
1.5055 + ** side of pPage. More siblings are taken from one side, however, if
1.5056 + ** there are fewer than NN siblings on the other side. If pParent
1.5057 + ** has NB or fewer children then all children of pParent are taken.
1.5058 + */
1.5059 + nxDiv = idx - NN;
1.5060 + if( nxDiv + NB > pParent->nCell ){
1.5061 + nxDiv = pParent->nCell - NB + 1;
1.5062 + }
1.5063 + if( nxDiv<0 ){
1.5064 + nxDiv = 0;
1.5065 + }
1.5066 + nDiv = 0;
1.5067 + for(i=0, k=nxDiv; i<NB; i++, k++){
1.5068 + if( k<pParent->nCell ){
1.5069 + apDiv[i] = findCell(pParent, k);
1.5070 + nDiv++;
1.5071 + assert( !pParent->leaf );
1.5072 + pgnoOld[i] = get4byte(apDiv[i]);
1.5073 + }else if( k==pParent->nCell ){
1.5074 + pgnoOld[i] = get4byte(&pParent->aData[pParent->hdrOffset+8]);
1.5075 + }else{
1.5076 + break;
1.5077 + }
1.5078 + rc = getAndInitPage(pBt, pgnoOld[i], &apOld[i], pParent);
1.5079 + if( rc ) goto balance_cleanup;
1.5080 + apOld[i]->idxParent = k;
1.5081 + apCopy[i] = 0;
1.5082 + assert( i==nOld );
1.5083 + nOld++;
1.5084 + nMaxCells += 1+apOld[i]->nCell+apOld[i]->nOverflow;
1.5085 + }
1.5086 +
1.5087 + /* Make nMaxCells a multiple of 4 in order to preserve 8-byte
1.5088 + ** alignment */
1.5089 + nMaxCells = (nMaxCells + 3)&~3;
1.5090 +
1.5091 + /*
1.5092 + ** Allocate space for memory structures
1.5093 + */
1.5094 + szScratch =
1.5095 + nMaxCells*sizeof(u8*) /* apCell */
1.5096 + + nMaxCells*sizeof(u16) /* szCell */
1.5097 + + (ROUND8(sizeof(MemPage))+pBt->pageSize)*NB /* aCopy */
1.5098 + + pBt->pageSize /* aSpace1 */
1.5099 + + (ISAUTOVACUUM ? nMaxCells : 0); /* aFrom */
1.5100 + apCell = sqlite3ScratchMalloc( szScratch );
1.5101 + if( apCell==0 ){
1.5102 + rc = SQLITE_NOMEM;
1.5103 + goto balance_cleanup;
1.5104 + }
1.5105 + szCell = (u16*)&apCell[nMaxCells];
1.5106 + aCopy[0] = (u8*)&szCell[nMaxCells];
1.5107 + assert( ((aCopy[0] - (u8*)apCell) & 7)==0 ); /* 8-byte alignment required */
1.5108 + for(i=1; i<NB; i++){
1.5109 + aCopy[i] = &aCopy[i-1][pBt->pageSize+ROUND8(sizeof(MemPage))];
1.5110 + assert( ((aCopy[i] - (u8*)apCell) & 7)==0 ); /* 8-byte alignment required */
1.5111 + }
1.5112 + aSpace1 = &aCopy[NB-1][pBt->pageSize+ROUND8(sizeof(MemPage))];
1.5113 + assert( ((aSpace1 - (u8*)apCell) & 7)==0 ); /* 8-byte alignment required */
1.5114 + if( ISAUTOVACUUM ){
1.5115 + aFrom = &aSpace1[pBt->pageSize];
1.5116 + }
1.5117 + aSpace2 = sqlite3PageMalloc(pBt->pageSize);
1.5118 + if( aSpace2==0 ){
1.5119 + rc = SQLITE_NOMEM;
1.5120 + goto balance_cleanup;
1.5121 + }
1.5122 +
1.5123 + /*
1.5124 + ** Make copies of the content of pPage and its siblings into apCopy[].
1.5125 + ** The rest of this function will use data from the copies rather
1.5126 + ** than the original pages since the original pages will be in the
1.5127 + ** process of being overwritten.
1.5128 + */
1.5129 + for(i=0; i<nOld; i++){
1.5130 + MemPage *p = apCopy[i] = (MemPage*)aCopy[i];
1.5131 + memcpy(p, apOld[i], sizeof(MemPage));
1.5132 + p->aData = (void*)&p[1];
1.5133 + memcpy(p->aData, apOld[i]->aData, pBt->pageSize);
1.5134 + }
1.5135 +
1.5136 + /*
1.5137 + ** Load pointers to all cells on sibling pages and the divider cells
1.5138 + ** into the local apCell[] array. Make copies of the divider cells
1.5139 + ** into space obtained from aSpace1[] and remove the divider cells
1.5140 + ** from pParent.
1.5141 + **
1.5142 + ** If the siblings are on leaf pages, then the child pointers of the
1.5143 + ** divider cells are stripped from the cells before they are copied
1.5144 + ** into aSpace1[]. In this way, all cells in apCell[] are without
1.5145 + ** child pointers. If siblings are not leaves, then all cell in
1.5146 + ** apCell[] include child pointers. Either way, all cells in apCell[]
1.5147 + ** are alike.
1.5148 + **
1.5149 + ** leafCorrection: 4 if pPage is a leaf. 0 if pPage is not a leaf.
1.5150 + ** leafData: 1 if pPage holds key+data and pParent holds only keys.
1.5151 + */
1.5152 + nCell = 0;
1.5153 + leafCorrection = pPage->leaf*4;
1.5154 + leafData = pPage->hasData;
1.5155 + for(i=0; i<nOld; i++){
1.5156 + MemPage *pOld = apCopy[i];
1.5157 + int limit = pOld->nCell+pOld->nOverflow;
1.5158 + for(j=0; j<limit; j++){
1.5159 + assert( nCell<nMaxCells );
1.5160 + apCell[nCell] = findOverflowCell(pOld, j);
1.5161 + szCell[nCell] = cellSizePtr(pOld, apCell[nCell]);
1.5162 + if( ISAUTOVACUUM ){
1.5163 + int a;
1.5164 + aFrom[nCell] = i;
1.5165 + for(a=0; a<pOld->nOverflow; a++){
1.5166 + if( pOld->aOvfl[a].pCell==apCell[nCell] ){
1.5167 + aFrom[nCell] = 0xFF;
1.5168 + break;
1.5169 + }
1.5170 + }
1.5171 + }
1.5172 + nCell++;
1.5173 + }
1.5174 + if( i<nOld-1 ){
1.5175 + u16 sz = cellSizePtr(pParent, apDiv[i]);
1.5176 + if( leafData ){
1.5177 + /* With the LEAFDATA flag, pParent cells hold only INTKEYs that
1.5178 + ** are duplicates of keys on the child pages. We need to remove
1.5179 + ** the divider cells from pParent, but the dividers cells are not
1.5180 + ** added to apCell[] because they are duplicates of child cells.
1.5181 + */
1.5182 + dropCell(pParent, nxDiv, sz);
1.5183 + }else{
1.5184 + u8 *pTemp;
1.5185 + assert( nCell<nMaxCells );
1.5186 + szCell[nCell] = sz;
1.5187 + pTemp = &aSpace1[iSpace1];
1.5188 + iSpace1 += sz;
1.5189 + assert( sz<=pBt->pageSize/4 );
1.5190 + assert( iSpace1<=pBt->pageSize );
1.5191 + memcpy(pTemp, apDiv[i], sz);
1.5192 + apCell[nCell] = pTemp+leafCorrection;
1.5193 + if( ISAUTOVACUUM ){
1.5194 + aFrom[nCell] = 0xFF;
1.5195 + }
1.5196 + dropCell(pParent, nxDiv, sz);
1.5197 + szCell[nCell] -= leafCorrection;
1.5198 + assert( get4byte(pTemp)==pgnoOld[i] );
1.5199 + if( !pOld->leaf ){
1.5200 + assert( leafCorrection==0 );
1.5201 + /* The right pointer of the child page pOld becomes the left
1.5202 + ** pointer of the divider cell */
1.5203 + memcpy(apCell[nCell], &pOld->aData[pOld->hdrOffset+8], 4);
1.5204 + }else{
1.5205 + assert( leafCorrection==4 );
1.5206 + if( szCell[nCell]<4 ){
1.5207 + /* Do not allow any cells smaller than 4 bytes. */
1.5208 + szCell[nCell] = 4;
1.5209 + }
1.5210 + }
1.5211 + nCell++;
1.5212 + }
1.5213 + }
1.5214 + }
1.5215 +
1.5216 + /*
1.5217 + ** Figure out the number of pages needed to hold all nCell cells.
1.5218 + ** Store this number in "k". Also compute szNew[] which is the total
1.5219 + ** size of all cells on the i-th page and cntNew[] which is the index
1.5220 + ** in apCell[] of the cell that divides page i from page i+1.
1.5221 + ** cntNew[k] should equal nCell.
1.5222 + **
1.5223 + ** Values computed by this block:
1.5224 + **
1.5225 + ** k: The total number of sibling pages
1.5226 + ** szNew[i]: Space used on the i-th sibling page.
1.5227 + ** cntNew[i]: Index in apCell[] and szCell[] for the first cell to
1.5228 + ** the right of the i-th sibling page.
1.5229 + ** usableSpace: Number of bytes of space available on each sibling.
1.5230 + **
1.5231 + */
1.5232 + usableSpace = pBt->usableSize - 12 + leafCorrection;
1.5233 + for(subtotal=k=i=0; i<nCell; i++){
1.5234 + assert( i<nMaxCells );
1.5235 + subtotal += szCell[i] + 2;
1.5236 + if( subtotal > usableSpace ){
1.5237 + szNew[k] = subtotal - szCell[i];
1.5238 + cntNew[k] = i;
1.5239 + if( leafData ){ i--; }
1.5240 + subtotal = 0;
1.5241 + k++;
1.5242 + }
1.5243 + }
1.5244 + szNew[k] = subtotal;
1.5245 + cntNew[k] = nCell;
1.5246 + k++;
1.5247 +
1.5248 + /*
1.5249 + ** The packing computed by the previous block is biased toward the siblings
1.5250 + ** on the left side. The left siblings are always nearly full, while the
1.5251 + ** right-most sibling might be nearly empty. This block of code attempts
1.5252 + ** to adjust the packing of siblings to get a better balance.
1.5253 + **
1.5254 + ** This adjustment is more than an optimization. The packing above might
1.5255 + ** be so out of balance as to be illegal. For example, the right-most
1.5256 + ** sibling might be completely empty. This adjustment is not optional.
1.5257 + */
1.5258 + for(i=k-1; i>0; i--){
1.5259 + int szRight = szNew[i]; /* Size of sibling on the right */
1.5260 + int szLeft = szNew[i-1]; /* Size of sibling on the left */
1.5261 + int r; /* Index of right-most cell in left sibling */
1.5262 + int d; /* Index of first cell to the left of right sibling */
1.5263 +
1.5264 + r = cntNew[i-1] - 1;
1.5265 + d = r + 1 - leafData;
1.5266 + assert( d<nMaxCells );
1.5267 + assert( r<nMaxCells );
1.5268 + while( szRight==0 || szRight+szCell[d]+2<=szLeft-(szCell[r]+2) ){
1.5269 + szRight += szCell[d] + 2;
1.5270 + szLeft -= szCell[r] + 2;
1.5271 + cntNew[i-1]--;
1.5272 + r = cntNew[i-1] - 1;
1.5273 + d = r + 1 - leafData;
1.5274 + }
1.5275 + szNew[i] = szRight;
1.5276 + szNew[i-1] = szLeft;
1.5277 + }
1.5278 +
1.5279 + /* Either we found one or more cells (cntNew[0]>0) or we are
1.5280 + ** a virtual root page. A virtual root page is when the real root
1.5281 + ** page is page 1 and we are the only child of that page.
1.5282 + */
1.5283 + assert( cntNew[0]>0 || (pParent->pgno==1 && pParent->nCell==0) );
1.5284 +
1.5285 + /*
1.5286 + ** Allocate k new pages. Reuse old pages where possible.
1.5287 + */
1.5288 + assert( pPage->pgno>1 );
1.5289 + pageFlags = pPage->aData[0];
1.5290 + for(i=0; i<k; i++){
1.5291 + MemPage *pNew;
1.5292 + if( i<nOld ){
1.5293 + pNew = apNew[i] = apOld[i];
1.5294 + pgnoNew[i] = pgnoOld[i];
1.5295 + apOld[i] = 0;
1.5296 + rc = sqlite3PagerWrite(pNew->pDbPage);
1.5297 + nNew++;
1.5298 + if( rc ) goto balance_cleanup;
1.5299 + }else{
1.5300 + assert( i>0 );
1.5301 + rc = allocateBtreePage(pBt, &pNew, &pgnoNew[i], pgnoNew[i-1], 0);
1.5302 + if( rc ) goto balance_cleanup;
1.5303 + apNew[i] = pNew;
1.5304 + nNew++;
1.5305 + }
1.5306 + }
1.5307 +
1.5308 + /* Free any old pages that were not reused as new pages.
1.5309 + */
1.5310 + while( i<nOld ){
1.5311 + rc = freePage(apOld[i]);
1.5312 + if( rc ) goto balance_cleanup;
1.5313 + releasePage(apOld[i]);
1.5314 + apOld[i] = 0;
1.5315 + i++;
1.5316 + }
1.5317 +
1.5318 + /*
1.5319 + ** Put the new pages in ascending order. This helps to
1.5320 + ** keep entries in the disk file in order so that a scan
1.5321 + ** of the table is a linear scan through the file. That
1.5322 + ** in turn helps the operating system to deliver pages
1.5323 + ** from the disk more rapidly.
1.5324 + **
1.5325 + ** An O(n^2) insertion sort algorithm is used, but since
1.5326 + ** n is never more than NB (a small constant), that should
1.5327 + ** not be a problem.
1.5328 + **
1.5329 + ** When NB==3, this one optimization makes the database
1.5330 + ** about 25% faster for large insertions and deletions.
1.5331 + */
1.5332 + for(i=0; i<k-1; i++){
1.5333 + int minV = pgnoNew[i];
1.5334 + int minI = i;
1.5335 + for(j=i+1; j<k; j++){
1.5336 + if( pgnoNew[j]<(unsigned)minV ){
1.5337 + minI = j;
1.5338 + minV = pgnoNew[j];
1.5339 + }
1.5340 + }
1.5341 + if( minI>i ){
1.5342 + int t;
1.5343 + MemPage *pT;
1.5344 + t = pgnoNew[i];
1.5345 + pT = apNew[i];
1.5346 + pgnoNew[i] = pgnoNew[minI];
1.5347 + apNew[i] = apNew[minI];
1.5348 + pgnoNew[minI] = t;
1.5349 + apNew[minI] = pT;
1.5350 + }
1.5351 + }
1.5352 + TRACE(("BALANCE: old: %d %d %d new: %d(%d) %d(%d) %d(%d) %d(%d) %d(%d)\n",
1.5353 + pgnoOld[0],
1.5354 + nOld>=2 ? pgnoOld[1] : 0,
1.5355 + nOld>=3 ? pgnoOld[2] : 0,
1.5356 + pgnoNew[0], szNew[0],
1.5357 + nNew>=2 ? pgnoNew[1] : 0, nNew>=2 ? szNew[1] : 0,
1.5358 + nNew>=3 ? pgnoNew[2] : 0, nNew>=3 ? szNew[2] : 0,
1.5359 + nNew>=4 ? pgnoNew[3] : 0, nNew>=4 ? szNew[3] : 0,
1.5360 + nNew>=5 ? pgnoNew[4] : 0, nNew>=5 ? szNew[4] : 0));
1.5361 +
1.5362 + /*
1.5363 + ** Evenly distribute the data in apCell[] across the new pages.
1.5364 + ** Insert divider cells into pParent as necessary.
1.5365 + */
1.5366 + j = 0;
1.5367 + for(i=0; i<nNew; i++){
1.5368 + /* Assemble the new sibling page. */
1.5369 + MemPage *pNew = apNew[i];
1.5370 + assert( j<nMaxCells );
1.5371 + assert( pNew->pgno==pgnoNew[i] );
1.5372 + zeroPage(pNew, pageFlags);
1.5373 + assemblePage(pNew, cntNew[i]-j, &apCell[j], &szCell[j]);
1.5374 + assert( pNew->nCell>0 || (nNew==1 && cntNew[0]==0) );
1.5375 + assert( pNew->nOverflow==0 );
1.5376 +
1.5377 + /* If this is an auto-vacuum database, update the pointer map entries
1.5378 + ** that point to the siblings that were rearranged. These can be: left
1.5379 + ** children of cells, the right-child of the page, or overflow pages
1.5380 + ** pointed to by cells.
1.5381 + */
1.5382 + if( ISAUTOVACUUM ){
1.5383 + for(k=j; k<cntNew[i]; k++){
1.5384 + assert( k<nMaxCells );
1.5385 + if( aFrom[k]==0xFF || apCopy[aFrom[k]]->pgno!=pNew->pgno ){
1.5386 + rc = ptrmapPutOvfl(pNew, k-j);
1.5387 + if( rc==SQLITE_OK && leafCorrection==0 ){
1.5388 + rc = ptrmapPut(pBt, get4byte(apCell[k]), PTRMAP_BTREE, pNew->pgno);
1.5389 + }
1.5390 + if( rc!=SQLITE_OK ){
1.5391 + goto balance_cleanup;
1.5392 + }
1.5393 + }
1.5394 + }
1.5395 + }
1.5396 +
1.5397 + j = cntNew[i];
1.5398 +
1.5399 + /* If the sibling page assembled above was not the right-most sibling,
1.5400 + ** insert a divider cell into the parent page.
1.5401 + */
1.5402 + if( i<nNew-1 && j<nCell ){
1.5403 + u8 *pCell;
1.5404 + u8 *pTemp;
1.5405 + int sz;
1.5406 +
1.5407 + assert( j<nMaxCells );
1.5408 + pCell = apCell[j];
1.5409 + sz = szCell[j] + leafCorrection;
1.5410 + pTemp = &aSpace2[iSpace2];
1.5411 + if( !pNew->leaf ){
1.5412 + memcpy(&pNew->aData[8], pCell, 4);
1.5413 + if( ISAUTOVACUUM
1.5414 + && (aFrom[j]==0xFF || apCopy[aFrom[j]]->pgno!=pNew->pgno)
1.5415 + ){
1.5416 + rc = ptrmapPut(pBt, get4byte(pCell), PTRMAP_BTREE, pNew->pgno);
1.5417 + if( rc!=SQLITE_OK ){
1.5418 + goto balance_cleanup;
1.5419 + }
1.5420 + }
1.5421 + }else if( leafData ){
1.5422 + /* If the tree is a leaf-data tree, and the siblings are leaves,
1.5423 + ** then there is no divider cell in apCell[]. Instead, the divider
1.5424 + ** cell consists of the integer key for the right-most cell of
1.5425 + ** the sibling-page assembled above only.
1.5426 + */
1.5427 + CellInfo info;
1.5428 + j--;
1.5429 + sqlite3BtreeParseCellPtr(pNew, apCell[j], &info);
1.5430 + pCell = pTemp;
1.5431 + fillInCell(pParent, pCell, 0, info.nKey, 0, 0, 0, &sz);
1.5432 + pTemp = 0;
1.5433 + }else{
1.5434 + pCell -= 4;
1.5435 + /* Obscure case for non-leaf-data trees: If the cell at pCell was
1.5436 + ** previously stored on a leaf node, and its reported size was 4
1.5437 + ** bytes, then it may actually be smaller than this
1.5438 + ** (see sqlite3BtreeParseCellPtr(), 4 bytes is the minimum size of
1.5439 + ** any cell). But it is important to pass the correct size to
1.5440 + ** insertCell(), so reparse the cell now.
1.5441 + **
1.5442 + ** Note that this can never happen in an SQLite data file, as all
1.5443 + ** cells are at least 4 bytes. It only happens in b-trees used
1.5444 + ** to evaluate "IN (SELECT ...)" and similar clauses.
1.5445 + */
1.5446 + if( szCell[j]==4 ){
1.5447 + assert(leafCorrection==4);
1.5448 + sz = cellSizePtr(pParent, pCell);
1.5449 + }
1.5450 + }
1.5451 + iSpace2 += sz;
1.5452 + assert( sz<=pBt->pageSize/4 );
1.5453 + assert( iSpace2<=pBt->pageSize );
1.5454 + rc = insertCell(pParent, nxDiv, pCell, sz, pTemp, 4);
1.5455 + if( rc!=SQLITE_OK ) goto balance_cleanup;
1.5456 + put4byte(findOverflowCell(pParent,nxDiv), pNew->pgno);
1.5457 +
1.5458 + /* If this is an auto-vacuum database, and not a leaf-data tree,
1.5459 + ** then update the pointer map with an entry for the overflow page
1.5460 + ** that the cell just inserted points to (if any).
1.5461 + */
1.5462 + if( ISAUTOVACUUM && !leafData ){
1.5463 + rc = ptrmapPutOvfl(pParent, nxDiv);
1.5464 + if( rc!=SQLITE_OK ){
1.5465 + goto balance_cleanup;
1.5466 + }
1.5467 + }
1.5468 + j++;
1.5469 + nxDiv++;
1.5470 + }
1.5471 +
1.5472 + /* Set the pointer-map entry for the new sibling page. */
1.5473 + if( ISAUTOVACUUM ){
1.5474 + rc = ptrmapPut(pBt, pNew->pgno, PTRMAP_BTREE, pParent->pgno);
1.5475 + if( rc!=SQLITE_OK ){
1.5476 + goto balance_cleanup;
1.5477 + }
1.5478 + }
1.5479 + }
1.5480 + assert( j==nCell );
1.5481 + assert( nOld>0 );
1.5482 + assert( nNew>0 );
1.5483 + if( (pageFlags & PTF_LEAF)==0 ){
1.5484 + u8 *zChild = &apCopy[nOld-1]->aData[8];
1.5485 + memcpy(&apNew[nNew-1]->aData[8], zChild, 4);
1.5486 + if( ISAUTOVACUUM ){
1.5487 + rc = ptrmapPut(pBt, get4byte(zChild), PTRMAP_BTREE, apNew[nNew-1]->pgno);
1.5488 + if( rc!=SQLITE_OK ){
1.5489 + goto balance_cleanup;
1.5490 + }
1.5491 + }
1.5492 + }
1.5493 + if( nxDiv==pParent->nCell+pParent->nOverflow ){
1.5494 + /* Right-most sibling is the right-most child of pParent */
1.5495 + put4byte(&pParent->aData[pParent->hdrOffset+8], pgnoNew[nNew-1]);
1.5496 + }else{
1.5497 + /* Right-most sibling is the left child of the first entry in pParent
1.5498 + ** past the right-most divider entry */
1.5499 + put4byte(findOverflowCell(pParent, nxDiv), pgnoNew[nNew-1]);
1.5500 + }
1.5501 +
1.5502 + /*
1.5503 + ** Reparent children of all cells.
1.5504 + */
1.5505 + for(i=0; i<nNew; i++){
1.5506 + rc = reparentChildPages(apNew[i], 0);
1.5507 + if( rc!=SQLITE_OK ) goto balance_cleanup;
1.5508 + }
1.5509 + rc = reparentChildPages(pParent, 0);
1.5510 + if( rc!=SQLITE_OK ) goto balance_cleanup;
1.5511 +
1.5512 + /*
1.5513 + ** Balance the parent page. Note that the current page (pPage) might
1.5514 + ** have been added to the freelist so it might no longer be initialized.
1.5515 + ** But the parent page will always be initialized.
1.5516 + */
1.5517 + assert( pParent->isInit );
1.5518 + sqlite3ScratchFree(apCell);
1.5519 + apCell = 0;
1.5520 + rc = balance(pParent, 0);
1.5521 +
1.5522 + /*
1.5523 + ** Cleanup before returning.
1.5524 + */
1.5525 +balance_cleanup:
1.5526 + sqlite3PageFree(aSpace2);
1.5527 + sqlite3ScratchFree(apCell);
1.5528 + for(i=0; i<nOld; i++){
1.5529 + releasePage(apOld[i]);
1.5530 + }
1.5531 + for(i=0; i<nNew; i++){
1.5532 + releasePage(apNew[i]);
1.5533 + }
1.5534 + releasePage(pParent);
1.5535 + TRACE(("BALANCE: finished with %d: old=%d new=%d cells=%d\n",
1.5536 + pPage->pgno, nOld, nNew, nCell));
1.5537 + return rc;
1.5538 +}
1.5539 +
1.5540 +/*
1.5541 +** This routine is called for the root page of a btree when the root
1.5542 +** page contains no cells. This is an opportunity to make the tree
1.5543 +** shallower by one level.
1.5544 +*/
1.5545 +static int balance_shallower(MemPage *pPage){
1.5546 + MemPage *pChild = 0; /* The only child page of pPage; 0 when no reference is held */
1.5547 + Pgno pgnoChild; /* Page number for pChild */
1.5548 + int rc = SQLITE_OK; /* Return code from subprocedures */
1.5549 + BtShared *pBt; /* The main BTree structure */
1.5550 + int mxCellPerPage; /* Maximum number of cells per page */
1.5551 + u8 **apCell; /* All cells from pages being balanced */
1.5552 + u16 *szCell; /* Local size of all cells */
1.5553 +
1.5554 + assert( pPage->pParent==0 );
1.5555 + assert( pPage->nCell==0 );
1.5556 + assert( sqlite3_mutex_held(pPage->pBt->mutex) );
1.5557 + pBt = pPage->pBt;
1.5558 + mxCellPerPage = MX_CELL(pBt);
1.5559 + apCell = sqlite3Malloc( mxCellPerPage*(sizeof(u8*)+sizeof(u16)) );
1.5560 + if( apCell==0 ) return SQLITE_NOMEM;
1.5561 + szCell = (u16*)&apCell[mxCellPerPage]; /* sizes share the allocation, after the pointers */
1.5562 + if( pPage->leaf ){
1.5563 + /* The table is completely empty */
1.5564 + TRACE(("BALANCE: empty table %d\n", pPage->pgno));
1.5565 + }else{
1.5566 + /* The root page is empty but has one child. Transfer the
1.5567 + ** information from that one child into the root page if it
1.5568 + ** will fit. This reduces the depth of the tree by one.
1.5569 + **
1.5570 + ** If the root page is page 1, it has less space available than
1.5571 + ** its child (due to the 100 byte header that occurs at the beginning
1.5572 + ** of the database file), so it might not be able to hold all of the
1.5573 + ** information currently contained in the child. If this is the
1.5574 + ** case, then do not do the transfer. Leave page 1 empty except
1.5575 + ** for the right-pointer to the child page. The child page becomes
1.5576 + ** the virtual root of the tree.
1.5577 + */
1.5578 + pgnoChild = get4byte(&pPage->aData[pPage->hdrOffset+8]);
1.5579 + assert( pgnoChild>0 );
1.5580 + assert( pgnoChild<=pagerPagecount(pPage->pBt->pPager) );
1.5581 + rc = sqlite3BtreeGetPage(pPage->pBt, pgnoChild, &pChild, 0);
1.5582 + if( rc ){ pChild = 0; goto end_shallow_balance; } /* no reference was obtained */
1.5583 + if( pPage->pgno==1 ){
1.5584 + rc = sqlite3BtreeInitPage(pChild, pPage);
1.5585 + if( rc ) goto end_shallow_balance; /* pChild is released at the label */
1.5586 + assert( pChild->nOverflow==0 );
1.5587 + if( pChild->nFree>=100 ){
1.5588 + /* The child information will fit on the root page, so do the
1.5589 + ** copy */
1.5590 + int i;
1.5591 + zeroPage(pPage, pChild->aData[0]);
1.5592 + for(i=0; i<pChild->nCell; i++){
1.5593 + apCell[i] = findCell(pChild,i);
1.5594 + szCell[i] = cellSizePtr(pChild, apCell[i]);
1.5595 + }
1.5596 + assemblePage(pPage, pChild->nCell, apCell, szCell);
1.5597 + /* Copy the right-pointer of the child to the parent. */
1.5598 + put4byte(&pPage->aData[pPage->hdrOffset+8],
1.5599 + get4byte(&pChild->aData[pChild->hdrOffset+8]));
1.5600 + freePage(pChild); /* NOTE(review): return code is ignored here */
1.5601 + TRACE(("BALANCE: child %d transfer to page 1\n", pChild->pgno));
1.5602 + }else{
1.5603 + /* The child has more information than will fit on the root.
1.5604 + ** The tree is already balanced. Do nothing. */
1.5605 + TRACE(("BALANCE: child %d will not fit on page 1\n", pChild->pgno));
1.5606 + }
1.5607 + }else{
1.5608 + memcpy(pPage->aData, pChild->aData, pPage->pBt->usableSize);
1.5609 + pPage->isInit = 0; /* cleared before re-initializing from the copied image */
1.5610 + pPage->pParent = 0;
1.5611 + rc = sqlite3BtreeInitPage(pPage, 0);
1.5612 + assert( rc==SQLITE_OK );
1.5613 + freePage(pChild); /* NOTE(review): return code is ignored here */
1.5614 + TRACE(("BALANCE: transfer child %d into root %d\n",
1.5615 + pChild->pgno, pPage->pgno));
1.5616 + }
1.5617 + rc = reparentChildPages(pPage, 1);
1.5618 + assert( pPage->nOverflow==0 );
1.5619 + if( ISAUTOVACUUM && rc==SQLITE_OK ){ /* do not overwrite a reparenting error */
1.5620 + int i;
1.5621 + for(i=0; i<pPage->nCell; i++){
1.5622 + rc = ptrmapPutOvfl(pPage, i);
1.5623 + if( rc!=SQLITE_OK ){
1.5624 + goto end_shallow_balance; /* pChild is released at the label */
1.5625 + }
1.5626 + }
1.5627 + }
1.5628 + }
1.5629 +end_shallow_balance:
1.5630 + releasePage(pChild); /* single release point; pChild is 0 when nothing was fetched */
1.5631 + sqlite3_free(apCell);
1.5632 + return rc; /* SQLITE_OK, SQLITE_NOMEM, or an error code from a subprocedure */
1.5633 +}
1.5634 +
1.5635 +
1.5636 +/*
1.5637 +** The root page is overfull
1.5638 +**
1.5639 +** When this happens, create a new child page and copy the
1.5640 +** contents of the root into the child. Then make the root
1.5641 +** page an empty page with rightChild pointing to the new
1.5642 +** child. Finally, call balance_nonroot() on the new child
1.5643 +** to cause it to split.
1.5644 +*/
1.5645 +static int balance_deeper(MemPage *pPage){
1.5646 + int rc; /* Return value from subprocedures */
1.5647 + MemPage *pChild; /* Pointer to a new child page */
1.5648 + Pgno pgnoChild; /* Page number of the new child page */
1.5649 + BtShared *pBt; /* The BTree */
1.5650 + int usableSize; /* Total usable size of a page */
1.5651 + u8 *data; /* Content of the parent page */
1.5652 + u8 *cdata; /* Content of the child page */
1.5653 + int hdr; /* Offset to page header in parent */
1.5654 + int brk; /* Offset to content of first cell in parent */
1.5655 +
1.5656 + assert( pPage->pParent==0 );
1.5657 + assert( pPage->nOverflow>0 );
1.5658 + pBt = pPage->pBt;
1.5659 + assert( sqlite3_mutex_held(pBt->mutex) );
1.5660 + rc = allocateBtreePage(pBt, &pChild, &pgnoChild, pPage->pgno, 0);
1.5661 + if( rc ) return rc; /* no child page was obtained, nothing to release */
1.5662 + assert( sqlite3PagerIswriteable(pChild->pDbPage) );
1.5663 + usableSize = pBt->usableSize;
1.5664 + data = pPage->aData;
1.5665 + hdr = pPage->hdrOffset;
1.5666 + brk = get2byte(&data[hdr+5]);
1.5667 + cdata = pChild->aData;
1.5668 + memcpy(cdata, &data[hdr], pPage->cellOffset+2*pPage->nCell-hdr); /* header + cell pointer array */
1.5669 + memcpy(&cdata[brk], &data[brk], usableSize-brk); /* cell content area */
1.5670 + if( pChild->isInit ){ rc = SQLITE_CORRUPT; goto balancedeeper_out; } /* release pChild on this path too */
1.5671 + rc = sqlite3BtreeInitPage(pChild, pPage);
1.5672 + if( rc ) goto balancedeeper_out;
1.5673 + memcpy(pChild->aOvfl, pPage->aOvfl, pPage->nOverflow*sizeof(pPage->aOvfl[0]));
1.5674 + pChild->nOverflow = pPage->nOverflow; /* the child inherits the root's overflow cells */
1.5675 + if( pChild->nOverflow ){
1.5676 + pChild->nFree = 0;
1.5677 + }
1.5678 + assert( pChild->nCell==pPage->nCell );
1.5679 + zeroPage(pPage, pChild->aData[0] & ~PTF_LEAF); /* the root becomes an empty non-leaf page */
1.5680 + put4byte(&pPage->aData[pPage->hdrOffset+8], pgnoChild);
1.5681 + TRACE(("BALANCE: copy root %d into %d\n", pPage->pgno, pChild->pgno));
1.5682 + if( ISAUTOVACUUM ){
1.5683 + int i;
1.5684 + rc = ptrmapPut(pBt, pChild->pgno, PTRMAP_BTREE, pPage->pgno);
1.5685 + if( rc ) goto balancedeeper_out;
1.5686 + for(i=0; i<pChild->nCell; i++){
1.5687 + rc = ptrmapPutOvfl(pChild, i);
1.5688 + if( rc!=SQLITE_OK ){
1.5689 + goto balancedeeper_out;
1.5690 + }
1.5691 + }
1.5692 + rc = reparentChildPages(pChild, 1);
1.5693 + }
1.5694 + if( rc==SQLITE_OK ){
1.5695 + rc = balance_nonroot(pChild); /* split the (overfull) child */
1.5696 + }
1.5697 +
1.5698 +balancedeeper_out:
1.5699 + releasePage(pChild); /* every path that obtained pChild ends here */
1.5700 + return rc;
1.5701 +}
1.5702 +
1.5703 +/*
1.5704 +** Decide if the page pPage needs to be balanced. If balancing is
1.5705 +** required, call the appropriate balancing routine.
1.5706 +*/
1.5707 +static int balance(MemPage *pPage, int insert){
1.5708 +  int rc;
1.5709 +  assert( sqlite3_mutex_held(pPage->pBt->mutex) );
1.5710 +  if( pPage->pParent!=0 ){
1.5711 +    /* Non-root page: rebalance only when overfull, or (outside an insert) sparse. */
1.5712 +    int overfull = pPage->nOverflow>0;
1.5713 +    int sparse = !insert && pPage->nFree>pPage->pBt->usableSize*2/3;
1.5714 +    if( !overfull && !sparse ) return SQLITE_OK;
1.5715 +    return balance_nonroot(pPage);
1.5716 +  }
1.5717 +  rc = sqlite3PagerWrite(pPage->pDbPage);  /* root page: make it writable first */
1.5718 +  if( rc==SQLITE_OK && pPage->nOverflow>0 ){
1.5719 +    rc = balance_deeper(pPage);
1.5720 +  }
1.5721 +  if( rc==SQLITE_OK && pPage->nCell==0 ){
1.5722 +    rc = balance_shallower(pPage);
1.5723 +  }
1.5724 +  return rc;
1.5725 +}
1.5726 +
1.5727 +/*
1.5728 +** This routine checks all cursors that point to table pgnoRoot.
1.5729 +** If any of those cursors were opened with wrFlag==0 in a different
1.5730 +** database connection (a database connection that shares the pager
1.5731 +** cache with the current connection) and that other connection
1.5732 +** is not in the ReadUncommitted state, then this routine returns
1.5733 +** SQLITE_LOCKED.
1.5734 +**
1.5735 +** As well as cursors with wrFlag==0, cursors with wrFlag==1 and
1.5736 +** isIncrblobHandle==1 are also considered 'read' cursors. Incremental
1.5737 +** blob cursors are used for both reading and writing.
1.5738 +**
1.5739 +** When pgnoRoot is the root page of an intkey table, this function is also
1.5740 +** responsible for invalidating incremental blob cursors when the table row
1.5741 +** on which they are opened is deleted or modified. Cursors are invalidated
1.5742 +** according to the following rules:
1.5743 +**
1.5744 +** 1) When BtreeClearTable() is called to completely delete the contents
1.5745 +** of a B-Tree table, pExclude is set to zero and parameter iRow is
1.5746 +** set to non-zero. In this case all incremental blob cursors open
1.5747 +** on the table rooted at pgnoRoot are invalidated.
1.5748 +**
1.5749 +** 2) When BtreeInsert(), BtreeDelete() or BtreePutData() is called to
1.5750 +** modify a table row via an SQL statement, pExclude is set to the
1.5751 +** write cursor used to do the modification and parameter iRow is set
1.5752 +** to the integer row id of the B-Tree entry being modified. Unless
1.5753 +** pExclude is itself an incremental blob cursor, then all incremental
1.5754 +** blob cursors open on row iRow of the B-Tree are invalidated.
1.5755 +**
1.5756 +** 3) If both pExclude and iRow are set to zero, no incremental blob
1.5757 +** cursors are invalidated.
1.5758 +*/
1.5759 +static int checkReadLocks(
1.5760 +  Btree *pBtree,        /* Handle whose db is compared against other cursors */
1.5761 +  Pgno pgnoRoot,        /* Only cursors on this root page are examined */
1.5762 +  BtCursor *pExclude,   /* Cursor to skip, or 0 */
1.5763 +  i64 iRow              /* Row id used when invalidating incrblob cursors */
1.5764 +){
1.5765 +  sqlite3 *dbSelf = pBtree->db;
1.5766 +  BtShared *pShared = pBtree->pBt;
1.5767 +  BtCursor *pCsr;
1.5768 +  assert( sqlite3BtreeHoldsMutex(pBtree) );
1.5769 +  for(pCsr=pShared->pCursor; pCsr!=0; pCsr=pCsr->pNext){
1.5770 +    int isReader;   /* True if pCsr is treated as a 'read' cursor */
1.5771 +    if( pCsr==pExclude || pCsr->pgnoRoot!=pgnoRoot ) continue;
1.5772 +#ifndef SQLITE_OMIT_INCRBLOB
1.5773 +    /* Invalidate incremental blob cursors (rules 1 and 2 of the header). */
1.5774 +    if( pCsr->isIncrblobHandle ){
1.5775 +      int hit = pExclude==0 ? iRow!=0
1.5776 +              : (!pExclude->isIncrblobHandle && pCsr->info.nKey==iRow);
1.5777 +      if( hit ) pCsr->eState = CURSOR_INVALID;
1.5778 +    }
1.5779 +#endif
1.5780 +    if( pCsr->eState!=CURSOR_VALID ) continue;
1.5781 +    isReader = pCsr->wrFlag==0;
1.5782 +#ifndef SQLITE_OMIT_INCRBLOB
1.5783 +    if( pCsr->isIncrblobHandle ) isReader = 1;
1.5784 +#endif
1.5785 +    if( isReader ){
1.5786 +      sqlite3 *dbOther = pCsr->pBtree->db;
1.5787 +      if( dbOther==0 ) return SQLITE_LOCKED;
1.5788 +      if( dbOther!=dbSelf && (dbOther->flags & SQLITE_ReadUncommitted)==0 ){
1.5789 +        return SQLITE_LOCKED;
1.5790 +      }
1.5791 +    }
1.5792 +  }
1.5793 +  return SQLITE_OK;
1.5794 +}
1.5795 +
1.5796 +/*
1.5797 +** Insert a new record into the BTree. The key is given by (pKey,nKey)
1.5798 +** and the data is given by (pData,nData). The cursor is used only to
1.5799 +** define what table the record should be inserted into. The cursor
1.5800 +** is left pointing at a random location.
1.5801 +**
1.5802 +** For an INTKEY table, only the nKey value of the key is used. pKey is
1.5803 +** ignored. For a ZERODATA table, the pData and nData are both ignored.
1.5804 +*/
1.5805 +int sqlite3BtreeInsert(
1.5806 + BtCursor *pCur, /* Insert data into the table of this cursor */
1.5807 + const void *pKey, i64 nKey, /* The key of the new record */
1.5808 + const void *pData, int nData, /* The data of the new record */
1.5809 + int nZero, /* Number of extra 0 bytes to append to data */
1.5810 + int appendBias /* True if this is likely an append */
1.5811 +){
1.5812 + int rc; /* Result code returned to the caller */
1.5813 + int loc; /* Set by sqlite3BtreeMoveto(); 0 selects the "overwrite" path below */
1.5814 + int szNew; /* Size of the new cell, as reported by fillInCell() */
1.5815 + MemPage *pPage; /* Page the cursor points at after the seek */
1.5816 + Btree *p = pCur->pBtree;
1.5817 + BtShared *pBt = p->pBt;
1.5818 + unsigned char *oldCell; /* Existing cell being replaced (overwrite path only) */
1.5819 + unsigned char *newCell = 0; /* New cell image, built in pBt->pTmpSpace */
1.5820 +
1.5821 + assert( cursorHoldsMutex(pCur) );
1.5822 + if( pBt->inTransaction!=TRANS_WRITE ){
1.5823 + /* Must start a transaction before doing an insert */
1.5824 + rc = pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR;
1.5825 + return rc;
1.5826 + }
1.5827 + assert( !pBt->readOnly );
1.5828 + if( !pCur->wrFlag ){
1.5829 + return SQLITE_PERM; /* Cursor not open for writing */
1.5830 + }
1.5831 + if( checkReadLocks(pCur->pBtree, pCur->pgnoRoot, pCur, nKey) ){
1.5832 + return SQLITE_LOCKED; /* The table pCur points to has a read lock */
1.5833 + }
1.5834 + if( pCur->eState==CURSOR_FAULT ){
1.5835 + return pCur->skip; /* in the FAULT state pCur->skip is returned as the result code */
1.5836 + }
1.5837 +
1.5838 + /* Save the positions of any other cursors open on this table */
1.5839 + clearCursorPosition(pCur);
1.5840 + if(
1.5841 + SQLITE_OK!=(rc = saveAllCursors(pBt, pCur->pgnoRoot, pCur)) ||
1.5842 + SQLITE_OK!=(rc = sqlite3BtreeMoveto(pCur, pKey, 0, nKey, appendBias, &loc))
1.5843 + ){
1.5844 + return rc;
1.5845 + }
1.5846 +
1.5847 + pPage = pCur->pPage;
1.5848 + assert( pPage->intKey || nKey>=0 );
1.5849 + assert( pPage->leaf || !pPage->intKey );
1.5850 + TRACE(("INSERT: table=%d nkey=%lld ndata=%d page=%d %s\n",
1.5851 + pCur->pgnoRoot, nKey, nData, pPage->pgno,
1.5852 + loc==0 ? "overwrite" : "new entry"));
1.5853 + assert( pPage->isInit );
1.5854 + allocateTempSpace(pBt);
1.5855 + newCell = pBt->pTmpSpace;
1.5856 + if( newCell==0 ) return SQLITE_NOMEM; /* temp space could not be allocated */
1.5857 + rc = fillInCell(pPage, newCell, pKey, nKey, pData, nData, nZero, &szNew);
1.5858 + if( rc ) goto end_insert;
1.5859 + assert( szNew==cellSizePtr(pPage, newCell) );
1.5860 + assert( szNew<=MX_CELL_SIZE(pBt) );
1.5861 + if( loc==0 && CURSOR_VALID==pCur->eState ){
1.5862 + u16 szOld; /* Size of the cell being replaced */
1.5863 + assert( pCur->idx>=0 && pCur->idx<pPage->nCell );
1.5864 + rc = sqlite3PagerWrite(pPage->pDbPage);
1.5865 + if( rc ){
1.5866 + goto end_insert;
1.5867 + }
1.5868 + oldCell = findCell(pPage, pCur->idx);
1.5869 + if( !pPage->leaf ){
1.5870 + memcpy(newCell, oldCell, 4); /* carry the old cell's first 4 bytes (child pointer) into the new cell */
1.5871 + }
1.5872 + szOld = cellSizePtr(pPage, oldCell);
1.5873 + rc = clearCell(pPage, oldCell);
1.5874 + if( rc ) goto end_insert;
1.5875 + dropCell(pPage, pCur->idx, szOld);
1.5876 + }else if( loc<0 && pPage->nCell>0 ){
1.5877 + assert( pPage->leaf );
1.5878 + pCur->idx++; /* insert immediately after the cell the cursor is on */
1.5879 + pCur->info.nSize = 0; /* cached cell info no longer matches pCur->idx */
1.5880 + pCur->validNKey = 0;
1.5881 + }else{
1.5882 + assert( pPage->leaf );
1.5883 + }
1.5884 + rc = insertCell(pPage, pCur->idx, newCell, szNew, 0, 0);
1.5885 + if( rc!=SQLITE_OK ) goto end_insert;
1.5886 + rc = balance(pPage, 1); /* insert==1: balance() then rebalances only on overflow */
1.5887 + if( rc==SQLITE_OK ){
1.5888 + moveToRoot(pCur);
1.5889 + }
1.5890 +end_insert:
1.5891 + return rc;
1.5892 +}
1.5893 +
1.5894 +/*
1.5895 +** Delete the entry that the cursor is pointing to. The cursor
1.5896 +** is left pointing at a random location.
1.5897 +*/
1.5898 +int sqlite3BtreeDelete(BtCursor *pCur){
1.5899 + MemPage *pPage = pCur->pPage; /* Page holding the entry to delete */
1.5900 + unsigned char *pCell; /* The cell being deleted */
1.5901 + int rc; /* Result code returned to the caller */
1.5902 + Pgno pgnoChild = 0; /* Left-child page number of the deleted cell (non-leaf pages only) */
1.5903 + Btree *p = pCur->pBtree;
1.5904 + BtShared *pBt = p->pBt;
1.5905 +
1.5906 + assert( cursorHoldsMutex(pCur) );
1.5907 + assert( pPage->isInit );
1.5908 + if( pBt->inTransaction!=TRANS_WRITE ){
1.5909 + /* Must start a transaction before doing a delete */
1.5910 + rc = pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR;
1.5911 + return rc;
1.5912 + }
1.5913 + assert( !pBt->readOnly );
1.5914 + if( pCur->eState==CURSOR_FAULT ){
1.5915 + return pCur->skip; /* in the FAULT state pCur->skip is returned as the result code */
1.5916 + }
1.5917 + if( pCur->idx >= pPage->nCell ){
1.5918 + return SQLITE_ERROR; /* The cursor is not pointing to anything */
1.5919 + }
1.5920 + if( !pCur->wrFlag ){
1.5921 + return SQLITE_PERM; /* Did not open this cursor for writing */
1.5922 + }
1.5923 + if( checkReadLocks(pCur->pBtree, pCur->pgnoRoot, pCur, pCur->info.nKey) ){
1.5924 + return SQLITE_LOCKED; /* The table pCur points to has a read lock */
1.5925 + }
1.5926 +
1.5927 + /* Restore the current cursor position (a no-op if the cursor is not in
1.5928 + ** CURSOR_REQUIRESEEK state) and save the positions of any other cursors
1.5929 + ** open on the same table. Then call sqlite3PagerWrite() on the page
1.5930 + ** that the entry will be deleted from.
1.5931 + */
1.5932 + if(
1.5933 + (rc = restoreCursorPosition(pCur))!=0 || /* NOTE(review): pPage was read before this call and is not re-read - confirm */
1.5934 + (rc = saveAllCursors(pBt, pCur->pgnoRoot, pCur))!=0 ||
1.5935 + (rc = sqlite3PagerWrite(pPage->pDbPage))!=0
1.5936 + ){
1.5937 + return rc;
1.5938 + }
1.5939 +
1.5940 + /* Locate the cell within its page and leave pCell pointing to the
1.5941 + ** data. The clearCell() call frees any overflow pages associated with the
1.5942 + ** cell. The cell itself is still intact.
1.5943 + */
1.5944 + pCell = findCell(pPage, pCur->idx);
1.5945 + if( !pPage->leaf ){
1.5946 + pgnoChild = get4byte(pCell); /* remembered so the replacement cell can reuse it */
1.5947 + }
1.5948 + rc = clearCell(pPage, pCell);
1.5949 + if( rc ){
1.5950 + return rc;
1.5951 + }
1.5952 +
1.5953 + if( !pPage->leaf ){
1.5954 + /*
1.5955 + ** The entry we are about to delete is not a leaf so if we do not
1.5956 + ** do something we will leave a hole on an internal page.
1.5957 + ** We have to fill the hole by moving in a cell from a leaf. The
1.5958 + ** next Cell after the one to be deleted is guaranteed to exist and
1.5959 + ** to be a leaf so we can use it.
1.5960 + */
1.5961 + BtCursor leafCur; /* Temporary cursor advanced to the next entry */
1.5962 + unsigned char *pNext; /* The cell that leafCur points at */
1.5963 + int notUsed; /* Output of sqlite3BtreeNext(); ignored */
1.5964 + unsigned char *tempCell = 0; /* Scratch buffer handed to insertCell() */
1.5965 + assert( !pPage->intKey );
1.5966 + sqlite3BtreeGetTempCursor(pCur, &leafCur);
1.5967 + rc = sqlite3BtreeNext(&leafCur, &notUsed);
1.5968 + if( rc==SQLITE_OK ){
1.5969 + rc = sqlite3PagerWrite(leafCur.pPage->pDbPage);
1.5970 + }
1.5971 + if( rc==SQLITE_OK ){
1.5972 + u16 szNext; /* Size of the cell at pNext */
1.5973 + TRACE(("DELETE: table=%d delete internal from %d replace from leaf %d\n",
1.5974 + pCur->pgnoRoot, pPage->pgno, leafCur.pPage->pgno));
1.5975 + dropCell(pPage, pCur->idx, cellSizePtr(pPage, pCell));
1.5976 + pNext = findCell(leafCur.pPage, leafCur.idx);
1.5977 + szNext = cellSizePtr(leafCur.pPage, pNext);
1.5978 + assert( MX_CELL_SIZE(pBt)>=szNext+4 );
1.5979 + allocateTempSpace(pBt);
1.5980 + tempCell = pBt->pTmpSpace;
1.5981 + if( tempCell==0 ){
1.5982 + rc = SQLITE_NOMEM;
1.5983 + }
1.5984 + if( rc==SQLITE_OK ){
1.5985 + rc = insertCell(pPage, pCur->idx, pNext-4, szNext+4, tempCell, 0); /* 4 extra leading bytes hold the child pointer set below */
1.5986 + }
1.5987 + if( rc==SQLITE_OK ){
1.5988 + put4byte(findOverflowCell(pPage, pCur->idx), pgnoChild);
1.5989 + rc = balance(pPage, 0);
1.5990 + }
1.5991 + if( rc==SQLITE_OK ){
1.5992 + dropCell(leafCur.pPage, leafCur.idx, szNext);
1.5993 + rc = balance(leafCur.pPage, 0);
1.5994 + }
1.5995 + }
1.5996 + sqlite3BtreeReleaseTempCursor(&leafCur); /* released on success and on error */
1.5997 + }else{
1.5998 + TRACE(("DELETE: table=%d delete from leaf %d\n",
1.5999 + pCur->pgnoRoot, pPage->pgno));
1.6000 + dropCell(pPage, pCur->idx, cellSizePtr(pPage, pCell));
1.6001 + rc = balance(pPage, 0);
1.6002 + }
1.6003 + if( rc==SQLITE_OK ){
1.6004 + moveToRoot(pCur);
1.6005 + }
1.6006 + return rc;
1.6007 +}
1.6008 +
1.6009 +/*
1.6010 +** Create a new BTree table. Write into *piTable the page
1.6011 +** number for the root page of the new table.
1.6012 +**
1.6013 +** The type of table is determined by the flags parameter. Only the
1.6014 +** following values of flags are currently in use. Other values for
1.6015 +** flags might not work:
1.6016 +**
1.6017 +** BTREE_INTKEY|BTREE_LEAFDATA Used for SQL tables with rowid keys
1.6018 +** BTREE_ZERODATA Used for SQL indices
1.6019 +*/
1.6020 +static int btreeCreateTable(Btree *p, int *piTable, int flags){
1.6021 + BtShared *pBt = p->pBt;
1.6022 + MemPage *pRoot;
1.6023 + Pgno pgnoRoot;
1.6024 + int rc;
1.6025 +
1.6026 + assert( sqlite3BtreeHoldsMutex(p) );
1.6027 + if( pBt->inTransaction!=TRANS_WRITE ){
1.6028 + /* Must start a transaction first */
1.6029 + rc = pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR;
1.6030 + return rc;
1.6031 + }
1.6032 + assert( !pBt->readOnly );
1.6033 +
1.6034 +#ifdef SQLITE_OMIT_AUTOVACUUM
1.6035 + rc = allocateBtreePage(pBt, &pRoot, &pgnoRoot, 1, 0);
1.6036 + if( rc ){
1.6037 + return rc;
1.6038 + }
1.6039 +#else
1.6040 + if( pBt->autoVacuum ){
1.6041 + Pgno pgnoMove; /* Move a page here to make room for the root-page */
1.6042 + MemPage *pPageMove; /* The page to move to. */
1.6043 +
1.6044 + /* Creating a new table may require moving an existing database page
1.6045 + ** to make room for the new table's root page. In case this page turns
1.6046 + ** out to be an overflow page, delete all overflow page-map caches
1.6047 + ** held by open cursors.
1.6048 + */
1.6049 + invalidateAllOverflowCache(pBt);
1.6050 +
1.6051 + /* Read the value of meta[3] from the database to determine where the
1.6052 + ** root page of the new table should go. meta[3] is the largest root-page
1.6053 + ** created so far, so the new root-page is (meta[3]+1).
1.6054 + */
1.6055 + rc = sqlite3BtreeGetMeta(p, 4, &pgnoRoot);
1.6056 + if( rc!=SQLITE_OK ){
1.6057 + return rc;
1.6058 + }
1.6059 + pgnoRoot++;
1.6060 +
1.6061 + /* The new root-page may not be allocated on a pointer-map page, or the
1.6062 + ** PENDING_BYTE page.
1.6063 + */
1.6064 + while( pgnoRoot==PTRMAP_PAGENO(pBt, pgnoRoot) ||
1.6065 + pgnoRoot==PENDING_BYTE_PAGE(pBt) ){
1.6066 + pgnoRoot++;
1.6067 + }
1.6068 + assert( pgnoRoot>=3 );
1.6069 +
1.6070 + /* Allocate a page. The page that currently resides at pgnoRoot will
1.6071 + ** be moved to the allocated page (unless the allocated page happens
1.6072 + ** to reside at pgnoRoot).
1.6073 + */
1.6074 + rc = allocateBtreePage(pBt, &pPageMove, &pgnoMove, pgnoRoot, 1);
1.6075 + if( rc!=SQLITE_OK ){
1.6076 + return rc;
1.6077 + }
1.6078 +
1.6079 + if( pgnoMove!=pgnoRoot ){
1.6080 + /* pgnoRoot is the page that will be used for the root-page of
1.6081 + ** the new table (assuming an error did not occur). But we were
1.6082 + ** allocated pgnoMove. If required (i.e. if it was not allocated
1.6083 + ** by extending the file), the current page at position pgnoMove
1.6084 + ** is already journaled.
1.6085 + */
1.6086 + u8 eType;
1.6087 + Pgno iPtrPage;
1.6088 +
1.6089 + releasePage(pPageMove);
1.6090 +
1.6091 + /* Move the page currently at pgnoRoot to pgnoMove. */
1.6092 + rc = sqlite3BtreeGetPage(pBt, pgnoRoot, &pRoot, 0);
1.6093 + if( rc!=SQLITE_OK ){
1.6094 + return rc;
1.6095 + }
1.6096 + rc = ptrmapGet(pBt, pgnoRoot, &eType, &iPtrPage);
1.6097 + if( rc!=SQLITE_OK || eType==PTRMAP_ROOTPAGE || eType==PTRMAP_FREEPAGE ){
1.6098 + releasePage(pRoot);
1.6099 + return rc;
1.6100 + }
1.6101 + assert( eType!=PTRMAP_ROOTPAGE );
1.6102 + assert( eType!=PTRMAP_FREEPAGE );
1.6103 + rc = sqlite3PagerWrite(pRoot->pDbPage);
1.6104 + if( rc!=SQLITE_OK ){
1.6105 + releasePage(pRoot);
1.6106 + return rc;
1.6107 + }
1.6108 + rc = relocatePage(pBt, pRoot, eType, iPtrPage, pgnoMove, 0);
1.6109 + releasePage(pRoot);
1.6110 +
1.6111 + /* Obtain the page at pgnoRoot */
1.6112 + if( rc!=SQLITE_OK ){
1.6113 + return rc;
1.6114 + }
1.6115 + rc = sqlite3BtreeGetPage(pBt, pgnoRoot, &pRoot, 0);
1.6116 + if( rc!=SQLITE_OK ){
1.6117 + return rc;
1.6118 + }
1.6119 + rc = sqlite3PagerWrite(pRoot->pDbPage);
1.6120 + if( rc!=SQLITE_OK ){
1.6121 + releasePage(pRoot);
1.6122 + return rc;
1.6123 + }
1.6124 + }else{
1.6125 + pRoot = pPageMove;
1.6126 + }
1.6127 +
1.6128 + /* Update the pointer-map and meta-data with the new root-page number. */
1.6129 + rc = ptrmapPut(pBt, pgnoRoot, PTRMAP_ROOTPAGE, 0);
1.6130 + if( rc ){
1.6131 + releasePage(pRoot);
1.6132 + return rc;
1.6133 + }
1.6134 + rc = sqlite3BtreeUpdateMeta(p, 4, pgnoRoot);
1.6135 + if( rc ){
1.6136 + releasePage(pRoot);
1.6137 + return rc;
1.6138 + }
1.6139 +
1.6140 + }else{
1.6141 + rc = allocateBtreePage(pBt, &pRoot, &pgnoRoot, 1, 0);
1.6142 + if( rc ) return rc;
1.6143 + }
1.6144 +#endif
1.6145 + assert( sqlite3PagerIswriteable(pRoot->pDbPage) );
1.6146 + zeroPage(pRoot, flags | PTF_LEAF);
1.6147 + sqlite3PagerUnref(pRoot->pDbPage);
1.6148 + *piTable = (int)pgnoRoot;
1.6149 + return SQLITE_OK;
1.6150 +}
1.6151 +int sqlite3BtreeCreateTable(Btree *p, int *piTable, int flags){
1.6152 + int rc;
1.6153 + sqlite3BtreeEnter(p);
1.6154 + p->pBt->db = p->db;
1.6155 + rc = btreeCreateTable(p, piTable, flags);
1.6156 + sqlite3BtreeLeave(p);
1.6157 + return rc;
1.6158 +}
1.6159 +
1.6160 +/*
1.6161 +** Erase the given database page and all its children. Return
1.6162 +** the page to the freelist.
1.6163 +*/
1.6164 +static int clearDatabasePage(
1.6165 + BtShared *pBt, /* The BTree that contains the table */
1.6166 + Pgno pgno, /* Page number to clear */
1.6167 + MemPage *pParent, /* Parent page. NULL for the root */
1.6168 + int freePageFlag /* Deallocate page if true */
1.6169 +){
1.6170 + MemPage *pPage = 0;
1.6171 + int rc;
1.6172 + unsigned char *pCell;
1.6173 + int i;
1.6174 +
1.6175 + assert( sqlite3_mutex_held(pBt->mutex) );
1.6176 + if( pgno>pagerPagecount(pBt->pPager) ){
1.6177 + return SQLITE_CORRUPT_BKPT;
1.6178 + }
1.6179 +
1.6180 + rc = getAndInitPage(pBt, pgno, &pPage, pParent);
1.6181 + if( rc ) goto cleardatabasepage_out;
1.6182 + for(i=0; i<pPage->nCell; i++){
1.6183 + pCell = findCell(pPage, i);
1.6184 + if( !pPage->leaf ){
1.6185 + rc = clearDatabasePage(pBt, get4byte(pCell), pPage->pParent, 1);
1.6186 + if( rc ) goto cleardatabasepage_out;
1.6187 + }
1.6188 + rc = clearCell(pPage, pCell);
1.6189 + if( rc ) goto cleardatabasepage_out;
1.6190 + }
1.6191 + if( !pPage->leaf ){
1.6192 + rc = clearDatabasePage(pBt, get4byte(&pPage->aData[8]), pPage->pParent, 1);
1.6193 + if( rc ) goto cleardatabasepage_out;
1.6194 + }
1.6195 + if( freePageFlag ){
1.6196 + rc = freePage(pPage);
1.6197 + }else if( (rc = sqlite3PagerWrite(pPage->pDbPage))==0 ){
1.6198 + zeroPage(pPage, pPage->aData[0] | PTF_LEAF);
1.6199 + }
1.6200 +
1.6201 +cleardatabasepage_out:
1.6202 + releasePage(pPage);
1.6203 + return rc;
1.6204 +}
1.6205 +
1.6206 +/*
1.6207 +** Delete all information from a single table in the database. iTable is
1.6208 +** the page number of the root of the table. After this routine returns,
1.6209 +** the root page is empty, but still exists.
1.6210 +**
1.6211 +** This routine will fail with SQLITE_LOCKED if there are any open
1.6212 +** read cursors on the table. Open write cursors are moved to the
1.6213 +** root of the table.
1.6214 +*/
1.6215 +int sqlite3BtreeClearTable(Btree *p, int iTable){
1.6216 + int rc;
1.6217 + BtShared *pBt = p->pBt;
1.6218 + sqlite3BtreeEnter(p);
1.6219 + pBt->db = p->db;
1.6220 + if( p->inTrans!=TRANS_WRITE ){
1.6221 + rc = pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR;
1.6222 + }else if( (rc = checkReadLocks(p, iTable, 0, 1))!=SQLITE_OK ){
1.6223 + /* nothing to do */
1.6224 + }else if( SQLITE_OK!=(rc = saveAllCursors(pBt, iTable, 0)) ){
1.6225 + /* nothing to do */
1.6226 + }else{
1.6227 + rc = clearDatabasePage(pBt, (Pgno)iTable, 0, 0);
1.6228 + }
1.6229 + sqlite3BtreeLeave(p);
1.6230 + return rc;
1.6231 +}
1.6232 +
1.6233 +/*
1.6234 +** Erase all information in a table and add the root of the table to
1.6235 +** the freelist. Except, the root of the principle table (the one on
1.6236 +** page 1) is never added to the freelist.
1.6237 +**
1.6238 +** This routine will fail with SQLITE_LOCKED if there are any open
1.6239 +** cursors on the table.
1.6240 +**
1.6241 +** If AUTOVACUUM is enabled and the page at iTable is not the last
1.6242 +** root page in the database file, then the last root page
1.6243 +** in the database file is moved into the slot formerly occupied by
1.6244 +** iTable and that last slot formerly occupied by the last root page
1.6245 +** is added to the freelist instead of iTable. In this say, all
1.6246 +** root pages are kept at the beginning of the database file, which
1.6247 +** is necessary for AUTOVACUUM to work right. *piMoved is set to the
1.6248 +** page number that used to be the last root page in the file before
1.6249 +** the move. If no page gets moved, *piMoved is set to 0.
1.6250 +** The last root page is recorded in meta[3] and the value of
1.6251 +** meta[3] is updated by this procedure.
1.6252 +*/
1.6253 +static int btreeDropTable(Btree *p, int iTable, int *piMoved){
1.6254 + int rc;
1.6255 + MemPage *pPage = 0;
1.6256 + BtShared *pBt = p->pBt;
1.6257 +
1.6258 + assert( sqlite3BtreeHoldsMutex(p) );
1.6259 + if( p->inTrans!=TRANS_WRITE ){
1.6260 + return pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR;
1.6261 + }
1.6262 +
1.6263 + /* It is illegal to drop a table if any cursors are open on the
1.6264 + ** database. This is because in auto-vacuum mode the backend may
1.6265 + ** need to move another root-page to fill a gap left by the deleted
1.6266 + ** root page. If an open cursor was using this page a problem would
1.6267 + ** occur.
1.6268 + */
1.6269 + if( pBt->pCursor ){
1.6270 + return SQLITE_LOCKED;
1.6271 + }
1.6272 +
1.6273 + rc = sqlite3BtreeGetPage(pBt, (Pgno)iTable, &pPage, 0);
1.6274 + if( rc ) return rc;
1.6275 + rc = sqlite3BtreeClearTable(p, iTable);
1.6276 + if( rc ){
1.6277 + releasePage(pPage);
1.6278 + return rc;
1.6279 + }
1.6280 +
1.6281 + *piMoved = 0;
1.6282 +
1.6283 + if( iTable>1 ){
1.6284 +#ifdef SQLITE_OMIT_AUTOVACUUM
1.6285 + rc = freePage(pPage);
1.6286 + releasePage(pPage);
1.6287 +#else
1.6288 + if( pBt->autoVacuum ){
1.6289 + Pgno maxRootPgno;
1.6290 + rc = sqlite3BtreeGetMeta(p, 4, &maxRootPgno);
1.6291 + if( rc!=SQLITE_OK ){
1.6292 + releasePage(pPage);
1.6293 + return rc;
1.6294 + }
1.6295 +
1.6296 + if( iTable==maxRootPgno ){
1.6297 + /* If the table being dropped is the table with the largest root-page
1.6298 + ** number in the database, put the root page on the free list.
1.6299 + */
1.6300 + rc = freePage(pPage);
1.6301 + releasePage(pPage);
1.6302 + if( rc!=SQLITE_OK ){
1.6303 + return rc;
1.6304 + }
1.6305 + }else{
1.6306 + /* The table being dropped does not have the largest root-page
1.6307 + ** number in the database. So move the page that does into the
1.6308 + ** gap left by the deleted root-page.
1.6309 + */
1.6310 + MemPage *pMove;
1.6311 + releasePage(pPage);
1.6312 + rc = sqlite3BtreeGetPage(pBt, maxRootPgno, &pMove, 0);
1.6313 + if( rc!=SQLITE_OK ){
1.6314 + return rc;
1.6315 + }
1.6316 + rc = relocatePage(pBt, pMove, PTRMAP_ROOTPAGE, 0, iTable, 0);
1.6317 + releasePage(pMove);
1.6318 + if( rc!=SQLITE_OK ){
1.6319 + return rc;
1.6320 + }
1.6321 + rc = sqlite3BtreeGetPage(pBt, maxRootPgno, &pMove, 0);
1.6322 + if( rc!=SQLITE_OK ){
1.6323 + return rc;
1.6324 + }
1.6325 + rc = freePage(pMove);
1.6326 + releasePage(pMove);
1.6327 + if( rc!=SQLITE_OK ){
1.6328 + return rc;
1.6329 + }
1.6330 + *piMoved = maxRootPgno;
1.6331 + }
1.6332 +
1.6333 + /* Set the new 'max-root-page' value in the database header. This
1.6334 + ** is the old value less one, less one more if that happens to
1.6335 + ** be a root-page number, less one again if that is the
1.6336 + ** PENDING_BYTE_PAGE.
1.6337 + */
1.6338 + maxRootPgno--;
1.6339 + if( maxRootPgno==PENDING_BYTE_PAGE(pBt) ){
1.6340 + maxRootPgno--;
1.6341 + }
1.6342 + if( maxRootPgno==PTRMAP_PAGENO(pBt, maxRootPgno) ){
1.6343 + maxRootPgno--;
1.6344 + }
1.6345 + assert( maxRootPgno!=PENDING_BYTE_PAGE(pBt) );
1.6346 +
1.6347 + rc = sqlite3BtreeUpdateMeta(p, 4, maxRootPgno);
1.6348 + }else{
1.6349 + rc = freePage(pPage);
1.6350 + releasePage(pPage);
1.6351 + }
1.6352 +#endif
1.6353 + }else{
1.6354 + /* If sqlite3BtreeDropTable was called on page 1. */
1.6355 + zeroPage(pPage, PTF_INTKEY|PTF_LEAF );
1.6356 + releasePage(pPage);
1.6357 + }
1.6358 + return rc;
1.6359 +}
1.6360 +int sqlite3BtreeDropTable(Btree *p, int iTable, int *piMoved){
1.6361 + int rc;
1.6362 + sqlite3BtreeEnter(p);
1.6363 + p->pBt->db = p->db;
1.6364 + rc = btreeDropTable(p, iTable, piMoved);
1.6365 + sqlite3BtreeLeave(p);
1.6366 + return rc;
1.6367 +}
1.6368 +
1.6369 +
1.6370 +/*
1.6371 +** Read the meta-information out of a database file. Meta[0]
1.6372 +** is the number of free pages currently in the database. Meta[1]
1.6373 +** through meta[15] are available for use by higher layers. Meta[0]
1.6374 +** is read-only, the others are read/write.
1.6375 +**
1.6376 +** The schema layer numbers meta values differently. At the schema
1.6377 +** layer (and the SetCookie and ReadCookie opcodes) the number of
1.6378 +** free pages is not visible. So Cookie[0] is the same as Meta[1].
1.6379 +*/
1.6380 +int sqlite3BtreeGetMeta(Btree *p, int idx, u32 *pMeta){
1.6381 + DbPage *pDbPage;
1.6382 + int rc;
1.6383 + unsigned char *pP1;
1.6384 + BtShared *pBt = p->pBt;
1.6385 +
1.6386 + sqlite3BtreeEnter(p);
1.6387 + pBt->db = p->db;
1.6388 +
1.6389 + /* Reading a meta-data value requires a read-lock on page 1 (and hence
1.6390 + ** the sqlite_master table. We grab this lock regardless of whether or
1.6391 + ** not the SQLITE_ReadUncommitted flag is set (the table rooted at page
1.6392 + ** 1 is treated as a special case by queryTableLock() and lockTable()).
1.6393 + */
1.6394 + rc = queryTableLock(p, 1, READ_LOCK);
1.6395 + if( rc!=SQLITE_OK ){
1.6396 + sqlite3BtreeLeave(p);
1.6397 + return rc;
1.6398 + }
1.6399 +
1.6400 + assert( idx>=0 && idx<=15 );
1.6401 + rc = sqlite3PagerGet(pBt->pPager, 1, &pDbPage);
1.6402 + if( rc ){
1.6403 + sqlite3BtreeLeave(p);
1.6404 + return rc;
1.6405 + }
1.6406 + pP1 = (unsigned char *)sqlite3PagerGetData(pDbPage);
1.6407 + *pMeta = get4byte(&pP1[36 + idx*4]);
1.6408 + sqlite3PagerUnref(pDbPage);
1.6409 +
1.6410 + /* If autovacuumed is disabled in this build but we are trying to
1.6411 + ** access an autovacuumed database, then make the database readonly.
1.6412 + */
1.6413 +#ifdef SQLITE_OMIT_AUTOVACUUM
1.6414 + if( idx==4 && *pMeta>0 ) pBt->readOnly = 1;
1.6415 +#endif
1.6416 +
1.6417 + /* Grab the read-lock on page 1. */
1.6418 + rc = lockTable(p, 1, READ_LOCK);
1.6419 + sqlite3BtreeLeave(p);
1.6420 + return rc;
1.6421 +}
1.6422 +
1.6423 +/*
1.6424 +** Write meta-information back into the database. Meta[0] is
1.6425 +** read-only and may not be written.
1.6426 +*/
1.6427 +int sqlite3BtreeUpdateMeta(Btree *p, int idx, u32 iMeta){
1.6428 + BtShared *pBt = p->pBt;
1.6429 + unsigned char *pP1;
1.6430 + int rc;
1.6431 + assert( idx>=1 && idx<=15 );
1.6432 + sqlite3BtreeEnter(p);
1.6433 + pBt->db = p->db;
1.6434 + if( p->inTrans!=TRANS_WRITE ){
1.6435 + rc = pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR;
1.6436 + }else{
1.6437 + assert( pBt->pPage1!=0 );
1.6438 + pP1 = pBt->pPage1->aData;
1.6439 + rc = sqlite3PagerWrite(pBt->pPage1->pDbPage);
1.6440 + if( rc==SQLITE_OK ){
1.6441 + put4byte(&pP1[36 + idx*4], iMeta);
1.6442 +#ifndef SQLITE_OMIT_AUTOVACUUM
1.6443 + if( idx==7 ){
1.6444 + assert( pBt->autoVacuum || iMeta==0 );
1.6445 + assert( iMeta==0 || iMeta==1 );
1.6446 + pBt->incrVacuum = iMeta;
1.6447 + }
1.6448 +#endif
1.6449 + }
1.6450 + }
1.6451 + sqlite3BtreeLeave(p);
1.6452 + return rc;
1.6453 +}
1.6454 +
1.6455 +/*
1.6456 +** Return the flag byte at the beginning of the page that the cursor
1.6457 +** is currently pointing to.
1.6458 +*/
1.6459 +int sqlite3BtreeFlags(BtCursor *pCur){
1.6460 + /* TODO: What about CURSOR_REQUIRESEEK state? Probably need to call
1.6461 + ** restoreCursorPosition() here.
1.6462 + */
1.6463 + MemPage *pPage;
1.6464 + restoreCursorPosition(pCur);
1.6465 + pPage = pCur->pPage;
1.6466 + assert( cursorHoldsMutex(pCur) );
1.6467 + assert( pPage->pBt==pCur->pBt );
1.6468 + return pPage ? pPage->aData[pPage->hdrOffset] : 0;
1.6469 +}
1.6470 +
1.6471 +
1.6472 +/*
1.6473 +** Return the pager associated with a BTree. This routine is used for
1.6474 +** testing and debugging only.
1.6475 +*/
1.6476 +Pager *sqlite3BtreePager(Btree *p){
1.6477 + return p->pBt->pPager;
1.6478 +}
1.6479 +
1.6480 +#ifndef SQLITE_OMIT_INTEGRITY_CHECK
1.6481 +/*
1.6482 +** Append a message to the error message string.
1.6483 +*/
1.6484 +static void checkAppendMsg(
1.6485 + IntegrityCk *pCheck,
1.6486 + char *zMsg1,
1.6487 + const char *zFormat,
1.6488 + ...
1.6489 +){
1.6490 + va_list ap;
1.6491 + if( !pCheck->mxErr ) return;
1.6492 + pCheck->mxErr--;
1.6493 + pCheck->nErr++;
1.6494 + va_start(ap, zFormat);
1.6495 + if( pCheck->errMsg.nChar ){
1.6496 + sqlite3StrAccumAppend(&pCheck->errMsg, "\n", 1);
1.6497 + }
1.6498 + if( zMsg1 ){
1.6499 + sqlite3StrAccumAppend(&pCheck->errMsg, zMsg1, -1);
1.6500 + }
1.6501 + sqlite3VXPrintf(&pCheck->errMsg, 1, zFormat, ap);
1.6502 + va_end(ap);
1.6503 + if( pCheck->errMsg.mallocFailed ){
1.6504 + pCheck->mallocFailed = 1;
1.6505 + }
1.6506 +}
1.6507 +#endif /* SQLITE_OMIT_INTEGRITY_CHECK */
1.6508 +
1.6509 +#ifndef SQLITE_OMIT_INTEGRITY_CHECK
1.6510 +/*
1.6511 +** Add 1 to the reference count for page iPage. If this is the second
1.6512 +** reference to the page, add an error message to pCheck->zErrMsg.
1.6513 +** Return 1 if there are 2 ore more references to the page and 0 if
1.6514 +** if this is the first reference to the page.
1.6515 +**
1.6516 +** Also check that the page number is in bounds.
1.6517 +*/
1.6518 +static int checkRef(IntegrityCk *pCheck, int iPage, char *zContext){
1.6519 + if( iPage==0 ) return 1;
1.6520 + if( iPage>pCheck->nPage || iPage<0 ){
1.6521 + checkAppendMsg(pCheck, zContext, "invalid page number %d", iPage);
1.6522 + return 1;
1.6523 + }
1.6524 + if( pCheck->anRef[iPage]==1 ){
1.6525 + checkAppendMsg(pCheck, zContext, "2nd reference to page %d", iPage);
1.6526 + return 1;
1.6527 + }
1.6528 + return (pCheck->anRef[iPage]++)>1;
1.6529 +}
1.6530 +
1.6531 +#ifndef SQLITE_OMIT_AUTOVACUUM
1.6532 +/*
1.6533 +** Check that the entry in the pointer-map for page iChild maps to
1.6534 +** page iParent, pointer type ptrType. If not, append an error message
1.6535 +** to pCheck.
1.6536 +*/
1.6537 +static void checkPtrmap(
1.6538 + IntegrityCk *pCheck, /* Integrity check context */
1.6539 + Pgno iChild, /* Child page number */
1.6540 + u8 eType, /* Expected pointer map type */
1.6541 + Pgno iParent, /* Expected pointer map parent page number */
1.6542 + char *zContext /* Context description (used for error msg) */
1.6543 +){
1.6544 + int rc;
1.6545 + u8 ePtrmapType;
1.6546 + Pgno iPtrmapParent;
1.6547 +
1.6548 + rc = ptrmapGet(pCheck->pBt, iChild, &ePtrmapType, &iPtrmapParent);
1.6549 + if( rc!=SQLITE_OK ){
1.6550 + checkAppendMsg(pCheck, zContext, "Failed to read ptrmap key=%d", iChild);
1.6551 + return;
1.6552 + }
1.6553 +
1.6554 + if( ePtrmapType!=eType || iPtrmapParent!=iParent ){
1.6555 + checkAppendMsg(pCheck, zContext,
1.6556 + "Bad ptr map entry key=%d expected=(%d,%d) got=(%d,%d)",
1.6557 + iChild, eType, iParent, ePtrmapType, iPtrmapParent);
1.6558 + }
1.6559 +}
1.6560 +#endif
1.6561 +
1.6562 +/*
1.6563 +** Check the integrity of the freelist or of an overflow page list.
1.6564 +** Verify that the number of pages on the list is N.
1.6565 +*/
1.6566 +static void checkList(
1.6567 + IntegrityCk *pCheck, /* Integrity checking context */
1.6568 + int isFreeList, /* True for a freelist. False for overflow page list */
1.6569 + int iPage, /* Page number for first page in the list */
1.6570 + int N, /* Expected number of pages in the list */
1.6571 + char *zContext /* Context for error messages */
1.6572 +){
1.6573 + int i;
1.6574 + int expected = N;
1.6575 + int iFirst = iPage;
1.6576 + while( N-- > 0 && pCheck->mxErr ){
1.6577 + DbPage *pOvflPage;
1.6578 + unsigned char *pOvflData;
1.6579 + if( iPage<1 ){
1.6580 + checkAppendMsg(pCheck, zContext,
1.6581 + "%d of %d pages missing from overflow list starting at %d",
1.6582 + N+1, expected, iFirst);
1.6583 + break;
1.6584 + }
1.6585 + if( checkRef(pCheck, iPage, zContext) ) break;
1.6586 + if( sqlite3PagerGet(pCheck->pPager, (Pgno)iPage, &pOvflPage) ){
1.6587 + checkAppendMsg(pCheck, zContext, "failed to get page %d", iPage);
1.6588 + break;
1.6589 + }
1.6590 + pOvflData = (unsigned char *)sqlite3PagerGetData(pOvflPage);
1.6591 + if( isFreeList ){
1.6592 + int n = get4byte(&pOvflData[4]);
1.6593 +#ifndef SQLITE_OMIT_AUTOVACUUM
1.6594 + if( pCheck->pBt->autoVacuum ){
1.6595 + checkPtrmap(pCheck, iPage, PTRMAP_FREEPAGE, 0, zContext);
1.6596 + }
1.6597 +#endif
1.6598 + if( n>pCheck->pBt->usableSize/4-2 ){
1.6599 + checkAppendMsg(pCheck, zContext,
1.6600 + "freelist leaf count too big on page %d", iPage);
1.6601 + N--;
1.6602 + }else{
1.6603 + for(i=0; i<n; i++){
1.6604 + Pgno iFreePage = get4byte(&pOvflData[8+i*4]);
1.6605 +#ifndef SQLITE_OMIT_AUTOVACUUM
1.6606 + if( pCheck->pBt->autoVacuum ){
1.6607 + checkPtrmap(pCheck, iFreePage, PTRMAP_FREEPAGE, 0, zContext);
1.6608 + }
1.6609 +#endif
1.6610 + checkRef(pCheck, iFreePage, zContext);
1.6611 + }
1.6612 + N -= n;
1.6613 + }
1.6614 + }
1.6615 +#ifndef SQLITE_OMIT_AUTOVACUUM
1.6616 + else{
1.6617 + /* If this database supports auto-vacuum and iPage is not the last
1.6618 + ** page in this overflow list, check that the pointer-map entry for
1.6619 + ** the following page matches iPage.
1.6620 + */
1.6621 + if( pCheck->pBt->autoVacuum && N>0 ){
1.6622 + i = get4byte(pOvflData);
1.6623 + checkPtrmap(pCheck, i, PTRMAP_OVERFLOW2, iPage, zContext);
1.6624 + }
1.6625 + }
1.6626 +#endif
1.6627 + iPage = get4byte(pOvflData);
1.6628 + sqlite3PagerUnref(pOvflPage);
1.6629 + }
1.6630 +}
1.6631 +#endif /* SQLITE_OMIT_INTEGRITY_CHECK */
1.6632 +
1.6633 +#ifndef SQLITE_OMIT_INTEGRITY_CHECK
1.6634 +/*
1.6635 +** Do various sanity checks on a single page of a tree. Return
1.6636 +** the tree depth. Root pages return 0. Parents of root pages
1.6637 +** return 1, and so forth.
1.6638 +**
1.6639 +** These checks are done:
1.6640 +**
1.6641 +** 1. Make sure that cells and freeblocks do not overlap
1.6642 +** but combine to completely cover the page.
1.6643 +** NO 2. Make sure cell keys are in order.
1.6644 +** NO 3. Make sure no key is less than or equal to zLowerBound.
1.6645 +** NO 4. Make sure no key is greater than or equal to zUpperBound.
1.6646 +** 5. Check the integrity of overflow pages.
1.6647 +** 6. Recursively call checkTreePage on all children.
1.6648 +** 7. Verify that the depth of all children is the same.
1.6649 +** 8. Make sure this page is at least 33% full or else it is
1.6650 +** the root of the tree.
1.6651 +*/
1.6652 +static int checkTreePage(
1.6653 + IntegrityCk *pCheck, /* Context for the sanity check */
1.6654 + int iPage, /* Page number of the page to check */
1.6655 + MemPage *pParent, /* Parent page */
1.6656 + char *zParentContext /* Parent context */
1.6657 +){
1.6658 + MemPage *pPage;
1.6659 + int i, rc, depth, d2, pgno, cnt;
1.6660 + int hdr, cellStart;
1.6661 + int nCell;
1.6662 + u8 *data;
1.6663 + BtShared *pBt;
1.6664 + int usableSize;
1.6665 + char zContext[100];
1.6666 + char *hit;
1.6667 +
1.6668 + sqlite3_snprintf(sizeof(zContext), zContext, "Page %d: ", iPage);
1.6669 +
1.6670 + /* Check that the page exists
1.6671 + */
1.6672 + pBt = pCheck->pBt;
1.6673 + usableSize = pBt->usableSize;
1.6674 + if( iPage==0 ) return 0;
1.6675 + if( checkRef(pCheck, iPage, zParentContext) ) return 0;
1.6676 + if( (rc = sqlite3BtreeGetPage(pBt, (Pgno)iPage, &pPage, 0))!=0 ){
1.6677 + checkAppendMsg(pCheck, zContext,
1.6678 + "unable to get the page. error code=%d", rc);
1.6679 + return 0;
1.6680 + }
1.6681 + if( (rc = sqlite3BtreeInitPage(pPage, pParent))!=0 ){
1.6682 + checkAppendMsg(pCheck, zContext,
1.6683 + "sqlite3BtreeInitPage() returns error code %d", rc);
1.6684 + releasePage(pPage);
1.6685 + return 0;
1.6686 + }
1.6687 +
1.6688 + /* Check out all the cells.
1.6689 + */
1.6690 + depth = 0;
1.6691 + for(i=0; i<pPage->nCell && pCheck->mxErr; i++){
1.6692 + u8 *pCell;
1.6693 + int sz;
1.6694 + CellInfo info;
1.6695 +
1.6696 + /* Check payload overflow pages
1.6697 + */
1.6698 + sqlite3_snprintf(sizeof(zContext), zContext,
1.6699 + "On tree page %d cell %d: ", iPage, i);
1.6700 + pCell = findCell(pPage,i);
1.6701 + sqlite3BtreeParseCellPtr(pPage, pCell, &info);
1.6702 + sz = info.nData;
1.6703 + if( !pPage->intKey ) sz += info.nKey;
1.6704 + assert( sz==info.nPayload );
1.6705 + if( sz>info.nLocal ){
1.6706 + int nPage = (sz - info.nLocal + usableSize - 5)/(usableSize - 4);
1.6707 + Pgno pgnoOvfl = get4byte(&pCell[info.iOverflow]);
1.6708 +#ifndef SQLITE_OMIT_AUTOVACUUM
1.6709 + if( pBt->autoVacuum ){
1.6710 + checkPtrmap(pCheck, pgnoOvfl, PTRMAP_OVERFLOW1, iPage, zContext);
1.6711 + }
1.6712 +#endif
1.6713 + checkList(pCheck, 0, pgnoOvfl, nPage, zContext);
1.6714 + }
1.6715 +
1.6716 + /* Check sanity of left child page.
1.6717 + */
1.6718 + if( !pPage->leaf ){
1.6719 + pgno = get4byte(pCell);
1.6720 +#ifndef SQLITE_OMIT_AUTOVACUUM
1.6721 + if( pBt->autoVacuum ){
1.6722 + checkPtrmap(pCheck, pgno, PTRMAP_BTREE, iPage, zContext);
1.6723 + }
1.6724 +#endif
1.6725 + d2 = checkTreePage(pCheck,pgno,pPage,zContext);
1.6726 + if( i>0 && d2!=depth ){
1.6727 + checkAppendMsg(pCheck, zContext, "Child page depth differs");
1.6728 + }
1.6729 + depth = d2;
1.6730 + }
1.6731 + }
1.6732 + if( !pPage->leaf ){
1.6733 + pgno = get4byte(&pPage->aData[pPage->hdrOffset+8]);
1.6734 + sqlite3_snprintf(sizeof(zContext), zContext,
1.6735 + "On page %d at right child: ", iPage);
1.6736 +#ifndef SQLITE_OMIT_AUTOVACUUM
1.6737 + if( pBt->autoVacuum ){
1.6738 + checkPtrmap(pCheck, pgno, PTRMAP_BTREE, iPage, 0);
1.6739 + }
1.6740 +#endif
1.6741 + checkTreePage(pCheck, pgno, pPage, zContext);
1.6742 + }
1.6743 +
1.6744 + /* Check for complete coverage of the page
1.6745 + */
1.6746 + data = pPage->aData;
1.6747 + hdr = pPage->hdrOffset;
1.6748 + hit = sqlite3PageMalloc( pBt->pageSize );
1.6749 + if( hit==0 ){
1.6750 + pCheck->mallocFailed = 1;
1.6751 + }else{
1.6752 + memset(hit, 0, usableSize );
1.6753 + memset(hit, 1, get2byte(&data[hdr+5]));
1.6754 + nCell = get2byte(&data[hdr+3]);
1.6755 + cellStart = hdr + 12 - 4*pPage->leaf;
1.6756 + for(i=0; i<nCell; i++){
1.6757 + int pc = get2byte(&data[cellStart+i*2]);
1.6758 + u16 size = cellSizePtr(pPage, &data[pc]);
1.6759 + int j;
1.6760 + if( (pc+size-1)>=usableSize || pc<0 ){
1.6761 + checkAppendMsg(pCheck, 0,
1.6762 + "Corruption detected in cell %d on page %d",i,iPage,0);
1.6763 + }else{
1.6764 + for(j=pc+size-1; j>=pc; j--) hit[j]++;
1.6765 + }
1.6766 + }
1.6767 + for(cnt=0, i=get2byte(&data[hdr+1]); i>0 && i<usableSize && cnt<10000;
1.6768 + cnt++){
1.6769 + int size = get2byte(&data[i+2]);
1.6770 + int j;
1.6771 + if( (i+size-1)>=usableSize || i<0 ){
1.6772 + checkAppendMsg(pCheck, 0,
1.6773 + "Corruption detected in cell %d on page %d",i,iPage,0);
1.6774 + }else{
1.6775 + for(j=i+size-1; j>=i; j--) hit[j]++;
1.6776 + }
1.6777 + i = get2byte(&data[i]);
1.6778 + }
1.6779 + for(i=cnt=0; i<usableSize; i++){
1.6780 + if( hit[i]==0 ){
1.6781 + cnt++;
1.6782 + }else if( hit[i]>1 ){
1.6783 + checkAppendMsg(pCheck, 0,
1.6784 + "Multiple uses for byte %d of page %d", i, iPage);
1.6785 + break;
1.6786 + }
1.6787 + }
1.6788 + if( cnt!=data[hdr+7] ){
1.6789 + checkAppendMsg(pCheck, 0,
1.6790 + "Fragmented space is %d byte reported as %d on page %d",
1.6791 + cnt, data[hdr+7], iPage);
1.6792 + }
1.6793 + }
1.6794 + sqlite3PageFree(hit);
1.6795 +
1.6796 + releasePage(pPage);
1.6797 + return depth+1;
1.6798 +}
1.6799 +#endif /* SQLITE_OMIT_INTEGRITY_CHECK */
1.6800 +
1.6801 +#ifndef SQLITE_OMIT_INTEGRITY_CHECK
1.6802 +/*
1.6803 +** This routine does a complete check of the given BTree file. aRoot[] is
1.6804 +** an array of pages numbers were each page number is the root page of
1.6805 +** a table. nRoot is the number of entries in aRoot.
1.6806 +**
1.6807 +** Write the number of error seen in *pnErr. Except for some memory
1.6808 +** allocation errors, nn error message is held in memory obtained from
1.6809 +** malloc is returned if *pnErr is non-zero. If *pnErr==0 then NULL is
1.6810 +** returned.
1.6811 +*/
1.6812 +char *sqlite3BtreeIntegrityCheck(
1.6813 + Btree *p, /* The btree to be checked */
1.6814 + int *aRoot, /* An array of root pages numbers for individual trees */
1.6815 + int nRoot, /* Number of entries in aRoot[] */
1.6816 + int mxErr, /* Stop reporting errors after this many */
1.6817 + int *pnErr /* Write number of errors seen to this variable */
1.6818 +){
1.6819 + int i;
1.6820 + int nRef;
1.6821 + IntegrityCk sCheck;
1.6822 + BtShared *pBt = p->pBt;
1.6823 + char zErr[100];
1.6824 +
1.6825 + sqlite3BtreeEnter(p);
1.6826 + pBt->db = p->db;
1.6827 + nRef = sqlite3PagerRefcount(pBt->pPager);
1.6828 + if( lockBtreeWithRetry(p)!=SQLITE_OK ){
1.6829 + *pnErr = 1;
1.6830 + sqlite3BtreeLeave(p);
1.6831 + return sqlite3DbStrDup(0, "cannot acquire a read lock on the database");
1.6832 + }
1.6833 + sCheck.pBt = pBt;
1.6834 + sCheck.pPager = pBt->pPager;
1.6835 + sCheck.nPage = pagerPagecount(sCheck.pPager);
1.6836 + sCheck.mxErr = mxErr;
1.6837 + sCheck.nErr = 0;
1.6838 + sCheck.mallocFailed = 0;
1.6839 + *pnErr = 0;
1.6840 +#ifndef SQLITE_OMIT_AUTOVACUUM
1.6841 + if( pBt->nTrunc!=0 ){
1.6842 + sCheck.nPage = pBt->nTrunc;
1.6843 + }
1.6844 +#endif
1.6845 + if( sCheck.nPage==0 ){
1.6846 + unlockBtreeIfUnused(pBt);
1.6847 + sqlite3BtreeLeave(p);
1.6848 + return 0;
1.6849 + }
1.6850 + sCheck.anRef = sqlite3Malloc( (sCheck.nPage+1)*sizeof(sCheck.anRef[0]) );
1.6851 + if( !sCheck.anRef ){
1.6852 + unlockBtreeIfUnused(pBt);
1.6853 + *pnErr = 1;
1.6854 + sqlite3BtreeLeave(p);
1.6855 + return 0;
1.6856 + }
1.6857 + for(i=0; i<=sCheck.nPage; i++){ sCheck.anRef[i] = 0; }
1.6858 + i = PENDING_BYTE_PAGE(pBt);
1.6859 + if( i<=sCheck.nPage ){
1.6860 + sCheck.anRef[i] = 1;
1.6861 + }
1.6862 + sqlite3StrAccumInit(&sCheck.errMsg, zErr, sizeof(zErr), 20000);
1.6863 +
1.6864 + /* Check the integrity of the freelist
1.6865 + */
1.6866 + checkList(&sCheck, 1, get4byte(&pBt->pPage1->aData[32]),
1.6867 + get4byte(&pBt->pPage1->aData[36]), "Main freelist: ");
1.6868 +
1.6869 + /* Check all the tables.
1.6870 + */
1.6871 + for(i=0; i<nRoot && sCheck.mxErr; i++){
1.6872 + if( aRoot[i]==0 ) continue;
1.6873 +#ifndef SQLITE_OMIT_AUTOVACUUM
1.6874 + if( pBt->autoVacuum && aRoot[i]>1 ){
1.6875 + checkPtrmap(&sCheck, aRoot[i], PTRMAP_ROOTPAGE, 0, 0);
1.6876 + }
1.6877 +#endif
1.6878 + checkTreePage(&sCheck, aRoot[i], 0, "List of tree roots: ");
1.6879 + }
1.6880 +
1.6881 + /* Make sure every page in the file is referenced
1.6882 + */
1.6883 + for(i=1; i<=sCheck.nPage && sCheck.mxErr; i++){
1.6884 +#ifdef SQLITE_OMIT_AUTOVACUUM
1.6885 + if( sCheck.anRef[i]==0 ){
1.6886 + checkAppendMsg(&sCheck, 0, "Page %d is never used", i);
1.6887 + }
1.6888 +#else
1.6889 + /* If the database supports auto-vacuum, make sure no tables contain
1.6890 + ** references to pointer-map pages.
1.6891 + */
1.6892 + if( sCheck.anRef[i]==0 &&
1.6893 + (PTRMAP_PAGENO(pBt, i)!=i || !pBt->autoVacuum) ){
1.6894 + checkAppendMsg(&sCheck, 0, "Page %d is never used", i);
1.6895 + }
1.6896 + if( sCheck.anRef[i]!=0 &&
1.6897 + (PTRMAP_PAGENO(pBt, i)==i && pBt->autoVacuum) ){
1.6898 + checkAppendMsg(&sCheck, 0, "Pointer map page %d is referenced", i);
1.6899 + }
1.6900 +#endif
1.6901 + }
1.6902 +
1.6903 + /* Make sure this analysis did not leave any unref() pages
1.6904 + */
1.6905 + unlockBtreeIfUnused(pBt);
1.6906 + if( nRef != sqlite3PagerRefcount(pBt->pPager) ){
1.6907 + checkAppendMsg(&sCheck, 0,
1.6908 + "Outstanding page count goes from %d to %d during this analysis",
1.6909 + nRef, sqlite3PagerRefcount(pBt->pPager)
1.6910 + );
1.6911 + }
1.6912 +
1.6913 + /* Clean up and report errors.
1.6914 + */
1.6915 + sqlite3BtreeLeave(p);
1.6916 + sqlite3_free(sCheck.anRef);
1.6917 + if( sCheck.mallocFailed ){
1.6918 + sqlite3StrAccumReset(&sCheck.errMsg);
1.6919 + *pnErr = sCheck.nErr+1;
1.6920 + return 0;
1.6921 + }
1.6922 + *pnErr = sCheck.nErr;
1.6923 + if( sCheck.nErr==0 ) sqlite3StrAccumReset(&sCheck.errMsg);
1.6924 + return sqlite3StrAccumFinish(&sCheck.errMsg);
1.6925 +}
1.6926 +#endif /* SQLITE_OMIT_INTEGRITY_CHECK */
1.6927 +
1.6928 +/*
1.6929 +** Return the full pathname of the underlying database file.
1.6930 +**
1.6931 +** The pager filename is invariant as long as the pager is
1.6932 +** open so it is safe to access without the BtShared mutex.
1.6933 +*/
1.6934 +const char *sqlite3BtreeGetFilename(Btree *p){
1.6935 + assert( p->pBt->pPager!=0 );
1.6936 + return sqlite3PagerFilename(p->pBt->pPager);
1.6937 +}
1.6938 +
1.6939 +/*
1.6940 +** Return the pathname of the directory that contains the database file.
1.6941 +**
1.6942 +** The pager directory name is invariant as long as the pager is
1.6943 +** open so it is safe to access without the BtShared mutex.
1.6944 +*/
1.6945 +const char *sqlite3BtreeGetDirname(Btree *p){
1.6946 + assert( p->pBt->pPager!=0 );
1.6947 + return sqlite3PagerDirname(p->pBt->pPager);
1.6948 +}
1.6949 +
1.6950 +/*
1.6951 +** Return the pathname of the journal file for this database. The return
1.6952 +** value of this routine is the same regardless of whether the journal file
1.6953 +** has been created or not.
1.6954 +**
1.6955 +** The pager journal filename is invariant as long as the pager is
1.6956 +** open so it is safe to access without the BtShared mutex.
1.6957 +*/
1.6958 +const char *sqlite3BtreeGetJournalname(Btree *p){
1.6959 + assert( p->pBt->pPager!=0 );
1.6960 + return sqlite3PagerJournalname(p->pBt->pPager);
1.6961 +}
1.6962 +
1.6963 +#ifndef SQLITE_OMIT_VACUUM
1.6964 +/*
1.6965 +** Copy the complete content of pBtFrom into pBtTo. A transaction
1.6966 +** must be active for both files.
1.6967 +**
1.6968 +** The size of file pTo may be reduced by this operation.
1.6969 +** If anything goes wrong, the transaction on pTo is rolled back.
1.6970 +**
1.6971 +** If successful, CommitPhaseOne() may be called on pTo before returning.
1.6972 +** The caller should finish committing the transaction on pTo by calling
1.6973 +** sqlite3BtreeCommit().
1.6974 +*/
1.6975 +static int btreeCopyFile(Btree *pTo, Btree *pFrom){
1.6976 + int rc = SQLITE_OK;
1.6977 + Pgno i;
1.6978 +
1.6979 + Pgno nFromPage; /* Number of pages in pFrom */
1.6980 + Pgno nToPage; /* Number of pages in pTo */
1.6981 + Pgno nNewPage; /* Number of pages in pTo after the copy */
1.6982 +
1.6983 + Pgno iSkip; /* Pending byte page in pTo */
1.6984 + int nToPageSize; /* Page size of pTo in bytes */
1.6985 + int nFromPageSize; /* Page size of pFrom in bytes */
1.6986 +
1.6987 + BtShared *pBtTo = pTo->pBt;
1.6988 + BtShared *pBtFrom = pFrom->pBt;
1.6989 + pBtTo->db = pTo->db;
1.6990 + pBtFrom->db = pFrom->db;
1.6991 +
1.6992 + nToPageSize = pBtTo->pageSize;
1.6993 + nFromPageSize = pBtFrom->pageSize;
1.6994 +
1.6995 + if( pTo->inTrans!=TRANS_WRITE || pFrom->inTrans!=TRANS_WRITE ){
1.6996 + return SQLITE_ERROR;
1.6997 + }
1.6998 + if( pBtTo->pCursor ){
1.6999 + return SQLITE_BUSY;
1.7000 + }
1.7001 +
1.7002 + nToPage = pagerPagecount(pBtTo->pPager);
1.7003 + nFromPage = pagerPagecount(pBtFrom->pPager);
1.7004 + iSkip = PENDING_BYTE_PAGE(pBtTo);
1.7005 +
1.7006 + /* Variable nNewPage is the number of pages required to store the
1.7007 + ** contents of pFrom using the current page-size of pTo.
1.7008 + */
1.7009 + nNewPage = ((i64)nFromPage * (i64)nFromPageSize + (i64)nToPageSize - 1) /
1.7010 + (i64)nToPageSize;
1.7011 +
1.7012 + for(i=1; rc==SQLITE_OK && (i<=nToPage || i<=nNewPage); i++){
1.7013 +
1.7014 + /* Journal the original page.
1.7015 + **
1.7016 + ** iSkip is the page number of the locking page (PENDING_BYTE_PAGE)
1.7017 + ** in database *pTo (before the copy). This page is never written
1.7018 + ** into the journal file. Unless i==iSkip or the page was not
1.7019 + ** present in pTo before the copy operation, journal page i from pTo.
1.7020 + */
1.7021 + if( i!=iSkip && i<=nToPage ){
1.7022 + DbPage *pDbPage = 0;
1.7023 + rc = sqlite3PagerGet(pBtTo->pPager, i, &pDbPage);
1.7024 + if( rc==SQLITE_OK ){
1.7025 + rc = sqlite3PagerWrite(pDbPage);
1.7026 + if( rc==SQLITE_OK && i>nFromPage ){
1.7027 + /* Yeah. It seems wierd to call DontWrite() right after Write(). But
1.7028 + ** that is because the names of those procedures do not exactly
1.7029 + ** represent what they do. Write() really means "put this page in the
1.7030 + ** rollback journal and mark it as dirty so that it will be written
1.7031 + ** to the database file later." DontWrite() undoes the second part of
1.7032 + ** that and prevents the page from being written to the database. The
1.7033 + ** page is still on the rollback journal, though. And that is the
1.7034 + ** whole point of this block: to put pages on the rollback journal.
1.7035 + */
1.7036 + sqlite3PagerDontWrite(pDbPage);
1.7037 + }
1.7038 + sqlite3PagerUnref(pDbPage);
1.7039 + }
1.7040 + }
1.7041 +
1.7042 + /* Overwrite the data in page i of the target database */
1.7043 + if( rc==SQLITE_OK && i!=iSkip && i<=nNewPage ){
1.7044 +
1.7045 + DbPage *pToPage = 0;
1.7046 + sqlite3_int64 iOff;
1.7047 +
1.7048 + rc = sqlite3PagerGet(pBtTo->pPager, i, &pToPage);
1.7049 + if( rc==SQLITE_OK ){
1.7050 + rc = sqlite3PagerWrite(pToPage);
1.7051 + }
1.7052 +
1.7053 + for(
1.7054 + iOff=(i-1)*nToPageSize;
1.7055 + rc==SQLITE_OK && iOff<i*nToPageSize;
1.7056 + iOff += nFromPageSize
1.7057 + ){
1.7058 + DbPage *pFromPage = 0;
1.7059 + Pgno iFrom = (iOff/nFromPageSize)+1;
1.7060 +
1.7061 + if( iFrom==PENDING_BYTE_PAGE(pBtFrom) ){
1.7062 + continue;
1.7063 + }
1.7064 +
1.7065 + rc = sqlite3PagerGet(pBtFrom->pPager, iFrom, &pFromPage);
1.7066 + if( rc==SQLITE_OK ){
1.7067 + char *zTo = sqlite3PagerGetData(pToPage);
1.7068 + char *zFrom = sqlite3PagerGetData(pFromPage);
1.7069 + int nCopy;
1.7070 +
1.7071 + if( nFromPageSize>=nToPageSize ){
1.7072 + zFrom += ((i-1)*nToPageSize - ((iFrom-1)*nFromPageSize));
1.7073 + nCopy = nToPageSize;
1.7074 + }else{
1.7075 + zTo += (((iFrom-1)*nFromPageSize) - (i-1)*nToPageSize);
1.7076 + nCopy = nFromPageSize;
1.7077 + }
1.7078 +
1.7079 + memcpy(zTo, zFrom, nCopy);
1.7080 + sqlite3PagerUnref(pFromPage);
1.7081 + }
1.7082 + }
1.7083 +
1.7084 + if( pToPage ) sqlite3PagerUnref(pToPage);
1.7085 + }
1.7086 + }
1.7087 +
1.7088 + /* If things have worked so far, the database file may need to be
1.7089 + ** truncated. The complex part is that it may need to be truncated to
1.7090 + ** a size that is not an integer multiple of nToPageSize - the current
1.7091 + ** page size used by the pager associated with B-Tree pTo.
1.7092 + **
1.7093 + ** For example, say the page-size of pTo is 2048 bytes and the original
1.7094 + ** number of pages is 5 (10 KB file). If pFrom has a page size of 1024
1.7095 + ** bytes and 9 pages, then the file needs to be truncated to 9KB.
1.7096 + */
1.7097 + if( rc==SQLITE_OK ){
1.7098 + if( nFromPageSize!=nToPageSize ){
1.7099 + sqlite3_file *pFile = sqlite3PagerFile(pBtTo->pPager);
1.7100 + i64 iSize = (i64)nFromPageSize * (i64)nFromPage;
1.7101 + i64 iNow = (i64)((nToPage>nNewPage)?nToPage:nNewPage) * (i64)nToPageSize;
1.7102 + i64 iPending = ((i64)PENDING_BYTE_PAGE(pBtTo)-1) *(i64)nToPageSize;
1.7103 +
1.7104 + assert( iSize<=iNow );
1.7105 +
1.7106 + /* Commit phase one syncs the journal file associated with pTo
1.7107 + ** containing the original data. It does not sync the database file
1.7108 + ** itself. After doing this it is safe to use OsTruncate() and other
1.7109 + ** file APIs on the database file directly.
1.7110 + */
1.7111 + pBtTo->db = pTo->db;
1.7112 + rc = sqlite3PagerCommitPhaseOne(pBtTo->pPager, 0, 0, 1);
1.7113 + if( iSize<iNow && rc==SQLITE_OK ){
1.7114 + rc = sqlite3OsTruncate(pFile, iSize);
1.7115 + }
1.7116 +
1.7117 + /* The loop that copied data from database pFrom to pTo did not
1.7118 + ** populate the locking page of database pTo. If the page-size of
1.7119 + ** pFrom is smaller than that of pTo, this means some data will
1.7120 + ** not have been copied.
1.7121 + **
1.7122 + ** This block copies the missing data from database pFrom to pTo
1.7123 + ** using file APIs. This is safe because at this point we know that
1.7124 + ** all of the original data from pTo has been synced into the
1.7125 + ** journal file. At this point it would be safe to do anything at
1.7126 + ** all to the database file except truncate it to zero bytes.
1.7127 + */
1.7128 + if( rc==SQLITE_OK && nFromPageSize<nToPageSize && iSize>iPending){
1.7129 + i64 iOff;
1.7130 + for(
1.7131 + iOff=iPending;
1.7132 + rc==SQLITE_OK && iOff<(iPending+nToPageSize);
1.7133 + iOff += nFromPageSize
1.7134 + ){
1.7135 + DbPage *pFromPage = 0;
1.7136 + Pgno iFrom = (iOff/nFromPageSize)+1;
1.7137 +
1.7138 + if( iFrom==PENDING_BYTE_PAGE(pBtFrom) || iFrom>nFromPage ){
1.7139 + continue;
1.7140 + }
1.7141 +
1.7142 + rc = sqlite3PagerGet(pBtFrom->pPager, iFrom, &pFromPage);
1.7143 + if( rc==SQLITE_OK ){
1.7144 + char *zFrom = sqlite3PagerGetData(pFromPage);
1.7145 + rc = sqlite3OsWrite(pFile, zFrom, nFromPageSize, iOff);
1.7146 + sqlite3PagerUnref(pFromPage);
1.7147 + }
1.7148 + }
1.7149 + }
1.7150 +
1.7151 + /* Sync the database file */
1.7152 + if( rc==SQLITE_OK ){
1.7153 + rc = sqlite3PagerSync(pBtTo->pPager);
1.7154 + }
1.7155 + }else{
1.7156 + rc = sqlite3PagerTruncate(pBtTo->pPager, nNewPage);
1.7157 + }
1.7158 + if( rc==SQLITE_OK ){
1.7159 + pBtTo->pageSizeFixed = 0;
1.7160 + }
1.7161 + }
1.7162 +
1.7163 + if( rc ){
1.7164 + sqlite3BtreeRollback(pTo);
1.7165 + }
1.7166 +
1.7167 + return rc;
1.7168 +}
1.7169 +int sqlite3BtreeCopyFile(Btree *pTo, Btree *pFrom){
1.7170 + int rc;
1.7171 + sqlite3BtreeEnter(pTo);
1.7172 + sqlite3BtreeEnter(pFrom);
1.7173 + rc = btreeCopyFile(pTo, pFrom);
1.7174 + sqlite3BtreeLeave(pFrom);
1.7175 + sqlite3BtreeLeave(pTo);
1.7176 + return rc;
1.7177 +}
1.7178 +
1.7179 +#endif /* SQLITE_OMIT_VACUUM */
1.7180 +
1.7181 +/*
1.7182 +** Return non-zero if a transaction is active.
1.7183 +*/
1.7184 +int sqlite3BtreeIsInTrans(Btree *p){
1.7185 + assert( p==0 || sqlite3_mutex_held(p->db->mutex) );
1.7186 + return (p && (p->inTrans==TRANS_WRITE));
1.7187 +}
1.7188 +
1.7189 +/*
1.7190 +** Return non-zero if a statement transaction is active.
1.7191 +*/
1.7192 +int sqlite3BtreeIsInStmt(Btree *p){
1.7193 + assert( sqlite3BtreeHoldsMutex(p) );
1.7194 + return (p->pBt && p->pBt->inStmt);
1.7195 +}
1.7196 +
1.7197 +/*
1.7198 +** Return non-zero if a read (or write) transaction is active.
1.7199 +*/
1.7200 +int sqlite3BtreeIsInReadTrans(Btree *p){
1.7201 + assert( sqlite3_mutex_held(p->db->mutex) );
1.7202 + return (p && (p->inTrans!=TRANS_NONE));
1.7203 +}
1.7204 +
1.7205 +/*
1.7206 +** This function returns a pointer to a blob of memory associated with
1.7207 +** a single shared-btree. The memory is used by client code for its own
1.7208 +** purposes (for example, to store a high-level schema associated with
1.7209 +** the shared-btree). The btree layer manages reference counting issues.
1.7210 +**
1.7211 +** The first time this is called on a shared-btree, nBytes bytes of memory
1.7212 +** are allocated, zeroed, and returned to the caller. For each subsequent
1.7213 +** call the nBytes parameter is ignored and a pointer to the same blob
1.7214 +** of memory returned.
1.7215 +**
1.7216 +** If the nBytes parameter is 0 and the blob of memory has not yet been
1.7217 +** allocated, a null pointer is returned. If the blob has already been
1.7218 +** allocated, it is returned as normal.
1.7219 +**
1.7220 +** Just before the shared-btree is closed, the function passed as the
1.7221 +** xFree argument when the memory allocation was made is invoked on the
1.7222 +** blob of allocated memory. This function should not call sqlite3_free()
1.7223 +** on the memory, the btree layer does that.
1.7224 +*/
1.7225 +void *sqlite3BtreeSchema(Btree *p, int nBytes, void(*xFree)(void *)){
1.7226 + BtShared *pBt = p->pBt;
1.7227 + sqlite3BtreeEnter(p);
1.7228 + if( !pBt->pSchema && nBytes ){
1.7229 + pBt->pSchema = sqlite3MallocZero(nBytes);
1.7230 + pBt->xFreeSchema = xFree;
1.7231 + }
1.7232 + sqlite3BtreeLeave(p);
1.7233 + return pBt->pSchema;
1.7234 +}
1.7235 +
1.7236 +/*
1.7237 +** Return true if another user of the same shared btree as the argument
1.7238 +** handle holds an exclusive lock on the sqlite_master table.
1.7239 +*/
1.7240 +int sqlite3BtreeSchemaLocked(Btree *p){
1.7241 + int rc;
1.7242 + assert( sqlite3_mutex_held(p->db->mutex) );
1.7243 + sqlite3BtreeEnter(p);
1.7244 + rc = (queryTableLock(p, MASTER_ROOT, READ_LOCK)!=SQLITE_OK);
1.7245 + sqlite3BtreeLeave(p);
1.7246 + return rc;
1.7247 +}
1.7248 +
1.7249 +
1.7250 +#ifndef SQLITE_OMIT_SHARED_CACHE
1.7251 +/*
1.7252 +** Obtain a lock on the table whose root page is iTab. The
1.7253 +** lock is a write lock if isWritelock is true or a read lock
1.7254 +** if it is false.
1.7255 +*/
1.7256 +int sqlite3BtreeLockTable(Btree *p, int iTab, u8 isWriteLock){
1.7257 + int rc = SQLITE_OK;
1.7258 + if( p->sharable ){
1.7259 + u8 lockType = READ_LOCK + isWriteLock;
1.7260 + assert( READ_LOCK+1==WRITE_LOCK );
1.7261 + assert( isWriteLock==0 || isWriteLock==1 );
1.7262 + sqlite3BtreeEnter(p);
1.7263 + rc = queryTableLock(p, iTab, lockType);
1.7264 + if( rc==SQLITE_OK ){
1.7265 + rc = lockTable(p, iTab, lockType);
1.7266 + }
1.7267 + sqlite3BtreeLeave(p);
1.7268 + }
1.7269 + return rc;
1.7270 +}
1.7271 +#endif
1.7272 +
1.7273 +#ifndef SQLITE_OMIT_INCRBLOB
1.7274 +/*
1.7275 +** Argument pCsr must be a cursor opened for writing on an
1.7276 +** INTKEY table currently pointing at a valid table entry.
1.7277 +** This function modifies the data stored as part of that entry.
1.7278 +** Only the data content may only be modified, it is not possible
1.7279 +** to change the length of the data stored.
1.7280 +*/
1.7281 +int sqlite3BtreePutData(BtCursor *pCsr, u32 offset, u32 amt, void *z){
1.7282 + assert( cursorHoldsMutex(pCsr) );
1.7283 + assert( sqlite3_mutex_held(pCsr->pBtree->db->mutex) );
1.7284 + assert(pCsr->isIncrblobHandle);
1.7285 +
1.7286 + restoreCursorPosition(pCsr);
1.7287 + assert( pCsr->eState!=CURSOR_REQUIRESEEK );
1.7288 + if( pCsr->eState!=CURSOR_VALID ){
1.7289 + return SQLITE_ABORT;
1.7290 + }
1.7291 +
1.7292 + /* Check some preconditions:
1.7293 + ** (a) the cursor is open for writing,
1.7294 + ** (b) there is no read-lock on the table being modified and
1.7295 + ** (c) the cursor points at a valid row of an intKey table.
1.7296 + */
1.7297 + if( !pCsr->wrFlag ){
1.7298 + return SQLITE_READONLY;
1.7299 + }
1.7300 + assert( !pCsr->pBt->readOnly
1.7301 + && pCsr->pBt->inTransaction==TRANS_WRITE );
1.7302 + if( checkReadLocks(pCsr->pBtree, pCsr->pgnoRoot, pCsr, 0) ){
1.7303 + return SQLITE_LOCKED; /* The table pCur points to has a read lock */
1.7304 + }
1.7305 + if( pCsr->eState==CURSOR_INVALID || !pCsr->pPage->intKey ){
1.7306 + return SQLITE_ERROR;
1.7307 + }
1.7308 +
1.7309 + return accessPayload(pCsr, offset, amt, (unsigned char *)z, 0, 1);
1.7310 +}
1.7311 +
1.7312 +/*
1.7313 +** Set a flag on this cursor to cache the locations of pages from the
1.7314 +** overflow list for the current row. This is used by cursors opened
1.7315 +** for incremental blob IO only.
1.7316 +**
1.7317 +** This function sets a flag only. The actual page location cache
1.7318 +** (stored in BtCursor.aOverflow[]) is allocated and used by function
1.7319 +** accessPayload() (the worker function for sqlite3BtreeData() and
1.7320 +** sqlite3BtreePutData()).
1.7321 +*/
1.7322 +void sqlite3BtreeCacheOverflow(BtCursor *pCur){
1.7323 + assert( cursorHoldsMutex(pCur) );
1.7324 + assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) );
1.7325 + assert(!pCur->isIncrblobHandle);
1.7326 + assert(!pCur->aOverflow);
1.7327 + pCur->isIncrblobHandle = 1;
1.7328 +}
1.7329 +#endif