1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/os/persistentdata/persistentstorage/sqlite3api/SQLite/pager.c Fri Jun 15 03:10:57 2012 +0200
1.3 @@ -0,0 +1,4260 @@
1.4 +/*
1.5 +** 2001 September 15
1.6 +**
1.7 +** The author disclaims copyright to this source code. In place of
1.8 +** a legal notice, here is a blessing:
1.9 +**
1.10 +** May you do good and not evil.
1.11 +** May you find forgiveness for yourself and forgive others.
1.12 +** May you share freely, never taking more than you give.
1.13 +**
1.14 +*************************************************************************
1.15 +** This is the implementation of the page cache subsystem or "pager".
1.16 +**
1.17 +** The pager is used to access a database disk file. It implements
1.18 +** atomic commit and rollback through the use of a journal file that
1.19 +** is separate from the database file. The pager also implements file
1.20 +** locking to prevent two processes from writing the same database
1.21 +** file simultaneously, or one process from reading the database while
1.22 +** another is writing.
1.23 +**
1.24 +** @(#) $Id: pager.c,v 1.496 2008/09/29 11:49:48 danielk1977 Exp $
1.25 +*/
1.26 +#ifndef SQLITE_OMIT_DISKIO
1.27 +#include "sqliteInt.h"
1.28 +
1.29 +/*
1.30 +** Macros for troubleshooting. Normally turned off
1.31 +*/
1.32 +#if 0
1.33 +#define sqlite3DebugPrintf printf
1.34 +#define PAGERTRACE1(X) sqlite3DebugPrintf(X)
1.35 +#define PAGERTRACE2(X,Y) sqlite3DebugPrintf(X,Y)
1.36 +#define PAGERTRACE3(X,Y,Z) sqlite3DebugPrintf(X,Y,Z)
1.37 +#define PAGERTRACE4(X,Y,Z,W) sqlite3DebugPrintf(X,Y,Z,W)
1.38 +#define PAGERTRACE5(X,Y,Z,W,V) sqlite3DebugPrintf(X,Y,Z,W,V)
1.39 +#else
1.40 +#define PAGERTRACE1(X)
1.41 +#define PAGERTRACE2(X,Y)
1.42 +#define PAGERTRACE3(X,Y,Z)
1.43 +#define PAGERTRACE4(X,Y,Z,W)
1.44 +#define PAGERTRACE5(X,Y,Z,W,V)
1.45 +#endif
1.46 +
1.47 +/*
1.48 +** The following two macros are used within the PAGERTRACEX() macros above
1.49 +** to print out file-descriptors.
1.50 +**
1.51 +** PAGERID() takes a pointer to a Pager struct as its argument. The
1.52 +** associated file-descriptor is returned. FILEHANDLEID() takes an
1.53 +** sqlite3_file pointer as its argument. Arguments are parenthesized.
1.54 +*/
1.55 +#define PAGERID(p) ((int)((p)->fd))
1.56 +#define FILEHANDLEID(fd) ((int)(fd))
1.57 +
1.58 +/*
1.59 +** The page cache as a whole is always in one of the following
1.60 +** states:
1.61 +**
1.62 +** PAGER_UNLOCK The page cache is not currently reading or
1.63 +** writing the database file. There is no
1.64 +** data held in memory. This is the initial
1.65 +** state.
1.66 +**
1.67 +** PAGER_SHARED The page cache is reading the database.
1.68 +** Writing is not permitted. There can be
1.69 +** multiple readers accessing the same database
1.70 +** file at the same time.
1.71 +**
1.72 +** PAGER_RESERVED This process has reserved the database for writing
1.73 +** but has not yet made any changes. Only one process
1.74 +** at a time can reserve the database. The original
1.75 +** database file has not been modified so other
1.76 +** processes may still be reading the on-disk
1.77 +** database file.
1.78 +**
1.79 +** PAGER_EXCLUSIVE The page cache is writing the database.
1.80 +** Access is exclusive. No other processes or
1.81 +** threads can be reading or writing while one
1.82 +** process is writing.
1.83 +**
1.84 +** PAGER_SYNCED The pager moves to this state from PAGER_EXCLUSIVE
1.85 +** after all dirty pages have been written to the
1.86 +** database file and the file has been synced to
1.87 +** disk. All that remains to do is to remove or
1.88 +** truncate the journal file and the transaction
1.89 +** will be committed.
1.90 +**
1.91 +** The page cache comes up in PAGER_UNLOCK. The first time a
1.92 +** sqlite3PagerGet() occurs, the state transitions to PAGER_SHARED.
1.93 +** After all pages have been released using sqlite3PagerUnref(),
1.94 +** the state transitions back to PAGER_UNLOCK. The first time
1.95 +** that sqlite3PagerWrite() is called, the state transitions to
1.96 +** PAGER_RESERVED. (Note that sqlite3PagerWrite() can only be
1.97 +** called on an outstanding page which means that the pager must
1.98 +** be in PAGER_SHARED before it transitions to PAGER_RESERVED.)
1.99 +** PAGER_RESERVED means that there is an open rollback journal.
1.100 +** The transition to PAGER_EXCLUSIVE occurs before any changes
1.101 +** are made to the database file, though writes to the rollback
1.102 +** journal occurs with just PAGER_RESERVED. After an sqlite3PagerRollback()
1.103 +** or sqlite3PagerCommitPhaseTwo(), the state can go back to PAGER_SHARED,
1.104 +** or it can stay at PAGER_EXCLUSIVE if we are in exclusive access mode.
1.105 +*/
1.106 +#define PAGER_UNLOCK 0
1.107 +#define PAGER_SHARED 1 /* same as SHARED_LOCK */
1.108 +#define PAGER_RESERVED 2 /* same as RESERVED_LOCK */
1.109 +#define PAGER_EXCLUSIVE 4 /* same as EXCLUSIVE_LOCK */
1.110 +#define PAGER_SYNCED 5
1.111 +
1.112 +/*
1.113 +** If the SQLITE_BUSY_RESERVED_LOCK macro is set to true at compile-time,
1.114 +** then failed attempts to get a reserved lock will invoke the busy callback.
1.115 +** This is off by default. To see why, consider the following scenario:
1.116 +**
1.117 +** Suppose thread A already has a shared lock and wants a reserved lock.
1.118 +** Thread B already has a reserved lock and wants an exclusive lock. If
1.119 +** both threads are using their busy callbacks, it might be a long time
1.120 +** before one of the threads gives up and allows the other to proceed.
1.121 +** But if the thread trying to get the reserved lock gives up quickly
1.122 +** (if it never invokes its busy callback) then the contention will be
1.123 +** resolved quickly.
1.124 +*/
1.125 +#ifndef SQLITE_BUSY_RESERVED_LOCK
1.126 +# define SQLITE_BUSY_RESERVED_LOCK 0
1.127 +#endif
1.128 +
1.129 +/*
1.130 +** This macro rounds values up so that if the value is an address it
1.131 +** is guaranteed to be an address that is aligned to an 8-byte boundary.
1.132 +*/
1.133 +#define FORCE_ALIGNMENT(X) (((X)+7)&~7)
1.134 +
1.135 +/*
1.136 +** A macro used for invoking the codec if there is one
1.137 +*/
1.138 +#ifdef SQLITE_HAS_CODEC
1.139 +# define CODEC1(P,D,N,X) if( P->xCodec!=0 ){ P->xCodec(P->pCodecArg,D,N,X); }
1.140 +# define CODEC2(P,D,N,X) ((char*)(P->xCodec!=0?P->xCodec(P->pCodecArg,D,N,X):D))
1.141 +#else
1.142 +# define CODEC1(P,D,N,X) /* NO-OP */
1.143 +# define CODEC2(P,D,N,X) ((char*)D)
1.144 +#endif
1.145 +
1.146 +/*
1.147 +** An open page cache is an instance of the following structure.
1.148 +**
1.149 +** Pager.errCode may be set to SQLITE_IOERR, SQLITE_CORRUPT,
1.150 +** or SQLITE_FULL. Once one of the first two errors occurs, it persists
1.151 +** and is returned as the result of every major pager API call. The
1.152 +** SQLITE_FULL return code is slightly different. It persists only until the
1.153 +** next successful rollback is performed on the pager cache. Also,
1.154 +** SQLITE_FULL does not affect the sqlite3PagerGet() and sqlite3PagerLookup()
1.155 +** APIs, they may still be used successfully.
1.156 +*/
1.157 +struct Pager {
1.158 + sqlite3_vfs *pVfs; /* OS functions to use for IO */
1.159 + u8 journalOpen; /* True if journal file descriptor is valid */
1.160 + u8 journalStarted; /* True if header of journal is synced */
1.161 + u8 useJournal; /* Use a rollback journal on this file */
1.162 + u8 noReadlock; /* Do not bother to obtain readlocks */
1.163 + u8 stmtOpen; /* True if the statement subjournal is open */
1.164 + u8 stmtInUse; /* True if we are in a statement subtransaction */
1.165 + u8 stmtAutoopen; /* Open stmt journal when main journal is opened*/
1.166 + u8 noSync; /* Do not sync the journal if true */
1.167 + u8 fullSync; /* Do extra syncs of the journal for robustness */
1.168 + u8 sync_flags; /* One of SYNC_NORMAL or SYNC_FULL */
1.169 + u8 state; /* PAGER_UNLOCK, _SHARED, _RESERVED, etc. */
1.170 + u8 tempFile; /* zFilename is a temporary file */
1.171 + u8 readOnly; /* True for a read-only database */
1.172 + u8 needSync; /* True if an fsync() is needed on the journal */
1.173 + u8 dirtyCache; /* True if cached pages have changed */
1.174 + u8 alwaysRollback; /* Disable DontRollback() for all pages */
1.175 + u8 memDb; /* True to inhibit all file I/O */
1.176 + u8 setMaster; /* True if a m-j name has been written to jrnl */
1.177 + u8 doNotSync; /* Boolean. While true, do not spill the cache */
1.178 + u8 exclusiveMode; /* Boolean. True if locking_mode==EXCLUSIVE */
1.179 + u8 journalMode; /* One of the PAGER_JOURNALMODE_* values */
1.180 + u8 dbModified; /* True if there are any changes to the Db */
1.181 + u8 changeCountDone; /* Set after incrementing the change-counter */
1.182 + u32 vfsFlags; /* Flags for sqlite3_vfs.xOpen() */
1.183 + int errCode; /* One of several kinds of errors */
1.184 + int dbSize; /* Number of pages in the file */
1.185 + int origDbSize; /* dbSize before the current change */
1.186 + int stmtSize; /* Size of database (in pages) at stmt_begin() */
1.187 + int nRec; /* Number of pages written to the journal */
1.188 + u32 cksumInit; /* Quasi-random value added to every checksum */
1.189 + int stmtNRec; /* Number of records in stmt subjournal */
1.190 + int nExtra; /* Add this many bytes to each in-memory page */
1.191 + int pageSize; /* Number of bytes in a page */
1.192 + int nPage; /* Total number of in-memory pages */
1.193 + int mxPage; /* Maximum number of pages to hold in cache */
1.194 + Pgno mxPgno; /* Maximum allowed size of the database */
1.195 + Bitvec *pInJournal; /* One bit for each page in the database file */
1.196 + Bitvec *pInStmt; /* One bit for each page in the database */
1.197 + Bitvec *pAlwaysRollback; /* One bit for each page marked always-rollback */
1.198 + char *zFilename; /* Name of the database file */
1.199 + char *zJournal; /* Name of the journal file */
1.200 + char *zDirectory; /* Directory holding database and journal files */
1.201 + sqlite3_file *fd, *jfd; /* File descriptors for database and journal */
1.202 + sqlite3_file *stfd; /* File descriptor for the statement subjournal*/
1.203 + BusyHandler *pBusyHandler; /* Pointer to sqlite.busyHandler */
1.204 + i64 journalOff; /* Current byte offset in the journal file */
1.205 + i64 journalHdr; /* Byte offset to previous journal header */
1.206 + i64 stmtHdrOff; /* First journal header written this statement */
1.207 + i64 stmtCksum; /* cksumInit when statement was started */
1.208 + i64 stmtJSize; /* Size of journal at stmt_begin() */
1.209 + int sectorSize; /* Assumed sector size during rollback */
1.210 +#ifdef SQLITE_TEST
1.211 + int nHit, nMiss; /* Cache hits and misses */
1.212 + int nRead, nWrite; /* Database pages read/written */
1.213 +#endif
1.214 + void (*xReiniter)(DbPage*); /* Call this routine when reloading pages */
1.215 +#ifdef SQLITE_HAS_CODEC
1.216 + void *(*xCodec)(void*,void*,Pgno,int); /* Routine for en/decoding data */
1.217 + void *pCodecArg; /* First argument to xCodec() */
1.218 +#endif
1.219 + char *pTmpSpace; /* Pager.pageSize bytes of space for tmp use */
1.220 + char dbFileVers[16]; /* Changes whenever database file changes */
1.221 + i64 journalSizeLimit; /* Size limit for persistent journal files */
1.222 + PCache *pPCache; /* Pointer to page cache object */
1.223 +};
1.224 +
1.225 +/*
1.226 +** The following global variables hold counters used for
1.227 +** testing purposes only. These variables do not exist in
1.228 +** a non-testing build. These variables are not thread-safe.
1.229 +*/
1.230 +#ifdef SQLITE_TEST
1.231 +int sqlite3_pager_readdb_count = 0; /* Number of full pages read from DB */
1.232 +int sqlite3_pager_writedb_count = 0; /* Number of full pages written to DB */
1.233 +int sqlite3_pager_writej_count = 0; /* Number of pages written to journal */
1.234 +# define PAGER_INCR(v) v++
1.235 +#else
1.236 +# define PAGER_INCR(v)
1.237 +#endif
1.238 +
1.239 +
1.240 +
1.241 +/*
1.242 +** Journal files begin with the following magic string. The data
1.243 +** was obtained from /dev/random. It is used only as a sanity check.
1.244 +**
1.245 +** Since version 2.8.0, the journal format contains additional sanity
1.246 +** checking information. If the power fails while the journal is being
1.247 +** written, semi-random garbage data might appear in the journal
1.248 +** file after power is restored. If an attempt is then made
1.249 +** to roll the journal back, the database could be corrupted. The additional
1.250 +** sanity checking data is an attempt to discover the garbage in the
1.251 +** journal and ignore it.
1.252 +**
1.253 +** The sanity checking information for the new journal format consists
1.254 +** of a 32-bit checksum on each page of data. The checksum covers both
1.255 +** the page number and the pPager->pageSize bytes of data for the page.
1.256 +** This cksum is initialized to a 32-bit random value that appears in the
1.257 +** journal file right after the header. The random initializer is important,
1.258 +** because garbage data that appears at the end of a journal is likely
1.259 +** data that was once in other files that have now been deleted. If the
1.260 +** garbage data came from an obsolete journal file, the checksums might
1.261 +** be correct. But by initializing the checksum to random value which
1.262 +** is different for every journal, we minimize that risk.
1.263 +*/
1.264 +static const unsigned char aJournalMagic[] = {
1.265 + 0xd9, 0xd5, 0x05, 0xf9, 0x20, 0xa1, 0x63, 0xd7,
1.266 +};
1.267 +
1.268 +/*
1.269 +** The size of each page record in the journal is determined by the
1.270 +** following macro: the page size plus 8 additional bytes.
1.271 +*/
1.272 +#define JOURNAL_PG_SZ(pPager) (((pPager)->pageSize) + 8)
1.273 +
1.274 +/*
1.275 +** The journal header size for this pager. In the future, this could be
1.276 +** set to some value read from the disk controller. The important
1.277 +** characteristic is that it is the same size as a disk sector.
1.278 +*/
1.279 +#define JOURNAL_HDR_SZ(pPager) ((pPager)->sectorSize)
1.280 +
1.281 +/*
1.282 +** The macro MEMDB is true if we are dealing with an in-memory database.
1.283 +** We do this as a macro so that if the SQLITE_OMIT_MEMORYDB macro is set,
1.284 +** the value of MEMDB will be a constant and the compiler will optimize
1.285 +** out code that would never execute.
1.286 +*/
1.287 +#ifdef SQLITE_OMIT_MEMORYDB
1.288 +# define MEMDB 0
1.289 +#else
1.290 +# define MEMDB pPager->memDb
1.291 +#endif
1.292 +
1.293 +/*
1.294 +** Page number PAGER_MJ_PGNO is never used in an SQLite database (it is
1.295 +** reserved for working around a windows/posix incompatibility). It is
1.296 +** used in the journal to signify that the remainder of the journal file
1.297 +** is devoted to storing a master journal name - there are no more pages to
1.298 +** roll back. See comments for function writeMasterJournal() for details.
1.299 +*/
1.300 +/* #define PAGER_MJ_PGNO(x) (PENDING_BYTE/((x)->pageSize)) */
1.301 +#define PAGER_MJ_PGNO(x) ((PENDING_BYTE/((x)->pageSize))+1)
1.302 +
1.303 +/*
1.304 +** The maximum legal page number is (2^31 - 1).
1.305 +*/
1.306 +#define PAGER_MAX_PGNO 2147483647
1.307 +
1.308 +/*
1.309 +** Return true if page *pPg has already been written to the statement
1.310 +** journal (or statement snapshot has been created, if *pPg is part
1.311 +** of an in-memory database).
1.312 +*/
1.313 +static int pageInStatement(PgHdr *pPg){
1.314 + Pager *pPager = pPg->pPager; /* Needed by the MEMDB macro */
1.315 + if( !MEMDB ){
1.316 + /* File-backed pager: test the page's bit in the pInStmt bitvec. */
1.317 + return sqlite3BitvecTest(pPager->pInStmt, pPg->pgno);
1.318 + }
1.319 + return pPg->apSave[1]!=0; /* In-memory pager: is apSave[1] set? */
1.320 +}
1.321 +
1.322 +/*
1.323 +** Read a 32-bit integer from the given file descriptor. Store the integer
1.324 +** that is read in *pRes. Return SQLITE_OK if everything worked, or an
1.325 +** error code if something goes wrong.
1.326 +**
1.327 +** All values are stored on disk as big-endian.
1.328 +*/
1.329 +static int read32bits(sqlite3_file *fd, i64 offset, u32 *pRes){
1.330 + unsigned char aBuf[4]; /* Raw big-endian bytes read from fd */
1.331 + int rc;
1.332 + rc = sqlite3OsRead(fd, aBuf, sizeof(aBuf), offset);
1.333 + if( rc!=SQLITE_OK ) return rc; /* *pRes is left untouched on error */
1.334 + *pRes = sqlite3Get4byte(aBuf);
1.335 + return SQLITE_OK;
1.336 +}
1.337 +
1.338 +/*
1.339 +** Write a 32-bit integer into a string buffer in big-endian byte order.
1.340 +*/
1.341 +#define put32bits(A,B) sqlite3Put4byte((u8*)A,B)
1.342 +
1.343 +/*
1.344 +** Write a 32-bit integer into the given file descriptor. Return SQLITE_OK
1.345 +** on success or an error code if something goes wrong.
1.346 +*/
1.347 +static int write32bits(sqlite3_file *fd, i64 offset, u32 val){
1.348 + char aBuf[4]; /* Encoded form of val, filled in by put32bits() */
1.349 + put32bits(aBuf, val);
1.350 + return sqlite3OsWrite(fd, aBuf, sizeof(aBuf), offset);
1.351 +}
1.352 +
1.353 +/*
1.354 +** If file pFd is open, call sqlite3OsUnlock() on it.
1.355 +*/
1.356 +static int osUnlock(sqlite3_file *pFd, int eLock){
1.357 + if( pFd->pMethods ){ /* Forward only when methods are attached */
1.358 + return sqlite3OsUnlock(pFd, eLock);
1.359 + }
1.360 + return SQLITE_OK; /* No methods attached: nothing to unlock */
1.361 +}
1.362 +
1.363 +/*
1.364 +** This function determines whether or not the atomic-write optimization
1.365 +** can be used with this pager. The optimization can be used if:
1.366 +**
1.367 +** (a) the value returned by OsDeviceCharacteristics() indicates that
1.368 +** a database page may be written atomically, and
1.369 +** (b) the value returned by OsSectorSize() is less than or equal
1.370 +** to the page size.
1.371 +**
1.372 +** If the optimization cannot be used, 0 is returned. If it can be used,
1.373 +** then the value returned is the size of the journal file when it
1.374 +** contains rollback data for exactly one page.
1.375 +*/
1.376 +#ifdef SQLITE_ENABLE_ATOMIC_WRITE
1.377 +static int jrnlBufferSize(Pager *pPager){
1.378 + sqlite3_file *fd = pPager->fd;
1.379 + int szPage = pPager->pageSize; /* Page size */
1.380 + int dc; /* Device characteristics */
1.381 + int nSector; /* Sector size */
1.382 +
1.383 + assert(SQLITE_IOCAP_ATOMIC512==(512>>8));
1.384 + assert(SQLITE_IOCAP_ATOMIC64K==(65536>>8));
1.385 +
1.386 + /* With no methods attached to the file handle the device is not
1.387 + ** queried and the single-page journal size is reported directly. */
1.388 + if( !fd->pMethods ){
1.389 + return JOURNAL_HDR_SZ(pPager) + JOURNAL_PG_SZ(pPager);
1.390 + }
1.391 + dc = sqlite3OsDeviceCharacteristics(fd);
1.392 + nSector = sqlite3OsSectorSize(fd);
1.393 + if( (dc & (SQLITE_IOCAP_ATOMIC|(szPage>>8))) && nSector<=szPage ){
1.394 + return JOURNAL_HDR_SZ(pPager) + JOURNAL_PG_SZ(pPager);
1.395 + }
1.396 + return 0;
1.397 +}
1.398 +#endif
1.399 +
1.400 +/*
1.401 +** This function should be called when an error occurs within the pager
1.402 +** code. The first argument is a pointer to the pager structure, the
1.403 +** second the error-code about to be returned by a pager API function.
1.404 +** The value returned is a copy of the second argument to this function.
1.405 +**
1.406 +** If the second argument is SQLITE_IOERR, SQLITE_CORRUPT, or SQLITE_FULL
1.407 +** the error becomes persistent. Until the persistent error is cleared,
1.408 +** subsequent API calls on this Pager will immediately return the same
1.409 +** error code.
1.410 +**
1.411 +** A persistent error indicates that the contents of the pager-cache
1.412 +** cannot be trusted. This state can be cleared by completely discarding
1.413 +** the contents of the pager-cache. If a transaction was active when
1.414 +** the persistent error occurred, then the rollback journal may need
1.415 +** to be replayed.
1.416 +*/
1.417 +static void pager_unlock(Pager *pPager);
1.418 +static int pager_error(Pager *pPager, int rc){
1.419 + const int rcLow = rc & 0xff; /* Low byte of the result code */
1.420 + int isSticky; /* True if rc must be remembered */
1.421 +
1.422 + assert(
1.423 + pPager->errCode==SQLITE_FULL ||
1.424 + pPager->errCode==SQLITE_OK ||
1.425 + (pPager->errCode & 0xff)==SQLITE_IOERR
1.426 + );
1.427 +
1.428 + isSticky = (rcLow==SQLITE_FULL || rcLow==SQLITE_IOERR || rcLow==SQLITE_CORRUPT);
1.429 + if( !isSticky ) return rc;
1.430 +
1.431 + /* Record the full (unmasked) code as the pager's persistent error. */
1.432 + pPager->errCode = rc;
1.433 +
1.434 + /* If the pager is already unlocked and no page references are
1.435 + ** outstanding, call pager_unlock() now to clear the error state
1.436 + ** and ensure that the pager-cache is completely empty.
1.437 + */
1.438 + if( pPager->state==PAGER_UNLOCK ){
1.439 + if( sqlite3PcacheRefCount(pPager->pPCache)==0 ) pager_unlock(pPager);
1.440 + }
1.441 + return rc;
1.442 +}
1.443 +
1.444 +/*
1.445 +** If SQLITE_CHECK_PAGES is defined then we do some sanity checking
1.446 +** on the cache using a hash function. This is used for testing
1.447 +** and debugging only.
1.448 +*/
1.449 +#ifdef SQLITE_CHECK_PAGES
1.450 +/*
1.451 +** Return a 32-bit hash of the page data for pPage.
1.452 +*/
1.453 +static u32 pager_datahash(int nByte, unsigned char *pData){
1.454 + const unsigned char *z = pData; /* Next byte to fold into the hash */
1.455 + u32 h = 0;
1.456 + for(; nByte>0; nByte--){
1.457 + h = (h*1039) + *z++;
1.458 + }
1.459 + return h;
1.460 +}
1.461 +static u32 pager_pagehash(PgHdr *pPage){ /* Hash of pPage's pageSize data bytes */
1.462 + return pager_datahash(pPage->pPager->pageSize, (unsigned char *)pPage->pData);
1.463 +}
1.464 +static u32 pager_set_pagehash(PgHdr *pPage){ /* Store the hash in pPage->pageHash */
1.465 + return (pPage->pageHash = pager_pagehash(pPage)); /* non-void: must return */
1.466 +}
1.467 +
1.468 +/*
1.469 +** The CHECK_PAGE macro takes a PgHdr* as an argument. If SQLITE_CHECK_PAGES
1.470 +** is defined, and NDEBUG is not defined, an assert() statement checks
1.471 +** that the page is either dirty or still matches the calculated page-hash.
1.472 +*/
1.473 +#define CHECK_PAGE(x) checkPage(x)
1.474 +static void checkPage(PgHdr *pPg){ /* Assert-only check; no other effect */
1.475 + Pager *pPager = pPg->pPager; /* Also referenced through the MEMDB macro */
1.476 + assert( !pPg->pageHash || pPager->errCode || MEMDB
1.477 + || (pPg->flags&PGHDR_DIRTY) || pPg->pageHash==pager_pagehash(pPg) );
1.478 +}
1.479 +
1.480 +#else
1.481 +#define pager_datahash(X,Y) 0
1.482 +#define pager_pagehash(X) 0
1.483 +#define CHECK_PAGE(x)
1.484 +#endif /* SQLITE_CHECK_PAGES */
1.485 +
1.486 +/*
1.487 +** When this is called the journal file for pager pPager must be open.
1.488 +** The master journal file name is read from the end of the file and
1.489 +** written into memory supplied by the caller.
1.490 +**
1.491 +** zMaster must point to a buffer of at least nMaster bytes allocated by
1.492 +** the caller. This should be sqlite3_vfs.mxPathname+1 (to ensure there is
1.493 +** enough space to write the master journal name). If the master journal
1.494 +** name in the journal is longer than nMaster bytes (including a
1.495 +** nul-terminator), then this is handled as if no master journal name
1.496 +** were present in the journal.
1.497 +**
1.498 +** If no master journal file name is present zMaster[0] is set to 0 and
1.499 +** SQLITE_OK returned.
1.500 +*/
1.501 +static int readMasterJournal(sqlite3_file *pJrnl, char *zMaster, int nMaster){
1.502 + int rc;
1.503 + u32 len; /* Length of the master journal name */
1.504 + i64 szJ; /* Size of the journal file in bytes */
1.505 + u32 cksum; /* Checksum stored in the journal */
1.506 + u32 u; /* Unsigned loop counter */
1.507 + unsigned char aMagic[8]; /* A buffer to hold the magic header */
1.508 +
1.509 + zMaster[0] = '\0';
1.510 +
1.511 + rc = sqlite3OsFileSize(pJrnl, &szJ);
1.512 + if( rc!=SQLITE_OK || szJ<16 ) return rc;
1.513 +
1.514 + rc = read32bits(pJrnl, szJ-16, &len);
1.515 + if( rc!=SQLITE_OK ) return rc;
1.516 +
1.517 + /* Reject a name that cannot fit in zMaster[] or that claims to start
1.518 + ** before the beginning of the file (corrupt length field). */
1.519 + if( len>=(u32)nMaster || (i64)len>szJ-16 ) return SQLITE_OK;
1.520 +
1.521 + rc = read32bits(pJrnl, szJ-12, &cksum);
1.522 + if( rc!=SQLITE_OK ) return rc;
1.523 +
1.524 + rc = sqlite3OsRead(pJrnl, aMagic, 8, szJ-8);
1.525 + if( rc!=SQLITE_OK || memcmp(aMagic, aJournalMagic, 8) ) return rc;
1.526 +
1.527 + rc = sqlite3OsRead(pJrnl, zMaster, len, szJ-16-len);
1.528 + if( rc!=SQLITE_OK ){
1.529 + return rc;
1.530 + }
1.531 + zMaster[len] = '\0';
1.532 +
1.533 + /* See if the checksum matches the master journal name */
1.534 + for(u=0; u<len; u++){
1.535 + cksum -= zMaster[u];
1.536 + }
1.537 + if( cksum ){
1.538 + /* If the checksum doesn't add up, then one or more of the disk sectors
1.539 + ** containing the master journal filename is corrupted. This means
1.540 + ** definitely roll back, so just return SQLITE_OK and report a (nul)
1.541 + ** master-journal filename.
1.542 + */
1.543 + zMaster[0] = '\0';
1.544 + }
1.545 +
1.546 + return SQLITE_OK;
1.547 +}
1.548 +
1.549 +/*
1.550 +** Seek the journal file descriptor to the next sector boundary where a
1.551 +** journal header may be read or written. Pager.journalOff is updated with
1.552 +** the new seek offset.
1.553 +**
1.554 +** i.e for a sector size of 512:
1.555 +**
1.556 +** Input Offset Output Offset
1.557 +** ---------------------------------------
1.558 +** 0 0
1.559 +** 512 512
1.560 +** 100 512
1.561 +** 2000 2048
1.562 +**
1.563 +*/
1.564 +static void seekJournalHdr(Pager *pPager){
1.565 + const i64 iCur = pPager->journalOff; /* Offset before rounding */
1.566 + const i64 szHdr = JOURNAL_HDR_SZ(pPager); /* Header (sector) size */
1.567 + /* Round iCur up to a multiple of szHdr; offset zero stays zero. */
1.568 + const i64 iNext = iCur ? ((iCur-1)/szHdr + 1) * szHdr : 0;
1.569 +
1.570 + assert( iNext%szHdr==0 );
1.571 + assert( iNext>=iCur );
1.572 + assert( (iNext-iCur)<szHdr );
1.573 + pPager->journalOff = iNext;
1.574 +}
1.575 +
1.576 +/*
1.577 +** Write zeros over the header of the journal file. This has the
1.578 +** effect of invalidating the journal file and committing the
1.579 +** transaction.
1.580 +*/
1.581 +static int zeroJournalHdr(Pager *pPager, int doTruncate){
1.582 + static const char zeroHdr[28] = {0};
1.583 + const i64 iLimit = pPager->journalSizeLimit;
1.584 + i64 sz; /* Current size of the journal file */
1.585 + int rc;
1.586 +
1.587 + /* journalOff is zero: leave the journal file untouched. */
1.588 + if( pPager->journalOff==0 ) return SQLITE_OK;
1.589 +
1.590 + /* Either overwrite the header with zeros or truncate the file to 0. */
1.591 + IOTRACE(("JZEROHDR %p\n", pPager))
1.592 + if( !doTruncate && iLimit!=0 ){
1.593 + rc = sqlite3OsWrite(pPager->jfd, zeroHdr, sizeof(zeroHdr), 0);
1.594 + }else{
1.595 + rc = sqlite3OsTruncate(pPager->jfd, 0);
1.596 + }
1.597 + if( rc==SQLITE_OK && !pPager->noSync ){
1.598 + rc = sqlite3OsSync(pPager->jfd, SQLITE_SYNC_DATAONLY|pPager->sync_flags);
1.599 + }
1.600 + if( rc!=SQLITE_OK || iLimit<=0 ) return rc;
1.601 +
1.602 + /* At this point the transaction is committed but the write lock
1.603 + ** is still held on the file. A size limit is configured for the
1.604 + ** persistent journal, so if the journal file currently consumes more
1.605 + ** space than that limit allows for, truncate it now. There is no need
1.606 + ** to sync the file following this operation.
1.607 + */
1.608 + rc = sqlite3OsFileSize(pPager->jfd, &sz);
1.609 + if( rc==SQLITE_OK && sz>iLimit ){
1.610 + rc = sqlite3OsTruncate(pPager->jfd, iLimit);
1.611 + }
1.612 + return rc;
1.613 +}
1.614 +
1.615 +/*
1.616 +** The journal file must be open when this routine is called. A journal
1.617 +** header (JOURNAL_HDR_SZ bytes) is written into the journal file at the
1.618 +** current location.
1.619 +**
1.620 +** The format for the journal header is as follows:
1.621 +** - 8 bytes: Magic identifying journal format.
1.622 +** - 4 bytes: Number of records in journal, or -1 if no-sync mode is on.
1.623 +** - 4 bytes: Random number used for page hash.
1.624 +** - 4 bytes: Initial database page count.
1.625 +** - 4 bytes: Sector size used by the process that wrote this journal.
1.626 +** - 4 bytes: Database page size.
1.627 +**
1.628 +** Followed by (JOURNAL_HDR_SZ - 28) bytes of unused space.
1.629 +*/
1.630 +static int writeJournalHdr(Pager *pPager){
1.631 + int rc = SQLITE_OK;
1.632 + char *zHeader = pPager->pTmpSpace; /* Scratch buffer the header is built in */
1.633 + int nHeader = pPager->pageSize; /* Bytes of zHeader written per OsWrite */
1.634 + int nWrite; /* Bytes of header space written so far */
1.635 +
1.636 + if( nHeader>JOURNAL_HDR_SZ(pPager) ){
1.637 + nHeader = JOURNAL_HDR_SZ(pPager);
1.638 + }
1.639 +
1.640 + if( pPager->stmtHdrOff==0 ){
1.641 + pPager->stmtHdrOff = pPager->journalOff;
1.642 + }
1.643 +
1.644 + seekJournalHdr(pPager);
1.645 + pPager->journalHdr = pPager->journalOff;
1.646 +
1.647 + memcpy(zHeader, aJournalMagic, sizeof(aJournalMagic));
1.648 +
1.649 + /*
1.650 + ** Write the nRec field - the number of page records that follow this
1.651 + ** journal header. Normally zero is written now and overwritten with
1.652 + ** the true record count later (see syncJournal()).
1.653 + **
1.654 + ** A faster alternative is to write 0xFFFFFFFF, which tells a reader
1.655 + ** to assume that the rest of the journal file contains valid page
1.656 + ** records. That is dangerous if a failure occurred while writing the
1.657 + ** journal, except in two scenarios:
1.658 + **
1.659 + ** * The pager is in no-sync mode. Corruption can follow a power
1.660 + ** failure in this case anyway.
1.661 + ** * The SQLITE_IOCAP_SAFE_APPEND flag is set. This guarantees
1.662 + ** that garbage data is never appended to the journal file.
1.663 + */
1.664 + assert(pPager->fd->pMethods||pPager->noSync);
1.665 + if( (pPager->noSync)
1.666 + || (sqlite3OsDeviceCharacteristics(pPager->fd)&SQLITE_IOCAP_SAFE_APPEND)
1.667 + ){
1.668 + put32bits(&zHeader[sizeof(aJournalMagic)], 0xffffffff);
1.669 + }else{
1.670 + put32bits(&zHeader[sizeof(aJournalMagic)], 0);
1.671 + }
1.672 +
1.673 + /* The random check-hash initialiser */
1.674 + sqlite3_randomness(sizeof(pPager->cksumInit), &pPager->cksumInit);
1.675 + put32bits(&zHeader[sizeof(aJournalMagic)+4], pPager->cksumInit);
1.676 + /* The initial database size */
1.677 + put32bits(&zHeader[sizeof(aJournalMagic)+8], pPager->dbSize);
1.678 + /* The assumed sector size for this process */
1.679 + put32bits(&zHeader[sizeof(aJournalMagic)+12], pPager->sectorSize);
1.680 + /* Zero the page-size field and the rest of the buffer so that stale
1.681 + ** scratch bytes from pTmpSpace are never written into the journal. */
1.682 + if( nHeader>(int)sizeof(aJournalMagic)+16 ){
1.683 + memset(&zHeader[sizeof(aJournalMagic)+16], 0, nHeader-(sizeof(aJournalMagic)+16));
1.684 + }
1.685 + if( pPager->journalHdr==0 ){
1.686 + /* The page size */
1.687 + put32bits(&zHeader[sizeof(aJournalMagic)+16], pPager->pageSize);
1.688 + }
1.689 +
1.690 + for(nWrite=0; rc==SQLITE_OK&&nWrite<JOURNAL_HDR_SZ(pPager); nWrite+=nHeader){
1.691 + IOTRACE(("JHDR %p %lld %d\n", pPager, pPager->journalHdr, nHeader))
1.692 + rc = sqlite3OsWrite(pPager->jfd, zHeader, nHeader, pPager->journalOff);
1.693 + pPager->journalOff += nHeader;
1.694 + }
1.695 +
1.696 + return rc;
1.697 +}
1.698 +
1.699 +/*
1.700 +** The journal file must be open when this is called. A journal header file
1.701 +** (JOURNAL_HDR_SZ bytes) is read from the current location in the journal
1.702 +** file. See comments above function writeJournalHdr() for a description of
1.703 +** the journal header format.
1.704 +**
1.705 +** If the header is read successfully, *nRec is set to the number of
1.706 +** page records following this header and *dbSize is set to the size of the
1.707 +** database before the transaction began, in pages. Also, pPager->cksumInit
1.708 +** is set to the value read from the journal header. SQLITE_OK is returned
1.709 +** in this case.
1.710 +**
1.711 +** If the journal header file appears to be corrupted, SQLITE_DONE is
1.712 +** returned and *nRec and *dbSize are not set. If JOURNAL_HDR_SZ bytes
1.713 +** cannot be read from the journal file an error code is returned.
1.714 +*/
1.715 +static int readJournalHdr(
1.716 + Pager *pPager,
1.717 + i64 journalSize,
1.718 + u32 *pNRec,
1.719 + u32 *pDbSize
1.720 +){
1.721 + int rc;
1.722 + unsigned char aMagic[8]; /* A buffer to hold the magic header */
1.723 + i64 jrnlOff;
1.724 + int iPageSize;
1.725 + u32 iSectorSize; /* Sector size recorded in the header */
1.726 +
1.727 + seekJournalHdr(pPager);
1.728 + if( pPager->journalOff+JOURNAL_HDR_SZ(pPager) > journalSize ){
1.729 + return SQLITE_DONE;
1.730 + }
1.731 + jrnlOff = pPager->journalOff;
1.732 +
1.733 + rc = sqlite3OsRead(pPager->jfd, aMagic, sizeof(aMagic), jrnlOff);
1.734 + if( rc ) return rc;
1.735 + jrnlOff += sizeof(aMagic);
1.736 + if( memcmp(aMagic, aJournalMagic, sizeof(aMagic))!=0 ){
1.737 + return SQLITE_DONE;
1.738 + }
1.739 +
1.740 + /* A sector size of zero (or one too large for an int) would make
1.741 + ** JOURNAL_HDR_SZ() unusable as a divisor in seekJournalHdr(); treat
1.742 + ** such a header as corrupt before any output is stored. */
1.743 + rc = read32bits(pPager->jfd, jrnlOff+12, &iSectorSize);
1.744 + if( rc ) return rc;
1.745 + if( iSectorSize==0 || iSectorSize>0x7fffffff ) return SQLITE_DONE;
1.746 +
1.747 + rc = read32bits(pPager->jfd, jrnlOff, pNRec);
1.748 + if( rc ) return rc;
1.749 + rc = read32bits(pPager->jfd, jrnlOff+4, &pPager->cksumInit);
1.750 + if( rc ) return rc;
1.751 + rc = read32bits(pPager->jfd, jrnlOff+8, pDbSize);
1.752 + if( rc ) return rc;
1.753 + rc = read32bits(pPager->jfd, jrnlOff+16, (u32 *)&iPageSize);
1.754 + if( rc==SQLITE_OK
1.755 + && iPageSize>=512
1.756 + && iPageSize<=SQLITE_MAX_PAGE_SIZE
1.757 + && ((iPageSize-1)&iPageSize)==0
1.758 + ){
1.759 + u16 pagesize = iPageSize;
1.760 + rc = sqlite3PagerSetPagesize(pPager, &pagesize);
1.761 + }
1.762 + if( rc ) return rc;
1.763 +
1.764 + /* Adopt the sector size used by the process that created this
1.765 + ** journal. When that was another process this routine is being called
1.766 + ** from within pager_playback(), which restores the local value of
1.767 + ** Pager.sectorSize when it finishes. */
1.768 + pPager->sectorSize = (int)iSectorSize;
1.769 + pPager->journalOff += JOURNAL_HDR_SZ(pPager);
1.770 + return SQLITE_OK;
1.771 +}
1.772 +
1.773 +
1.774 +/*
1.775 +** Write the supplied master journal name into the journal file for pager
1.776 +** pPager at the current location. The master journal name must be the last
1.777 +** thing written to a journal file. If the pager is in full-sync mode, the
1.778 +** journal file descriptor is advanced to the next sector boundary before
1.779 +** anything is written. The format is:
1.780 +**
1.781 +** + 4 bytes: PAGER_MJ_PGNO.
1.782 +** + N bytes: length of master journal name.
1.783 +** + 4 bytes: N
1.784 +** + 4 bytes: Master journal name checksum.
1.785 +** + 8 bytes: aJournalMagic[].
1.786 +**
1.787 +** The master journal page checksum is the sum of the bytes in the master
1.788 +** journal name.
1.789 +**
1.790 +** If zMaster is a NULL pointer (occurs for a single database transaction),
1.791 +** this call is a no-op.
1.792 +*/
1.793 +static int writeMasterJournal(Pager *pPager, const char *zMaster){
1.794 + int rc;
1.795 + int len;
1.796 + int i;
1.797 + i64 jrnlOff;
1.798 + i64 jrnlSize;
1.799 + u32 cksum = 0;
1.800 + char zBuf[sizeof(aJournalMagic)+2*4];
1.801 +
1.802 + if( !zMaster || pPager->setMaster) return SQLITE_OK;
1.803 + pPager->setMaster = 1;
1.804 +
1.805 + len = strlen(zMaster);
1.806 + for(i=0; i<len; i++){
1.807 + cksum += zMaster[i];
1.808 + }
1.809 +
1.810 + /* If in full-sync mode, advance to the next disk sector before writing
1.811 + ** the master journal name. This is in case the previous page written to
1.812 + ** the journal has already been synced.
1.813 + */
1.814 + if( pPager->fullSync ){
1.815 + seekJournalHdr(pPager);
1.816 + }
1.817 + jrnlOff = pPager->journalOff;
1.818 + pPager->journalOff += (len+20);
1.819 +
1.820 + rc = write32bits(pPager->jfd, jrnlOff, PAGER_MJ_PGNO(pPager));
1.821 + if( rc!=SQLITE_OK ) return rc;
1.822 + jrnlOff += 4;
1.823 +
1.824 + rc = sqlite3OsWrite(pPager->jfd, zMaster, len, jrnlOff);
1.825 + if( rc!=SQLITE_OK ) return rc;
1.826 + jrnlOff += len;
1.827 +
1.828 + put32bits(zBuf, len);
1.829 + put32bits(&zBuf[4], cksum);
1.830 + memcpy(&zBuf[8], aJournalMagic, sizeof(aJournalMagic));
1.831 + rc = sqlite3OsWrite(pPager->jfd, zBuf, 8+sizeof(aJournalMagic), jrnlOff);
1.832 + jrnlOff += 8+sizeof(aJournalMagic);
1.833 + pPager->needSync = !pPager->noSync;
1.834 +
1.835 + /* If the pager is in peristent-journal mode, then the physical
1.836 + ** journal-file may extend past the end of the master-journal name
1.837 + ** and 8 bytes of magic data just written to the file. This is
1.838 + ** dangerous because the code to rollback a hot-journal file
1.839 + ** will not be able to find the master-journal name to determine
1.840 + ** whether or not the journal is hot.
1.841 + **
1.842 + ** Easiest thing to do in this scenario is to truncate the journal
1.843 + ** file to the required size.
1.844 + */
1.845 + if( (rc==SQLITE_OK)
1.846 + && (rc = sqlite3OsFileSize(pPager->jfd, &jrnlSize))==SQLITE_OK
1.847 + && jrnlSize>jrnlOff
1.848 + ){
1.849 + rc = sqlite3OsTruncate(pPager->jfd, jrnlOff);
1.850 + }
1.851 + return rc;
1.852 +}
1.853 +
1.854 +/*
1.855 +** Find a page in the hash table given its page number. Return
1.856 +** a pointer to the page or NULL if not found.
1.857 +*/
1.858 +static PgHdr *pager_lookup(Pager *pPager, Pgno pgno){
1.859 + PgHdr *p;
1.860 + sqlite3PcacheFetch(pPager->pPCache, pgno, 0, &p);
1.861 + return p;
1.862 +}
1.863 +
1.864 +/*
1.865 +** Clear the in-memory cache. This routine
1.866 +** sets the state of the pager back to what it was when it was first
1.867 +** opened. Any outstanding pages are invalidated and subsequent attempts
1.868 +** to access those pages will likely result in a coredump.
1.869 +*/
1.870 +static void pager_reset(Pager *pPager){
1.871 + if( pPager->errCode ) return;
1.872 + sqlite3PcacheClear(pPager->pPCache);
1.873 +}
1.874 +
1.875 +/*
1.876 +** Unlock the database file.
1.877 +**
1.878 +** If the pager is currently in error state, discard the contents of
1.879 +** the cache and reset the Pager structure internal state. If there is
1.880 +** an open journal-file, then the next time a shared-lock is obtained
1.881 +** on the pager file (by this or any other process), it will be
1.882 +** treated as a hot-journal and rolled back.
1.883 +*/
1.884 +static void pager_unlock(Pager *pPager){
1.885 + if( !pPager->exclusiveMode ){
1.886 + if( !MEMDB ){
1.887 + int rc = osUnlock(pPager->fd, NO_LOCK);
1.888 + if( rc ) pPager->errCode = rc;
1.889 + pPager->dbSize = -1;
1.890 + IOTRACE(("UNLOCK %p\n", pPager))
1.891 +
1.892 + /* Always close the journal file when dropping the database lock.
1.893 + ** Otherwise, another connection with journal_mode=delete might
1.894 + ** delete the file out from under us.
1.895 + */
1.896 + if( pPager->journalOpen ){
1.897 + sqlite3OsClose(pPager->jfd);
1.898 + pPager->journalOpen = 0;
1.899 + sqlite3BitvecDestroy(pPager->pInJournal);
1.900 + pPager->pInJournal = 0;
1.901 + sqlite3BitvecDestroy(pPager->pAlwaysRollback);
1.902 + pPager->pAlwaysRollback = 0;
1.903 + }
1.904 +
1.905 + /* If Pager.errCode is set, the contents of the pager cache cannot be
1.906 + ** trusted. Now that the pager file is unlocked, the contents of the
1.907 + ** cache can be discarded and the error code safely cleared.
1.908 + */
1.909 + if( pPager->errCode ){
1.910 + if( rc==SQLITE_OK ) pPager->errCode = SQLITE_OK;
1.911 + pager_reset(pPager);
1.912 + if( pPager->stmtOpen ){
1.913 + sqlite3OsClose(pPager->stfd);
1.914 + sqlite3BitvecDestroy(pPager->pInStmt);
1.915 + pPager->pInStmt = 0;
1.916 + }
1.917 + pPager->stmtOpen = 0;
1.918 + pPager->stmtInUse = 0;
1.919 + pPager->journalOff = 0;
1.920 + pPager->journalStarted = 0;
1.921 + pPager->stmtAutoopen = 0;
1.922 + pPager->origDbSize = 0;
1.923 + }
1.924 + }
1.925 +
1.926 + if( !MEMDB || pPager->errCode==SQLITE_OK ){
1.927 + pPager->state = PAGER_UNLOCK;
1.928 + pPager->changeCountDone = 0;
1.929 + }
1.930 + }
1.931 +}
1.932 +
1.933 +/*
1.934 +** Execute a rollback if a transaction is active and unlock the
1.935 +** database file. If the pager has already entered the error state,
1.936 +** do not attempt the rollback.
1.937 +*/
1.938 +static void pagerUnlockAndRollback(Pager *p){
1.939 + if( p->errCode==SQLITE_OK && p->state>=PAGER_RESERVED ){
1.940 + sqlite3BeginBenignMalloc();
1.941 + sqlite3PagerRollback(p);
1.942 + sqlite3EndBenignMalloc();
1.943 + }
1.944 + pager_unlock(p);
1.945 +}
1.946 +
1.947 +/*
1.948 +** This routine ends a transaction. A transaction is ended by either
1.949 +** a COMMIT or a ROLLBACK.
1.950 +**
1.951 +** When this routine is called, the pager has the journal file open and
1.952 +** a RESERVED or EXCLUSIVE lock on the database. This routine will release
1.953 +** the database lock and acquires a SHARED lock in its place if that is
1.954 +** the appropriate thing to do. Release locks usually is appropriate,
1.955 +** unless we are in exclusive access mode or unless this is a
1.956 +** COMMIT AND BEGIN or ROLLBACK AND BEGIN operation.
1.957 +**
1.958 +** The journal file is either deleted or truncated.
1.959 +**
1.960 +** TODO: Consider keeping the journal file open for temporary databases.
1.961 +** This might give a performance improvement on windows where opening
1.962 +** a file is an expensive operation.
1.963 +*/
1.964 +static int pager_end_transaction(Pager *pPager, int hasMaster){
1.965 + int rc = SQLITE_OK;
1.966 + int rc2 = SQLITE_OK;
1.967 + assert( !MEMDB );
1.968 + if( pPager->state<PAGER_RESERVED ){
1.969 + return SQLITE_OK;
1.970 + }
1.971 + sqlite3PagerStmtCommit(pPager);
1.972 + if( pPager->stmtOpen && !pPager->exclusiveMode ){
1.973 + sqlite3OsClose(pPager->stfd);
1.974 + pPager->stmtOpen = 0;
1.975 + }
1.976 + if( pPager->journalOpen ){
1.977 + if( pPager->journalMode==PAGER_JOURNALMODE_TRUNCATE
1.978 + && (rc = sqlite3OsTruncate(pPager->jfd, 0))==SQLITE_OK ){
1.979 + pPager->journalOff = 0;
1.980 + pPager->journalStarted = 0;
1.981 + }else if( pPager->exclusiveMode
1.982 + || pPager->journalMode==PAGER_JOURNALMODE_PERSIST
1.983 + ){
1.984 + rc = zeroJournalHdr(pPager, hasMaster);
1.985 + pager_error(pPager, rc);
1.986 + pPager->journalOff = 0;
1.987 + pPager->journalStarted = 0;
1.988 + }else{
1.989 + assert( pPager->journalMode==PAGER_JOURNALMODE_DELETE || rc );
1.990 + sqlite3OsClose(pPager->jfd);
1.991 + pPager->journalOpen = 0;
1.992 + if( rc==SQLITE_OK && !pPager->tempFile ){
1.993 + rc = sqlite3OsDelete(pPager->pVfs, pPager->zJournal, 0);
1.994 + }
1.995 + }
1.996 + sqlite3BitvecDestroy(pPager->pInJournal);
1.997 + pPager->pInJournal = 0;
1.998 + sqlite3BitvecDestroy(pPager->pAlwaysRollback);
1.999 + pPager->pAlwaysRollback = 0;
1.1000 + sqlite3PcacheCleanAll(pPager->pPCache);
1.1001 +#ifdef SQLITE_CHECK_PAGES
1.1002 + sqlite3PcacheIterate(pPager->pPCache, pager_set_pagehash);
1.1003 +#endif
1.1004 + sqlite3PcacheClearFlags(pPager->pPCache,
1.1005 + PGHDR_IN_JOURNAL | PGHDR_NEED_SYNC
1.1006 + );
1.1007 + pPager->dirtyCache = 0;
1.1008 + pPager->nRec = 0;
1.1009 + }else{
1.1010 + assert( pPager->pInJournal==0 );
1.1011 + }
1.1012 +
1.1013 + if( !pPager->exclusiveMode ){
1.1014 + rc2 = osUnlock(pPager->fd, SHARED_LOCK);
1.1015 + pPager->state = PAGER_SHARED;
1.1016 + }else if( pPager->state==PAGER_SYNCED ){
1.1017 + pPager->state = PAGER_EXCLUSIVE;
1.1018 + }
1.1019 + pPager->origDbSize = 0;
1.1020 + pPager->setMaster = 0;
1.1021 + pPager->needSync = 0;
1.1022 + /* lruListSetFirstSynced(pPager); */
1.1023 + pPager->dbSize = -1;
1.1024 + pPager->dbModified = 0;
1.1025 +
1.1026 + return (rc==SQLITE_OK?rc2:rc);
1.1027 +}
1.1028 +
1.1029 +/*
1.1030 +** Compute and return a checksum for the page of data.
1.1031 +**
1.1032 +** This is not a real checksum. It is really just the sum of the
1.1033 +** random initial value and the page number. We experimented with
1.1034 +** a checksum of the entire data, but that was found to be too slow.
1.1035 +**
1.1036 +** Note that the page number is stored at the beginning of data and
1.1037 +** the checksum is stored at the end. This is important. If journal
1.1038 +** corruption occurs due to a power failure, the most likely scenario
1.1039 +** is that one end or the other of the record will be changed. It is
1.1040 +** much less likely that the two ends of the journal record will be
1.1041 +** correct and the middle be corrupt. Thus, this "checksum" scheme,
1.1042 +** though fast and simple, catches the mostly likely kind of corruption.
1.1043 +**
1.1044 +** FIX ME: Consider adding every 200th (or so) byte of the data to the
1.1045 +** checksum. That way if a single page spans 3 or more disk sectors and
1.1046 +** only the middle sector is corrupt, we will still have a reasonable
1.1047 +** chance of failing the checksum and thus detecting the problem.
1.1048 +*/
1.1049 +static u32 pager_cksum(Pager *pPager, const u8 *aData){
1.1050 + u32 cksum = pPager->cksumInit;
1.1051 + int i = pPager->pageSize-200;
1.1052 + while( i>0 ){
1.1053 + cksum += aData[i];
1.1054 + i -= 200;
1.1055 + }
1.1056 + return cksum;
1.1057 +}
1.1058 +
1.1059 +/* Forward declaration */
1.1060 +static void makeClean(PgHdr*);
1.1061 +
1.1062 +/*
1.1063 +** Read a single page from the journal file opened on file descriptor
1.1064 +** jfd. Playback this one page.
1.1065 +**
1.1066 +** The isMainJrnl flag is true if this is the main rollback journal and
1.1067 +** false for the statement journal. The main rollback journal uses
1.1068 +** checksums - the statement journal does not.
1.1069 +*/
1.1070 +static int pager_playback_one_page(
1.1071 + Pager *pPager, /* The pager being played back */
1.1072 + sqlite3_file *jfd, /* The file that is the journal being rolled back */
1.1073 + i64 offset, /* Offset of the page within the journal */
1.1074 + int isMainJrnl /* True for main rollback journal. False for Stmt jrnl */
1.1075 +){
1.1076 + int rc;
1.1077 + PgHdr *pPg; /* An existing page in the cache */
1.1078 + Pgno pgno; /* The page number of a page in journal */
1.1079 + u32 cksum; /* Checksum used for sanity checking */
1.1080 + u8 *aData = (u8 *)pPager->pTmpSpace; /* Temp storage for a page */
1.1081 +
1.1082 + /* isMainJrnl should be true for the main journal and false for
1.1083 + ** statement journals. Verify that this is always the case
1.1084 + */
1.1085 + assert( jfd == (isMainJrnl ? pPager->jfd : pPager->stfd) );
1.1086 + assert( aData );
1.1087 +
1.1088 + rc = read32bits(jfd, offset, &pgno);
1.1089 + if( rc!=SQLITE_OK ) return rc;
1.1090 + rc = sqlite3OsRead(jfd, aData, pPager->pageSize, offset+4);
1.1091 + if( rc!=SQLITE_OK ) return rc;
1.1092 + pPager->journalOff += pPager->pageSize + 4;
1.1093 +
1.1094 + /* Sanity checking on the page. This is more important that I originally
1.1095 + ** thought. If a power failure occurs while the journal is being written,
1.1096 + ** it could cause invalid data to be written into the journal. We need to
1.1097 + ** detect this invalid data (with high probability) and ignore it.
1.1098 + */
1.1099 + if( pgno==0 || pgno==PAGER_MJ_PGNO(pPager) ){
1.1100 + return SQLITE_DONE;
1.1101 + }
1.1102 + if( pgno>(unsigned)pPager->dbSize ){
1.1103 + return SQLITE_OK;
1.1104 + }
1.1105 + if( isMainJrnl ){
1.1106 + rc = read32bits(jfd, offset+pPager->pageSize+4, &cksum);
1.1107 + if( rc ) return rc;
1.1108 + pPager->journalOff += 4;
1.1109 + if( pager_cksum(pPager, aData)!=cksum ){
1.1110 + return SQLITE_DONE;
1.1111 + }
1.1112 + }
1.1113 +
1.1114 + assert( pPager->state==PAGER_RESERVED || pPager->state>=PAGER_EXCLUSIVE );
1.1115 +
1.1116 + /* If the pager is in RESERVED state, then there must be a copy of this
1.1117 + ** page in the pager cache. In this case just update the pager cache,
1.1118 + ** not the database file. The page is left marked dirty in this case.
1.1119 + **
1.1120 + ** An exception to the above rule: If the database is in no-sync mode
1.1121 + ** and a page is moved during an incremental vacuum then the page may
1.1122 + ** not be in the pager cache. Later: if a malloc() or IO error occurs
1.1123 + ** during a Movepage() call, then the page may not be in the cache
1.1124 + ** either. So the condition described in the above paragraph is not
1.1125 + ** assert()able.
1.1126 + **
1.1127 + ** If in EXCLUSIVE state, then we update the pager cache if it exists
1.1128 + ** and the main file. The page is then marked not dirty.
1.1129 + **
1.1130 + ** Ticket #1171: The statement journal might contain page content that is
1.1131 + ** different from the page content at the start of the transaction.
1.1132 + ** This occurs when a page is changed prior to the start of a statement
1.1133 + ** then changed again within the statement. When rolling back such a
1.1134 + ** statement we must not write to the original database unless we know
1.1135 + ** for certain that original page contents are synced into the main rollback
1.1136 + ** journal. Otherwise, a power loss might leave modified data in the
1.1137 + ** database file without an entry in the rollback journal that can
1.1138 + ** restore the database to its original form. Two conditions must be
1.1139 + ** met before writing to the database files. (1) the database must be
1.1140 + ** locked. (2) we know that the original page content is fully synced
1.1141 + ** in the main journal either because the page is not in cache or else
1.1142 + ** the page is marked as needSync==0.
1.1143 + **
1.1144 + ** 2008-04-14: When attempting to vacuum a corrupt database file, it
1.1145 + ** is possible to fail a statement on a database that does not yet exist.
1.1146 + ** Do not attempt to write if database file has never been opened.
1.1147 + */
1.1148 + pPg = pager_lookup(pPager, pgno);
1.1149 + PAGERTRACE4("PLAYBACK %d page %d hash(%08x)\n",
1.1150 + PAGERID(pPager), pgno, pager_datahash(pPager->pageSize, aData));
1.1151 + if( (pPager->state>=PAGER_EXCLUSIVE)
1.1152 + && (pPg==0 || 0==(pPg->flags&PGHDR_NEED_SYNC))
1.1153 + && (pPager->fd->pMethods)
1.1154 + ){
1.1155 + i64 ofst = (pgno-1)*(i64)pPager->pageSize;
1.1156 + rc = sqlite3OsWrite(pPager->fd, aData, pPager->pageSize, ofst);
1.1157 + }
1.1158 + if( pPg ){
1.1159 + /* No page should ever be explicitly rolled back that is in use, except
1.1160 + ** for page 1 which is held in use in order to keep the lock on the
1.1161 + ** database active. However such a page may be rolled back as a result
1.1162 + ** of an internal error resulting in an automatic call to
1.1163 + ** sqlite3PagerRollback().
1.1164 + */
1.1165 + void *pData;
1.1166 + pData = pPg->pData;
1.1167 + memcpy(pData, aData, pPager->pageSize);
1.1168 + if( pPager->xReiniter ){
1.1169 + pPager->xReiniter(pPg);
1.1170 + }
1.1171 + if( isMainJrnl ) makeClean(pPg);
1.1172 +#ifdef SQLITE_CHECK_PAGES
1.1173 + pPg->pageHash = pager_pagehash(pPg);
1.1174 +#endif
1.1175 + /* If this was page 1, then restore the value of Pager.dbFileVers.
1.1176 + ** Do this before any decoding. */
1.1177 + if( pgno==1 ){
1.1178 + memcpy(&pPager->dbFileVers, &((u8*)pData)[24],sizeof(pPager->dbFileVers));
1.1179 + }
1.1180 +
1.1181 + /* Decode the page just read from disk */
1.1182 + CODEC1(pPager, pData, pPg->pgno, 3);
1.1183 + sqlite3PcacheRelease(pPg);
1.1184 + }
1.1185 + return rc;
1.1186 +}
1.1187 +
1.1188 +/*
1.1189 +** Parameter zMaster is the name of a master journal file. A single journal
1.1190 +** file that referred to the master journal file has just been rolled back.
1.1191 +** This routine checks if it is possible to delete the master journal file,
1.1192 +** and does so if it is.
1.1193 +**
1.1194 +** Argument zMaster may point to Pager.pTmpSpace. So that buffer is not
1.1195 +** available for use within this function.
1.1196 +**
1.1197 +**
1.1198 +** The master journal file contains the names of all child journals.
1.1199 +** To tell if a master journal can be deleted, check to each of the
1.1200 +** children. If all children are either missing or do not refer to
1.1201 +** a different master journal, then this master journal can be deleted.
1.1202 +*/
1.1203 +static int pager_delmaster(Pager *pPager, const char *zMaster){
1.1204 + sqlite3_vfs *pVfs = pPager->pVfs;
1.1205 + int rc;
1.1206 + int master_open = 0;
1.1207 + sqlite3_file *pMaster;
1.1208 + sqlite3_file *pJournal;
1.1209 + char *zMasterJournal = 0; /* Contents of master journal file */
1.1210 + i64 nMasterJournal; /* Size of master journal file */
1.1211 +
1.1212 + /* Open the master journal file exclusively in case some other process
1.1213 + ** is running this routine also. Not that it makes too much difference.
1.1214 + */
1.1215 + pMaster = (sqlite3_file *)sqlite3Malloc(pVfs->szOsFile * 2);
1.1216 + pJournal = (sqlite3_file *)(((u8 *)pMaster) + pVfs->szOsFile);
1.1217 + if( !pMaster ){
1.1218 + rc = SQLITE_NOMEM;
1.1219 + }else{
1.1220 + int flags = (SQLITE_OPEN_READONLY|SQLITE_OPEN_MASTER_JOURNAL);
1.1221 + rc = sqlite3OsOpen(pVfs, zMaster, pMaster, flags, 0);
1.1222 + }
1.1223 + if( rc!=SQLITE_OK ) goto delmaster_out;
1.1224 + master_open = 1;
1.1225 +
1.1226 + rc = sqlite3OsFileSize(pMaster, &nMasterJournal);
1.1227 + if( rc!=SQLITE_OK ) goto delmaster_out;
1.1228 +
1.1229 + if( nMasterJournal>0 ){
1.1230 + char *zJournal;
1.1231 + char *zMasterPtr = 0;
1.1232 + int nMasterPtr = pPager->pVfs->mxPathname+1;
1.1233 +
1.1234 + /* Load the entire master journal file into space obtained from
1.1235 + ** sqlite3_malloc() and pointed to by zMasterJournal.
1.1236 + */
1.1237 + zMasterJournal = (char *)sqlite3Malloc(nMasterJournal + nMasterPtr);
1.1238 + if( !zMasterJournal ){
1.1239 + rc = SQLITE_NOMEM;
1.1240 + goto delmaster_out;
1.1241 + }
1.1242 + zMasterPtr = &zMasterJournal[nMasterJournal];
1.1243 + rc = sqlite3OsRead(pMaster, zMasterJournal, nMasterJournal, 0);
1.1244 + if( rc!=SQLITE_OK ) goto delmaster_out;
1.1245 +
1.1246 + zJournal = zMasterJournal;
1.1247 + while( (zJournal-zMasterJournal)<nMasterJournal ){
1.1248 + int exists;
1.1249 + rc = sqlite3OsAccess(pVfs, zJournal, SQLITE_ACCESS_EXISTS, &exists);
1.1250 + if( rc!=SQLITE_OK ){
1.1251 + goto delmaster_out;
1.1252 + }
1.1253 + if( exists ){
1.1254 + /* One of the journals pointed to by the master journal exists.
1.1255 + ** Open it and check if it points at the master journal. If
1.1256 + ** so, return without deleting the master journal file.
1.1257 + */
1.1258 + int c;
1.1259 + int flags = (SQLITE_OPEN_READONLY|SQLITE_OPEN_MAIN_JOURNAL);
1.1260 + rc = sqlite3OsOpen(pVfs, zJournal, pJournal, flags, 0);
1.1261 + if( rc!=SQLITE_OK ){
1.1262 + goto delmaster_out;
1.1263 + }
1.1264 +
1.1265 + rc = readMasterJournal(pJournal, zMasterPtr, nMasterPtr);
1.1266 + sqlite3OsClose(pJournal);
1.1267 + if( rc!=SQLITE_OK ){
1.1268 + goto delmaster_out;
1.1269 + }
1.1270 +
1.1271 + c = zMasterPtr[0]!=0 && strcmp(zMasterPtr, zMaster)==0;
1.1272 + if( c ){
1.1273 + /* We have a match. Do not delete the master journal file. */
1.1274 + goto delmaster_out;
1.1275 + }
1.1276 + }
1.1277 + zJournal += (strlen(zJournal)+1);
1.1278 + }
1.1279 + }
1.1280 +
1.1281 + rc = sqlite3OsDelete(pVfs, zMaster, 0);
1.1282 +
1.1283 +delmaster_out:
1.1284 + if( zMasterJournal ){
1.1285 + sqlite3_free(zMasterJournal);
1.1286 + }
1.1287 + if( master_open ){
1.1288 + sqlite3OsClose(pMaster);
1.1289 + }
1.1290 + sqlite3_free(pMaster);
1.1291 + return rc;
1.1292 +}
1.1293 +
1.1294 +
1.1295 +static void pager_truncate_cache(Pager *pPager);
1.1296 +
1.1297 +/*
1.1298 +** Truncate the main file of the given pager to the number of pages
1.1299 +** indicated. Also truncate the cached representation of the file.
1.1300 +**
1.1301 +** Might might be the case that the file on disk is smaller than nPage.
1.1302 +** This can happen, for example, if we are in the middle of a transaction
1.1303 +** which has extended the file size and the new pages are still all held
1.1304 +** in cache, then an INSERT or UPDATE does a statement rollback. Some
1.1305 +** operating system implementations can get confused if you try to
1.1306 +** truncate a file to some size that is larger than it currently is,
1.1307 +** so detect this case and write a single zero byte to the end of the new
1.1308 +** file instead.
1.1309 +*/
1.1310 +static int pager_truncate(Pager *pPager, int nPage){
1.1311 + int rc = SQLITE_OK;
1.1312 + if( pPager->state>=PAGER_EXCLUSIVE && pPager->fd->pMethods ){
1.1313 + i64 currentSize, newSize;
1.1314 + rc = sqlite3OsFileSize(pPager->fd, ¤tSize);
1.1315 + newSize = pPager->pageSize*(i64)nPage;
1.1316 + if( rc==SQLITE_OK && currentSize!=newSize ){
1.1317 + if( currentSize>newSize ){
1.1318 + rc = sqlite3OsTruncate(pPager->fd, newSize);
1.1319 + }else{
1.1320 + rc = sqlite3OsWrite(pPager->fd, "", 1, newSize-1);
1.1321 + }
1.1322 + }
1.1323 + }
1.1324 + if( rc==SQLITE_OK ){
1.1325 + pPager->dbSize = nPage;
1.1326 + pager_truncate_cache(pPager);
1.1327 + }
1.1328 + return rc;
1.1329 +}
1.1330 +
1.1331 +/*
1.1332 +** Set the sectorSize for the given pager.
1.1333 +**
1.1334 +** The sector size is at least as big as the sector size reported
1.1335 +** by sqlite3OsSectorSize(). The minimum sector size is 512.
1.1336 +*/
1.1337 +static void setSectorSize(Pager *pPager){
1.1338 + assert(pPager->fd->pMethods||pPager->tempFile);
1.1339 + if( !pPager->tempFile ){
1.1340 + /* Sector size doesn't matter for temporary files. Also, the file
1.1341 + ** may not have been opened yet, in whcih case the OsSectorSize()
1.1342 + ** call will segfault.
1.1343 + */
1.1344 + pPager->sectorSize = sqlite3OsSectorSize(pPager->fd);
1.1345 + }
1.1346 + if( pPager->sectorSize<512 ){
1.1347 + pPager->sectorSize = 512;
1.1348 + }
1.1349 +}
1.1350 +
1.1351 +/*
1.1352 +** Playback the journal and thus restore the database file to
1.1353 +** the state it was in before we started making changes.
1.1354 +**
1.1355 +** The journal file format is as follows:
1.1356 +**
1.1357 +** (1) 8 byte prefix. A copy of aJournalMagic[].
1.1358 +** (2) 4 byte big-endian integer which is the number of valid page records
1.1359 +** in the journal. If this value is 0xffffffff, then compute the
1.1360 +** number of page records from the journal size.
1.1361 +** (3) 4 byte big-endian integer which is the initial value for the
1.1362 +** sanity checksum.
1.1363 +** (4) 4 byte integer which is the number of pages to truncate the
1.1364 +** database to during a rollback.
1.1365 +** (5) 4 byte big-endian integer which is the sector size. The header
1.1366 +** is this many bytes in size.
1.1367 +** (6) 4 byte big-endian integer which is the page case.
1.1368 +** (7) 4 byte integer which is the number of bytes in the master journal
1.1369 +** name. The value may be zero (indicate that there is no master
1.1370 +** journal.)
1.1371 +** (8) N bytes of the master journal name. The name will be nul-terminated
1.1372 +** and might be shorter than the value read from (5). If the first byte
1.1373 +** of the name is \000 then there is no master journal. The master
1.1374 +** journal name is stored in UTF-8.
1.1375 +** (9) Zero or more pages instances, each as follows:
1.1376 +** + 4 byte page number.
1.1377 +** + pPager->pageSize bytes of data.
1.1378 +** + 4 byte checksum
1.1379 +**
1.1380 +** When we speak of the journal header, we mean the first 8 items above.
1.1381 +** Each entry in the journal is an instance of the 9th item.
1.1382 +**
1.1383 +** Call the value from the second bullet "nRec". nRec is the number of
1.1384 +** valid page entries in the journal. In most cases, you can compute the
1.1385 +** value of nRec from the size of the journal file. But if a power
1.1386 +** failure occurred while the journal was being written, it could be the
1.1387 +** case that the size of the journal file had already been increased but
1.1388 +** the extra entries had not yet made it safely to disk. In such a case,
1.1389 +** the value of nRec computed from the file size would be too large. For
1.1390 +** that reason, we always use the nRec value in the header.
1.1391 +**
1.1392 +** If the nRec value is 0xffffffff it means that nRec should be computed
1.1393 +** from the file size. This value is used when the user selects the
1.1394 +** no-sync option for the journal. A power failure could lead to corruption
1.1395 +** in this case. But for things like temporary table (which will be
1.1396 +** deleted when the power is restored) we don't care.
1.1397 +**
1.1398 +** If the file opened as the journal file is not a well-formed
1.1399 +** journal file then all pages up to the first corrupted page are rolled
1.1400 +** back (or no pages if the journal header is corrupted). The journal file
1.1401 +** is then deleted and SQLITE_OK returned, just as if no corruption had
1.1402 +** been encountered.
1.1403 +**
1.1404 +** If an I/O or malloc() error occurs, the journal-file is not deleted
1.1405 +** and an error code is returned.
1.1406 +*/
1.1407 +static int pager_playback(Pager *pPager, int isHot){
1.1408 + sqlite3_vfs *pVfs = pPager->pVfs;
1.1409 + i64 szJ; /* Size of the journal file in bytes */
1.1410 + u32 nRec; /* Number of Records in the journal */
1.1411 + u32 u; /* Unsigned loop counter */
1.1412 + Pgno mxPg = 0; /* Size of the original file in pages */
1.1413 + int rc; /* Result code of a subroutine */
1.1414 + int res = 1; /* Value returned by sqlite3OsAccess() */
1.1415 + char *zMaster = 0; /* Name of master journal file if any */
1.1416 +
1.1417 + /* Figure out how many records are in the journal. Abort early if
1.1418 + ** the journal is empty.
1.1419 + */
1.1420 + assert( pPager->journalOpen );
1.1421 + rc = sqlite3OsFileSize(pPager->jfd, &szJ);
1.1422 + if( rc!=SQLITE_OK || szJ==0 ){
1.1423 + goto end_playback;
1.1424 + }
1.1425 +
1.1426 + /* Read the master journal name from the journal, if it is present.
1.1427 + ** If a master journal file name is specified, but the file is not
1.1428 + ** present on disk, then the journal is not hot and does not need to be
1.1429 + ** played back.
1.1430 + */
1.1431 + zMaster = pPager->pTmpSpace;
1.1432 + rc = readMasterJournal(pPager->jfd, zMaster, pPager->pVfs->mxPathname+1);
1.1433 + if( rc==SQLITE_OK && zMaster[0] ){
1.1434 + rc = sqlite3OsAccess(pVfs, zMaster, SQLITE_ACCESS_EXISTS, &res);
1.1435 + }
1.1436 + zMaster = 0;
1.1437 + if( rc!=SQLITE_OK || !res ){
1.1438 + goto end_playback;
1.1439 + }
1.1440 + pPager->journalOff = 0;
1.1441 +
1.1442 + /* This loop terminates either when the readJournalHdr() call returns
1.1443 + ** SQLITE_DONE or an IO error occurs. */
1.1444 + while( 1 ){
1.1445 +
1.1446 + /* Read the next journal header from the journal file. If there are
1.1447 + ** not enough bytes left in the journal file for a complete header, or
1.1448 + ** it is corrupted, then a process must of failed while writing it.
1.1449 + ** This indicates nothing more needs to be rolled back.
1.1450 + */
1.1451 + rc = readJournalHdr(pPager, szJ, &nRec, &mxPg);
1.1452 + if( rc!=SQLITE_OK ){
1.1453 + if( rc==SQLITE_DONE ){
1.1454 + rc = SQLITE_OK;
1.1455 + }
1.1456 + goto end_playback;
1.1457 + }
1.1458 +
1.1459 + /* If nRec is 0xffffffff, then this journal was created by a process
1.1460 + ** working in no-sync mode. This means that the rest of the journal
1.1461 + ** file consists of pages, there are no more journal headers. Compute
1.1462 + ** the value of nRec based on this assumption.
1.1463 + */
1.1464 + if( nRec==0xffffffff ){
1.1465 + assert( pPager->journalOff==JOURNAL_HDR_SZ(pPager) );
1.1466 + nRec = (szJ - JOURNAL_HDR_SZ(pPager))/JOURNAL_PG_SZ(pPager);
1.1467 + }
1.1468 +
1.1469 + /* If nRec is 0 and this rollback is of a transaction created by this
1.1470 + ** process and if this is the final header in the journal, then it means
1.1471 + ** that this part of the journal was being filled but has not yet been
1.1472 + ** synced to disk. Compute the number of pages based on the remaining
1.1473 + ** size of the file.
1.1474 + **
1.1475 + ** The third term of the test was added to fix ticket #2565.
1.1476 + */
1.1477 + if( nRec==0 && !isHot &&
1.1478 + pPager->journalHdr+JOURNAL_HDR_SZ(pPager)==pPager->journalOff ){
1.1479 + nRec = (szJ - pPager->journalOff) / JOURNAL_PG_SZ(pPager);
1.1480 + }
1.1481 +
1.1482 + /* If this is the first header read from the journal, truncate the
1.1483 + ** database file back to its original size.
1.1484 + */
1.1485 + if( pPager->journalOff==JOURNAL_HDR_SZ(pPager) ){
1.1486 + rc = pager_truncate(pPager, mxPg);
1.1487 + if( rc!=SQLITE_OK ){
1.1488 + goto end_playback;
1.1489 + }
1.1490 + }
1.1491 +
1.1492 + /* Copy original pages out of the journal and back into the database file.
1.1493 + */
1.1494 + for(u=0; u<nRec; u++){
1.1495 + rc = pager_playback_one_page(pPager, pPager->jfd, pPager->journalOff, 1);
1.1496 + if( rc!=SQLITE_OK ){
1.1497 + if( rc==SQLITE_DONE ){
1.1498 + rc = SQLITE_OK;
1.1499 + pPager->journalOff = szJ;
1.1500 + break;
1.1501 + }else{
1.1502 + /* If we are unable to rollback, then the database is probably
1.1503 + ** going to end up being corrupt. It is corrupt to us, anyhow.
1.1504 + ** Perhaps the next process to come along can fix it....
1.1505 + */
1.1506 + rc = SQLITE_CORRUPT_BKPT;
1.1507 + goto end_playback;
1.1508 + }
1.1509 + }
1.1510 + }
1.1511 + }
1.1512 + /*NOTREACHED*/
1.1513 + assert( 0 );
1.1514 +
1.1515 +end_playback:
1.1516 + if( rc==SQLITE_OK ){
1.1517 + zMaster = pPager->pTmpSpace;
1.1518 + rc = readMasterJournal(pPager->jfd, zMaster, pPager->pVfs->mxPathname+1);
1.1519 + }
1.1520 + if( rc==SQLITE_OK ){
1.1521 + rc = pager_end_transaction(pPager, zMaster[0]!='\0');
1.1522 + }
1.1523 + if( rc==SQLITE_OK && zMaster[0] ){
1.1524 + /* If there was a master journal and this routine will return success,
1.1525 + ** see if it is possible to delete the master journal.
1.1526 + */
1.1527 + rc = pager_delmaster(pPager, zMaster);
1.1528 + }
1.1529 +
1.1530 + /* The Pager.sectorSize variable may have been updated while rolling
1.1531 + ** back a journal created by a process with a different sector size
1.1532 + ** value. Reset it to the correct value for this process.
1.1533 + */
1.1534 + setSectorSize(pPager);
1.1535 + return rc;
1.1536 +}
1.1537 +
1.1538 +/*
1.1539 +** Playback the statement journal.
1.1540 +**
1.1541 +** This is similar to playing back the transaction journal but with
1.1542 +** a few extra twists.
1.1543 +**
1.1544 +** (1) The number of pages in the database file at the start of
1.1545 +** the statement is stored in pPager->stmtSize, not in the
1.1546 +** journal file itself.
1.1547 +**
1.1548 +** (2) In addition to playing back the statement journal, also
1.1549 +** playback all pages of the transaction journal beginning
1.1550 +** at offset pPager->stmtJSize.
1.1551 +*/
1.1552 +static int pager_stmt_playback(Pager *pPager){
1.1553 + i64 szJ; /* Size of the full journal */
1.1554 + i64 hdrOff;
1.1555 + int nRec; /* Number of Records */
1.1556 + int i; /* Loop counter */
1.1557 + int rc;
1.1558 +
1.1559 + szJ = pPager->journalOff;
1.1560 +
1.1561 + /* Set hdrOff to be the offset just after the end of the last journal
1.1562 + ** page written before the first journal-header for this statement
1.1563 + ** transaction was written, or the end of the file if no journal
1.1564 + ** header was written.
1.1565 + */
1.1566 + hdrOff = pPager->stmtHdrOff;
1.1567 + assert( pPager->fullSync || !hdrOff );
1.1568 + if( !hdrOff ){
1.1569 + hdrOff = szJ;
1.1570 + }
1.1571 +
1.1572 + /* Truncate the database back to its original size.
1.1573 + */
1.1574 + rc = pager_truncate(pPager, pPager->stmtSize);
1.1575 + assert( pPager->state>=PAGER_SHARED );
1.1576 +
1.1577 + /* Figure out how many records are in the statement journal.
1.1578 + */
1.1579 + assert( pPager->stmtInUse && pPager->journalOpen );
1.1580 + nRec = pPager->stmtNRec;
1.1581 +
1.1582 + /* Copy original pages out of the statement journal and back into the
1.1583 + ** database file. Note that the statement journal omits checksums from
1.1584 + ** each record since power-failure recovery is not important to statement
1.1585 + ** journals.
1.1586 + */
1.1587 + for(i=0; i<nRec; i++){
1.1588 + i64 offset = i*(4+pPager->pageSize);
1.1589 + rc = pager_playback_one_page(pPager, pPager->stfd, offset, 0);
1.1590 + assert( rc!=SQLITE_DONE );
1.1591 + if( rc!=SQLITE_OK ) goto end_stmt_playback;
1.1592 + }
1.1593 +
1.1594 + /* Now roll some pages back from the transaction journal. Pager.stmtJSize
1.1595 + ** was the size of the journal file when this statement was started, so
1.1596 + ** everything after that needs to be rolled back, either into the
1.1597 + ** database, the memory cache, or both.
1.1598 + **
1.1599 + ** If it is not zero, then Pager.stmtHdrOff is the offset to the start
1.1600 + ** of the first journal header written during this statement transaction.
1.1601 + */
1.1602 + pPager->journalOff = pPager->stmtJSize;
1.1603 + pPager->cksumInit = pPager->stmtCksum;
1.1604 + while( pPager->journalOff < hdrOff ){
1.1605 + rc = pager_playback_one_page(pPager, pPager->jfd, pPager->journalOff, 1);
1.1606 + assert( rc!=SQLITE_DONE );
1.1607 + if( rc!=SQLITE_OK ) goto end_stmt_playback;
1.1608 + }
1.1609 +
1.1610 + while( pPager->journalOff < szJ ){
1.1611 + u32 nJRec; /* Number of Journal Records */
1.1612 + u32 dummy;
1.1613 + rc = readJournalHdr(pPager, szJ, &nJRec, &dummy);
1.1614 + if( rc!=SQLITE_OK ){
1.1615 + assert( rc!=SQLITE_DONE );
1.1616 + goto end_stmt_playback;
1.1617 + }
1.1618 + if( nJRec==0 ){
1.1619 + nJRec = (szJ - pPager->journalOff) / (pPager->pageSize+8);
1.1620 + }
1.1621 + for(i=nJRec-1; i>=0 && pPager->journalOff < szJ; i--){
1.1622 + rc = pager_playback_one_page(pPager, pPager->jfd, pPager->journalOff, 1);
1.1623 + assert( rc!=SQLITE_DONE );
1.1624 + if( rc!=SQLITE_OK ) goto end_stmt_playback;
1.1625 + }
1.1626 + }
1.1627 +
1.1628 + pPager->journalOff = szJ;
1.1629 +
1.1630 +end_stmt_playback:
1.1631 + if( rc==SQLITE_OK) {
1.1632 + pPager->journalOff = szJ;
1.1633 + /* pager_reload_cache(pPager); */
1.1634 + }
1.1635 + return rc;
1.1636 +}
1.1637 +
1.1638 +/*
1.1639 +** Change the maximum number of in-memory pages that are allowed.
1.1640 +*/
1.1641 +void sqlite3PagerSetCachesize(Pager *pPager, int mxPage){
1.1642 + sqlite3PcacheSetCachesize(pPager->pPCache, mxPage);
1.1643 +}
1.1644 +
1.1645 +/*
1.1646 +** Adjust the robustness of the database to damage due to OS crashes
1.1647 +** or power failures by changing the number of syncs()s when writing
1.1648 +** the rollback journal. There are three levels:
1.1649 +**
1.1650 +** OFF sqlite3OsSync() is never called. This is the default
1.1651 +** for temporary and transient files.
1.1652 +**
1.1653 +** NORMAL The journal is synced once before writes begin on the
1.1654 +** database. This is normally adequate protection, but
1.1655 +** it is theoretically possible, though very unlikely,
1.1656 +** that an inopertune power failure could leave the journal
1.1657 +** in a state which would cause damage to the database
1.1658 +** when it is rolled back.
1.1659 +**
1.1660 +** FULL The journal is synced twice before writes begin on the
1.1661 +** database (with some additional information - the nRec field
1.1662 +** of the journal header - being written in between the two
1.1663 +** syncs). If we assume that writing a
1.1664 +** single disk sector is atomic, then this mode provides
1.1665 +** assurance that the journal will not be corrupted to the
1.1666 +** point of causing damage to the database during rollback.
1.1667 +**
1.1668 +** Numeric values associated with these states are OFF==1, NORMAL=2,
1.1669 +** and FULL=3.
1.1670 +*/
1.1671 +#ifndef SQLITE_OMIT_PAGER_PRAGMAS
1.1672 +void sqlite3PagerSetSafetyLevel(Pager *pPager, int level, int bFullFsync){
1.1673 + pPager->noSync = level==1 || pPager->tempFile || MEMDB;
1.1674 + pPager->fullSync = level==3 && !pPager->tempFile;
1.1675 + pPager->sync_flags = (bFullFsync?SQLITE_SYNC_FULL:SQLITE_SYNC_NORMAL);
1.1676 + if( pPager->noSync ) pPager->needSync = 0;
1.1677 +}
1.1678 +#endif
1.1679 +
/*
** The following global variable is incremented whenever the library
** attempts to open a temporary file.  This information is used for
** testing and analysis only, and the variable exists only in builds
** where SQLITE_TEST is defined.
*/
#ifdef SQLITE_TEST
int sqlite3_opentemp_count = 0;
#endif
1.1688 +
1.1689 +/*
1.1690 +** Open a temporary file.
1.1691 +**
1.1692 +** Write the file descriptor into *fd. Return SQLITE_OK on success or some
1.1693 +** other error code if we fail. The OS will automatically delete the temporary
1.1694 +** file when it is closed.
1.1695 +*/
1.1696 +static int sqlite3PagerOpentemp(
1.1697 + Pager *pPager, /* The pager object */
1.1698 + sqlite3_file *pFile, /* Write the file descriptor here */
1.1699 + int vfsFlags /* Flags passed through to the VFS */
1.1700 +){
1.1701 + int rc;
1.1702 +
1.1703 +#ifdef SQLITE_TEST
1.1704 + sqlite3_opentemp_count++; /* Used for testing and analysis only */
1.1705 +#endif
1.1706 +
1.1707 + vfsFlags |= SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE |
1.1708 + SQLITE_OPEN_EXCLUSIVE | SQLITE_OPEN_DELETEONCLOSE;
1.1709 + rc = sqlite3OsOpen(pPager->pVfs, 0, pFile, vfsFlags, 0);
1.1710 + assert( rc!=SQLITE_OK || pFile->pMethods );
1.1711 + return rc;
1.1712 +}
1.1713 +
/* Forward declaration.  sqlite3PagerOpen() passes pagerStress() to
** sqlite3PcacheOpen() for every pager that is not an in-memory database.
*/
static int pagerStress(void *,PgHdr *);
1.1715 +
1.1716 +/*
1.1717 +** Create a new page cache and put a pointer to the page cache in *ppPager.
1.1718 +** The file to be cached need not exist. The file is not locked until
1.1719 +** the first call to sqlite3PagerGet() and is only held open until the
1.1720 +** last page is released using sqlite3PagerUnref().
1.1721 +**
1.1722 +** If zFilename is NULL then a randomly-named temporary file is created
1.1723 +** and used as the file to be cached. The file will be deleted
1.1724 +** automatically when it is closed.
1.1725 +**
1.1726 +** If zFilename is ":memory:" then all information is held in cache.
1.1727 +** It is never written to disk. This can be used to implement an
1.1728 +** in-memory database.
1.1729 +*/
1.1730 +int sqlite3PagerOpen(
1.1731 + sqlite3_vfs *pVfs, /* The virtual file system to use */
1.1732 + Pager **ppPager, /* Return the Pager structure here */
1.1733 + const char *zFilename, /* Name of the database file to open */
1.1734 + int nExtra, /* Extra bytes append to each in-memory page */
1.1735 + int flags, /* flags controlling this file */
1.1736 + int vfsFlags /* flags passed through to sqlite3_vfs.xOpen() */
1.1737 +){
1.1738 + u8 *pPtr;
1.1739 + Pager *pPager = 0;
1.1740 + int rc = SQLITE_OK;
1.1741 + int i;
1.1742 + int tempFile = 0;
1.1743 + int memDb = 0;
1.1744 + int readOnly = 0;
1.1745 + int useJournal = (flags & PAGER_OMIT_JOURNAL)==0;
1.1746 + int noReadlock = (flags & PAGER_NO_READLOCK)!=0;
1.1747 + int journalFileSize = sqlite3JournalSize(pVfs);
1.1748 + int pcacheSize = sqlite3PcacheSize();
1.1749 + int szPageDflt = SQLITE_DEFAULT_PAGE_SIZE;
1.1750 + char *zPathname = 0;
1.1751 + int nPathname = 0;
1.1752 +
1.1753 + /* The default return is a NULL pointer */
1.1754 + *ppPager = 0;
1.1755 +
1.1756 + /* Compute and store the full pathname in an allocated buffer pointed
1.1757 + ** to by zPathname, length nPathname. Or, if this is a temporary file,
1.1758 + ** leave both nPathname and zPathname set to 0.
1.1759 + */
1.1760 + if( zFilename && zFilename[0] ){
1.1761 + nPathname = pVfs->mxPathname+1;
1.1762 + zPathname = sqlite3Malloc(nPathname*2);
1.1763 + if( zPathname==0 ){
1.1764 + return SQLITE_NOMEM;
1.1765 + }
1.1766 +#ifndef SQLITE_OMIT_MEMORYDB
1.1767 + if( strcmp(zFilename,":memory:")==0 ){
1.1768 + memDb = 1;
1.1769 + zPathname[0] = 0;
1.1770 + useJournal = 0;
1.1771 + }else
1.1772 +#endif
1.1773 + {
1.1774 + rc = sqlite3OsFullPathname(pVfs, zFilename, nPathname, zPathname);
1.1775 + }
1.1776 + if( rc!=SQLITE_OK ){
1.1777 + sqlite3_free(zPathname);
1.1778 + return rc;
1.1779 + }
1.1780 + nPathname = strlen(zPathname);
1.1781 + }
1.1782 +
1.1783 + /* Allocate memory for the pager structure */
1.1784 + pPager = sqlite3MallocZero(
1.1785 + sizeof(*pPager) + /* Pager structure */
1.1786 + pcacheSize + /* PCache object */
1.1787 + journalFileSize + /* The journal file structure */
1.1788 + pVfs->szOsFile * 3 + /* The main db and two journal files */
1.1789 + 3*nPathname + 40 /* zFilename, zDirectory, zJournal */
1.1790 + );
1.1791 + if( !pPager ){
1.1792 + sqlite3_free(zPathname);
1.1793 + return SQLITE_NOMEM;
1.1794 + }
1.1795 + pPager->pPCache = (PCache *)&pPager[1];
1.1796 + pPtr = ((u8 *)&pPager[1]) + pcacheSize;
1.1797 + pPager->vfsFlags = vfsFlags;
1.1798 + pPager->fd = (sqlite3_file*)&pPtr[pVfs->szOsFile*0];
1.1799 + pPager->stfd = (sqlite3_file*)&pPtr[pVfs->szOsFile*1];
1.1800 + pPager->jfd = (sqlite3_file*)&pPtr[pVfs->szOsFile*2];
1.1801 + pPager->zFilename = (char*)&pPtr[pVfs->szOsFile*2+journalFileSize];
1.1802 + pPager->zDirectory = &pPager->zFilename[nPathname+1];
1.1803 + pPager->zJournal = &pPager->zDirectory[nPathname+1];
1.1804 + pPager->pVfs = pVfs;
1.1805 + if( zPathname ){
1.1806 + memcpy(pPager->zFilename, zPathname, nPathname+1);
1.1807 + sqlite3_free(zPathname);
1.1808 + }
1.1809 +
1.1810 + /* Open the pager file.
1.1811 + */
1.1812 + if( zFilename && zFilename[0] && !memDb ){
1.1813 + if( nPathname>(pVfs->mxPathname - sizeof("-journal")) ){
1.1814 + rc = SQLITE_CANTOPEN;
1.1815 + }else{
1.1816 + int fout = 0;
1.1817 + rc = sqlite3OsOpen(pVfs, pPager->zFilename, pPager->fd,
1.1818 + pPager->vfsFlags, &fout);
1.1819 + readOnly = (fout&SQLITE_OPEN_READONLY);
1.1820 +
1.1821 + /* If the file was successfully opened for read/write access,
1.1822 + ** choose a default page size in case we have to create the
1.1823 + ** database file. The default page size is the maximum of:
1.1824 + **
1.1825 + ** + SQLITE_DEFAULT_PAGE_SIZE,
1.1826 + ** + The value returned by sqlite3OsSectorSize()
1.1827 + ** + The largest page size that can be written atomically.
1.1828 + */
1.1829 + if( rc==SQLITE_OK && !readOnly ){
1.1830 + int iSectorSize = sqlite3OsSectorSize(pPager->fd);
1.1831 + if( szPageDflt<iSectorSize ){
1.1832 + szPageDflt = iSectorSize;
1.1833 + }
1.1834 +#ifdef SQLITE_ENABLE_ATOMIC_WRITE
1.1835 + {
1.1836 + int iDc = sqlite3OsDeviceCharacteristics(pPager->fd);
1.1837 + int ii;
1.1838 + assert(SQLITE_IOCAP_ATOMIC512==(512>>8));
1.1839 + assert(SQLITE_IOCAP_ATOMIC64K==(65536>>8));
1.1840 + assert(SQLITE_MAX_DEFAULT_PAGE_SIZE<=65536);
1.1841 + for(ii=szPageDflt; ii<=SQLITE_MAX_DEFAULT_PAGE_SIZE; ii=ii*2){
1.1842 + if( iDc&(SQLITE_IOCAP_ATOMIC|(ii>>8)) ) szPageDflt = ii;
1.1843 + }
1.1844 + }
1.1845 +#endif
1.1846 + if( szPageDflt>SQLITE_MAX_DEFAULT_PAGE_SIZE ){
1.1847 + szPageDflt = SQLITE_MAX_DEFAULT_PAGE_SIZE;
1.1848 + }
1.1849 + }
1.1850 + }
1.1851 + }else if( !memDb ){
1.1852 + /* If a temporary file is requested, it is not opened immediately.
1.1853 + ** In this case we accept the default page size and delay actually
1.1854 + ** opening the file until the first call to OsWrite().
1.1855 + */
1.1856 + tempFile = 1;
1.1857 + pPager->state = PAGER_EXCLUSIVE;
1.1858 + }
1.1859 +
1.1860 + if( pPager && rc==SQLITE_OK ){
1.1861 + pPager->pTmpSpace = sqlite3PageMalloc(szPageDflt);
1.1862 + }
1.1863 +
1.1864 + /* If an error occured in either of the blocks above.
1.1865 + ** Free the Pager structure and close the file.
1.1866 + ** Since the pager is not allocated there is no need to set
1.1867 + ** any Pager.errMask variables.
1.1868 + */
1.1869 + if( !pPager || !pPager->pTmpSpace ){
1.1870 + sqlite3OsClose(pPager->fd);
1.1871 + sqlite3_free(pPager);
1.1872 + return ((rc==SQLITE_OK)?SQLITE_NOMEM:rc);
1.1873 + }
1.1874 + nExtra = FORCE_ALIGNMENT(nExtra);
1.1875 + sqlite3PcacheOpen(szPageDflt, nExtra, !memDb,
1.1876 + !memDb?pagerStress:0, (void *)pPager, pPager->pPCache);
1.1877 +
1.1878 + PAGERTRACE3("OPEN %d %s\n", FILEHANDLEID(pPager->fd), pPager->zFilename);
1.1879 + IOTRACE(("OPEN %p %s\n", pPager, pPager->zFilename))
1.1880 +
1.1881 + /* Fill in Pager.zDirectory[] */
1.1882 + memcpy(pPager->zDirectory, pPager->zFilename, nPathname+1);
1.1883 + for(i=strlen(pPager->zDirectory); i>0 && pPager->zDirectory[i-1]!='/'; i--){}
1.1884 + if( i>0 ) pPager->zDirectory[i-1] = 0;
1.1885 +
1.1886 + /* Fill in Pager.zJournal[] */
1.1887 + if( zPathname ){
1.1888 + memcpy(pPager->zJournal, pPager->zFilename, nPathname);
1.1889 + memcpy(&pPager->zJournal[nPathname], "-journal", 9);
1.1890 + }else{
1.1891 + pPager->zJournal = 0;
1.1892 + }
1.1893 +
1.1894 + /* pPager->journalOpen = 0; */
1.1895 + pPager->useJournal = useJournal;
1.1896 + pPager->noReadlock = noReadlock && readOnly;
1.1897 + /* pPager->stmtOpen = 0; */
1.1898 + /* pPager->stmtInUse = 0; */
1.1899 + /* pPager->nRef = 0; */
1.1900 + pPager->dbSize = memDb-1;
1.1901 + pPager->pageSize = szPageDflt;
1.1902 + /* pPager->stmtSize = 0; */
1.1903 + /* pPager->stmtJSize = 0; */
1.1904 + /* pPager->nPage = 0; */
1.1905 + pPager->mxPage = 100;
1.1906 + pPager->mxPgno = SQLITE_MAX_PAGE_COUNT;
1.1907 + /* pPager->state = PAGER_UNLOCK; */
1.1908 + assert( pPager->state == (tempFile ? PAGER_EXCLUSIVE : PAGER_UNLOCK) );
1.1909 + /* pPager->errMask = 0; */
1.1910 + pPager->tempFile = tempFile;
1.1911 + assert( tempFile==PAGER_LOCKINGMODE_NORMAL
1.1912 + || tempFile==PAGER_LOCKINGMODE_EXCLUSIVE );
1.1913 + assert( PAGER_LOCKINGMODE_EXCLUSIVE==1 );
1.1914 + pPager->exclusiveMode = tempFile;
1.1915 + pPager->memDb = memDb;
1.1916 + pPager->readOnly = readOnly;
1.1917 + /* pPager->needSync = 0; */
1.1918 + pPager->noSync = pPager->tempFile || !useJournal;
1.1919 + pPager->fullSync = (pPager->noSync?0:1);
1.1920 + pPager->sync_flags = SQLITE_SYNC_NORMAL;
1.1921 + /* pPager->pFirst = 0; */
1.1922 + /* pPager->pFirstSynced = 0; */
1.1923 + /* pPager->pLast = 0; */
1.1924 + pPager->nExtra = nExtra;
1.1925 + pPager->journalSizeLimit = SQLITE_DEFAULT_JOURNAL_SIZE_LIMIT;
1.1926 + assert(pPager->fd->pMethods||memDb||tempFile);
1.1927 + if( !memDb ){
1.1928 + setSectorSize(pPager);
1.1929 + }
1.1930 + /* pPager->pBusyHandler = 0; */
1.1931 + /* memset(pPager->aHash, 0, sizeof(pPager->aHash)); */
1.1932 + *ppPager = pPager;
1.1933 + return SQLITE_OK;
1.1934 +}
1.1935 +
1.1936 +/*
1.1937 +** Set the busy handler function.
1.1938 +*/
1.1939 +void sqlite3PagerSetBusyhandler(Pager *pPager, BusyHandler *pBusyHandler){
1.1940 + pPager->pBusyHandler = pBusyHandler;
1.1941 +}
1.1942 +
1.1943 +/*
1.1944 +** Set the reinitializer for this pager. If not NULL, the reinitializer
1.1945 +** is called when the content of a page in cache is restored to its original
1.1946 +** value as a result of a rollback. The callback gives higher-level code
1.1947 +** an opportunity to restore the EXTRA section to agree with the restored
1.1948 +** page data.
1.1949 +*/
1.1950 +void sqlite3PagerSetReiniter(Pager *pPager, void (*xReinit)(DbPage*)){
1.1951 + pPager->xReiniter = xReinit;
1.1952 +}
1.1953 +
1.1954 +/*
1.1955 +** Set the page size to *pPageSize. If the suggest new page size is
1.1956 +** inappropriate, then an alternative page size is set to that
1.1957 +** value before returning.
1.1958 +*/
1.1959 +int sqlite3PagerSetPagesize(Pager *pPager, u16 *pPageSize){
1.1960 + int rc = pPager->errCode;
1.1961 + if( rc==SQLITE_OK ){
1.1962 + u16 pageSize = *pPageSize;
1.1963 + assert( pageSize==0 || (pageSize>=512 && pageSize<=SQLITE_MAX_PAGE_SIZE) );
1.1964 + if( pageSize && pageSize!=pPager->pageSize
1.1965 + && (pPager->memDb==0 || pPager->dbSize==0)
1.1966 + && sqlite3PcacheRefCount(pPager->pPCache)==0
1.1967 + ){
1.1968 + char *pNew = (char *)sqlite3PageMalloc(pageSize);
1.1969 + if( !pNew ){
1.1970 + rc = SQLITE_NOMEM;
1.1971 + }else{
1.1972 + pager_reset(pPager);
1.1973 + pPager->pageSize = pageSize;
1.1974 + if( !pPager->memDb ) setSectorSize(pPager);
1.1975 + sqlite3PageFree(pPager->pTmpSpace);
1.1976 + pPager->pTmpSpace = pNew;
1.1977 + sqlite3PcacheSetPageSize(pPager->pPCache, pageSize);
1.1978 + }
1.1979 + }
1.1980 + *pPageSize = pPager->pageSize;
1.1981 + }
1.1982 + return rc;
1.1983 +}
1.1984 +
1.1985 +/*
1.1986 +** Return a pointer to the "temporary page" buffer held internally
1.1987 +** by the pager. This is a buffer that is big enough to hold the
1.1988 +** entire content of a database page. This buffer is used internally
1.1989 +** during rollback and will be overwritten whenever a rollback
1.1990 +** occurs. But other modules are free to use it too, as long as
1.1991 +** no rollbacks are happening.
1.1992 +*/
1.1993 +void *sqlite3PagerTempSpace(Pager *pPager){
1.1994 + return pPager->pTmpSpace;
1.1995 +}
1.1996 +
1.1997 +/*
1.1998 +** Attempt to set the maximum database page count if mxPage is positive.
1.1999 +** Make no changes if mxPage is zero or negative. And never reduce the
1.2000 +** maximum page count below the current size of the database.
1.2001 +**
1.2002 +** Regardless of mxPage, return the current maximum page count.
1.2003 +*/
1.2004 +int sqlite3PagerMaxPageCount(Pager *pPager, int mxPage){
1.2005 + if( mxPage>0 ){
1.2006 + pPager->mxPgno = mxPage;
1.2007 + }
1.2008 + sqlite3PagerPagecount(pPager, 0);
1.2009 + return pPager->mxPgno;
1.2010 +}
1.2011 +
/*
** The following set of routines are used to disable the simulated
** I/O error mechanism.  These routines are used to avoid simulated
** errors in places where we do not care about errors.
**
** Unless -DSQLITE_TEST=1 is used, these routines are all no-ops
** and generate no code.
*/
#ifndef SQLITE_TEST
# define disable_simulated_io_errors()
# define enable_simulated_io_errors()
#else
extern int sqlite3_io_error_pending;
extern int sqlite3_io_error_hit;
static int saved_cnt;     /* Pending-error counter saved while disabled */

/* Remember the pending-error counter and set it to -1. */
void disable_simulated_io_errors(void){
  saved_cnt = sqlite3_io_error_pending;
  sqlite3_io_error_pending = -1;
}

/* Restore the counter saved by disable_simulated_io_errors(). */
void enable_simulated_io_errors(void){
  sqlite3_io_error_pending = saved_cnt;
}
#endif
1.2035 +
1.2036 +/*
1.2037 +** Read the first N bytes from the beginning of the file into memory
1.2038 +** that pDest points to.
1.2039 +**
1.2040 +** No error checking is done. The rational for this is that this function
1.2041 +** may be called even if the file does not exist or contain a header. In
1.2042 +** these cases sqlite3OsRead() will return an error, to which the correct
1.2043 +** response is to zero the memory at pDest and continue. A real IO error
1.2044 +** will presumably recur and be picked up later (Todo: Think about this).
1.2045 +*/
1.2046 +int sqlite3PagerReadFileheader(Pager *pPager, int N, unsigned char *pDest){
1.2047 + int rc = SQLITE_OK;
1.2048 + memset(pDest, 0, N);
1.2049 + assert(MEMDB||pPager->fd->pMethods||pPager->tempFile);
1.2050 + if( pPager->fd->pMethods ){
1.2051 + IOTRACE(("DBHDR %p 0 %d\n", pPager, N))
1.2052 + rc = sqlite3OsRead(pPager->fd, pDest, N, 0);
1.2053 + if( rc==SQLITE_IOERR_SHORT_READ ){
1.2054 + rc = SQLITE_OK;
1.2055 + }
1.2056 + }
1.2057 + return rc;
1.2058 +}
1.2059 +
1.2060 +/*
1.2061 +** Return the total number of pages in the disk file associated with
1.2062 +** pPager.
1.2063 +**
1.2064 +** If the PENDING_BYTE lies on the page directly after the end of the
1.2065 +** file, then consider this page part of the file too. For example, if
1.2066 +** PENDING_BYTE is byte 4096 (the first byte of page 5) and the size of the
1.2067 +** file is 4096 bytes, 5 is returned instead of 4.
1.2068 +*/
1.2069 +int sqlite3PagerPagecount(Pager *pPager, int *pnPage){
1.2070 + i64 n = 0;
1.2071 + int rc;
1.2072 + assert( pPager!=0 );
1.2073 + if( pPager->errCode ){
1.2074 + rc = pPager->errCode;
1.2075 + return rc;
1.2076 + }
1.2077 + if( pPager->dbSize>=0 ){
1.2078 + n = pPager->dbSize;
1.2079 + } else {
1.2080 + assert(pPager->fd->pMethods||pPager->tempFile);
1.2081 + if( (pPager->fd->pMethods)
1.2082 + && (rc = sqlite3OsFileSize(pPager->fd, &n))!=SQLITE_OK ){
1.2083 + pager_error(pPager, rc);
1.2084 + return rc;
1.2085 + }
1.2086 + if( n>0 && n<pPager->pageSize ){
1.2087 + n = 1;
1.2088 + }else{
1.2089 + n /= pPager->pageSize;
1.2090 + }
1.2091 + if( pPager->state!=PAGER_UNLOCK ){
1.2092 + pPager->dbSize = n;
1.2093 + }
1.2094 + }
1.2095 + if( n==(PENDING_BYTE/pPager->pageSize) ){
1.2096 + n++;
1.2097 + }
1.2098 + if( n>pPager->mxPgno ){
1.2099 + pPager->mxPgno = n;
1.2100 + }
1.2101 + if( pnPage ){
1.2102 + *pnPage = n;
1.2103 + }
1.2104 + return SQLITE_OK;
1.2105 +}
1.2106 +
/*
** Forward declaration.  syncJournal() is defined further down in this
** file but is already needed by sqlite3PagerTruncate().
*/
static int syncJournal(Pager*);
1.2111 +
1.2112 +/*
1.2113 +** This routine is used to truncate the cache when a database
1.2114 +** is truncated. Drop from the cache all pages whose pgno is
1.2115 +** larger than pPager->dbSize and is unreferenced.
1.2116 +**
1.2117 +** Referenced pages larger than pPager->dbSize are zeroed.
1.2118 +**
1.2119 +** Actually, at the point this routine is called, it would be
1.2120 +** an error to have a referenced page. But rather than delete
1.2121 +** that page and guarantee a subsequent segfault, it seems better
1.2122 +** to zero it and hope that we error out sanely.
1.2123 +*/
1.2124 +static void pager_truncate_cache(Pager *pPager){
1.2125 + sqlite3PcacheTruncate(pPager->pPCache, pPager->dbSize);
1.2126 +}
1.2127 +
1.2128 +/*
1.2129 +** Try to obtain a lock on a file. Invoke the busy callback if the lock
1.2130 +** is currently not available. Repeat until the busy callback returns
1.2131 +** false or until the lock succeeds.
1.2132 +**
1.2133 +** Return SQLITE_OK on success and an error code if we cannot obtain
1.2134 +** the lock.
1.2135 +*/
1.2136 +static int pager_wait_on_lock(Pager *pPager, int locktype){
1.2137 + int rc;
1.2138 +
1.2139 + /* The OS lock values must be the same as the Pager lock values */
1.2140 + assert( PAGER_SHARED==SHARED_LOCK );
1.2141 + assert( PAGER_RESERVED==RESERVED_LOCK );
1.2142 + assert( PAGER_EXCLUSIVE==EXCLUSIVE_LOCK );
1.2143 +
1.2144 + /* If the file is currently unlocked then the size must be unknown */
1.2145 + assert( pPager->state>=PAGER_SHARED || pPager->dbSize<0 || MEMDB );
1.2146 +
1.2147 + if( pPager->state>=locktype ){
1.2148 + rc = SQLITE_OK;
1.2149 + }else{
1.2150 + if( pPager->pBusyHandler ) pPager->pBusyHandler->nBusy = 0;
1.2151 + do {
1.2152 + rc = sqlite3OsLock(pPager->fd, locktype);
1.2153 + }while( rc==SQLITE_BUSY && sqlite3InvokeBusyHandler(pPager->pBusyHandler) );
1.2154 + if( rc==SQLITE_OK ){
1.2155 + pPager->state = locktype;
1.2156 + IOTRACE(("LOCK %p %d\n", pPager, locktype))
1.2157 + }
1.2158 + }
1.2159 + return rc;
1.2160 +}
1.2161 +
1.2162 +/*
1.2163 +** Truncate the file to the number of pages specified.
1.2164 +*/
1.2165 +int sqlite3PagerTruncate(Pager *pPager, Pgno nPage){
1.2166 + int rc = SQLITE_OK;
1.2167 + assert( pPager->state>=PAGER_SHARED || MEMDB );
1.2168 +
1.2169 +
1.2170 + sqlite3PagerPagecount(pPager, 0);
1.2171 + if( pPager->errCode ){
1.2172 + rc = pPager->errCode;
1.2173 + }else if( nPage<(unsigned)pPager->dbSize ){
1.2174 + if( MEMDB ){
1.2175 + pPager->dbSize = nPage;
1.2176 + pager_truncate_cache(pPager);
1.2177 + }else{
1.2178 + rc = syncJournal(pPager);
1.2179 + if( rc==SQLITE_OK ){
1.2180 + /* Get an exclusive lock on the database before truncating. */
1.2181 + rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK);
1.2182 + }
1.2183 + if( rc==SQLITE_OK ){
1.2184 + rc = pager_truncate(pPager, nPage);
1.2185 + }
1.2186 + }
1.2187 + }
1.2188 +
1.2189 + return rc;
1.2190 +}
1.2191 +
/*
** Shutdown the page cache.  Free all memory and close all files.
**
** If a transaction was in progress when this routine is called, that
** transaction is rolled back.  All outstanding pages are invalidated
** and their memory is freed.  Any attempt to use a page associated
** with this page cache after this function returns will likely
** result in a coredump.
**
** This function always succeeds (it returns SQLITE_OK unconditionally).
** If a transaction is active an attempt is made to roll it back.  If an
** error occurs during the rollback a hot journal may be left in the
** filesystem but no error is returned to the caller.
*/
int sqlite3PagerClose(Pager *pPager){

  /* The reset and rollback run with simulated I/O errors switched off
  ** and inside a benign-malloc section; their outcome is not reported.
  */
  disable_simulated_io_errors();
  sqlite3BeginBenignMalloc();
  /* Clear any previous error and exclusive-mode setting before the
  ** reset and rollback below run.
  */
  pPager->errCode = 0;
  pPager->exclusiveMode = 0;
  pager_reset(pPager);
  pagerUnlockAndRollback(pPager);
  enable_simulated_io_errors();
  sqlite3EndBenignMalloc();
  PAGERTRACE2("CLOSE %d\n", PAGERID(pPager));
  IOTRACE(("CLOSE %p\n", pPager))

  /* Close the journal and statement journal only if they are open. */
  if( pPager->journalOpen ){
    sqlite3OsClose(pPager->jfd);
  }
  sqlite3BitvecDestroy(pPager->pInJournal);
  sqlite3BitvecDestroy(pPager->pAlwaysRollback);
  if( pPager->stmtOpen ){
    sqlite3OsClose(pPager->stfd);
  }
  sqlite3OsClose(pPager->fd);
  /* Temp files are automatically deleted by the OS
  ** if( pPager->tempFile ){
  **   sqlite3OsDelete(pPager->zFilename);
  ** }
  */

  /* Release the temporary page buffer and the page cache, then the
  ** pager structure itself.
  */
  sqlite3PageFree(pPager->pTmpSpace);
  sqlite3PcacheClose(pPager->pPCache);
  sqlite3_free(pPager);
  return SQLITE_OK;
}
1.2238 +
1.2239 +#if !defined(NDEBUG) || defined(SQLITE_TEST)
1.2240 +/*
1.2241 +** Return the page number for the given page data.
1.2242 +*/
1.2243 +Pgno sqlite3PagerPagenumber(DbPage *p){
1.2244 + return p->pgno;
1.2245 +}
1.2246 +#endif
1.2247 +
1.2248 +/*
1.2249 +** Increment the reference count for a page. The input pointer is
1.2250 +** a reference to the page data.
1.2251 +*/
1.2252 +int sqlite3PagerRef(DbPage *pPg){
1.2253 + sqlite3PcacheRef(pPg);
1.2254 + return SQLITE_OK;
1.2255 +}
1.2256 +
/*
** Sync the journal.  In other words, make sure all the pages that have
** been written to the journal have actually reached the surface of the
** disk.  It is not safe to modify the original database file until after
** the journal has been synced.  If the original database is modified before
** the journal is synced and a power failure occurs, the unsynced journal
** data would be lost and we would be unable to completely rollback the
** database changes.  Database corruption would occur.
**
** This routine also updates the nRec field in the header of the journal.
** (See comments on the pager_playback() routine for additional information.)
** If the sync mode is FULL, two syncs will occur.  First the whole journal
** is synced, then the nRec field is updated, then a second sync occurs.
**
** For temporary databases, we do not care if we are able to rollback
** after a power failure, so no sync occurs.
**
** If the IOCAP_SEQUENTIAL flag is set for the persistent media on which
** the database is stored, then OsSync() is never called on the journal
** file.  In this case all that is required is to update the nRec field in
** the journal header.
**
** This routine clears the needSync field of every page currently held in
** memory.
**
** Returns SQLITE_OK on success.  If a sync or the write of the nRec field
** fails, that error code is returned at once and Pager.needSync is left
** set.
*/
static int syncJournal(Pager *pPager){
  int rc = SQLITE_OK;

  /* Sync the journal before modifying the main database
  ** (assuming there is a journal and it needs to be synced.)
  */
  if( pPager->needSync ){
    if( !pPager->tempFile ){
      int iDc = sqlite3OsDeviceCharacteristics(pPager->fd);
      assert( pPager->journalOpen );

      if( 0==(iDc&SQLITE_IOCAP_SAFE_APPEND) ){
        /* Write the nRec value into the journal file header. If in
        ** full-synchronous mode, sync the journal first. This ensures that
        ** all data has really hit the disk before nRec is updated to mark
        ** it as a candidate for rollback.
        **
        ** This is not required if the persistent media supports the
        ** SAFE_APPEND property. Because in this case it is not possible
        ** for garbage data to be appended to the file, the nRec field
        ** is populated with 0xFFFFFFFF when the journal header is written
        ** and never needs to be updated.
        */
        i64 jrnlOff;
        if( pPager->fullSync && 0==(iDc&SQLITE_IOCAP_SEQUENTIAL) ){
          PAGERTRACE2("SYNC journal of %d\n", PAGERID(pPager));
          IOTRACE(("JSYNC %p\n", pPager))
          rc = sqlite3OsSync(pPager->jfd, pPager->sync_flags);
          if( rc!=0 ) return rc;
        }

        /* The nRec field is written directly after the journal magic. */
        jrnlOff = pPager->journalHdr + sizeof(aJournalMagic);
        IOTRACE(("JHDR %p %lld %d\n", pPager, jrnlOff, 4));
        rc = write32bits(pPager->jfd, jrnlOff, pPager->nRec);
        if( rc ) return rc;
      }
      if( 0==(iDc&SQLITE_IOCAP_SEQUENTIAL) ){
        /* When the sync flags equal SQLITE_SYNC_FULL, the DATAONLY flag
        ** is added for this sync. */
        PAGERTRACE2("SYNC journal of %d\n", PAGERID(pPager));
        IOTRACE(("JSYNC %p\n", pPager))
        rc = sqlite3OsSync(pPager->jfd, pPager->sync_flags|
          (pPager->sync_flags==SQLITE_SYNC_FULL?SQLITE_SYNC_DATAONLY:0)
        );
        if( rc!=0 ) return rc;
      }
      pPager->journalStarted = 1;
    }
    pPager->needSync = 0;

    /* Erase the needSync flag from every page.
    */
    sqlite3PcacheClearFlags(pPager->pPCache, PGHDR_NEED_SYNC);
  }

#ifndef NDEBUG
  /* If the Pager.needSync flag is clear then the PgHdr.needSync
  ** flag must also be clear for all pages.  Verify that this
  ** invariant is true.  (This "else" pairs with the "if" above and
  ** exists only in builds without NDEBUG.)
  */
  else{
    sqlite3PcacheAssertFlags(pPager->pPCache, 0, PGHDR_NEED_SYNC);
  }
#endif

  return rc;
}
1.2347 +
1.2348 +/*
1.2349 +** Given a list of pages (connected by the PgHdr.pDirty pointer) write
1.2350 +** every one of those pages out to the database file. No calls are made
1.2351 +** to the page-cache to mark the pages as clean. It is the responsibility
1.2352 +** of the caller to use PcacheCleanAll() or PcacheMakeClean() to mark
1.2353 +** the pages as clean.
1.2354 +*/
1.2355 + static int pager_write_pagelist(PgHdr *pList){
1.2356 + Pager *pPager;
1.2357 + int rc;
1.2358 +
1.2359 + if( pList==0 ) return SQLITE_OK; /* Empty list: nothing to write */
1.2360 + pPager = pList->pPager; /* The pager is taken from the first page of the list */
1.2361 +
1.2362 + /* At this point there may be either a RESERVED or EXCLUSIVE lock on the
1.2363 + ** database file. If there is already an EXCLUSIVE lock, the following
1.2364 + ** calls to sqlite3OsLock() are no-ops.
1.2365 + **
1.2366 + ** Moving the lock from RESERVED to EXCLUSIVE actually involves going
1.2367 + ** through an intermediate state PENDING. A PENDING lock prevents new
1.2368 + ** readers from attaching to the database but is insufficient for us to
1.2369 + ** write. The idea of a PENDING lock is to prevent new readers from
1.2370 + ** coming in while we wait for existing readers to clear.
1.2371 + **
1.2372 + ** While the pager is in the RESERVED state, the original database file
1.2373 + ** is unchanged and we can rollback without having to playback the
1.2374 + ** journal into the original database file. Once we transition to
1.2375 + ** EXCLUSIVE, it means the database file has been changed and any rollback
1.2376 + ** will require a journal playback.
1.2377 + */
1.2378 + rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK);
1.2379 + if( rc!=SQLITE_OK ){
1.2380 + return rc;
1.2381 + }
1.2382 +
1.2383 + while( pList ){
1.2384 +
1.2385 + /* If the file has not yet been opened, open it now. */
1.2386 + if( !pPager->fd->pMethods ){
1.2387 + assert(pPager->tempFile);
1.2388 + rc = sqlite3PagerOpentemp(pPager, pPager->fd, pPager->vfsFlags);
1.2389 + if( rc ) return rc;
1.2390 + }
1.2391 +
1.2392 + /* If there are dirty pages in the page cache with page numbers greater
1.2393 + ** than Pager.dbSize, this means sqlite3PagerTruncate() was called to
1.2394 + ** make the file smaller (presumably by auto-vacuum code). Do not write
1.2395 + ** any such pages to the file. Pages flagged PGHDR_DONT_WRITE are
1.2396 + ** skipped in the same way.
1.2397 + */
1.2398 + if( pList->pgno<=pPager->dbSize && 0==(pList->flags&PGHDR_DONT_WRITE) ){
1.2399 + i64 offset = (pList->pgno-1)*(i64)pPager->pageSize; /* Page numbers start at 1, file offsets at 0 */
1.2400 + char *pData = CODEC2(pPager, pList->pData, pList->pgno, 6); /* CODEC2 is defined outside this view; its result is the buffer that gets written */
1.2401 + PAGERTRACE4("STORE %d page %d hash(%08x)\n",
1.2402 + PAGERID(pPager), pList->pgno, pager_pagehash(pList));
1.2403 + IOTRACE(("PGOUT %p %d\n", pPager, pList->pgno));
1.2404 + rc = sqlite3OsWrite(pPager->fd, pData, pPager->pageSize, offset);
1.2405 + PAGER_INCR(sqlite3_pager_writedb_count);
1.2406 + PAGER_INCR(pPager->nWrite);
1.2407 + if( pList->pgno==1 ){ /* Page 1: refresh Pager.dbFileVers from offset 24 of the buffer just written */
1.2408 + memcpy(&pPager->dbFileVers, &pData[24], sizeof(pPager->dbFileVers));
1.2409 + }
1.2410 + }
1.2411 +#ifndef NDEBUG
1.2412 + else{
1.2413 + PAGERTRACE3("NOSTORE %d page %d\n", PAGERID(pPager), pList->pgno);
1.2414 + }
1.2415 +#endif
1.2416 + if( rc ) return rc; /* Stop at the first failed write; rc is still 0 here when the page was skipped */
1.2417 +#ifdef SQLITE_CHECK_PAGES
1.2418 + pList->pageHash = pager_pagehash(pList);
1.2419 +#endif
1.2420 + pList = pList->pDirty; /* Step to the next page on the dirty list */
1.2421 + }
1.2422 +
1.2423 + return SQLITE_OK;
1.2424 + }
1.2424 +
1.2425 +/*
1.2426 +** This function is called by the pcache layer when it has reached some
1.2427 +** soft memory limit. The argument is a pointer to a purgeable Pager
1.2428 +** object. This function attempts to make a single dirty page that has no
1.2429 +** outstanding references (if one exists) clean so that it can be recycled
1.2430 +** by the pcache layer.
1.2431 +*/
1.2432 + static int pagerStress(void *p, PgHdr *pPg){
1.2433 + Pager *pPager = (Pager *)p;
1.2434 + int rc = SQLITE_OK;
1.2435 +
1.2436 + if( pPager->doNotSync ){ /* doNotSync set: do nothing and report success */
1.2437 + return SQLITE_OK;
1.2438 + }
1.2439 +
1.2440 + assert( pPg->flags&PGHDR_DIRTY );
1.2441 + if( pPager->errCode==SQLITE_OK ){ /* No I/O is attempted once the pager holds an error code */
1.2442 + if( pPg->flags&PGHDR_NEED_SYNC ){ /* PGHDR_NEED_SYNC set: run syncJournal() before writing the page */
1.2443 + rc = syncJournal(pPager);
1.2444 + if( rc==SQLITE_OK && pPager->fullSync &&
1.2445 + !(sqlite3OsDeviceCharacteristics(pPager->fd)&SQLITE_IOCAP_SAFE_APPEND)
1.2446 + ){
1.2447 + pPager->nRec = 0; /* Restart the record count, then write a fresh journal header */
1.2448 + rc = writeJournalHdr(pPager);
1.2449 + }
1.2450 + }
1.2451 + if( rc==SQLITE_OK ){
1.2452 + pPg->pDirty = 0; /* Cut the link so the list passed below holds only pPg */
1.2453 + rc = pager_write_pagelist(pPg);
1.2454 + }
1.2455 + if( rc!=SQLITE_OK ){
1.2456 + pager_error(pPager, rc); /* Hand the failure to pager_error(); its return value is not used */
1.2457 + }
1.2458 + }
1.2459 +
1.2460 + if( rc==SQLITE_OK ){ /* Also taken when errCode was already set (rc is still SQLITE_OK): the page is then marked clean without being written */
1.2461 + sqlite3PcacheMakeClean(pPg);
1.2462 + }
1.2463 + return rc;
1.2464 + }
1.2465 +
1.2466 +
1.2467 +/*
1.2468 + ** Set *pExists to 1 if there is a hot journal on the given pager, or
1.2469 + ** to 0 if there is not. A hot journal is one that needs to be played back.
1.2470 +**
1.2471 +** If the current size of the database file is 0 but a journal file
1.2472 +** exists, that is probably an old journal left over from a prior
1.2473 +** database with the same name. Just delete the journal.
1.2474 +**
1.2475 + ** Return SQLITE_OK on success, or an error code if unable to determine the status of the journal.
1.2476 +**
1.2477 +** This routine does not open the journal file to examine its
1.2478 +** content. Hence, the journal might contain the name of a master
1.2479 +** journal file that has been deleted, and hence not be hot. Or
1.2480 +** the header of the journal might be zeroed out. This routine
1.2481 +** does not discover these cases of a non-hot journal - if the
1.2482 +** journal file exists and is not empty this routine assumes it
1.2483 +** is hot. The pager_playback() routine will discover that the
1.2484 +** journal file is not really hot and will no-op.
1.2485 +*/
1.2486 + static int hasHotJournal(Pager *pPager, int *pExists){
1.2487 + sqlite3_vfs *pVfs = pPager->pVfs;
1.2488 + int rc = SQLITE_OK;
1.2489 + int exists; /* Set by sqlite3OsAccess(): does the journal file exist? */
1.2490 + int locked; /* Set by sqlite3OsCheckReservedLock(); only assigned when the journal exists */
1.2491 + assert( pPager!=0 );
1.2492 + assert( pPager->useJournal );
1.2493 + assert( pPager->fd->pMethods );
1.2494 + *pExists = 0; /* Default answer: no hot journal */
1.2495 + rc = sqlite3OsAccess(pVfs, pPager->zJournal, SQLITE_ACCESS_EXISTS, &exists);
1.2496 + if( rc==SQLITE_OK && exists ){
1.2497 + rc = sqlite3OsCheckReservedLock(pPager->fd, &locked);
1.2498 + }
1.2499 + if( rc==SQLITE_OK && exists && !locked ){ /* locked is read only after the call above succeeded (short-circuit) */
1.2500 + int nPage;
1.2501 + rc = sqlite3PagerPagecount(pPager, &nPage);
1.2502 + if( rc==SQLITE_OK ){
1.2503 + if( nPage==0 ){ /* Zero-page database: the journal is deleted instead of being reported hot */
1.2504 + sqlite3OsDelete(pVfs, pPager->zJournal, 0); /* Result of the delete is ignored */
1.2505 + }else{
1.2506 + *pExists = 1;
1.2507 + }
1.2508 + }
1.2509 + }
1.2510 + return rc;
1.2511 + }
1.2512 +
1.2513 +/*
1.2514 +** Read the content of page pPg out of the database file.
1.2515 +*/
1.2516 + static int readDbPage(Pager *pPager, PgHdr *pPg, Pgno pgno){
1.2517 + int rc;
1.2518 + i64 offset;
1.2519 + assert( MEMDB==0 );
1.2520 + assert(pPager->fd->pMethods||pPager->tempFile);
1.2521 + if( !pPager->fd->pMethods ){ /* File not opened (asserted above to be a temp file): nothing to read */
1.2522 + return SQLITE_IOERR_SHORT_READ; /* pPg->pData is left untouched on this path */
1.2523 + }
1.2524 + offset = (pgno-1)*(i64)pPager->pageSize; /* Page numbers start at 1, file offsets at 0 */
1.2525 + rc = sqlite3OsRead(pPager->fd, pPg->pData, pPager->pageSize, offset);
1.2526 + PAGER_INCR(sqlite3_pager_readdb_count);
1.2527 + PAGER_INCR(pPager->nRead);
1.2528 + IOTRACE(("PGIN %p %d\n", pPager, pgno));
1.2529 + if( pgno==1 ){ /* Page 1: cache the bytes at offset 24 as Pager.dbFileVers. NOTE(review): done even when rc reports a failed read */
1.2530 + memcpy(&pPager->dbFileVers, &((u8*)pPg->pData)[24],
1.2531 + sizeof(pPager->dbFileVers));
1.2532 + }
1.2533 + CODEC1(pPager, pPg->pData, pPg->pgno, 3); /* CODEC1 is defined outside this view; it is invoked regardless of rc and uses pPg->pgno, not the pgno argument */
1.2534 + PAGERTRACE4("FETCH %d page %d hash(%08x)\n",
1.2535 + PAGERID(pPager), pPg->pgno, pager_pagehash(pPg));
1.2536 + return rc; /* Result of sqlite3OsRead() */
1.2537 + }
1.2538 +
1.2539 +
1.2540 +/*
1.2541 +** This function is called to obtain the shared lock required before
1.2542 +** data may be read from the pager cache. If the shared lock has already
1.2543 +** been obtained, this function is a no-op.
1.2544 +**
1.2545 +** Immediately after obtaining the shared lock (if required), this function
1.2546 +** checks for a hot-journal file. If one is found, an emergency rollback
1.2547 +** is performed immediately.
1.2548 +*/
1.2549 + static int pagerSharedLock(Pager *pPager){
1.2550 + int rc = SQLITE_OK;
1.2551 + int isErrorReset = 0; /* Set to 1 when an error state is cleared below while the journal is open */
1.2552 +
1.2553 + /* If this database is opened for exclusive access, has no outstanding
1.2554 + ** page references and is in an error-state, now is the chance to clear
1.2555 + ** the error. Discard the contents of the pager-cache and treat any
1.2556 + ** open journal file as a hot-journal.
1.2557 + */
1.2558 + if( !MEMDB && pPager->exclusiveMode
1.2559 + && sqlite3PcacheRefCount(pPager->pPCache)==0 && pPager->errCode
1.2560 + ){
1.2561 + if( pPager->journalOpen ){
1.2562 + isErrorReset = 1;
1.2563 + }
1.2564 + pPager->errCode = SQLITE_OK;
1.2565 + pager_reset(pPager);
1.2566 + }
1.2567 +
1.2568 + /* If the pager is still in an error state, do not proceed. The error
1.2569 + ** state will be cleared at some point in the future when all page
1.2570 + ** references are dropped and the cache can be discarded. An errCode
1.2571 + ** of SQLITE_FULL does not block the pager here.
1.2572 + */
1.2573 + if( pPager->errCode && pPager->errCode!=SQLITE_FULL ){
1.2574 + return pPager->errCode;
1.2575 + }
1.2575 +
1.2576 + if( pPager->state==PAGER_UNLOCK || isErrorReset ){
1.2577 + sqlite3_vfs *pVfs = pPager->pVfs;
1.2578 + if( !MEMDB ){
1.2579 + int isHotJournal; /* Assigned only by hasHotJournal(), i.e. only when isErrorReset is 0 */
1.2580 + assert( sqlite3PcacheRefCount(pPager->pPCache)==0 );
1.2581 + if( !pPager->noReadlock ){
1.2582 + rc = pager_wait_on_lock(pPager, SHARED_LOCK);
1.2583 + if( rc!=SQLITE_OK ){
1.2584 + assert( pPager->state==PAGER_UNLOCK );
1.2585 + return pager_error(pPager, rc);
1.2586 + }
1.2587 + assert( pPager->state>=SHARED_LOCK );
1.2588 + }
1.2589 +
1.2590 + /* If a journal file exists, and there is no RESERVED lock on the
1.2591 + ** database file, then it either needs to be played back or deleted.
1.2592 + */
1.2593 + if( !isErrorReset ){
1.2594 + rc = hasHotJournal(pPager, &isHotJournal);
1.2595 + if( rc!=SQLITE_OK ){
1.2596 + goto failed;
1.2597 + }
1.2598 + }
1.2599 + if( isErrorReset || isHotJournal ){ /* Short-circuit: isHotJournal is read only when it was assigned above */
1.2600 + /* Get an EXCLUSIVE lock on the database file. At this point it is
1.2601 + ** important that a RESERVED lock is not obtained on the way to the
1.2602 + ** EXCLUSIVE lock. If it were, another process might open the
1.2603 + ** database file, detect the RESERVED lock, and conclude that the
1.2604 + ** database is safe to read while this process is still rolling it
1.2605 + ** back.
1.2606 + **
1.2607 + ** Because the intermediate RESERVED lock is not requested, the
1.2608 + ** second process will get to this point in the code and fail to
1.2609 + ** obtain its own EXCLUSIVE lock on the database file.
1.2610 + */
1.2611 + if( pPager->state<EXCLUSIVE_LOCK ){
1.2612 + rc = sqlite3OsLock(pPager->fd, EXCLUSIVE_LOCK);
1.2613 + if( rc!=SQLITE_OK ){
1.2614 + rc = pager_error(pPager, rc);
1.2615 + goto failed;
1.2616 + }
1.2617 + pPager->state = PAGER_EXCLUSIVE;
1.2618 + }
1.2619 +
1.2620 + /* Open the journal for read/write access. This is because in
1.2621 + ** exclusive-access mode the file descriptor will be kept open and
1.2622 + ** possibly used for a transaction later on. On some systems, the
1.2623 + ** OsTruncate() call used in exclusive-access mode also requires
1.2624 + ** a read/write file handle.
1.2625 + */
1.2626 + if( !isErrorReset && pPager->journalOpen==0 ){
1.2627 + int res;
1.2628 + rc = sqlite3OsAccess(pVfs,pPager->zJournal,SQLITE_ACCESS_EXISTS,&res);
1.2629 + if( rc==SQLITE_OK ){
1.2630 + if( res ){
1.2631 + int fout = 0;
1.2632 + int f = SQLITE_OPEN_READWRITE|SQLITE_OPEN_MAIN_JOURNAL;
1.2633 + assert( !pPager->tempFile );
1.2634 + rc = sqlite3OsOpen(pVfs, pPager->zJournal, pPager->jfd, f, &fout);
1.2635 + assert( rc!=SQLITE_OK || pPager->jfd->pMethods );
1.2636 + if( fout&SQLITE_OPEN_READONLY ){ /* Journal could only be opened read-only: close it and report SQLITE_BUSY */
1.2637 + rc = SQLITE_BUSY;
1.2638 + sqlite3OsClose(pPager->jfd);
1.2639 + }
1.2640 + }else{
1.2641 + /* If the journal does not exist, that means some other process
1.2642 + ** has already rolled it back */
1.2643 + rc = SQLITE_BUSY;
1.2644 + }
1.2645 + }
1.2646 + }
1.2647 + if( rc!=SQLITE_OK ){ /* Every failure except the three codes tested below is reported as SQLITE_BUSY */
1.2648 + if( rc!=SQLITE_NOMEM && rc!=SQLITE_IOERR_UNLOCK
1.2649 + && rc!=SQLITE_IOERR_NOMEM
1.2650 + ){
1.2651 + rc = SQLITE_BUSY;
1.2652 + }
1.2653 + goto failed;
1.2654 + }
1.2655 + pPager->journalOpen = 1;
1.2656 + pPager->journalStarted = 0;
1.2657 + pPager->journalOff = 0;
1.2658 + pPager->setMaster = 0;
1.2659 + pPager->journalHdr = 0;
1.2660 +
1.2661 + /* Playback and delete the journal. Drop the database write
1.2662 + ** lock and reacquire the read lock.
1.2663 + */
1.2664 + rc = pager_playback(pPager, 1);
1.2665 + if( rc!=SQLITE_OK ){
1.2666 + rc = pager_error(pPager, rc);
1.2667 + goto failed;
1.2668 + }
1.2669 + assert(pPager->state==PAGER_SHARED ||
1.2670 + (pPager->exclusiveMode && pPager->state>PAGER_SHARED)
1.2671 + );
1.2672 + }
1.2673 +
1.2674 + if( sqlite3PcachePagecount(pPager->pPCache)>0 ){
1.2675 + /* The shared-lock has just been acquired on the database file
1.2676 + ** and there are already pages in the cache (from a previous
1.2677 + ** read or write transaction). Check to see if the database
1.2678 + ** has been modified. If the database has changed, flush the
1.2679 + ** cache.
1.2680 + **
1.2681 + ** Database changes are detected by looking at the sizeof(dbFileVers)
1.2682 + ** bytes beginning at offset 24 into the file. The first 4 of these
1.2683 + ** bytes are a 32-bit counter that is incremented with each change. The
1.2684 + ** other bytes change randomly with each file change when
1.2685 + ** a codec is in use.
1.2686 + **
1.2687 + ** There is a vanishingly small chance that a change will not be
1.2688 + ** detected. The chance of an undetected change is so small that
1.2689 + ** it can be neglected.
1.2690 + */
1.2691 + char dbFileVers[sizeof(pPager->dbFileVers)];
1.2692 + sqlite3PagerPagecount(pPager, 0); /* Return value ignored; pPager->errCode is checked just below instead */
1.2693 +
1.2694 + if( pPager->errCode ){
1.2695 + rc = pPager->errCode;
1.2696 + goto failed;
1.2697 + }
1.2698 +
1.2699 + if( pPager->dbSize>0 ){
1.2700 + IOTRACE(("CKVERS %p %d\n", pPager, sizeof(dbFileVers)));
1.2701 + rc = sqlite3OsRead(pPager->fd, &dbFileVers, sizeof(dbFileVers), 24);
1.2702 + if( rc!=SQLITE_OK ){
1.2703 + goto failed;
1.2704 + }
1.2705 + }else{
1.2706 + memset(dbFileVers, 0, sizeof(dbFileVers)); /* Empty database: compare against all zeros */
1.2707 + }
1.2708 +
1.2709 + if( memcmp(pPager->dbFileVers, dbFileVers, sizeof(dbFileVers))!=0 ){ /* Version bytes differ from the cached copy: discard the cache */
1.2710 + pager_reset(pPager);
1.2711 + }
1.2712 + }
1.2713 + }
1.2714 + assert( pPager->exclusiveMode || pPager->state<=PAGER_SHARED );
1.2715 + if( pPager->state==PAGER_UNLOCK ){
1.2716 + pPager->state = PAGER_SHARED;
1.2717 + }
1.2718 + }
1.2719 +
1.2720 + failed:
1.2721 + if( rc!=SQLITE_OK ){ /* The success path also falls through this label; the unlock runs only on error */
1.2722 + /* pager_unlock() is a no-op for exclusive mode and in-memory databases. */
1.2723 + pager_unlock(pPager);
1.2724 + }
1.2725 + return rc;
1.2726 + }
1.2727 +
1.2728 +/*
1.2729 +** Make sure we have the content for a page. If the page was
1.2730 +** previously acquired with noContent==1, then the content was
1.2731 +** just initialized to zeros instead of being read from disk.
1.2732 +** But now we need the real data off of disk. So make sure we
1.2733 +** have it. Read it in if we do not have it already.
1.2734 +*/
1.2735 + static int pager_get_content(PgHdr *pPg){
1.2736 + if( pPg->flags&PGHDR_NEED_READ ){ /* Content was never loaded for this page: read it now */
1.2737 + int rc = readDbPage(pPg->pPager, pPg, pPg->pgno);
1.2738 + if( rc==SQLITE_OK ){
1.2739 + pPg->flags &= ~PGHDR_NEED_READ; /* Content is now present; clear the flag */
1.2740 + }else{
1.2741 + return rc; /* Read failed: PGHDR_NEED_READ stays set */
1.2742 + }
1.2743 + }
1.2744 + return SQLITE_OK; /* Flag was clear already, or the read succeeded */
1.2745 + }
1.2746 +
1.2747 +/*
1.2748 +** If the reference count has reached zero, and the pager is not in the
1.2749 +** middle of a write transaction or opened in exclusive mode, unlock it.
1.2750 +*/
1.2751 + static void pagerUnlockIfUnused(Pager *pPager){
1.2752 + if( (sqlite3PcacheRefCount(pPager->pPCache)==0) /* No page references remain ... */
1.2753 + && (!pPager->exclusiveMode || pPager->journalOff>0) /* ... and either not in exclusive mode, or journalOff is non-zero */
1.2754 + ){
1.2755 + pagerUnlockAndRollback(pPager);
1.2756 + }
1.2757 + }
1.2758 +
1.2759 +/*
1.2760 +** Drop a page from the cache using sqlite3PcacheDrop().
1.2761 +**
1.2762 +** If this means there are now no pages with references to them, a rollback
1.2763 +** occurs and the lock on the database is removed.
1.2764 +*/
1.2765 + static void pagerDropPage(DbPage *pPg){
1.2766 + Pager *pPager = pPg->pPager; /* Read the owning pager before the page is handed to sqlite3PcacheDrop() */
1.2767 + sqlite3PcacheDrop(pPg);
1.2768 + pagerUnlockIfUnused(pPager); /* Unlocks (with rollback) if that was the last reference */
1.2769 + }
1.2770 +
1.2771 +/*
1.2772 +** Acquire a page.
1.2773 +**
1.2774 +** A read lock on the disk file is obtained when the first page is acquired.
1.2775 +** This read lock is dropped when the last page is released.
1.2776 +**
1.2777 +** This routine works for any page number greater than 0. If the database
1.2778 +** file is smaller than the requested page, then no actual disk
1.2779 +** read occurs and the memory image of the page is initialized to
1.2780 +** all zeros. The extra data appended to a page is always initialized
1.2781 +** to zeros the first time a page is loaded into memory.
1.2782 +**
1.2783 +** The acquisition might fail for several reasons. In all cases,
1.2784 +** an appropriate error code is returned and *ppPage is set to NULL.
1.2785 +**
1.2786 +** See also sqlite3PagerLookup(). Both this routine and Lookup() attempt
1.2787 +** to find a page in the in-memory cache first. If the page is not already
1.2788 +** in memory, this routine goes to disk to read it in whereas Lookup()
1.2789 +** just returns 0. This routine acquires a read-lock the first time it
1.2790 +** has to go to disk, and could also playback an old journal if necessary.
1.2791 +** Since Lookup() never goes to disk, it never has to deal with locks
1.2792 +** or journal files.
1.2793 +**
1.2794 +** If noContent is false, the page contents are actually read from disk.
1.2795 +** If noContent is true, it means that we do not care about the contents
1.2796 +** of the page at this time, so do not do a disk read. Just fill in the
1.2797 +** page content with zeros. But mark the fact that we have not read the
1.2798 +** content by setting the PgHdr.needRead flag. Later on, if
1.2799 +** sqlite3PagerWrite() is called on this page or if this routine is
1.2800 +** called again with noContent==0, that means that the content is needed
1.2801 +** and the disk read should occur at that point.
1.2802 +*/
1.2803 + int sqlite3PagerAcquire(
1.2804 + Pager *pPager, /* The pager open on the database file */
1.2805 + Pgno pgno, /* Page number to fetch */
1.2806 + DbPage **ppPage, /* Write a pointer to the page here */
1.2807 + int noContent /* Do not bother reading content from disk if true */
1.2808 + ){
1.2809 + PgHdr *pPg = 0;
1.2810 + int rc;
1.2811 +
1.2812 + assert( pPager->state==PAGER_UNLOCK
1.2813 + || sqlite3PcacheRefCount(pPager->pPCache)>0
1.2814 + || pgno==1
1.2815 + );
1.2816 +
1.2817 + /* The maximum page number is 2^31. Return SQLITE_CORRUPT if a page
1.2818 + ** number greater than this, or zero, is requested. The page at
1.2819 + ** PAGER_MJ_PGNO(pPager) is rejected the same way.
1.2820 + */
1.2820 + if( pgno>PAGER_MAX_PGNO || pgno==0 || pgno==PAGER_MJ_PGNO(pPager) ){
1.2821 + return SQLITE_CORRUPT_BKPT; /* NOTE(review): this return happens before *ppPage is cleared below, so *ppPage is left unchanged on this path */
1.2822 + }
1.2823 +
1.2824 + /* Make sure we have not hit any critical errors.
1.2825 + */
1.2826 + assert( pPager!=0 ); /* NOTE(review): pPager has already been dereferenced by the code above */
1.2827 + *ppPage = 0;
1.2828 +
1.2829 + /* If this is the first page accessed, then get a SHARED lock
1.2830 + ** on the database file. pagerSharedLock() is a no-op if
1.2831 + ** a database lock is already held.
1.2832 + */
1.2833 + rc = pagerSharedLock(pPager);
1.2834 + if( rc!=SQLITE_OK ){
1.2835 + return rc;
1.2836 + }
1.2837 + assert( pPager->state!=PAGER_UNLOCK );
1.2838 +
1.2839 + rc = sqlite3PcacheFetch(pPager->pPCache, pgno, 1, &pPg); /* Third argument is 1 here; sqlite3PagerLookup() passes 0 */
1.2840 + if( rc!=SQLITE_OK ){
1.2841 + return rc;
1.2842 + }
1.2843 + if( pPg->pPager==0 ){
1.2844 + /* The pager cache has created a new page. Its content needs to
1.2845 + ** be initialized.
1.2846 + */
1.2847 + int nMax; /* Page count reported by sqlite3PagerPagecount() */
1.2848 + PAGER_INCR(pPager->nMiss);
1.2849 + pPg->pPager = pPager;
1.2850 + if( sqlite3BitvecTest(pPager->pInJournal, pgno) ){ /* pgno is recorded in the pInJournal bitvec: carry that onto the new page */
1.2851 + assert( !MEMDB );
1.2852 + pPg->flags |= PGHDR_IN_JOURNAL;
1.2853 + }
1.2854 + memset(pPg->pExtra, 0, pPager->nExtra); /* Extra area always starts out zeroed */
1.2855 +
1.2856 + rc = sqlite3PagerPagecount(pPager, &nMax);
1.2857 + if( rc!=SQLITE_OK ){
1.2858 + sqlite3PagerUnref(pPg);
1.2859 + return rc;
1.2860 + }
1.2861 +
1.2862 + if( nMax<(int)pgno || MEMDB || noContent ){ /* Page beyond the page count, in-memory database, or content not wanted: zero-fill instead of reading */
1.2863 + if( pgno>pPager->mxPgno ){ /* Page number exceeds the pager's mxPgno limit */
1.2864 + sqlite3PagerUnref(pPg);
1.2865 + return SQLITE_FULL;
1.2866 + }
1.2867 + memset(pPg->pData, 0, pPager->pageSize);
1.2868 + if( noContent ){
1.2869 + pPg->flags |= PGHDR_NEED_READ; /* Remember that the real content has not been read */
1.2870 + }
1.2871 + IOTRACE(("ZERO %p %d\n", pPager, pgno));
1.2872 + }else{
1.2873 + rc = readDbPage(pPager, pPg, pgno);
1.2874 + if( rc!=SQLITE_OK && rc!=SQLITE_IOERR_SHORT_READ ){ /* A short read is tolerated: the page is kept and SQLITE_OK is returned below */
1.2875 + /* sqlite3PagerUnref(pPg); */
1.2876 + pagerDropPage(pPg);
1.2877 + return rc;
1.2878 + }
1.2879 + }
1.2880 +#ifdef SQLITE_CHECK_PAGES
1.2881 + pPg->pageHash = pager_pagehash(pPg);
1.2882 +#endif
1.2883 + }else{
1.2884 + /* The requested page is in the page cache. */
1.2885 + assert(sqlite3PcacheRefCount(pPager->pPCache)>0 || pgno==1);
1.2886 + PAGER_INCR(pPager->nHit);
1.2887 + if( !noContent ){ /* Caller wants real content: load it if the page is still flagged PGHDR_NEED_READ */
1.2888 + rc = pager_get_content(pPg);
1.2889 + if( rc ){
1.2890 + sqlite3PagerUnref(pPg);
1.2891 + return rc;
1.2892 + }
1.2893 + }
1.2894 + }
1.2895 +
1.2896 + *ppPage = pPg; /* Written only on the success path */
1.2897 + return SQLITE_OK;
1.2898 + }
1.2899 +
1.2900 +/*
1.2901 +** Acquire a page if it is already in the in-memory cache. Do
1.2902 +** not read the page from disk. Return a pointer to the page,
1.2903 +** or 0 if the page is not in cache.
1.2904 +**
1.2905 +** See also sqlite3PagerGet(). The difference between this routine
1.2906 +** and sqlite3PagerGet() is that _get() will go to the disk and read
1.2907 +** in the page if the page is not already in cache. This routine
1.2908 +** returns NULL if the page is not in cache or if a disk I/O error
1.2909 +** has ever happened.
1.2910 +*/
1.2911 + DbPage *sqlite3PagerLookup(Pager *pPager, Pgno pgno){
1.2912 + PgHdr *pPg = 0; /* Stays 0 unless the cache fetch below stores a page */
1.2913 + assert( pPager!=0 );
1.2914 + assert( pgno!=0 );
1.2915 +
1.2916 + if( (pPager->state!=PAGER_UNLOCK) /* Consult the cache only while a lock is held ... */
1.2917 + && (pPager->errCode==SQLITE_OK || pPager->errCode==SQLITE_FULL) /* ... and errCode is SQLITE_OK or SQLITE_FULL */
1.2918 + ){
1.2919 + sqlite3PcacheFetch(pPager->pPCache, pgno, 0, &pPg); /* Return code is ignored; third argument is 0 here, 1 in sqlite3PagerAcquire() */
1.2920 + }
1.2921 +
1.2922 + return pPg;
1.2923 + }
1.2924 +
1.2925 +/*
1.2926 +** Release a page.
1.2927 +**
1.2928 +** If the number of references to the page drop to zero, then the
1.2929 +** page is added to the LRU list. When all references to all pages
1.2930 +** are released, a rollback occurs and the lock on the database is
1.2931 +** removed.
1.2932 +*/
1.2933 + int sqlite3PagerUnref(DbPage *pPg){
1.2934 + if( pPg ){ /* A NULL page is accepted and ignored */
1.2935 + Pager *pPager = pPg->pPager; /* Read the owning pager before the page is released */
1.2936 + sqlite3PcacheRelease(pPg);
1.2937 + pagerUnlockIfUnused(pPager);
1.2938 + }
1.2939 + return SQLITE_OK; /* This routine always returns SQLITE_OK */
1.2940 + }
1.2941 +
1.2942 +/*
1.2943 +** Create a journal file for pPager. There should already be a RESERVED
1.2944 +** or EXCLUSIVE lock on the database file when this routine is called.
1.2945 +**
1.2946 + ** Return SQLITE_OK if everything succeeds. Return an error code and release the
1.2947 +** write lock if anything goes wrong.
1.2948 +*/
1.2949 + static int pager_open_journal(Pager *pPager){
1.2950 + sqlite3_vfs *pVfs = pPager->pVfs;
1.2951 + int flags = (SQLITE_OPEN_READWRITE|SQLITE_OPEN_EXCLUSIVE|SQLITE_OPEN_CREATE);
1.2952 +
1.2953 + int rc;
1.2954 + assert( !MEMDB );
1.2955 + assert( pPager->state>=PAGER_RESERVED );
1.2956 + assert( pPager->useJournal );
1.2957 + assert( pPager->pInJournal==0 );
1.2958 + sqlite3PagerPagecount(pPager, 0); /* Return value ignored; pPager->errCode is checked further down */
1.2959 + pPager->pInJournal = sqlite3BitvecCreate(pPager->dbSize); /* Bitvec sized by the current dbSize */
1.2960 + if( pPager->pInJournal==0 ){
1.2961 + rc = SQLITE_NOMEM;
1.2962 + goto failed_to_open_journal;
1.2963 + }
1.2964 +
1.2965 + if( pPager->journalOpen==0 ){ /* Skip the open when the journal file is already open */
1.2966 + if( pPager->tempFile ){
1.2967 + flags |= (SQLITE_OPEN_DELETEONCLOSE|SQLITE_OPEN_TEMP_JOURNAL);
1.2968 + }else{
1.2969 + flags |= (SQLITE_OPEN_MAIN_JOURNAL);
1.2970 + }
1.2971 +#ifdef SQLITE_ENABLE_ATOMIC_WRITE
1.2972 + rc = sqlite3JournalOpen(
1.2973 + pVfs, pPager->zJournal, pPager->jfd, flags, jrnlBufferSize(pPager)
1.2974 + );
1.2975 +#else
1.2976 + rc = sqlite3OsOpen(pVfs, pPager->zJournal, pPager->jfd, flags, 0);
1.2977 +#endif
1.2978 + assert( rc!=SQLITE_OK || pPager->jfd->pMethods );
1.2979 + pPager->journalOff = 0;
1.2980 + pPager->setMaster = 0;
1.2981 + pPager->journalHdr = 0;
1.2982 + if( rc!=SQLITE_OK ){
1.2983 + if( rc==SQLITE_NOMEM ){ /* Only for SQLITE_NOMEM is the journal file deleted; the delete result is ignored */
1.2984 + sqlite3OsDelete(pVfs, pPager->zJournal, 0);
1.2985 + }
1.2986 + goto failed_to_open_journal;
1.2987 + }
1.2988 + }
1.2989 + pPager->journalOpen = 1;
1.2990 + pPager->journalStarted = 0;
1.2991 + pPager->needSync = 0;
1.2992 + pPager->nRec = 0;
1.2993 + if( pPager->errCode ){ /* Bail out if the pager carries an error code; journalOpen stays set on this path */
1.2994 + rc = pPager->errCode;
1.2995 + goto failed_to_open_journal;
1.2996 + }
1.2997 + pPager->origDbSize = pPager->dbSize; /* Remember the database size at the start of the transaction */
1.2998 +
1.2999 + rc = writeJournalHdr(pPager);
1.3000 +
1.3001 + if( pPager->stmtAutoopen && rc==SQLITE_OK ){
1.3002 + rc = sqlite3PagerStmtBegin(pPager);
1.3003 + }
1.3004 + if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM && rc!=SQLITE_IOERR_NOMEM ){ /* Failures other than the two out-of-memory codes end the transaction here */
1.3005 + rc = pager_end_transaction(pPager, 0);
1.3006 + if( rc==SQLITE_OK ){
1.3007 + rc = SQLITE_FULL; /* Transaction ended cleanly: the original failure is reported as SQLITE_FULL */
1.3008 + }
1.3009 + }
1.3010 + return rc; /* Normal exit; the label below is reached only via goto */
1.3011 +
1.3012 + failed_to_open_journal:
1.3013 + sqlite3BitvecDestroy(pPager->pInJournal);
1.3014 + pPager->pInJournal = 0;
1.3015 + return rc;
1.3016 + }
1.3017 +
1.3018 +/*
1.3019 + ** Acquire a write-lock on the database. The lock is removed when
1.3020 + ** any of the following happens:
1.3021 +**
1.3022 +** * sqlite3PagerCommitPhaseTwo() is called.
1.3023 +** * sqlite3PagerRollback() is called.
1.3024 +** * sqlite3PagerClose() is called.
1.3025 + ** * sqlite3PagerUnref() is called on every outstanding page.
1.3026 +**
1.3027 +** The first parameter to this routine is a pointer to any open page of the
1.3028 +** database file. Nothing changes about the page - it is used merely to
1.3029 +** acquire a pointer to the Pager structure and as proof that there is
1.3030 +** already a read-lock on the database.
1.3031 +**
1.3032 +** The second parameter indicates how much space in bytes to reserve for a
1.3033 +** master journal file-name at the start of the journal when it is created.
1.3034 +**
1.3035 +** A journal file is opened if this is not a temporary file. For temporary
1.3036 +** files, the opening of the journal file is deferred until there is an
1.3037 +** actual need to write to the journal.
1.3038 +**
1.3039 +** If the database is already reserved for writing, this routine is a no-op.
1.3040 +**
1.3041 +** If exFlag is true, go ahead and get an EXCLUSIVE lock on the file
1.3042 +** immediately instead of waiting until we try to flush the cache. The
1.3043 +** exFlag is ignored if a transaction is already active.
1.3044 +*/
1.3045 + int sqlite3PagerBegin(DbPage *pPg, int exFlag){
1.3046 + Pager *pPager = pPg->pPager;
1.3047 + int rc = SQLITE_OK;
1.3048 + assert( pPg->nRef>0 );
1.3049 + assert( pPager->state!=PAGER_UNLOCK );
1.3050 + if( pPager->state==PAGER_SHARED ){ /* Only a read lock so far: start a write transaction */
1.3051 + assert( pPager->pInJournal==0 );
1.3052 + sqlite3PcacheAssertFlags(pPager->pPCache, 0, PGHDR_IN_JOURNAL);
1.3053 + if( MEMDB ){ /* In-memory database: no file lock is taken, the state moves straight to EXCLUSIVE */
1.3054 + pPager->state = PAGER_EXCLUSIVE;
1.3055 + pPager->origDbSize = pPager->dbSize;
1.3056 + }else{
1.3057 + rc = sqlite3OsLock(pPager->fd, RESERVED_LOCK);
1.3058 + if( rc==SQLITE_OK ){
1.3059 + pPager->state = PAGER_RESERVED;
1.3060 + if( exFlag ){ /* Caller asked for the EXCLUSIVE lock right away */
1.3061 + rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK);
1.3062 + }
1.3063 + }
1.3064 + if( rc!=SQLITE_OK ){
1.3065 + return rc;
1.3066 + }
1.3067 + pPager->dirtyCache = 0;
1.3068 + PAGERTRACE2("TRANSACTION %d\n", PAGERID(pPager));
1.3069 + if( pPager->useJournal && !pPager->tempFile /* Journal is opened now only for non-temp files ... */
1.3070 + && pPager->journalMode!=PAGER_JOURNALMODE_OFF ){ /* ... with journaling not switched off */
1.3071 + rc = pager_open_journal(pPager);
1.3072 + }
1.3073 + }
1.3074 + }else if( pPager->journalOpen && pPager->journalOff==0 ){
1.3075 + /* This happens when the pager was in exclusive-access mode the last
1.3076 + ** time a (read or write) transaction was successfully concluded
1.3077 + ** by this connection. Instead of deleting the journal file it was
1.3078 + ** kept open and either was truncated to 0 bytes or its header was
1.3079 + ** overwritten with zeros.
1.3080 + */
1.3081 + assert( pPager->nRec==0 );
1.3082 + assert( pPager->origDbSize==0 );
1.3083 + assert( pPager->pInJournal==0 );
1.3084 + sqlite3PagerPagecount(pPager, 0); /* Return value ignored */
1.3085 + pPager->pInJournal = sqlite3BitvecCreate( pPager->dbSize );
1.3086 + if( !pPager->pInJournal ){
1.3087 + rc = SQLITE_NOMEM;
1.3088 + }else{
1.3089 + pPager->origDbSize = pPager->dbSize;
1.3090 + rc = writeJournalHdr(pPager); /* Re-use the already-open journal: just write a new header */
1.3091 + }
1.3092 + }
1.3093 + assert( !pPager->journalOpen || pPager->journalOff>0 || rc!=SQLITE_OK );
1.3094 + return rc;
1.3095 + }
1.3096 +
1.3097 +/*
1.3098 +** Make a page dirty. Set its dirty flag and add it to the dirty
1.3099 +** page list.
1.3100 +*/
1.3101 + static void makeDirty(PgHdr *pPg){
1.3102 + sqlite3PcacheMakeDirty(pPg); /* Thin wrapper: the work is delegated to the pcache layer */
1.3103 + }
1.3104 +
1.3105 +/*
1.3106 +** Make a page clean. Clear its dirty bit and remove it from the
1.3107 +** dirty page list.
1.3108 +*/
1.3109 + static void makeClean(PgHdr *pPg){
1.3110 + sqlite3PcacheMakeClean(pPg); /* Thin wrapper: the work is delegated to the pcache layer */
1.3111 + }
1.3112 +
1.3113 +
1.3114 +/*
1.3115 +** Mark a data page as writeable. The page is written into the journal
1.3116 +** if it is not there already. This routine must be called before making
1.3117 +** changes to a page.
1.3118 +**
1.3119 +** The first time this routine is called, the pager creates a new
1.3120 +** journal and acquires a RESERVED lock on the database. If the RESERVED
1.3121 +** lock could not be acquired, this routine returns SQLITE_BUSY. The
1.3122 +** calling routine must check for that return value and be careful not to
1.3123 +** change any page data until this routine returns SQLITE_OK.
1.3124 +**
1.3125 +** If the journal file could not be written because the disk is full,
1.3126 +** then this routine returns SQLITE_FULL and does an immediate rollback.
1.3127 +** All subsequent write attempts also return SQLITE_FULL until there
1.3128 +** is a call to sqlite3PagerCommit() or sqlite3PagerRollback() to
1.3129 +** reset.
1.3130 +*/
1.3131 +static int pager_write(PgHdr *pPg){
1.3132 + void *pData = pPg->pData;
1.3133 + Pager *pPager = pPg->pPager;
1.3134 + int rc = SQLITE_OK;
1.3135 +
1.3136 + /* Check for errors
1.3137 + */
1.3138 + if( pPager->errCode ){
1.3139 + return pPager->errCode;
1.3140 + }
1.3141 + if( pPager->readOnly ){
1.3142 + return SQLITE_PERM;
1.3143 + }
1.3144 +
1.3145 + assert( !pPager->setMaster );
1.3146 +
1.3147 + CHECK_PAGE(pPg);
1.3148 +
1.3149 + /* If this page was previously acquired with noContent==1, that means
1.3150 + ** we didn't really read in the content of the page. This can happen
1.3151 + ** (for example) when the page is being moved to the freelist. But
1.3152 + ** now we are (perhaps) moving the page off of the freelist for
1.3153 + ** reuse and we need to know its original content so that content
1.3154 + ** can be stored in the rollback journal. So do the read at this
1.3155 + ** time.
1.3156 + */
1.3157 + rc = pager_get_content(pPg);
1.3158 + if( rc ){
1.3159 + return rc;
1.3160 + }
1.3161 +
1.3162 + /* Mark the page as dirty. If the page has already been written
1.3163 + ** to the journal then we can return right away.
1.3164 + */
1.3165 + makeDirty(pPg);
1.3166 + if( (pPg->flags&PGHDR_IN_JOURNAL)
1.3167 + && (pageInStatement(pPg) || pPager->stmtInUse==0)
1.3168 + ){
1.3169 + pPager->dirtyCache = 1;
1.3170 + pPager->dbModified = 1;
1.3171 + }else{
1.3172 +
1.3173 + /* If we get this far, it means that the page needs to be
1.3174 + ** written to the transaction journal or the checkpoint journal
1.3175 + ** or both.
1.3176 + **
1.3177 + ** First check to see that the transaction journal exists and
1.3178 + ** create it if it does not.
1.3179 + */
1.3180 + assert( pPager->state!=PAGER_UNLOCK );
1.3181 + rc = sqlite3PagerBegin(pPg, 0);
1.3182 + if( rc!=SQLITE_OK ){
1.3183 + return rc;
1.3184 + }
1.3185 + assert( pPager->state>=PAGER_RESERVED );
1.3186 + if( !pPager->journalOpen && pPager->useJournal
1.3187 + && pPager->journalMode!=PAGER_JOURNALMODE_OFF ){
1.3188 + rc = pager_open_journal(pPager);
1.3189 + if( rc!=SQLITE_OK ) return rc;
1.3190 + }
1.3191 + pPager->dirtyCache = 1;
1.3192 + pPager->dbModified = 1;
1.3193 +
1.3194 + /* The transaction journal now exists and we have a RESERVED or an
1.3195 + ** EXCLUSIVE lock on the main database file. Write the current page to
1.3196 + ** the transaction journal if it is not there already.
1.3197 + */
1.3198 + if( !(pPg->flags&PGHDR_IN_JOURNAL) && (pPager->journalOpen || MEMDB) ){
1.3199 + if( (int)pPg->pgno <= pPager->origDbSize ){
1.3200 + if( MEMDB ){
1.3201 + PAGERTRACE3("JOURNAL %d page %d\n", PAGERID(pPager), pPg->pgno);
1.3202 + rc = sqlite3PcachePreserve(pPg, 0);
1.3203 + if( rc!=SQLITE_OK ){
1.3204 + return rc;
1.3205 + }
1.3206 + }else{
1.3207 + u32 cksum;
1.3208 + char *pData2;
1.3209 +
1.3210 + /* We should never write to the journal file the page that
1.3211 + ** contains the database locks. The following assert verifies
1.3212 + ** that we do not. */
1.3213 + assert( pPg->pgno!=PAGER_MJ_PGNO(pPager) );
1.3214 + pData2 = CODEC2(pPager, pData, pPg->pgno, 7);
1.3215 + cksum = pager_cksum(pPager, (u8*)pData2);
1.3216 + rc = write32bits(pPager->jfd, pPager->journalOff, pPg->pgno);
1.3217 + if( rc==SQLITE_OK ){
1.3218 + rc = sqlite3OsWrite(pPager->jfd, pData2, pPager->pageSize,
1.3219 + pPager->journalOff + 4);
1.3220 + pPager->journalOff += pPager->pageSize+4;
1.3221 + }
1.3222 + if( rc==SQLITE_OK ){
1.3223 + rc = write32bits(pPager->jfd, pPager->journalOff, cksum);
1.3224 + pPager->journalOff += 4;
1.3225 + }
1.3226 + IOTRACE(("JOUT %p %d %lld %d\n", pPager, pPg->pgno,
1.3227 + pPager->journalOff, pPager->pageSize));
1.3228 + PAGER_INCR(sqlite3_pager_writej_count);
1.3229 + PAGERTRACE5("JOURNAL %d page %d needSync=%d hash(%08x)\n",
1.3230 + PAGERID(pPager), pPg->pgno,
1.3231 + ((pPg->flags&PGHDR_NEED_SYNC)?1:0), pager_pagehash(pPg));
1.3232 +
1.3233 + /* An error has occurred writing to the journal file. The
1.3234 + ** transaction will be rolled back by the layer above.
1.3235 + */
1.3236 + if( rc!=SQLITE_OK ){
1.3237 + return rc;
1.3238 + }
1.3239 +
1.3240 + pPager->nRec++;
1.3241 + assert( pPager->pInJournal!=0 );
1.3242 + sqlite3BitvecSet(pPager->pInJournal, pPg->pgno);
1.3243 + if( !pPager->noSync ){
1.3244 + pPg->flags |= PGHDR_NEED_SYNC;
1.3245 + }
1.3246 + if( pPager->stmtInUse ){
1.3247 + sqlite3BitvecSet(pPager->pInStmt, pPg->pgno);
1.3248 + }
1.3249 + }
1.3250 + }else{
1.3251 + if( !pPager->journalStarted && !pPager->noSync ){
1.3252 + pPg->flags |= PGHDR_NEED_SYNC;
1.3253 + }
1.3254 + PAGERTRACE4("APPEND %d page %d needSync=%d\n",
1.3255 + PAGERID(pPager), pPg->pgno,
1.3256 + ((pPg->flags&PGHDR_NEED_SYNC)?1:0));
1.3257 + }
1.3258 + if( pPg->flags&PGHDR_NEED_SYNC ){
1.3259 + pPager->needSync = 1;
1.3260 + }
1.3261 + pPg->flags |= PGHDR_IN_JOURNAL;
1.3262 + }
1.3263 +
1.3264 + /* If the statement journal is open and the page is not in it,
1.3265 + ** then write the current page to the statement journal. Note that
1.3266 + ** the statement journal format differs from the standard journal format
1.3267 + ** in that it omits the checksums and the header.
1.3268 + */
1.3269 + if( pPager->stmtInUse
1.3270 + && !pageInStatement(pPg)
1.3271 + && (int)pPg->pgno<=pPager->stmtSize
1.3272 + ){
1.3273 + assert( (pPg->flags&PGHDR_IN_JOURNAL)
1.3274 + || (int)pPg->pgno>pPager->origDbSize );
1.3275 + if( MEMDB ){
1.3276 + rc = sqlite3PcachePreserve(pPg, 1);
1.3277 + if( rc!=SQLITE_OK ){
1.3278 + return rc;
1.3279 + }
1.3280 + PAGERTRACE3("STMT-JOURNAL %d page %d\n", PAGERID(pPager), pPg->pgno);
1.3281 + }else{
1.3282 + i64 offset = pPager->stmtNRec*(4+pPager->pageSize);
1.3283 + char *pData2 = CODEC2(pPager, pData, pPg->pgno, 7);
1.3284 + rc = write32bits(pPager->stfd, offset, pPg->pgno);
1.3285 + if( rc==SQLITE_OK ){
1.3286 + rc = sqlite3OsWrite(pPager->stfd, pData2, pPager->pageSize, offset+4);
1.3287 + }
1.3288 + PAGERTRACE3("STMT-JOURNAL %d page %d\n", PAGERID(pPager), pPg->pgno);
1.3289 + if( rc!=SQLITE_OK ){
1.3290 + return rc;
1.3291 + }
1.3292 + pPager->stmtNRec++;
1.3293 + assert( pPager->pInStmt!=0 );
1.3294 + sqlite3BitvecSet(pPager->pInStmt, pPg->pgno);
1.3295 + }
1.3296 + }
1.3297 + }
1.3298 +
1.3299 + /* Update the database size and return.
1.3300 + */
1.3301 + assert( pPager->state>=PAGER_SHARED );
1.3302 + if( pPager->dbSize<(int)pPg->pgno ){
1.3303 + pPager->dbSize = pPg->pgno;
1.3304 + if( !MEMDB && pPager->dbSize==PENDING_BYTE/pPager->pageSize ){
1.3305 + pPager->dbSize++;
1.3306 + }
1.3307 + }
1.3308 + return rc;
1.3309 +}
1.3310 +
1.3311 +/*
1.3312 +** This function is used to mark a data-page as writable. It uses
1.3313 +** pager_write() to open a journal file (if it is not already open)
1.3314 +** and write the page *pData to the journal.
1.3315 +**
1.3316 +** The difference between this function and pager_write() is that this
1.3317 +** function also deals with the special case where 2 or more pages
1.3318 +** fit on a single disk sector. In this case all co-resident pages
1.3319 +** must have been written to the journal file before returning.
1.3320 +*/
1.3321 +int sqlite3PagerWrite(DbPage *pDbPage){
1.3322 + int rc = SQLITE_OK; /* Result code handed back to the caller */
1.3323 +
1.3324 + PgHdr *pPg = pDbPage;
1.3325 + Pager *pPager = pPg->pPager;
1.3326 + Pgno nPagePerSector = (pPager->sectorSize/pPager->pageSize); /* Pages per sector */
1.3327 +
1.3328 + if( !MEMDB && nPagePerSector>1 ){
1.3329 + Pgno nPageCount; /* Total number of pages in database file */
1.3330 + Pgno pg1; /* First page of the sector pPg is located on. */
1.3331 + int nPage; /* Number of pages starting at pg1 to journal */
1.3332 + int ii; /* Offset from pg1 of the page being processed */
1.3333 + int needSync = 0; /* Set if any visited page carries PGHDR_NEED_SYNC */
1.3334 +
1.3335 + /* Set the doNotSync flag to 1. This is because we cannot allow a journal
1.3336 + ** header to be written between the pages journaled by this function.
1.3337 + */
1.3338 + assert( pPager->doNotSync==0 );
1.3339 + pPager->doNotSync = 1;
1.3340 +
1.3341 + /* This trick assumes that both the page-size and sector-size are
1.3342 + ** an integer power of 2. It sets variable pg1 to the identifier
1.3343 + ** of the first page of the sector pPg is located on.
1.3344 + */
1.3345 + pg1 = ((pPg->pgno-1) & ~(nPagePerSector-1)) + 1;
1.3346 +
1.3347 + sqlite3PagerPagecount(pPager, (int *)&nPageCount); /* NOTE(review): result code is not checked */
1.3348 + if( pPg->pgno>nPageCount ){ /* pPg is past the last counted page */
1.3349 + nPage = (pPg->pgno - pg1)+1;
1.3350 + }else if( (pg1+nPagePerSector-1)>nPageCount ){ /* Sector runs past the last page */
1.3351 + nPage = nPageCount+1-pg1;
1.3352 + }else{ /* The whole sector lies within the page count */
1.3353 + nPage = nPagePerSector;
1.3354 + }
1.3355 + assert(nPage>0);
1.3356 + assert(pg1<=pPg->pgno);
1.3357 + assert((pg1+nPage)>pPg->pgno);
1.3358 +
1.3359 + for(ii=0; ii<nPage && rc==SQLITE_OK; ii++){ /* Stops at the first error */
1.3360 + Pgno pg = pg1+ii;
1.3361 + PgHdr *pPage;
1.3362 + if( pg==pPg->pgno || !sqlite3BitvecTest(pPager->pInJournal, pg) ){
1.3363 + if( pg!=PAGER_MJ_PGNO(pPager) ){ /* Never journal the PAGER_MJ_PGNO page */
1.3364 + rc = sqlite3PagerGet(pPager, pg, &pPage);
1.3365 + if( rc==SQLITE_OK ){
1.3366 + rc = pager_write(pPage);
1.3367 + if( pPage->flags&PGHDR_NEED_SYNC ){
1.3368 + needSync = 1;
1.3369 + }
1.3370 + sqlite3PagerUnref(pPage);
1.3371 + }
1.3372 + }
1.3373 + }else if( (pPage = pager_lookup(pPager, pg))!=0 ){ /* Already in pInJournal */
1.3374 + if( pPage->flags&PGHDR_NEED_SYNC ){
1.3375 + needSync = 1;
1.3376 + }
1.3377 + sqlite3PagerUnref(pPage);
1.3378 + }
1.3379 + }
1.3380 +
1.3381 + /* If the PGHDR_NEED_SYNC flag is set for any of the nPage pages
1.3382 + ** starting at pg1, then it needs to be set for all of them. Because
1.3383 + ** writing to any of these nPage pages may damage the others, the
1.3384 + ** journal file must contain sync()ed copies of all of them
1.3385 + ** before any of them can be written out to the database file.
1.3386 + */
1.3387 + if( needSync ){
1.3388 + assert( !MEMDB && pPager->noSync==0 );
1.3389 + for(ii=0; ii<nPage && needSync; ii++){ /* needSync is not modified in this loop */
1.3390 + PgHdr *pPage = pager_lookup(pPager, pg1+ii);
1.3391 + if( pPage ) pPage->flags |= PGHDR_NEED_SYNC;
1.3392 + sqlite3PagerUnref(pPage); /* NOTE(review): reached with pPage==0 too; presumably NULL-safe - confirm */
1.3393 + }
1.3394 + assert(pPager->needSync);
1.3395 + }
1.3396 +
1.3397 + assert( pPager->doNotSync==1 );
1.3398 + pPager->doNotSync = 0; /* Cleared again even when rc holds an error */
1.3399 + }else{
1.3400 + rc = pager_write(pDbPage); /* Memdb or one page per sector: journal pPg only */
1.3401 + }
1.3402 + return rc;
1.3403 +}
1.3404 +
1.3405 +/*
1.3406 +** Return TRUE if the page given in the argument was previously passed
1.3407 +** to sqlite3PagerWrite(). In other words, return TRUE if it is ok
1.3408 +** to change the content of the page.
1.3409 +*/
1.3410 +#ifndef NDEBUG
1.3411 +int sqlite3PagerIswriteable(DbPage *pPg){
1.3412 + return pPg->flags&PGHDR_DIRTY; /* Non-zero iff the PGHDR_DIRTY bit is set */
1.3413 +}
1.3414 +#endif
1.3415 +
1.3416 +/*
1.3417 +** A call to this routine tells the pager that it is not necessary to
1.3418 +** write the information on page pPg back to the disk, even though
1.3419 +** that page might be marked as dirty.
1.3420 +**
1.3421 +** The overlying software layer calls this routine when all of the data
1.3422 +** on the given page is unused. The pager marks the page as clean so
1.3423 +** that it does not get written to disk.
1.3424 +**
1.3425 +** Tests show that this optimization, together with the
1.3426 +** sqlite3PagerDontRollback() below, more than double the speed
1.3427 +** of large INSERT operations and quadruple the speed of large DELETEs.
1.3428 +**
1.3429 +** When this routine is called, the page's bit in pAlwaysRollback is set.
1.3430 +** Subsequent calls to sqlite3PagerDontRollback() for the same page
1.3431 +** will thereafter be ignored. This is necessary to avoid a problem
1.3432 +** where a page with data is added to the freelist during one part of
1.3433 +** a transaction then removed from the freelist during a later part
1.3434 +** of the same transaction and reused for some other purpose. When it
1.3435 +** is first added to the freelist, this routine is called. When reused,
1.3436 +** the sqlite3PagerDontRollback() routine is called. But because the
1.3437 +** page contains critical data, we still need to be sure it gets
1.3438 +** rolled back in spite of the sqlite3PagerDontRollback() call.
1.3439 +*/
1.3440 +int sqlite3PagerDontWrite(DbPage *pDbPage){
1.3441 + PgHdr *pPg = pDbPage;
1.3442 + Pager *pPager = pPg->pPager;
1.3443 + int rc; /* Result of sqlite3BitvecSet(); returned to the caller */
1.3444 +
1.3445 + if( MEMDB || pPg->pgno>pPager->origDbSize ){ /* Memdb or page past origDbSize: no-op */
1.3446 + return SQLITE_OK;
1.3447 + }
1.3448 + if( pPager->pAlwaysRollback==0 ){ /* Bitvec is created on first use */
1.3449 + assert( pPager->pInJournal );
1.3450 + pPager->pAlwaysRollback = sqlite3BitvecCreate(pPager->origDbSize);
1.3451 + if( !pPager->pAlwaysRollback ){
1.3452 + return SQLITE_NOMEM;
1.3453 + }
1.3454 + }
1.3455 + rc = sqlite3BitvecSet(pPager->pAlwaysRollback, pPg->pgno); /* Remember this page */
1.3456 +
1.3457 + if( rc==SQLITE_OK && (pPg->flags&PGHDR_DIRTY) && !pPager->stmtInUse ){
1.3458 + assert( pPager->state>=PAGER_SHARED );
1.3459 + if( pPager->dbSize==(int)pPg->pgno && pPager->origDbSize<pPager->dbSize ){
1.3460 + /* If this page is the last page in the file and the file has grown
1.3461 + ** during the current transaction, then do NOT mark the page as clean.
1.3462 + ** When the database file grows, we must make sure that the last page
1.3463 + ** gets written at least once so that the disk file will be the correct
1.3464 + ** size. If you do not write this page and the size of the file
1.3465 + ** on the disk ends up being too small, that can lead to database
1.3466 + ** corruption during the next transaction.
1.3467 + */
1.3468 + }else{
1.3469 + PAGERTRACE3("DONT_WRITE page %d of %d\n", pPg->pgno, PAGERID(pPager));
1.3470 + IOTRACE(("CLEAN %p %d\n", pPager, pPg->pgno))
1.3471 + pPg->flags |= PGHDR_DONT_WRITE; /* Flag the page; PGHDR_DIRTY is left as is */
1.3472 +#ifdef SQLITE_CHECK_PAGES
1.3473 + pPg->pageHash = pager_pagehash(pPg);
1.3474 +#endif
1.3475 + }
1.3476 + }
1.3477 + return rc;
1.3478 +}
1.3479 +
1.3480 +/*
1.3481 +** A call to this routine tells the pager that if a rollback occurs,
1.3482 +** it is not necessary to restore the data on the given page. This
1.3483 +** means that the pager does not have to record the given page in the
1.3484 +** rollback journal.
1.3485 +**
1.3486 +** If we have not yet actually read the content of this page (if
1.3487 +** the PGHDR_NEED_READ flag is set) then this routine acts as a promise
1.3488 +** that we will never need to read the page content in the future,
1.3489 +** so the PGHDR_NEED_READ flag can be cleared at this point.
1.3490 +*/
1.3491 +void sqlite3PagerDontRollback(DbPage *pPg){
1.3492 + Pager *pPager = pPg->pPager;
1.3493 +
1.3494 + assert( pPager->state>=PAGER_RESERVED );
1.3495 +
1.3496 + /* If the journal file is not open, or DontWrite() has been called on
1.3497 + ** this page (DontWrite() sets the page's bit in pAlwaysRollback), or the
1.3498 + ** page lies beyond origDbSize, then this function is a no-op.
1.3499 + */
1.3500 + if( pPager->journalOpen==0
1.3501 + || sqlite3BitvecTest(pPager->pAlwaysRollback, pPg->pgno)
1.3502 + || pPg->pgno>pPager->origDbSize
1.3503 + ){
1.3504 + return;
1.3505 + }
1.3506 + assert( !MEMDB ); /* For a memdb, pPager->journalOpen is always 0 */
1.3507 +
1.3508 +#ifdef SQLITE_SECURE_DELETE
1.3509 + if( (pPg->flags & PGHDR_IN_JOURNAL)!=0 || (int)pPg->pgno>pPager->origDbSize ){
1.3510 + return; /* PGHDR_IN_JOURNAL already set (the origDbSize test repeats the one above) */
1.3511 + }
1.3512 +#endif
1.3513 +
1.3514 + /* If SECURE_DELETE is disabled, then there is no way that this
1.3515 + ** routine can be called on a page for which sqlite3PagerDontWrite()
1.3516 + ** has not been previously called during the same transaction.
1.3517 + ** And if DontWrite() has previously been called, the following
1.3518 + ** conditions must be met.
1.3519 + **
1.3520 + ** (Later:) Not true. If the database is corrupted by having duplicate
1.3521 + ** pages on the freelist (ex: corrupt9.test) then the following is not
1.3522 + ** necessarily true:
1.3523 + */
1.3524 + /* assert( !pPg->inJournal && (int)pPg->pgno <= pPager->origDbSize ); */
1.3525 +
1.3526 + assert( pPager->pInJournal!=0 );
1.3527 + sqlite3BitvecSet(pPager->pInJournal, pPg->pgno); /* Result code is not checked */
1.3528 + pPg->flags |= PGHDR_IN_JOURNAL; /* Flagged as journalled; nothing is written here */
1.3529 + pPg->flags &= ~PGHDR_NEED_READ; /* Content will not be read later */
1.3530 + if( pPager->stmtInUse ){ /* Also record the page in the statement bitvec */
1.3531 + assert( pPager->stmtSize >= pPager->origDbSize );
1.3532 + sqlite3BitvecSet(pPager->pInStmt, pPg->pgno);
1.3533 + }
1.3534 + PAGERTRACE3("DONT_ROLLBACK page %d of %d\n", pPg->pgno, PAGERID(pPager));
1.3535 + IOTRACE(("GARBAGE %p %d\n", pPager, pPg->pgno))
1.3536 +}
1.3537 +
1.3538 +
1.3539 +/*
1.3540 +** This routine is called to increment the database file change-counter,
1.3541 +** stored at byte 24 of the pager file.
1.3542 +*/
1.3543 +static int pager_incr_changecounter(Pager *pPager, int isDirect){
1.3544 + PgHdr *pPgHdr; /* Reference to page 1 */
1.3545 + u32 change_counter; /* Incremented change-counter value */
1.3546 + int rc = SQLITE_OK;
1.3547 +
1.3548 +#ifndef SQLITE_ENABLE_ATOMIC_WRITE
1.3549 + assert( isDirect==0 ); /* isDirect is only true for atomic writes */
1.3550 +#endif
1.3551 + if( !pPager->changeCountDone ){ /* Skipped once changeCountDone has been set */
1.3552 + /* Open page 1 of the file for writing. */
1.3553 + rc = sqlite3PagerGet(pPager, 1, &pPgHdr);
1.3554 + if( rc!=SQLITE_OK ) return rc;
1.3555 +
1.3556 + if( !isDirect ){ /* Direct mode does not call sqlite3PagerWrite() on page 1 */
1.3557 + rc = sqlite3PagerWrite(pPgHdr);
1.3558 + if( rc!=SQLITE_OK ){
1.3559 + sqlite3PagerUnref(pPgHdr);
1.3560 + return rc;
1.3561 + }
1.3562 + }
1.3563 +
1.3564 + /* Increment the counter taken from pPager->dbFileVers; store at byte 24. */
1.3565 + change_counter = sqlite3Get4byte((u8*)pPager->dbFileVers);
1.3566 + change_counter++;
1.3567 + put32bits(((char*)pPgHdr->pData)+24, change_counter);
1.3568 +
1.3569 +#ifdef SQLITE_ENABLE_ATOMIC_WRITE
1.3570 + if( isDirect && pPager->fd->pMethods ){
1.3571 + const void *zBuf = pPgHdr->pData;
1.3572 + rc = sqlite3OsWrite(pPager->fd, zBuf, pPager->pageSize, 0); /* Page 1 at offset 0 */
1.3573 + }
1.3574 +#endif
1.3575 +
1.3576 + /* Release the page reference. */
1.3577 + sqlite3PagerUnref(pPgHdr);
1.3578 + pPager->changeCountDone = 1; /* Set regardless of rc from the direct write */
1.3579 + }
1.3580 + return rc;
1.3581 +}
1.3582 +
1.3583 +/*
1.3584 +** Sync the pager file to disk.
1.3585 +*/
1.3586 +int sqlite3PagerSync(Pager *pPager){
1.3587 + int rc; /* SQLITE_OK for a memdb, else the result of sqlite3OsSync() */
1.3588 + if( MEMDB ){
1.3589 + rc = SQLITE_OK; /* No sync call is made for a memdb */
1.3590 + }else{
1.3591 + rc = sqlite3OsSync(pPager->fd, pPager->sync_flags);
1.3592 + }
1.3593 + return rc;
1.3594 +}
1.3595 +
1.3596 +/*
1.3597 +** Sync the database file for the pager pPager. zMaster points to the name
1.3598 +** of a master journal file that should be written into the individual
1.3599 +** journal file. zMaster may be NULL, which is interpreted as no master
1.3600 +** journal (a single database transaction).
1.3601 +**
1.3602 +** This routine ensures that the journal is synced, all dirty pages written
1.3603 +** to the database file and the database file synced. The only thing that
1.3604 +** remains to commit the transaction is to delete the journal file (or
1.3605 +** master journal file if specified).
1.3606 +**
1.3607 +** Note that if zMaster==NULL, this does not overwrite a previous value
1.3608 +** passed to an sqlite3PagerCommitPhaseOne() call.
1.3609 +**
1.3610 +** If parameter nTrunc is non-zero, then the pager file is truncated to
1.3611 +** nTrunc pages (this is used by auto-vacuum databases).
1.3612 +**
1.3613 +** If the final parameter - noSync - is true, then the database file itself
1.3614 +** is not synced. The caller must call sqlite3PagerSync() directly to
1.3615 +** sync the database file before calling CommitPhaseTwo() to delete the
1.3616 +** journal file in this case.
1.3617 +*/
1.3618 +int sqlite3PagerCommitPhaseOne(
1.3619 + Pager *pPager, /* Pager whose changes are being synced */
1.3620 + const char *zMaster, /* Master journal name, or NULL for none */
1.3621 + Pgno nTrunc, /* If non-zero, truncate the file to this many pages */
1.3622 + int noSync /* True to skip the sync of the database file */
1.3623 +){
1.3624 + int rc = SQLITE_OK;
1.3625 +
1.3626 + if( pPager->errCode ){ /* A stored error code is returned unchanged */
1.3627 + return pPager->errCode;
1.3628 + }
1.3629 +
1.3630 + /* If no changes have been made, we can leave the transaction early.
1.3631 + */
1.3632 + if( pPager->dbModified==0 &&
1.3633 + (pPager->journalMode!=PAGER_JOURNALMODE_DELETE ||
1.3634 + pPager->exclusiveMode!=0) ){
1.3635 + assert( pPager->dirtyCache==0 || pPager->journalOpen==0 );
1.3636 + return SQLITE_OK;
1.3637 + }
1.3638 +
1.3639 + PAGERTRACE4("DATABASE SYNC: File=%s zMaster=%s nTrunc=%d\n",
1.3640 + pPager->zFilename, zMaster, nTrunc);
1.3641 +
1.3642 + /* If this is an in-memory db, or no pages have been written to, or this
1.3643 + ** function has already been called, it is a no-op.
1.3644 + */
1.3645 + if( pPager->state!=PAGER_SYNCED && !MEMDB && pPager->dirtyCache ){
1.3646 + PgHdr *pPg; /* Dirty-list head, or a page fetched for journalling */
1.3647 +
1.3648 +#ifdef SQLITE_ENABLE_ATOMIC_WRITE
1.3649 + /* The atomic-write optimization can be used if all of the
1.3650 + ** following are true:
1.3651 + **
1.3652 + ** + The file-system supports the atomic-write property for
1.3653 + ** blocks of size page-size, and
1.3654 + ** + This commit is not part of a multi-file transaction, and
1.3655 + ** + Exactly one page has been modified and stored in the journal file.
1.3656 + **
1.3657 + ** If the optimization can be used, then the journal file will never
1.3658 + ** be created for this transaction.
1.3659 + */
1.3660 + int useAtomicWrite;
1.3661 + pPg = sqlite3PcacheDirtyList(pPager->pPCache);
1.3662 + useAtomicWrite = (
1.3663 + !zMaster &&
1.3664 + pPager->journalOpen &&
1.3665 + pPager->journalOff==jrnlBufferSize(pPager) &&
1.3666 + nTrunc==0 &&
1.3667 + (pPg==0 || pPg->pDirty==0)
1.3668 + );
1.3669 + assert( pPager->journalOpen || pPager->journalMode==PAGER_JOURNALMODE_OFF );
1.3670 + if( useAtomicWrite ){
1.3671 + /* Update the nRec field in the journal file. */
1.3672 + int offset = pPager->journalHdr + sizeof(aJournalMagic);
1.3673 + assert(pPager->nRec==1);
1.3674 + rc = write32bits(pPager->jfd, offset, pPager->nRec);
1.3675 +
1.3676 + /* Update the db file change counter. The following call will modify
1.3677 + ** the in-memory representation of page 1 to include the updated
1.3678 + ** change counter and then write page 1 directly to the database
1.3679 + ** file. Because of the atomic-write property of the host file-system,
1.3680 + ** this is safe.
1.3681 + */
1.3682 + if( rc==SQLITE_OK ){
1.3683 + rc = pager_incr_changecounter(pPager, 1);
1.3684 + }
1.3685 + }else{
1.3686 + rc = sqlite3JournalCreate(pPager->jfd);
1.3687 + }
1.3688 +
1.3689 + if( !useAtomicWrite && rc==SQLITE_OK ) /* Guards the if-statement after #endif */
1.3690 +#endif
1.3691 +
1.3692 + /* If a master journal file name has already been written to the
1.3693 + ** journal file, then no sync is required. This happens when it is
1.3694 + ** written, then the process fails to upgrade from a RESERVED to an
1.3695 + ** EXCLUSIVE lock. The next time the process tries to commit the
1.3696 + ** transaction the m-j name will have already been written.
1.3697 + */
1.3698 + if( !pPager->setMaster ){
1.3699 + rc = pager_incr_changecounter(pPager, 0);
1.3700 + if( rc!=SQLITE_OK ) goto sync_exit;
1.3701 + if( pPager->journalMode!=PAGER_JOURNALMODE_OFF ){
1.3702 +#ifndef SQLITE_OMIT_AUTOVACUUM
1.3703 + if( nTrunc!=0 ){
1.3704 + /* If this transaction has made the database smaller, then all pages
1.3705 + ** being discarded by the truncation must be written to the journal
1.3706 + ** file.
1.3707 + */
1.3708 + Pgno i; /* Page number in the range being truncated away */
1.3709 + int iSkip = PAGER_MJ_PGNO(pPager); /* This page number is never journalled */
1.3710 + for( i=nTrunc+1; i<=pPager->origDbSize; i++ ){
1.3711 + if( !sqlite3BitvecTest(pPager->pInJournal, i) && i!=iSkip ){
1.3712 + rc = sqlite3PagerGet(pPager, i, &pPg);
1.3713 + if( rc!=SQLITE_OK ) goto sync_exit;
1.3714 + rc = sqlite3PagerWrite(pPg);
1.3715 + sqlite3PagerUnref(pPg); /* Released before rc is examined */
1.3716 + if( rc!=SQLITE_OK ) goto sync_exit;
1.3717 + }
1.3718 + }
1.3719 + }
1.3720 +#endif
1.3721 + rc = writeMasterJournal(pPager, zMaster);
1.3722 + if( rc!=SQLITE_OK ) goto sync_exit;
1.3723 + rc = syncJournal(pPager);
1.3724 + }
1.3725 + }
1.3726 + if( rc!=SQLITE_OK ) goto sync_exit;
1.3727 +
1.3728 +#ifndef SQLITE_OMIT_AUTOVACUUM
1.3729 + if( nTrunc!=0 ){
1.3730 + rc = sqlite3PagerTruncate(pPager, nTrunc);
1.3731 + if( rc!=SQLITE_OK ) goto sync_exit;
1.3732 + }
1.3733 +#endif
1.3734 +
1.3735 + /* Write all dirty pages to the database file */
1.3736 + pPg = sqlite3PcacheDirtyList(pPager->pPCache);
1.3737 + rc = pager_write_pagelist(pPg);
1.3738 + if( rc!=SQLITE_OK ){
1.3739 + assert( rc!=SQLITE_IOERR_BLOCKED );
1.3740 + /* The error might have left the dirty list all fouled up here,
1.3741 + ** but that does not matter because if the dirty list did
1.3742 + ** get corrupted, then the transaction will roll back and
1.3743 + ** discard the dirty list. There is an assert in
1.3744 + ** pager_get_all_dirty_pages() that verifies that no attempt
1.3745 + ** is made to use an invalid dirty list.
1.3746 + */
1.3747 + goto sync_exit;
1.3748 + }
1.3749 + sqlite3PcacheCleanAll(pPager->pPCache);
1.3750 +
1.3751 + /* Sync the database file. */
1.3752 + if( !pPager->noSync && !noSync ){ /* Either flag suppresses the sync */
1.3753 + rc = sqlite3OsSync(pPager->fd, pPager->sync_flags);
1.3754 + }
1.3755 + IOTRACE(("DBSYNC %p\n", pPager))
1.3756 +
1.3757 + pPager->state = PAGER_SYNCED; /* Set even if the sync above returned an error */
1.3758 + }else if( MEMDB && nTrunc!=0 ){ /* Memdb: only the truncation is performed */
1.3759 + rc = sqlite3PagerTruncate(pPager, nTrunc);
1.3760 + }
1.3761 +
1.3762 +sync_exit:
1.3763 + if( rc==SQLITE_IOERR_BLOCKED ){
1.3764 + /* pager_incr_changecounter() may attempt to obtain an exclusive
1.3765 + * lock to spill the cache and return IOERR_BLOCKED. But since
1.3766 + * there is no chance the cache is inconsistent, it is
1.3767 + * better to return SQLITE_BUSY.
1.3768 + */
1.3769 + rc = SQLITE_BUSY;
1.3770 + }
1.3771 + return rc;
1.3772 +}
1.3773 +
1.3774 +
1.3775 +/*
1.3776 +** Commit all changes to the database and release the write lock.
1.3777 +**
1.3778 +** If the commit fails for any reason, a rollback attempt is made
1.3779 +** and an error code is returned. If the commit worked, SQLITE_OK
1.3780 +** is returned.
1.3781 +*/
1.3782 +int sqlite3PagerCommitPhaseTwo(Pager *pPager){
1.3783 + int rc = SQLITE_OK;
1.3784 +
1.3785 + if( pPager->errCode ){ /* A stored error code is returned unchanged */
1.3786 + return pPager->errCode;
1.3787 + }
1.3788 + if( pPager->state<PAGER_RESERVED ){ /* Pager state is below PAGER_RESERVED */
1.3789 + return SQLITE_ERROR;
1.3790 + }
1.3791 + if( pPager->dbModified==0 && /* Same early exit as in CommitPhaseOne() */
1.3792 + (pPager->journalMode!=PAGER_JOURNALMODE_DELETE ||
1.3793 + pPager->exclusiveMode!=0) ){
1.3794 + assert( pPager->dirtyCache==0 || pPager->journalOpen==0 );
1.3795 + return SQLITE_OK;
1.3796 + }
1.3797 + PAGERTRACE2("COMMIT %d\n", PAGERID(pPager));
1.3798 + if( MEMDB ){ /* Memdb: commit through the page cache only */
1.3799 + sqlite3PcacheCommit(pPager->pPCache, 0);
1.3800 + sqlite3PcacheCleanAll(pPager->pPCache);
1.3801 + sqlite3PcacheAssertFlags(pPager->pPCache, 0, PGHDR_IN_JOURNAL);
1.3802 + pPager->state = PAGER_SHARED;
1.3803 + }else{
1.3804 + assert( pPager->state==PAGER_SYNCED || !pPager->dirtyCache );
1.3805 + rc = pager_end_transaction(pPager, pPager->setMaster);
1.3806 + rc = pager_error(pPager, rc); /* Result is passed through pager_error() */
1.3807 + }
1.3808 + return rc;
1.3809 +}
1.3810 +
1.3811 +/*
1.3812 +** Rollback all changes. The database falls back to PAGER_SHARED mode.
1.3813 +** All in-memory cache pages revert to their original data contents.
1.3814 +** The journal is deleted.
1.3815 +**
1.3816 +** This routine cannot fail unless some other process is not following
1.3817 +** the correct locking protocol or unless some other
1.3818 +** process is writing trash into the journal file (SQLITE_CORRUPT) or
1.3819 +** unless a prior malloc() failed (SQLITE_NOMEM). Appropriate error
1.3820 +** codes are returned for all these occasions. Otherwise,
1.3821 +** SQLITE_OK is returned.
1.3822 +*/
1.3823 +int sqlite3PagerRollback(Pager *pPager){
1.3824 + int rc = SQLITE_OK;
1.3825 + PAGERTRACE2("ROLLBACK %d\n", PAGERID(pPager));
1.3826 + if( MEMDB ){ /* Memdb: roll back through the page cache only */
1.3827 + sqlite3PcacheRollback(pPager->pPCache, 1, pPager->xReiniter); /* Same call as StmtRollback() */
1.3828 + sqlite3PcacheRollback(pPager->pPCache, 0, pPager->xReiniter);
1.3829 + sqlite3PcacheCleanAll(pPager->pPCache);
1.3830 + sqlite3PcacheAssertFlags(pPager->pPCache, 0, PGHDR_IN_JOURNAL);
1.3831 + pPager->dbSize = pPager->origDbSize; /* Restore the original page count */
1.3832 + pager_truncate_cache(pPager);
1.3833 + pPager->stmtInUse = 0;
1.3834 + pPager->state = PAGER_SHARED;
1.3835 + }else if( !pPager->dirtyCache || !pPager->journalOpen ){ /* No playback needed */
1.3836 + rc = pager_end_transaction(pPager, pPager->setMaster);
1.3837 + }else if( pPager->errCode && pPager->errCode!=SQLITE_FULL ){
1.3838 + if( pPager->state>=PAGER_EXCLUSIVE ){
1.3839 + pager_playback(pPager, 0); /* Result ignored; errCode is returned below */
1.3840 + }
1.3841 + rc = pPager->errCode;
1.3842 + }else{
1.3843 + if( pPager->state==PAGER_RESERVED ){
1.3844 + int rc2; /* Result of pager_end_transaction() */
1.3845 + rc = pager_playback(pPager, 0);
1.3846 + rc2 = pager_end_transaction(pPager, pPager->setMaster);
1.3847 + if( rc==SQLITE_OK ){ /* A playback error takes precedence over rc2 */
1.3848 + rc = rc2;
1.3849 + }
1.3850 + }else{
1.3851 + rc = pager_playback(pPager, 0);
1.3852 + }
1.3853 +
1.3854 + pPager->dbSize = -1; /* presumably marks the cached size as unknown - TODO confirm */
1.3855 +
1.3856 + /* If an error occurs during a ROLLBACK, we can no longer trust the pager
1.3857 + ** cache. So call pager_error() on the way out to make any error
1.3858 + ** persistent.
1.3859 + */
1.3860 + rc = pager_error(pPager, rc);
1.3861 + }
1.3862 + return rc;
1.3863 +}
1.3864 +
1.3865 +/*
1.3866 +** Return TRUE if the database file is opened read-only. Return FALSE
1.3867 +** if the database is (in theory) writable.
1.3868 +*/
1.3869 +int sqlite3PagerIsreadonly(Pager *pPager){
1.3870 + return pPager->readOnly; /* The pager's readOnly field, returned as is */
1.3871 +}
1.3872 +
1.3873 +/*
1.3874 +** Return the number of references to the pager.
1.3875 +*/
1.3876 +int sqlite3PagerRefcount(Pager *pPager){
1.3877 + return sqlite3PcacheRefCount(pPager->pPCache); /* Count comes from the page cache */
1.3878 +}
1.3879 +
1.3880 +/*
1.3881 +** Return the number of references to the specified page.
1.3882 +*/
1.3883 +int sqlite3PagerPageRefcount(DbPage *pPage){
1.3884 + return sqlite3PcachePageRefcount(pPage); /* Delegates to the page cache */
1.3885 +}
1.3886 +
1.3887 +#ifdef SQLITE_TEST
1.3888 +/*
1.3889 +** This routine is used for testing and analysis only.
1.3890 +*/
1.3891 +int *sqlite3PagerStats(Pager *pPager){
1.3892 + static int a[11]; /* Static buffer: overwritten by every call */
1.3893 + a[0] = sqlite3PcacheRefCount(pPager->pPCache); /* Page cache reference count */
1.3894 + a[1] = sqlite3PcachePagecount(pPager->pPCache); /* Page cache page count */
1.3895 + a[2] = sqlite3PcacheGetCachesize(pPager->pPCache); /* Page cache size setting */
1.3896 + a[3] = pPager->dbSize; /* Pager's dbSize field */
1.3897 + a[4] = pPager->state; /* Pager state (PAGER_xxx value) */
1.3898 + a[5] = pPager->errCode; /* Stored error code */
1.3899 + a[6] = pPager->nHit;
1.3900 + a[7] = pPager->nMiss;
1.3901 + a[8] = 0; /* Used to be pPager->nOvfl */
1.3902 + a[9] = pPager->nRead;
1.3903 + a[10] = pPager->nWrite;
1.3904 + return a; /* Pointer to the static array above */
1.3905 +}
1.3906 +int sqlite3PagerIsMemdb(Pager *pPager){
1.3907 + return MEMDB; /* Value of the MEMDB macro; test builds only (SQLITE_TEST) */
1.3908 +}
1.3909 +#endif
1.3910 +
1.3911 +/*
1.3912 +** Set the statement rollback point.
1.3913 +**
1.3914 +** This routine should be called with the transaction journal already
1.3915 +** open. A new statement journal is created that can be used to rollback
1.3916 +** changes of a single SQL command within a larger transaction.
1.3917 +*/
1.3918 +static int pagerStmtBegin(Pager *pPager){
1.3919 + int rc; /* Only assigned by the sqlite3PagerOpentemp() call below */
1.3920 + assert( !pPager->stmtInUse );
1.3921 + assert( pPager->state>=PAGER_SHARED );
1.3922 + assert( pPager->dbSize>=0 );
1.3923 + PAGERTRACE2("STMT-BEGIN %d\n", PAGERID(pPager));
1.3924 + if( MEMDB ){ /* Memdb: just record the size; no bitvec or file is set up */
1.3925 + pPager->stmtInUse = 1;
1.3926 + pPager->stmtSize = pPager->dbSize;
1.3927 + return SQLITE_OK;
1.3928 + }
1.3929 + if( !pPager->journalOpen ){ /* Journal not open yet: only set stmtAutoopen */
1.3930 + pPager->stmtAutoopen = 1;
1.3931 + return SQLITE_OK;
1.3932 + }
1.3933 + assert( pPager->journalOpen );
1.3934 + assert( pPager->pInStmt==0 );
1.3935 + pPager->pInStmt = sqlite3BitvecCreate(pPager->dbSize); /* Sized by current dbSize */
1.3936 + if( pPager->pInStmt==0 ){
1.3937 + /* sqlite3OsLock(pPager->fd, SHARED_LOCK); */
1.3938 + return SQLITE_NOMEM;
1.3939 + }
1.3940 + pPager->stmtJSize = pPager->journalOff; /* Journal offset at statement start */
1.3941 + pPager->stmtSize = pPager->dbSize; /* Database size at statement start */
1.3942 + pPager->stmtHdrOff = 0;
1.3943 + pPager->stmtCksum = pPager->cksumInit;
1.3944 + if( !pPager->stmtOpen ){ /* Open the statement journal file on first use */
1.3945 + rc = sqlite3PagerOpentemp(pPager, pPager->stfd, SQLITE_OPEN_SUBJOURNAL);
1.3946 + if( rc ){
1.3947 + goto stmt_begin_failed;
1.3948 + }
1.3949 + pPager->stmtOpen = 1;
1.3950 + pPager->stmtNRec = 0;
1.3951 + }
1.3952 + pPager->stmtInUse = 1;
1.3953 + return SQLITE_OK;
1.3954 +
1.3955 +stmt_begin_failed:
1.3956 + if( pPager->pInStmt ){ /* Release the bitvec created above */
1.3957 + sqlite3BitvecDestroy(pPager->pInStmt);
1.3958 + pPager->pInStmt = 0;
1.3959 + }
1.3960 + return rc;
1.3961 +}
1.3962 +int sqlite3PagerStmtBegin(Pager *pPager){
1.3963 + int rc; /* Result of pagerStmtBegin(), returned unchanged */
1.3964 + rc = pagerStmtBegin(pPager);
1.3965 + return rc;
1.3966 +}
1.3967 +
1.3968 +/*
1.3969 +** Commit a statement.
1.3970 +*/
1.3971 +int sqlite3PagerStmtCommit(Pager *pPager){
1.3972 + if( pPager->stmtInUse ){
1.3973 + PAGERTRACE2("STMT-COMMIT %d\n", PAGERID(pPager));
1.3974 + if( !MEMDB ){
1.3975 + sqlite3BitvecDestroy(pPager->pInStmt); /* Drop the statement page bitvec */
1.3976 + pPager->pInStmt = 0;
1.3977 + }else{
1.3978 + sqlite3PcacheCommit(pPager->pPCache, 1); /* Memdb: commit via the page cache */
1.3979 + }
1.3980 + pPager->stmtNRec = 0;
1.3981 + pPager->stmtInUse = 0;
1.3982 + }
1.3983 + pPager->stmtAutoopen = 0; /* Cleared whether or not a statement was in use */
1.3984 + return SQLITE_OK; /* This routine always returns SQLITE_OK */
1.3985 +}
1.3986 +
1.3987 +/*
1.3988 +** Rollback a statement.
1.3989 +*/
1.3990 +int sqlite3PagerStmtRollback(Pager *pPager){
1.3991 + int rc; /* SQLITE_OK, or the result of pager_stmt_playback() */
1.3992 + if( pPager->stmtInUse ){
1.3993 + PAGERTRACE2("STMT-ROLLBACK %d\n", PAGERID(pPager));
1.3994 + if( MEMDB ){ /* Memdb: roll back through the page cache */
1.3995 + sqlite3PcacheRollback(pPager->pPCache, 1, pPager->xReiniter);
1.3996 + pPager->dbSize = pPager->stmtSize; /* Size saved by pagerStmtBegin() */
1.3997 + pager_truncate_cache(pPager);
1.3998 + rc = SQLITE_OK;
1.3999 + }else{
1.4000 + rc = pager_stmt_playback(pPager);
1.4001 + }
1.4002 + sqlite3PagerStmtCommit(pPager); /* Runs even if the playback failed */
1.4003 + }else{
1.4004 + rc = SQLITE_OK; /* No statement in use: nothing to roll back */
1.4005 + }
1.4006 + pPager->stmtAutoopen = 0;
1.4007 + return rc;
1.4008 +}
1.4009 +
1.4010 +/*
1.4011 +** Return the full pathname of the database file.
1.4012 +*/
1.4013 +const char *sqlite3PagerFilename(Pager *pPager){
1.4014 + return pPager->zFilename; /* Pointer into the Pager; no copy is made */
1.4015 +}
1.4016 +
1.4017 +/*
1.4018 +** Return the VFS structure for the pager.
1.4019 +*/
1.4020 +const sqlite3_vfs *sqlite3PagerVfs(Pager *pPager){
1.4021 + return pPager->pVfs; /* The pager's pVfs field, returned as is */
1.4022 +}
1.4023 +
1.4024 +/*
1.4025 +** Return the file handle for the database file associated
1.4026 +** with the pager. This might return NULL if the file has
1.4027 +** not yet been opened.
1.4028 +*/
1.4029 +sqlite3_file *sqlite3PagerFile(Pager *pPager){
1.4030 + return pPager->fd; /* The pager's fd field, returned as is */
1.4031 +}
1.4032 +
1.4033 +/*
1.4034 +** Return the directory of the database file.
1.4035 +*/
1.4036 +const char *sqlite3PagerDirname(Pager *pPager){
1.4037 + return pPager->zDirectory; /* Pointer into the Pager; no copy is made */
1.4038 +}
1.4039 +
1.4040 +/*
1.4041 +** Return the full pathname of the journal file.
1.4042 +*/
1.4043 +const char *sqlite3PagerJournalname(Pager *pPager){
1.4044 + return pPager->zJournal; /* Pointer into the Pager; no copy is made */
1.4045 +}
1.4046 +
1.4047 +/*
1.4048 +** Return true if fsync() calls are disabled for this pager. Return FALSE
1.4049 +** if fsync()s are executed normally.
1.4050 +*/
1.4051 +int sqlite3PagerNosync(Pager *pPager){
1.4052 + return pPager->noSync;
1.4053 +}
1.4054 +
1.4055 +#ifdef SQLITE_HAS_CODEC
1.4056 +/*
1.4057 +** Set the codec for this pager
1.4058 +*/
1.4059 +void sqlite3PagerSetCodec(
1.4060 + Pager *pPager,
1.4061 + void *(*xCodec)(void*,void*,Pgno,int),
1.4062 + void *pCodecArg
1.4063 +){
1.4064 + pPager->xCodec = xCodec;
1.4065 + pPager->pCodecArg = pCodecArg;
1.4066 +}
1.4067 +#endif
1.4068 +
1.4069 +#ifndef SQLITE_OMIT_AUTOVACUUM
/*
** Move the page pPg to location pgno in the file.
**
** There must be no references to the page previously located at
** pgno (which we call pPgOld) though that page is allowed to be
** in cache.  If the page previously located at pgno is not already
** in the rollback journal, it is not put there by this routine.
**
** References to the page pPg remain valid. Updating any
** meta-data associated with pPg (i.e. data stored in the nExtra bytes
** allocated along with the page) is the responsibility of the caller.
**
** A transaction must be active when this routine is called. It used to be
** required that a statement transaction was not active, but this restriction
** has been removed (CREATE INDEX needs to move a page when a statement
** transaction is active).
**
** If the fourth argument, isCommit, is non-zero, then this page is being
** moved as part of a database reorganization just before the transaction
** is being committed. In this case, it is guaranteed that the database page
** pPg refers to will not be written to again within this transaction.
**
** Returns SQLITE_OK on success.  If the original page number has to be
** reloaded into the cache (see the needSyncPgno handling below) and
** sqlite3PagerGet() fails, that error code is returned instead.  Note
** that by then pPg has already been moved and marked dirty; this routine
** does not undo the move on that error path.
*/
int sqlite3PagerMovepage(Pager *pPager, DbPage *pPg, Pgno pgno, int isCommit){
  PgHdr *pPgOld;               /* The page being overwritten. */
  Pgno needSyncPgno = 0;       /* Original pgno of pPg if it needs a journal sync, else 0 */

  assert( pPg->nRef>0 );

  PAGERTRACE5("MOVE %d page %d (needSync=%d) moves to %d\n",
      PAGERID(pPager), pPg->pgno, (pPg->flags&PGHDR_NEED_SYNC)?1:0, pgno);
  IOTRACE(("MOVE %p %d %d\n", pPager, pPg->pgno, pgno))

  pager_get_content(pPg);

  /* If the journal needs to be sync()ed before page pPg->pgno can
  ** be written to, store pPg->pgno in local variable needSyncPgno.
  **
  ** If the isCommit flag is set, there is no need to remember that
  ** the journal needs to be sync()ed before database page pPg->pgno
  ** can be written to. The caller has already promised not to write to it.
  */
  if( (pPg->flags&PGHDR_NEED_SYNC) && !isCommit ){
    needSyncPgno = pPg->pgno;
    assert( (pPg->flags&PGHDR_IN_JOURNAL) || (int)pgno>pPager->origDbSize );
    assert( pPg->flags&PGHDR_DIRTY );
    assert( pPager->needSync );
  }

  /* If the cache contains a page with page-number pgno, remove it
  ** from its hash chain. Also, if the PgHdr.needSync was set for
  ** page pgno before the 'move' operation, it needs to be retained
  ** for the page moved there.
  **
  ** The NEED_SYNC and IN_JOURNAL flags of pPg describe its old location,
  ** so they are cleared first and then re-derived for the new location:
  ** NEED_SYNC from the page currently cached at pgno (if any), and
  ** IN_JOURNAL from the pInJournal bitvec entry for pgno.
  */
  pPg->flags &= ~(PGHDR_NEED_SYNC|PGHDR_IN_JOURNAL);
  pPgOld = pager_lookup(pPager, pgno);
  assert( !pPgOld || pPgOld->nRef==1 );
  if( pPgOld ){
    pPg->flags |= (pPgOld->flags&PGHDR_NEED_SYNC);
  }
  if( sqlite3BitvecTest(pPager->pInJournal, pgno) ){
    assert( !MEMDB );
    pPg->flags |= PGHDR_IN_JOURNAL;
  }

  /* Give pPg its new page number.  The page that used to live at pgno,
  ** if it was cached, is moved to page number 0 and then released.
  */
  sqlite3PcacheMove(pPg, pgno);
  if( pPgOld ){
    sqlite3PcacheMove(pPgOld, 0);
    sqlite3PcacheRelease(pPgOld);
  }

  /* The moved page must be written out at its new location, so mark both
  ** the page and the pager as modified.
  */
  makeDirty(pPg);
  pPager->dirtyCache = 1;
  pPager->dbModified = 1;

  if( needSyncPgno ){
    /* If needSyncPgno is non-zero, then the journal file needs to be
    ** sync()ed before any data is written to database file page needSyncPgno.
    ** Currently, no such page exists in the page-cache and the
    ** "is journaled" bitvec flag has been set. This needs to be remedied by
    ** loading the page into the pager-cache and setting the PgHdr.needSync
    ** flag.
    **
    ** If the attempt to load the page into the page-cache fails, (due
    ** to a malloc() or IO failure), clear the bit in the pInJournal[]
    ** array. Otherwise, if the page is loaded and written again in
    ** this transaction, it may be written to the database file before
    ** it is synced into the journal file. This way, it may end up in
    ** the journal file twice, but that is not a problem.
    **
    ** The sqlite3PagerGet() call may cause the journal to sync. So make
    ** sure the Pager.needSync flag is set too.
    */
    int rc;
    PgHdr *pPgHdr;
    assert( pPager->needSync );
    rc = sqlite3PagerGet(pPager, needSyncPgno, &pPgHdr);
    if( rc!=SQLITE_OK ){
      /* The bit is only cleared when a pInJournal bitvec exists and the
      ** page number lies within the original database size.
      */
      if( pPager->pInJournal && (int)needSyncPgno<=pPager->origDbSize ){
        sqlite3BitvecClear(pPager->pInJournal, needSyncPgno);
      }
      return rc;
    }
    pPager->needSync = 1;
    assert( pPager->noSync==0 && !MEMDB );
    pPgHdr->flags |= PGHDR_NEED_SYNC;
    pPgHdr->flags |= PGHDR_IN_JOURNAL;
    makeDirty(pPgHdr);
    /* Drop the reference taken by sqlite3PagerGet() above. */
    sqlite3PagerUnref(pPgHdr);
  }

  return SQLITE_OK;
}
1.4182 +#endif
1.4183 +
1.4184 +/*
1.4185 +** Return a pointer to the data for the specified page.
1.4186 +*/
1.4187 +void *sqlite3PagerGetData(DbPage *pPg){
1.4188 + assert( pPg->nRef>0 || pPg->pPager->memDb );
1.4189 + return pPg->pData;
1.4190 +}
1.4191 +
1.4192 +/*
1.4193 +** Return a pointer to the Pager.nExtra bytes of "extra" space
1.4194 +** allocated along with the specified page.
1.4195 +*/
1.4196 +void *sqlite3PagerGetExtra(DbPage *pPg){
1.4197 + Pager *pPager = pPg->pPager;
1.4198 + return (pPager?pPg->pExtra:0);
1.4199 +}
1.4200 +
1.4201 +/*
1.4202 +** Get/set the locking-mode for this pager. Parameter eMode must be one
1.4203 +** of PAGER_LOCKINGMODE_QUERY, PAGER_LOCKINGMODE_NORMAL or
1.4204 +** PAGER_LOCKINGMODE_EXCLUSIVE. If the parameter is not _QUERY, then
1.4205 +** the locking-mode is set to the value specified.
1.4206 +**
1.4207 +** The returned value is either PAGER_LOCKINGMODE_NORMAL or
1.4208 +** PAGER_LOCKINGMODE_EXCLUSIVE, indicating the current (possibly updated)
1.4209 +** locking-mode.
1.4210 +*/
1.4211 +int sqlite3PagerLockingMode(Pager *pPager, int eMode){
1.4212 + assert( eMode==PAGER_LOCKINGMODE_QUERY
1.4213 + || eMode==PAGER_LOCKINGMODE_NORMAL
1.4214 + || eMode==PAGER_LOCKINGMODE_EXCLUSIVE );
1.4215 + assert( PAGER_LOCKINGMODE_QUERY<0 );
1.4216 + assert( PAGER_LOCKINGMODE_NORMAL>=0 && PAGER_LOCKINGMODE_EXCLUSIVE>=0 );
1.4217 + if( eMode>=0 && !pPager->tempFile ){
1.4218 + pPager->exclusiveMode = eMode;
1.4219 + }
1.4220 + return (int)pPager->exclusiveMode;
1.4221 +}
1.4222 +
1.4223 +/*
1.4224 +** Get/set the journal-mode for this pager. Parameter eMode must be one of:
1.4225 +**
1.4226 +** PAGER_JOURNALMODE_QUERY
1.4227 +** PAGER_JOURNALMODE_DELETE
1.4228 +** PAGER_JOURNALMODE_TRUNCATE
1.4229 +** PAGER_JOURNALMODE_PERSIST
1.4230 +** PAGER_JOURNALMODE_OFF
1.4231 +**
1.4232 +** If the parameter is not _QUERY, then the journal-mode is set to the
1.4233 +** value specified.
1.4234 +**
1.4235 +** The returned indicate the current (possibly updated)
1.4236 +** journal-mode.
1.4237 +*/
1.4238 +int sqlite3PagerJournalMode(Pager *pPager, int eMode){
1.4239 + assert( eMode==PAGER_JOURNALMODE_QUERY
1.4240 + || eMode==PAGER_JOURNALMODE_DELETE
1.4241 + || eMode==PAGER_JOURNALMODE_TRUNCATE
1.4242 + || eMode==PAGER_JOURNALMODE_PERSIST
1.4243 + || eMode==PAGER_JOURNALMODE_OFF );
1.4244 + assert( PAGER_JOURNALMODE_QUERY<0 );
1.4245 + if( eMode>=0 ){
1.4246 + pPager->journalMode = eMode;
1.4247 + }else{
1.4248 + assert( eMode==PAGER_JOURNALMODE_QUERY );
1.4249 + }
1.4250 + return (int)pPager->journalMode;
1.4251 +}
1.4252 +
1.4253 +/*
1.4254 +** Get/set the size-limit used for persistent journal files.
1.4255 +*/
1.4256 +i64 sqlite3PagerJournalSizeLimit(Pager *pPager, i64 iLimit){
1.4257 + if( iLimit>=-1 ){
1.4258 + pPager->journalSizeLimit = iLimit;
1.4259 + }
1.4260 + return pPager->journalSizeLimit;
1.4261 +}
1.4262 +
1.4263 +#endif /* SQLITE_OMIT_DISKIO */