1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/os/persistentdata/persistentstorage/sql/SQLite/pager.c Fri Jun 15 03:10:57 2012 +0200
1.3 @@ -0,0 +1,5377 @@
1.4 +/*
1.5 +** 2001 September 15
1.6 +**
1.7 +** The author disclaims copyright to this source code. In place of
1.8 +** a legal notice, here is a blessing:
1.9 +**
1.10 +** May you do good and not evil.
1.11 +** May you find forgiveness for yourself and forgive others.
1.12 +** May you share freely, never taking more than you give.
1.13 +**
1.14 +*************************************************************************
1.15 +** This is the implementation of the page cache subsystem or "pager".
1.16 +**
1.17 +** The pager is used to access a database disk file. It implements
1.18 +** atomic commit and rollback through the use of a journal file that
1.19 +** is separate from the database file. The pager also implements file
1.20 +** locking to prevent two processes from writing the same database
1.21 +** file simultaneously, or one process from reading the database while
1.22 +** another is writing.
1.23 +**
1.24 +** @(#) $Id: pager.c,v 1.469 2008/08/02 03:50:39 drh Exp $
1.25 +*/
1.26 +#ifndef SQLITE_OMIT_DISKIO
1.27 +#include "sqliteInt.h"
1.28 +#include <assert.h>
1.29 +#include <string.h>
1.30 +
1.31 +/*
1.32 +** Macros for troubleshooting. Normally turned off
1.33 +*/
1.34 +#if 0 /* Change to "#if 1" to route the PAGERTRACEn() macros to printf() */
1.35 +#define sqlite3DebugPrintf printf
1.36 +#define PAGERTRACE1(X) sqlite3DebugPrintf(X)
1.37 +#define PAGERTRACE2(X,Y) sqlite3DebugPrintf(X,Y)
1.38 +#define PAGERTRACE3(X,Y,Z) sqlite3DebugPrintf(X,Y,Z)
1.39 +#define PAGERTRACE4(X,Y,Z,W) sqlite3DebugPrintf(X,Y,Z,W)
1.40 +#define PAGERTRACE5(X,Y,Z,W,V) sqlite3DebugPrintf(X,Y,Z,W,V)
1.41 +#else /* Tracing disabled: every PAGERTRACEn() expands to nothing */
1.42 +#define PAGERTRACE1(X)
1.43 +#define PAGERTRACE2(X,Y)
1.44 +#define PAGERTRACE3(X,Y,Z)
1.45 +#define PAGERTRACE4(X,Y,Z,W)
1.46 +#define PAGERTRACE5(X,Y,Z,W,V)
1.47 +#endif
1.48 +
1.49 +/*
1.50 +** The following two macros are used within the PAGERTRACEX() macros above
1.51 +** to print out file-descriptors.
1.52 +**
1.53 +** PAGERID() takes a pointer to a Pager struct as its argument. The
1.54 +** associated file-descriptor is returned. FILEHANDLEID() takes an sqlite3_file
1.55 +** struct as its argument.
1.56 +*/
1.57 +#define PAGERID(p) ((int)(p->fd)) /* Pointer cast to int, for trace output; p is expanded unparenthesized */
1.58 +#define FILEHANDLEID(fd) ((int)fd) /* Cast applies to fd as written; fd is expanded unparenthesized */
1.59 +
1.60 +/*
1.61 +** The page cache as a whole is always in one of the following
1.62 +** states:
1.63 +**
1.64 +** PAGER_UNLOCK The page cache is not currently reading or
1.65 +** writing the database file. There is no
1.66 +** data held in memory. This is the initial
1.67 +** state.
1.68 +**
1.69 +** PAGER_SHARED The page cache is reading the database.
1.70 +** Writing is not permitted. There can be
1.71 +** multiple readers accessing the same database
1.72 +** file at the same time.
1.73 +**
1.74 +** PAGER_RESERVED This process has reserved the database for writing
1.75 +** but has not yet made any changes. Only one process
1.76 +** at a time can reserve the database. The original
1.77 +** database file has not been modified so other
1.78 +** processes may still be reading the on-disk
1.79 +** database file.
1.80 +**
1.81 +** PAGER_EXCLUSIVE The page cache is writing the database.
1.82 +** Access is exclusive. No other processes or
1.83 +** threads can be reading or writing while one
1.84 +** process is writing.
1.85 +**
1.86 +** PAGER_SYNCED The pager moves to this state from PAGER_EXCLUSIVE
1.87 +** after all dirty pages have been written to the
1.88 +** database file and the file has been synced to
1.89 +** disk. All that remains to do is to remove or
1.90 +** truncate the journal file and the transaction
1.91 +** will be committed.
1.92 +**
1.93 +** The page cache comes up in PAGER_UNLOCK. The first time a
1.94 +** sqlite3PagerGet() occurs, the state transitions to PAGER_SHARED.
1.95 +** After all pages have been released using sqlite_page_unref(),
1.96 +** the state transitions back to PAGER_UNLOCK. The first time
1.97 +** that sqlite3PagerWrite() is called, the state transitions to
1.98 +** PAGER_RESERVED. (Note that sqlite3PagerWrite() can only be
1.99 +** called on an outstanding page which means that the pager must
1.100 +** be in PAGER_SHARED before it transitions to PAGER_RESERVED.)
1.101 +** PAGER_RESERVED means that there is an open rollback journal.
1.102 +** The transition to PAGER_EXCLUSIVE occurs before any changes
1.103 +** are made to the database file, though writes to the rollback
1.104 +** journal occur with just PAGER_RESERVED. After an sqlite3PagerRollback()
1.105 +** or sqlite3PagerCommitPhaseTwo(), the state can go back to PAGER_SHARED,
1.106 +** or it can stay at PAGER_EXCLUSIVE if we are in exclusive access mode.
1.107 +*/
1.108 +#define PAGER_UNLOCK 0 /* initial state; no data held in memory */
1.109 +#define PAGER_SHARED 1 /* same as SHARED_LOCK */
1.110 +#define PAGER_RESERVED 2 /* same as RESERVED_LOCK */
1.111 +#define PAGER_EXCLUSIVE 4 /* same as EXCLUSIVE_LOCK */
1.112 +#define PAGER_SYNCED 5 /* dirty pages written and synced; journal not yet removed/truncated */
1.113 +
1.114 +/*
1.115 +** If the SQLITE_BUSY_RESERVED_LOCK macro is set to true at compile-time,
1.116 +** then failed attempts to get a reserved lock will invoke the busy callback.
1.117 +** This is off by default. To see why, consider the following scenario:
1.118 +**
1.119 +** Suppose thread A already has a shared lock and wants a reserved lock.
1.120 +** Thread B already has a reserved lock and wants an exclusive lock. If
1.121 +** both threads are using their busy callbacks, it might be a long time
1.122 +** before one of the threads gives up and allows the other to proceed.
1.123 +** But if the thread trying to get the reserved lock gives up quickly
1.124 +** (if it never invokes its busy callback) then the contention will be
1.125 +** resolved quickly.
1.126 +*/
1.127 +#ifndef SQLITE_BUSY_RESERVED_LOCK
1.128 +# define SQLITE_BUSY_RESERVED_LOCK 0 /* Default: off */
1.129 +#endif
1.130 +
1.131 +/*
1.132 +** Round the integer value X up to the next multiple of 8. If X is an
1.133 +** address, the result is therefore aligned to an 8-byte boundary.
1.134 +*/
1.135 +#define FORCE_ALIGNMENT(X) (((X)+7)&~7)
1.136 +
1.137 +typedef struct PgHdr PgHdr; /* Forward declaration; struct PgHdr is defined further down */
1.138 +
1.139 +/*
1.140 +** Each pager stores all currently unreferenced pages in a list sorted
1.141 +** in least-recently-used (LRU) order (i.e. the first item on the list has
1.142 +** not been referenced in a long time, the last item has been recently
1.143 +** used). An instance of this structure is included as part of each
1.144 +** pager structure for this purpose (variable Pager.lru).
1.145 +**
1.146 +** Additionally, if memory-management is enabled, all unreferenced pages
1.147 +** are stored in a global LRU list (global variable sqlite3LruPageList).
1.148 +**
1.149 +** In both cases, the PagerLruList.pFirstSynced variable points to
1.150 +** the first page in the corresponding list that does not require an
1.151 +** fsync() operation before its memory can be reclaimed. If no such
1.152 +** page exists, PagerLruList.pFirstSynced is set to NULL.
1.153 +*/
1.154 +typedef struct PagerLruList PagerLruList;
1.155 +struct PagerLruList {
1.156 + PgHdr *pFirst; /* First page in LRU list (the least recently used) */
1.157 + PgHdr *pLast; /* Last page in LRU list (the most recently used) */
1.158 + PgHdr *pFirstSynced; /* First page in list with PgHdr.needSync==0, or NULL */
1.159 +};
1.160 +
1.161 +/*
1.162 +** The following structure contains the next and previous pointers used
1.163 +** to link a PgHdr structure into a PagerLruList linked list.
1.164 +*/
1.165 +typedef struct PagerLruLink PagerLruLink;
1.166 +struct PagerLruLink {
1.167 + PgHdr *pNext; /* Next page in the list, or NULL for the last entry */
1.168 + PgHdr *pPrev; /* Previous page in the list, or NULL for the first entry */
1.169 +};
1.170 +
1.171 +/*
1.172 +** Each in-memory image of a page begins with the following header.
1.173 +** This header is only visible to this pager module. The client
1.174 +** code that calls pager sees only the data that follows the header.
1.175 +**
1.176 +** Client code should call sqlite3PagerWrite() on a page prior to making
1.177 +** any modifications to that page. The first time sqlite3PagerWrite()
1.178 +** is called, the original page contents are written into the rollback
1.179 +** journal and PgHdr.inJournal and PgHdr.needSync are set. Later, once
1.180 +** the journal page has made it onto the disk surface, PgHdr.needSync
1.181 +** is cleared. The modified page cannot be written back into the original
1.182 +** database file until the journal pages have been synced to disk and the
1.183 +** PgHdr.needSync has been cleared.
1.184 +**
1.185 +** The PgHdr.dirty flag is set when sqlite3PagerWrite() is called and
1.186 +** is cleared again when the page content is written back to the original
1.187 +** database file.
1.188 +**
1.189 +** Details of important structure elements:
1.190 +**
1.191 +** needSync
1.192 +**
1.193 +** If this is true, this means that it is not safe to write the page
1.194 +** content to the database because the original content needed
1.195 +** for rollback has not been synced to the main rollback journal.
1.196 +** The original content may have been written to the rollback journal
1.197 +** but it has not yet been synced. So we cannot write to the database
1.198 +** file because power failure might cause the page in the journal file
1.199 +** to never reach the disk. It is as if the write to the journal file
1.200 +** does not occur until the journal file is synced.
1.201 +**
1.202 +** This flag is false if the page content exactly matches what
1.203 +** currently exists in the database file. The needSync flag is also
1.204 +** false if the original content has been written to the main rollback
1.205 +** journal and synced. If the page represents a new page that has
1.206 +** been added onto the end of the database during the current
1.207 +** transaction, the needSync flag is true until the original database
1.208 +** size in the journal header has been synced to disk.
1.209 +**
1.210 +** inJournal
1.211 +**
1.212 +** This is true if the original page has been written into the main
1.213 +** rollback journal. This is always false for new pages added to
1.214 +** the end of the database file during the current transaction.
1.215 +** And this flag says nothing about whether or not the journal
1.216 +** has been synced to disk. For pages that are in the original
1.217 +** database file, the following expression should always be true:
1.218 +**
1.219 +** inJournal = sqlite3BitvecTest(pPager->pInJournal, pgno)
1.220 +**
1.221 +** The pPager->pInJournal object is only valid for the original
1.222 +** pages of the database, not new pages that are added to the end
1.223 +** of the database, so obviously the above expression cannot be
1.224 +** valid for new pages. For new pages inJournal is always 0.
1.225 +**
1.226 +** dirty
1.227 +**
1.228 +** When true, this means that the content of the page has been
1.229 +** modified and needs to be written back to the database file.
1.230 +** If false, it means that either the content of the page is
1.231 +** unchanged or else the content is unimportant and we do not
1.232 +** care whether or not it is preserved.
1.233 +**
1.234 +** alwaysRollback
1.235 +**
1.236 +** This means that the sqlite3PagerDontRollback() API should be
1.237 +** ignored for this page. The DontRollback() API attempts to say
1.238 +** that the content of the page on disk is unimportant (it is an
1.239 +** unused page on the freelist) so that it is unnecessary to
1.240 +** rollback changes to this page because the content of the page
1.241 +** can change without changing the meaning of the database. This
1.242 +** flag overrides any DontRollback() attempt. This flag is set
1.243 +** when a page that originally contained valid data is added to
1.244 +** the freelist. Later in the same transaction, this page might
1.245 +** be pulled from the freelist and reused for something different
1.246 +** and at that point the DontRollback() API will be called because
1.247 +** pages taken from the freelist do not need to be protected by
1.248 +** the rollback journal. But this flag says that the page was
1.249 +** not originally part of the freelist so that it still needs to
1.250 +** be rolled back in spite of any subsequent DontRollback() calls.
1.251 +**
1.252 +** needRead
1.253 +**
1.254 +** This flag means (when true) that the content of the page has
1.255 +** not yet been loaded from disk. The in-memory content is just
1.256 +** garbage. (Actually, we zero the content, but you should not
1.257 +** make any assumptions about the content nevertheless.) If the
1.258 +** content is needed in the future, it should be read from the
1.259 +** original database file.
1.260 +*/
1.261 +struct PgHdr {
1.262 + Pager *pPager; /* The pager to which this page belongs */
1.263 + Pgno pgno; /* The page number for this page */
1.264 + PgHdr *pNextHash, *pPrevHash; /* Hash collision chain for PgHdr.pgno */
1.265 + PagerLruLink free; /* Links for the per-pager LRU list (Pager.lru) */
1.266 + PgHdr *pNextAll; /* A list of all pages */
1.267 + u8 inJournal; /* TRUE if has been written to journal */
1.268 + u8 dirty; /* TRUE if we need to write back changes */
1.269 + u8 needSync; /* Sync journal before writing this page */
1.270 + u8 alwaysRollback; /* Disable DontRollback() for this page */
1.271 + u8 needRead; /* Read content if PagerWrite() is called */
1.272 + short int nRef; /* Number of users of this page */
1.273 + PgHdr *pDirty, *pPrevDirty; /* Dirty pages */
1.274 +#ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
1.275 + PgHdr *pPrevAll; /* A list of all pages */
1.276 + PagerLruLink gfree; /* Links for the global list of nRef==0 pages */
1.277 +#endif
1.278 +#ifdef SQLITE_CHECK_PAGES
1.279 + u32 pageHash; /* Present only when SQLITE_CHECK_PAGES is defined */
1.280 +#endif
1.281 + void *pData; /* Page data */
1.282 + /* Pager.nExtra bytes of local data appended to this header */
1.283 +};
1.284 +
1.285 +/*
1.286 +** For an in-memory only database, some extra information is recorded about
1.287 +** each page so that changes can be rolled back. (Journal files are not
1.288 +** used for in-memory databases.) The following information is added to
1.289 +** the end of every EXTRA block for in-memory databases.
1.290 +**
1.291 +** This information could have been added directly to the PgHdr structure.
1.292 +** But then it would take up an extra 8 bytes of storage on every PgHdr
1.293 +** even for disk-based databases. Splitting it out saves 8 bytes. This
1.294 +** is only a savings of 0.8% but those percentages add up.
1.295 +*/
1.296 +typedef struct PgHistory PgHistory;
1.297 +struct PgHistory { /* Reached from a PgHdr via the PGHDR_TO_HIST() macro */
1.298 + u8 *pOrig; /* Original page text. Restore to this on a full rollback */
1.299 + u8 *pStmt; /* Text as it was at the beginning of the current statement */
1.300 + PgHdr *pNextStmt, *pPrevStmt; /* List of pages in the statement journal */
1.301 + u8 inStmt; /* TRUE if in the statement subjournal */
1.302 +};
1.303 +
1.304 +/*
1.305 +** Macros for invoking the codec, if any: CODEC1 is a statement, CODEC2 an expression yielding char*
1.306 +*/
1.307 +#ifdef SQLITE_HAS_CODEC
1.308 +# define CODEC1(P,D,N,X) if( P->xCodec!=0 ){ P->xCodec(P->pCodecArg,D,N,X); }
1.309 +# define CODEC2(P,D,N,X) ((char*)(P->xCodec!=0?P->xCodec(P->pCodecArg,D,N,X):D))
1.310 +#else
1.311 +# define CODEC1(P,D,N,X) /* NO-OP */
1.312 +# define CODEC2(P,D,N,X) ((char*)D) /* No codec: D is passed through unchanged */
1.313 +#endif
1.314 +
1.315 +/*
1.316 +** Convert a pointer to a PgHdr into a pointer to its data, to the extra
1.317 +** space after the header, or to its PgHistory. PGHDR_TO_EXTRA ignores P.
1.318 +*/
1.319 +#define PGHDR_TO_DATA(P) ((P)->pData)
1.320 +#define PGHDR_TO_EXTRA(G,P) ((void*)&((G)[1]))
1.321 +#define PGHDR_TO_HIST(P,PGR) \
1.322 + ((PgHistory*)&((char*)(&(P)[1]))[(PGR)->nExtra])
1.323 +
1.324 +/*
1.325 +** An open page cache is an instance of the following structure.
1.326 +**
1.327 +** Pager.errCode may be set to SQLITE_IOERR, SQLITE_CORRUPT, or
1.328 +** SQLITE_FULL. Once one of the first two errors occurs, it persists
1.329 +** and is returned as the result of every major pager API call. The
1.330 +** SQLITE_FULL return code is slightly different. It persists only until the
1.331 +** next successful rollback is performed on the pager cache. Also,
1.332 +** SQLITE_FULL does not affect the sqlite3PagerGet() and sqlite3PagerLookup()
1.333 +** APIs, they may still be used successfully.
1.334 +*/
1.335 +struct Pager {
1.336 + sqlite3_vfs *pVfs; /* OS functions to use for IO */
1.337 + u8 journalOpen; /* True if journal file descriptor is valid */
1.338 + u8 journalStarted; /* True if header of journal is synced */
1.339 + u8 useJournal; /* Use a rollback journal on this file */
1.340 + u8 noReadlock; /* Do not bother to obtain readlocks */
1.341 + u8 stmtOpen; /* True if the statement subjournal is open */
1.342 + u8 stmtInUse; /* True if we are in a statement subtransaction */
1.343 + u8 stmtAutoopen; /* Open stmt journal when main journal is opened*/
1.344 + u8 noSync; /* Do not sync the journal if true */
1.345 + u8 fullSync; /* Do extra syncs of the journal for robustness */
1.346 + u8 sync_flags; /* One of SYNC_NORMAL or SYNC_FULL */
1.347 + u8 state; /* PAGER_UNLOCK, _SHARED, _RESERVED, etc. */
1.348 + u8 tempFile; /* zFilename is a temporary file */
1.349 + u8 readOnly; /* True for a read-only database */
1.350 + u8 needSync; /* True if an fsync() is needed on the journal */
1.351 + u8 dirtyCache; /* True if cached pages have changed */
1.352 + u8 alwaysRollback; /* Disable DontRollback() for all pages */
1.353 + u8 memDb; /* True to inhibit all file I/O */
1.354 + u8 setMaster; /* True if a m-j name has been written to jrnl */
1.355 + u8 doNotSync; /* Boolean. While true, do not spill the cache */
1.356 + u8 exclusiveMode; /* Boolean. True if locking_mode==EXCLUSIVE */
1.357 + u8 journalMode; /* One of the PAGER_JOURNALMODE_* values */
1.358 + u8 dbModified; /* True if there are any changes to the Db */
1.359 + u8 changeCountDone; /* Set after incrementing the change-counter */
1.360 + u32 vfsFlags; /* Flags for sqlite3_vfs.xOpen() */
1.361 + int errCode; /* One of several kinds of errors */
1.362 + int dbSize; /* Number of pages in the file */
1.363 + int origDbSize; /* dbSize before the current change */
1.364 + int stmtSize; /* Size of database (in pages) at stmt_begin() */
1.365 + int nRec; /* Number of pages written to the journal */
1.366 + u32 cksumInit; /* Quasi-random value added to every checksum */
1.367 + int stmtNRec; /* Number of records in stmt subjournal */
1.368 + int nExtra; /* Add this many bytes to each in-memory page */
1.369 + int pageSize; /* Number of bytes in a page */
1.370 + int nPage; /* Total number of in-memory pages */
1.371 + int nRef; /* Number of in-memory pages with PgHdr.nRef>0 */
1.372 + int mxPage; /* Maximum number of pages to hold in cache */
1.373 + Pgno mxPgno; /* Maximum allowed size of the database */
1.374 + Bitvec *pInJournal; /* One bit for each page in the database file */
1.375 + Bitvec *pInStmt; /* One bit for each page in the database */
1.376 + char *zFilename; /* Name of the database file */
1.377 + char *zJournal; /* Name of the journal file */
1.378 + char *zDirectory; /* Directory holding database and journal files */
1.379 + sqlite3_file *fd, *jfd; /* File descriptors for database and journal */
1.380 + sqlite3_file *stfd; /* File descriptor for the statement subjournal*/
1.381 + BusyHandler *pBusyHandler; /* Pointer to sqlite.busyHandler */
1.382 + PagerLruList lru; /* LRU list of free pages */
1.383 + PgHdr *pAll; /* List of all pages */
1.384 + PgHdr *pStmt; /* List of pages in the statement subjournal */
1.385 + PgHdr *pDirty; /* List of all dirty pages */
1.386 + i64 journalOff; /* Current byte offset in the journal file */
1.387 + i64 journalHdr; /* Byte offset to previous journal header */
1.388 + i64 stmtHdrOff; /* First journal header written this statement */
1.389 + i64 stmtCksum; /* cksumInit when statement was started */
1.390 + i64 stmtJSize; /* Size of journal at stmt_begin() */
1.391 + int sectorSize; /* Assumed sector size during rollback */
1.392 +#ifdef SQLITE_TEST
1.393 + int nHit, nMiss; /* Cache hits and misses */
1.394 + int nRead, nWrite; /* Database pages read/written */
1.395 +#endif
1.396 + void (*xDestructor)(DbPage*,int); /* Call this routine when freeing pages */
1.397 + void (*xReiniter)(DbPage*,int); /* Call this routine when reloading pages */
1.398 +#ifdef SQLITE_HAS_CODEC
1.399 + void *(*xCodec)(void*,void*,Pgno,int); /* Routine for en/decoding data */
1.400 + void *pCodecArg; /* First argument to xCodec() */
1.401 +#endif
1.402 + int nHash; /* Size of the pager hash table */
1.403 + PgHdr **aHash; /* Hash table to map page number to PgHdr */
1.404 +#ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
1.405 + Pager *pNext; /* Doubly linked list of pagers on which */
1.406 + Pager *pPrev; /* sqlite3_release_memory() will work */
1.407 + volatile int iInUseMM; /* Non-zero if unavailable to MM. NOTE(review): pagerEnter() waits while set */
1.408 + volatile int iInUseDB; /* Non-zero if in sqlite3_release_memory(). NOTE(review): counted by pagerEnter(); the two comments may be swapped - confirm */
1.409 +#endif
1.410 + char *pTmpSpace; /* Pager.pageSize bytes of space for tmp use */
1.411 + char dbFileVers[16]; /* Changes whenever database file changes */
1.412 + i64 journalSizeLimit; /* Size limit for persistent journal files */
1.413 +};
1.414 +
1.415 +/*
1.416 +** The following global variables hold counters used for
1.417 +** testing purposes only. These variables do not exist in
1.418 +** a non-testing build. These variables are not thread-safe.
1.419 +*/
1.420 +#ifdef SQLITE_TEST
1.421 +int sqlite3_pager_readdb_count = 0; /* Number of full pages read from DB */
1.422 +int sqlite3_pager_writedb_count = 0; /* Number of full pages written to DB */
1.423 +int sqlite3_pager_writej_count = 0; /* Number of pages written to journal */
1.424 +int sqlite3_pager_pgfree_count = 0; /* Number of cache pages freed */
1.425 +# define PAGER_INCR(v) v++ /* Increment a counter; v is expanded unparenthesized */
1.426 +#else
1.427 +# define PAGER_INCR(v) /* Expands to nothing when SQLITE_TEST is not defined */
1.428 +#endif
1.429 +
1.430 +/*
1.431 +** The following variable points to the head of a double-linked list
1.432 +** of all pagers that are eligible for page stealing by the
1.433 +** sqlite3_release_memory() interface. Access to this list is
1.434 +** protected by the SQLITE_MUTEX_STATIC_MEM2 mutex.
1.435 +*/
1.436 +#ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
1.437 +static Pager *sqlite3PagerList = 0; /* Head of the list of pagers; initially empty */
1.438 +static PagerLruList sqlite3LruPageList = {0, 0, 0}; /* Global LRU list of unreferenced pages; initially empty */
1.439 +#endif
1.440 +
1.441 +
1.442 +/*
1.443 +** Journal files begin with the following magic string. The data
1.444 +** was obtained from /dev/random. It is used only as a sanity check.
1.445 +**
1.446 +** Since version 2.8.0, the journal format contains additional sanity
1.447 +** checking information. If the power fails while the journal is being
1.448 +** written, semi-random garbage data might appear in the journal
1.449 +** file after power is restored. If an attempt is then made
1.450 +** to roll the journal back, the database could be corrupted. The additional
1.451 +** sanity checking data is an attempt to discover the garbage in the
1.452 +** journal and ignore it.
1.453 +**
1.454 +** The sanity checking information for the new journal format consists
1.455 +** of a 32-bit checksum on each page of data. The checksum covers both
1.456 +** the page number and the pPager->pageSize bytes of data for the page.
1.457 +** This cksum is initialized to a 32-bit random value that appears in the
1.458 +** journal file right after the header. The random initializer is important,
1.459 +** because garbage data that appears at the end of a journal is likely
1.460 +** data that was once in other files that have now been deleted. If the
1.461 +** garbage data came from an obsolete journal file, the checksums might
1.462 +** be correct. But by initializing the checksum to random value which
1.463 +** is different for every journal, we minimize that risk.
1.464 +*/
1.465 +static const unsigned char aJournalMagic[] = { /* 8 bytes */
1.466 + 0xd9, 0xd5, 0x05, 0xf9, 0x20, 0xa1, 0x63, 0xd7,
1.467 +};
1.468 +
1.469 +/*
1.470 +** The size of the header and of each page in the journal is determined
1.471 +** by the following macros.
1.472 +*/
1.473 +#define JOURNAL_PG_SZ(pPager) ((pPager->pageSize) + 8) /* Page size plus 8 bytes per journalled page; argument is not parenthesized */
1.474 +
1.475 +/*
1.476 +** The journal header size for this pager. In the future, this could be
1.477 +** set to some value read from the disk controller. The important
1.478 +** characteristic is that it is the same size as a disk sector.
1.479 +*/
1.480 +#define JOURNAL_HDR_SZ(pPager) (pPager->sectorSize) /* Argument is a Pager*; it is not parenthesized in the expansion */
1.481 +
1.482 +/*
1.483 +** The macro MEMDB is true if we are dealing with an in-memory database.
1.484 +** We do this as a macro so that if the SQLITE_OMIT_MEMORYDB macro is set,
1.485 +** the value of MEMDB will be a constant and the compiler will optimize
1.486 +** out code that would never execute.
1.487 +*/
1.488 +#ifdef SQLITE_OMIT_MEMORYDB
1.489 +# define MEMDB 0
1.490 +#else
1.491 +# define MEMDB pPager->memDb /* Requires a variable named pPager in scope at the point of use */
1.492 +#endif
1.493 +
1.494 +/*
1.495 +** Page number PAGER_MJ_PGNO is never used in an SQLite database (it is
1.496 +** reserved for working around a windows/posix incompatibility). It is
1.497 +** used in the journal to signify that the remainder of the journal file
1.498 +** is devoted to storing a master journal name - there are no more pages to
1.499 +** roll back. See comments for function writeMasterJournal() for details.
1.500 +*/
1.501 +/* #define PAGER_MJ_PGNO(x) (PENDING_BYTE/((x)->pageSize)) */
1.502 +#define PAGER_MJ_PGNO(x) ((PENDING_BYTE/((x)->pageSize))+1) /* x is a Pager*; only x->pageSize is read */
1.503 +
1.504 +/*
1.505 +** The maximum legal page number is (2^31 - 1).
1.506 +*/
1.507 +#define PAGER_MAX_PGNO 2147483647 /* 0x7fffffff */
1.508 +
1.509 +/*
1.510 +** The pagerEnter() and pagerLeave() routines acquire and release
1.511 +** a mutex on each pager. The mutex is recursive.
1.512 +**
1.513 +** This is a special-purpose mutex. It only provides mutual exclusion
1.514 +** between the Btree and the Memory Management sqlite3_release_memory()
1.515 +** function. It does not prevent, for example, two Btrees from accessing
1.516 +** the same pager at the same time. Other general-purpose mutexes in
1.517 +** the btree layer handle that chore.
1.518 +*/
1.519 +#ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
1.520 + static void pagerEnter(Pager *p){
1.521 + p->iInUseDB++; /* Nesting count; 1 means this is the outermost pagerEnter() */
1.522 + if( p->iInUseMM && p->iInUseDB==1 ){ /* Outermost entry while iInUseMM is set */
1.523 +#ifndef SQLITE_MUTEX_NOOP
1.524 + sqlite3_mutex *mutex;
1.525 + mutex = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MEM2);
1.526 +#endif
1.527 + p->iInUseDB = 0; /* Withdraw our claim while we wait */
1.528 + sqlite3_mutex_enter(mutex); /* NOTE(review): mutex is undeclared under SQLITE_MUTEX_NOOP; presumably this is then a macro that drops its argument */
1.529 + p->iInUseDB = 1; /* Re-assert the claim while holding the MEM2 mutex */
1.530 + sqlite3_mutex_leave(mutex);
1.531 + }
1.532 + assert( p->iInUseMM==0 );
1.533 + }
1.534 + static void pagerLeave(Pager *p){
1.535 + p->iInUseDB--; /* Undo one pagerEnter() */
1.536 + assert( p->iInUseDB>=0 );
1.537 + }
1.538 +#else
1.539 +# define pagerEnter(X) /* Expands to nothing without SQLITE_ENABLE_MEMORY_MANAGEMENT */
1.540 +# define pagerLeave(X)
1.541 +#endif
1.542 +
1.543 +/*
1.544 +** Add page pPg to the end of the linked list managed by structure
1.545 +** pList (pPg becomes the last entry in the list - the most recently
1.546 +** used). Argument pLink should point to either pPg->free or pPg->gfree,
1.547 +** depending on whether pPg is being added to the pager-specific or
1.548 +** global LRU list.
1.549 +*/
1.550 +static void listAdd(PagerLruList *pList, PagerLruLink *pLink, PgHdr *pPg){
1.551 + pLink->pNext = 0; /* pPg becomes the last entry, so nothing follows it */
1.552 + pLink->pPrev = pList->pLast; /* NULL when the list is currently empty */
1.553 +
1.554 +#ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
1.555 + assert(pLink==&pPg->free || pLink==&pPg->gfree);
1.556 + assert(pLink==&pPg->gfree || pList!=&sqlite3LruPageList); /* The global list must be used with gfree */
1.557 +#endif
1.558 +
1.559 + if( pList->pLast ){
1.560 + int iOff = (char *)pLink - (char *)pPg; /* Byte offset of the link member within pPg */
1.561 + PagerLruLink *pLastLink = (PagerLruLink *)(&((u8 *)pList->pLast)[iOff]); /* Same member in the old last page */
1.562 + pLastLink->pNext = pPg;
1.563 + }else{
1.564 + assert(!pList->pFirst);
1.565 + pList->pFirst = pPg; /* List was empty: pPg is also the first entry */
1.566 + }
1.567 +
1.568 + pList->pLast = pPg;
1.569 + if( !pList->pFirstSynced && pPg->needSync==0 ){
1.570 + pList->pFirstSynced = pPg; /* No earlier entry has needSync==0 */
1.571 + }
1.572 +}
1.573 +
1.574 +/*
1.575 +** Remove pPg from the list managed by the structure pointed to by pList.
1.576 +**
1.577 +** Argument pLink should point to either pPg->free or pPg->gfree, depending
1.578 +** on whether pPg is being removed from the pager-specific or global LRU list.
1.579 +*/
1.580 +static void listRemove(PagerLruList *pList, PagerLruLink *pLink, PgHdr *pPg){
1.581 + int iOff = (char *)pLink - (char *)pPg; /* Byte offset of the link member within a PgHdr */
1.582 +
1.583 +#ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
1.584 + assert(pLink==&pPg->free || pLink==&pPg->gfree);
1.585 + assert(pLink==&pPg->gfree || pList!=&sqlite3LruPageList);
1.586 +#endif
1.587 +
1.588 + if( pPg==pList->pFirst ){
1.589 + pList->pFirst = pLink->pNext;
1.590 + }
1.591 + if( pPg==pList->pLast ){
1.592 + pList->pLast = pLink->pPrev;
1.593 + }
1.594 + if( pLink->pPrev ){
1.595 + PagerLruLink *pPrevLink = (PagerLruLink *)(&((u8 *)pLink->pPrev)[iOff]); /* Link member of the predecessor */
1.596 + pPrevLink->pNext = pLink->pNext;
1.597 + }
1.598 + if( pLink->pNext ){
1.599 + PagerLruLink *pNextLink = (PagerLruLink *)(&((u8 *)pLink->pNext)[iOff]); /* Link member of the successor */
1.600 + pNextLink->pPrev = pLink->pPrev;
1.601 + }
1.602 + if( pPg==pList->pFirstSynced ){
1.603 + PgHdr *p = pLink->pNext; /* Search forward for the next page with needSync==0 */
1.604 + while( p && p->needSync ){
1.605 + PagerLruLink *pL = (PagerLruLink *)(&((u8 *)p)[iOff]);
1.606 + p = pL->pNext;
1.607 + }
1.608 + pList->pFirstSynced = p; /* NULL if no remaining page qualifies */
1.609 + }
1.610 +
1.611 + pLink->pNext = pLink->pPrev = 0; /* pPg is no longer linked into this list */
1.612 +}
1.613 +
1.614 +/*
1.615 +** Add page pPg to the list of free pages for the pager. If
1.616 +** memory-management is enabled, also add the page to the global
1.617 +** list of free pages.
1.618 +*/
1.619 +static void lruListAdd(PgHdr *pPg){
1.620 + listAdd(&pPg->pPager->lru, &pPg->free, pPg); /* Per-pager list, linked through PgHdr.free */
1.621 +#ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
1.622 + if( !pPg->pPager->memDb ){ /* Pages of in-memory databases stay off the global list */
1.623 + sqlite3_mutex_enter(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_LRU));
1.624 + listAdd(&sqlite3LruPageList, &pPg->gfree, pPg); /* Global list, linked through PgHdr.gfree */
1.625 + sqlite3_mutex_leave(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_LRU));
1.626 + }
1.627 +#endif
1.628 +}
1.629 +
1.630 +/*
1.631 +** Remove page pPg from the list of free pages for the associated pager.
1.632 +** If memory-management is enabled, also remove pPg from the global list
1.633 +** of free pages.
1.634 +*/
1.635 +static void lruListRemove(PgHdr *pPg){
1.636 + listRemove(&pPg->pPager->lru, &pPg->free, pPg); /* Per-pager list, linked through PgHdr.free */
1.637 +#ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
1.638 + if( !pPg->pPager->memDb ){ /* Same condition lruListAdd() uses for the global list */
1.639 + sqlite3_mutex_enter(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_LRU));
1.640 + listRemove(&sqlite3LruPageList, &pPg->gfree, pPg); /* Global list, linked through PgHdr.gfree */
1.641 + sqlite3_mutex_leave(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_LRU));
1.642 + }
1.643 +#endif
1.644 +}
1.645 +
1.646 +/*
1.647 +** This function is called just after the needSync flag has been cleared
1.648 +** from all pages managed by pPager (usually because the journal file
1.649 +** has just been synced). It updates the pPager->lru.pFirstSynced variable
1.650 +** and, if memory-management is enabled, the sqlite3LruPageList.pFirstSynced
1.651 +** variable also.
1.652 +*/
1.653 +static void lruListSetFirstSynced(Pager *pPager){
1.654 + pPager->lru.pFirstSynced = pPager->lru.pFirst; /* Caller has cleared needSync on all of this pager's pages */
1.655 +#ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
1.656 + if( !pPager->memDb ){
1.657 + PgHdr *p;
1.658 + sqlite3_mutex_enter(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_LRU));
1.659 + for(p=sqlite3LruPageList.pFirst; p && p->needSync; p=p->gfree.pNext); /* Empty body: skip pages that still need a sync */
1.660 + assert(p==pPager->lru.pFirstSynced || p==sqlite3LruPageList.pFirstSynced);
1.661 + sqlite3LruPageList.pFirstSynced = p; /* NULL if every page on the global list needs a sync */
1.662 + sqlite3_mutex_leave(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_LRU));
1.663 + }
1.664 +#endif
1.665 +}
1.666 +
1.667 +/*
1.668 +** Return true if page *pPg has already been written to the statement
1.669 +** journal (or statement snapshot has been created, if *pPg is part
1.670 +** of an in-memory database).
1.671 +*/
1.672 +static int pageInStatement(PgHdr *pPg){
1.673 + Pager *pPager = pPg->pPager; /* The MEMDB macro may expand to pPager->memDb, so this name is required */
1.674 + if( MEMDB ){
1.675 + return PGHDR_TO_HIST(pPg, pPager)->inStmt; /* In-memory database: flag kept in the page's PgHistory */
1.676 + }else{
1.677 + return sqlite3BitvecTest(pPager->pInStmt, pPg->pgno); /* Otherwise: test the page's bit in Pager.pInStmt */
1.678 + }
1.679 +}
1.680 +
1.681 +/*
1.682 +** Change the size of the pager hash table to N. N must be a power
1.683 +** of two.
1.684 +*/
1.685 +static void pager_resize_hash_table(Pager *pPager, int N){
1.686 + PgHdr **aHash, *pPg;
1.687 + assert( N>0 && (N&(N-1))==0 ); /* N must be a positive power of two */
1.688 +#ifdef SQLITE_MALLOC_SOFT_LIMIT
1.689 + if( N*sizeof(aHash[0])>SQLITE_MALLOC_SOFT_LIMIT ){ /* Keep the table allocation within the soft limit */
1.690 + N = SQLITE_MALLOC_SOFT_LIMIT/sizeof(aHash[0]);
1.691 + }
1.692 + if( N==pPager->nHash ) return; /* Note: this early exit is compiled only when the soft limit is defined */
1.693 +#endif
1.694 + pagerLeave(pPager); /* Step out of the pager for the duration of the allocation */
1.695 + if( pPager->aHash!=0 ) sqlite3BeginBenignMalloc(); /* Presumably: failure is tolerable when an old table exists - confirm */
1.696 + aHash = sqlite3MallocZero( sizeof(aHash[0])*N );
1.697 + if( pPager->aHash!=0 ) sqlite3EndBenignMalloc();
1.698 + pagerEnter(pPager);
1.699 + if( aHash==0 ){
1.700 + /* Failure to rehash is not an error. It is only a performance hit. */
1.701 + return;
1.702 + }
1.703 + sqlite3_free(pPager->aHash); /* Discard the old table; every page is re-linked below */
1.704 + pPager->nHash = N;
1.705 + pPager->aHash = aHash;
1.706 + for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
1.707 + int h;
1.708 + if( pPg->pgno==0 ){ /* Pages with pgno==0 are not entered in the hash table */
1.709 + assert( pPg->pNextHash==0 && pPg->pPrevHash==0 );
1.710 + continue;
1.711 + }
1.712 + h = pPg->pgno & (N-1); /* Bucket index, always in the range 0..N-1 */
1.713 + pPg->pNextHash = aHash[h]; /* Insert pPg at the head of bucket h */
1.714 + if( aHash[h] ){
1.715 + aHash[h]->pPrevHash = pPg;
1.716 + }
1.717 + aHash[h] = pPg;
1.718 + pPg->pPrevHash = 0;
1.719 + }
1.720 +}
1.721 +
1.722 +/*
1.723 +** Read a 32-bit integer from the given file descriptor at byte position
1.724 +** offset. Store the integer that is read in *pRes. Return SQLITE_OK if
1.725 +** everything worked, or an error code if something goes wrong (in which
1.726 +** case *pRes is left unchanged).
1.727 +** All values are stored on disk as big-endian.
1.728 +*/
1.729 +static int read32bits(sqlite3_file *fd, i64 offset, u32 *pRes){
1.730 + unsigned char ac[4];
1.731 + int rc = sqlite3OsRead(fd, ac, sizeof(ac), offset);
1.732 + if( rc==SQLITE_OK ){
1.733 + *pRes = sqlite3Get4byte(ac); /* decode the 4 raw bytes into a u32 */
1.734 + }
1.735 + return rc;
1.736 +}
1.737 +
1.738 +/*
1.739 +** Write the 32-bit integer B into the string buffer A in big-endian byte order. Both arguments are parenthesized so that any expression may be passed.
1.740 +*/
1.741 +#define put32bits(A,B) sqlite3Put4byte((u8*)(A),(B))
1.742 +
1.743 +/*
1.744 +** Write a 32-bit integer into the given file descriptor at byte position
1.745 +** offset. Return SQLITE_OK on success or an error code if something goes wrong.
1.746 +*/
1.747 +static int write32bits(sqlite3_file *fd, i64 offset, u32 val){
1.748 + char ac[4];
1.749 + put32bits(ac, val); /* encode val into the 4-byte buffer first */
1.750 + return sqlite3OsWrite(fd, ac, 4, offset);
1.751 +}
1.752 +
1.753 +/*
1.754 +** If file pFd is open, call sqlite3OsUnlock() on it and return its result. If it is not open, return SQLITE_OK without doing anything.
1.755 +*/
1.756 +static int osUnlock(sqlite3_file *pFd, int eLock){
1.757 + if( !pFd->pMethods ){ /* a null pMethods pointer is how "not open" is detected here */
1.758 + return SQLITE_OK;
1.759 + }
1.760 + return sqlite3OsUnlock(pFd, eLock);
1.761 +}
1.762 +
1.763 +/*
1.764 +** This function determines whether or not the atomic-write optimization
1.765 +** can be used with this pager. The optimization can be used if:
1.766 +**
1.767 +** (a) the value returned by OsDeviceCharacteristics() indicates that
1.768 +** a database page may be written atomically, and
1.769 +** (b) the value returned by OsSectorSize() is less than or equal
1.770 +** to the page size.
1.771 +**
1.772 +** If the optimization cannot be used, 0 is returned. If it can be used
1.773 +** (or if the database file is not open), the value returned is the size
1.774 +** of the journal file when it contains rollback data for exactly one page.
1.775 +*/
1.776 +#ifdef SQLITE_ENABLE_ATOMIC_WRITE
1.777 +static int jrnlBufferSize(Pager *pPager){
1.778 + int dc; /* Device characteristics */
1.779 + int nSector; /* Sector size */
1.780 + int szPage; /* Page size */
1.781 + sqlite3_file *fd = pPager->fd;
1.782 +
1.783 + if( fd->pMethods ){ /* the three locals are only assigned when the file is open */
1.784 + dc = sqlite3OsDeviceCharacteristics(fd);
1.785 + nSector = sqlite3OsSectorSize(fd);
1.786 + szPage = pPager->pageSize;
1.787 + }
1.788 +
1.789 + assert(SQLITE_IOCAP_ATOMIC512==(512>>8)); /* the szPage>>8 mask below relies on this bit layout */
1.790 + assert(SQLITE_IOCAP_ATOMIC64K==(65536>>8));
1.791 +
1.792 + if( !fd->pMethods || /* short-circuit: dc, nSector and szPage are read only when they were set above */
1.793 + (dc & (SQLITE_IOCAP_ATOMIC|(szPage>>8)) && nSector<=szPage) ){
1.794 + return JOURNAL_HDR_SZ(pPager) + JOURNAL_PG_SZ(pPager);
1.795 + }
1.796 + return 0;
1.797 +}
1.798 +#endif
1.799 +
1.800 +/*
1.801 +** This function should be called when an error occurs within the pager
1.802 +** code. The first argument is a pointer to the pager structure, the
1.803 +** second the error-code about to be returned by a pager API function.
1.804 +** The value returned is a copy of the second argument to this function.
1.805 +**
1.806 +** If the low byte of the second argument is SQLITE_IOERR, SQLITE_CORRUPT,
1.807 +** or SQLITE_FULL the error becomes persistent. Until the persistent error is
1.808 +** cleared, subsequent API calls on this Pager will immediately return the
1.809 +** same error code.
1.810 +**
1.811 +** A persistent error indicates that the contents of the pager-cache
1.812 +** cannot be trusted. This state can be cleared by completely discarding
1.813 +** the contents of the pager-cache. If a transaction was active when
1.814 +** the persistent error occurred, then the rollback journal may need
1.815 +** to be replayed.
1.816 +*/
1.817 +static void pager_unlock(Pager *pPager);
1.818 +static int pager_error(Pager *pPager, int rc){
1.819 + int rc2 = rc & 0xff; /* primary result code: the low 8 bits of rc */
1.820 + assert(
1.821 + pPager->errCode==SQLITE_FULL ||
1.822 + pPager->errCode==SQLITE_OK ||
1.823 + (pPager->errCode & 0xff)==SQLITE_IOERR
1.824 + );
1.825 + if(
1.826 + rc2==SQLITE_FULL ||
1.827 + rc2==SQLITE_IOERR ||
1.828 + rc2==SQLITE_CORRUPT
1.829 + ){
1.830 + pPager->errCode = rc; /* the full (possibly extended) code is what gets stored */
1.831 + if( pPager->state==PAGER_UNLOCK && pPager->nRef==0 ){
1.832 + /* If the pager is already unlocked, call pager_unlock() now to
1.833 + ** clear the error state and ensure that the pager-cache is
1.834 + ** completely empty.
1.835 + */
1.836 + pager_unlock(pPager);
1.837 + }
1.838 + }
1.839 + return rc;
1.840 +}
1.841 +
1.842 +/*
1.843 +** If SQLITE_CHECK_PAGES is defined then we do some sanity checking
1.844 +** on the cache using a hash function. This is used for testing
1.845 +** and debugging only. Otherwise the helpers below compile to 0 / nothing.
1.846 +*/
1.847 +#ifdef SQLITE_CHECK_PAGES
1.848 +/*
1.849 +** Return a 32-bit hash of the nByte bytes at pData (pager_pagehash() applies it to the data of page pPage).
1.850 +*/
1.851 +static u32 pager_datahash(int nByte, unsigned char *pData){
1.852 + u32 hash = 0;
1.853 + int i;
1.854 + for(i=0; i<nByte; i++){
1.855 + hash = (hash*1039) + pData[i]; /* multiplicative hash; u32 arithmetic wraps on overflow */
1.856 + }
1.857 + return hash;
1.858 +}
1.859 +static u32 pager_pagehash(PgHdr *pPage){
1.860 + return pager_datahash(pPage->pPager->pageSize,
1.861 + (unsigned char *)PGHDR_TO_DATA(pPage));
1.862 +}
1.863 +
1.864 +/*
1.865 +** The CHECK_PAGE macro takes a PgHdr* as an argument. If SQLITE_CHECK_PAGES
1.866 +** is defined, and NDEBUG is not defined, an assert() statement checks
1.867 +** that the page is either dirty or still matches the calculated page-hash.
1.868 +*/
1.869 +#define CHECK_PAGE(x) checkPage(x)
1.870 +static void checkPage(PgHdr *pPg){
1.871 + Pager *pPager = pPg->pPager;
1.872 + assert( !pPg->pageHash || pPager->errCode || MEMDB || pPg->dirty || /* the check is waived when no hash is stored, the pager has an error, or for in-memory databases */
1.873 + pPg->pageHash==pager_pagehash(pPg) );
1.874 +}
1.875 +
1.876 +#else
1.877 +#define pager_datahash(X,Y) 0
1.878 +#define pager_pagehash(X) 0
1.879 +#define CHECK_PAGE(x)
1.880 +#endif
1.881 +
1.882 +/*
1.883 +** When this is called the journal file for pager pPager must be open.
1.884 +** The master journal file name is read from the end of the file and
1.885 +** written into memory supplied by the caller.
1.886 +**
1.887 +** zMaster must point to a buffer of at least nMaster bytes allocated by
1.888 +** the caller. This should be sqlite3_vfs.mxPathname+1 (to ensure there is
1.889 +** enough space to write the master journal name). If the master journal
1.890 +** name in the journal is longer than nMaster bytes (including a
1.891 +** nul-terminator), or longer than the journal file could contain, then this
1.892 +** is handled as if no master journal name were present in the journal.
1.893 +**
1.894 +** If no master journal file name is present zMaster[0] is set to 0 and
1.895 +** SQLITE_OK returned. An I/O error code is returned if a read fails.
1.896 +*/
1.897 +static int readMasterJournal(sqlite3_file *pJrnl, char *zMaster, int nMaster){
1.898 + int rc;
1.899 + u32 len;
1.900 + i64 szJ;
1.901 + u32 cksum;
1.902 + u32 u; /* Unsigned loop counter */
1.903 + unsigned char aMagic[8]; /* A buffer to hold the magic header */
1.904 +
1.905 + zMaster[0] = '\0';
1.906 +
1.907 + rc = sqlite3OsFileSize(pJrnl, &szJ);
1.908 + if( rc!=SQLITE_OK || szJ<16 ) return rc; /* too small to hold the 16-byte trailer: no name */
1.909 +
1.910 + rc = read32bits(pJrnl, szJ-16, &len); /* trailer layout: 4-byte length, 4-byte checksum, 8-byte magic */
1.911 + if( rc!=SQLITE_OK ) return rc;
1.912 +
1.913 + if( len>=nMaster || len>szJ-16 ){ /* name would overflow zMaster, or (corrupt length) start before offset 0 of the file */
1.914 + return SQLITE_OK;
1.915 + }
1.916 +
1.917 + rc = read32bits(pJrnl, szJ-12, &cksum);
1.918 + if( rc!=SQLITE_OK ) return rc;
1.919 +
1.920 + rc = sqlite3OsRead(pJrnl, aMagic, 8, szJ-8);
1.921 + if( rc!=SQLITE_OK || memcmp(aMagic, aJournalMagic, 8) ) return rc; /* magic mismatch returns SQLITE_OK with an empty name */
1.922 +
1.923 + rc = sqlite3OsRead(pJrnl, zMaster, len, szJ-16-len); /* offset is non-negative thanks to the length check above */
1.924 + if( rc!=SQLITE_OK ){
1.925 + return rc;
1.926 + }
1.927 + zMaster[len] = '\0';
1.928 +
1.929 + /* See if the checksum matches the master journal name */
1.930 + for(u=0; u<len; u++){
1.931 + cksum -= zMaster[u];
1.932 + }
1.933 + if( cksum ){
1.934 + /* If the checksum doesn't add up, then one or more of the disk sectors
1.935 + ** containing the master journal filename is corrupted. This means
1.936 + ** definitely roll back, so just return SQLITE_OK and report a (nul)
1.937 + ** master-journal filename.
1.938 + */
1.939 + zMaster[0] = '\0';
1.940 + }
1.941 +
1.942 + return SQLITE_OK;
1.943 +}
1.944 +
1.945 +/*
1.946 +** Round Pager.journalOff up to the next multiple of JOURNAL_HDR_SZ (the next
1.947 +** sector boundary in the example below), where a journal header may be read
1.948 +** or written. No file I/O is done here; only Pager.journalOff is updated.
1.949 +**
1.950 +** i.e for a sector size of 512:
1.951 +**
1.952 +** Input Offset Output Offset
1.953 +** ---------------------------------------
1.954 +** 0 0
1.955 +** 512 512
1.956 +** 100 512
1.957 +** 2000 2048
1.958 +**
1.959 +*/
1.960 +static void seekJournalHdr(Pager *pPager){
1.961 + i64 offset = 0;
1.962 + i64 c = pPager->journalOff;
1.963 + if( c ){ /* offset 0 is already aligned and stays 0 */
1.964 + offset = ((c-1)/JOURNAL_HDR_SZ(pPager) + 1) * JOURNAL_HDR_SZ(pPager); /* round up; an already-aligned c is left unchanged */
1.965 + }
1.966 + assert( offset%JOURNAL_HDR_SZ(pPager)==0 );
1.967 + assert( offset>=c );
1.968 + assert( (offset-c)<JOURNAL_HDR_SZ(pPager) );
1.969 + pPager->journalOff = offset;
1.970 +}
1.971 +
1.972 +/*
1.973 +** Invalidate the journal file, which has the effect of committing the
1.974 +** transaction: truncate it to zero bytes if doTruncate is set or no size
1.975 +** limit is configured (journalSizeLimit==0), else zero its first 28 bytes.
1.976 +*/
1.977 +static int zeroJournalHdr(Pager *pPager, int doTruncate){
1.978 + int rc = SQLITE_OK;
1.979 + static const char zeroHdr[28]; /* static storage, hence all zero bytes */
1.980 +
1.981 + if( pPager->journalOff ){ /* nothing to do when journalOff is 0 */
1.982 + i64 iLimit = pPager->journalSizeLimit;
1.983 +
1.984 + IOTRACE(("JZEROHDR %p\n", pPager))
1.985 + if( doTruncate || iLimit==0 ){
1.986 + rc = sqlite3OsTruncate(pPager->jfd, 0);
1.987 + }else{
1.988 + rc = sqlite3OsWrite(pPager->jfd, zeroHdr, sizeof(zeroHdr), 0);
1.989 + }
1.990 + if( rc==SQLITE_OK && !pPager->noSync ){ /* the sync is skipped in no-sync mode */
1.991 + rc = sqlite3OsSync(pPager->jfd, SQLITE_SYNC_DATAONLY|pPager->sync_flags);
1.992 + }
1.993 +
1.994 + /* At this point the transaction is committed but the write lock
1.995 + ** is still held on the file. If there is a size limit configured for
1.996 + ** the persistent journal and the journal file currently consumes more
1.997 + ** space than that limit allows for, truncate it now. There is no need
1.998 + ** to sync the file following this operation.
1.999 + */
1.1000 + if( rc==SQLITE_OK && iLimit>0 ){
1.1001 + i64 sz;
1.1002 + rc = sqlite3OsFileSize(pPager->jfd, &sz);
1.1003 + if( rc==SQLITE_OK && sz>iLimit ){
1.1004 + rc = sqlite3OsTruncate(pPager->jfd, iLimit);
1.1005 + }
1.1006 + }
1.1007 + }
1.1008 + return rc;
1.1009 +}
1.1010 +
1.1011 +/*
1.1012 +** The journal file must be open when this routine is called. A journal
1.1013 +** header (JOURNAL_HDR_SZ bytes) is written into the journal file at the
1.1014 +** current location (after rounding it up with seekJournalHdr()).
1.1015 +**
1.1016 +** The format for the journal header is as follows:
1.1017 +** - 8 bytes: Magic identifying journal format.
1.1018 +** - 4 bytes: Number of records in journal, or -1 if no-sync mode is on.
1.1019 +** - 4 bytes: Random number used for page hash.
1.1020 +** - 4 bytes: Initial database page count.
1.1021 +** - 4 bytes: Sector size used by the process that wrote this journal.
1.1022 +** - 4 bytes: Database page size (set only in the header at offset 0).
1.1023 +**
1.1024 +** Followed by (JOURNAL_HDR_SZ - 28) bytes of unused space.
1.1025 +*/
1.1026 +static int writeJournalHdr(Pager *pPager){
1.1027 + int rc = SQLITE_OK;
1.1028 + char *zHeader = pPager->pTmpSpace; /* the header is assembled in the pager's temp buffer */
1.1029 + int nHeader = pPager->pageSize;
1.1030 + int nWrite;
1.1031 +
1.1032 + if( nHeader>JOURNAL_HDR_SZ(pPager) ){ /* nHeader = min(pageSize, JOURNAL_HDR_SZ): bytes per write below */
1.1033 + nHeader = JOURNAL_HDR_SZ(pPager);
1.1034 + }
1.1035 +
1.1036 + if( pPager->stmtHdrOff==0 ){
1.1037 + pPager->stmtHdrOff = pPager->journalOff;
1.1038 + }
1.1039 +
1.1040 + seekJournalHdr(pPager);
1.1041 + pPager->journalHdr = pPager->journalOff; /* remember where this header starts */
1.1042 +
1.1043 + memcpy(zHeader, aJournalMagic, sizeof(aJournalMagic));
1.1044 +
1.1045 + /*
1.1046 + ** Write the nRec Field - the number of page records that follow this
1.1047 + ** journal header. Normally, zero is written to this value at this time.
1.1048 + ** After the records are added to the journal (and the journal synced,
1.1049 + ** if in full-sync mode), the zero is overwritten with the true number
1.1050 + ** of records (see syncJournal()).
1.1051 + **
1.1052 + ** A faster alternative is to write 0xFFFFFFFF to the nRec field. When
1.1053 + ** reading the journal this value tells SQLite to assume that the
1.1054 + ** rest of the journal file contains valid page records. This assumption
1.1055 + ** is dangerous, as if a failure occurred whilst writing to the journal
1.1056 + ** file it may contain some garbage data. There are two scenarios
1.1057 + ** where this risk can be ignored:
1.1058 + **
1.1059 + ** * When the pager is in no-sync mode. Corruption can follow a
1.1060 + ** power failure in this case anyway.
1.1061 + **
1.1062 + ** * When the SQLITE_IOCAP_SAFE_APPEND flag is set. This guarantees
1.1063 + ** that garbage data is never appended to the journal file.
1.1064 + */
1.1065 + assert(pPager->fd->pMethods||pPager->noSync);
1.1066 + if( (pPager->noSync)
1.1067 + || (sqlite3OsDeviceCharacteristics(pPager->fd)&SQLITE_IOCAP_SAFE_APPEND)
1.1068 + ){
1.1069 + put32bits(&zHeader[sizeof(aJournalMagic)], 0xffffffff);
1.1070 + }else{
1.1071 + put32bits(&zHeader[sizeof(aJournalMagic)], 0);
1.1072 + }
1.1073 +
1.1074 + /* The random check-hash initialiser */
1.1075 + sqlite3_randomness(sizeof(pPager->cksumInit), &pPager->cksumInit);
1.1076 + put32bits(&zHeader[sizeof(aJournalMagic)+4], pPager->cksumInit);
1.1077 + /* The initial database size */
1.1078 + put32bits(&zHeader[sizeof(aJournalMagic)+8], pPager->dbSize);
1.1079 + /* The assumed sector size for this process */
1.1080 + put32bits(&zHeader[sizeof(aJournalMagic)+12], pPager->sectorSize);
1.1081 + if( pPager->journalHdr==0 ){
1.1082 + /* The page size (only set when this header starts at offset 0) */
1.1083 + put32bits(&zHeader[sizeof(aJournalMagic)+16], pPager->pageSize);
1.1084 + }
1.1085 +
1.1086 + for(nWrite=0; rc==SQLITE_OK&&nWrite<JOURNAL_HDR_SZ(pPager); nWrite+=nHeader){ /* the same nHeader bytes are written repeatedly until JOURNAL_HDR_SZ bytes are covered */
1.1087 + IOTRACE(("JHDR %p %lld %d\n", pPager, pPager->journalHdr, nHeader))
1.1088 + rc = sqlite3OsWrite(pPager->jfd, zHeader, nHeader, pPager->journalOff);
1.1089 + pPager->journalOff += nHeader; /* advanced even if the write just failed; the loop then stops on rc */
1.1090 + }
1.1091 +
1.1092 + return rc;
1.1093 +}
1.1094 +
1.1095 +/*
1.1096 +** The journal file must be open when this is called. A journal header
1.1097 +** (JOURNAL_HDR_SZ bytes) is read from the current location in the journal
1.1098 +** file. See comments above function writeJournalHdr() for a description of
1.1099 +** the journal header format.
1.1100 +**
1.1101 +** If the header is read successfully, *pNRec is set to the number of
1.1102 +** page records following this header and *pDbSize is set to the size of the
1.1103 +** database before the transaction began, in pages. Also, pPager->cksumInit
1.1104 +** and pPager->sectorSize are set to the values read from the journal header.
1.1105 +** SQLITE_OK is returned in this case.
1.1106 +**
1.1107 +** If the journal header is incomplete or its magic does not match,
1.1108 +** SQLITE_DONE is returned and *pNRec and *pDbSize are not set. If a read
1.1109 +** from the journal file fails an error code is returned.
1.1110 +*/
1.1111 +static int readJournalHdr(
1.1112 + Pager *pPager,
1.1113 + i64 journalSize,
1.1114 + u32 *pNRec,
1.1115 + u32 *pDbSize
1.1116 +){
1.1117 + int rc;
1.1118 + unsigned char aMagic[8]; /* A buffer to hold the magic header */
1.1119 + i64 jrnlOff;
1.1120 + int iPageSize;
1.1121 +
1.1122 + seekJournalHdr(pPager);
1.1123 + if( pPager->journalOff+JOURNAL_HDR_SZ(pPager) > journalSize ){ /* a whole header does not fit in what remains of the file */
1.1124 + return SQLITE_DONE;
1.1125 + }
1.1126 + jrnlOff = pPager->journalOff;
1.1127 +
1.1128 + rc = sqlite3OsRead(pPager->jfd, aMagic, sizeof(aMagic), jrnlOff);
1.1129 + if( rc ) return rc;
1.1130 + jrnlOff += sizeof(aMagic); /* the field offsets below are relative to the end of the magic */
1.1131 +
1.1132 + if( memcmp(aMagic, aJournalMagic, sizeof(aMagic))!=0 ){
1.1133 + return SQLITE_DONE;
1.1134 + }
1.1135 +
1.1136 + rc = read32bits(pPager->jfd, jrnlOff, pNRec);
1.1137 + if( rc ) return rc;
1.1138 +
1.1139 + rc = read32bits(pPager->jfd, jrnlOff+4, &pPager->cksumInit);
1.1140 + if( rc ) return rc;
1.1141 +
1.1142 + rc = read32bits(pPager->jfd, jrnlOff+8, pDbSize);
1.1143 + if( rc ) return rc;
1.1144 +
1.1145 + rc = read32bits(pPager->jfd, jrnlOff+16, (u32 *)&iPageSize);
1.1146 + if( rc==SQLITE_OK /* adopt the journal's page size only if it is a power of two within [512, SQLITE_MAX_PAGE_SIZE] */
1.1147 + && iPageSize>=512
1.1148 + && iPageSize<=SQLITE_MAX_PAGE_SIZE
1.1149 + && ((iPageSize-1)&iPageSize)==0
1.1150 + ){
1.1151 + u16 pagesize = iPageSize;
1.1152 + rc = sqlite3PagerSetPagesize(pPager, &pagesize);
1.1153 + }
1.1154 + if( rc ) return rc;
1.1155 +
1.1156 + /* Update the assumed sector-size to match the value used by
1.1157 + ** the process that created this journal. If this journal was
1.1158 + ** created by a process other than this one, then this routine
1.1159 + ** is being called from within pager_playback(). The local value
1.1160 + ** of Pager.sectorSize is restored at the end of that routine.
1.1161 + */
1.1162 + rc = read32bits(pPager->jfd, jrnlOff+12, (u32 *)&pPager->sectorSize);
1.1163 + if( rc ) return rc;
1.1164 +
1.1165 + pPager->journalOff += JOURNAL_HDR_SZ(pPager); /* step past the header just read */
1.1166 + return SQLITE_OK;
1.1167 +}
1.1168 +
1.1169 +
1.1170 +/*
1.1171 +** Write the supplied master journal name into the journal file for pager
1.1172 +** pPager at the current location. The master journal name must be the last
1.1173 +** thing written to a journal file. If the pager is in full-sync mode, the
1.1174 +** journal file descriptor is advanced to the next sector boundary before
1.1175 +** anything is written. The format is:
1.1176 +**
1.1177 +** + 4 bytes: PAGER_MJ_PGNO.
1.1178 +** + N bytes: Master journal name (no nul-terminator).
1.1179 +** + 4 bytes: N (length of the master journal name in bytes).
1.1180 +** + 4 bytes: Master journal name checksum.
1.1181 +** + 8 bytes: aJournalMagic[].
1.1182 +**
1.1183 +** The master journal page checksum is the sum of the bytes in the master
1.1184 +** journal name.
1.1185 +**
1.1186 +** If zMaster is a NULL pointer (occurs for a single database transaction),
1.1187 +** or pPager->setMaster is already set, this call is a no-op.
1.1188 +*/
1.1189 +static int writeMasterJournal(Pager *pPager, const char *zMaster){
1.1190 + int rc;
1.1191 + int len;
1.1192 + int i;
1.1193 + i64 jrnlOff;
1.1194 + i64 jrnlSize;
1.1195 + u32 cksum = 0;
1.1196 + char zBuf[sizeof(aJournalMagic)+2*4]; /* holds the trailer: length, checksum, magic */
1.1197 +
1.1198 + if( !zMaster || pPager->setMaster) return SQLITE_OK;
1.1199 + pPager->setMaster = 1; /* set before any I/O, so it stays set even if a write below fails */
1.1200 +
1.1201 + len = strlen(zMaster);
1.1202 + for(i=0; i<len; i++){
1.1203 + cksum += zMaster[i];
1.1204 + }
1.1205 +
1.1206 + /* If in full-sync mode, advance to the next disk sector before writing
1.1207 + ** the master journal name. This is in case the previous page written to
1.1208 + ** the journal has already been synced.
1.1209 + */
1.1210 + if( pPager->fullSync ){
1.1211 + seekJournalHdr(pPager);
1.1212 + }
1.1213 + jrnlOff = pPager->journalOff;
1.1214 + pPager->journalOff += (len+20); /* 20 = 4 (page number) + 4 (length) + 4 (checksum) + 8 (magic) */
1.1215 +
1.1216 + rc = write32bits(pPager->jfd, jrnlOff, PAGER_MJ_PGNO(pPager));
1.1217 + if( rc!=SQLITE_OK ) return rc;
1.1218 + jrnlOff += 4;
1.1219 +
1.1220 + rc = sqlite3OsWrite(pPager->jfd, zMaster, len, jrnlOff);
1.1221 + if( rc!=SQLITE_OK ) return rc;
1.1222 + jrnlOff += len;
1.1223 +
1.1224 + put32bits(zBuf, len);
1.1225 + put32bits(&zBuf[4], cksum);
1.1226 + memcpy(&zBuf[8], aJournalMagic, sizeof(aJournalMagic));
1.1227 + rc = sqlite3OsWrite(pPager->jfd, zBuf, 8+sizeof(aJournalMagic), jrnlOff);
1.1228 + jrnlOff += 8+sizeof(aJournalMagic); /* jrnlOff now marks the intended end of the journal file */
1.1229 + pPager->needSync = !pPager->noSync;
1.1230 +
1.1231 + /* If the pager is in persistent-journal mode, then the physical
1.1232 + ** journal-file may extend past the end of the master-journal name
1.1233 + ** and 8 bytes of magic data just written to the file. This is
1.1234 + ** dangerous because the code to rollback a hot-journal file
1.1235 + ** will not be able to find the master-journal name to determine
1.1236 + ** whether or not the journal is hot.
1.1237 + **
1.1238 + ** Easiest thing to do in this scenario is to truncate the journal
1.1239 + ** file to the required size.
1.1240 + */
1.1241 + if( (rc==SQLITE_OK)
1.1242 + && (rc = sqlite3OsFileSize(pPager->jfd, &jrnlSize))==SQLITE_OK
1.1243 + && jrnlSize>jrnlOff
1.1244 + ){
1.1245 + rc = sqlite3OsTruncate(pPager->jfd, jrnlOff);
1.1246 + }
1.1247 + return rc;
1.1248 +}
1.1249 +
1.1250 +/*
1.1251 +** Add a page to the list of all pages that are in the statement
1.1252 +** journal. Only used for in-memory databases; a no-op if already listed.
1.1253 +**
1.1254 +** The Pager keeps a separate list of pages that are currently in
1.1255 +** the statement journal. This helps the sqlite3PagerStmtCommit()
1.1256 +** routine run MUCH faster for the common case where there are many
1.1257 +** pages in memory but only a few are in the statement journal.
1.1258 +*/
1.1259 +static void page_add_to_stmt_list(PgHdr *pPg){
1.1260 + Pager *pPager = pPg->pPager;
1.1261 + PgHistory *pHist = PGHDR_TO_HIST(pPg, pPager);
1.1262 + assert( MEMDB );
1.1263 + if( !pHist->inStmt ){
1.1264 + assert( pHist->pPrevStmt==0 && pHist->pNextStmt==0 );
1.1265 + if( pPager->pStmt ){ /* link the current head back to the new page */
1.1266 + PGHDR_TO_HIST(pPager->pStmt, pPager)->pPrevStmt = pPg;
1.1267 + }
1.1268 + pHist->pNextStmt = pPager->pStmt;
1.1269 + pPager->pStmt = pPg; /* the new page becomes the head of the list */
1.1270 + pHist->inStmt = 1;
1.1271 + }
1.1272 +}
1.1273 +
1.1274 +/*
1.1275 +** Find a page in the hash table given its page number. Return a pointer
1.1276 +** to the page, or NULL if not found or no hash table is allocated.
1.1277 +*/
1.1278 +static PgHdr *pager_lookup(Pager *pPager, Pgno pgno){
1.1279 + PgHdr *p;
1.1280 + if( pPager->aHash==0 ) return 0;
1.1281 + p = pPager->aHash[pgno & (pPager->nHash-1)]; /* bucket index = page number masked by table size */
1.1282 + while( p && p->pgno!=pgno ){ /* walk the bucket's chain until a match or the end */
1.1283 + p = p->pNextHash;
1.1284 + }
1.1285 + return p;
1.1286 +}
1.1287 +
1.1288 +/*
1.1289 +** Clear the in-memory cache (a no-op while pPager->errCode is set). This
1.1290 +** routine sets the state of the pager back to what it was when it was first
1.1291 +** opened. Any outstanding pages are invalidated and subsequent attempts
1.1292 +** to access those pages will likely result in a coredump.
1.1293 +*/
1.1294 +static void pager_reset(Pager *pPager){
1.1295 + PgHdr *pPg, *pNext;
1.1296 + if( pPager->errCode ) return;
1.1297 + for(pPg=pPager->pAll; pPg; pPg=pNext){
1.1298 + IOTRACE(("PGFREE %p %d\n", pPager, pPg->pgno));
1.1299 + PAGER_INCR(sqlite3_pager_pgfree_count);
1.1300 + pNext = pPg->pNextAll; /* saved before pPg is freed below */
1.1301 + lruListRemove(pPg);
1.1302 + sqlite3PageFree(pPg->pData);
1.1303 + sqlite3_free(pPg);
1.1304 + }
1.1305 + assert(pPager->lru.pFirst==0);
1.1306 + assert(pPager->lru.pFirstSynced==0);
1.1307 + assert(pPager->lru.pLast==0);
1.1308 + pPager->pStmt = 0;
1.1309 + pPager->pAll = 0;
1.1310 + pPager->pDirty = 0;
1.1311 + pPager->nHash = 0;
1.1312 + sqlite3_free(pPager->aHash);
1.1313 + pPager->nPage = 0;
1.1314 + pPager->aHash = 0;
1.1315 + pPager->nRef = 0;
1.1316 +}
1.1317 +
1.1318 +/*
1.1319 +** Unlock the database file. Does nothing in exclusive mode.
1.1320 +**
1.1321 +** If the pager is currently in error state, discard the contents of
1.1322 +** the cache and reset the Pager structure internal state. If there is
1.1323 +** an open journal-file, then the next time a shared-lock is obtained
1.1324 +** on the pager file (by this or any other process), it will be
1.1325 +** treated as a hot-journal and rolled back.
1.1326 +*/
1.1327 +static void pager_unlock(Pager *pPager){
1.1328 + if( !pPager->exclusiveMode ){
1.1329 + if( !MEMDB ){
1.1330 + int rc = osUnlock(pPager->fd, NO_LOCK);
1.1331 + if( rc ) pPager->errCode = rc; /* an unlock failure becomes the pager's error code */
1.1332 + pPager->dbSize = -1; /* presumably marks the cached database size as unknown - confirm */
1.1333 + IOTRACE(("UNLOCK %p\n", pPager))
1.1334 +
1.1335 + /* Always close the journal file when dropping the database lock.
1.1336 + ** Otherwise, another connection with journal_mode=delete might
1.1337 + ** delete the file out from under us.
1.1338 + */
1.1339 + if( pPager->journalOpen ){
1.1340 + sqlite3OsClose(pPager->jfd);
1.1341 + pPager->journalOpen = 0;
1.1342 + sqlite3BitvecDestroy(pPager->pInJournal);
1.1343 + pPager->pInJournal = 0;
1.1344 + }
1.1345 +
1.1346 + /* If Pager.errCode is set, the contents of the pager cache cannot be
1.1347 + ** trusted. Now that the pager file is unlocked, the contents of the
1.1348 + ** cache can be discarded and the error code safely cleared.
1.1349 + */
1.1350 + if( pPager->errCode ){
1.1351 + if( rc==SQLITE_OK ) pPager->errCode = SQLITE_OK; /* must be cleared first: pager_reset() returns early while errCode is set */
1.1352 + pager_reset(pPager);
1.1353 + if( pPager->stmtOpen ){
1.1354 + sqlite3OsClose(pPager->stfd);
1.1355 + sqlite3BitvecDestroy(pPager->pInStmt);
1.1356 + pPager->pInStmt = 0;
1.1357 + }
1.1358 + pPager->stmtOpen = 0;
1.1359 + pPager->stmtInUse = 0;
1.1360 + pPager->journalOff = 0;
1.1361 + pPager->journalStarted = 0;
1.1362 + pPager->stmtAutoopen = 0;
1.1363 + pPager->origDbSize = 0;
1.1364 + }
1.1365 + }
1.1366 +
1.1367 + if( !MEMDB || pPager->errCode==SQLITE_OK ){ /* an in-memory pager with an error set keeps its current state */
1.1368 + pPager->state = PAGER_UNLOCK;
1.1369 + pPager->changeCountDone = 0;
1.1370 + }
1.1371 + }
1.1372 +}
1.1373 +
1.1374 +/*
1.1375 +** Execute a rollback if a transaction is active (state>=PAGER_RESERVED)
1.1376 +** and unlock the database file. If the pager has already entered the
1.1377 +** error state, do not attempt the rollback.
1.1378 +*/
1.1379 +static void pagerUnlockAndRollback(Pager *p){
1.1380 + /* assert( p->state>=PAGER_RESERVED || p->journalOpen==0 ); */
1.1381 + if( p->errCode==SQLITE_OK && p->state>=PAGER_RESERVED ){
1.1382 + sqlite3BeginBenignMalloc(); /* the rollback runs inside a benign-malloc section; its return value is ignored */
1.1383 + sqlite3PagerRollback(p);
1.1384 + sqlite3EndBenignMalloc();
1.1385 + }
1.1386 + pager_unlock(p);
1.1387 +#if 0
1.1388 + assert( p->errCode || !p->journalOpen || (p->exclusiveMode&&!p->journalOff) );
1.1389 + assert( p->errCode || !p->stmtOpen || p->exclusiveMode );
1.1390 +#endif
1.1391 +}
1.1392 +
1.1393 +/*
1.1394 +** This routine ends a transaction. A transaction is ended by either
1.1395 +** a COMMIT or a ROLLBACK.
1.1396 +**
1.1397 +** When this routine is called, the pager has the journal file open and
1.1398 +** a RESERVED or EXCLUSIVE lock on the database. This routine will release
1.1399 +** the database lock and acquire a SHARED lock in its place if that is
1.1400 +** the appropriate thing to do. Releasing locks is usually appropriate,
1.1401 +** unless we are in exclusive access mode or unless this is a
1.1402 +** COMMIT AND BEGIN or ROLLBACK AND BEGIN operation.
1.1403 +**
1.1404 +** The journal is either closed and deleted, or zeroed/truncated in place.
1.1405 +**
1.1406 +** TODO: Consider keeping the journal file open for temporary databases.
1.1407 +** This might give a performance improvement on windows where opening
1.1408 +** a file is an expensive operation.
1.1409 +*/
1.1410 +static int pager_end_transaction(Pager *pPager, int hasMaster){
1.1411 + PgHdr *pPg;
1.1412 + int rc = SQLITE_OK; /* result of invalidating or deleting the journal */
1.1413 + int rc2 = SQLITE_OK; /* result of downgrading the file lock */
1.1414 + assert( !MEMDB );
1.1415 + if( pPager->state<PAGER_RESERVED ){ /* no write transaction is active: nothing to end */
1.1416 + return SQLITE_OK;
1.1417 + }
1.1418 + sqlite3PagerStmtCommit(pPager);
1.1419 + if( pPager->stmtOpen && !pPager->exclusiveMode ){
1.1420 + sqlite3OsClose(pPager->stfd);
1.1421 + pPager->stmtOpen = 0;
1.1422 + }
1.1423 + if( pPager->journalOpen ){
1.1424 + if( pPager->exclusiveMode
1.1425 + || pPager->journalMode==PAGER_JOURNALMODE_PERSIST
1.1426 + ){
1.1427 + rc = zeroJournalHdr(pPager, hasMaster); /* journal file is kept open; hasMaster is passed as doTruncate */
1.1428 + pager_error(pPager, rc);
1.1429 + pPager->journalOff = 0;
1.1430 + pPager->journalStarted = 0;
1.1431 + }else{
1.1432 + sqlite3OsClose(pPager->jfd);
1.1433 + pPager->journalOpen = 0;
1.1434 + if( rc==SQLITE_OK && !pPager->tempFile ){ /* the journal of a temp file is not deleted here */
1.1435 + rc = sqlite3OsDelete(pPager->pVfs, pPager->zJournal, 0);
1.1436 + }
1.1437 + }
1.1438 + sqlite3BitvecDestroy(pPager->pInJournal);
1.1439 + pPager->pInJournal = 0;
1.1440 + for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){ /* clear the per-page transaction flags on every page */
1.1441 + pPg->inJournal = 0;
1.1442 + pPg->dirty = 0;
1.1443 + pPg->needSync = 0;
1.1444 + pPg->alwaysRollback = 0;
1.1445 +#ifdef SQLITE_CHECK_PAGES
1.1446 + pPg->pageHash = pager_pagehash(pPg);
1.1447 +#endif
1.1448 + }
1.1449 + pPager->pDirty = 0;
1.1450 + pPager->dirtyCache = 0;
1.1451 + pPager->nRec = 0;
1.1452 + }else{
1.1453 + assert( pPager->pInJournal==0 );
1.1454 + }
1.1455 +
1.1456 + if( !pPager->exclusiveMode ){
1.1457 + rc2 = osUnlock(pPager->fd, SHARED_LOCK); /* downgrade to a shared lock */
1.1458 + pPager->state = PAGER_SHARED;
1.1459 + }else if( pPager->state==PAGER_SYNCED ){
1.1460 + pPager->state = PAGER_EXCLUSIVE;
1.1461 + }
1.1462 + pPager->origDbSize = 0;
1.1463 + pPager->setMaster = 0;
1.1464 + pPager->needSync = 0;
1.1465 + lruListSetFirstSynced(pPager);
1.1466 + pPager->dbSize = -1;
1.1467 + pPager->dbModified = 0;
1.1468 +
1.1469 + return (rc==SQLITE_OK?rc2:rc); /* a journal error takes precedence over an unlock error */
1.1470 +}
1.1471 +
1.1472 +/*
1.1473 +** Compute and return a checksum for the page of data.
1.1474 +**
1.1475 +** This is not a real checksum. It is the sum of the random initial value
1.1476 +** (pPager->cksumInit) and one byte out of every 200 of the page, taken at
1.1477 +** offsets pageSize-200, pageSize-400, ... while the offset is positive.
1.1478 +** A checksum of the entire data was tried but found to be too slow.
1.1479 +** Note that the page number is stored at the beginning of data and
1.1480 +** the checksum is stored at the end. This is important. If journal
1.1481 +** corruption occurs due to a power failure, the most likely scenario
1.1482 +** is that one end or the other of the record will be changed. It is
1.1483 +** much less likely that the two ends of the journal record will be
1.1484 +** correct and the middle be corrupt. Thus, this "checksum" scheme,
1.1485 +** though fast and simple, catches the most likely kind of corruption.
1.1486 +**
1.1487 +** Sampling every 200th byte means that if a single page spans 3 or more
1.1488 +** disk sectors and only the middle sector is corrupt, there is still a
1.1489 +** reasonable chance of failing the checksum and thus detecting the
1.1490 +** problem.
1.1491 +*/
1.1492 +static u32 pager_cksum(Pager *pPager, const u8 *aData){
1.1493 + u32 cksum = pPager->cksumInit;
1.1494 + int i = pPager->pageSize-200; /* start 200 bytes before the end of the page */
1.1495 + while( i>0 ){ /* offset 0 itself is never sampled */
1.1496 + cksum += aData[i];
1.1497 + i -= 200;
1.1498 + }
1.1499 + return cksum;
1.1500 +}
1.1501 +
1.1502 +/* Forward declaration */
1.1503 +static void makeClean(PgHdr*);
1.1504 +
1.1505 +/*
1.1506 +** Read a single page from the journal file opened on file descriptor
1.1507 +** jfd, starting at byte position "offset". Playback this one page.
1.1508 +**
1.1509 +** If useCksum==0 it means this journal does not use checksums. Checksums
1.1510 +** are not used in statement journals because statement journals do not
1.1511 +** need to survive power failures. Returns SQLITE_DONE when the record looks invalid (page number 0 or PAGER_MJ_PGNO, or a checksum mismatch), otherwise SQLITE_OK or the result code of a failed read or write.
1.1512 +*/
1.1513 +static int pager_playback_one_page(
1.1514 + Pager *pPager, /* Pager whose page is being played back */
1.1515 + sqlite3_file *jfd, /* Journal to read from: pPager->jfd or pPager->stfd */
1.1516 + i64 offset, /* Byte offset of the record within jfd */
1.1517 + int useCksum /* True if the record is followed by a 4-byte checksum */
1.1518 +){
1.1519 + int rc;
1.1520 + PgHdr *pPg; /* An existing page in the cache */
1.1521 + Pgno pgno; /* The page number of a page in journal */
1.1522 + u32 cksum; /* Checksum used for sanity checking */
1.1523 + u8 *aData = (u8 *)pPager->pTmpSpace; /* Temp storage for a page */
1.1524 +
1.1525 + /* useCksum should be true for the main journal and false for
1.1526 + ** statement journals. Verify that this is always the case
1.1527 + */
1.1528 + assert( jfd == (useCksum ? pPager->jfd : pPager->stfd) );
1.1529 + assert( aData );
1.1530 +
1.1531 + rc = read32bits(jfd, offset, &pgno);
1.1532 + if( rc!=SQLITE_OK ) return rc;
1.1533 + rc = sqlite3OsRead(jfd, aData, pPager->pageSize, offset+4);
1.1534 + if( rc!=SQLITE_OK ) return rc;
1.1535 + pPager->journalOff += pPager->pageSize + 4; /* Account for the page number and the page data */
1.1536 +
1.1537 + /* Sanity checking on the page. This is more important than I originally
1.1538 + ** thought. If a power failure occurs while the journal is being written,
1.1539 + ** it could cause invalid data to be written into the journal. We need to
1.1540 + ** detect this invalid data (with high probability) and ignore it.
1.1541 + */
1.1542 + if( pgno==0 || pgno==PAGER_MJ_PGNO(pPager) ){
1.1543 + return SQLITE_DONE;
1.1544 + }
1.1545 + if( pgno>(unsigned)pPager->dbSize ){
1.1546 + return SQLITE_OK; /* Page is beyond the current database size: skip it */
1.1547 + }
1.1548 + if( useCksum ){
1.1549 + rc = read32bits(jfd, offset+pPager->pageSize+4, &cksum);
1.1550 + if( rc ) return rc;
1.1551 + pPager->journalOff += 4; /* Account for the checksum field */
1.1552 + if( pager_cksum(pPager, aData)!=cksum ){
1.1553 + return SQLITE_DONE; /* Checksum mismatch: treat the record as invalid */
1.1554 + }
1.1555 + }
1.1556 +
1.1557 + assert( pPager->state==PAGER_RESERVED || pPager->state>=PAGER_EXCLUSIVE );
1.1558 +
1.1559 + /* If the pager is in RESERVED state, then there must be a copy of this
1.1560 + ** page in the pager cache. In this case just update the pager cache,
1.1561 + ** not the database file. The page is left marked dirty in this case.
1.1562 + **
1.1563 + ** An exception to the above rule: If the database is in no-sync mode
1.1564 + ** and a page is moved during an incremental vacuum then the page may
1.1565 + ** not be in the pager cache. Later: if a malloc() or IO error occurs
1.1566 + ** during a Movepage() call, then the page may not be in the cache
1.1567 + ** either. So the condition described in the above paragraph is not
1.1568 + ** assert()able.
1.1569 + **
1.1570 + ** If in EXCLUSIVE state, then we update the pager cache if it exists
1.1571 + ** and the main file. The page is then marked not dirty.
1.1572 + **
1.1573 + ** Ticket #1171: The statement journal might contain page content that is
1.1574 + ** different from the page content at the start of the transaction.
1.1575 + ** This occurs when a page is changed prior to the start of a statement
1.1576 + ** then changed again within the statement. When rolling back such a
1.1577 + ** statement we must not write to the original database unless we know
1.1578 + ** for certain that original page contents are synced into the main rollback
1.1579 + ** journal. Otherwise, a power loss might leave modified data in the
1.1580 + ** database file without an entry in the rollback journal that can
1.1581 + ** restore the database to its original form. Two conditions must be
1.1582 + ** met before writing to the database files. (1) the database must be
1.1583 + ** locked. (2) we know that the original page content is fully synced
1.1584 + ** in the main journal either because the page is not in cache or else
1.1585 + ** the page is marked as needSync==0.
1.1586 + **
1.1587 + ** 2008-04-14: When attempting to vacuum a corrupt database file, it
1.1588 + ** is possible to fail a statement on a database that does not yet exist.
1.1589 + ** Do not attempt to write if database file has never been opened.
1.1590 + */
1.1591 + pPg = pager_lookup(pPager, pgno);
1.1592 + PAGERTRACE4("PLAYBACK %d page %d hash(%08x)\n",
1.1593 + PAGERID(pPager), pgno, pager_datahash(pPager->pageSize, aData));
1.1594 + if( pPager->state>=PAGER_EXCLUSIVE && (pPg==0 || pPg->needSync==0)
1.1595 + && pPager->fd->pMethods ){
1.1596 + i64 offset = (pgno-1)*(i64)pPager->pageSize; /* Database-file offset of page pgno; shadows the parameter */
1.1597 + rc = sqlite3OsWrite(pPager->fd, aData, pPager->pageSize, offset);
1.1598 + if( pPg ){
1.1599 + makeClean(pPg);
1.1600 + }
1.1601 + }
1.1602 + if( pPg ){
1.1603 + /* No page should ever be explicitly rolled back that is in use, except
1.1604 + ** for page 1 which is held in use in order to keep the lock on the
1.1605 + ** database active. However such a page may be rolled back as a result
1.1606 + ** of an internal error resulting in an automatic call to
1.1607 + ** sqlite3PagerRollback().
1.1608 + */
1.1609 + void *pData;
1.1610 + /* assert( pPg->nRef==0 || pPg->pgno==1 ); */
1.1611 + pData = PGHDR_TO_DATA(pPg);
1.1612 + memcpy(pData, aData, pPager->pageSize); /* Overwrite the cached copy with the journalled content */
1.1613 + if( pPager->xReiniter ){
1.1614 + pPager->xReiniter(pPg, pPager->pageSize);
1.1615 + }
1.1616 +#ifdef SQLITE_CHECK_PAGES
1.1617 + pPg->pageHash = pager_pagehash(pPg);
1.1618 +#endif
1.1619 + /* If this was page 1, then restore the value of Pager.dbFileVers.
1.1620 + ** Do this before any decoding. */
1.1621 + if( pgno==1 ){
1.1622 + memcpy(&pPager->dbFileVers, &((u8*)pData)[24],sizeof(pPager->dbFileVers));
1.1623 + }
1.1624 +
1.1625 + /* Decode the page just read from disk */
1.1626 + CODEC1(pPager, pData, pPg->pgno, 3);
1.1627 + }
1.1628 + return rc; /* SQLITE_OK, or the sqlite3OsWrite() result if the file was written */
1.1629 +}
1.1630 +
1.1631 +/*
1.1632 +** Parameter zMaster is the name of a master journal file. A single journal
1.1633 +** file that referred to the master journal file has just been rolled back.
1.1634 +** This routine checks if it is possible to delete the master journal file,
1.1635 +** and does so if it is.
1.1636 +**
1.1637 +** Argument zMaster may point to Pager.pTmpSpace. So that buffer is not
1.1638 +** available for use within this function.
1.1639 +**
1.1640 +**
1.1641 +** The master journal file contains the names of all child journals.
1.1642 +** To tell if a master journal can be deleted, check each of the
1.1643 +** children. If all children are either missing or do not refer to
1.1644 +** this master journal, then this master journal can be deleted.
1.1645 +*/
1.1646 +static int pager_delmaster(Pager *pPager, const char *zMaster){
1.1647 + sqlite3_vfs *pVfs = pPager->pVfs;
1.1648 + int rc;
1.1649 + int master_open = 0; /* True once pMaster has been opened and must be closed */
1.1650 + sqlite3_file *pMaster; /* Handle on the master journal */
1.1651 + sqlite3_file *pJournal; /* Handle reused for each child journal */
1.1652 + char *zMasterJournal = 0; /* Contents of master journal file */
1.1653 + i64 nMasterJournal; /* Size of master journal file */
1.1654 +
1.1655 + /* Open the master journal file exclusively in case some other process
1.1656 + ** is running this routine also. Not that it makes too much difference.
1.1657 + */
1.1658 + pMaster = (sqlite3_file *)sqlite3Malloc(pVfs->szOsFile * 2); /* One allocation holds both file handles */
1.1659 + pJournal = (sqlite3_file *)(((u8 *)pMaster) + pVfs->szOsFile); /* Second handle sits right after the first */
1.1660 + if( !pMaster ){
1.1661 + rc = SQLITE_NOMEM;
1.1662 + }else{
1.1663 + int flags = (SQLITE_OPEN_READONLY|SQLITE_OPEN_MASTER_JOURNAL);
1.1664 + rc = sqlite3OsOpen(pVfs, zMaster, pMaster, flags, 0);
1.1665 + }
1.1666 + if( rc!=SQLITE_OK ) goto delmaster_out;
1.1667 + master_open = 1;
1.1668 +
1.1669 + rc = sqlite3OsFileSize(pMaster, &nMasterJournal);
1.1670 + if( rc!=SQLITE_OK ) goto delmaster_out;
1.1671 +
1.1672 + if( nMasterJournal>0 ){
1.1673 + char *zJournal; /* Current child journal name within zMasterJournal */
1.1674 + char *zMasterPtr = 0; /* Scratch buffer for the master name read from a child */
1.1675 + int nMasterPtr = pPager->pVfs->mxPathname+1; /* Size of that scratch buffer */
1.1676 +
1.1677 + /* Load the entire master journal file into space obtained from
1.1678 + ** sqlite3_malloc() and pointed to by zMasterJournal.
1.1679 + */
1.1680 + zMasterJournal = (char *)sqlite3Malloc(nMasterJournal + nMasterPtr);
1.1681 + if( !zMasterJournal ){
1.1682 + rc = SQLITE_NOMEM;
1.1683 + goto delmaster_out;
1.1684 + }
1.1685 + zMasterPtr = &zMasterJournal[nMasterJournal]; /* Scratch space follows the file contents */
1.1686 + rc = sqlite3OsRead(pMaster, zMasterJournal, nMasterJournal, 0);
1.1687 + if( rc!=SQLITE_OK ) goto delmaster_out;
1.1688 +
1.1689 + zJournal = zMasterJournal;
1.1690 + while( (zJournal-zMasterJournal)<nMasterJournal ){
1.1691 + int exists;
1.1692 + rc = sqlite3OsAccess(pVfs, zJournal, SQLITE_ACCESS_EXISTS, &exists);
1.1693 + if( rc!=SQLITE_OK ){
1.1694 + goto delmaster_out;
1.1695 + }
1.1696 + if( exists ){
1.1697 + /* One of the journals pointed to by the master journal exists.
1.1698 + ** Open it and check if it points at the master journal. If
1.1699 + ** so, return without deleting the master journal file.
1.1700 + */
1.1701 + int c;
1.1702 + int flags = (SQLITE_OPEN_READONLY|SQLITE_OPEN_MAIN_JOURNAL);
1.1703 + rc = sqlite3OsOpen(pVfs, zJournal, pJournal, flags, 0);
1.1704 + if( rc!=SQLITE_OK ){
1.1705 + goto delmaster_out;
1.1706 + }
1.1707 +
1.1708 + rc = readMasterJournal(pJournal, zMasterPtr, nMasterPtr);
1.1709 + sqlite3OsClose(pJournal); /* Closed before rc is checked, so it is closed on error too */
1.1710 + if( rc!=SQLITE_OK ){
1.1711 + goto delmaster_out;
1.1712 + }
1.1713 +
1.1714 + c = zMasterPtr[0]!=0 && strcmp(zMasterPtr, zMaster)==0;
1.1715 + if( c ){
1.1716 + /* We have a match. Do not delete the master journal file. */
1.1717 + goto delmaster_out;
1.1718 + }
1.1719 + }
1.1720 + zJournal += (strlen(zJournal)+1); /* Advance to the next nul-terminated name */
1.1721 + }
1.1722 + }
1.1723 +
1.1724 + rc = sqlite3OsDelete(pVfs, zMaster, 0); /* Reached only when no existing child names zMaster */
1.1725 +
1.1726 +delmaster_out:
1.1727 + if( zMasterJournal ){
1.1728 + sqlite3_free(zMasterJournal);
1.1729 + }
1.1730 + if( master_open ){
1.1731 + sqlite3OsClose(pMaster);
1.1732 + }
1.1733 + sqlite3_free(pMaster);
1.1734 + return rc;
1.1735 +}
1.1736 +
1.1737 +
1.1738 +static void pager_truncate_cache(Pager *pPager);
1.1739 +
1.1740 +/*
1.1741 +** Truncate the main file of the given pager to the number of pages
1.1742 +** indicated. Also truncate the cached representation of the file.
1.1743 +**
1.1744 +** It might be the case that the file on disk is smaller than nPage.
1.1745 +** This can happen, for example, if we are in the middle of a transaction
1.1746 +** which has extended the file size and the new pages are still all held
1.1747 +** in cache, then an INSERT or UPDATE does a statement rollback. Some
1.1748 +** operating system implementations can get confused if you try to
1.1749 +** truncate a file to some size that is larger than it currently is,
1.1750 +** so detect this case and write a single zero byte to the end of the new
1.1751 +** file instead. Returns SQLITE_OK on success, otherwise the result code of the failing file operation;
1.1752 +** Pager.dbSize and the cache are only updated when SQLITE_OK is returned. */
1.1753 +static int pager_truncate(Pager *pPager, int nPage){
1.1754 + int rc = SQLITE_OK;
1.1755 + if( pPager->state>=PAGER_EXCLUSIVE && pPager->fd->pMethods ){ /* Only touch the file when exclusively locked and actually open */
1.1756 + i64 currentSize, newSize;
1.1757 + rc = sqlite3OsFileSize(pPager->fd, &currentSize);
1.1758 + newSize = pPager->pageSize*(i64)nPage; /* Widen before multiplying so the byte size is computed as i64 */
1.1759 + if( rc==SQLITE_OK && currentSize!=newSize ){
1.1760 + if( currentSize>newSize ){
1.1761 + rc = sqlite3OsTruncate(pPager->fd, newSize);
1.1762 + }else{
1.1763 + rc = sqlite3OsWrite(pPager->fd, "", 1, newSize-1); /* Extend by writing one zero byte at the new last offset */
1.1764 + }
1.1765 + }
1.1766 + }
1.1767 + if( rc==SQLITE_OK ){
1.1768 + pPager->dbSize = nPage;
1.1769 + pager_truncate_cache(pPager);
1.1770 + }
1.1771 + return rc;
1.1772 +}
1.1773 +
1.1774 +/*
1.1775 +** Set the sectorSize for the given pager.
1.1776 +**
1.1777 +** The sector size is at least as big as the sector size reported
1.1778 +** by sqlite3OsSectorSize(). The minimum sector size is 512.
1.1779 +*/
1.1780 +static void setSectorSize(Pager *pPager){
1.1781 + assert(pPager->fd->pMethods||pPager->tempFile);
1.1782 + if( !pPager->tempFile ){
1.1783 + /* Sector size doesn't matter for temporary files. Also, the file
1.1784 + ** may not have been opened yet, in which case the OsSectorSize()
1.1785 + ** call will segfault.
1.1786 + */
1.1787 + pPager->sectorSize = sqlite3OsSectorSize(pPager->fd);
1.1788 + }
1.1789 + if( pPager->sectorSize<512 ){
1.1790 + pPager->sectorSize = 512; /* Enforce the 512-byte minimum; also applies to temp files, which skip the query above */
1.1791 + }
1.1792 +}
1.1793 +
1.1794 +/*
1.1795 +** Playback the journal and thus restore the database file to
1.1796 +** the state it was in before we started making changes.
1.1797 +**
1.1798 +** The journal file format is as follows:
1.1799 +**
1.1800 +** (1) 8 byte prefix. A copy of aJournalMagic[].
1.1801 +** (2) 4 byte big-endian integer which is the number of valid page records
1.1802 +** in the journal. If this value is 0xffffffff, then compute the
1.1803 +** number of page records from the journal size.
1.1804 +** (3) 4 byte big-endian integer which is the initial value for the
1.1805 +** sanity checksum.
1.1806 +** (4) 4 byte integer which is the number of pages to truncate the
1.1807 +** database to during a rollback.
1.1808 +** (5) 4 byte big-endian integer which is the sector size. The header
1.1809 +** is this many bytes in size.
1.1810 +** (6) 4 byte big-endian integer which is the page size.
1.1811 +** (7) 4 byte integer which is the number of bytes in the master journal
1.1812 +** name. The value may be zero (indicate that there is no master
1.1813 +** journal.)
1.1814 +** (8) N bytes of the master journal name. The name will be nul-terminated
1.1815 +** and might be shorter than the value read from (7). If the first byte
1.1816 +** of the name is \000 then there is no master journal. The master
1.1817 +** journal name is stored in UTF-8.
1.1818 +** (9) Zero or more pages instances, each as follows:
1.1819 +** + 4 byte page number.
1.1820 +** + pPager->pageSize bytes of data.
1.1821 +** + 4 byte checksum
1.1822 +**
1.1823 +** When we speak of the journal header, we mean the first 8 items above.
1.1824 +** Each entry in the journal is an instance of the 9th item.
1.1825 +**
1.1826 +** Call the value from the second bullet "nRec". nRec is the number of
1.1827 +** valid page entries in the journal. In most cases, you can compute the
1.1828 +** value of nRec from the size of the journal file. But if a power
1.1829 +** failure occurred while the journal was being written, it could be the
1.1830 +** case that the size of the journal file had already been increased but
1.1831 +** the extra entries had not yet made it safely to disk. In such a case,
1.1832 +** the value of nRec computed from the file size would be too large. For
1.1833 +** that reason, we always use the nRec value in the header.
1.1834 +**
1.1835 +** If the nRec value is 0xffffffff it means that nRec should be computed
1.1836 +** from the file size. This value is used when the user selects the
1.1837 +** no-sync option for the journal. A power failure could lead to corruption
1.1838 +** in this case. But for things like temporary table (which will be
1.1839 +** deleted when the power is restored) we don't care.
1.1840 +**
1.1841 +** If the file opened as the journal file is not a well-formed
1.1842 +** journal file then all pages up to the first corrupted page are rolled
1.1843 +** back (or no pages if the journal header is corrupted). The journal file
1.1844 +** is then deleted and SQLITE_OK returned, just as if no corruption had
1.1845 +** been encountered.
1.1846 +**
1.1847 +** If an I/O or malloc() error occurs, the journal-file is not deleted
1.1848 +** and an error code is returned.
1.1849 +*/
1.1850 +static int pager_playback(Pager *pPager, int isHot){
1.1851 + sqlite3_vfs *pVfs = pPager->pVfs;
1.1852 + i64 szJ; /* Size of the journal file in bytes */
1.1853 + u32 nRec; /* Number of Records in the journal */
1.1854 + u32 u; /* Unsigned loop counter */
1.1855 + Pgno mxPg = 0; /* Size of the original file in pages */
1.1856 + int rc; /* Result code of a subroutine */
1.1857 + int res = 1; /* Value returned by sqlite3OsAccess() */
1.1858 + char *zMaster = 0; /* Name of master journal file if any */
1.1859 +
1.1860 + /* Figure out how many records are in the journal. Abort early if
1.1861 + ** the journal is empty.
1.1862 + */
1.1863 + assert( pPager->journalOpen );
1.1864 + rc = sqlite3OsFileSize(pPager->jfd, &szJ);
1.1865 + if( rc!=SQLITE_OK || szJ==0 ){
1.1866 + goto end_playback;
1.1867 + }
1.1868 +
1.1869 + /* Read the master journal name from the journal, if it is present.
1.1870 + ** If a master journal file name is specified, but the file is not
1.1871 + ** present on disk, then the journal is not hot and does not need to be
1.1872 + ** played back.
1.1873 + */
1.1874 + zMaster = pPager->pTmpSpace;
1.1875 + rc = readMasterJournal(pPager->jfd, zMaster, pPager->pVfs->mxPathname+1);
1.1876 + if( rc==SQLITE_OK && zMaster[0] ){
1.1877 + rc = sqlite3OsAccess(pVfs, zMaster, SQLITE_ACCESS_EXISTS, &res);
1.1878 + }
1.1879 + zMaster = 0; /* pTmpSpace is reused below as the page buffer, so drop the pointer */
1.1880 + if( rc!=SQLITE_OK || !res ){
1.1881 + goto end_playback;
1.1882 + }
1.1883 + pPager->journalOff = 0; /* Start reading from the beginning of the journal */
1.1884 +
1.1885 + /* This loop terminates either when the readJournalHdr() call returns
1.1886 + ** SQLITE_DONE or an IO error occurs. */
1.1887 + while( 1 ){
1.1888 +
1.1889 + /* Read the next journal header from the journal file. If there are
1.1890 + ** not enough bytes left in the journal file for a complete header, or
1.1891 + ** it is corrupted, then a process must have failed while writing it.
1.1892 + ** This indicates nothing more needs to be rolled back.
1.1893 + */
1.1894 + rc = readJournalHdr(pPager, szJ, &nRec, &mxPg);
1.1895 + if( rc!=SQLITE_OK ){
1.1896 + if( rc==SQLITE_DONE ){
1.1897 + rc = SQLITE_OK; /* SQLITE_DONE is the normal end-of-journal signal, not an error */
1.1898 + }
1.1899 + goto end_playback;
1.1900 + }
1.1901 +
1.1902 + /* If nRec is 0xffffffff, then this journal was created by a process
1.1903 + ** working in no-sync mode. This means that the rest of the journal
1.1904 + ** file consists of pages, there are no more journal headers. Compute
1.1905 + ** the value of nRec based on this assumption.
1.1906 + */
1.1907 + if( nRec==0xffffffff ){
1.1908 + assert( pPager->journalOff==JOURNAL_HDR_SZ(pPager) );
1.1909 + nRec = (szJ - JOURNAL_HDR_SZ(pPager))/JOURNAL_PG_SZ(pPager);
1.1910 + }
1.1911 +
1.1912 + /* If nRec is 0 and this rollback is of a transaction created by this
1.1913 + ** process and if this is the final header in the journal, then it means
1.1914 + ** that this part of the journal was being filled but has not yet been
1.1915 + ** synced to disk. Compute the number of pages based on the remaining
1.1916 + ** size of the file.
1.1917 + **
1.1918 + ** The third term of the test was added to fix ticket #2565.
1.1919 + */
1.1920 + if( nRec==0 && !isHot &&
1.1921 + pPager->journalHdr+JOURNAL_HDR_SZ(pPager)==pPager->journalOff ){
1.1922 + nRec = (szJ - pPager->journalOff) / JOURNAL_PG_SZ(pPager);
1.1923 + }
1.1924 +
1.1925 + /* If this is the first header read from the journal, truncate the
1.1926 + ** database file back to its original size.
1.1927 + */
1.1928 + if( pPager->journalOff==JOURNAL_HDR_SZ(pPager) ){
1.1929 + rc = pager_truncate(pPager, mxPg);
1.1930 + if( rc!=SQLITE_OK ){
1.1931 + goto end_playback;
1.1932 + }
1.1933 + }
1.1934 +
1.1935 + /* Copy original pages out of the journal and back into the database file.
1.1936 + */
1.1937 + for(u=0; u<nRec; u++){
1.1938 + rc = pager_playback_one_page(pPager, pPager->jfd, pPager->journalOff, 1);
1.1939 + if( rc!=SQLITE_OK ){
1.1940 + if( rc==SQLITE_DONE ){
1.1941 + rc = SQLITE_OK;
1.1942 + pPager->journalOff = szJ; /* Invalid record: skip to the end of the journal */
1.1943 + break;
1.1944 + }else{
1.1945 + goto end_playback;
1.1946 + }
1.1947 + }
1.1948 + }
1.1949 + }
1.1950 + /*NOTREACHED*/
1.1951 + assert( 0 );
1.1952 +
1.1953 +end_playback:
1.1954 + if( rc==SQLITE_OK ){
1.1955 + zMaster = pPager->pTmpSpace;
1.1956 + rc = readMasterJournal(pPager->jfd, zMaster, pPager->pVfs->mxPathname+1); /* Re-read the name: the buffer was reused during playback */
1.1957 + }
1.1958 + if( rc==SQLITE_OK ){
1.1959 + rc = pager_end_transaction(pPager, zMaster[0]!='\0');
1.1960 + }
1.1961 + if( rc==SQLITE_OK && zMaster[0] ){
1.1962 + /* If there was a master journal and this routine will return success,
1.1963 + ** see if it is possible to delete the master journal.
1.1964 + */
1.1965 + rc = pager_delmaster(pPager, zMaster);
1.1966 + }
1.1967 +
1.1968 + /* The Pager.sectorSize variable may have been updated while rolling
1.1969 + ** back a journal created by a process with a different sector size
1.1970 + ** value. Reset it to the correct value for this process.
1.1971 + */
1.1972 + setSectorSize(pPager);
1.1973 + return rc;
1.1974 +}
1.1975 +
1.1976 +/*
1.1977 +** Playback the statement journal.
1.1978 +**
1.1979 +** This is similar to playing back the transaction journal but with
1.1980 +** a few extra twists.
1.1981 +**
1.1982 +** (1) The number of pages in the database file at the start of
1.1983 +** the statement is stored in pPager->stmtSize, not in the
1.1984 +** journal file itself.
1.1985 +**
1.1986 +** (2) In addition to playing back the statement journal, also
1.1987 +** playback all pages of the transaction journal beginning
1.1988 +** at offset pPager->stmtJSize.
1.1989 +** Returns SQLITE_OK on success, in which case Pager.journalOff is restored to its value on entry; otherwise the first error code hit during playback. */
1.1990 +static int pager_stmt_playback(Pager *pPager){
1.1991 + i64 szJ; /* Size of the full journal */
1.1992 + i64 hdrOff; /* Offset at which the first phase of main-journal playback stops */
1.1993 + int nRec; /* Number of Records */
1.1994 + int i; /* Loop counter */
1.1995 + int rc;
1.1996 +
1.1997 + szJ = pPager->journalOff; /* Remember the current write position so it can be restored */
1.1998 +
1.1999 + /* Set hdrOff to be the offset just after the end of the last journal
1.2000 + ** page written before the first journal-header for this statement
1.2001 + ** transaction was written, or the end of the file if no journal
1.2002 + ** header was written.
1.2003 + */
1.2004 + hdrOff = pPager->stmtHdrOff;
1.2005 + assert( pPager->fullSync || !hdrOff );
1.2006 + if( !hdrOff ){
1.2007 + hdrOff = szJ;
1.2008 + }
1.2009 +
1.2010 + /* Truncate the database back to its original size.
1.2011 + */
1.2012 + rc = pager_truncate(pPager, pPager->stmtSize);
1.2013 + assert( pPager->state>=PAGER_SHARED );
1.2014 +
1.2015 + /* Figure out how many records are in the statement journal.
1.2016 + */
1.2017 + assert( pPager->stmtInUse && pPager->journalOpen );
1.2018 + nRec = pPager->stmtNRec;
1.2019 +
1.2020 + /* Copy original pages out of the statement journal and back into the
1.2021 + ** database file. Note that the statement journal omits checksums from
1.2022 + ** each record since power-failure recovery is not important to statement
1.2023 + ** journals.
1.2024 + */
1.2025 + for(i=0; i<nRec; i++){
1.2026 + i64 offset = (i64)i*(4+pPager->pageSize); /* Widen before multiplying so a large journal cannot overflow int */
1.2027 + rc = pager_playback_one_page(pPager, pPager->stfd, offset, 0);
1.2028 + assert( rc!=SQLITE_DONE );
1.2029 + if( rc!=SQLITE_OK ) goto end_stmt_playback;
1.2030 + }
1.2031 +
1.2032 + /* Now roll some pages back from the transaction journal. Pager.stmtJSize
1.2033 + ** was the size of the journal file when this statement was started, so
1.2034 + ** everything after that needs to be rolled back, either into the
1.2035 + ** database, the memory cache, or both.
1.2036 + **
1.2037 + ** If it is not zero, then Pager.stmtHdrOff is the offset to the start
1.2038 + ** of the first journal header written during this statement transaction.
1.2039 + */
1.2040 + pPager->journalOff = pPager->stmtJSize;
1.2041 + pPager->cksumInit = pPager->stmtCksum; /* Use the checksum seed saved for this statement */
1.2042 + while( pPager->journalOff < hdrOff ){
1.2043 + rc = pager_playback_one_page(pPager, pPager->jfd, pPager->journalOff, 1);
1.2044 + assert( rc!=SQLITE_DONE );
1.2045 + if( rc!=SQLITE_OK ) goto end_stmt_playback;
1.2046 + }
1.2047 +
1.2048 + while( pPager->journalOff < szJ ){
1.2049 + u32 nJRec; /* Number of Journal Records */
1.2050 + u32 dummy; /* Receives the header's page-count field, which is not used here */
1.2051 + rc = readJournalHdr(pPager, szJ, &nJRec, &dummy);
1.2052 + if( rc!=SQLITE_OK ){
1.2053 + assert( rc!=SQLITE_DONE );
1.2054 + goto end_stmt_playback;
1.2055 + }
1.2056 + if( nJRec==0 ){
1.2057 + nJRec = (szJ - pPager->journalOff) / (pPager->pageSize+8); /* Each record is page data plus 4-byte page number plus 4-byte checksum */
1.2058 + }
1.2059 + for(i=nJRec-1; i>=0 && pPager->journalOff < szJ; i--){
1.2060 + rc = pager_playback_one_page(pPager, pPager->jfd, pPager->journalOff, 1);
1.2061 + assert( rc!=SQLITE_DONE );
1.2062 + if( rc!=SQLITE_OK ) goto end_stmt_playback;
1.2063 + }
1.2064 + }
1.2065 +
1.2066 + pPager->journalOff = szJ;
1.2067 +
1.2068 +end_stmt_playback:
1.2069 + if( rc==SQLITE_OK) {
1.2070 + pPager->journalOff = szJ;
1.2071 + /* pager_reload_cache(pPager); */
1.2072 + }
1.2073 + return rc;
1.2074 +}
1.2075 +
1.2076 +/*
1.2077 +** Change the maximum number of in-memory pages that are allowed. Requests of 10 pages or fewer are raised to 10.
1.2078 +*/
1.2079 +void sqlite3PagerSetCachesize(Pager *pPager, int mxPage){
1.2080 + if( mxPage>10 ){
1.2081 + pPager->mxPage = mxPage;
1.2082 + }else{
1.2083 + pPager->mxPage = 10; /* Enforce the minimum of 10 pages */
1.2084 + }
1.2085 +}
1.2086 +
1.2087 +/*
1.2088 +** Adjust the robustness of the database to damage due to OS crashes
1.2089 +** or power failures by changing the number of sync()s when writing
1.2090 +** the rollback journal. There are three levels:
1.2091 +**
1.2092 +** OFF sqlite3OsSync() is never called. This is the default
1.2093 +** for temporary and transient files.
1.2094 +**
1.2095 +** NORMAL The journal is synced once before writes begin on the
1.2096 +** database. This is normally adequate protection, but
1.2097 +** it is theoretically possible, though very unlikely,
1.2098 +** that an inopportune power failure could leave the journal
1.2099 +** in a state which would cause damage to the database
1.2100 +** when it is rolled back.
1.2101 +**
1.2102 +** FULL The journal is synced twice before writes begin on the
1.2103 +** database (with some additional information - the nRec field
1.2104 +** of the journal header - being written in between the two
1.2105 +** syncs). If we assume that writing a
1.2106 +** single disk sector is atomic, then this mode provides
1.2107 +** assurance that the journal will not be corrupted to the
1.2108 +** point of causing damage to the database during rollback.
1.2109 +**
1.2110 +** Numeric values associated with these states are OFF==1, NORMAL==2,
1.2111 +** and FULL==3.
1.2112 +*/
1.2113 +#ifndef SQLITE_OMIT_PAGER_PRAGMAS
1.2114 +void sqlite3PagerSetSafetyLevel(Pager *pPager, int level, int full_fsync){
1.2115 + pPager->noSync = level==1 || pPager->tempFile; /* OFF, or any temp file, disables syncing */
1.2116 + pPager->fullSync = level==3 && !pPager->tempFile; /* FULL only takes effect for non-temp files */
1.2117 + pPager->sync_flags = (full_fsync?SQLITE_SYNC_FULL:SQLITE_SYNC_NORMAL); /* Record which sync flavour was requested */
1.2118 + if( pPager->noSync ) pPager->needSync = 0; /* With syncing disabled, clear needSync */
1.2119 +}
1.2120 +#endif
1.2121 +
1.2122 +/*
1.2123 +** The following global variable is incremented whenever the library
1.2124 +** attempts to open a temporary file. This information is used for
1.2125 +** testing and analysis only.
1.2126 +*/
1.2127 +#ifdef SQLITE_TEST
1.2128 +int sqlite3_opentemp_count = 0; /* Only defined in SQLITE_TEST builds */
1.2129 +#endif
1.2130 +
1.2131 +/*
1.2132 +** Open a temporary file.
1.2133 +**
1.2134 +** Write the file descriptor into *fd. Return SQLITE_OK on success or some
1.2135 +** other error code if we fail. The OS will automatically delete the temporary
1.2136 +** file when it is closed.
1.2137 +*/
1.2138 +static int sqlite3PagerOpentemp(
1.2139 + Pager *pPager, /* The pager object */
1.2140 + sqlite3_file *pFile, /* Write the file descriptor here */
1.2141 + int vfsFlags /* Flags passed through to the VFS */
1.2142 +){
1.2143 + int rc;
1.2144 +
1.2145 +#ifdef SQLITE_TEST
1.2146 + sqlite3_opentemp_count++; /* Used for testing and analysis only */
1.2147 +#endif
1.2148 +
1.2149 + vfsFlags |= SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE |
1.2150 + SQLITE_OPEN_EXCLUSIVE | SQLITE_OPEN_DELETEONCLOSE; /* Always added on top of the caller's flags */
1.2151 + rc = sqlite3OsOpen(pPager->pVfs, 0, pFile, vfsFlags, 0); /* A null filename is passed; no name is chosen here */
1.2152 + assert( rc!=SQLITE_OK || pFile->pMethods ); /* A successful open must have set pMethods */
1.2153 + return rc;
1.2154 +}
1.2155 +
1.2156 +/*
1.2157 +** Create a new page cache and put a pointer to the page cache in *ppPager.
1.2158 +** The file to be cached need not exist. The file is not locked until
1.2159 +** the first call to sqlite3PagerGet() and is only held open until the
1.2160 +** last page is released using sqlite3PagerUnref().
1.2161 +**
1.2162 +** If zFilename is NULL then a randomly-named temporary file is created
1.2163 +** and used as the file to be cached. The file will be deleted
1.2164 +** automatically when it is closed.
1.2165 +**
1.2166 +** If zFilename is ":memory:" then all information is held in cache.
1.2167 +** It is never written to disk. This can be used to implement an
1.2168 +** in-memory database.
1.2169 +*/
1.2170 +int sqlite3PagerOpen(
1.2171 + sqlite3_vfs *pVfs, /* The virtual file system to use */
1.2172 + Pager **ppPager, /* Return the Pager structure here */
1.2173 + const char *zFilename, /* Name of the database file to open */
1.2174 + int nExtra, /* Extra bytes append to each in-memory page */
1.2175 + int flags, /* flags controlling this file */
1.2176 + int vfsFlags /* flags passed through to sqlite3_vfs.xOpen() */
1.2177 +){
1.2178 + u8 *pPtr;
1.2179 + Pager *pPager = 0;
1.2180 + int rc = SQLITE_OK;
1.2181 + int i;
1.2182 + int tempFile = 0;
1.2183 + int memDb = 0;
1.2184 + int readOnly = 0;
1.2185 + int useJournal = (flags & PAGER_OMIT_JOURNAL)==0;
1.2186 + int noReadlock = (flags & PAGER_NO_READLOCK)!=0;
1.2187 + int journalFileSize = sqlite3JournalSize(pVfs);
1.2188 + int szPageDflt = SQLITE_DEFAULT_PAGE_SIZE;
1.2189 + char *zPathname = 0;
1.2190 + int nPathname = 0;
1.2191 +
1.2192 + /* The default return is a NULL pointer */
1.2193 + *ppPager = 0;
1.2194 +
1.2195 + /* Compute and store the full pathname in an allocated buffer pointed
1.2196 + ** to by zPathname, length nPathname. Or, if this is a temporary file,
1.2197 + ** leave both nPathname and zPathname set to 0.
1.2198 + */
1.2199 + if( zFilename && zFilename[0] ){
1.2200 + nPathname = pVfs->mxPathname+1;
1.2201 + zPathname = sqlite3Malloc(nPathname*2);
1.2202 + if( zPathname==0 ){
1.2203 + return SQLITE_NOMEM;
1.2204 + }
1.2205 +#ifndef SQLITE_OMIT_MEMORYDB
1.2206 + if( strcmp(zFilename,":memory:")==0 ){
1.2207 + memDb = 1;
1.2208 + zPathname[0] = 0;
1.2209 + }else
1.2210 +#endif
1.2211 + {
1.2212 + rc = sqlite3OsFullPathname(pVfs, zFilename, nPathname, zPathname);
1.2213 + }
1.2214 + if( rc!=SQLITE_OK ){
1.2215 + sqlite3_free(zPathname);
1.2216 + return rc;
1.2217 + }
1.2218 + nPathname = strlen(zPathname);
1.2219 + }
1.2220 +
1.2221 + /* Allocate memory for the pager structure */
1.2222 + pPager = sqlite3MallocZero(
1.2223 + sizeof(*pPager) + /* Pager structure */
1.2224 + journalFileSize + /* The journal file structure */
1.2225 + pVfs->szOsFile * 3 + /* The main db and two journal files */
1.2226 + 3*nPathname + 40 /* zFilename, zDirectory, zJournal */
1.2227 + );
1.2228 + if( !pPager ){
1.2229 + sqlite3_free(zPathname);
1.2230 + return SQLITE_NOMEM;
1.2231 + }
1.2232 + pPtr = (u8 *)&pPager[1];
1.2233 + pPager->vfsFlags = vfsFlags;
1.2234 + pPager->fd = (sqlite3_file*)&pPtr[pVfs->szOsFile*0];
1.2235 + pPager->stfd = (sqlite3_file*)&pPtr[pVfs->szOsFile*1];
1.2236 + pPager->jfd = (sqlite3_file*)&pPtr[pVfs->szOsFile*2];
1.2237 + pPager->zFilename = (char*)&pPtr[pVfs->szOsFile*2+journalFileSize];
1.2238 + pPager->zDirectory = &pPager->zFilename[nPathname+1];
1.2239 + pPager->zJournal = &pPager->zDirectory[nPathname+1];
1.2240 + pPager->pVfs = pVfs;
1.2241 + if( zPathname ){
1.2242 + memcpy(pPager->zFilename, zPathname, nPathname+1);
1.2243 + sqlite3_free(zPathname);
1.2244 + }
1.2245 +
1.2246 + /* Open the pager file.
1.2247 + */
1.2248 + if( zFilename && zFilename[0] && !memDb ){
1.2249 + if( nPathname>(pVfs->mxPathname - sizeof("-journal")) ){
1.2250 + rc = SQLITE_CANTOPEN;
1.2251 + }else{
1.2252 + int fout = 0;
1.2253 + rc = sqlite3OsOpen(pVfs, pPager->zFilename, pPager->fd,
1.2254 + pPager->vfsFlags, &fout);
1.2255 + readOnly = (fout&SQLITE_OPEN_READONLY);
1.2256 +
1.2257 + /* If the file was successfully opened for read/write access,
1.2258 + ** choose a default page size in case we have to create the
1.2259 + ** database file. The default page size is the maximum of:
1.2260 + **
1.2261 + ** + SQLITE_DEFAULT_PAGE_SIZE,
1.2262 + ** + The value returned by sqlite3OsSectorSize()
1.2263 + ** + The largest page size that can be written atomically.
1.2264 + */
1.2265 + if( rc==SQLITE_OK && !readOnly ){
1.2266 + int iSectorSize = sqlite3OsSectorSize(pPager->fd);
1.2267 + if( szPageDflt<iSectorSize ){
1.2268 + szPageDflt = iSectorSize;
1.2269 + }
1.2270 +#ifdef SQLITE_ENABLE_ATOMIC_WRITE
1.2271 + {
1.2272 + int iDc = sqlite3OsDeviceCharacteristics(pPager->fd);
1.2273 + int ii;
1.2274 + assert(SQLITE_IOCAP_ATOMIC512==(512>>8));
1.2275 + assert(SQLITE_IOCAP_ATOMIC64K==(65536>>8));
1.2276 + assert(SQLITE_MAX_DEFAULT_PAGE_SIZE<=65536);
1.2277 + for(ii=szPageDflt; ii<=SQLITE_MAX_DEFAULT_PAGE_SIZE; ii=ii*2){
1.2278 + if( iDc&(SQLITE_IOCAP_ATOMIC|(ii>>8)) ) szPageDflt = ii;
1.2279 + }
1.2280 + }
1.2281 +#endif
1.2282 + if( szPageDflt>SQLITE_MAX_DEFAULT_PAGE_SIZE ){
1.2283 + szPageDflt = SQLITE_MAX_DEFAULT_PAGE_SIZE;
1.2284 + }
1.2285 + }
1.2286 + }
1.2287 + }else if( !memDb ){
1.2288 + /* If a temporary file is requested, it is not opened immediately.
1.2289 + ** In this case we accept the default page size and delay actually
1.2290 + ** opening the file until the first call to OsWrite().
1.2291 + */
1.2292 + tempFile = 1;
1.2293 + pPager->state = PAGER_EXCLUSIVE;
1.2294 + }
1.2295 +
1.2296 + if( pPager && rc==SQLITE_OK ){
1.2297 + pPager->pTmpSpace = sqlite3PageMalloc(szPageDflt);
1.2298 + }
1.2299 +
1.2300 + /* If an error occurred in either of the blocks above.
1.2301 + ** Free the Pager structure and close the file.
1.2302 + ** Since the pager is not allocated there is no need to set
1.2303 + ** any Pager.errMask variables.
1.2304 + */
1.2305 + if( !pPager || !pPager->pTmpSpace ){
1.2306 + sqlite3OsClose(pPager->fd);
1.2307 + sqlite3_free(pPager);
1.2308 + return ((rc==SQLITE_OK)?SQLITE_NOMEM:rc);
1.2309 + }
1.2310 +
1.2311 + PAGERTRACE3("OPEN %d %s\n", FILEHANDLEID(pPager->fd), pPager->zFilename);
1.2312 + IOTRACE(("OPEN %p %s\n", pPager, pPager->zFilename))
1.2313 +
1.2314 + /* Fill in Pager.zDirectory[] */
1.2315 + memcpy(pPager->zDirectory, pPager->zFilename, nPathname+1);
1.2316 + for(i=strlen(pPager->zDirectory); i>0 && pPager->zDirectory[i-1]!='/'; i--){}
1.2317 + if( i>0 ) pPager->zDirectory[i-1] = 0;
1.2318 +
1.2319 + /* Fill in Pager.zJournal[] */
1.2320 + if( zPathname ){
1.2321 + memcpy(pPager->zJournal, pPager->zFilename, nPathname);
1.2322 + memcpy(&pPager->zJournal[nPathname], "-journal", 9);
1.2323 + }else{
1.2324 + pPager->zJournal = 0;
1.2325 + }
1.2326 +
1.2327 + /* pPager->journalOpen = 0; */
1.2328 + pPager->useJournal = useJournal && !memDb;
1.2329 + pPager->noReadlock = noReadlock && readOnly;
1.2330 + /* pPager->stmtOpen = 0; */
1.2331 + /* pPager->stmtInUse = 0; */
1.2332 + /* pPager->nRef = 0; */
1.2333 + pPager->dbSize = memDb-1;
1.2334 + pPager->pageSize = szPageDflt;
1.2335 + /* pPager->stmtSize = 0; */
1.2336 + /* pPager->stmtJSize = 0; */
1.2337 + /* pPager->nPage = 0; */
1.2338 + pPager->mxPage = 100;
1.2339 + pPager->mxPgno = SQLITE_MAX_PAGE_COUNT;
1.2340 + /* pPager->state = PAGER_UNLOCK; */
1.2341 + assert( pPager->state == (tempFile ? PAGER_EXCLUSIVE : PAGER_UNLOCK) );
1.2342 + /* pPager->errMask = 0; */
1.2343 + pPager->tempFile = tempFile;
1.2344 + assert( tempFile==PAGER_LOCKINGMODE_NORMAL
1.2345 + || tempFile==PAGER_LOCKINGMODE_EXCLUSIVE );
1.2346 + assert( PAGER_LOCKINGMODE_EXCLUSIVE==1 );
1.2347 + pPager->exclusiveMode = tempFile;
1.2348 + pPager->memDb = memDb;
1.2349 + pPager->readOnly = readOnly;
1.2350 + /* pPager->needSync = 0; */
1.2351 + pPager->noSync = pPager->tempFile || !useJournal;
1.2352 + pPager->fullSync = (pPager->noSync?0:1);
1.2353 + pPager->sync_flags = SQLITE_SYNC_NORMAL;
1.2354 + /* pPager->pFirst = 0; */
1.2355 + /* pPager->pFirstSynced = 0; */
1.2356 + /* pPager->pLast = 0; */
1.2357 + pPager->nExtra = FORCE_ALIGNMENT(nExtra);
1.2358 + pPager->journalSizeLimit = SQLITE_DEFAULT_JOURNAL_SIZE_LIMIT;
1.2359 + assert(pPager->fd->pMethods||memDb||tempFile);
1.2360 + if( !memDb ){
1.2361 + setSectorSize(pPager);
1.2362 + }
1.2363 + /* pPager->pBusyHandler = 0; */
1.2364 + /* memset(pPager->aHash, 0, sizeof(pPager->aHash)); */
1.2365 + *ppPager = pPager;
1.2366 +#ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
1.2367 + pPager->iInUseMM = 0;
1.2368 + pPager->iInUseDB = 0;
1.2369 + if( !memDb ){
1.2370 +#ifndef SQLITE_MUTEX_NOOP
1.2371 + sqlite3_mutex *mutex = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MEM2);
1.2372 +#endif
1.2373 + sqlite3_mutex_enter(mutex);
1.2374 + pPager->pNext = sqlite3PagerList;
1.2375 + if( sqlite3PagerList ){
1.2376 + assert( sqlite3PagerList->pPrev==0 );
1.2377 + sqlite3PagerList->pPrev = pPager;
1.2378 + }
1.2379 + pPager->pPrev = 0;
1.2380 + sqlite3PagerList = pPager;
1.2381 + sqlite3_mutex_leave(mutex);
1.2382 + }
1.2383 +#endif
1.2384 + return SQLITE_OK;
1.2385 +}
1.2386 +
1.2387 +/*
1.2388 +** Set the busy handler function.
1.2389 +*/
1.2390 +void sqlite3PagerSetBusyhandler(Pager *pPager, BusyHandler *pBusyHandler){
1.2391 + pPager->pBusyHandler = pBusyHandler;
1.2392 +}
1.2393 +
1.2394 +/*
1.2395 +** Set the destructor for this pager. If not NULL, the destructor is called
1.2396 +** when the reference count on each page reaches zero. The destructor can
1.2397 +** be used to clean up information in the extra segment appended to each page.
1.2398 +**
1.2399 +** The destructor is not called as a result sqlite3PagerClose().
1.2400 +** Destructors are only called by sqlite3PagerUnref().
1.2401 +*/
1.2402 +void sqlite3PagerSetDestructor(Pager *pPager, void (*xDesc)(DbPage*,int)){
1.2403 + pPager->xDestructor = xDesc;
1.2404 +}
1.2405 +
1.2406 +/*
1.2407 +** Set the reinitializer for this pager. If not NULL, the reinitializer
1.2408 +** is called when the content of a page in cache is restored to its original
1.2409 +** value as a result of a rollback. The callback gives higher-level code
1.2410 +** an opportunity to restore the EXTRA section to agree with the restored
1.2411 +** page data.
1.2412 +*/
1.2413 +void sqlite3PagerSetReiniter(Pager *pPager, void (*xReinit)(DbPage*,int)){
1.2414 + pPager->xReiniter = xReinit;
1.2415 +}
1.2416 +
1.2417 +/*
1.2418 +** Set the page size to *pPageSize. If the suggest new page size is
1.2419 +** inappropriate, then an alternative page size is set to that
1.2420 +** value before returning.
1.2421 +*/
1.2422 +int sqlite3PagerSetPagesize(Pager *pPager, u16 *pPageSize){
1.2423 + int rc = SQLITE_OK;
1.2424 + u16 pageSize = *pPageSize;
1.2425 + assert( pageSize==0 || (pageSize>=512 && pageSize<=SQLITE_MAX_PAGE_SIZE) );
1.2426 + if( pageSize && pageSize!=pPager->pageSize
1.2427 + && !pPager->memDb && pPager->nRef==0
1.2428 + ){
1.2429 + char *pNew = (char *)sqlite3PageMalloc(pageSize);
1.2430 + if( !pNew ){
1.2431 + rc = SQLITE_NOMEM;
1.2432 + }else{
1.2433 + pagerEnter(pPager);
1.2434 + pager_reset(pPager);
1.2435 + pPager->pageSize = pageSize;
1.2436 + setSectorSize(pPager);
1.2437 + sqlite3PageFree(pPager->pTmpSpace);
1.2438 + pPager->pTmpSpace = pNew;
1.2439 + pagerLeave(pPager);
1.2440 + }
1.2441 + }
1.2442 + *pPageSize = pPager->pageSize;
1.2443 + return rc;
1.2444 +}
1.2445 +
1.2446 +/*
1.2447 +** Return a pointer to the "temporary page" buffer held internally
1.2448 +** by the pager. This is a buffer that is big enough to hold the
1.2449 +** entire content of a database page. This buffer is used internally
1.2450 +** during rollback and will be overwritten whenever a rollback
1.2451 +** occurs. But other modules are free to use it too, as long as
1.2452 +** no rollbacks are happening.
1.2453 +*/
1.2454 +void *sqlite3PagerTempSpace(Pager *pPager){
1.2455 + return pPager->pTmpSpace;
1.2456 +}
1.2457 +
1.2458 +/*
1.2459 +** Attempt to set the maximum database page count if mxPage is positive.
1.2460 +** Make no changes if mxPage is zero or negative. And never reduce the
1.2461 +** maximum page count below the current size of the database.
1.2462 +**
1.2463 +** Regardless of mxPage, return the current maximum page count.
1.2464 +*/
1.2465 +int sqlite3PagerMaxPageCount(Pager *pPager, int mxPage){
1.2466 + if( mxPage>0 ){
1.2467 + pPager->mxPgno = mxPage;
1.2468 + }
1.2469 + sqlite3PagerPagecount(pPager, 0);
1.2470 + return pPager->mxPgno;
1.2471 +}
1.2472 +
1.2473 +/*
1.2474 +** The following set of routines are used to disable the simulated
1.2475 +** I/O error mechanism. These routines are used to avoid simulated
1.2476 +** errors in places where we do not care about errors.
1.2477 +**
1.2478 +** Unless -DSQLITE_TEST=1 is used, these routines are all no-ops
1.2479 +** and generate no code.
1.2480 +*/
1.2481 +#ifdef SQLITE_TEST
1.2482 +extern int sqlite3_io_error_pending;
1.2483 +extern int sqlite3_io_error_hit;
1.2484 +static int saved_cnt;
1.2485 +void disable_simulated_io_errors(void){
1.2486 + saved_cnt = sqlite3_io_error_pending;
1.2487 + sqlite3_io_error_pending = -1;
1.2488 +}
1.2489 +void enable_simulated_io_errors(void){
1.2490 + sqlite3_io_error_pending = saved_cnt;
1.2491 +}
1.2492 +#else
1.2493 +# define disable_simulated_io_errors()
1.2494 +# define enable_simulated_io_errors()
1.2495 +#endif
1.2496 +
1.2497 +/*
1.2498 +** Read the first N bytes from the beginning of the file into memory
1.2499 +** that pDest points to.
1.2500 +**
1.2501 +** No error checking is done. The rational for this is that this function
1.2502 +** may be called even if the file does not exist or contain a header. In
1.2503 +** these cases sqlite3OsRead() will return an error, to which the correct
1.2504 +** response is to zero the memory at pDest and continue. A real IO error
1.2505 +** will presumably recur and be picked up later (Todo: Think about this).
1.2506 +*/
1.2507 +int sqlite3PagerReadFileheader(Pager *pPager, int N, unsigned char *pDest){
1.2508 + int rc = SQLITE_OK;
1.2509 + memset(pDest, 0, N);
1.2510 + assert(MEMDB||pPager->fd->pMethods||pPager->tempFile);
1.2511 + if( pPager->fd->pMethods ){
1.2512 + IOTRACE(("DBHDR %p 0 %d\n", pPager, N))
1.2513 + rc = sqlite3OsRead(pPager->fd, pDest, N, 0);
1.2514 + if( rc==SQLITE_IOERR_SHORT_READ ){
1.2515 + rc = SQLITE_OK;
1.2516 + }
1.2517 + }
1.2518 + return rc;
1.2519 +}
1.2520 +
1.2521 +/*
1.2522 +** Return the total number of pages in the disk file associated with
1.2523 +** pPager.
1.2524 +**
1.2525 +** If the PENDING_BYTE lies on the page directly after the end of the
1.2526 +** file, then consider this page part of the file too. For example, if
1.2527 +** PENDING_BYTE is byte 4096 (the first byte of page 5) and the size of the
1.2528 +** file is 4096 bytes, 5 is returned instead of 4.
1.2529 +*/
1.2530 +int sqlite3PagerPagecount(Pager *pPager, int *pnPage){
1.2531 + i64 n = 0;
1.2532 + int rc;
1.2533 + assert( pPager!=0 );
1.2534 + if( pPager->errCode ){
1.2535 + return pPager->errCode;
1.2536 + }
1.2537 + if( pPager->dbSize>=0 ){
1.2538 + n = pPager->dbSize;
1.2539 + } else {
1.2540 + assert(pPager->fd->pMethods||pPager->tempFile);
1.2541 + if( (pPager->fd->pMethods)
1.2542 + && (rc = sqlite3OsFileSize(pPager->fd, &n))!=SQLITE_OK ){
1.2543 + pPager->nRef++;
1.2544 + pager_error(pPager, rc);
1.2545 + pPager->nRef--;
1.2546 + return rc;
1.2547 + }
1.2548 + if( n>0 && n<pPager->pageSize ){
1.2549 + n = 1;
1.2550 + }else{
1.2551 + n /= pPager->pageSize;
1.2552 + }
1.2553 + if( pPager->state!=PAGER_UNLOCK ){
1.2554 + pPager->dbSize = n;
1.2555 + }
1.2556 + }
1.2557 + if( n==(PENDING_BYTE/pPager->pageSize) ){
1.2558 + n++;
1.2559 + }
1.2560 + if( n>pPager->mxPgno ){
1.2561 + pPager->mxPgno = n;
1.2562 + }
1.2563 + if( pnPage ){
1.2564 + *pnPage = n;
1.2565 + }
1.2566 + return SQLITE_OK;
1.2567 +}
1.2568 +
1.2569 +
1.2570 +#ifndef SQLITE_OMIT_MEMORYDB
1.2571 +/*
1.2572 +** Clear a PgHistory block
1.2573 +*/
1.2574 +static void clearHistory(PgHistory *pHist){
1.2575 + sqlite3PageFree(pHist->pOrig);
1.2576 + sqlite3PageFree(pHist->pStmt);
1.2577 + pHist->pOrig = 0;
1.2578 + pHist->pStmt = 0;
1.2579 +}
1.2580 +#else
1.2581 +#define clearHistory(x)
1.2582 +#endif
1.2583 +
1.2584 +/*
1.2585 +** Forward declaration
1.2586 +*/
1.2587 +static int syncJournal(Pager*);
1.2588 +
1.2589 +/*
1.2590 +** Unlink pPg from its hash chain. Also set the page number to 0 to indicate
1.2591 +** that the page is not part of any hash chain. This is required because the
1.2592 +** sqlite3PagerMovepage() routine can leave a page in the
1.2593 +** pNextFree/pPrevFree list that is not a part of any hash-chain.
1.2594 +*/
1.2595 +static void unlinkHashChain(Pager *pPager, PgHdr *pPg){
1.2596 + if( pPg->pgno==0 ){
1.2597 + assert( pPg->pNextHash==0 && pPg->pPrevHash==0 );
1.2598 + return;
1.2599 + }
1.2600 + if( pPg->pNextHash ){
1.2601 + pPg->pNextHash->pPrevHash = pPg->pPrevHash;
1.2602 + }
1.2603 + if( pPg->pPrevHash ){
1.2604 + assert( pPager->aHash[pPg->pgno & (pPager->nHash-1)]!=pPg );
1.2605 + pPg->pPrevHash->pNextHash = pPg->pNextHash;
1.2606 + }else{
1.2607 + int h = pPg->pgno & (pPager->nHash-1);
1.2608 + pPager->aHash[h] = pPg->pNextHash;
1.2609 + }
1.2610 + if( MEMDB ){
1.2611 + clearHistory(PGHDR_TO_HIST(pPg, pPager));
1.2612 + }
1.2613 + pPg->pgno = 0;
1.2614 + pPg->pNextHash = pPg->pPrevHash = 0;
1.2615 +}
1.2616 +
1.2617 +/*
1.2618 +** Unlink a page from the free list (the list of all pages where nRef==0)
1.2619 +** and from its hash collision chain.
1.2620 +*/
1.2621 +static void unlinkPage(PgHdr *pPg){
1.2622 + Pager *pPager = pPg->pPager;
1.2623 +
1.2624 + /* Unlink from free page list */
1.2625 + lruListRemove(pPg);
1.2626 +
1.2627 + /* Unlink from the pgno hash table */
1.2628 + unlinkHashChain(pPager, pPg);
1.2629 +}
1.2630 +
1.2631 +/*
1.2632 +** This routine is used to truncate the cache when a database
1.2633 +** is truncated. Drop from the cache all pages whose pgno is
1.2634 +** larger than pPager->dbSize and is unreferenced.
1.2635 +**
1.2636 +** Referenced pages larger than pPager->dbSize are zeroed.
1.2637 +**
1.2638 +** Actually, at the point this routine is called, it would be
1.2639 +** an error to have a referenced page. But rather than delete
1.2640 +** that page and guarantee a subsequent segfault, it seems better
1.2641 +** to zero it and hope that we error out sanely.
1.2642 +*/
1.2643 +static void pager_truncate_cache(Pager *pPager){
1.2644 + PgHdr *pPg;
1.2645 + PgHdr **ppPg;
1.2646 + int dbSize = pPager->dbSize;
1.2647 +
1.2648 + ppPg = &pPager->pAll;
1.2649 + while( (pPg = *ppPg)!=0 ){
1.2650 + if( pPg->pgno<=dbSize ){
1.2651 + ppPg = &pPg->pNextAll;
1.2652 + }else if( pPg->nRef>0 ){
1.2653 + memset(PGHDR_TO_DATA(pPg), 0, pPager->pageSize);
1.2654 + ppPg = &pPg->pNextAll;
1.2655 + }else{
1.2656 + *ppPg = pPg->pNextAll;
1.2657 +#ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
1.2658 + if( *ppPg ){
1.2659 + (*ppPg)->pPrevAll = pPg->pPrevAll;
1.2660 + }
1.2661 +#endif
1.2662 + IOTRACE(("PGFREE %p %d\n", pPager, pPg->pgno));
1.2663 + PAGER_INCR(sqlite3_pager_pgfree_count);
1.2664 + unlinkPage(pPg);
1.2665 + makeClean(pPg);
1.2666 + sqlite3PageFree(pPg->pData);
1.2667 + sqlite3_free(pPg);
1.2668 + pPager->nPage--;
1.2669 + }
1.2670 + }
1.2671 +}
1.2672 +
1.2673 +/*
1.2674 +** Try to obtain a lock on a file. Invoke the busy callback if the lock
1.2675 +** is currently not available. Repeat until the busy callback returns
1.2676 +** false or until the lock succeeds.
1.2677 +**
1.2678 +** Return SQLITE_OK on success and an error code if we cannot obtain
1.2679 +** the lock.
1.2680 +*/
1.2681 +static int pager_wait_on_lock(Pager *pPager, int locktype){
1.2682 + int rc;
1.2683 +
1.2684 + /* The OS lock values must be the same as the Pager lock values */
1.2685 + assert( PAGER_SHARED==SHARED_LOCK );
1.2686 + assert( PAGER_RESERVED==RESERVED_LOCK );
1.2687 + assert( PAGER_EXCLUSIVE==EXCLUSIVE_LOCK );
1.2688 +
1.2689 + /* If the file is currently unlocked then the size must be unknown */
1.2690 + assert( pPager->state>=PAGER_SHARED || pPager->dbSize<0 || MEMDB );
1.2691 +
1.2692 + if( pPager->state>=locktype ){
1.2693 + rc = SQLITE_OK;
1.2694 + }else{
1.2695 + if( pPager->pBusyHandler ) pPager->pBusyHandler->nBusy = 0;
1.2696 + do {
1.2697 + rc = sqlite3OsLock(pPager->fd, locktype);
1.2698 + }while( rc==SQLITE_BUSY && sqlite3InvokeBusyHandler(pPager->pBusyHandler) );
1.2699 + if( rc==SQLITE_OK ){
1.2700 + pPager->state = locktype;
1.2701 + IOTRACE(("LOCK %p %d\n", pPager, locktype))
1.2702 + }
1.2703 + }
1.2704 + return rc;
1.2705 +}
1.2706 +
1.2707 +/*
1.2708 +** Truncate the file to the number of pages specified.
1.2709 +*/
1.2710 +int sqlite3PagerTruncate(Pager *pPager, Pgno nPage){
1.2711 + int rc;
1.2712 + assert( pPager->state>=PAGER_SHARED || MEMDB );
1.2713 + sqlite3PagerPagecount(pPager, 0);
1.2714 + if( pPager->errCode ){
1.2715 + rc = pPager->errCode;
1.2716 + return rc;
1.2717 + }
1.2718 + if( nPage>=(unsigned)pPager->dbSize ){
1.2719 + return SQLITE_OK;
1.2720 + }
1.2721 + if( MEMDB ){
1.2722 + pPager->dbSize = nPage;
1.2723 + pager_truncate_cache(pPager);
1.2724 + return SQLITE_OK;
1.2725 + }
1.2726 + pagerEnter(pPager);
1.2727 + rc = syncJournal(pPager);
1.2728 + pagerLeave(pPager);
1.2729 + if( rc!=SQLITE_OK ){
1.2730 + return rc;
1.2731 + }
1.2732 +
1.2733 + /* Get an exclusive lock on the database before truncating. */
1.2734 + pagerEnter(pPager);
1.2735 + rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK);
1.2736 + pagerLeave(pPager);
1.2737 + if( rc!=SQLITE_OK ){
1.2738 + return rc;
1.2739 + }
1.2740 +
1.2741 + rc = pager_truncate(pPager, nPage);
1.2742 + return rc;
1.2743 +}
1.2744 +
1.2745 +/*
1.2746 +** Shutdown the page cache. Free all memory and close all files.
1.2747 +**
1.2748 +** If a transaction was in progress when this routine is called, that
1.2749 +** transaction is rolled back. All outstanding pages are invalidated
1.2750 +** and their memory is freed. Any attempt to use a page associated
1.2751 +** with this page cache after this function returns will likely
1.2752 +** result in a coredump.
1.2753 +**
1.2754 +** This function always succeeds. If a transaction is active an attempt
1.2755 +** is made to roll it back. If an error occurs during the rollback
1.2756 +** a hot journal may be left in the filesystem but no error is returned
1.2757 +** to the caller.
1.2758 +*/
1.2759 +int sqlite3PagerClose(Pager *pPager){
1.2760 +#ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
1.2761 + if( !MEMDB ){
1.2762 +#ifndef SQLITE_MUTEX_NOOP
1.2763 + sqlite3_mutex *mutex = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MEM2);
1.2764 +#endif
1.2765 + sqlite3_mutex_enter(mutex);
1.2766 + if( pPager->pPrev ){
1.2767 + pPager->pPrev->pNext = pPager->pNext;
1.2768 + }else{
1.2769 + sqlite3PagerList = pPager->pNext;
1.2770 + }
1.2771 + if( pPager->pNext ){
1.2772 + pPager->pNext->pPrev = pPager->pPrev;
1.2773 + }
1.2774 + sqlite3_mutex_leave(mutex);
1.2775 + }
1.2776 +#endif
1.2777 +
1.2778 + disable_simulated_io_errors();
1.2779 + sqlite3BeginBenignMalloc();
1.2780 + pPager->errCode = 0;
1.2781 + pPager->exclusiveMode = 0;
1.2782 + pager_reset(pPager);
1.2783 + pagerUnlockAndRollback(pPager);
1.2784 + enable_simulated_io_errors();
1.2785 + sqlite3EndBenignMalloc();
1.2786 + PAGERTRACE2("CLOSE %d\n", PAGERID(pPager));
1.2787 + IOTRACE(("CLOSE %p\n", pPager))
1.2788 + if( pPager->journalOpen ){
1.2789 + sqlite3OsClose(pPager->jfd);
1.2790 + }
1.2791 + sqlite3BitvecDestroy(pPager->pInJournal);
1.2792 + if( pPager->stmtOpen ){
1.2793 + sqlite3OsClose(pPager->stfd);
1.2794 + }
1.2795 + sqlite3OsClose(pPager->fd);
1.2796 + /* Temp files are automatically deleted by the OS
1.2797 + ** if( pPager->tempFile ){
1.2798 + ** sqlite3OsDelete(pPager->zFilename);
1.2799 + ** }
1.2800 + */
1.2801 +
1.2802 + sqlite3_free(pPager->aHash);
1.2803 + sqlite3PageFree(pPager->pTmpSpace);
1.2804 + sqlite3_free(pPager);
1.2805 + return SQLITE_OK;
1.2806 +}
1.2807 +
1.2808 +#if !defined(NDEBUG) || defined(SQLITE_TEST)
1.2809 +/*
1.2810 +** Return the page number for the given page data.
1.2811 +*/
1.2812 +Pgno sqlite3PagerPagenumber(DbPage *p){
1.2813 + return p->pgno;
1.2814 +}
1.2815 +#endif
1.2816 +
1.2817 +/*
1.2818 +** The page_ref() function increments the reference count for a page.
1.2819 +** If the page is currently on the freelist (the reference count is zero) then
1.2820 +** remove it from the freelist.
1.2821 +**
1.2822 +** For non-test systems, page_ref() is a macro that calls _page_ref()
1.2823 +** online of the reference count is zero. For test systems, page_ref()
1.2824 +** is a real function so that we can set breakpoints and trace it.
1.2825 +*/
1.2826 +static void _page_ref(PgHdr *pPg){
1.2827 + if( pPg->nRef==0 ){
1.2828 + /* The page is currently on the freelist. Remove it. */
1.2829 + lruListRemove(pPg);
1.2830 + pPg->pPager->nRef++;
1.2831 + }
1.2832 + pPg->nRef++;
1.2833 +}
1.2834 +#ifdef SQLITE_DEBUG
1.2835 + static void page_ref(PgHdr *pPg){
1.2836 + if( pPg->nRef==0 ){
1.2837 + _page_ref(pPg);
1.2838 + }else{
1.2839 + pPg->nRef++;
1.2840 + }
1.2841 + }
1.2842 +#else
1.2843 +# define page_ref(P) ((P)->nRef==0?_page_ref(P):(void)(P)->nRef++)
1.2844 +#endif
1.2845 +
1.2846 +/*
1.2847 +** Increment the reference count for a page. The input pointer is
1.2848 +** a reference to the page data.
1.2849 +*/
1.2850 +int sqlite3PagerRef(DbPage *pPg){
1.2851 + pagerEnter(pPg->pPager);
1.2852 + page_ref(pPg);
1.2853 + pagerLeave(pPg->pPager);
1.2854 + return SQLITE_OK;
1.2855 +}
1.2856 +
1.2857 +/*
1.2858 +** Sync the journal. In other words, make sure all the pages that have
1.2859 +** been written to the journal have actually reached the surface of the
1.2860 +** disk. It is not safe to modify the original database file until after
1.2861 +** the journal has been synced. If the original database is modified before
1.2862 +** the journal is synced and a power failure occurs, the unsynced journal
1.2863 +** data would be lost and we would be unable to completely rollback the
1.2864 +** database changes. Database corruption would occur.
1.2865 +**
1.2866 +** This routine also updates the nRec field in the header of the journal.
1.2867 +** (See comments on the pager_playback() routine for additional information.)
1.2868 +** If the sync mode is FULL, two syncs will occur. First the whole journal
1.2869 +** is synced, then the nRec field is updated, then a second sync occurs.
1.2870 +**
1.2871 +** For temporary databases, we do not care if we are able to rollback
1.2872 +** after a power failure, so no sync occurs.
1.2873 +**
1.2874 +** If the IOCAP_SEQUENTIAL flag is set for the persistent media on which
1.2875 +** the database is stored, then OsSync() is never called on the journal
1.2876 +** file. In this case all that is required is to update the nRec field in
1.2877 +** the journal header.
1.2878 +**
1.2879 +** This routine clears the needSync field of every page current held in
1.2880 +** memory.
1.2881 +*/
1.2882 +static int syncJournal(Pager *pPager){
1.2883 + PgHdr *pPg;
1.2884 + int rc = SQLITE_OK;
1.2885 +
1.2886 + /* Sync the journal before modifying the main database
1.2887 + ** (assuming there is a journal and it needs to be synced.)
1.2888 + */
1.2889 + if( pPager->needSync ){
1.2890 + if( !pPager->tempFile ){
1.2891 + int iDc = sqlite3OsDeviceCharacteristics(pPager->fd);
1.2892 + assert( pPager->journalOpen );
1.2893 +
1.2894 + if( 0==(iDc&SQLITE_IOCAP_SAFE_APPEND) ){
1.2895 + /* Write the nRec value into the journal file header. If in
1.2896 + ** full-synchronous mode, sync the journal first. This ensures that
1.2897 + ** all data has really hit the disk before nRec is updated to mark
1.2898 + ** it as a candidate for rollback.
1.2899 + **
1.2900 + ** This is not required if the persistent media supports the
1.2901 + ** SAFE_APPEND property. Because in this case it is not possible
1.2902 + ** for garbage data to be appended to the file, the nRec field
1.2903 + ** is populated with 0xFFFFFFFF when the journal header is written
1.2904 + ** and never needs to be updated.
1.2905 + */
1.2906 + i64 jrnlOff;
1.2907 + if( pPager->fullSync && 0==(iDc&SQLITE_IOCAP_SEQUENTIAL) ){
1.2908 + PAGERTRACE2("SYNC journal of %d\n", PAGERID(pPager));
1.2909 + IOTRACE(("JSYNC %p\n", pPager))
1.2910 + rc = sqlite3OsSync(pPager->jfd, pPager->sync_flags);
1.2911 + if( rc!=0 ) return rc;
1.2912 + }
1.2913 +
1.2914 + jrnlOff = pPager->journalHdr + sizeof(aJournalMagic);
1.2915 + IOTRACE(("JHDR %p %lld %d\n", pPager, jrnlOff, 4));
1.2916 + rc = write32bits(pPager->jfd, jrnlOff, pPager->nRec);
1.2917 + if( rc ) return rc;
1.2918 + }
1.2919 + if( 0==(iDc&SQLITE_IOCAP_SEQUENTIAL) ){
1.2920 + PAGERTRACE2("SYNC journal of %d\n", PAGERID(pPager));
1.2921 + IOTRACE(("JSYNC %p\n", pPager))
1.2922 + rc = sqlite3OsSync(pPager->jfd, pPager->sync_flags|
1.2923 + (pPager->sync_flags==SQLITE_SYNC_FULL?SQLITE_SYNC_DATAONLY:0)
1.2924 + );
1.2925 + if( rc!=0 ) return rc;
1.2926 + }
1.2927 + pPager->journalStarted = 1;
1.2928 + }
1.2929 + pPager->needSync = 0;
1.2930 +
1.2931 + /* Erase the needSync flag from every page.
1.2932 + */
1.2933 + for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
1.2934 + pPg->needSync = 0;
1.2935 + }
1.2936 + lruListSetFirstSynced(pPager);
1.2937 + }
1.2938 +
1.2939 +#ifndef NDEBUG
1.2940 + /* If the Pager.needSync flag is clear then the PgHdr.needSync
1.2941 + ** flag must also be clear for all pages. Verify that this
1.2942 + ** invariant is true.
1.2943 + */
1.2944 + else{
1.2945 + for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
1.2946 + assert( pPg->needSync==0 );
1.2947 + }
1.2948 + assert( pPager->lru.pFirstSynced==pPager->lru.pFirst );
1.2949 + }
1.2950 +#endif
1.2951 +
1.2952 + return rc;
1.2953 +}
1.2954 +
1.2955 +/*
1.2956 +** Merge two lists of pages connected by pDirty and in pgno order.
1.2957 +** Do not both fixing the pPrevDirty pointers.
1.2958 +*/
1.2959 +static PgHdr *merge_pagelist(PgHdr *pA, PgHdr *pB){
1.2960 + PgHdr result, *pTail;
1.2961 + pTail = &result;
1.2962 + while( pA && pB ){
1.2963 + if( pA->pgno<pB->pgno ){
1.2964 + pTail->pDirty = pA;
1.2965 + pTail = pA;
1.2966 + pA = pA->pDirty;
1.2967 + }else{
1.2968 + pTail->pDirty = pB;
1.2969 + pTail = pB;
1.2970 + pB = pB->pDirty;
1.2971 + }
1.2972 + }
1.2973 + if( pA ){
1.2974 + pTail->pDirty = pA;
1.2975 + }else if( pB ){
1.2976 + pTail->pDirty = pB;
1.2977 + }else{
1.2978 + pTail->pDirty = 0;
1.2979 + }
1.2980 + return result.pDirty;
1.2981 +}
1.2982 +
1.2983 +/*
1.2984 +** Sort the list of pages in accending order by pgno. Pages are
1.2985 +** connected by pDirty pointers. The pPrevDirty pointers are
1.2986 +** corrupted by this sort.
1.2987 +*/
1.2988 +#define N_SORT_BUCKET_ALLOC 25
1.2989 +#define N_SORT_BUCKET 25
1.2990 +#ifdef SQLITE_TEST
1.2991 + int sqlite3_pager_n_sort_bucket = 0;
1.2992 + #undef N_SORT_BUCKET
1.2993 + #define N_SORT_BUCKET \
1.2994 + (sqlite3_pager_n_sort_bucket?sqlite3_pager_n_sort_bucket:N_SORT_BUCKET_ALLOC)
1.2995 +#endif
1.2996 +static PgHdr *sort_pagelist(PgHdr *pIn){
1.2997 + PgHdr *a[N_SORT_BUCKET_ALLOC], *p;
1.2998 + int i;
1.2999 + memset(a, 0, sizeof(a));
1.3000 + while( pIn ){
1.3001 + p = pIn;
1.3002 + pIn = p->pDirty;
1.3003 + p->pDirty = 0;
1.3004 + for(i=0; i<N_SORT_BUCKET-1; i++){
1.3005 + if( a[i]==0 ){
1.3006 + a[i] = p;
1.3007 + break;
1.3008 + }else{
1.3009 + p = merge_pagelist(a[i], p);
1.3010 + a[i] = 0;
1.3011 + }
1.3012 + }
1.3013 + if( i==N_SORT_BUCKET-1 ){
1.3014 + /* Coverage: To get here, there need to be 2^(N_SORT_BUCKET)
1.3015 + ** elements in the input list. This is possible, but impractical.
1.3016 + ** Testing this line is the point of global variable
1.3017 + ** sqlite3_pager_n_sort_bucket.
1.3018 + */
1.3019 + a[i] = merge_pagelist(a[i], p);
1.3020 + }
1.3021 + }
1.3022 + p = a[0];
1.3023 + for(i=1; i<N_SORT_BUCKET; i++){
1.3024 + p = merge_pagelist(p, a[i]);
1.3025 + }
1.3026 + return p;
1.3027 +}
1.3028 +
1.3029 +/*
1.3030 +** Given a list of pages (connected by the PgHdr.pDirty pointer) write
1.3031 +** every one of those pages out to the database file and mark them all
1.3032 +** as clean.
1.3033 +*/
1.3034 +static int pager_write_pagelist(PgHdr *pList){
1.3035 + Pager *pPager;
1.3036 + PgHdr *p;
1.3037 + int rc;
1.3038 +
1.3039 + if( pList==0 ) return SQLITE_OK;
1.3040 + pPager = pList->pPager;
1.3041 +
1.3042 + /* At this point there may be either a RESERVED or EXCLUSIVE lock on the
1.3043 + ** database file. If there is already an EXCLUSIVE lock, the following
1.3044 + ** calls to sqlite3OsLock() are no-ops.
1.3045 + **
1.3046 + ** Moving the lock from RESERVED to EXCLUSIVE actually involves going
1.3047 + ** through an intermediate state PENDING. A PENDING lock prevents new
1.3048 + ** readers from attaching to the database but is unsufficient for us to
1.3049 + ** write. The idea of a PENDING lock is to prevent new readers from
1.3050 + ** coming in while we wait for existing readers to clear.
1.3051 + **
1.3052 + ** While the pager is in the RESERVED state, the original database file
1.3053 + ** is unchanged and we can rollback without having to playback the
1.3054 + ** journal into the original database file. Once we transition to
1.3055 + ** EXCLUSIVE, it means the database file has been changed and any rollback
1.3056 + ** will require a journal playback.
1.3057 + */
1.3058 + rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK);
1.3059 + if( rc!=SQLITE_OK ){
1.3060 + return rc;
1.3061 + }
1.3062 +
1.3063 + pList = sort_pagelist(pList);
1.3064 + for(p=pList; p; p=p->pDirty){
1.3065 + assert( p->dirty );
1.3066 + p->dirty = 0;
1.3067 + }
1.3068 +
1.3069 + /* If the file has not yet been opened, open it now. */
1.3070 + if( !pPager->fd->pMethods ){
1.3071 + assert(pPager->tempFile);
1.3072 + rc = sqlite3PagerOpentemp(pPager, pPager->fd, pPager->vfsFlags);
1.3073 + if( rc ) return rc;
1.3074 + }
1.3075 +
1.3076 + while( pList ){
1.3077 + /* If there are dirty pages in the page cache with page numbers greater
1.3078 + ** than Pager.dbSize, this means sqlite3PagerTruncate() was called to
1.3079 + ** make the file smaller (presumably by auto-vacuum code). Do not write
1.3080 + ** any such pages to the file.
1.3081 + */
1.3082 + if( pList->pgno<=pPager->dbSize ){
1.3083 + i64 offset = (pList->pgno-1)*(i64)pPager->pageSize;
1.3084 + char *pData = CODEC2(pPager, PGHDR_TO_DATA(pList), pList->pgno, 6);
1.3085 + PAGERTRACE4("STORE %d page %d hash(%08x)\n",
1.3086 + PAGERID(pPager), pList->pgno, pager_pagehash(pList));
1.3087 + IOTRACE(("PGOUT %p %d\n", pPager, pList->pgno));
1.3088 + rc = sqlite3OsWrite(pPager->fd, pData, pPager->pageSize, offset);
1.3089 + PAGER_INCR(sqlite3_pager_writedb_count);
1.3090 + PAGER_INCR(pPager->nWrite);
1.3091 + if( pList->pgno==1 ){
1.3092 + memcpy(&pPager->dbFileVers, &pData[24], sizeof(pPager->dbFileVers));
1.3093 + }
1.3094 + }
1.3095 +#ifndef NDEBUG
1.3096 + else{
1.3097 + PAGERTRACE3("NOSTORE %d page %d\n", PAGERID(pPager), pList->pgno);
1.3098 + }
1.3099 +#endif
1.3100 + if( rc ) return rc;
1.3101 +#ifdef SQLITE_CHECK_PAGES
1.3102 + pList->pageHash = pager_pagehash(pList);
1.3103 +#endif
1.3104 + pList = pList->pDirty;
1.3105 + }
1.3106 + return SQLITE_OK;
1.3107 +}
1.3108 +
1.3109 +/*
1.3110 +** Collect every dirty page into a dirty list and
1.3111 +** return a pointer to the head of that list. All pages are
1.3112 +** collected even if they are still in use.
1.3113 +*/
1.3114 +static PgHdr *pager_get_all_dirty_pages(Pager *pPager){
1.3115 +
1.3116 +#ifndef NDEBUG
1.3117 + /* Verify the sanity of the dirty list when we are running
1.3118 + ** in debugging mode. This is expensive, so do not
1.3119 + ** do this on a normal build. */
1.3120 + int n1 = 0;
1.3121 + int n2 = 0;
1.3122 + PgHdr *p;
1.3123 + for(p=pPager->pAll; p; p=p->pNextAll){ if( p->dirty ) n1++; }
1.3124 + for(p=pPager->pDirty; p; p=p->pDirty){ n2++; }
1.3125 + assert( n1==n2 );
1.3126 +#endif
1.3127 +
1.3128 + return pPager->pDirty;
1.3129 +}
1.3130 +
1.3131 +/*
1.3132 +** Return 1 if there is a hot journal on the given pager.
1.3133 +** A hot journal is one that needs to be played back.
1.3134 +**
1.3135 +** If the current size of the database file is 0 but a journal file
1.3136 +** exists, that is probably an old journal left over from a prior
1.3137 +** database with the same name. Just delete the journal.
1.3138 +**
1.3139 +** Return negative if unable to determine the status of the journal.
1.3140 +**
1.3141 +** This routine does not open the journal file to examine its
1.3142 +** content. Hence, the journal might contain the name of a master
1.3143 +** journal file that has been deleted, and hence not be hot. Or
1.3144 +** the header of the journal might be zeroed out. This routine
1.3145 +** does not discover these cases of a non-hot journal - if the
1.3146 +** journal file exists and is not empty this routine assumes it
1.3147 +** is hot. The pager_playback() routine will discover that the
1.3148 +** journal file is not really hot and will no-op.
1.3149 +*/
1.3150 +static int hasHotJournal(Pager *pPager, int *pExists){
1.3151 + sqlite3_vfs *pVfs = pPager->pVfs;
1.3152 + int rc = SQLITE_OK;
1.3153 + *pExists = 0;
1.3154 + if( pPager->useJournal && pPager->fd->pMethods ){
1.3155 + int exists;
1.3156 + int locked;
1.3157 +
1.3158 + rc = sqlite3OsAccess(pVfs, pPager->zJournal, SQLITE_ACCESS_EXISTS, &exists);
1.3159 + if( rc==SQLITE_OK && exists ){
1.3160 + rc = sqlite3OsCheckReservedLock(pPager->fd, &locked);
1.3161 + }
1.3162 +
1.3163 + if( rc==SQLITE_OK && exists && !locked ){
1.3164 + int nPage;
1.3165 + rc = sqlite3PagerPagecount(pPager, &nPage);
1.3166 + if( rc==SQLITE_OK ){
1.3167 + if( nPage==0 ){
1.3168 + sqlite3OsDelete(pVfs, pPager->zJournal, 0);
1.3169 + }else{
1.3170 + *pExists = 1;
1.3171 + }
1.3172 + }
1.3173 + }
1.3174 + }
1.3175 +
1.3176 + return rc;
1.3177 +}
1.3178 +
1.3179 +/*
1.3180 +** Try to find a page in the cache that can be recycled.
1.3181 +**
1.3182 +** This routine may return SQLITE_IOERR, SQLITE_FULL or SQLITE_OK. It
1.3183 +** does not set the pPager->errCode variable.
1.3184 +*/
1.3185 +static int pager_recycle(Pager *pPager, PgHdr **ppPg){
1.3186 + PgHdr *pPg; /* The page chosen for recycling */
1.3187 + *ppPg = 0; /* Output stays NULL on every error return below */
1.3188 +
1.3189 + /* It is illegal to call this function unless the pager object
1.3190 + ** pointed to by pPager has at least one free page (page with nRef==0).
1.3191 + */
1.3192 + assert(!MEMDB);
1.3193 + assert(pPager->lru.pFirst);
1.3194 +
1.3195 + /* Find a page to recycle. Try to locate a page that does not
1.3196 + ** require us to do an fsync() on the journal.
1.3197 + */
1.3198 + pPg = pPager->lru.pFirstSynced;
1.3199 +
1.3200 + /* If we could not find a page that does not require an fsync()
1.3201 + ** on the journal file then fsync the journal file. This is a
1.3202 + ** very slow operation, so we work hard to avoid it. But sometimes
1.3203 + ** it can't be helped.
1.3204 + */
1.3205 + if( pPg==0 && pPager->lru.pFirst ){
1.3206 + if( !pPager->errCode ){ /* No journal I/O once an error code is set */
1.3207 + int iDc = sqlite3OsDeviceCharacteristics(pPager->fd);
1.3208 + int rc = syncJournal(pPager);
1.3209 + if( rc!=0 ){
1.3210 + return rc;
1.3211 + }
1.3212 + if( pPager->fullSync && 0==(iDc&SQLITE_IOCAP_SAFE_APPEND) ){
1.3213 + /* If in full-sync mode, write a new journal header into the
1.3214 + ** journal file. This is done to avoid ever modifying a journal
1.3215 + ** header that is involved in the rollback of pages that have
1.3216 + ** already been written to the database (in case the header is
1.3217 + ** trashed when the nRec field is updated).
1.3218 + */
1.3219 + pPager->nRec = 0;
1.3220 + assert( pPager->journalOff > 0 );
1.3221 + assert( pPager->doNotSync==0 );
1.3222 + rc = writeJournalHdr(pPager);
1.3223 + if( rc!=0 ){
1.3224 + return rc;
1.3225 + }
1.3226 + }
1.3227 + }
1.3228 + pPg = pPager->lru.pFirst; /* Fall back to the first entry of the full LRU list */
1.3229 + }
1.3230 +
1.3231 + assert( pPg->nRef==0 );
1.3232 +
1.3233 + /* Write the page to the database file if it is dirty.
1.3234 + */
1.3235 + if( pPg->dirty && !pPager->errCode ){
1.3236 + int rc;
1.3237 + assert( pPg->needSync==0 );
1.3238 + makeClean(pPg);
1.3239 + pPg->dirty = 1; /* Flag is set again after makeClean() for the write below */
1.3240 + pPg->pDirty = 0; /* No successor: the list passed below holds only pPg */
1.3241 + rc = pager_write_pagelist( pPg );
1.3242 + pPg->dirty = 0; /* Cleared whether or not the write succeeded */
1.3243 + if( rc!=SQLITE_OK ){
1.3244 + return rc;
1.3245 + }
1.3246 + }
1.3247 + assert( pPg->dirty==0 || pPager->errCode );
1.3248 +
1.3249 + /* If the page we are recycling is marked as alwaysRollback, then
1.3250 + ** set the global alwaysRollback flag, thus disabling the
1.3251 + ** sqlite3PagerDontRollback() optimization for the rest of this transaction.
1.3252 + ** It is necessary to do this because the page marked alwaysRollback
1.3253 + ** might be reloaded at a later time but at that point we won't remember
1.3254 + ** that it was marked alwaysRollback. This means that all pages must
1.3255 + ** be marked as alwaysRollback from here on out.
1.3256 + */
1.3257 + if( pPg->alwaysRollback ){
1.3258 + IOTRACE(("ALWAYS_ROLLBACK %p\n", pPager))
1.3259 + pPager->alwaysRollback = 1;
1.3260 + }
1.3261 +
1.3262 + /* Unlink the old page from the free list and the hash table
1.3263 + */
1.3264 + unlinkPage(pPg);
1.3265 + assert( pPg->pgno==0 );
1.3266 +
1.3267 + *ppPg = pPg; /* Success: hand the recycled page to the caller */
1.3268 + return SQLITE_OK;
1.3269 +}
1.3270 +
1.3271 +#ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
1.3272 +/*
1.3273 +** This function is called to free superfluous dynamically allocated memory
1.3274 +** held by the pager system. Memory in use by any SQLite pager allocated
1.3275 +** by the current thread may be sqlite3_free()ed.
1.3276 +**
1.3277 +** nReq is the number of bytes of memory required. Once this much has
1.3278 +** been released, the function returns. The return value is the total number
1.3279 +** of bytes of memory released.
1.3280 +*/
1.3281 +int sqlite3PagerReleaseMemory(int nReq){
1.3282 + int nReleased = 0; /* Bytes of memory released so far */
1.3283 + Pager *pPager; /* For looping over pagers */
1.3284 + BusyHandler *savedBusy; /* Saved copy of the busy handler */
1.3285 + int rc = SQLITE_OK; /* Result of the most recent pager_recycle() call */
1.3286 +
1.3287 + /* Acquire the memory-management mutex
1.3288 + */
1.3289 +#ifndef SQLITE_MUTEX_NOOP
1.3290 + sqlite3_mutex *mutex; /* The MEM2 mutex */
1.3291 + mutex = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MEM2);
1.3292 +#endif
1.3293 + sqlite3_mutex_enter(mutex);
1.3294 +
1.3295 + /* Signal all database connections that memory management wants
1.3296 + ** to have access to the pagers.
1.3297 + */
1.3298 + for(pPager=sqlite3PagerList; pPager; pPager=pPager->pNext){
1.3299 + pPager->iInUseMM = 1;
1.3300 + }
1.3301 +
1.3302 + while( rc==SQLITE_OK && (nReq<0 || nReleased<nReq) ){ /* nReq<0: no byte limit */
1.3303 + PgHdr *pPg; /* Candidate page picked from the global LRU list */
1.3304 + PgHdr *pRecycled; /* Page actually returned by pager_recycle() */
1.3305 +
1.3306 + /* Try to find a page to recycle that does not require a sync(). If
1.3307 + ** this is not possible, find one that does require a sync().
1.3308 + */
1.3309 + sqlite3_mutex_enter(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_LRU));
1.3310 + pPg = sqlite3LruPageList.pFirstSynced;
1.3311 + while( pPg && (pPg->needSync || pPg->pPager->iInUseDB) ){
1.3312 + pPg = pPg->gfree.pNext;
1.3313 + }
1.3314 + if( !pPg ){
1.3315 + pPg = sqlite3LruPageList.pFirst;
1.3316 + while( pPg && pPg->pPager->iInUseDB ){
1.3317 + pPg = pPg->gfree.pNext;
1.3318 + }
1.3319 + }
1.3320 + sqlite3_mutex_leave(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_LRU));
1.3321 +
1.3322 + /* If pPg==0, then the block above has failed to find a page to
1.3323 + ** recycle. In this case return early - no further memory will
1.3324 + ** be released.
1.3325 + */
1.3326 + if( !pPg ) break;
1.3327 +
1.3328 + pPager = pPg->pPager;
1.3329 + assert(!pPg->needSync || pPg==pPager->lru.pFirst);
1.3330 + assert(pPg->needSync || pPg==pPager->lru.pFirstSynced);
1.3331 +
1.3332 + savedBusy = pPager->pBusyHandler;
1.3333 + pPager->pBusyHandler = 0; /* Busy handler is removed for the recycle call only */
1.3334 + rc = pager_recycle(pPager, &pRecycled);
1.3335 + pPager->pBusyHandler = savedBusy;
1.3336 + assert(pRecycled==pPg || rc!=SQLITE_OK);
1.3337 + if( rc==SQLITE_OK ){
1.3338 + /* We've found a page to free. At this point the page has been
1.3339 + ** removed from the page hash-table, free-list and synced-list
1.3340 + ** (pFirstSynced). It is still in the all pages (pAll) list.
1.3341 + ** Remove it from this list before freeing.
1.3342 + **
1.3343 + ** Todo: Check the Pager.pStmt list to make sure this is Ok. It
1.3344 + ** probably is though.
1.3345 + */
1.3346 + PgHdr *pTmp; /* Predecessor of pPg in the pAll list */
1.3347 + assert( pPg );
1.3348 + if( pPg==pPager->pAll ){
1.3349 + assert(pPg->pPrevAll==0);
1.3350 + assert(pPg->pNextAll==0 || pPg->pNextAll->pPrevAll==pPg);
1.3351 + pPager->pAll = pPg->pNextAll;
1.3352 + if( pPager->pAll ){
1.3353 + pPager->pAll->pPrevAll = 0;
1.3354 + }
1.3355 + }else{
1.3356 + assert(pPg->pPrevAll);
1.3357 + assert(pPg->pPrevAll->pNextAll==pPg);
1.3358 + pTmp = pPg->pPrevAll;
1.3359 + pTmp->pNextAll = pPg->pNextAll;
1.3360 + if( pTmp->pNextAll ){
1.3361 + pTmp->pNextAll->pPrevAll = pTmp;
1.3362 + }
1.3363 + }
1.3364 + nReleased += (
1.3365 + sizeof(*pPg) + pPager->pageSize
1.3366 + + sizeof(u32) + pPager->nExtra
1.3367 + + MEMDB*sizeof(PgHistory)
1.3368 + );
1.3369 + IOTRACE(("PGFREE %p %d *\n", pPager, pPg->pgno));
1.3370 + PAGER_INCR(sqlite3_pager_pgfree_count);
1.3371 + sqlite3PageFree(pPg->pData);
1.3372 + sqlite3_free(pPg);
1.3373 + pPager->nPage--;
1.3374 + }else{
1.3375 + /* An error occurred whilst writing to the database file or
1.3376 + ** journal in pager_recycle(). The error is not returned to the
1.3377 + ** caller of this function. Instead, set the Pager.errCode variable.
1.3378 + ** The error will be returned to the user (or users, in the case
1.3379 + ** of a shared pager cache) of the pager for which the error occurred.
1.3380 + */
1.3381 + assert(
1.3382 + (rc&0xff)==SQLITE_IOERR ||
1.3383 + rc==SQLITE_FULL ||
1.3384 + rc==SQLITE_BUSY
1.3385 + );
1.3386 + assert( pPager->state>=PAGER_RESERVED );
1.3387 + pager_error(pPager, rc); /* rc stays non-OK, so the enclosing loop ends */
1.3388 + }
1.3389 + }
1.3390 +
1.3391 + /* Clear the memory management flags and release the mutex
1.3392 + */
1.3393 + for(pPager=sqlite3PagerList; pPager; pPager=pPager->pNext){
1.3394 + pPager->iInUseMM = 0;
1.3395 + }
1.3396 + sqlite3_mutex_leave(mutex);
1.3397 +
1.3398 + /* Return the number of bytes released
1.3399 + */
1.3400 + return nReleased;
1.3401 +}
1.3402 +#endif /* SQLITE_ENABLE_MEMORY_MANAGEMENT */
1.3403 +
1.3404 +/*
1.3405 +** Read the content of page pPg out of the database file.
1.3406 +*/
1.3407 +static int readDbPage(Pager *pPager, PgHdr *pPg, Pgno pgno){
1.3408 + int rc; /* Result of sqlite3OsRead(); returned to the caller */
1.3409 + i64 offset; /* Byte offset of page pgno within the database file */
1.3410 + assert( MEMDB==0 );
1.3411 + assert(pPager->fd->pMethods||pPager->tempFile);
1.3412 + if( !pPager->fd->pMethods ){
1.3413 + return SQLITE_IOERR_SHORT_READ; /* No open file handle to read from */
1.3414 + }
1.3415 + offset = (pgno-1)*(i64)pPager->pageSize; /* Page numbers start at 1 */
1.3416 + rc = sqlite3OsRead(pPager->fd, PGHDR_TO_DATA(pPg), pPager->pageSize, offset);
1.3417 + PAGER_INCR(sqlite3_pager_readdb_count);
1.3418 + PAGER_INCR(pPager->nRead);
1.3419 + IOTRACE(("PGIN %p %d\n", pPager, pgno));
1.3420 + if( pgno==1 ){ /* NOTE(review): rc is not checked before this copy - confirm intended */
1.3421 + memcpy(&pPager->dbFileVers, &((u8*)PGHDR_TO_DATA(pPg))[24],
1.3422 + sizeof(pPager->dbFileVers));
1.3423 + }
1.3424 + CODEC1(pPager, PGHDR_TO_DATA(pPg), pPg->pgno, 3);
1.3425 + PAGERTRACE4("FETCH %d page %d hash(%08x)\n",
1.3426 + PAGERID(pPager), pPg->pgno, pager_pagehash(pPg));
1.3427 + return rc;
1.3428 +}
1.3429 +
1.3430 +
1.3431 +/*
1.3432 +** This function is called to obtain the shared lock required before
1.3433 +** data may be read from the pager cache. If the shared lock has already
1.3434 +** been obtained, this function is a no-op.
1.3435 +**
1.3436 +** Immediately after obtaining the shared lock (if required), this function
1.3437 +** checks for a hot-journal file. If one is found, an emergency rollback
1.3438 +** is performed immediately.
1.3439 +*/
1.3440 +static int pagerSharedLock(Pager *pPager){
1.3441 + int rc = SQLITE_OK; /* Return code */
1.3442 + int isErrorReset = 0; /* True if an error state was cleared with a journal still open */
1.3443 +
1.3444 + /* If this database is opened for exclusive access, has no outstanding
1.3445 + ** page references and is in an error-state, now is the chance to clear
1.3446 + ** the error. Discard the contents of the pager-cache and treat any
1.3447 + ** open journal file as a hot-journal.
1.3448 + */
1.3449 + if( !MEMDB && pPager->exclusiveMode && pPager->nRef==0 && pPager->errCode ){
1.3450 + if( pPager->journalOpen ){
1.3451 + isErrorReset = 1;
1.3452 + }
1.3453 + pPager->errCode = SQLITE_OK;
1.3454 + pager_reset(pPager);
1.3455 + }
1.3456 +
1.3457 + /* If the pager is still in an error state, do not proceed. The error
1.3458 + ** state will be cleared at some point in the future when all page
1.3459 + ** references are dropped and the cache can be discarded.
1.3460 + */
1.3461 + if( pPager->errCode && pPager->errCode!=SQLITE_FULL ){ /* SQLITE_FULL does not block */
1.3462 + return pPager->errCode;
1.3463 + }
1.3464 +
1.3465 + if( pPager->state==PAGER_UNLOCK || isErrorReset ){
1.3466 + sqlite3_vfs *pVfs = pPager->pVfs;
1.3467 + if( !MEMDB ){
1.3468 + int isHotJournal; /* Written by hasHotJournal(); not read when isErrorReset is set */
1.3469 + assert( pPager->nRef==0 );
1.3470 + if( !pPager->noReadlock ){
1.3471 + rc = pager_wait_on_lock(pPager, SHARED_LOCK);
1.3472 + if( rc!=SQLITE_OK ){
1.3473 + assert( pPager->state==PAGER_UNLOCK );
1.3474 + return pager_error(pPager, rc);
1.3475 + }
1.3476 + assert( pPager->state>=SHARED_LOCK );
1.3477 + }
1.3478 +
1.3479 + /* If a journal file exists, and there is no RESERVED lock on the
1.3480 + ** database file, then it either needs to be played back or deleted.
1.3481 + */
1.3482 + if( !isErrorReset ){
1.3483 + rc = hasHotJournal(pPager, &isHotJournal);
1.3484 + if( rc!=SQLITE_OK ){
1.3485 + goto failed;
1.3486 + }
1.3487 + }
1.3488 + if( isErrorReset || isHotJournal ){
1.3489 + /* Get an EXCLUSIVE lock on the database file. At this point it is
1.3490 + ** important that a RESERVED lock is not obtained on the way to the
1.3491 + ** EXCLUSIVE lock. If it were, another process might open the
1.3492 + ** database file, detect the RESERVED lock, and conclude that the
1.3493 + ** database is safe to read while this process is still rolling it
1.3494 + ** back.
1.3495 + **
1.3496 + ** Because the intermediate RESERVED lock is not requested, the
1.3497 + ** second process will get to this point in the code and fail to
1.3498 + ** obtain its own EXCLUSIVE lock on the database file.
1.3499 + */
1.3500 + if( pPager->state<EXCLUSIVE_LOCK ){
1.3501 + rc = sqlite3OsLock(pPager->fd, EXCLUSIVE_LOCK);
1.3502 + if( rc!=SQLITE_OK ){
1.3503 + rc = pager_error(pPager, rc);
1.3504 + goto failed;
1.3505 + }
1.3506 + pPager->state = PAGER_EXCLUSIVE;
1.3507 + }
1.3508 +
1.3509 + /* Open the journal for read/write access. This is because in
1.3510 + ** exclusive-access mode the file descriptor will be kept open and
1.3511 + ** possibly used for a transaction later on. On some systems, the
1.3512 + ** OsTruncate() call used in exclusive-access mode also requires
1.3513 + ** a read/write file handle.
1.3514 + */
1.3515 + if( !isErrorReset && pPager->journalOpen==0 ){
1.3516 + int res; /* Set by sqlite3OsAccess(): journal file exists */
1.3517 + rc = sqlite3OsAccess(pVfs,pPager->zJournal,SQLITE_ACCESS_EXISTS,&res);
1.3518 + if( rc==SQLITE_OK ){
1.3519 + if( res ){
1.3520 + int fout = 0; /* Flags reported back by sqlite3OsOpen() */
1.3521 + int f = SQLITE_OPEN_READWRITE|SQLITE_OPEN_MAIN_JOURNAL;
1.3522 + assert( !pPager->tempFile );
1.3523 + rc = sqlite3OsOpen(pVfs, pPager->zJournal, pPager->jfd, f, &fout);
1.3524 + assert( rc!=SQLITE_OK || pPager->jfd->pMethods );
1.3525 + if( fout&SQLITE_OPEN_READONLY ){ /* A read-only journal handle is rejected */
1.3526 + rc = SQLITE_BUSY;
1.3527 + sqlite3OsClose(pPager->jfd);
1.3528 + }
1.3529 + }else{
1.3530 + /* If the journal does not exist, that means some other process
1.3531 + ** has already rolled it back */
1.3532 + rc = SQLITE_BUSY;
1.3533 + }
1.3534 + }
1.3535 + }
1.3536 + if( rc!=SQLITE_OK ){
1.3537 + if( rc!=SQLITE_NOMEM && rc!=SQLITE_IOERR_UNLOCK
1.3538 + && rc!=SQLITE_IOERR_NOMEM
1.3539 + ){
1.3540 + rc = SQLITE_BUSY; /* Every other failure is reported as SQLITE_BUSY */
1.3541 + }
1.3542 + goto failed;
1.3543 + }
1.3544 + pPager->journalOpen = 1;
1.3545 + pPager->journalStarted = 0;
1.3546 + pPager->journalOff = 0;
1.3547 + pPager->setMaster = 0;
1.3548 + pPager->journalHdr = 0;
1.3549 +
1.3550 + /* Playback and delete the journal. Drop the database write
1.3551 + ** lock and reacquire the read lock.
1.3552 + */
1.3553 + rc = pager_playback(pPager, 1);
1.3554 + if( rc!=SQLITE_OK ){
1.3555 + rc = pager_error(pPager, rc);
1.3556 + goto failed;
1.3557 + }
1.3558 + assert(pPager->state==PAGER_SHARED ||
1.3559 + (pPager->exclusiveMode && pPager->state>PAGER_SHARED)
1.3560 + );
1.3561 + }
1.3562 +
1.3563 + if( pPager->pAll ){
1.3564 + /* The shared-lock has just been acquired on the database file
1.3565 + ** and there are already pages in the cache (from a previous
1.3566 + ** read or write transaction). Check to see if the database
1.3567 + ** has been modified. If the database has changed, flush the
1.3568 + ** cache.
1.3569 + **
1.3570 + ** Database changes are detected by looking at the bytes beginning
1.3571 + ** at offset 24 into the file (sizeof(dbFileVers) of them). The first 4
1.3572 + ** are a 32-bit counter that is incremented with each change. The
1.3573 + ** other bytes change randomly with each file change when
1.3574 + ** a codec is in use.
1.3575 + **
1.3576 + ** There is a vanishingly small chance that a change will not be
1.3577 + ** detected. The chance of an undetected change is so small that
1.3578 + ** it can be neglected.
1.3579 + */
1.3580 + char dbFileVers[sizeof(pPager->dbFileVers)]; /* Version bytes as currently on disk */
1.3581 + sqlite3PagerPagecount(pPager, 0); /* Return value unused; errCode is checked next */
1.3582 +
1.3583 + if( pPager->errCode ){
1.3584 + rc = pPager->errCode;
1.3585 + goto failed;
1.3586 + }
1.3587 +
1.3588 + if( pPager->dbSize>0 ){
1.3589 + IOTRACE(("CKVERS %p %d\n", pPager, sizeof(dbFileVers)));
1.3590 + rc = sqlite3OsRead(pPager->fd, &dbFileVers, sizeof(dbFileVers), 24);
1.3591 + if( rc!=SQLITE_OK ){
1.3592 + goto failed;
1.3593 + }
1.3594 + }else{
1.3595 + memset(dbFileVers, 0, sizeof(dbFileVers)); /* Empty file: compare against zeros */
1.3596 + }
1.3597 +
1.3598 + if( memcmp(pPager->dbFileVers, dbFileVers, sizeof(dbFileVers))!=0 ){
1.3599 + pager_reset(pPager);
1.3600 + }
1.3601 + }
1.3602 + }
1.3603 + assert( pPager->exclusiveMode || pPager->state<=PAGER_SHARED );
1.3604 + if( pPager->state==PAGER_UNLOCK ){
1.3605 + pPager->state = PAGER_SHARED;
1.3606 + }
1.3607 + }
1.3608 +
1.3609 + failed:
1.3610 + if( rc!=SQLITE_OK ){
1.3611 + /* pager_unlock() is a no-op for exclusive mode and in-memory databases. */
1.3612 + pager_unlock(pPager);
1.3613 + }
1.3614 + return rc;
1.3615 +}
1.3616 +
1.3617 +/*
1.3618 +** Allocate a PgHdr object. Either create a new one or reuse
1.3619 +** an existing one that is not otherwise in use.
1.3620 +**
1.3621 +** A new PgHdr structure is created if any of the following are
1.3622 +** true:
1.3623 +**
1.3624 +** (1) We have not exceeded our maximum allocated cache size
1.3625 +** as set by the "PRAGMA cache_size" command.
1.3626 +**
1.3627 +** (2) There are no unused PgHdr objects available at this time.
1.3628 +**
1.3629 +** (3) This is an in-memory database.
1.3630 +**
1.3631 +** (4) There are no PgHdr objects that do not require a journal
1.3632 +** file sync and a sync of the journal file is currently
1.3633 +** prohibited.
1.3634 +**
1.3635 +** Otherwise, reuse an existing PgHdr. In other words, reuse an
1.3636 +** existing PgHdr if all of the following are true:
1.3637 +**
1.3638 +** (1) We have reached or exceeded the maximum cache size
1.3639 +** allowed by "PRAGMA cache_size".
1.3640 +**
1.3641 +** (2) There is a PgHdr available with PgHdr->nRef==0
1.3642 +**
1.3643 +** (3) We are not in an in-memory database
1.3644 +**
1.3645 +** (4) Either there is an available PgHdr that does not need
1.3646 +** to be synced to disk or else disk syncing is currently
1.3647 +** allowed.
1.3648 +*/
1.3649 +static int pagerAllocatePage(Pager *pPager, PgHdr **ppPg){
1.3650 + int rc = SQLITE_OK; /* Return code */
1.3651 + PgHdr *pPg; /* The new or recycled page header */
1.3652 + int nByteHdr; /* Bytes to allocate for a new PgHdr plus trailing data */
1.3653 +
1.3654 + /* Create a new PgHdr if any of the four conditions defined
1.3655 + ** above are met: */
1.3656 + if( pPager->nPage<pPager->mxPage
1.3657 + || pPager->lru.pFirst==0
1.3658 + || MEMDB
1.3659 + || (pPager->lru.pFirstSynced==0 && pPager->doNotSync)
1.3660 + ){
1.3661 + void *pData = 0; /* Initialized to placate warning */
1.3662 + if( pPager->nPage>=pPager->nHash ){ /* Grow the hash table before adding a page */
1.3663 + pager_resize_hash_table(pPager,
1.3664 + pPager->nHash<256 ? 256 : pPager->nHash*2);
1.3665 + if( pPager->nHash==0 ){ /* nHash==0 is treated as a failed resize */
1.3666 + rc = SQLITE_NOMEM;
1.3667 + goto pager_allocate_out;
1.3668 + }
1.3669 + }
1.3670 + pagerLeave(pPager); /* Left for the allocations; re-entered below */
1.3671 + nByteHdr = sizeof(*pPg) + sizeof(u32) + pPager->nExtra
1.3672 + + MEMDB*sizeof(PgHistory);
1.3673 + pPg = sqlite3Malloc( nByteHdr );
1.3674 + if( pPg ){
1.3675 + pData = sqlite3PageMalloc( pPager->pageSize );
1.3676 + if( pData==0 ){
1.3677 + sqlite3_free(pPg); /* Do not leak the header if the data buffer fails */
1.3678 + pPg = 0;
1.3679 + }
1.3680 + }
1.3681 + pagerEnter(pPager);
1.3682 + if( pPg==0 ){
1.3683 + rc = SQLITE_NOMEM;
1.3684 + goto pager_allocate_out;
1.3685 + }
1.3686 + memset(pPg, 0, nByteHdr);
1.3687 + pPg->pData = pData;
1.3688 + pPg->pPager = pPager;
1.3689 + pPg->pNextAll = pPager->pAll; /* Push onto the front of the pAll list */
1.3690 +#ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
1.3691 + if( pPg->pNextAll ){
1.3692 + pPg->pNextAll->pPrevAll = pPg;
1.3693 + }
1.3694 +#endif
1.3695 + pPager->pAll = pPg;
1.3696 + pPager->nPage++;
1.3697 + }else{
1.3698 + /* Recycle an existing page with a zero ref-count. */
1.3699 + rc = pager_recycle(pPager, &pPg);
1.3700 + if( rc==SQLITE_BUSY ){
1.3701 + rc = SQLITE_IOERR_BLOCKED; /* SQLITE_BUSY is reported as an I/O error here */
1.3702 + }
1.3703 + if( rc!=SQLITE_OK ){
1.3704 + goto pager_allocate_out;
1.3705 + }
1.3706 + assert( pPager->state>=SHARED_LOCK );
1.3707 + assert(pPg);
1.3708 + }
1.3709 + *ppPg = pPg; /* Written only on success; error paths jump past this line */
1.3710 +
1.3711 +pager_allocate_out:
1.3712 + return rc;
1.3713 +}
1.3714 +
1.3715 +/*
1.3716 +** Make sure we have the content for a page. If the page was
1.3717 +** previously acquired with noContent==1, then the content was
1.3718 +** just initialized to zeros instead of being read from disk.
1.3719 +** But now we need the real data off of disk. So make sure we
1.3720 +** have it. Read it in if we do not have it already.
1.3721 +*/
1.3722 +static int pager_get_content(PgHdr *pPg){
1.3723 + if( pPg->needRead ){ /* Content has not been loaded from disk yet */
1.3724 + int rc = readDbPage(pPg->pPager, pPg, pPg->pgno);
1.3725 + if( rc==SQLITE_OK ){
1.3726 + pPg->needRead = 0; /* Cleared only after a successful read */
1.3727 + }else{
1.3728 + return rc; /* needRead stays set, so a later call tries again */
1.3729 + }
1.3730 + }
1.3731 + return SQLITE_OK;
1.3732 +}
1.3733 +
1.3734 +/*
1.3735 +** Acquire a page.
1.3736 +**
1.3737 +** A read lock on the disk file is obtained when the first page is acquired.
1.3738 +** This read lock is dropped when the last page is released.
1.3739 +**
1.3740 +** This routine works for any page number greater than 0. If the database
1.3741 +** file is smaller than the requested page, then no actual disk
1.3742 +** read occurs and the memory image of the page is initialized to
1.3743 +** all zeros. The extra data appended to a page is always initialized
1.3744 +** to zeros the first time a page is loaded into memory.
1.3745 +**
1.3746 +** The acquisition might fail for several reasons. In all cases,
1.3747 +** an appropriate error code is returned and *ppPage is set to NULL.
1.3748 +**
1.3749 +** See also sqlite3PagerLookup(). Both this routine and Lookup() attempt
1.3750 +** to find a page in the in-memory cache first. If the page is not already
1.3751 +** in memory, this routine goes to disk to read it in whereas Lookup()
1.3752 +** just returns 0. This routine acquires a read-lock the first time it
1.3753 +** has to go to disk, and could also playback an old journal if necessary.
1.3754 +** Since Lookup() never goes to disk, it never has to deal with locks
1.3755 +** or journal files.
1.3756 +**
1.3757 +** If noContent is false, the page contents are actually read from disk.
1.3758 +** If noContent is true, it means that we do not care about the contents
1.3759 +** of the page at this time, so do not do a disk read. Just fill in the
1.3760 +** page content with zeros. But mark the fact that we have not read the
1.3761 +** content by setting the PgHdr.needRead flag. Later on, if
1.3762 +** sqlite3PagerWrite() is called on this page or if this routine is
1.3763 +** called again with noContent==0, that means that the content is needed
1.3764 +** and the disk read should occur at that point.
1.3765 +*/
1.3766 +static int pagerAcquire(
1.3767 + Pager *pPager, /* The pager open on the database file */
1.3768 + Pgno pgno, /* Page number to fetch */
1.3769 + DbPage **ppPage, /* Write a pointer to the page here */
1.3770 + int noContent /* Do not bother reading content from disk if true */
1.3771 +){
1.3772 + PgHdr *pPg; /* The page being acquired */
1.3773 + int rc; /* Return code of the helper calls */
1.3774 +
1.3775 + assert( pPager->state==PAGER_UNLOCK || pPager->nRef>0 || pgno==1 );
1.3776 +
1.3777 + /* The maximum page number is 2^31. Return SQLITE_CORRUPT if a page
1.3778 + ** number greater than this, zero, or PAGER_MJ_PGNO(pPager) is requested.
1.3779 + */
1.3780 + if( pgno>PAGER_MAX_PGNO || pgno==0 || pgno==PAGER_MJ_PGNO(pPager) ){
1.3781 + return SQLITE_CORRUPT_BKPT; /* NOTE: *ppPage has not been written on this path */
1.3782 + }
1.3783 +
1.3784 + /* Sanity-check the pager pointer and clear the output pointer.
1.3785 + */
1.3786 + assert( pPager!=0 ); /* NOTE: pPager was already dereferenced above */
1.3787 + *ppPage = 0;
1.3788 +
1.3789 + /* If this is the first page accessed, then get a SHARED lock
1.3790 + ** on the database file. pagerSharedLock() is a no-op if
1.3791 + ** a database lock is already held.
1.3792 + */
1.3793 + rc = pagerSharedLock(pPager);
1.3794 + if( rc!=SQLITE_OK ){
1.3795 + return rc;
1.3796 + }
1.3797 + assert( pPager->state!=PAGER_UNLOCK );
1.3798 +
1.3799 + pPg = pager_lookup(pPager, pgno);
1.3800 + if( pPg==0 ){
1.3801 + /* The requested page is not in the page cache. */
1.3802 + int nMax; /* Page count reported by sqlite3PagerPagecount() */
1.3803 + int h; /* Hash bucket index for pgno */
1.3804 + PAGER_INCR(pPager->nMiss);
1.3805 + rc = pagerAllocatePage(pPager, &pPg);
1.3806 + if( rc!=SQLITE_OK ){
1.3807 + return rc;
1.3808 + }
1.3809 +
1.3810 + pPg->pgno = pgno;
1.3811 + assert( !MEMDB || pgno>pPager->stmtSize );
1.3812 + pPg->inJournal = sqlite3BitvecTest(pPager->pInJournal, pgno);
1.3813 + pPg->needSync = 0;
1.3814 +
1.3815 + makeClean(pPg);
1.3816 + pPg->nRef = 1;
1.3817 +
1.3818 + pPager->nRef++;
1.3819 + if( pPager->nExtra>0 ){
1.3820 + memset(PGHDR_TO_EXTRA(pPg, pPager), 0, pPager->nExtra);
1.3821 + }
1.3822 + rc = sqlite3PagerPagecount(pPager, &nMax);
1.3823 + if( rc!=SQLITE_OK ){
1.3824 + sqlite3PagerUnref(pPg); /* Drop the reference taken above */
1.3825 + return rc;
1.3826 + }
1.3827 +
1.3828 + /* Populate the page with data, either by reading from the database
1.3829 + ** file, or by setting the entire page to zero.
1.3830 + */
1.3831 + if( nMax<(int)pgno || MEMDB || (noContent && !pPager->alwaysRollback) ){
1.3832 + if( pgno>pPager->mxPgno ){
1.3833 + sqlite3PagerUnref(pPg);
1.3834 + return SQLITE_FULL;
1.3835 + }
1.3836 + memset(PGHDR_TO_DATA(pPg), 0, pPager->pageSize);
1.3837 + pPg->needRead = noContent && !pPager->alwaysRollback;
1.3838 + IOTRACE(("ZERO %p %d\n", pPager, pgno));
1.3839 + }else{
1.3840 + rc = readDbPage(pPager, pPg, pgno);
1.3841 + if( rc!=SQLITE_OK && rc!=SQLITE_IOERR_SHORT_READ ){ /* A short read is tolerated */
1.3842 + pPg->pgno = 0;
1.3843 + sqlite3PagerUnref(pPg);
1.3844 + return rc;
1.3845 + }
1.3846 + pPg->needRead = 0;
1.3847 + }
1.3848 +
1.3849 + /* Link the page into the page hash table */
1.3850 + h = pgno & (pPager->nHash-1); /* Masking presumes nHash is a power of two - TODO confirm */
1.3851 + assert( pgno!=0 );
1.3852 + pPg->pNextHash = pPager->aHash[h]; /* Insert at the front of the bucket chain */
1.3853 + pPager->aHash[h] = pPg;
1.3854 + if( pPg->pNextHash ){
1.3855 + assert( pPg->pNextHash->pPrevHash==0 );
1.3856 + pPg->pNextHash->pPrevHash = pPg;
1.3857 + }
1.3858 +
1.3859 +#ifdef SQLITE_CHECK_PAGES
1.3860 + pPg->pageHash = pager_pagehash(pPg);
1.3861 +#endif
1.3862 + }else{
1.3863 + /* The requested page is in the page cache. */
1.3864 + assert(pPager->nRef>0 || pgno==1);
1.3865 + PAGER_INCR(pPager->nHit);
1.3866 + if( !noContent ){
1.3867 + rc = pager_get_content(pPg);
1.3868 + if( rc ){
1.3869 + return rc; /* page_ref() has not been called yet on this path */
1.3870 + }
1.3871 + }
1.3872 + page_ref(pPg);
1.3873 + }
1.3874 + *ppPage = pPg;
1.3875 + return SQLITE_OK;
1.3876 +}
1.3877 +int sqlite3PagerAcquire( /* Runs pagerAcquire() between pagerEnter() and pagerLeave() */
1.3878 + Pager *pPager, /* The pager open on the database file */
1.3879 + Pgno pgno, /* Page number to fetch */
1.3880 + DbPage **ppPage, /* Write a pointer to the page here */
1.3881 + int noContent /* Do not bother reading content from disk if true */
1.3882 +){
1.3883 + int rc; /* Result of pagerAcquire(), returned unchanged */
1.3884 + pagerEnter(pPager);
1.3885 + rc = pagerAcquire(pPager, pgno, ppPage, noContent);
1.3886 + pagerLeave(pPager);
1.3887 + return rc;
1.3888 +}
1.3889 +
1.3890 +
1.3891 +/*
1.3892 +** Acquire a page if it is already in the in-memory cache. Do
1.3893 +** not read the page from disk. Return a pointer to the page,
1.3894 +** or 0 if the page is not in cache.
1.3895 +**
1.3896 +** See also sqlite3PagerGet(). The difference between this routine
1.3897 +** and sqlite3PagerGet() is that _get() will go to the disk and read
1.3898 +** in the page if the page is not already in cache. This routine
1.3899 +** returns NULL if the page is not in cache or if a disk I/O error
1.3900 +** has ever happened.
1.3901 +*/
1.3902 +DbPage *sqlite3PagerLookup(Pager *pPager, Pgno pgno){
1.3903 + PgHdr *pPg = 0; /* Result; stays NULL unless the page is found in the cache */
1.3904 +
1.3905 + assert( pPager!=0 );
1.3906 + assert( pgno!=0 );
1.3907 +
1.3908 + pagerEnter(pPager);
1.3909 + if( pPager->state==PAGER_UNLOCK ){ /* No lookup is attempted while unlocked */
1.3910 + assert( !pPager->pAll || pPager->exclusiveMode );
1.3911 + }else if( pPager->errCode && pPager->errCode!=SQLITE_FULL ){
1.3912 + /* Do nothing */
1.3913 + }else if( (pPg = pager_lookup(pPager, pgno))!=0 ){
1.3914 + page_ref(pPg); /* A cache hit takes a reference before the page is returned */
1.3915 + }
1.3916 + pagerLeave(pPager);
1.3917 + return pPg;
1.3918 +}
1.3919 +
1.3920 +/*
1.3921 +** Release a page.
1.3922 +**
1.3923 +** If the number of references to the page drop to zero, then the
1.3924 +** page is added to the LRU list. When all references to all pages
1.3925 +** are released, a rollback occurs and the lock on the database is
1.3926 +** removed.
1.3927 +*/
1.3928 +int sqlite3PagerUnref(DbPage *pPg){
1.3929 + Pager *pPager; /* The pager that owns pPg */
1.3930 +
1.3931 + if( pPg==0 ) return SQLITE_OK; /* Releasing a NULL page is a no-op */
1.3932 + pPager = pPg->pPager;
1.3933 +
1.3934 + /* Decrement the reference count for this page
1.3935 + */
1.3936 + assert( pPg->nRef>0 );
1.3937 + pagerEnter(pPg->pPager);
1.3938 + pPg->nRef--;
1.3939 +
1.3940 + CHECK_PAGE(pPg);
1.3941 +
1.3942 + /* When the number of references to a page reach 0, call the
1.3943 + ** destructor and add the page to the freelist.
1.3944 + */
1.3945 + if( pPg->nRef==0 ){
1.3946 +
1.3947 + lruListAdd(pPg);
1.3948 + if( pPager->xDestructor ){
1.3949 + pPager->xDestructor(pPg, pPager->pageSize);
1.3950 + }
1.3951 +
1.3952 + /* When all pages reach the freelist, drop the read lock from
1.3953 + ** the database file.
1.3954 + */
1.3955 + pPager->nRef--;
1.3956 + assert( pPager->nRef>=0 );
1.3957 + if( pPager->nRef==0 && (!pPager->exclusiveMode || pPager->journalOff>0) ){
1.3958 + pagerUnlockAndRollback(pPager);
1.3959 + }
1.3960 + }
1.3961 + pagerLeave(pPager);
1.3962 + return SQLITE_OK; /* This function has no other return value */
1.3963 +}
1.3964 +
1.3965 +/*
1.3966 +** Create a journal file for pPager. There should already be a RESERVED
1.3967 +** or EXCLUSIVE lock on the database file when this routine is called.
1.3968 +**
1.3969 +** Return SQLITE_OK if everything works. Return an error code and release
1.3970 +** the write lock if anything goes wrong.
1.3971 +*/
1.3972 +static int pager_open_journal(Pager *pPager){
1.3973 + sqlite3_vfs *pVfs = pPager->pVfs;
1.3974 + int flags = (SQLITE_OPEN_READWRITE|SQLITE_OPEN_EXCLUSIVE|SQLITE_OPEN_CREATE);
1.3975 +
1.3976 + int rc; /* Return code */
1.3977 + assert( !MEMDB );
1.3978 + assert( pPager->state>=PAGER_RESERVED );
1.3979 + assert( pPager->useJournal );
1.3980 + assert( pPager->pInJournal==0 );
1.3981 + sqlite3PagerPagecount(pPager, 0); /* Return value unused; errCode is checked further down */
1.3982 + pagerLeave(pPager); /* Left for the bitvec allocation; re-entered below */
1.3983 + pPager->pInJournal = sqlite3BitvecCreate(pPager->dbSize);
1.3984 + pagerEnter(pPager);
1.3985 + if( pPager->pInJournal==0 ){
1.3986 + rc = SQLITE_NOMEM;
1.3987 + goto failed_to_open_journal;
1.3988 + }
1.3989 +
1.3990 + if( pPager->journalOpen==0 ){
1.3991 + if( pPager->tempFile ){
1.3992 + flags |= (SQLITE_OPEN_DELETEONCLOSE|SQLITE_OPEN_TEMP_JOURNAL);
1.3993 + }else{
1.3994 + flags |= (SQLITE_OPEN_MAIN_JOURNAL);
1.3995 + }
1.3996 +#ifdef SQLITE_ENABLE_ATOMIC_WRITE
1.3997 + rc = sqlite3JournalOpen(
1.3998 + pVfs, pPager->zJournal, pPager->jfd, flags, jrnlBufferSize(pPager)
1.3999 + );
1.4000 +#else
1.4001 + rc = sqlite3OsOpen(pVfs, pPager->zJournal, pPager->jfd, flags, 0);
1.4002 +#endif
1.4003 + assert( rc!=SQLITE_OK || pPager->jfd->pMethods );
1.4004 + pPager->journalOff = 0;
1.4005 + pPager->setMaster = 0;
1.4006 + pPager->journalHdr = 0;
1.4007 + if( rc!=SQLITE_OK ){
1.4008 + if( rc==SQLITE_NOMEM ){
1.4009 + sqlite3OsDelete(pVfs, pPager->zJournal, 0); /* Result of the delete is ignored */
1.4010 + }
1.4011 + goto failed_to_open_journal;
1.4012 + }
1.4013 + }
1.4014 + pPager->journalOpen = 1;
1.4015 + pPager->journalStarted = 0;
1.4016 + pPager->needSync = 0;
1.4017 + pPager->alwaysRollback = 0;
1.4018 + pPager->nRec = 0;
1.4019 + if( pPager->errCode ){
1.4020 + rc = pPager->errCode;
1.4021 + goto failed_to_open_journal;
1.4022 + }
1.4023 + pPager->origDbSize = pPager->dbSize;
1.4024 +
1.4025 + rc = writeJournalHdr(pPager);
1.4026 +
1.4027 + if( pPager->stmtAutoopen && rc==SQLITE_OK ){
1.4028 + rc = sqlite3PagerStmtBegin(pPager);
1.4029 + }
1.4030 + if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM && rc!=SQLITE_IOERR_NOMEM ){
1.4031 + rc = pager_end_transaction(pPager, 0);
1.4032 + if( rc==SQLITE_OK ){
1.4033 + rc = SQLITE_FULL; /* The original error code is replaced by SQLITE_FULL */
1.4034 + }
1.4035 + }
1.4036 + return rc;
1.4037 +
1.4038 +failed_to_open_journal:
1.4039 + sqlite3BitvecDestroy(pPager->pInJournal); /* Undo the bitvec allocation made above */
1.4040 + pPager->pInJournal = 0;
1.4041 + return rc;
1.4042 +}
1.4043 +
1.4044 +/*
1.4045 +** Acquire a write-lock on the database. The lock is removed when
1.4046 +** any of the following happen:
1.4047 +**
1.4048 +** * sqlite3PagerCommitPhaseTwo() is called.
1.4049 +** * sqlite3PagerRollback() is called.
1.4050 +** * sqlite3PagerClose() is called.
1.4051 +** * sqlite3PagerUnref() is called on every outstanding page.
1.4052 +**
1.4053 +** The first parameter to this routine is a pointer to any open page of the
1.4054 +** database file. Nothing changes about the page - it is used merely to
1.4055 +** acquire a pointer to the Pager structure and as proof that there is
1.4056 +** already a read-lock on the database.
1.4057 +**
1.4058 +** The second parameter indicates how much space in bytes to reserve for a
1.4059 +** master journal file-name at the start of the journal when it is created.
1.4060 +**
1.4061 +** A journal file is opened if this is not a temporary file. For temporary
1.4062 +** files, the opening of the journal file is deferred until there is an
1.4063 +** actual need to write to the journal.
1.4064 +**
1.4065 +** If the database is already reserved for writing, this routine is a no-op.
1.4066 +**
1.4067 +** If exFlag is true, go ahead and get an EXCLUSIVE lock on the file
1.4068 +** immediately instead of waiting until we try to flush the cache. The
1.4069 +** exFlag is ignored if a transaction is already active.
1.4070 +*/
1.4071 +int sqlite3PagerBegin(DbPage *pPg, int exFlag){
1.4072 + Pager *pPager = pPg->pPager;
1.4073 + int rc = SQLITE_OK;
1.4074 + pagerEnter(pPager);
1.4075 + assert( pPg->nRef>0 );
1.4076 + assert( pPager->state!=PAGER_UNLOCK );
1.4077 + if( pPager->state==PAGER_SHARED ){
1.4078 + assert( pPager->pInJournal==0 );
1.4079 + if( MEMDB ){
1.4080 + pPager->state = PAGER_EXCLUSIVE;
1.4081 + pPager->origDbSize = pPager->dbSize;
1.4082 + }else{
1.4083 + rc = sqlite3OsLock(pPager->fd, RESERVED_LOCK);
1.4084 + if( rc==SQLITE_OK ){
1.4085 + pPager->state = PAGER_RESERVED;
1.4086 + if( exFlag ){
1.4087 + rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK);
1.4088 + }
1.4089 + }
1.4090 + if( rc!=SQLITE_OK ){
1.4091 + pagerLeave(pPager);
1.4092 + return rc;
1.4093 + }
1.4094 + pPager->dirtyCache = 0;
1.4095 + PAGERTRACE2("TRANSACTION %d\n", PAGERID(pPager));
1.4096 + if( pPager->useJournal && !pPager->tempFile
1.4097 + && pPager->journalMode!=PAGER_JOURNALMODE_OFF ){
1.4098 + rc = pager_open_journal(pPager);
1.4099 + }
1.4100 + }
1.4101 + }else if( pPager->journalOpen && pPager->journalOff==0 ){
1.4102 + /* This happens when the pager was in exclusive-access mode the last
1.4103 + ** time a (read or write) transaction was successfully concluded
1.4104 + ** by this connection. Instead of deleting the journal file it was
1.4105 + ** kept open and either was truncated to 0 bytes or its header was
1.4106 + ** overwritten with zeros.
1.4107 + */
1.4108 + assert( pPager->nRec==0 );
1.4109 + assert( pPager->origDbSize==0 );
1.4110 + assert( pPager->pInJournal==0 );
1.4111 + sqlite3PagerPagecount(pPager, 0);
1.4112 + pagerLeave(pPager);
1.4113 + pPager->pInJournal = sqlite3BitvecCreate( pPager->dbSize );
1.4114 + pagerEnter(pPager);
1.4115 + if( !pPager->pInJournal ){
1.4116 + rc = SQLITE_NOMEM;
1.4117 + }else{
1.4118 + pPager->origDbSize = pPager->dbSize;
1.4119 + rc = writeJournalHdr(pPager);
1.4120 + }
1.4121 + }
1.4122 + assert( !pPager->journalOpen || pPager->journalOff>0 || rc!=SQLITE_OK );
1.4123 + pagerLeave(pPager);
1.4124 + return rc;
1.4125 +}
1.4126 +
1.4127 +/*
1.4128 +** Flag page pPg as dirty and push it onto the front of its pager's
1.4129 +** doubly-linked dirty list. A page that is already dirty is untouched.
1.4130 +*/
1.4131 +static void makeDirty(PgHdr *pPg){
1.4132 +  Pager *pOwner;        /* Pager that owns pPg */
1.4133 +  PgHdr *pOldHead;      /* Head of the dirty list before insertion */
1.4134 +  if( pPg->dirty ) return;
1.4135 +  pOwner = pPg->pPager;
1.4136 +  pOldHead = pOwner->pDirty;
1.4137 +  pPg->dirty = 1;
1.4138 +  pPg->pDirty = pOldHead;
1.4139 +  if( pOldHead ) pOldHead->pPrevDirty = pPg;
1.4140 +  pPg->pPrevDirty = 0;
1.4141 +  pOwner->pDirty = pPg;
1.4142 +}
1.4143 +
1.4144 +/*
1.4145 +** Clear the dirty flag of pPg and unlink it from the pager's dirty list.
1.4146 +*/
1.4147 +static void makeClean(PgHdr *pPg){
1.4148 +  PgHdr *pAfter = pPg->pDirty;        /* Next entry on the dirty list */
1.4149 +  PgHdr *pBefore = pPg->pPrevDirty;   /* Previous entry on the dirty list */
1.4150 +  if( !pPg->dirty ) return;           /* Already clean: nothing to unlink */
1.4151 +  pPg->dirty = 0;
1.4152 +  if( pAfter ){
1.4153 +    assert( pAfter->pPrevDirty==pPg );
1.4154 +    pAfter->pPrevDirty = pBefore;
1.4155 +  }
1.4156 +  if( pBefore ){
1.4157 +    assert( pBefore->pDirty==pPg );
1.4158 +    pBefore->pDirty = pAfter;
1.4159 +  }else{
1.4160 +    assert( pPg->pPager->pDirty==pPg );
1.4161 +    pPg->pPager->pDirty = pAfter;
1.4162 +  }
1.4163 +}
1.4164 +
1.4165 +
1.4166 +/*
1.4167 +** Mark a data page as writeable. The page is written into the journal
1.4168 +** if it is not there already. This routine must be called before making
1.4169 +** changes to a page.
1.4170 +**
1.4171 +** The first time this routine is called, the pager creates a new
1.4172 +** journal and acquires a RESERVED lock on the database. If the RESERVED
1.4173 +** lock could not be acquired, this routine returns SQLITE_BUSY. The
1.4174 +** calling routine must check for that return value and be careful not to
1.4175 +** change any page data until this routine returns SQLITE_OK.
1.4176 +**
1.4177 +** If the journal file could not be written because the disk is full,
1.4178 +** then this routine returns SQLITE_FULL and does an immediate rollback.
1.4179 +** All subsequent write attempts also return SQLITE_FULL until there
1.4180 +** is a call to sqlite3PagerCommit() or sqlite3PagerRollback() to
1.4181 +** reset.
1.4182 +*/
1.4183 +static int pager_write(PgHdr *pPg){
1.4184 + void *pData = PGHDR_TO_DATA(pPg);
1.4185 + Pager *pPager = pPg->pPager;
1.4186 + int rc = SQLITE_OK;
1.4187 +
1.4188 + /* Check for errors
1.4189 + */
1.4190 + if( pPager->errCode ){
1.4191 + return pPager->errCode;
1.4192 + }
1.4193 + if( pPager->readOnly ){
1.4194 + return SQLITE_PERM;
1.4195 + }
1.4196 +
1.4197 + assert( !pPager->setMaster );
1.4198 +
1.4199 + CHECK_PAGE(pPg);
1.4200 +
1.4201 + /* If this page was previously acquired with noContent==1, that means
1.4202 + ** we didn't really read in the content of the page. This can happen
1.4203 + ** (for example) when the page is being moved to the freelist. But
1.4204 + ** now we are (perhaps) moving the page off of the freelist for
1.4205 + ** reuse and we need to know its original content so that content
1.4206 + ** can be stored in the rollback journal. So do the read at this
1.4207 + ** time.
1.4208 + */
1.4209 + rc = pager_get_content(pPg);
1.4210 + if( rc ){
1.4211 + return rc;
1.4212 + }
1.4213 +
1.4214 + /* Mark the page as dirty. If the page has already been written
1.4215 + ** to the journal then we can return right away.
1.4216 + */
1.4217 + makeDirty(pPg);
1.4218 + if( pPg->inJournal && (pageInStatement(pPg) || pPager->stmtInUse==0) ){
1.4219 + pPager->dirtyCache = 1;
1.4220 + pPager->dbModified = 1;
1.4221 + }else{
1.4222 +
1.4223 + /* If we get this far, it means that the page needs to be
1.4224 + ** written to the transaction journal or the checkpoint journal
1.4225 + ** or both.
1.4226 + **
1.4227 + ** First check to see that the transaction journal exists and
1.4228 + ** create it if it does not.
1.4229 + */
1.4230 + assert( pPager->state!=PAGER_UNLOCK );
1.4231 + rc = sqlite3PagerBegin(pPg, 0);
1.4232 + if( rc!=SQLITE_OK ){
1.4233 + return rc;
1.4234 + }
1.4235 + assert( pPager->state>=PAGER_RESERVED );
1.4236 + if( !pPager->journalOpen && pPager->useJournal
1.4237 + && pPager->journalMode!=PAGER_JOURNALMODE_OFF ){
1.4238 + rc = pager_open_journal(pPager);
1.4239 + if( rc!=SQLITE_OK ) return rc;
1.4240 + }
1.4241 + pPager->dirtyCache = 1;
1.4242 + pPager->dbModified = 1;
1.4243 +
1.4244 + /* The transaction journal now exists and we have a RESERVED or an
1.4245 + ** EXCLUSIVE lock on the main database file. Write the current page to
1.4246 + ** the transaction journal if it is not there already.
1.4247 + */
1.4248 + if( !pPg->inJournal && (pPager->journalOpen || MEMDB) ){
1.4249 + if( (int)pPg->pgno <= pPager->origDbSize ){
1.4250 + if( MEMDB ){
1.4251 + PgHistory *pHist = PGHDR_TO_HIST(pPg, pPager);
1.4252 + PAGERTRACE3("JOURNAL %d page %d\n", PAGERID(pPager), pPg->pgno);
1.4253 + assert( pHist->pOrig==0 );
1.4254 + pHist->pOrig = sqlite3PageMalloc( pPager->pageSize );
1.4255 + if( !pHist->pOrig ){
1.4256 + return SQLITE_NOMEM;
1.4257 + }
1.4258 + memcpy(pHist->pOrig, PGHDR_TO_DATA(pPg), pPager->pageSize);
1.4259 + }else{
1.4260 + u32 cksum;
1.4261 + char *pData2;
1.4262 +
1.4263 + /* We should never write to the journal file the page that
1.4264 + ** contains the database locks. The following assert verifies
1.4265 + ** that we do not. */
1.4266 + assert( pPg->pgno!=PAGER_MJ_PGNO(pPager) );
1.4267 + pData2 = CODEC2(pPager, pData, pPg->pgno, 7);
1.4268 + cksum = pager_cksum(pPager, (u8*)pData2);
1.4269 + rc = write32bits(pPager->jfd, pPager->journalOff, pPg->pgno);
1.4270 + if( rc==SQLITE_OK ){
1.4271 + rc = sqlite3OsWrite(pPager->jfd, pData2, pPager->pageSize,
1.4272 + pPager->journalOff + 4);
1.4273 + pPager->journalOff += pPager->pageSize+4; /* advanced whether or not the write above succeeded */
1.4274 + }
1.4275 + if( rc==SQLITE_OK ){
1.4276 + rc = write32bits(pPager->jfd, pPager->journalOff, cksum);
1.4277 + pPager->journalOff += 4;
1.4278 + }
1.4279 + IOTRACE(("JOUT %p %d %lld %d\n", pPager, pPg->pgno,
1.4280 + pPager->journalOff, pPager->pageSize));
1.4281 + PAGER_INCR(sqlite3_pager_writej_count);
1.4282 + PAGERTRACE5("JOURNAL %d page %d needSync=%d hash(%08x)\n",
1.4283 + PAGERID(pPager), pPg->pgno, pPg->needSync, pager_pagehash(pPg));
1.4284 +
1.4285 + /* An error has occurred writing to the journal file. The
1.4286 + ** transaction will be rolled back by the layer above.
1.4287 + */
1.4288 + if( rc!=SQLITE_OK ){
1.4289 + return rc;
1.4290 + }
1.4291 +
1.4292 + pPager->nRec++;
1.4293 + assert( pPager->pInJournal!=0 );
1.4294 + sqlite3BitvecSet(pPager->pInJournal, pPg->pgno);
1.4295 + pPg->needSync = !pPager->noSync;
1.4296 + if( pPager->stmtInUse ){
1.4297 + sqlite3BitvecSet(pPager->pInStmt, pPg->pgno);
1.4298 + }
1.4299 + }
1.4300 + }else{
1.4301 + pPg->needSync = !pPager->journalStarted && !pPager->noSync;
1.4302 + PAGERTRACE4("APPEND %d page %d needSync=%d\n",
1.4303 + PAGERID(pPager), pPg->pgno, pPg->needSync);
1.4304 + }
1.4305 + if( pPg->needSync ){
1.4306 + pPager->needSync = 1;
1.4307 + }
1.4308 + pPg->inJournal = 1;
1.4309 + }
1.4310 +
1.4311 + /* If the statement journal is open and the page is not in it,
1.4312 + ** then write the current page to the statement journal. Note that
1.4313 + ** the statement journal format differs from the standard journal format
1.4314 + ** in that it omits the checksums and the header.
1.4315 + */
1.4316 + if( pPager->stmtInUse
1.4317 + && !pageInStatement(pPg)
1.4318 + && (int)pPg->pgno<=pPager->stmtSize
1.4319 + ){
1.4320 + assert( pPg->inJournal || (int)pPg->pgno>pPager->origDbSize );
1.4321 + if( MEMDB ){
1.4322 + PgHistory *pHist = PGHDR_TO_HIST(pPg, pPager);
1.4323 + assert( pHist->pStmt==0 );
1.4324 + pHist->pStmt = sqlite3PageMalloc( pPager->pageSize );
1.4325 + if( pHist->pStmt ){ /* NOTE(review): a failed allocation is not reported here, unlike pOrig above */
1.4326 + memcpy(pHist->pStmt, PGHDR_TO_DATA(pPg), pPager->pageSize);
1.4327 + }
1.4328 + PAGERTRACE3("STMT-JOURNAL %d page %d\n", PAGERID(pPager), pPg->pgno);
1.4329 + page_add_to_stmt_list(pPg);
1.4330 + }else{
1.4331 + i64 offset = pPager->stmtNRec*(4+pPager->pageSize);
1.4332 + char *pData2 = CODEC2(pPager, pData, pPg->pgno, 7);
1.4333 + rc = write32bits(pPager->stfd, offset, pPg->pgno);
1.4334 + if( rc==SQLITE_OK ){
1.4335 + rc = sqlite3OsWrite(pPager->stfd, pData2, pPager->pageSize, offset+4);
1.4336 + }
1.4337 + PAGERTRACE3("STMT-JOURNAL %d page %d\n", PAGERID(pPager), pPg->pgno);
1.4338 + if( rc!=SQLITE_OK ){
1.4339 + return rc;
1.4340 + }
1.4341 + pPager->stmtNRec++;
1.4342 + assert( pPager->pInStmt!=0 );
1.4343 + sqlite3BitvecSet(pPager->pInStmt, pPg->pgno);
1.4344 + }
1.4345 + }
1.4346 + }
1.4347 +
1.4348 + /* Update the database size and return.
1.4349 + */
1.4350 + assert( pPager->state>=PAGER_SHARED );
1.4351 + if( pPager->dbSize<(int)pPg->pgno ){
1.4352 + pPager->dbSize = pPg->pgno;
1.4353 + if( !MEMDB && pPager->dbSize==PENDING_BYTE/pPager->pageSize ){
1.4354 + pPager->dbSize++; /* size is bumped one past the page index derived from PENDING_BYTE */
1.4355 + }
1.4356 + }
1.4357 + return rc;
1.4358 +}
1.4359 +
1.4360 +/*
1.4361 +** This function is used to mark a data-page as writable. It uses
1.4362 +** pager_write() to open a journal file (if it is not already open)
1.4363 +** and write the page *pData to the journal.
1.4364 +**
1.4365 +** The difference between this function and pager_write() is that this
1.4366 +** function also deals with the special case where 2 or more pages
1.4367 +** fit on a single disk sector. In this case all co-resident pages
1.4368 +** must have been written to the journal file before returning.
1.4369 +*/
1.4370 +int sqlite3PagerWrite(DbPage *pDbPage){
1.4371 + int rc = SQLITE_OK;
1.4372 +
1.4373 + PgHdr *pPg = pDbPage;
1.4374 + Pager *pPager = pPg->pPager;
1.4375 + Pgno nPagePerSector = (pPager->sectorSize/pPager->pageSize);
1.4376 +
1.4377 + pagerEnter(pPager);
1.4378 + if( !MEMDB && nPagePerSector>1 ){
1.4379 + Pgno nPageCount; /* Total number of pages in database file */
1.4380 + Pgno pg1; /* First page of the sector pPg is located on. */
1.4381 + int nPage; /* Number of pages starting at pg1 to journal */
1.4382 + int ii;
1.4383 + int needSync = 0;
1.4384 +
1.4385 + /* Set the doNotSync flag to 1. This is because we cannot allow a journal
1.4386 + ** header to be written between the pages journaled by this function.
1.4387 + */
1.4388 + assert( pPager->doNotSync==0 );
1.4389 + pPager->doNotSync = 1;
1.4390 +
1.4391 + /* This trick assumes that both the page-size and sector-size are
1.4392 + ** an integer power of 2. It sets variable pg1 to the identifier
1.4393 + ** of the first page of the sector pPg is located on.
1.4394 + */
1.4395 + pg1 = ((pPg->pgno-1) & ~(nPagePerSector-1)) + 1;
1.4396 +
1.4397 + sqlite3PagerPagecount(pPager, (int *)&nPageCount);
1.4398 + if( pPg->pgno>nPageCount ){
1.4399 + nPage = (pPg->pgno - pg1)+1;
1.4400 + }else if( (pg1+nPagePerSector-1)>nPageCount ){
1.4401 + nPage = nPageCount+1-pg1;
1.4402 + }else{
1.4403 + nPage = nPagePerSector;
1.4404 + }
1.4405 + assert(nPage>0);
1.4406 + assert(pg1<=pPg->pgno);
1.4407 + assert((pg1+nPage)>pPg->pgno);
1.4408 +
1.4409 + for(ii=0; ii<nPage && rc==SQLITE_OK; ii++){
1.4410 + Pgno pg = pg1+ii;
1.4411 + PgHdr *pPage;
1.4412 + if( pg==pPg->pgno || !sqlite3BitvecTest(pPager->pInJournal, pg) ){
1.4413 + if( pg!=PAGER_MJ_PGNO(pPager) ){ /* the PAGER_MJ_PGNO page is never fetched or journalled here */
1.4414 + rc = sqlite3PagerGet(pPager, pg, &pPage);
1.4415 + if( rc==SQLITE_OK ){
1.4416 + rc = pager_write(pPage);
1.4417 + if( pPage->needSync ){ /* checked whether or not pager_write() succeeded */
1.4418 + needSync = 1;
1.4419 + }
1.4420 + sqlite3PagerUnref(pPage);
1.4421 + }
1.4422 + }
1.4423 + }else if( (pPage = pager_lookup(pPager, pg))!=0 ){
1.4424 + if( pPage->needSync ){
1.4425 + needSync = 1;
1.4426 + }
1.4427 + }
1.4428 + }
1.4429 +
1.4430 + /* If the PgHdr.needSync flag is set for any of the nPage pages
1.4431 + ** starting at pg1, then it needs to be set for all of them. Because
1.4432 + ** writing to any of these nPage pages may damage the others, the
1.4433 + ** journal file must contain sync()ed copies of all of them
1.4434 + ** before any of them can be written out to the database file.
1.4435 + */
1.4436 + if( needSync ){
1.4437 + for(ii=0; ii<nPage && needSync; ii++){ /* needSync is always non-zero inside this branch */
1.4438 + PgHdr *pPage = pager_lookup(pPager, pg1+ii);
1.4439 + if( pPage ) pPage->needSync = 1;
1.4440 + }
1.4441 + assert(pPager->needSync);
1.4442 + }
1.4443 +
1.4444 + assert( pPager->doNotSync==1 );
1.4445 + pPager->doNotSync = 0;
1.4446 + }else{
1.4447 + rc = pager_write(pDbPage);
1.4448 + }
1.4449 + pagerLeave(pPager);
1.4450 + return rc;
1.4451 +}
1.4452 +
1.4453 +/*
1.4454 +** Debug-only helper (compiled unless NDEBUG is defined). Returns the dirty
1.4455 +** flag of pPg; non-zero means it is ok to change the content of the page.
1.4456 +*/
1.4457 +#ifndef NDEBUG
1.4458 +int sqlite3PagerIswriteable(DbPage *pPg){
1.4459 +  int isDirty = pPg->dirty;
1.4460 +  return isDirty;
1.4461 +}
1.4462 +#endif
1.4463 +
1.4464 +/*
1.4465 +** Tell the pager that the content of page pDbPage need not be written
1.4466 +** back to disk, even though the page may currently be marked dirty.
1.4467 +**
1.4468 +** The layer above calls this when none of the data on the page is in
1.4469 +** use any more. Where possible the page is marked clean so that it is
1.4470 +** never written to the database file.
1.4471 +**
1.4472 +** Tests show that this optimization, combined with
1.4473 +** sqlite3PagerDontRollback(), more than doubles the speed of large
1.4474 +** INSERT operations and quadruples the speed of large DELETEs.
1.4475 +**
1.4476 +** The alwaysRollback flag of the page is set by this routine, so that
1.4477 +** later sqlite3PagerDontRollback() calls for the same page are ignored.
1.4478 +** That guards against a page holding real data being put on the
1.4479 +** freelist and then taken off it again and reused within the same
1.4480 +** transaction: DontWrite() is called when it joins the freelist and
1.4481 +** DontRollback() when it is reused, yet the original content must
1.4482 +** still be rolled back because it is critical data.
1.4483 +**
1.4484 +** This routine does nothing for an in-memory database (MEMDB); in that
1.4485 +** case not even the alwaysRollback flag is set.
1.4486 +*/
1.4487 +void sqlite3PagerDontWrite(DbPage *pDbPage){
1.4488 +  PgHdr *pPg = pDbPage;
1.4489 +  Pager *pPager = pPg->pPager;
1.4490 +  int keepDirty;     /* True if pPg has to stay on the dirty list */
1.4491 +
1.4492 +  if( MEMDB ) return;
1.4493 +  pagerEnter(pPager);
1.4494 +  pPg->alwaysRollback = 1;
1.4495 +  if( pPg->dirty && !pPager->stmtInUse ){
1.4496 +    assert( pPager->state>=PAGER_SHARED );
1.4497 +    /* The page stays dirty when it is the last page of the database and
1.4498 +    ** the database has grown during this transaction. A grown file must
1.4499 +    ** have its last page written at least once so that the file on disk
1.4500 +    ** ends up the right size; a too-small file can lead to database
1.4501 +    ** corruption during the next transaction. In every other case the
1.4502 +    ** page is taken off the dirty list.
1.4503 +    */
1.4504 +    keepDirty = pPager->dbSize==(int)pPg->pgno
1.4505 +             && pPager->origDbSize<pPager->dbSize;
1.4506 +    if( !keepDirty ){
1.4507 +      PAGERTRACE3("DONT_WRITE page %d of %d\n", pPg->pgno, PAGERID(pPager));
1.4508 +      IOTRACE(("CLEAN %p %d\n", pPager, pPg->pgno))
1.4509 +      makeClean(pPg);
1.4510 +#ifdef SQLITE_CHECK_PAGES
1.4511 +      pPg->pageHash = pager_pagehash(pPg);
1.4512 +#endif
1.4513 +    }
1.4514 +  }
1.4515 +  pagerLeave(pPager);
1.4516 +}
1.4517 +
1.4518 +/*
1.4519 +** A call to this routine tells the pager that if a rollback occurs,
1.4520 +** it is not necessary to restore the data on the given page. This
1.4521 +** means that the pager does not have to record the given page in the
1.4522 +** rollback journal.
1.4523 +**
1.4524 +** If we have not yet actually read the content of this page (if
1.4525 +** the PgHdr.needRead flag is set) then this routine acts as a promise
1.4526 +** that we will never need to read the page content in the future,
1.4527 +** so the needRead flag can be cleared at this point.
1.4528 +*/
1.4529 +void sqlite3PagerDontRollback(DbPage *pPg){
1.4530 +  Pager *pPager = pPg->pPager;
1.4531 +
1.4532 +  pagerEnter(pPager);
1.4533 +  assert( pPager->state>=PAGER_RESERVED );
1.4534 +
1.4535 +  /* If the journal file is not open, or DontWrite() has been called on
1.4536 +  ** this page (DontWrite() sets the alwaysRollback flag), then this
1.4537 +  ** function is a no-op.
1.4538 +  */
1.4539 +  if( pPager->journalOpen==0 || pPg->alwaysRollback || pPager->alwaysRollback ){
1.4540 +    pagerLeave(pPager);
1.4541 +    return;
1.4542 +  }
1.4543 +  assert( !MEMDB );    /* For a memdb, pPager->journalOpen is always 0 */
1.4544 +
1.4545 +#ifdef SQLITE_SECURE_DELETE
1.4546 +  if( pPg->inJournal || (int)pPg->pgno > pPager->origDbSize ){
1.4547 +    pagerLeave(pPager);  /* every exit must balance the pagerEnter() above */
1.4548 +    return;
1.4549 +  }
1.4550 +#endif
1.4551 +
1.4552 +  /* If SECURE_DELETE is disabled, then there is no way that this
1.4553 +  ** routine can be called on a page for which sqlite3PagerDontWrite()
1.4554 +  ** has not been previously called during the same transaction.
1.4555 +  ** And if DontWrite() has previously been called, the following
1.4556 +  ** conditions must be met.
1.4557 +  **
1.4558 +  ** (Later:) Not true. If the database is corrupted by having duplicate
1.4559 +  ** pages on the freelist (ex: corrupt9.test) then the following is not
1.4560 +  ** necessarily true:
1.4561 +  */
1.4562 +  /* assert( !pPg->inJournal && (int)pPg->pgno <= pPager->origDbSize ); */
1.4563 +  assert( pPager->pInJournal!=0 );
1.4564 +  sqlite3BitvecSet(pPager->pInJournal, pPg->pgno);
1.4565 +  pPg->inJournal = 1;
1.4566 +  pPg->needRead = 0;
1.4567 +  if( pPager->stmtInUse ){
1.4568 +    assert( pPager->stmtSize >= pPager->origDbSize );
1.4569 +    sqlite3BitvecSet(pPager->pInStmt, pPg->pgno);
1.4570 +  }
1.4571 +  PAGERTRACE3("DONT_ROLLBACK page %d of %d\n", pPg->pgno, PAGERID(pPager));
1.4572 +  IOTRACE(("GARBAGE %p %d\n", pPager, pPg->pgno))
1.4573 +  pagerLeave(pPager);
1.4574 +}
1.4575 +
1.4576 +
1.4577 +/*
1.4578 +** This routine is called to increment the database file change-counter,
1.4579 +** stored at byte 24 of the pager file.
1.4580 +*/
1.4581 +static int pager_incr_changecounter(Pager *pPager, int isDirect){
1.4582 + PgHdr *pPgHdr;
1.4583 + u32 change_counter;
1.4584 + int rc = SQLITE_OK;
1.4585 +
1.4586 +#ifndef SQLITE_ENABLE_ATOMIC_WRITE
1.4587 + assert( isDirect==0 ); /* isDirect is only true for atomic writes */
1.4588 +#endif
1.4589 + if( !pPager->changeCountDone ){ /* nothing is done if the flag is already set */
1.4590 + /* Open page 1 of the file for writing. */
1.4591 + rc = sqlite3PagerGet(pPager, 1, &pPgHdr);
1.4592 + if( rc!=SQLITE_OK ) return rc;
1.4593 +
1.4594 + if( !isDirect ){ /* normal path: make page 1 writable via sqlite3PagerWrite() */
1.4595 + rc = sqlite3PagerWrite(pPgHdr);
1.4596 + if( rc!=SQLITE_OK ){
1.4597 + sqlite3PagerUnref(pPgHdr);
1.4598 + return rc;
1.4599 + }
1.4600 + }
1.4601 +
1.4602 + /* Increment the counter held in pPager->dbFileVers; store it at byte 24 of page 1. */
1.4603 + change_counter = sqlite3Get4byte((u8*)pPager->dbFileVers);
1.4604 + change_counter++;
1.4605 + put32bits(((char*)PGHDR_TO_DATA(pPgHdr))+24, change_counter);
1.4606 +
1.4607 +#ifdef SQLITE_ENABLE_ATOMIC_WRITE
1.4608 + if( isDirect && pPager->fd->pMethods ){
1.4609 + const void *zBuf = PGHDR_TO_DATA(pPgHdr);
1.4610 + rc = sqlite3OsWrite(pPager->fd, zBuf, pPager->pageSize, 0);
1.4611 + }
1.4612 +#endif
1.4613 +
1.4614 + /* Release the page reference. */
1.4615 + sqlite3PagerUnref(pPgHdr);
1.4616 + pPager->changeCountDone = 1; /* NOTE(review): also set when the direct write above failed */
1.4617 + }
1.4618 + return rc;
1.4619 +}
1.4620 +
1.4621 +/*
1.4622 +** Sync the pager's database file (pPager->fd) using pPager->sync_flags.
1.4623 +*/
1.4624 +int sqlite3PagerSync(Pager *pPager){
1.4625 +  int syncResult;    /* Value returned by sqlite3OsSync() */
1.4626 +  pagerEnter(pPager);
1.4627 +  syncResult = sqlite3OsSync(pPager->fd, pPager->sync_flags);
1.4628 +  pagerLeave(pPager);
1.4629 +  return syncResult;
1.4630 +}
1.4631 +
1.4632 +/*
1.4633 +** Sync the database file for the pager pPager. zMaster points to the name
1.4634 +** of a master journal file that should be written into the individual
1.4635 +** journal file. zMaster may be NULL, which is interpreted as no master
1.4636 +** journal (a single database transaction).
1.4637 +**
1.4638 +** This routine ensures that the journal is synced, all dirty pages written
1.4639 +** to the database file and the database file synced. The only thing that
1.4640 +** remains to commit the transaction is to delete the journal file (or
1.4641 +** master journal file if specified).
1.4642 +**
1.4643 +** Note that if zMaster==NULL, this does not overwrite a previous value
1.4644 +** passed to an sqlite3PagerCommitPhaseOne() call.
1.4645 +**
1.4646 +** If parameter nTrunc is non-zero, then the pager file is truncated to
1.4647 +** nTrunc pages (this is used by auto-vacuum databases).
1.4648 +**
1.4649 +** If the final parameter - noSync - is true, then the database file itself
1.4650 +** is not synced. The caller must call sqlite3PagerSync() directly to
1.4651 +** sync the database file before calling CommitPhaseTwo() to delete the
1.4652 +** journal file in this case.
1.4653 +*/
1.4654 +int sqlite3PagerCommitPhaseOne(
1.4655 + Pager *pPager,
1.4656 + const char *zMaster,
1.4657 + Pgno nTrunc,
1.4658 + int noSync
1.4659 +){
1.4660 + int rc = SQLITE_OK;
1.4661 +
1.4662 + if( pPager->errCode ){
1.4663 + return pPager->errCode;
1.4664 + }
1.4665 +
1.4666 + /* If no changes have been made, we can leave the transaction early.
1.4667 + */
1.4668 + if( pPager->dbModified==0 &&
1.4669 + (pPager->journalMode!=PAGER_JOURNALMODE_DELETE ||
1.4670 + pPager->exclusiveMode!=0) ){
1.4671 + assert( pPager->dirtyCache==0 || pPager->journalOpen==0 );
1.4672 + return SQLITE_OK;
1.4673 + }
1.4674 +
1.4675 + PAGERTRACE4("DATABASE SYNC: File=%s zMaster=%s nTrunc=%d\n",
1.4676 + pPager->zFilename, zMaster, nTrunc);
1.4677 + pagerEnter(pPager); /* the early returns above happen before the pager is entered */
1.4678 +
1.4679 + /* If this is an in-memory db, or no pages have been written to, or this
1.4680 + ** function has already been called, it is a no-op.
1.4681 + */
1.4682 + if( pPager->state!=PAGER_SYNCED && !MEMDB && pPager->dirtyCache ){
1.4683 + PgHdr *pPg;
1.4684 +
1.4685 +#ifdef SQLITE_ENABLE_ATOMIC_WRITE
1.4686 + /* The atomic-write optimization can be used if all of the
1.4687 + ** following are true:
1.4688 + **
1.4689 + ** + The file-system supports the atomic-write property for
1.4690 + ** blocks of size page-size, and
1.4691 + ** + This commit is not part of a multi-file transaction, and
1.4692 + ** + Exactly one page has been modified and stored in the journal file.
1.4693 + **
1.4694 + ** If the optimization can be used, then the journal file will never
1.4695 + ** be created for this transaction.
1.4696 + */
1.4697 + int useAtomicWrite = (
1.4698 + !zMaster &&
1.4699 + pPager->journalOpen &&
1.4700 + pPager->journalOff==jrnlBufferSize(pPager) &&
1.4701 + nTrunc==0 &&
1.4702 + (0==pPager->pDirty || 0==pPager->pDirty->pDirty)
1.4703 + );
1.4704 + assert( pPager->journalOpen || pPager->journalMode==PAGER_JOURNALMODE_OFF );
1.4705 + if( useAtomicWrite ){
1.4706 + /* Update the nRec field in the journal file. */
1.4707 + int offset = pPager->journalHdr + sizeof(aJournalMagic);
1.4708 + assert(pPager->nRec==1);
1.4709 + rc = write32bits(pPager->jfd, offset, pPager->nRec);
1.4710 +
1.4711 + /* Update the db file change counter. The following call will modify
1.4712 + ** the in-memory representation of page 1 to include the updated
1.4713 + ** change counter and then write page 1 directly to the database
1.4714 + ** file. Because of the atomic-write property of the host file-system,
1.4715 + ** this is safe.
1.4716 + */
1.4717 + if( rc==SQLITE_OK ){
1.4718 + rc = pager_incr_changecounter(pPager, 1);
1.4719 + }
1.4720 + }else{
1.4721 + rc = sqlite3JournalCreate(pPager->jfd);
1.4722 + }
1.4723 +
1.4724 + if( !useAtomicWrite && rc==SQLITE_OK ) /* guards the if( !pPager->setMaster ) block after the #endif */
1.4725 +#endif
1.4726 +
1.4727 + /* If a master journal file name has already been written to the
1.4728 + ** journal file, then no sync is required. This happens when it is
1.4729 + ** written, then the process fails to upgrade from a RESERVED to an
1.4730 + ** EXCLUSIVE lock. The next time the process tries to commit the
1.4731 + ** transaction the m-j name will have already been written.
1.4732 + */
1.4733 + if( !pPager->setMaster ){
1.4734 + rc = pager_incr_changecounter(pPager, 0);
1.4735 + if( rc!=SQLITE_OK ) goto sync_exit;
1.4736 + if( pPager->journalMode!=PAGER_JOURNALMODE_OFF ){
1.4737 +#ifndef SQLITE_OMIT_AUTOVACUUM
1.4738 + if( nTrunc!=0 ){
1.4739 + /* If this transaction has made the database smaller, then all pages
1.4740 + ** being discarded by the truncation must be written to the journal
1.4741 + ** file.
1.4742 + */
1.4743 + Pgno i;
1.4744 + int iSkip = PAGER_MJ_PGNO(pPager);
1.4745 + for( i=nTrunc+1; i<=pPager->origDbSize; i++ ){
1.4746 + if( !sqlite3BitvecTest(pPager->pInJournal, i) && i!=iSkip ){
1.4747 + rc = sqlite3PagerGet(pPager, i, &pPg);
1.4748 + if( rc!=SQLITE_OK ) goto sync_exit;
1.4749 + rc = sqlite3PagerWrite(pPg);
1.4750 + sqlite3PagerUnref(pPg);
1.4751 + if( rc!=SQLITE_OK ) goto sync_exit;
1.4752 + }
1.4753 + }
1.4754 + }
1.4755 +#endif
1.4756 + rc = writeMasterJournal(pPager, zMaster);
1.4757 + if( rc!=SQLITE_OK ) goto sync_exit;
1.4758 + rc = syncJournal(pPager);
1.4759 + }
1.4760 + }
1.4761 + if( rc!=SQLITE_OK ) goto sync_exit;
1.4762 +
1.4763 +#ifndef SQLITE_OMIT_AUTOVACUUM
1.4764 + if( nTrunc!=0 ){
1.4765 + rc = sqlite3PagerTruncate(pPager, nTrunc);
1.4766 + if( rc!=SQLITE_OK ) goto sync_exit;
1.4767 + }
1.4768 +#endif
1.4769 +
1.4770 + /* Write all dirty pages to the database file */
1.4771 + pPg = pager_get_all_dirty_pages(pPager);
1.4772 + rc = pager_write_pagelist(pPg);
1.4773 + if( rc!=SQLITE_OK ){
1.4774 + assert( rc!=SQLITE_IOERR_BLOCKED );
1.4775 + /* The error might have left the dirty list all fouled up here,
1.4776 + ** but that does not matter because if the dirty list did
1.4777 + ** get corrupted, then the transaction will roll back and
1.4778 + ** discard the dirty list. There is an assert in
1.4779 + ** pager_get_all_dirty_pages() that verifies that no attempt
1.4780 + ** is made to use an invalid dirty list.
1.4781 + */
1.4782 + goto sync_exit;
1.4783 + }
1.4784 + pPager->pDirty = 0;
1.4785 +
1.4786 + /* Sync the database file. */
1.4787 + if( !pPager->noSync && !noSync ){
1.4788 + rc = sqlite3OsSync(pPager->fd, pPager->sync_flags);
1.4789 + }
1.4790 + IOTRACE(("DBSYNC %p\n", pPager))
1.4791 +
1.4792 + pPager->state = PAGER_SYNCED; /* set even if the sqlite3OsSync() call above returned an error */
1.4793 + }else if( MEMDB && nTrunc!=0 ){
1.4794 + rc = sqlite3PagerTruncate(pPager, nTrunc);
1.4795 + }
1.4796 +
1.4797 +sync_exit:
1.4798 + if( rc==SQLITE_IOERR_BLOCKED ){
1.4799 + /* pager_incr_changecounter() may attempt to obtain an exclusive
1.4800 + * lock to spill the cache and return IOERR_BLOCKED. But since
1.4801 + * there is no chance the cache is inconsistent, it is
1.4802 + * better to return SQLITE_BUSY.
1.4803 + */
1.4804 + rc = SQLITE_BUSY;
1.4805 + }
1.4806 + pagerLeave(pPager);
1.4807 + return rc;
1.4808 +}
1.4809 +
1.4810 +
1.4811 +/*
1.4812 +** Commit all changes to the database and release the write lock.
1.4813 +**
1.4814 +** If the commit fails for any reason, a rollback attempt is made
1.4815 +** and an error code is returned. If the commit worked, SQLITE_OK
1.4816 +** is returned.
1.4817 +*/
1.4818 +int sqlite3PagerCommitPhaseTwo(Pager *pPager){
1.4819 + int rc;
1.4820 + PgHdr *pPg;
1.4821 +
1.4822 + if( pPager->errCode ){ /* a previously recorded pager error is returned unchanged */
1.4823 + return pPager->errCode;
1.4824 + }
1.4825 + if( pPager->state<PAGER_RESERVED ){
1.4826 + return SQLITE_ERROR;
1.4827 + }
1.4828 + if( pPager->dbModified==0 &&
1.4829 + (pPager->journalMode!=PAGER_JOURNALMODE_DELETE ||
1.4830 + pPager->exclusiveMode!=0) ){
1.4831 + assert( pPager->dirtyCache==0 || pPager->journalOpen==0 );
1.4832 + return SQLITE_OK;
1.4833 + }
1.4834 + pagerEnter(pPager);
1.4835 + PAGERTRACE2("COMMIT %d\n", PAGERID(pPager));
1.4836 + if( MEMDB ){ /* in-memory commit: drop saved page history and clear per-page flags */
1.4837 + pPg = pager_get_all_dirty_pages(pPager);
1.4838 + while( pPg ){
1.4839 + PgHistory *pHist = PGHDR_TO_HIST(pPg, pPager);
1.4840 + clearHistory(pHist);
1.4841 + pPg->dirty = 0;
1.4842 + pPg->inJournal = 0;
1.4843 + pHist->inStmt = 0;
1.4844 + pPg->needSync = 0;
1.4845 + pHist->pPrevStmt = pHist->pNextStmt = 0;
1.4846 + pPg = pPg->pDirty;
1.4847 + }
1.4848 + pPager->pDirty = 0;
1.4849 +#ifndef NDEBUG
1.4850 + for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
1.4851 + PgHistory *pHist = PGHDR_TO_HIST(pPg, pPager);
1.4852 + assert( !pPg->alwaysRollback );
1.4853 + assert( !pHist->pOrig );
1.4854 + assert( !pHist->pStmt );
1.4855 + }
1.4856 +#endif
1.4857 + pPager->pStmt = 0;
1.4858 + pPager->state = PAGER_SHARED;
1.4859 + pagerLeave(pPager);
1.4860 + return SQLITE_OK;
1.4861 + }
1.4862 + assert( pPager->state==PAGER_SYNCED || !pPager->dirtyCache );
1.4863 + rc = pager_end_transaction(pPager, pPager->setMaster);
1.4864 + rc = pager_error(pPager, rc); /* the value returned by pager_error() is what the caller sees */
1.4865 + pagerLeave(pPager);
1.4866 + return rc;
1.4867 +}
1.4868 +
1.4869 +/*
1.4870 +** Rollback all changes. The database falls back to PAGER_SHARED mode.
1.4871 +** All in-memory cache pages revert to their original data contents.
1.4872 +** The journal is deleted.
1.4873 +**
1.4874 +** This routine cannot fail unless some other process is not following
1.4875 +** the correct locking protocol or unless some other
1.4876 +** process is writing trash into the journal file (SQLITE_CORRUPT) or
1.4877 +** unless a prior malloc() failed (SQLITE_NOMEM). Appropriate error
1.4878 +** codes are returned for all these occasions. Otherwise,
1.4879 +** SQLITE_OK is returned.
1.4880 +*/
1.4881 +int sqlite3PagerRollback(Pager *pPager){
1.4882 + int rc;
1.4883 + PAGERTRACE2("ROLLBACK %d\n", PAGERID(pPager));
1.4884 + if( MEMDB ){
1.4885 + PgHdr *p;
1.4886 + for(p=pPager->pAll; p; p=p->pNextAll){
1.4887 + PgHistory *pHist;
1.4888 + assert( !p->alwaysRollback );
1.4889 + if( !p->dirty ){
1.4890 + assert( !((PgHistory *)PGHDR_TO_HIST(p, pPager))->pOrig );
1.4891 + assert( !((PgHistory *)PGHDR_TO_HIST(p, pPager))->pStmt );
1.4892 + continue;
1.4893 + }
1.4894 +
1.4895 + pHist = PGHDR_TO_HIST(p, pPager);
1.4896 + if( pHist->pOrig ){
1.4897 + memcpy(PGHDR_TO_DATA(p), pHist->pOrig, pPager->pageSize);
1.4898 + PAGERTRACE3("ROLLBACK-PAGE %d of %d\n", p->pgno, PAGERID(pPager));
1.4899 + }else{
1.4900 + PAGERTRACE3("PAGE %d is clean on %d\n", p->pgno, PAGERID(pPager));
1.4901 + }
1.4902 + clearHistory(pHist);
1.4903 + p->dirty = 0;
1.4904 + p->inJournal = 0;
1.4905 + pHist->inStmt = 0;
1.4906 + pHist->pPrevStmt = pHist->pNextStmt = 0;
1.4907 + if( pPager->xReiniter ){
1.4908 + pPager->xReiniter(p, pPager->pageSize);
1.4909 + }
1.4910 + }
1.4911 + pPager->pDirty = 0;
1.4912 + pPager->pStmt = 0;
1.4913 + pPager->dbSize = pPager->origDbSize;
1.4914 + pager_truncate_cache(pPager);
1.4915 + pPager->stmtInUse = 0;
1.4916 + pPager->state = PAGER_SHARED;
1.4917 + return SQLITE_OK;
1.4918 + }
1.4919 +
1.4920 + pagerEnter(pPager);
1.4921 + if( !pPager->dirtyCache || !pPager->journalOpen ){
1.4922 + rc = pager_end_transaction(pPager, pPager->setMaster);
1.4923 + pagerLeave(pPager);
1.4924 + return rc;
1.4925 + }
1.4926 +
1.4927 + if( pPager->errCode && pPager->errCode!=SQLITE_FULL ){
1.4928 + if( pPager->state>=PAGER_EXCLUSIVE ){
1.4929 + pager_playback(pPager, 0);
1.4930 + }
1.4931 + pagerLeave(pPager);
1.4932 + return pPager->errCode;
1.4933 + }
1.4934 + if( pPager->state==PAGER_RESERVED ){
1.4935 + int rc2;
1.4936 + rc = pager_playback(pPager, 0);
1.4937 + rc2 = pager_end_transaction(pPager, pPager->setMaster);
1.4938 + if( rc==SQLITE_OK ){
1.4939 + rc = rc2;
1.4940 + }
1.4941 + }else{
1.4942 + rc = pager_playback(pPager, 0);
1.4943 + }
1.4944 + /* pager_reset(pPager); */
1.4945 + pPager->dbSize = -1;
1.4946 +
1.4947 + /* If an error occurs during a ROLLBACK, we can no longer trust the pager
1.4948 + ** cache. So call pager_error() on the way out to make any error
1.4949 + ** persistent.
1.4950 + */
1.4951 + rc = pager_error(pPager, rc);
1.4952 + pagerLeave(pPager);
1.4953 + return rc;
1.4954 +}
1.4955 +
1.4956 +/*
1.4957 +** Return TRUE if the database file is opened read-only. Return FALSE
1.4958 +** if the database is (in theory) writable.
1.4959 +*/
1.4960 +int sqlite3PagerIsreadonly(Pager *pPager){
1.4961 + return pPager->readOnly;
1.4962 +}
1.4963 +
1.4964 +/*
1.4965 +** Return the number of references to the pager.
1.4966 +*/
1.4967 +int sqlite3PagerRefcount(Pager *pPager){
1.4968 + return pPager->nRef;
1.4969 +}
1.4970 +
1.4971 +#ifdef SQLITE_TEST
1.4972 +/*
1.4973 +** This routine is used for testing and analysis only.
1.4974 +*/
1.4975 +int *sqlite3PagerStats(Pager *pPager){
1.4976 + static int a[11];
1.4977 + a[0] = pPager->nRef;
1.4978 + a[1] = pPager->nPage;
1.4979 + a[2] = pPager->mxPage;
1.4980 + a[3] = pPager->dbSize;
1.4981 + a[4] = pPager->state;
1.4982 + a[5] = pPager->errCode;
1.4983 + a[6] = pPager->nHit;
1.4984 + a[7] = pPager->nMiss;
1.4985 + a[8] = 0; /* Used to be pPager->nOvfl */
1.4986 + a[9] = pPager->nRead;
1.4987 + a[10] = pPager->nWrite;
1.4988 + return a;
1.4989 +}
1.4990 +int sqlite3PagerIsMemdb(Pager *pPager){
1.4991 + return MEMDB;
1.4992 +}
1.4993 +#endif
1.4994 +
1.4995 +/*
1.4996 +** Set the statement rollback point.
1.4997 +**
1.4998 +** This routine should be called with the transaction journal already
1.4999 +** open. A new statement journal is created that can be used to rollback
1.5000 +** changes of a single SQL command within a larger transaction.
1.5001 +*/
1.5002 +static int pagerStmtBegin(Pager *pPager){
1.5003 + int rc;
1.5004 + assert( !pPager->stmtInUse );
1.5005 + assert( pPager->state>=PAGER_SHARED );
1.5006 + assert( pPager->dbSize>=0 );
1.5007 + PAGERTRACE2("STMT-BEGIN %d\n", PAGERID(pPager));
1.5008 + if( MEMDB ){
1.5009 + pPager->stmtInUse = 1;
1.5010 + pPager->stmtSize = pPager->dbSize;
1.5011 + return SQLITE_OK;
1.5012 + }
1.5013 + if( !pPager->journalOpen ){
1.5014 + pPager->stmtAutoopen = 1;
1.5015 + return SQLITE_OK;
1.5016 + }
1.5017 + assert( pPager->journalOpen );
1.5018 + pagerLeave(pPager);
1.5019 + assert( pPager->pInStmt==0 );
1.5020 + pPager->pInStmt = sqlite3BitvecCreate(pPager->dbSize);
1.5021 + pagerEnter(pPager);
1.5022 + if( pPager->pInStmt==0 ){
1.5023 + /* sqlite3OsLock(pPager->fd, SHARED_LOCK); */
1.5024 + return SQLITE_NOMEM;
1.5025 + }
1.5026 + pPager->stmtJSize = pPager->journalOff;
1.5027 + pPager->stmtSize = pPager->dbSize;
1.5028 + pPager->stmtHdrOff = 0;
1.5029 + pPager->stmtCksum = pPager->cksumInit;
1.5030 + if( !pPager->stmtOpen ){
1.5031 + rc = sqlite3PagerOpentemp(pPager, pPager->stfd, SQLITE_OPEN_SUBJOURNAL);
1.5032 + if( rc ){
1.5033 + goto stmt_begin_failed;
1.5034 + }
1.5035 + pPager->stmtOpen = 1;
1.5036 + pPager->stmtNRec = 0;
1.5037 + }
1.5038 + pPager->stmtInUse = 1;
1.5039 + return SQLITE_OK;
1.5040 +
1.5041 +stmt_begin_failed:
1.5042 + if( pPager->pInStmt ){
1.5043 + sqlite3BitvecDestroy(pPager->pInStmt);
1.5044 + pPager->pInStmt = 0;
1.5045 + }
1.5046 + return rc;
1.5047 +}
1.5048 +int sqlite3PagerStmtBegin(Pager *pPager){
1.5049 + int rc;
1.5050 + pagerEnter(pPager);
1.5051 + rc = pagerStmtBegin(pPager);
1.5052 + pagerLeave(pPager);
1.5053 + return rc;
1.5054 +}
1.5055 +
1.5056 +/*
1.5057 +** Commit a statement.
1.5058 +*/
1.5059 +int sqlite3PagerStmtCommit(Pager *pPager){
1.5060 + pagerEnter(pPager);
1.5061 + if( pPager->stmtInUse ){
1.5062 + PgHdr *pPg, *pNext;
1.5063 + PAGERTRACE2("STMT-COMMIT %d\n", PAGERID(pPager));
1.5064 + if( !MEMDB ){
1.5065 + /* sqlite3OsTruncate(pPager->stfd, 0); */
1.5066 + sqlite3BitvecDestroy(pPager->pInStmt);
1.5067 + pPager->pInStmt = 0;
1.5068 + }else{
1.5069 + for(pPg=pPager->pStmt; pPg; pPg=pNext){
1.5070 + PgHistory *pHist = PGHDR_TO_HIST(pPg, pPager);
1.5071 + pNext = pHist->pNextStmt;
1.5072 + assert( pHist->inStmt );
1.5073 + pHist->inStmt = 0;
1.5074 + pHist->pPrevStmt = pHist->pNextStmt = 0;
1.5075 + sqlite3PageFree(pHist->pStmt);
1.5076 + pHist->pStmt = 0;
1.5077 + }
1.5078 + }
1.5079 + pPager->stmtNRec = 0;
1.5080 + pPager->stmtInUse = 0;
1.5081 + pPager->pStmt = 0;
1.5082 + }
1.5083 + pPager->stmtAutoopen = 0;
1.5084 + pagerLeave(pPager);
1.5085 + return SQLITE_OK;
1.5086 +}
1.5087 +
1.5088 +/*
1.5089 +** Rollback a statement.
1.5090 +*/
1.5091 +int sqlite3PagerStmtRollback(Pager *pPager){
1.5092 + int rc;
1.5093 + pagerEnter(pPager);
1.5094 + if( pPager->stmtInUse ){
1.5095 + PAGERTRACE2("STMT-ROLLBACK %d\n", PAGERID(pPager));
1.5096 + if( MEMDB ){
1.5097 + PgHdr *pPg;
1.5098 + PgHistory *pHist;
1.5099 + for(pPg=pPager->pStmt; pPg; pPg=pHist->pNextStmt){
1.5100 + pHist = PGHDR_TO_HIST(pPg, pPager);
1.5101 + if( pHist->pStmt ){
1.5102 + memcpy(PGHDR_TO_DATA(pPg), pHist->pStmt, pPager->pageSize);
1.5103 + sqlite3PageFree(pHist->pStmt);
1.5104 + pHist->pStmt = 0;
1.5105 + }
1.5106 + }
1.5107 + pPager->dbSize = pPager->stmtSize;
1.5108 + pager_truncate_cache(pPager);
1.5109 + rc = SQLITE_OK;
1.5110 + }else{
1.5111 + rc = pager_stmt_playback(pPager);
1.5112 + }
1.5113 + sqlite3PagerStmtCommit(pPager);
1.5114 + }else{
1.5115 + rc = SQLITE_OK;
1.5116 + }
1.5117 + pPager->stmtAutoopen = 0;
1.5118 + pagerLeave(pPager);
1.5119 + return rc;
1.5120 +}
1.5121 +
1.5122 +/*
1.5123 +** Return the full pathname of the database file.
1.5124 +*/
1.5125 +const char *sqlite3PagerFilename(Pager *pPager){
1.5126 + return pPager->zFilename;
1.5127 +}
1.5128 +
1.5129 +/*
1.5130 +** Return the VFS structure for the pager.
1.5131 +*/
1.5132 +const sqlite3_vfs *sqlite3PagerVfs(Pager *pPager){
1.5133 + return pPager->pVfs;
1.5134 +}
1.5135 +
1.5136 +/*
1.5137 +** Return the file handle for the database file associated
1.5138 +** with the pager. This might return NULL if the file has
1.5139 +** not yet been opened.
1.5140 +*/
1.5141 +sqlite3_file *sqlite3PagerFile(Pager *pPager){
1.5142 + return pPager->fd;
1.5143 +}
1.5144 +
1.5145 +/*
1.5146 +** Return the directory of the database file.
1.5147 +*/
1.5148 +const char *sqlite3PagerDirname(Pager *pPager){
1.5149 + return pPager->zDirectory;
1.5150 +}
1.5151 +
1.5152 +/*
1.5153 +** Return the full pathname of the journal file.
1.5154 +*/
1.5155 +const char *sqlite3PagerJournalname(Pager *pPager){
1.5156 + return pPager->zJournal;
1.5157 +}
1.5158 +
1.5159 +/*
1.5160 +** Return true if fsync() calls are disabled for this pager. Return FALSE
1.5161 +** if fsync()s are executed normally.
1.5162 +*/
1.5163 +int sqlite3PagerNosync(Pager *pPager){
1.5164 + return pPager->noSync;
1.5165 +}
1.5166 +
1.5167 +#ifdef SQLITE_HAS_CODEC
1.5168 +/*
1.5169 +** Set the codec for this pager
1.5170 +*/
1.5171 +void sqlite3PagerSetCodec(
1.5172 + Pager *pPager,
1.5173 + void *(*xCodec)(void*,void*,Pgno,int),
1.5174 + void *pCodecArg
1.5175 +){
1.5176 + pPager->xCodec = xCodec;
1.5177 + pPager->pCodecArg = pCodecArg;
1.5178 +}
1.5179 +#endif
1.5180 +
1.5181 +#ifndef SQLITE_OMIT_AUTOVACUUM
1.5182 +/*
1.5183 +** Move the page pPg to location pgno in the file.
1.5184 +**
1.5185 +** There must be no references to the page previously located at
1.5186 +** pgno (which we call pPgOld) though that page is allowed to be
1.5187 +** in cache. If the page previous located at pgno is not already
1.5188 +** in the rollback journal, it is not put there by by this routine.
1.5189 +**
1.5190 +** References to the page pPg remain valid. Updating any
1.5191 +** meta-data associated with pPg (i.e. data stored in the nExtra bytes
1.5192 +** allocated along with the page) is the responsibility of the caller.
1.5193 +**
1.5194 +** A transaction must be active when this routine is called. It used to be
1.5195 +** required that a statement transaction was not active, but this restriction
1.5196 +** has been removed (CREATE INDEX needs to move a page when a statement
1.5197 +** transaction is active).
1.5198 +**
1.5199 +** If the fourth argument, isCommit, is non-zero, then this page is being
1.5200 +** moved as part of a database reorganization just before the transaction
1.5201 +** is being committed. In this case, it is guaranteed that the database page
1.5202 +** pPg refers to will not be written to again within this transaction.
1.5203 +*/
1.5204 +int sqlite3PagerMovepage(Pager *pPager, DbPage *pPg, Pgno pgno, int isCommit){
1.5205 + PgHdr *pPgOld; /* The page being overwritten. */
1.5206 + int h;
1.5207 + Pgno needSyncPgno = 0;
1.5208 +
1.5209 + pagerEnter(pPager);
1.5210 + assert( pPg->nRef>0 );
1.5211 +
1.5212 + PAGERTRACE5("MOVE %d page %d (needSync=%d) moves to %d\n",
1.5213 + PAGERID(pPager), pPg->pgno, pPg->needSync, pgno);
1.5214 + IOTRACE(("MOVE %p %d %d\n", pPager, pPg->pgno, pgno))
1.5215 +
1.5216 + pager_get_content(pPg);
1.5217 +
1.5218 + /* If the journal needs to be sync()ed before page pPg->pgno can
1.5219 + ** be written to, store pPg->pgno in local variable needSyncPgno.
1.5220 + **
1.5221 + ** If the isCommit flag is set, there is no need to remember that
1.5222 + ** the journal needs to be sync()ed before database page pPg->pgno
1.5223 + ** can be written to. The caller has already promised not to write to it.
1.5224 + */
1.5225 + if( pPg->needSync && !isCommit ){
1.5226 + needSyncPgno = pPg->pgno;
1.5227 + assert( pPg->inJournal || (int)pgno>pPager->origDbSize );
1.5228 + assert( pPg->dirty );
1.5229 + assert( pPager->needSync );
1.5230 + }
1.5231 +
1.5232 + /* Unlink pPg from its hash-chain */
1.5233 + unlinkHashChain(pPager, pPg);
1.5234 +
1.5235 + /* If the cache contains a page with page-number pgno, remove it
1.5236 + ** from its hash chain. Also, if the PgHdr.needSync was set for
1.5237 + ** page pgno before the 'move' operation, it needs to be retained
1.5238 + ** for the page moved there.
1.5239 + */
1.5240 + pPg->needSync = 0;
1.5241 + pPgOld = pager_lookup(pPager, pgno);
1.5242 + if( pPgOld ){
1.5243 + assert( pPgOld->nRef==0 );
1.5244 + unlinkHashChain(pPager, pPgOld);
1.5245 + makeClean(pPgOld);
1.5246 + pPg->needSync = pPgOld->needSync;
1.5247 + }else{
1.5248 + pPg->needSync = 0;
1.5249 + }
1.5250 + pPg->inJournal = sqlite3BitvecTest(pPager->pInJournal, pgno);
1.5251 +
1.5252 + /* Change the page number for pPg and insert it into the new hash-chain. */
1.5253 + assert( pgno!=0 );
1.5254 + pPg->pgno = pgno;
1.5255 + h = pgno & (pPager->nHash-1);
1.5256 + if( pPager->aHash[h] ){
1.5257 + assert( pPager->aHash[h]->pPrevHash==0 );
1.5258 + pPager->aHash[h]->pPrevHash = pPg;
1.5259 + }
1.5260 + pPg->pNextHash = pPager->aHash[h];
1.5261 + pPager->aHash[h] = pPg;
1.5262 + pPg->pPrevHash = 0;
1.5263 +
1.5264 + makeDirty(pPg);
1.5265 + pPager->dirtyCache = 1;
1.5266 + pPager->dbModified = 1;
1.5267 +
1.5268 + if( needSyncPgno ){
1.5269 + /* If needSyncPgno is non-zero, then the journal file needs to be
1.5270 + ** sync()ed before any data is written to database file page needSyncPgno.
1.5271 + ** Currently, no such page exists in the page-cache and the
1.5272 + ** "is journaled" bitvec flag has been set. This needs to be remedied by
1.5273 + ** loading the page into the pager-cache and setting the PgHdr.needSync
1.5274 + ** flag.
1.5275 + **
1.5276 + ** If the attempt to load the page into the page-cache fails, (due
1.5277 + ** to a malloc() or IO failure), clear the bit in the pInJournal[]
1.5278 + ** array. Otherwise, if the page is loaded and written again in
1.5279 + ** this transaction, it may be written to the database file before
1.5280 + ** it is synced into the journal file. This way, it may end up in
1.5281 + ** the journal file twice, but that is not a problem.
1.5282 + **
1.5283 + ** The sqlite3PagerGet() call may cause the journal to sync. So make
1.5284 + ** sure the Pager.needSync flag is set too.
1.5285 + */
1.5286 + int rc;
1.5287 + PgHdr *pPgHdr;
1.5288 + assert( pPager->needSync );
1.5289 + rc = sqlite3PagerGet(pPager, needSyncPgno, &pPgHdr);
1.5290 + if( rc!=SQLITE_OK ){
1.5291 + if( pPager->pInJournal && (int)needSyncPgno<=pPager->origDbSize ){
1.5292 + sqlite3BitvecClear(pPager->pInJournal, needSyncPgno);
1.5293 + }
1.5294 + pagerLeave(pPager);
1.5295 + return rc;
1.5296 + }
1.5297 + pPager->needSync = 1;
1.5298 + pPgHdr->needSync = 1;
1.5299 + pPgHdr->inJournal = 1;
1.5300 + makeDirty(pPgHdr);
1.5301 + sqlite3PagerUnref(pPgHdr);
1.5302 + }
1.5303 +
1.5304 + pagerLeave(pPager);
1.5305 + return SQLITE_OK;
1.5306 +}
1.5307 +#endif
1.5308 +
1.5309 +/*
1.5310 +** Return a pointer to the data for the specified page.
1.5311 +*/
1.5312 +void *sqlite3PagerGetData(DbPage *pPg){
1.5313 + return PGHDR_TO_DATA(pPg);
1.5314 +}
1.5315 +
1.5316 +/*
1.5317 +** Return a pointer to the Pager.nExtra bytes of "extra" space
1.5318 +** allocated along with the specified page.
1.5319 +*/
1.5320 +void *sqlite3PagerGetExtra(DbPage *pPg){
1.5321 + Pager *pPager = pPg->pPager;
1.5322 + return (pPager?PGHDR_TO_EXTRA(pPg, pPager):0);
1.5323 +}
1.5324 +
1.5325 +/*
1.5326 +** Get/set the locking-mode for this pager. Parameter eMode must be one
1.5327 +** of PAGER_LOCKINGMODE_QUERY, PAGER_LOCKINGMODE_NORMAL or
1.5328 +** PAGER_LOCKINGMODE_EXCLUSIVE. If the parameter is not _QUERY, then
1.5329 +** the locking-mode is set to the value specified.
1.5330 +**
1.5331 +** The returned value is either PAGER_LOCKINGMODE_NORMAL or
1.5332 +** PAGER_LOCKINGMODE_EXCLUSIVE, indicating the current (possibly updated)
1.5333 +** locking-mode.
1.5334 +*/
1.5335 +int sqlite3PagerLockingMode(Pager *pPager, int eMode){
1.5336 + assert( eMode==PAGER_LOCKINGMODE_QUERY
1.5337 + || eMode==PAGER_LOCKINGMODE_NORMAL
1.5338 + || eMode==PAGER_LOCKINGMODE_EXCLUSIVE );
1.5339 + assert( PAGER_LOCKINGMODE_QUERY<0 );
1.5340 + assert( PAGER_LOCKINGMODE_NORMAL>=0 && PAGER_LOCKINGMODE_EXCLUSIVE>=0 );
1.5341 + if( eMode>=0 && !pPager->tempFile ){
1.5342 + pPager->exclusiveMode = eMode;
1.5343 + }
1.5344 + return (int)pPager->exclusiveMode;
1.5345 +}
1.5346 +
1.5347 +/*
1.5348 +** Get/set the journal-mode for this pager. Parameter eMode must be one
1.5349 +** of PAGER_JOURNALMODE_QUERY, PAGER_JOURNALMODE_DELETE or
1.5350 +** PAGER_JOURNALMODE_PERSIST. If the parameter is not _QUERY, then
1.5351 +** the journal-mode is set to the value specified.
1.5352 +**
1.5353 +** The returned value is either PAGER_JOURNALMODE_DELETE or
1.5354 +** PAGER_JOURNALMODE_PERSIST, indicating the current (possibly updated)
1.5355 +** journal-mode.
1.5356 +*/
1.5357 +int sqlite3PagerJournalMode(Pager *pPager, int eMode){
1.5358 + assert( eMode==PAGER_JOURNALMODE_QUERY
1.5359 + || eMode==PAGER_JOURNALMODE_DELETE
1.5360 + || eMode==PAGER_JOURNALMODE_PERSIST
1.5361 + || eMode==PAGER_JOURNALMODE_OFF );
1.5362 + assert( PAGER_JOURNALMODE_QUERY<0 );
1.5363 + assert( PAGER_JOURNALMODE_DELETE>=0 && PAGER_JOURNALMODE_PERSIST>=0 );
1.5364 + if( eMode>=0 ){
1.5365 + pPager->journalMode = eMode;
1.5366 + }
1.5367 + return (int)pPager->journalMode;
1.5368 +}
1.5369 +
1.5370 +/*
1.5371 +** Get/set the size-limit used for persistent journal files.
1.5372 +*/
1.5373 +i64 sqlite3PagerJournalSizeLimit(Pager *pPager, i64 iLimit){
1.5374 + if( iLimit>=-1 ){
1.5375 + pPager->journalSizeLimit = iLimit;
1.5376 + }
1.5377 + return pPager->journalSizeLimit;
1.5378 +}
1.5379 +
1.5380 +#endif /* SQLITE_OMIT_DISKIO */