os/persistentdata/persistentstorage/sql/SQLite/pager.c
changeset 0 bde4ae8d615e
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/os/persistentdata/persistentstorage/sql/SQLite/pager.c	Fri Jun 15 03:10:57 2012 +0200
     1.3 @@ -0,0 +1,5377 @@
     1.4 +/*
     1.5 +** 2001 September 15
     1.6 +**
     1.7 +** The author disclaims copyright to this source code.  In place of
     1.8 +** a legal notice, here is a blessing:
     1.9 +**
    1.10 +**    May you do good and not evil.
    1.11 +**    May you find forgiveness for yourself and forgive others.
    1.12 +**    May you share freely, never taking more than you give.
    1.13 +**
    1.14 +*************************************************************************
    1.15 +** This is the implementation of the page cache subsystem or "pager".
    1.16 +** 
    1.17 +** The pager is used to access a database disk file.  It implements
    1.18 +** atomic commit and rollback through the use of a journal file that
    1.19 +** is separate from the database file.  The pager also implements file
    1.20 +** locking to prevent two processes from writing the same database
    1.21 +** file simultaneously, or one process from reading the database while
    1.22 +** another is writing.
    1.23 +**
    1.24 +** @(#) $Id: pager.c,v 1.469 2008/08/02 03:50:39 drh Exp $
    1.25 +*/
    1.26 +#ifndef SQLITE_OMIT_DISKIO
    1.27 +#include "sqliteInt.h"
    1.28 +#include <assert.h>
    1.29 +#include <string.h>
    1.30 +
    1.31 +/*
    1.32 +** Macros for troubleshooting.  Normally turned off
    1.33 +*/
    1.34 +#if 0  /* change to 1 to route the PAGERTRACEn() macros through printf */
    1.35 +#define sqlite3DebugPrintf printf
    1.36 +#define PAGERTRACE1(X)       sqlite3DebugPrintf(X)
    1.37 +#define PAGERTRACE2(X,Y)     sqlite3DebugPrintf(X,Y)
    1.38 +#define PAGERTRACE3(X,Y,Z)   sqlite3DebugPrintf(X,Y,Z)
    1.39 +#define PAGERTRACE4(X,Y,Z,W) sqlite3DebugPrintf(X,Y,Z,W)
    1.40 +#define PAGERTRACE5(X,Y,Z,W,V) sqlite3DebugPrintf(X,Y,Z,W,V)
    1.41 +#else  /* tracing disabled: every PAGERTRACEn() expands to nothing */
    1.42 +#define PAGERTRACE1(X)
    1.43 +#define PAGERTRACE2(X,Y)
    1.44 +#define PAGERTRACE3(X,Y,Z)
    1.45 +#define PAGERTRACE4(X,Y,Z,W)
    1.46 +#define PAGERTRACE5(X,Y,Z,W,V)
    1.47 +#endif /* troubleshooting macros */
    1.48 +
    1.49 +/*
    1.50 +** The following two macros are used within the PAGERTRACEX() macros above
    1.51 +** to print out file-descriptors. 
    1.52 +**
    1.53 +** PAGERID() takes a pointer to a Pager struct as its argument. The
    1.54 +** associated file-descriptor is returned. FILEHANDLEID() takes an sqlite3_file
    1.55 +** struct as its argument.
    1.56 +*/
    1.57 +#define PAGERID(p) ((int)(p->fd))   /* NOTE(review): p is not parenthesized; pass a plain identifier */
    1.58 +#define FILEHANDLEID(fd) ((int)fd)  /* NOTE(review): fd is not parenthesized; value is cast to int for tracing only */
    1.59 +
    1.60 +/*
    1.61 +** The page cache as a whole is always in one of the following
    1.62 +** states:
    1.63 +**
    1.64 +**   PAGER_UNLOCK        The page cache is not currently reading or 
    1.65 +**                       writing the database file.  There is no
    1.66 +**                       data held in memory.  This is the initial
    1.67 +**                       state.
    1.68 +**
    1.69 +**   PAGER_SHARED        The page cache is reading the database.
    1.70 +**                       Writing is not permitted.  There can be
    1.71 +**                       multiple readers accessing the same database
    1.72 +**                       file at the same time.
    1.73 +**
    1.74 +**   PAGER_RESERVED      This process has reserved the database for writing
    1.75 +**                       but has not yet made any changes.  Only one process
    1.76 +**                       at a time can reserve the database.  The original
    1.77 +**                       database file has not been modified so other
    1.78 +**                       processes may still be reading the on-disk
    1.79 +**                       database file.
    1.80 +**
    1.81 +**   PAGER_EXCLUSIVE     The page cache is writing the database.
    1.82 +**                       Access is exclusive.  No other processes or
    1.83 +**                       threads can be reading or writing while one
    1.84 +**                       process is writing.
    1.85 +**
    1.86 +**   PAGER_SYNCED        The pager moves to this state from PAGER_EXCLUSIVE
    1.87 +**                       after all dirty pages have been written to the
    1.88 +**                       database file and the file has been synced to
    1.89 +**                       disk. All that remains to do is to remove or
    1.90 +**                       truncate the journal file and the transaction 
    1.91 +**                       will be committed.
    1.92 +**
    1.93 +** The page cache comes up in PAGER_UNLOCK.  The first time a
    1.94 +** sqlite3PagerGet() occurs, the state transitions to PAGER_SHARED.
    1.95 +** After all pages have been released using sqlite_page_unref(),
    1.96 +** the state transitions back to PAGER_UNLOCK.  The first time
    1.97 +** that sqlite3PagerWrite() is called, the state transitions to
    1.98 +** PAGER_RESERVED.  (Note that sqlite3PagerWrite() can only be
    1.99 +** called on an outstanding page which means that the pager must
   1.100 +** be in PAGER_SHARED before it transitions to PAGER_RESERVED.)
   1.101 +** PAGER_RESERVED means that there is an open rollback journal.
   1.102 +** The transition to PAGER_EXCLUSIVE occurs before any changes
   1.103 +** are made to the database file, though writes to the rollback
   1.104 +** journal occur with just PAGER_RESERVED.  After an sqlite3PagerRollback()
   1.105 +** or sqlite3PagerCommitPhaseTwo(), the state can go back to PAGER_SHARED,
   1.106 +** or it can stay at PAGER_EXCLUSIVE if we are in exclusive access mode.
   1.107 +*/
   1.108 +#define PAGER_UNLOCK      0   /* initial state; no data held in memory */
   1.109 +#define PAGER_SHARED      1   /* same as SHARED_LOCK */
   1.110 +#define PAGER_RESERVED    2   /* same as RESERVED_LOCK */
   1.111 +#define PAGER_EXCLUSIVE   4   /* same as EXCLUSIVE_LOCK */
   1.112 +#define PAGER_SYNCED      5   /* db file written and synced; journal not yet removed/truncated */
   1.113 +
   1.114 +/*
   1.115 +** If the SQLITE_BUSY_RESERVED_LOCK macro is set to true at compile-time,
   1.116 +** then failed attempts to get a reserved lock will invoke the busy callback.
   1.117 +** This is off by default.  To see why, consider the following scenario:
   1.118 +** 
   1.119 +** Suppose thread A already has a shared lock and wants a reserved lock.
   1.120 +** Thread B already has a reserved lock and wants an exclusive lock.  If
   1.121 +** both threads are using their busy callbacks, it might be a long time
   1.122 +** before one of the threads gives up and allows the other to proceed.
   1.123 +** But if the thread trying to get the reserved lock gives up quickly
   1.124 +** (if it never invokes its busy callback) then the contention will be
   1.125 +** resolved quickly.
   1.126 +*/
   1.127 +#ifndef SQLITE_BUSY_RESERVED_LOCK
   1.128 +# define SQLITE_BUSY_RESERVED_LOCK 0   /* default: off (may be overridden at compile time) */
   1.129 +#endif
   1.130 +
   1.131 +/*
   1.132 +** This macro rounds values up so that if the value is an address it
   1.133 +** is guaranteed to be an address that is aligned to an 8-byte boundary.
   1.134 +*/
   1.135 +#define FORCE_ALIGNMENT(X)   (((X)+7)&~7)   /* e.g. 0 -> 0, 1..8 -> 8, 9..16 -> 16 */
   1.136 +
   1.137 +typedef struct PgHdr PgHdr;
   1.138 +
   1.139 +/*
   1.140 +** Each pager stores all currently unreferenced pages in a list sorted
   1.141 +** in least-recently-used (LRU) order (i.e. the first item on the list has 
   1.142 +** not been referenced in a long time, the last item has been recently
   1.143 +** used). An instance of this structure is included as part of each
   1.144 +** pager structure for this purpose (variable Pager.lru).
   1.145 +**
   1.146 +** Additionally, if memory-management is enabled, all unreferenced pages 
   1.147 +** are stored in a global LRU list (global variable sqlite3LruPageList).
   1.148 +**
   1.149 +** In both cases, the PagerLruList.pFirstSynced variable points to
   1.150 +** the first page in the corresponding list that does not require an
   1.151 +** fsync() operation before its memory can be reclaimed. If no such
   1.152 +** page exists, PagerLruList.pFirstSynced is set to NULL.
   1.153 +*/
   1.154 +typedef struct PagerLruList PagerLruList;
   1.155 +struct PagerLruList {
   1.156 +  PgHdr *pFirst;         /* First page in LRU list (the least recently used) */
   1.157 +  PgHdr *pLast;          /* Last page in LRU list (the most recently used) */
   1.158 +  PgHdr *pFirstSynced;   /* First page in list with PgHdr.needSync==0, or NULL if none */
   1.159 +};
   1.160 +
   1.161 +/*
   1.162 +** The following structure contains the next and previous pointers used
   1.163 +** to link a PgHdr structure into a PagerLruList linked list. 
   1.164 +*/
   1.165 +typedef struct PagerLruLink PagerLruLink;
   1.166 +struct PagerLruLink {
   1.167 +  PgHdr *pNext;          /* Next page toward the most-recently-used end; 0 at the tail */
   1.168 +  PgHdr *pPrev;          /* Previous page toward the least-recently-used end; 0 at the head */
   1.169 +};
   1.170 +
   1.171 +/*
   1.172 +** Each in-memory image of a page begins with the following header.
   1.173 +** This header is only visible to this pager module.  The client
   1.174 +** code that calls pager sees only the data that follows the header.
   1.175 +**
   1.176 +** Client code should call sqlite3PagerWrite() on a page prior to making
   1.177 +** any modifications to that page.  The first time sqlite3PagerWrite()
   1.178 +** is called, the original page contents are written into the rollback
   1.179 +** journal and PgHdr.inJournal and PgHdr.needSync are set.  Later, once
   1.180 +** the journal page has made it onto the disk surface, PgHdr.needSync
   1.181 +** is cleared.  The modified page cannot be written back into the original
   1.182 +** database file until the journal pages have been synced to disk and the
   1.183 +** PgHdr.needSync has been cleared.
   1.184 +**
   1.185 +** The PgHdr.dirty flag is set when sqlite3PagerWrite() is called and
   1.186 +** is cleared again when the page content is written back to the original
   1.187 +** database file.
   1.188 +**
   1.189 +** Details of important structure elements:
   1.190 +**
   1.191 +** needSync
   1.192 +**
   1.193 +**     If this is true, this means that it is not safe to write the page
   1.194 +**     content to the database because the original content needed
   1.195 +**     for rollback has not been synced to the main rollback journal.
   1.196 +**     The original content may have been written to the rollback journal
   1.197 +**     but it has not yet been synced.  So we cannot write to the database
   1.198 +**     file because power failure might cause the page in the journal file
   1.199 +**     to never reach the disk.  It is as if the write to the journal file
   1.200 +**     does not occur until the journal file is synced.
   1.201 +**     
   1.202 +**     This flag is false if the page content exactly matches what
   1.203 +**     currently exists in the database file.  The needSync flag is also
   1.204 +**     false if the original content has been written to the main rollback
   1.205 +**     journal and synced.  If the page represents a new page that has
   1.206 +**     been added onto the end of the database during the current
   1.207 +**     transaction, the needSync flag is true until the original database
   1.208 +**     size in the journal header has been synced to disk.
   1.209 +**
   1.210 +** inJournal
   1.211 +**
   1.212 +**     This is true if the original page has been written into the main
   1.213 +**     rollback journal.  This is always false for new pages added to
   1.214 +**     the end of the database file during the current transaction.
   1.215 +**     And this flag says nothing about whether or not the journal
   1.216 +**     has been synced to disk.  For pages that are in the original
   1.217 +**     database file, the following expression should always be true:
   1.218 +**
   1.219 +**       inJournal = sqlite3BitvecTest(pPager->pInJournal, pgno)
   1.220 +**
   1.221 +**     The pPager->pInJournal object is only valid for the original
   1.222 +**     pages of the database, not new pages that are added to the end
   1.223 +**     of the database, so obviously the above expression cannot be
   1.224 +**     valid for new pages.  For new pages inJournal is always 0.
   1.225 +**
   1.226 +** dirty
   1.227 +**
   1.228 +**     When true, this means that the content of the page has been
   1.229 +**     modified and needs to be written back to the database file.
   1.230 +**     If false, it means that either the content of the page is
   1.231 +**     unchanged or else the content is unimportant and we do not
   1.232 +**     care whether or not it is preserved.
   1.233 +**
   1.234 +** alwaysRollback
   1.235 +**
   1.236 +**     This means that the sqlite3PagerDontRollback() API should be
   1.237 +**     ignored for this page.  The DontRollback() API attempts to say
   1.238 +**     that the content of the page on disk is unimportant (it is an
   1.239 +**     unused page on the freelist) so that it is unnecessary to 
   1.240 +**     rollback changes to this page because the content of the page
   1.241 +**     can change without changing the meaning of the database.  This
   1.242 +**     flag overrides any DontRollback() attempt.  This flag is set
   1.243 +**     when a page that originally contained valid data is added to
   1.244 +**     the freelist.  Later in the same transaction, this page might
   1.245 +**     be pulled from the freelist and reused for something different
   1.246 +**     and at that point the DontRollback() API will be called because
   1.247 +**     pages taken from the freelist do not need to be protected by
   1.248 +**     the rollback journal.  But this flag says that the page was
   1.249 +**     not originally part of the freelist so that it still needs to
   1.250 +**     be rolled back in spite of any subsequent DontRollback() calls.
   1.251 +**
   1.252 +** needRead 
   1.253 +**
   1.254 +**     This flag means (when true) that the content of the page has
   1.255 +**     not yet been loaded from disk.  The in-memory content is just
   1.256 +**     garbage.  (Actually, we zero the content, but you should not
   1.257 +**     make any assumptions about the content nevertheless.)  If the
   1.258 +**     content is needed in the future, it should be read from the
   1.259 +**     original database file.
   1.260 +*/
   1.261 +struct PgHdr {
   1.262 +  Pager *pPager;                 /* The pager to which this page belongs */
   1.263 +  Pgno pgno;                     /* The page number for this page */
   1.264 +  PgHdr *pNextHash, *pPrevHash;  /* Hash collision chain for PgHdr.pgno */
   1.265 +  PagerLruLink free;             /* Links for the per-pager LRU list (Pager.lru) */
   1.266 +  PgHdr *pNextAll;               /* A list of all pages */
   1.267 +  u8 inJournal;                  /* TRUE if has been written to journal */
   1.268 +  u8 dirty;                      /* TRUE if we need to write back changes */
   1.269 +  u8 needSync;                   /* Sync journal before writing this page */
   1.270 +  u8 alwaysRollback;             /* Disable DontRollback() for this page */
   1.271 +  u8 needRead;                   /* Content not yet loaded from disk; read it if needed */
   1.272 +  short int nRef;                /* Number of users of this page */
   1.273 +  PgHdr *pDirty, *pPrevDirty;    /* Links for the list of dirty pages */
   1.274 +#ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
   1.275 +  PgHdr *pPrevAll;               /* A list of all pages */
   1.276 +  PagerLruLink gfree;            /* Links for the global LRU list (sqlite3LruPageList) */
   1.277 +#endif
   1.278 +#ifdef SQLITE_CHECK_PAGES
   1.279 +  u32 pageHash;                  /* NOTE(review): presumably a hash of the page content - confirm */
   1.280 +#endif
   1.281 +  void *pData;                   /* Page data */
   1.282 +  /* Pager.nExtra bytes of local data appended to this header */
   1.283 +};
   1.284 +
   1.285 +/*
   1.286 +** For an in-memory only database, some extra information is recorded about
   1.287 +** each page so that changes can be rolled back.  (Journal files are not
   1.288 +** used for in-memory databases.)  The following information is added to
   1.289 +** the end of every EXTRA block for in-memory databases.
   1.290 +**
   1.291 +** This information could have been added directly to the PgHdr structure.
   1.292 +** But then it would take up an extra 8 bytes of storage on every PgHdr
   1.293 +** even for disk-based databases.  Splitting it out saves 8 bytes.  This
   1.294 +** is only a savings of 0.8% but those percentages add up.
   1.295 +*/
   1.296 +typedef struct PgHistory PgHistory;   /* located via PGHDR_TO_HIST() */
   1.297 +struct PgHistory {
   1.298 +  u8 *pOrig;     /* Original page text.  Restore to this on a full rollback */
   1.299 +  u8 *pStmt;     /* Text as it was at the beginning of the current statement */
   1.300 +  PgHdr *pNextStmt, *pPrevStmt;  /* List of pages in the statement journal */
   1.301 +  u8 inStmt;                     /* TRUE if in the statement subjournal */
   1.302 +};
   1.303 +
   1.304 +/*
   1.305 +** A macro used for invoking the codec if there is one
   1.306 +*/
   1.307 +#ifdef SQLITE_HAS_CODEC
   1.308 +# define CODEC1(P,D,N,X) if( P->xCodec!=0 ){ P->xCodec(P->pCodecArg,D,N,X); }  /* statement form; NOTE(review): bare if, do not follow with else */
   1.309 +# define CODEC2(P,D,N,X) ((char*)(P->xCodec!=0?P->xCodec(P->pCodecArg,D,N,X):D))  /* expression form: codec result, or D when no codec is set */
   1.310 +#else
   1.311 +# define CODEC1(P,D,N,X) /* NO-OP */
   1.312 +# define CODEC2(P,D,N,X) ((char*)D)  /* no codec compiled in: D is passed through */
   1.313 +#endif
   1.314 +
   1.315 +/*
   1.316 +** Convert a pointer to a PgHdr into a pointer to its data
   1.317 +** and back again.
   1.318 +*/
   1.319 +#define PGHDR_TO_DATA(P)    ((P)->pData)   /* P: PgHdr*; yields its pData pointer */
   1.320 +#define PGHDR_TO_EXTRA(G,P) ((void*)&((G)[1]))   /* G: PgHdr*; address just past the header; P is unused */
   1.321 +#define PGHDR_TO_HIST(P,PGR)  \
   1.322 +            ((PgHistory*)&((char*)(&(P)[1]))[(PGR)->nExtra])  /* (PGR)->nExtra bytes past the header P */
   1.323 +
   1.324 +/*
   1.325 +** An open page cache is an instance of the following structure.
   1.326 +**
   1.327 +** Pager.errCode may be set to SQLITE_IOERR, SQLITE_CORRUPT,
   1.328 +** or SQLITE_FULL. Once one of the first two errors occurs, it persists
   1.329 +** and is returned as the result of every major pager API call.  The
   1.330 +** SQLITE_FULL return code is slightly different. It persists only until the
   1.331 +** next successful rollback is performed on the pager cache. Also,
   1.332 +** SQLITE_FULL does not affect the sqlite3PagerGet() and sqlite3PagerLookup()
   1.333 +** APIs, they may still be used successfully.
   1.334 +*/
   1.335 +struct Pager {
   1.336 +  sqlite3_vfs *pVfs;          /* OS functions to use for IO */
   1.337 +  u8 journalOpen;             /* True if journal file descriptor is valid */
   1.338 +  u8 journalStarted;          /* True if header of journal is synced */
   1.339 +  u8 useJournal;              /* Use a rollback journal on this file */
   1.340 +  u8 noReadlock;              /* Do not bother to obtain readlocks */
   1.341 +  u8 stmtOpen;                /* True if the statement subjournal is open */
   1.342 +  u8 stmtInUse;               /* True if we are in a statement subtransaction */
   1.343 +  u8 stmtAutoopen;            /* Open stmt journal when main journal is opened*/
   1.344 +  u8 noSync;                  /* Do not sync the journal if true */
   1.345 +  u8 fullSync;                /* Do extra syncs of the journal for robustness */
   1.346 +  u8 sync_flags;              /* One of SYNC_NORMAL or SYNC_FULL */
   1.347 +  u8 state;                   /* PAGER_UNLOCK, _SHARED, _RESERVED, etc. */
   1.348 +  u8 tempFile;                /* zFilename is a temporary file */
   1.349 +  u8 readOnly;                /* True for a read-only database */
   1.350 +  u8 needSync;                /* True if an fsync() is needed on the journal */
   1.351 +  u8 dirtyCache;              /* True if cached pages have changed */
   1.352 +  u8 alwaysRollback;          /* Disable DontRollback() for all pages */
   1.353 +  u8 memDb;                   /* True to inhibit all file I/O */
   1.354 +  u8 setMaster;               /* True if a m-j name has been written to jrnl */
   1.355 +  u8 doNotSync;               /* Boolean. While true, do not spill the cache */
   1.356 +  u8 exclusiveMode;           /* Boolean. True if locking_mode==EXCLUSIVE */
   1.357 +  u8 journalMode;             /* One of the PAGER_JOURNALMODE_* values */
   1.358 +  u8 dbModified;              /* True if there are any changes to the Db */
   1.359 +  u8 changeCountDone;         /* Set after incrementing the change-counter */
   1.360 +  u32 vfsFlags;               /* Flags for sqlite3_vfs.xOpen() */
   1.361 +  int errCode;                /* One of several kinds of errors */
   1.362 +  int dbSize;                 /* Number of pages in the file */
   1.363 +  int origDbSize;             /* dbSize before the current change */
   1.364 +  int stmtSize;               /* Size of database (in pages) at stmt_begin() */
   1.365 +  int nRec;                   /* Number of pages written to the journal */
   1.366 +  u32 cksumInit;              /* Quasi-random value added to every checksum */
   1.367 +  int stmtNRec;               /* Number of records in stmt subjournal */
   1.368 +  int nExtra;                 /* Add this many bytes to each in-memory page */
   1.369 +  int pageSize;               /* Number of bytes in a page */
   1.370 +  int nPage;                  /* Total number of in-memory pages */
   1.371 +  int nRef;                   /* Number of in-memory pages with PgHdr.nRef>0 */
   1.372 +  int mxPage;                 /* Maximum number of pages to hold in cache */
   1.373 +  Pgno mxPgno;                /* Maximum allowed size of the database */
   1.374 +  Bitvec *pInJournal;         /* One bit for each page in the database file */
   1.375 +  Bitvec *pInStmt;            /* One bit for each page in the database */
   1.376 +  char *zFilename;            /* Name of the database file */
   1.377 +  char *zJournal;             /* Name of the journal file */
   1.378 +  char *zDirectory;           /* Directory holding database and journal files */
   1.379 +  sqlite3_file *fd, *jfd;     /* File descriptors for database and journal */
   1.380 +  sqlite3_file *stfd;         /* File descriptor for the statement subjournal*/
   1.381 +  BusyHandler *pBusyHandler;  /* Pointer to sqlite.busyHandler */
   1.382 +  PagerLruList lru;           /* LRU list of free (unreferenced) pages */
   1.383 +  PgHdr *pAll;                /* List of all pages */
   1.384 +  PgHdr *pStmt;               /* List of pages in the statement subjournal */
   1.385 +  PgHdr *pDirty;              /* List of all dirty pages */
   1.386 +  i64 journalOff;             /* Current byte offset in the journal file */
   1.387 +  i64 journalHdr;             /* Byte offset to previous journal header */
   1.388 +  i64 stmtHdrOff;             /* First journal header written this statement */
   1.389 +  i64 stmtCksum;              /* cksumInit when statement was started */
   1.390 +  i64 stmtJSize;              /* Size of journal at stmt_begin() */
   1.391 +  int sectorSize;             /* Assumed sector size during rollback */
   1.392 +#ifdef SQLITE_TEST
   1.393 +  int nHit, nMiss;            /* Cache hits and misses */
   1.394 +  int nRead, nWrite;          /* Database pages read/written */
   1.395 +#endif
   1.396 +  void (*xDestructor)(DbPage*,int); /* Call this routine when freeing pages */
   1.397 +  void (*xReiniter)(DbPage*,int);   /* Call this routine when reloading pages */
   1.398 +#ifdef SQLITE_HAS_CODEC
   1.399 +  void *(*xCodec)(void*,void*,Pgno,int); /* Routine for en/decoding data */
   1.400 +  void *pCodecArg;            /* First argument to xCodec() */
   1.401 +#endif
   1.402 +  int nHash;                  /* Size of the pager hash table */
   1.403 +  PgHdr **aHash;              /* Hash table to map page number to PgHdr */
   1.404 +#ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
   1.405 +  Pager *pNext;               /* Doubly linked list of pagers on which */
   1.406 +  Pager *pPrev;               /* sqlite3_release_memory() will work */
   1.407 +  volatile int iInUseMM;      /* NOTE(review): presumably non-zero while the memory manager uses this pager - confirm */
   1.408 +  volatile int iInUseDB;      /* pagerEnter() nesting depth; decremented by pagerLeave() */
   1.409 +#endif
   1.410 +  char *pTmpSpace;            /* Pager.pageSize bytes of space for tmp use */
   1.411 +  char dbFileVers[16];        /* Changes whenever database file changes */
   1.412 +  i64 journalSizeLimit;       /* Size limit for persistent journal files */
   1.413 +};
   1.414 +
   1.415 +/*
   1.416 +** The following global variables hold counters used for
   1.417 +** testing purposes only.  These variables do not exist in
   1.418 +** a non-testing build.  These variables are not thread-safe.
   1.419 +*/
   1.420 +#ifdef SQLITE_TEST
   1.421 +int sqlite3_pager_readdb_count = 0;    /* Number of full pages read from DB */
   1.422 +int sqlite3_pager_writedb_count = 0;   /* Number of full pages written to DB */
   1.423 +int sqlite3_pager_writej_count = 0;    /* Number of pages written to journal */
   1.424 +int sqlite3_pager_pgfree_count = 0;    /* Number of cache pages freed */
   1.425 +# define PAGER_INCR(v)  v++   /* post-increment counter v (testing builds only) */
   1.426 +#else
   1.427 +# define PAGER_INCR(v)        /* counters do not exist; expands to nothing */
   1.428 +#endif
   1.429 +
   1.430 +/*
   1.431 +** The following variable points to the head of a double-linked list
   1.432 +** of all pagers that are eligible for page stealing by the
   1.433 +** sqlite3_release_memory() interface.  Access to this list is
   1.434 +** protected by the SQLITE_MUTEX_STATIC_MEM2 mutex.
   1.435 +*/
   1.436 +#ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
   1.437 +static Pager *sqlite3PagerList = 0;                   /* Head of the list of pagers (initially empty) */
   1.438 +static PagerLruList sqlite3LruPageList = {0, 0, 0};   /* Global LRU list; pages are linked through PgHdr.gfree */
   1.439 +#endif
   1.440 +
   1.441 +
   1.442 +/*
   1.443 +** Journal files begin with the following magic string.  The data
   1.444 +** was obtained from /dev/random.  It is used only as a sanity check.
   1.445 +**
   1.446 +** Since version 2.8.0, the journal format contains additional sanity
   1.447 +** checking information.  If the power fails while the journal is being
   1.448 +** written, semi-random garbage data might appear in the journal
   1.449 +** file after power is restored.  If an attempt is then made
   1.450 +** to roll the journal back, the database could be corrupted.  The additional
   1.451 +** sanity checking data is an attempt to discover the garbage in the
   1.452 +** journal and ignore it.
   1.453 +**
   1.454 +** The sanity checking information for the new journal format consists
   1.455 +** of a 32-bit checksum on each page of data.  The checksum covers both
   1.456 +** the page number and the pPager->pageSize bytes of data for the page.
   1.457 +** This cksum is initialized to a 32-bit random value that appears in the
   1.458 +** journal file right after the header.  The random initializer is important,
   1.459 +** because garbage data that appears at the end of a journal is likely
   1.460 +** data that was once in other files that have now been deleted.  If the
   1.461 +** garbage data came from an obsolete journal file, the checksums might
   1.462 +** be correct.  But by initializing the checksum to a random value which
   1.463 +** is different for every journal, we minimize that risk.
   1.464 +*/
   1.465 +static const unsigned char aJournalMagic[] = {   /* 8 bytes */
   1.466 +  0xd9, 0xd5, 0x05, 0xf9, 0x20, 0xa1, 0x63, 0xd7,
   1.467 +};
   1.468 +
   1.469 +/*
   1.470 +** The size of the header and of each page in the journal is determined
   1.471 +** by the following macros.
   1.472 +*/
   1.473 +#define JOURNAL_PG_SZ(pPager)  ((pPager->pageSize) + 8)   /* page data plus 8 bytes; NOTE(review): argument not parenthesized */
   1.474 +
   1.475 +/*
   1.476 +** The journal header size for this pager. In the future, this could be
   1.477 +** set to some value read from the disk controller. The important
   1.478 +** characteristic is that it is the same size as a disk sector.
   1.479 +*/
   1.480 +#define JOURNAL_HDR_SZ(pPager) (pPager->sectorSize)   /* NOTE(review): argument not parenthesized; pass a plain identifier */
   1.481 +
   1.482 +/*
   1.483 +** The macro MEMDB is true if we are dealing with an in-memory database.
   1.484 +** We do this as a macro so that if the SQLITE_OMIT_MEMORYDB macro is set,
   1.485 +** the value of MEMDB will be a constant and the compiler will optimize
   1.486 +** out code that would never execute.
   1.487 +*/
   1.488 +#ifdef SQLITE_OMIT_MEMORYDB
   1.489 +# define MEMDB 0
   1.490 +#else
   1.491 +# define MEMDB pPager->memDb   /* requires a variable named pPager in the enclosing scope */
   1.492 +#endif
   1.493 +
   1.494 +/*
   1.495 +** Page number PAGER_MJ_PGNO is never used in an SQLite database (it is
   1.496 +** reserved for working around a windows/posix incompatibility). It is
   1.497 +** used in the journal to signify that the remainder of the journal file 
   1.498 +** is devoted to storing a master journal name - there are no more pages to
   1.499 +** roll back. See comments for function writeMasterJournal() for details.
   1.500 +*/
   1.501 +/* #define PAGER_MJ_PGNO(x) (PENDING_BYTE/((x)->pageSize)) */
   1.502 +#define PAGER_MJ_PGNO(x) ((PENDING_BYTE/((x)->pageSize))+1)   /* x: pointer to a struct with a pageSize field (a Pager) */
   1.503 +
   1.504 +/*
   1.505 +** The maximum legal page number is (2^31 - 1).
   1.506 +*/
   1.507 +#define PAGER_MAX_PGNO 2147483647   /* 0x7fffffff */
   1.508 +
   1.509 +/*
   1.510 +** The pagerEnter() and pagerLeave() routines acquire and release
   1.511 +** a mutex on each pager.  The mutex is recursive.
   1.512 +**
   1.513 +** This is a special-purpose mutex.  It only provides mutual exclusion
   1.514 +** between the Btree and the Memory Management sqlite3_release_memory()
   1.515 +** function.  It does not prevent, for example, two Btrees from accessing
   1.516 +** the same pager at the same time.  Other general-purpose mutexes in
   1.517 +** the btree layer handle that chore.
   1.518 +*/
   1.519 +#ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
   1.520 +  static void pagerEnter(Pager *p){
   1.521 +    p->iInUseDB++;                        /* count this (possibly nested) entry */
   1.522 +    if( p->iInUseMM && p->iInUseDB==1 ){   /* outermost entry while iInUseMM is set */
   1.523 +#ifndef SQLITE_MUTEX_NOOP
   1.524 +      sqlite3_mutex *mutex;
   1.525 +      mutex = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MEM2);
   1.526 +#endif
   1.527 +      p->iInUseDB = 0;                    /* withdraw the claim while waiting for the mutex */
   1.528 +      sqlite3_mutex_enter(mutex);         /* NOTE(review): mutex is undeclared under SQLITE_MUTEX_NOOP; presumably these calls are then macros that drop their argument - confirm */
   1.529 +      p->iInUseDB = 1;                    /* re-claim the pager while the mutex is held */
   1.530 +      sqlite3_mutex_leave(mutex);
   1.531 +    }
   1.532 +    assert( p->iInUseMM==0 );             /* iInUseMM must be clear once we have entered */
   1.533 +  }
   1.534 +  static void pagerLeave(Pager *p){
   1.535 +    p->iInUseDB--;                        /* undo one pagerEnter() */
   1.536 +    assert( p->iInUseDB>=0 );             /* catches a leave without a matching enter */
   1.537 +  }
   1.538 +#else
   1.539 +# define pagerEnter(X)   /* no-op when memory management is not compiled in */
   1.540 +# define pagerLeave(X)   /* no-op when memory management is not compiled in */
   1.541 +#endif
   1.542 +
   1.543 +/*
   1.544 +** Add page pPg to the end of the linked list managed by structure
   1.545 +** pList (pPg becomes the last entry in the list - the most recently 
   1.546 +** used). Argument pLink should point to either pPg->free or pPg->gfree,
   1.547 +** depending on whether pPg is being added to the pager-specific or
   1.548 +** global LRU list.
   1.549 +*/
   1.550 +static void listAdd(PagerLruList *pList, PagerLruLink *pLink, PgHdr *pPg){
   1.551 +  pLink->pNext = 0;                 /* pPg becomes the new tail */
   1.552 +  pLink->pPrev = pList->pLast;      /* old tail (or 0 if the list was empty) */
   1.553 +
   1.554 +#ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
   1.555 +  assert(pLink==&pPg->free || pLink==&pPg->gfree);            /* pLink must be one of pPg's own links */
   1.556 +  assert(pLink==&pPg->gfree || pList!=&sqlite3LruPageList);   /* the global list is only linked through gfree */
   1.557 +#endif
   1.558 +
   1.559 +  if( pList->pLast ){
   1.560 +    int iOff = (char *)pLink - (char *)pPg;   /* byte offset of the link within a PgHdr */
   1.561 +    PagerLruLink *pLastLink = (PagerLruLink *)(&((u8 *)pList->pLast)[iOff]);   /* the same link inside the old tail */
   1.562 +    pLastLink->pNext = pPg;
   1.563 +  }else{
   1.564 +    assert(!pList->pFirst);         /* no tail means the list is empty */
   1.565 +    pList->pFirst = pPg;
   1.566 +  }
   1.567 +
   1.568 +  pList->pLast = pPg;
   1.569 +  if( !pList->pFirstSynced && pPg->needSync==0 ){   /* pPg is the first entry that needs no sync */
   1.570 +    pList->pFirstSynced = pPg;
   1.571 +  }
   1.572 +}
   1.573 +
   1.574 +/*
   1.575 +** Remove pPg from the list managed by the structure pointed to by pList.
   1.576 +** If pPg was pList->pFirstSynced, the next page with needSync==0 takes over.
   1.577 +** Argument pLink should point to either pPg->free or pPg->gfree, depending
   1.578 +** on whether pPg is being removed from the pager-specific or global LRU list.
   1.579 +*/
   1.580 +static void listRemove(PagerLruList *pList, PagerLruLink *pLink, PgHdr *pPg){
   1.581 +  int iOff = (char *)pLink - (char *)pPg;   /* byte offset of the link within a PgHdr */
   1.582 +
   1.583 +#ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
   1.584 +  assert(pLink==&pPg->free || pLink==&pPg->gfree);            /* pLink must be one of pPg's own links */
   1.585 +  assert(pLink==&pPg->gfree || pList!=&sqlite3LruPageList);   /* the global list is only linked through gfree */
   1.586 +#endif
   1.587 +
   1.588 +  if( pPg==pList->pFirst ){         /* removing the head */
   1.589 +    pList->pFirst = pLink->pNext;
   1.590 +  }
   1.591 +  if( pPg==pList->pLast ){          /* removing the tail */
   1.592 +    pList->pLast = pLink->pPrev;
   1.593 +  }
   1.594 +  if( pLink->pPrev ){               /* make the predecessor skip over pPg */
   1.595 +    PagerLruLink *pPrevLink = (PagerLruLink *)(&((u8 *)pLink->pPrev)[iOff]);
   1.596 +    pPrevLink->pNext = pLink->pNext;
   1.597 +  }
   1.598 +  if( pLink->pNext ){               /* make the successor skip over pPg */
   1.599 +    PagerLruLink *pNextLink = (PagerLruLink *)(&((u8 *)pLink->pNext)[iOff]);
   1.600 +    pNextLink->pPrev = pLink->pPrev;
   1.601 +  }
   1.602 +  if( pPg==pList->pFirstSynced ){   /* find the next entry with needSync==0 */
   1.603 +    PgHdr *p = pLink->pNext;
   1.604 +    while( p && p->needSync ){
   1.605 +      PagerLruLink *pL = (PagerLruLink *)(&((u8 *)p)[iOff]);
   1.606 +      p = pL->pNext;
   1.607 +    }
   1.608 +    pList->pFirstSynced = p;        /* 0 if no later page qualifies */
   1.609 +  }
   1.610 +
   1.611 +  pLink->pNext = pLink->pPrev = 0;  /* leave pPg's link fully detached */
   1.612 +}
   1.613 +
   1.614 +/* 
   1.615 +** Add page pPg to the list of free pages for the pager. If 
   1.616 +** memory-management is enabled, also add the page to the global 
   1.617 +** list of free pages.
   1.618 +*/
   1.619 +static void lruListAdd(PgHdr *pPg){
   1.620 +  listAdd(&pPg->pPager->lru, &pPg->free, pPg);
   1.621 +#ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
   1.622 +  if( !pPg->pPager->memDb ){
   1.623 +    sqlite3_mutex_enter(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_LRU));
   1.624 +    listAdd(&sqlite3LruPageList, &pPg->gfree, pPg);
   1.625 +    sqlite3_mutex_leave(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_LRU));
   1.626 +  }
   1.627 +#endif
   1.628 +}
   1.629 +
   1.630 +/* 
   1.631 +** Remove page pPg from the list of free pages for the associated pager.
   1.632 +** If memory-management is enabled, also remove pPg from the global list
   1.633 +** of free pages.
   1.634 +*/
   1.635 +static void lruListRemove(PgHdr *pPg){
   1.636 +  listRemove(&pPg->pPager->lru, &pPg->free, pPg);
   1.637 +#ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
   1.638 +  if( !pPg->pPager->memDb ){
   1.639 +    sqlite3_mutex_enter(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_LRU));
   1.640 +    listRemove(&sqlite3LruPageList, &pPg->gfree, pPg);
   1.641 +    sqlite3_mutex_leave(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_LRU));
   1.642 +  }
   1.643 +#endif
   1.644 +}
   1.645 +
   1.646 +/* 
   1.647 +** This function is called just after the needSync flag has been cleared
   1.648 +** from all pages managed by pPager (usually because the journal file
   1.649 +** has just been synced). It updates the pPager->lru.pFirstSynced variable
   1.650 +** and, if memory-management is enabled, the sqlite3LruPageList.pFirstSynced
   1.651 +** variable also.
   1.652 +*/
   1.653 +static void lruListSetFirstSynced(Pager *pPager){
   1.654 +  pPager->lru.pFirstSynced = pPager->lru.pFirst;
   1.655 +#ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
   1.656 +  if( !pPager->memDb ){
   1.657 +    PgHdr *p;
   1.658 +    sqlite3_mutex_enter(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_LRU));
   1.659 +    for(p=sqlite3LruPageList.pFirst; p && p->needSync; p=p->gfree.pNext);
   1.660 +    assert(p==pPager->lru.pFirstSynced || p==sqlite3LruPageList.pFirstSynced);
   1.661 +    sqlite3LruPageList.pFirstSynced = p;
   1.662 +    sqlite3_mutex_leave(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_LRU));
   1.663 +  }
   1.664 +#endif
   1.665 +}
   1.666 +
   1.667 +/*
   1.668 +** Return true if page *pPg has already been written to the statement
   1.669 +** journal (or statement snapshot has been created, if *pPg is part
   1.670 +** of an in-memory database).
   1.671 +*/
   1.672 +static int pageInStatement(PgHdr *pPg){
   1.673 +  Pager *pPager = pPg->pPager;
   1.674 +  if( MEMDB ){
   1.675 +    return PGHDR_TO_HIST(pPg, pPager)->inStmt;
   1.676 +  }else{
   1.677 +    return sqlite3BitvecTest(pPager->pInStmt, pPg->pgno);
   1.678 +  }
   1.679 +}
   1.680 +
   1.681 +/*
   1.682 +** Change the size of the pager hash table to N.  N must be a power
   1.683 +** of two.
   1.684 +*/
   1.685 +static void pager_resize_hash_table(Pager *pPager, int N){
   1.686 +  PgHdr **aHash, *pPg;
   1.687 +  assert( N>0 && (N&(N-1))==0 );
   1.688 +#ifdef SQLITE_MALLOC_SOFT_LIMIT
   1.689 +  if( N*sizeof(aHash[0])>SQLITE_MALLOC_SOFT_LIMIT ){
   1.690 +    N = SQLITE_MALLOC_SOFT_LIMIT/sizeof(aHash[0]);
   1.691 +  }
   1.692 +  if( N==pPager->nHash ) return;
   1.693 +#endif
   1.694 +  pagerLeave(pPager);
   1.695 +  if( pPager->aHash!=0 ) sqlite3BeginBenignMalloc();
   1.696 +  aHash = sqlite3MallocZero( sizeof(aHash[0])*N );
   1.697 +  if( pPager->aHash!=0 ) sqlite3EndBenignMalloc();
   1.698 +  pagerEnter(pPager);
   1.699 +  if( aHash==0 ){
   1.700 +    /* Failure to rehash is not an error.  It is only a performance hit. */
   1.701 +    return;
   1.702 +  }
   1.703 +  sqlite3_free(pPager->aHash);
   1.704 +  pPager->nHash = N;
   1.705 +  pPager->aHash = aHash;
   1.706 +  for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
   1.707 +    int h;
   1.708 +    if( pPg->pgno==0 ){
   1.709 +      assert( pPg->pNextHash==0 && pPg->pPrevHash==0 );
   1.710 +      continue;
   1.711 +    }
   1.712 +    h = pPg->pgno & (N-1);
   1.713 +    pPg->pNextHash = aHash[h];
   1.714 +    if( aHash[h] ){
   1.715 +      aHash[h]->pPrevHash = pPg;
   1.716 +    }
   1.717 +    aHash[h] = pPg;
   1.718 +    pPg->pPrevHash = 0;
   1.719 +  }
   1.720 +}
   1.721 +
   1.722 +/*
   1.723 +** Read a 32-bit integer from the given file descriptor.  Store the integer
   1.724 +** that is read in *pRes.  Return SQLITE_OK if everything worked, or an
   1.725 +** error code is something goes wrong.
   1.726 +**
   1.727 +** All values are stored on disk as big-endian.
   1.728 +*/
   1.729 +static int read32bits(sqlite3_file *fd, i64 offset, u32 *pRes){
   1.730 +  unsigned char ac[4];
   1.731 +  int rc = sqlite3OsRead(fd, ac, sizeof(ac), offset);
   1.732 +  if( rc==SQLITE_OK ){
   1.733 +    *pRes = sqlite3Get4byte(ac);
   1.734 +  }
   1.735 +  return rc;
   1.736 +}
   1.737 +
   1.738 +/*
   1.739 +** Write a 32-bit integer into a string buffer in big-endian byte order.
   1.740 +*/
   1.741 +#define put32bits(A,B)  sqlite3Put4byte((u8*)A,B)
   1.742 +
   1.743 +/*
   1.744 +** Write a 32-bit integer into the given file descriptor.  Return SQLITE_OK
   1.745 +** on success or an error code is something goes wrong.
   1.746 +*/
   1.747 +static int write32bits(sqlite3_file *fd, i64 offset, u32 val){
   1.748 +  char ac[4];
   1.749 +  put32bits(ac, val);
   1.750 +  return sqlite3OsWrite(fd, ac, 4, offset);
   1.751 +}
   1.752 +
   1.753 +/*
   1.754 +** If file pFd is open, call sqlite3OsUnlock() on it.
   1.755 +*/
   1.756 +static int osUnlock(sqlite3_file *pFd, int eLock){
   1.757 +  if( !pFd->pMethods ){
   1.758 +    return SQLITE_OK;
   1.759 +  }
   1.760 +  return sqlite3OsUnlock(pFd, eLock);
   1.761 +}
   1.762 +
   1.763 +/*
   1.764 +** This function determines whether or not the atomic-write optimization
   1.765 +** can be used with this pager. The optimization can be used if:
   1.766 +**
   1.767 +**  (a) the value returned by OsDeviceCharacteristics() indicates that
   1.768 +**      a database page may be written atomically, and
   1.769 +**  (b) the value returned by OsSectorSize() is less than or equal
   1.770 +**      to the page size.
   1.771 +**
   1.772 +** If the optimization cannot be used, 0 is returned. If it can be used,
   1.773 +** then the value returned is the size of the journal file when it
   1.774 +** contains rollback data for exactly one page.
   1.775 +*/
   1.776 +#ifdef SQLITE_ENABLE_ATOMIC_WRITE
   1.777 +static int jrnlBufferSize(Pager *pPager){
   1.778 +  int dc;           /* Device characteristics */
   1.779 +  int nSector;      /* Sector size */
   1.780 +  int szPage;        /* Page size */
   1.781 +  sqlite3_file *fd = pPager->fd;
   1.782 +
   1.783 +  if( fd->pMethods ){
   1.784 +    dc = sqlite3OsDeviceCharacteristics(fd);
   1.785 +    nSector = sqlite3OsSectorSize(fd);
   1.786 +    szPage = pPager->pageSize;
   1.787 +  }
   1.788 +
   1.789 +  assert(SQLITE_IOCAP_ATOMIC512==(512>>8));
   1.790 +  assert(SQLITE_IOCAP_ATOMIC64K==(65536>>8));
   1.791 +
   1.792 +  if( !fd->pMethods || 
   1.793 +       (dc & (SQLITE_IOCAP_ATOMIC|(szPage>>8)) && nSector<=szPage) ){
   1.794 +    return JOURNAL_HDR_SZ(pPager) + JOURNAL_PG_SZ(pPager);
   1.795 +  }
   1.796 +  return 0;
   1.797 +}
   1.798 +#endif
   1.799 +
   1.800 +/*
   1.801 +** This function should be called when an error occurs within the pager
   1.802 +** code. The first argument is a pointer to the pager structure, the
   1.803 +** second the error-code about to be returned by a pager API function. 
   1.804 +** The value returned is a copy of the second argument to this function. 
   1.805 +**
   1.806 +** If the second argument is SQLITE_IOERR, SQLITE_CORRUPT, or SQLITE_FULL
   1.807 +** the error becomes persistent. Until the persisten error is cleared,
   1.808 +** subsequent API calls on this Pager will immediately return the same 
   1.809 +** error code.
   1.810 +**
   1.811 +** A persistent error indicates that the contents of the pager-cache 
   1.812 +** cannot be trusted. This state can be cleared by completely discarding 
   1.813 +** the contents of the pager-cache. If a transaction was active when
   1.814 +** the persistent error occured, then the rollback journal may need
   1.815 +** to be replayed.
   1.816 +*/
   1.817 +static void pager_unlock(Pager *pPager);
   1.818 +static int pager_error(Pager *pPager, int rc){
   1.819 +  int rc2 = rc & 0xff;
   1.820 +  assert(
   1.821 +       pPager->errCode==SQLITE_FULL ||
   1.822 +       pPager->errCode==SQLITE_OK ||
   1.823 +       (pPager->errCode & 0xff)==SQLITE_IOERR
   1.824 +  );
   1.825 +  if(
   1.826 +    rc2==SQLITE_FULL ||
   1.827 +    rc2==SQLITE_IOERR ||
   1.828 +    rc2==SQLITE_CORRUPT
   1.829 +  ){
   1.830 +    pPager->errCode = rc;
   1.831 +    if( pPager->state==PAGER_UNLOCK && pPager->nRef==0 ){
   1.832 +      /* If the pager is already unlocked, call pager_unlock() now to
   1.833 +      ** clear the error state and ensure that the pager-cache is 
   1.834 +      ** completely empty.
   1.835 +      */
   1.836 +      pager_unlock(pPager);
   1.837 +    }
   1.838 +  }
   1.839 +  return rc;
   1.840 +}
   1.841 +
   1.842 +/*
   1.843 +** If SQLITE_CHECK_PAGES is defined then we do some sanity checking
   1.844 +** on the cache using a hash function.  This is used for testing
   1.845 +** and debugging only.
   1.846 +*/
   1.847 +#ifdef SQLITE_CHECK_PAGES
   1.848 +/*
   1.849 +** Return a 32-bit hash of the page data for pPage.
   1.850 +*/
   1.851 +static u32 pager_datahash(int nByte, unsigned char *pData){
   1.852 +  u32 hash = 0;
   1.853 +  int i;
   1.854 +  for(i=0; i<nByte; i++){
   1.855 +    hash = (hash*1039) + pData[i];
   1.856 +  }
   1.857 +  return hash;
   1.858 +}
   1.859 +static u32 pager_pagehash(PgHdr *pPage){
   1.860 +  return pager_datahash(pPage->pPager->pageSize, 
   1.861 +                        (unsigned char *)PGHDR_TO_DATA(pPage));
   1.862 +}
   1.863 +
   1.864 +/*
   1.865 +** The CHECK_PAGE macro takes a PgHdr* as an argument. If SQLITE_CHECK_PAGES
   1.866 +** is defined, and NDEBUG is not defined, an assert() statement checks
   1.867 +** that the page is either dirty or still matches the calculated page-hash.
   1.868 +*/
   1.869 +#define CHECK_PAGE(x) checkPage(x)
   1.870 +static void checkPage(PgHdr *pPg){
   1.871 +  Pager *pPager = pPg->pPager;
   1.872 +  assert( !pPg->pageHash || pPager->errCode || MEMDB || pPg->dirty || 
   1.873 +      pPg->pageHash==pager_pagehash(pPg) );
   1.874 +}
   1.875 +
   1.876 +#else
   1.877 +#define pager_datahash(X,Y)  0
   1.878 +#define pager_pagehash(X)  0
   1.879 +#define CHECK_PAGE(x)
   1.880 +#endif
   1.881 +
   1.882 +/*
   1.883 +** When this is called the journal file for pager pPager must be open.
   1.884 +** The master journal file name is read from the end of the file and 
   1.885 +** written into memory supplied by the caller. 
   1.886 +**
   1.887 +** zMaster must point to a buffer of at least nMaster bytes allocated by
   1.888 +** the caller. This should be sqlite3_vfs.mxPathname+1 (to ensure there is
   1.889 +** enough space to write the master journal name). If the master journal
   1.890 +** name in the journal is longer than nMaster bytes (including a
   1.891 +** nul-terminator), then this is handled as if no master journal name
   1.892 +** were present in the journal.
   1.893 +**
   1.894 +** If no master journal file name is present zMaster[0] is set to 0 and
   1.895 +** SQLITE_OK returned.
   1.896 +*/
   1.897 +static int readMasterJournal(sqlite3_file *pJrnl, char *zMaster, int nMaster){
   1.898 +  int rc;
   1.899 +  u32 len;
   1.900 +  i64 szJ;
   1.901 +  u32 cksum;
   1.902 +  u32 u;                   /* Unsigned loop counter */
   1.903 +  unsigned char aMagic[8]; /* A buffer to hold the magic header */
   1.904 +
   1.905 +  zMaster[0] = '\0';
   1.906 +
   1.907 +  rc = sqlite3OsFileSize(pJrnl, &szJ);
   1.908 +  if( rc!=SQLITE_OK || szJ<16 ) return rc;
   1.909 +
   1.910 +  rc = read32bits(pJrnl, szJ-16, &len);
   1.911 +  if( rc!=SQLITE_OK ) return rc;
   1.912 +
   1.913 +  if( len>=nMaster ){
   1.914 +    return SQLITE_OK;
   1.915 +  }
   1.916 +
   1.917 +  rc = read32bits(pJrnl, szJ-12, &cksum);
   1.918 +  if( rc!=SQLITE_OK ) return rc;
   1.919 +
   1.920 +  rc = sqlite3OsRead(pJrnl, aMagic, 8, szJ-8);
   1.921 +  if( rc!=SQLITE_OK || memcmp(aMagic, aJournalMagic, 8) ) return rc;
   1.922 +
   1.923 +  rc = sqlite3OsRead(pJrnl, zMaster, len, szJ-16-len);
   1.924 +  if( rc!=SQLITE_OK ){
   1.925 +    return rc;
   1.926 +  }
   1.927 +  zMaster[len] = '\0';
   1.928 +
   1.929 +  /* See if the checksum matches the master journal name */
   1.930 +  for(u=0; u<len; u++){
   1.931 +    cksum -= zMaster[u];
   1.932 +   }
   1.933 +  if( cksum ){
   1.934 +    /* If the checksum doesn't add up, then one or more of the disk sectors
   1.935 +    ** containing the master journal filename is corrupted. This means
   1.936 +    ** definitely roll back, so just return SQLITE_OK and report a (nul)
   1.937 +    ** master-journal filename.
   1.938 +    */
   1.939 +    zMaster[0] = '\0';
   1.940 +  }
   1.941 +   
   1.942 +  return SQLITE_OK;
   1.943 +}
   1.944 +
   1.945 +/*
   1.946 +** Seek the journal file descriptor to the next sector boundary where a
   1.947 +** journal header may be read or written. Pager.journalOff is updated with
   1.948 +** the new seek offset.
   1.949 +**
   1.950 +** i.e for a sector size of 512:
   1.951 +**
   1.952 +** Input Offset              Output Offset
   1.953 +** ---------------------------------------
   1.954 +** 0                         0
   1.955 +** 512                       512
   1.956 +** 100                       512
   1.957 +** 2000                      2048
   1.958 +** 
   1.959 +*/
   1.960 +static void seekJournalHdr(Pager *pPager){
   1.961 +  i64 offset = 0;
   1.962 +  i64 c = pPager->journalOff;
   1.963 +  if( c ){
   1.964 +    offset = ((c-1)/JOURNAL_HDR_SZ(pPager) + 1) * JOURNAL_HDR_SZ(pPager);
   1.965 +  }
   1.966 +  assert( offset%JOURNAL_HDR_SZ(pPager)==0 );
   1.967 +  assert( offset>=c );
   1.968 +  assert( (offset-c)<JOURNAL_HDR_SZ(pPager) );
   1.969 +  pPager->journalOff = offset;
   1.970 +}
   1.971 +
   1.972 +/*
   1.973 +** Write zeros over the header of the journal file.  This has the
   1.974 +** effect of invalidating the journal file and committing the
   1.975 +** transaction.
   1.976 +*/
   1.977 +static int zeroJournalHdr(Pager *pPager, int doTruncate){
   1.978 +  int rc = SQLITE_OK;
   1.979 +  static const char zeroHdr[28];
   1.980 +
   1.981 +  if( pPager->journalOff ){
   1.982 +    i64 iLimit = pPager->journalSizeLimit;
   1.983 +
   1.984 +    IOTRACE(("JZEROHDR %p\n", pPager))
   1.985 +    if( doTruncate || iLimit==0 ){
   1.986 +      rc = sqlite3OsTruncate(pPager->jfd, 0);
   1.987 +    }else{
   1.988 +      rc = sqlite3OsWrite(pPager->jfd, zeroHdr, sizeof(zeroHdr), 0);
   1.989 +    }
   1.990 +    if( rc==SQLITE_OK && !pPager->noSync ){
   1.991 +      rc = sqlite3OsSync(pPager->jfd, SQLITE_SYNC_DATAONLY|pPager->sync_flags);
   1.992 +    }
   1.993 +
   1.994 +    /* At this point the transaction is committed but the write lock 
   1.995 +    ** is still held on the file. If there is a size limit configured for 
   1.996 +    ** the persistent journal and the journal file currently consumes more
   1.997 +    ** space than that limit allows for, truncate it now. There is no need
   1.998 +    ** to sync the file following this operation.
   1.999 +    */
  1.1000 +    if( rc==SQLITE_OK && iLimit>0 ){
  1.1001 +      i64 sz;
  1.1002 +      rc = sqlite3OsFileSize(pPager->jfd, &sz);
  1.1003 +      if( rc==SQLITE_OK && sz>iLimit ){
  1.1004 +        rc = sqlite3OsTruncate(pPager->jfd, iLimit);
  1.1005 +      }
  1.1006 +    }
  1.1007 +  }
  1.1008 +  return rc;
  1.1009 +}
  1.1010 +
  1.1011 +/*
  1.1012 +** The journal file must be open when this routine is called. A journal
  1.1013 +** header (JOURNAL_HDR_SZ bytes) is written into the journal file at the
  1.1014 +** current location.
  1.1015 +**
  1.1016 +** The format for the journal header is as follows:
  1.1017 +** - 8 bytes: Magic identifying journal format.
  1.1018 +** - 4 bytes: Number of records in journal, or -1 no-sync mode is on.
  1.1019 +** - 4 bytes: Random number used for page hash.
  1.1020 +** - 4 bytes: Initial database page count.
  1.1021 +** - 4 bytes: Sector size used by the process that wrote this journal.
  1.1022 +** - 4 bytes: Database page size.
  1.1023 +** 
  1.1024 +** Followed by (JOURNAL_HDR_SZ - 28) bytes of unused space.
  1.1025 +*/
  1.1026 +static int writeJournalHdr(Pager *pPager){
  1.1027 +  int rc = SQLITE_OK;
  1.1028 +  char *zHeader = pPager->pTmpSpace;
  1.1029 +  int nHeader = pPager->pageSize;
  1.1030 +  int nWrite;
  1.1031 +
  1.1032 +  if( nHeader>JOURNAL_HDR_SZ(pPager) ){
  1.1033 +    nHeader = JOURNAL_HDR_SZ(pPager);
  1.1034 +  }
  1.1035 +
  1.1036 +  if( pPager->stmtHdrOff==0 ){
  1.1037 +    pPager->stmtHdrOff = pPager->journalOff;
  1.1038 +  }
  1.1039 +
  1.1040 +  seekJournalHdr(pPager);
  1.1041 +  pPager->journalHdr = pPager->journalOff;
  1.1042 +
  1.1043 +  memcpy(zHeader, aJournalMagic, sizeof(aJournalMagic));
  1.1044 +
  1.1045 +  /* 
  1.1046 +  ** Write the nRec Field - the number of page records that follow this
  1.1047 +  ** journal header. Normally, zero is written to this value at this time.
  1.1048 +  ** After the records are added to the journal (and the journal synced, 
  1.1049 +  ** if in full-sync mode), the zero is overwritten with the true number
  1.1050 +  ** of records (see syncJournal()).
  1.1051 +  **
  1.1052 +  ** A faster alternative is to write 0xFFFFFFFF to the nRec field. When
  1.1053 +  ** reading the journal this value tells SQLite to assume that the
  1.1054 +  ** rest of the journal file contains valid page records. This assumption
  1.1055 +  ** is dangerous, as if a failure occured whilst writing to the journal
  1.1056 +  ** file it may contain some garbage data. There are two scenarios
  1.1057 +  ** where this risk can be ignored:
  1.1058 +  **
  1.1059 +  **   * When the pager is in no-sync mode. Corruption can follow a
  1.1060 +  **     power failure in this case anyway.
  1.1061 +  **
  1.1062 +  **   * When the SQLITE_IOCAP_SAFE_APPEND flag is set. This guarantees
  1.1063 +  **     that garbage data is never appended to the journal file.
  1.1064 +  */
  1.1065 +  assert(pPager->fd->pMethods||pPager->noSync);
  1.1066 +  if( (pPager->noSync) 
  1.1067 +   || (sqlite3OsDeviceCharacteristics(pPager->fd)&SQLITE_IOCAP_SAFE_APPEND) 
  1.1068 +  ){
  1.1069 +    put32bits(&zHeader[sizeof(aJournalMagic)], 0xffffffff);
  1.1070 +  }else{
  1.1071 +    put32bits(&zHeader[sizeof(aJournalMagic)], 0);
  1.1072 +  }
  1.1073 +
  1.1074 +  /* The random check-hash initialiser */ 
  1.1075 +  sqlite3_randomness(sizeof(pPager->cksumInit), &pPager->cksumInit);
  1.1076 +  put32bits(&zHeader[sizeof(aJournalMagic)+4], pPager->cksumInit);
  1.1077 +  /* The initial database size */
  1.1078 +  put32bits(&zHeader[sizeof(aJournalMagic)+8], pPager->dbSize);
  1.1079 +  /* The assumed sector size for this process */
  1.1080 +  put32bits(&zHeader[sizeof(aJournalMagic)+12], pPager->sectorSize);
  1.1081 +  if( pPager->journalHdr==0 ){
  1.1082 +    /* The page size */
  1.1083 +    put32bits(&zHeader[sizeof(aJournalMagic)+16], pPager->pageSize);
  1.1084 +  }
  1.1085 +
  1.1086 +  for(nWrite=0; rc==SQLITE_OK&&nWrite<JOURNAL_HDR_SZ(pPager); nWrite+=nHeader){
  1.1087 +    IOTRACE(("JHDR %p %lld %d\n", pPager, pPager->journalHdr, nHeader))
  1.1088 +    rc = sqlite3OsWrite(pPager->jfd, zHeader, nHeader, pPager->journalOff);
  1.1089 +    pPager->journalOff += nHeader;
  1.1090 +  }
  1.1091 +
  1.1092 +  return rc;
  1.1093 +}
  1.1094 +
  1.1095 +/*
  1.1096 +** The journal file must be open when this is called. A journal header file
  1.1097 +** (JOURNAL_HDR_SZ bytes) is read from the current location in the journal
  1.1098 +** file. See comments above function writeJournalHdr() for a description of
  1.1099 +** the journal header format.
  1.1100 +**
  1.1101 +** If the header is read successfully, *nRec is set to the number of
  1.1102 +** page records following this header and *dbSize is set to the size of the
  1.1103 +** database before the transaction began, in pages. Also, pPager->cksumInit
  1.1104 +** is set to the value read from the journal header. SQLITE_OK is returned
  1.1105 +** in this case.
  1.1106 +**
  1.1107 +** If the journal header file appears to be corrupted, SQLITE_DONE is
  1.1108 +** returned and *nRec and *dbSize are not set.  If JOURNAL_HDR_SZ bytes
  1.1109 +** cannot be read from the journal file an error code is returned.
  1.1110 +*/
  1.1111 +static int readJournalHdr(
  1.1112 +  Pager *pPager, 
  1.1113 +  i64 journalSize,
  1.1114 +  u32 *pNRec, 
  1.1115 +  u32 *pDbSize
  1.1116 +){
  1.1117 +  int rc;
  1.1118 +  unsigned char aMagic[8]; /* A buffer to hold the magic header */
  1.1119 +  i64 jrnlOff;
  1.1120 +  int iPageSize;
  1.1121 +
  1.1122 +  seekJournalHdr(pPager);
  1.1123 +  if( pPager->journalOff+JOURNAL_HDR_SZ(pPager) > journalSize ){
  1.1124 +    return SQLITE_DONE;
  1.1125 +  }
  1.1126 +  jrnlOff = pPager->journalOff;
  1.1127 +
  1.1128 +  rc = sqlite3OsRead(pPager->jfd, aMagic, sizeof(aMagic), jrnlOff);
  1.1129 +  if( rc ) return rc;
  1.1130 +  jrnlOff += sizeof(aMagic);
  1.1131 +
  1.1132 +  if( memcmp(aMagic, aJournalMagic, sizeof(aMagic))!=0 ){
  1.1133 +    return SQLITE_DONE;
  1.1134 +  }
  1.1135 +
  1.1136 +  rc = read32bits(pPager->jfd, jrnlOff, pNRec);
  1.1137 +  if( rc ) return rc;
  1.1138 +
  1.1139 +  rc = read32bits(pPager->jfd, jrnlOff+4, &pPager->cksumInit);
  1.1140 +  if( rc ) return rc;
  1.1141 +
  1.1142 +  rc = read32bits(pPager->jfd, jrnlOff+8, pDbSize);
  1.1143 +  if( rc ) return rc;
  1.1144 +
  1.1145 +  rc = read32bits(pPager->jfd, jrnlOff+16, (u32 *)&iPageSize);
  1.1146 +  if( rc==SQLITE_OK 
  1.1147 +   && iPageSize>=512 
  1.1148 +   && iPageSize<=SQLITE_MAX_PAGE_SIZE 
  1.1149 +   && ((iPageSize-1)&iPageSize)==0 
  1.1150 +  ){
  1.1151 +    u16 pagesize = iPageSize;
  1.1152 +    rc = sqlite3PagerSetPagesize(pPager, &pagesize);
  1.1153 +  }
  1.1154 +  if( rc ) return rc;
  1.1155 +
  1.1156 +  /* Update the assumed sector-size to match the value used by 
  1.1157 +  ** the process that created this journal. If this journal was
  1.1158 +  ** created by a process other than this one, then this routine
  1.1159 +  ** is being called from within pager_playback(). The local value
  1.1160 +  ** of Pager.sectorSize is restored at the end of that routine.
  1.1161 +  */
  1.1162 +  rc = read32bits(pPager->jfd, jrnlOff+12, (u32 *)&pPager->sectorSize);
  1.1163 +  if( rc ) return rc;
  1.1164 +
  1.1165 +  pPager->journalOff += JOURNAL_HDR_SZ(pPager);
  1.1166 +  return SQLITE_OK;
  1.1167 +}
  1.1168 +
  1.1169 +
  1.1170 +/*
  1.1171 +** Write the supplied master journal name into the journal file for pager
  1.1172 +** pPager at the current location. The master journal name must be the last
  1.1173 +** thing written to a journal file. If the pager is in full-sync mode, the
  1.1174 +** journal file descriptor is advanced to the next sector boundary before
  1.1175 +** anything is written. The format is:
  1.1176 +**
  1.1177 +** + 4 bytes: PAGER_MJ_PGNO.
  1.1178 +** + N bytes: length of master journal name.
  1.1179 +** + 4 bytes: N
  1.1180 +** + 4 bytes: Master journal name checksum.
  1.1181 +** + 8 bytes: aJournalMagic[].
  1.1182 +**
  1.1183 +** The master journal page checksum is the sum of the bytes in the master
  1.1184 +** journal name.
  1.1185 +**
  1.1186 +** If zMaster is a NULL pointer (occurs for a single database transaction), 
  1.1187 +** this call is a no-op.
  1.1188 +*/
  1.1189 +static int writeMasterJournal(Pager *pPager, const char *zMaster){
  1.1190 +  int rc;
  1.1191 +  int len; 
  1.1192 +  int i; 
  1.1193 +  i64 jrnlOff;
  1.1194 +  i64 jrnlSize;
  1.1195 +  u32 cksum = 0;
  1.1196 +  char zBuf[sizeof(aJournalMagic)+2*4];
  1.1197 +
  1.1198 +  if( !zMaster || pPager->setMaster) return SQLITE_OK;
  1.1199 +  pPager->setMaster = 1;
  1.1200 +
  1.1201 +  len = strlen(zMaster);
  1.1202 +  for(i=0; i<len; i++){
  1.1203 +    cksum += zMaster[i];
  1.1204 +  }
  1.1205 +
  1.1206 +  /* If in full-sync mode, advance to the next disk sector before writing
  1.1207 +  ** the master journal name. This is in case the previous page written to
  1.1208 +  ** the journal has already been synced.
  1.1209 +  */
  1.1210 +  if( pPager->fullSync ){
  1.1211 +    seekJournalHdr(pPager);
  1.1212 +  }
  1.1213 +  jrnlOff = pPager->journalOff;
  1.1214 +  pPager->journalOff += (len+20);
  1.1215 +
  1.1216 +  rc = write32bits(pPager->jfd, jrnlOff, PAGER_MJ_PGNO(pPager));
  1.1217 +  if( rc!=SQLITE_OK ) return rc;
  1.1218 +  jrnlOff += 4;
  1.1219 +
  1.1220 +  rc = sqlite3OsWrite(pPager->jfd, zMaster, len, jrnlOff);
  1.1221 +  if( rc!=SQLITE_OK ) return rc;
  1.1222 +  jrnlOff += len;
  1.1223 +
  1.1224 +  put32bits(zBuf, len);
  1.1225 +  put32bits(&zBuf[4], cksum);
  1.1226 +  memcpy(&zBuf[8], aJournalMagic, sizeof(aJournalMagic));
  1.1227 +  rc = sqlite3OsWrite(pPager->jfd, zBuf, 8+sizeof(aJournalMagic), jrnlOff);
  1.1228 +  jrnlOff += 8+sizeof(aJournalMagic);
  1.1229 +  pPager->needSync = !pPager->noSync;
  1.1230 +
  1.1231 +  /* If the pager is in peristent-journal mode, then the physical 
  1.1232 +  ** journal-file may extend past the end of the master-journal name
  1.1233 +  ** and 8 bytes of magic data just written to the file. This is 
  1.1234 +  ** dangerous because the code to rollback a hot-journal file
  1.1235 +  ** will not be able to find the master-journal name to determine 
  1.1236 +  ** whether or not the journal is hot. 
  1.1237 +  **
  1.1238 +  ** Easiest thing to do in this scenario is to truncate the journal 
  1.1239 +  ** file to the required size.
  1.1240 +  */ 
  1.1241 +  if( (rc==SQLITE_OK)
  1.1242 +   && (rc = sqlite3OsFileSize(pPager->jfd, &jrnlSize))==SQLITE_OK
  1.1243 +   && jrnlSize>jrnlOff
  1.1244 +  ){
  1.1245 +    rc = sqlite3OsTruncate(pPager->jfd, jrnlOff);
  1.1246 +  }
  1.1247 +  return rc;
  1.1248 +}
  1.1249 +
  1.1250 +/*
  1.1251 +** Add or remove a page from the list of all pages that are in the
  1.1252 +** statement journal.
  1.1253 +**
  1.1254 +** The Pager keeps a separate list of pages that are currently in
  1.1255 +** the statement journal.  This helps the sqlite3PagerStmtCommit()
  1.1256 +** routine run MUCH faster for the common case where there are many
  1.1257 +** pages in memory but only a few are in the statement journal.
  1.1258 +*/
  1.1259 +static void page_add_to_stmt_list(PgHdr *pPg){
  1.1260 +  Pager *pPager = pPg->pPager;
  1.1261 +  PgHistory *pHist = PGHDR_TO_HIST(pPg, pPager);
  1.1262 +  assert( MEMDB );
  1.1263 +  if( !pHist->inStmt ){
  1.1264 +    assert( pHist->pPrevStmt==0 && pHist->pNextStmt==0 );
  1.1265 +    if( pPager->pStmt ){
  1.1266 +      PGHDR_TO_HIST(pPager->pStmt, pPager)->pPrevStmt = pPg;
  1.1267 +    }
  1.1268 +    pHist->pNextStmt = pPager->pStmt;
  1.1269 +    pPager->pStmt = pPg;
  1.1270 +    pHist->inStmt = 1;
  1.1271 +  }
  1.1272 +}
  1.1273 +
  1.1274 +/*
  1.1275 +** Find a page in the hash table given its page number.  Return
  1.1276 +** a pointer to the page or NULL if not found.
  1.1277 +*/
  1.1278 +static PgHdr *pager_lookup(Pager *pPager, Pgno pgno){
  1.1279 +  PgHdr *p;
  1.1280 +  if( pPager->aHash==0 ) return 0;
  1.1281 +  p = pPager->aHash[pgno & (pPager->nHash-1)];
  1.1282 +  while( p && p->pgno!=pgno ){
  1.1283 +    p = p->pNextHash;
  1.1284 +  }
  1.1285 +  return p;
  1.1286 +}
  1.1287 +
  1.1288 +/*
  1.1289 +** Clear the in-memory cache.  This routine
  1.1290 +** sets the state of the pager back to what it was when it was first
  1.1291 +** opened.  Any outstanding pages are invalidated and subsequent attempts
  1.1292 +** to access those pages will likely result in a coredump.
  1.1293 +*/
  1.1294 +static void pager_reset(Pager *pPager){
  1.1295 +  PgHdr *pPg, *pNext;
  1.1296 +  if( pPager->errCode ) return;
  1.1297 +  for(pPg=pPager->pAll; pPg; pPg=pNext){
  1.1298 +    IOTRACE(("PGFREE %p %d\n", pPager, pPg->pgno));
  1.1299 +    PAGER_INCR(sqlite3_pager_pgfree_count);
  1.1300 +    pNext = pPg->pNextAll;
  1.1301 +    lruListRemove(pPg);
  1.1302 +    sqlite3PageFree(pPg->pData);
  1.1303 +    sqlite3_free(pPg);
  1.1304 +  }
  1.1305 +  assert(pPager->lru.pFirst==0);
  1.1306 +  assert(pPager->lru.pFirstSynced==0);
  1.1307 +  assert(pPager->lru.pLast==0);
  1.1308 +  pPager->pStmt = 0;
  1.1309 +  pPager->pAll = 0;
  1.1310 +  pPager->pDirty = 0;
  1.1311 +  pPager->nHash = 0;
  1.1312 +  sqlite3_free(pPager->aHash);
  1.1313 +  pPager->nPage = 0;
  1.1314 +  pPager->aHash = 0;
  1.1315 +  pPager->nRef = 0;
  1.1316 +}
  1.1317 +
  1.1318 +/*
  1.1319 +** Unlock the database file. 
  1.1320 +**
  1.1321 +** If the pager is currently in error state, discard the contents of 
  1.1322 +** the cache and reset the Pager structure internal state. If there is
  1.1323 +** an open journal-file, then the next time a shared-lock is obtained
  1.1324 +** on the pager file (by this or any other process), it will be
  1.1325 +** treated as a hot-journal and rolled back.
  1.1326 +*/
  1.1327 +static void pager_unlock(Pager *pPager){
  1.1328 +  if( !pPager->exclusiveMode ){
  1.1329 +    if( !MEMDB ){
  1.1330 +      int rc = osUnlock(pPager->fd, NO_LOCK);
  1.1331 +      if( rc ) pPager->errCode = rc;
  1.1332 +      pPager->dbSize = -1;
  1.1333 +      IOTRACE(("UNLOCK %p\n", pPager))
  1.1334 +
  1.1335 +      /* Always close the journal file when dropping the database lock.
  1.1336 +      ** Otherwise, another connection with journal_mode=delete might
  1.1337 +      ** delete the file out from under us.
  1.1338 +      */
  1.1339 +      if( pPager->journalOpen ){
  1.1340 +        sqlite3OsClose(pPager->jfd);
  1.1341 +        pPager->journalOpen = 0;
  1.1342 +        sqlite3BitvecDestroy(pPager->pInJournal);
  1.1343 +        pPager->pInJournal = 0;
  1.1344 +      }
  1.1345 +
  1.1346 +      /* If Pager.errCode is set, the contents of the pager cache cannot be
  1.1347 +      ** trusted. Now that the pager file is unlocked, the contents of the
  1.1348 +      ** cache can be discarded and the error code safely cleared.
  1.1349 +      */
  1.1350 +      if( pPager->errCode ){
  1.1351 +        if( rc==SQLITE_OK ) pPager->errCode = SQLITE_OK;
  1.1352 +        pager_reset(pPager);
  1.1353 +        if( pPager->stmtOpen ){
  1.1354 +          sqlite3OsClose(pPager->stfd);
  1.1355 +          sqlite3BitvecDestroy(pPager->pInStmt);
  1.1356 +          pPager->pInStmt = 0;
  1.1357 +        }
  1.1358 +        pPager->stmtOpen = 0;
  1.1359 +        pPager->stmtInUse = 0;
  1.1360 +        pPager->journalOff = 0;
  1.1361 +        pPager->journalStarted = 0;
  1.1362 +        pPager->stmtAutoopen = 0;
  1.1363 +        pPager->origDbSize = 0;
  1.1364 +      }
  1.1365 +    }
  1.1366 +
  1.1367 +    if( !MEMDB || pPager->errCode==SQLITE_OK ){
  1.1368 +      pPager->state = PAGER_UNLOCK;
  1.1369 +      pPager->changeCountDone = 0;
  1.1370 +    }
  1.1371 +  }
  1.1372 +}
  1.1373 +
  1.1374 +/*
  1.1375 +** Execute a rollback if a transaction is active and unlock the 
  1.1376 +** database file. If the pager has already entered the error state, 
  1.1377 +** do not attempt the rollback.
  1.1378 +*/
  1.1379 +static void pagerUnlockAndRollback(Pager *p){
  1.1380 +  /* assert( p->state>=PAGER_RESERVED || p->journalOpen==0 ); */
  1.1381 +  if( p->errCode==SQLITE_OK && p->state>=PAGER_RESERVED ){
  1.1382 +    sqlite3BeginBenignMalloc();
  1.1383 +    sqlite3PagerRollback(p);
  1.1384 +    sqlite3EndBenignMalloc();
  1.1385 +  }
  1.1386 +  pager_unlock(p);
  1.1387 +#if 0
  1.1388 +  assert( p->errCode || !p->journalOpen || (p->exclusiveMode&&!p->journalOff) );
  1.1389 +  assert( p->errCode || !p->stmtOpen || p->exclusiveMode );
  1.1390 +#endif
  1.1391 +}
  1.1392 +
  1.1393 +/*
  1.1394 +** This routine ends a transaction.  A transaction is ended by either
  1.1395 +** a COMMIT or a ROLLBACK.
  1.1396 +**
  1.1397 +** When this routine is called, the pager has the journal file open and
  1.1398 +** a RESERVED or EXCLUSIVE lock on the database.  This routine will release
  1.1399 +** the database lock and acquires a SHARED lock in its place if that is
  1.1400 +** the appropriate thing to do.  Release locks usually is appropriate,
  1.1401 +** unless we are in exclusive access mode or unless this is a 
  1.1402 +** COMMIT AND BEGIN or ROLLBACK AND BEGIN operation.
  1.1403 +**
  1.1404 +** The journal file is either deleted or truncated.
  1.1405 +**
  1.1406 +** TODO: Consider keeping the journal file open for temporary databases.
  1.1407 +** This might give a performance improvement on windows where opening
  1.1408 +** a file is an expensive operation.
  1.1409 +*/
  1.1410 +static int pager_end_transaction(Pager *pPager, int hasMaster){
  1.1411 +  PgHdr *pPg;
  1.1412 +  int rc = SQLITE_OK;
  1.1413 +  int rc2 = SQLITE_OK;
  1.1414 +  assert( !MEMDB );
  1.1415 +  if( pPager->state<PAGER_RESERVED ){
  1.1416 +    return SQLITE_OK;
  1.1417 +  }
  1.1418 +  sqlite3PagerStmtCommit(pPager);
  1.1419 +  if( pPager->stmtOpen && !pPager->exclusiveMode ){
  1.1420 +    sqlite3OsClose(pPager->stfd);
  1.1421 +    pPager->stmtOpen = 0;
  1.1422 +  }
  1.1423 +  if( pPager->journalOpen ){
  1.1424 +    if( pPager->exclusiveMode 
  1.1425 +     || pPager->journalMode==PAGER_JOURNALMODE_PERSIST
  1.1426 +    ){
  1.1427 +      rc = zeroJournalHdr(pPager, hasMaster);
  1.1428 +      pager_error(pPager, rc);
  1.1429 +      pPager->journalOff = 0;
  1.1430 +      pPager->journalStarted = 0;
  1.1431 +    }else{
  1.1432 +      sqlite3OsClose(pPager->jfd);
  1.1433 +      pPager->journalOpen = 0;
  1.1434 +      if( rc==SQLITE_OK && !pPager->tempFile ){
  1.1435 +        rc = sqlite3OsDelete(pPager->pVfs, pPager->zJournal, 0);
  1.1436 +      }
  1.1437 +    }
  1.1438 +    sqlite3BitvecDestroy(pPager->pInJournal);
  1.1439 +    pPager->pInJournal = 0;
  1.1440 +    for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
  1.1441 +      pPg->inJournal = 0;
  1.1442 +      pPg->dirty = 0;
  1.1443 +      pPg->needSync = 0;
  1.1444 +      pPg->alwaysRollback = 0;
  1.1445 +#ifdef SQLITE_CHECK_PAGES
  1.1446 +      pPg->pageHash = pager_pagehash(pPg);
  1.1447 +#endif
  1.1448 +    }
  1.1449 +    pPager->pDirty = 0;
  1.1450 +    pPager->dirtyCache = 0;
  1.1451 +    pPager->nRec = 0;
  1.1452 +  }else{
  1.1453 +    assert( pPager->pInJournal==0 );
  1.1454 +  }
  1.1455 +
  1.1456 +  if( !pPager->exclusiveMode ){
  1.1457 +    rc2 = osUnlock(pPager->fd, SHARED_LOCK);
  1.1458 +    pPager->state = PAGER_SHARED;
  1.1459 +  }else if( pPager->state==PAGER_SYNCED ){
  1.1460 +    pPager->state = PAGER_EXCLUSIVE;
  1.1461 +  }
  1.1462 +  pPager->origDbSize = 0;
  1.1463 +  pPager->setMaster = 0;
  1.1464 +  pPager->needSync = 0;
  1.1465 +  lruListSetFirstSynced(pPager);
  1.1466 +  pPager->dbSize = -1;
  1.1467 +  pPager->dbModified = 0;
  1.1468 +
  1.1469 +  return (rc==SQLITE_OK?rc2:rc);
  1.1470 +}
  1.1471 +
  1.1472 +/*
  1.1473 +** Compute and return a checksum for the page of data.
  1.1474 +**
  1.1475 +** This is not a real checksum.  It is really just the sum of the 
  1.1476 +** random initial value and the page number.  We experimented with
  1.1477 +** a checksum of the entire data, but that was found to be too slow.
  1.1478 +**
  1.1479 +** Note that the page number is stored at the beginning of data and
  1.1480 +** the checksum is stored at the end.  This is important.  If journal
  1.1481 +** corruption occurs due to a power failure, the most likely scenario
  1.1482 +** is that one end or the other of the record will be changed.  It is
  1.1483 +** much less likely that the two ends of the journal record will be
  1.1484 +** correct and the middle be corrupt.  Thus, this "checksum" scheme,
  1.1485 +** though fast and simple, catches the mostly likely kind of corruption.
  1.1486 +**
  1.1487 +** FIX ME:  Consider adding every 200th (or so) byte of the data to the
  1.1488 +** checksum.  That way if a single page spans 3 or more disk sectors and
  1.1489 +** only the middle sector is corrupt, we will still have a reasonable
  1.1490 +** chance of failing the checksum and thus detecting the problem.
  1.1491 +*/
  1.1492 +static u32 pager_cksum(Pager *pPager, const u8 *aData){
  1.1493 +  u32 cksum = pPager->cksumInit;
  1.1494 +  int i = pPager->pageSize-200;
  1.1495 +  while( i>0 ){
  1.1496 +    cksum += aData[i];
  1.1497 +    i -= 200;
  1.1498 +  }
  1.1499 +  return cksum;
  1.1500 +}
  1.1501 +
  1.1502 +/* Forward declaration */
  1.1503 +static void makeClean(PgHdr*);
  1.1504 +
  1.1505 +/*
  1.1506 +** Read a single page from the journal file opened on file descriptor
  1.1507 +** jfd.  Playback this one page.
  1.1508 +**
  1.1509 +** If useCksum==0 it means this journal does not use checksums.  Checksums
  1.1510 +** are not used in statement journals because statement journals do not
  1.1511 +** need to survive power failures.
  1.1512 +*/
  1.1513 +static int pager_playback_one_page(
  1.1514 +  Pager *pPager, 
  1.1515 +  sqlite3_file *jfd,
  1.1516 +  i64 offset,
  1.1517 +  int useCksum
  1.1518 +){
  1.1519 +  int rc;
  1.1520 +  PgHdr *pPg;                   /* An existing page in the cache */
  1.1521 +  Pgno pgno;                    /* The page number of a page in journal */
  1.1522 +  u32 cksum;                    /* Checksum used for sanity checking */
  1.1523 +  u8 *aData = (u8 *)pPager->pTmpSpace;   /* Temp storage for a page */
  1.1524 +
  1.1525 +  /* useCksum should be true for the main journal and false for
  1.1526 +  ** statement journals.  Verify that this is always the case
  1.1527 +  */
  1.1528 +  assert( jfd == (useCksum ? pPager->jfd : pPager->stfd) );
  1.1529 +  assert( aData );
  1.1530 +
  1.1531 +  rc = read32bits(jfd, offset, &pgno);
  1.1532 +  if( rc!=SQLITE_OK ) return rc;
  1.1533 +  rc = sqlite3OsRead(jfd, aData, pPager->pageSize, offset+4);
  1.1534 +  if( rc!=SQLITE_OK ) return rc;
  1.1535 +  pPager->journalOff += pPager->pageSize + 4;
  1.1536 +
  1.1537 +  /* Sanity checking on the page.  This is more important that I originally
  1.1538 +  ** thought.  If a power failure occurs while the journal is being written,
  1.1539 +  ** it could cause invalid data to be written into the journal.  We need to
  1.1540 +  ** detect this invalid data (with high probability) and ignore it.
  1.1541 +  */
  1.1542 +  if( pgno==0 || pgno==PAGER_MJ_PGNO(pPager) ){
  1.1543 +    return SQLITE_DONE;
  1.1544 +  }
  1.1545 +  if( pgno>(unsigned)pPager->dbSize ){
  1.1546 +    return SQLITE_OK;
  1.1547 +  }
  1.1548 +  if( useCksum ){
  1.1549 +    rc = read32bits(jfd, offset+pPager->pageSize+4, &cksum);
  1.1550 +    if( rc ) return rc;
  1.1551 +    pPager->journalOff += 4;
  1.1552 +    if( pager_cksum(pPager, aData)!=cksum ){
  1.1553 +      return SQLITE_DONE;
  1.1554 +    }
  1.1555 +  }
  1.1556 +
  1.1557 +  assert( pPager->state==PAGER_RESERVED || pPager->state>=PAGER_EXCLUSIVE );
  1.1558 +
  1.1559 +  /* If the pager is in RESERVED state, then there must be a copy of this
  1.1560 +  ** page in the pager cache. In this case just update the pager cache,
  1.1561 +  ** not the database file. The page is left marked dirty in this case.
  1.1562 +  **
  1.1563 +  ** An exception to the above rule: If the database is in no-sync mode
  1.1564 +  ** and a page is moved during an incremental vacuum then the page may
  1.1565 +  ** not be in the pager cache. Later: if a malloc() or IO error occurs
  1.1566 +  ** during a Movepage() call, then the page may not be in the cache
  1.1567 +  ** either. So the condition described in the above paragraph is not
  1.1568 +  ** assert()able.
  1.1569 +  **
  1.1570 +  ** If in EXCLUSIVE state, then we update the pager cache if it exists
  1.1571 +  ** and the main file. The page is then marked not dirty.
  1.1572 +  **
  1.1573 +  ** Ticket #1171:  The statement journal might contain page content that is
  1.1574 +  ** different from the page content at the start of the transaction.
  1.1575 +  ** This occurs when a page is changed prior to the start of a statement
  1.1576 +  ** then changed again within the statement.  When rolling back such a
  1.1577 +  ** statement we must not write to the original database unless we know
  1.1578 +  ** for certain that original page contents are synced into the main rollback
  1.1579 +  ** journal.  Otherwise, a power loss might leave modified data in the
  1.1580 +  ** database file without an entry in the rollback journal that can
  1.1581 +  ** restore the database to its original form.  Two conditions must be
  1.1582 +  ** met before writing to the database files. (1) the database must be
  1.1583 +  ** locked.  (2) we know that the original page content is fully synced
  1.1584 +  ** in the main journal either because the page is not in cache or else
  1.1585 +  ** the page is marked as needSync==0.
  1.1586 +  **
  1.1587 +  ** 2008-04-14:  When attempting to vacuum a corrupt database file, it
  1.1588 +  ** is possible to fail a statement on a database that does not yet exist.
  1.1589 +  ** Do not attempt to write if database file has never been opened.
  1.1590 +  */
  1.1591 +  pPg = pager_lookup(pPager, pgno);
  1.1592 +  PAGERTRACE4("PLAYBACK %d page %d hash(%08x)\n",
  1.1593 +               PAGERID(pPager), pgno, pager_datahash(pPager->pageSize, aData));
  1.1594 +  if( pPager->state>=PAGER_EXCLUSIVE && (pPg==0 || pPg->needSync==0)
  1.1595 +        && pPager->fd->pMethods ){
  1.1596 +    i64 offset = (pgno-1)*(i64)pPager->pageSize;
  1.1597 +    rc = sqlite3OsWrite(pPager->fd, aData, pPager->pageSize, offset);
  1.1598 +    if( pPg ){
  1.1599 +      makeClean(pPg);
  1.1600 +    }
  1.1601 +  }
  1.1602 +  if( pPg ){
  1.1603 +    /* No page should ever be explicitly rolled back that is in use, except
  1.1604 +    ** for page 1 which is held in use in order to keep the lock on the
  1.1605 +    ** database active. However such a page may be rolled back as a result
  1.1606 +    ** of an internal error resulting in an automatic call to
  1.1607 +    ** sqlite3PagerRollback().
  1.1608 +    */
  1.1609 +    void *pData;
  1.1610 +    /* assert( pPg->nRef==0 || pPg->pgno==1 ); */
  1.1611 +    pData = PGHDR_TO_DATA(pPg);
  1.1612 +    memcpy(pData, aData, pPager->pageSize);
  1.1613 +    if( pPager->xReiniter ){
  1.1614 +      pPager->xReiniter(pPg, pPager->pageSize);
  1.1615 +    }
  1.1616 +#ifdef SQLITE_CHECK_PAGES
  1.1617 +    pPg->pageHash = pager_pagehash(pPg);
  1.1618 +#endif
  1.1619 +    /* If this was page 1, then restore the value of Pager.dbFileVers.
  1.1620 +    ** Do this before any decoding. */
  1.1621 +    if( pgno==1 ){
  1.1622 +      memcpy(&pPager->dbFileVers, &((u8*)pData)[24],sizeof(pPager->dbFileVers));
  1.1623 +    }
  1.1624 +
  1.1625 +    /* Decode the page just read from disk */
  1.1626 +    CODEC1(pPager, pData, pPg->pgno, 3);
  1.1627 +  }
  1.1628 +  return rc;
  1.1629 +}
  1.1630 +
  1.1631 +/*
  1.1632 +** Parameter zMaster is the name of a master journal file. A single journal
  1.1633 +** file that referred to the master journal file has just been rolled back.
  1.1634 +** This routine checks if it is possible to delete the master journal file,
  1.1635 +** and does so if it is.
  1.1636 +**
  1.1637 +** Argument zMaster may point to Pager.pTmpSpace. So that buffer is not 
  1.1638 +** available for use within this function.
  1.1639 +**
  1.1640 +**
  1.1641 +** The master journal file contains the names of all child journals.
  1.1642 +** To tell if a master journal can be deleted, check to each of the
  1.1643 +** children.  If all children are either missing or do not refer to
  1.1644 +** a different master journal, then this master journal can be deleted.
  1.1645 +*/
  1.1646 +static int pager_delmaster(Pager *pPager, const char *zMaster){
  1.1647 +  sqlite3_vfs *pVfs = pPager->pVfs;
  1.1648 +  int rc;
  1.1649 +  int master_open = 0;
  1.1650 +  sqlite3_file *pMaster;
  1.1651 +  sqlite3_file *pJournal;
  1.1652 +  char *zMasterJournal = 0; /* Contents of master journal file */
  1.1653 +  i64 nMasterJournal;       /* Size of master journal file */
  1.1654 +
  1.1655 +  /* Open the master journal file exclusively in case some other process
  1.1656 +  ** is running this routine also. Not that it makes too much difference.
  1.1657 +  */
  1.1658 +  pMaster = (sqlite3_file *)sqlite3Malloc(pVfs->szOsFile * 2);
  1.1659 +  pJournal = (sqlite3_file *)(((u8 *)pMaster) + pVfs->szOsFile);
  1.1660 +  if( !pMaster ){
  1.1661 +    rc = SQLITE_NOMEM;
  1.1662 +  }else{
  1.1663 +    int flags = (SQLITE_OPEN_READONLY|SQLITE_OPEN_MASTER_JOURNAL);
  1.1664 +    rc = sqlite3OsOpen(pVfs, zMaster, pMaster, flags, 0);
  1.1665 +  }
  1.1666 +  if( rc!=SQLITE_OK ) goto delmaster_out;
  1.1667 +  master_open = 1;
  1.1668 +
  1.1669 +  rc = sqlite3OsFileSize(pMaster, &nMasterJournal);
  1.1670 +  if( rc!=SQLITE_OK ) goto delmaster_out;
  1.1671 +
  1.1672 +  if( nMasterJournal>0 ){
  1.1673 +    char *zJournal;
  1.1674 +    char *zMasterPtr = 0;
  1.1675 +    int nMasterPtr = pPager->pVfs->mxPathname+1;
  1.1676 +
  1.1677 +    /* Load the entire master journal file into space obtained from
  1.1678 +    ** sqlite3_malloc() and pointed to by zMasterJournal. 
  1.1679 +    */
  1.1680 +    zMasterJournal = (char *)sqlite3Malloc(nMasterJournal + nMasterPtr);
  1.1681 +    if( !zMasterJournal ){
  1.1682 +      rc = SQLITE_NOMEM;
  1.1683 +      goto delmaster_out;
  1.1684 +    }
  1.1685 +    zMasterPtr = &zMasterJournal[nMasterJournal];
  1.1686 +    rc = sqlite3OsRead(pMaster, zMasterJournal, nMasterJournal, 0);
  1.1687 +    if( rc!=SQLITE_OK ) goto delmaster_out;
  1.1688 +
  1.1689 +    zJournal = zMasterJournal;
  1.1690 +    while( (zJournal-zMasterJournal)<nMasterJournal ){
  1.1691 +      int exists;
  1.1692 +      rc = sqlite3OsAccess(pVfs, zJournal, SQLITE_ACCESS_EXISTS, &exists);
  1.1693 +      if( rc!=SQLITE_OK ){
  1.1694 +        goto delmaster_out;
  1.1695 +      }
  1.1696 +      if( exists ){
  1.1697 +        /* One of the journals pointed to by the master journal exists.
  1.1698 +        ** Open it and check if it points at the master journal. If
  1.1699 +        ** so, return without deleting the master journal file.
  1.1700 +        */
  1.1701 +        int c;
  1.1702 +        int flags = (SQLITE_OPEN_READONLY|SQLITE_OPEN_MAIN_JOURNAL);
  1.1703 +        rc = sqlite3OsOpen(pVfs, zJournal, pJournal, flags, 0);
  1.1704 +        if( rc!=SQLITE_OK ){
  1.1705 +          goto delmaster_out;
  1.1706 +        }
  1.1707 +
  1.1708 +        rc = readMasterJournal(pJournal, zMasterPtr, nMasterPtr);
  1.1709 +        sqlite3OsClose(pJournal);
  1.1710 +        if( rc!=SQLITE_OK ){
  1.1711 +          goto delmaster_out;
  1.1712 +        }
  1.1713 +
  1.1714 +        c = zMasterPtr[0]!=0 && strcmp(zMasterPtr, zMaster)==0;
  1.1715 +        if( c ){
  1.1716 +          /* We have a match. Do not delete the master journal file. */
  1.1717 +          goto delmaster_out;
  1.1718 +        }
  1.1719 +      }
  1.1720 +      zJournal += (strlen(zJournal)+1);
  1.1721 +    }
  1.1722 +  }
  1.1723 +  
  1.1724 +  rc = sqlite3OsDelete(pVfs, zMaster, 0);
  1.1725 +
  1.1726 +delmaster_out:
  1.1727 +  if( zMasterJournal ){
  1.1728 +    sqlite3_free(zMasterJournal);
  1.1729 +  }  
  1.1730 +  if( master_open ){
  1.1731 +    sqlite3OsClose(pMaster);
  1.1732 +  }
  1.1733 +  sqlite3_free(pMaster);
  1.1734 +  return rc;
  1.1735 +}
  1.1736 +
  1.1737 +
  1.1738 +static void pager_truncate_cache(Pager *pPager);
  1.1739 +
  1.1740 +/*
  1.1741 +** Truncate the main file of the given pager to the number of pages
  1.1742 +** indicated. Also truncate the cached representation of the file.
  1.1743 +**
  1.1744 +** Might might be the case that the file on disk is smaller than nPage.
  1.1745 +** This can happen, for example, if we are in the middle of a transaction
  1.1746 +** which has extended the file size and the new pages are still all held
  1.1747 +** in cache, then an INSERT or UPDATE does a statement rollback.  Some
  1.1748 +** operating system implementations can get confused if you try to
  1.1749 +** truncate a file to some size that is larger than it currently is,
  1.1750 +** so detect this case and write a single zero byte to the end of the new
  1.1751 +** file instead.
  1.1752 +*/
  1.1753 +static int pager_truncate(Pager *pPager, int nPage){
  1.1754 +  int rc = SQLITE_OK;
  1.1755 +  if( pPager->state>=PAGER_EXCLUSIVE && pPager->fd->pMethods ){
  1.1756 +    i64 currentSize, newSize;
  1.1757 +    rc = sqlite3OsFileSize(pPager->fd, &currentSize);
  1.1758 +    newSize = pPager->pageSize*(i64)nPage;
  1.1759 +    if( rc==SQLITE_OK && currentSize!=newSize ){
  1.1760 +      if( currentSize>newSize ){
  1.1761 +        rc = sqlite3OsTruncate(pPager->fd, newSize);
  1.1762 +      }else{
  1.1763 +        rc = sqlite3OsWrite(pPager->fd, "", 1, newSize-1);
  1.1764 +      }
  1.1765 +    }
  1.1766 +  }
  1.1767 +  if( rc==SQLITE_OK ){
  1.1768 +    pPager->dbSize = nPage;
  1.1769 +    pager_truncate_cache(pPager);
  1.1770 +  }
  1.1771 +  return rc;
  1.1772 +}
  1.1773 +
  1.1774 +/*
  1.1775 +** Set the sectorSize for the given pager.
  1.1776 +**
  1.1777 +** The sector size is at least as big as the sector size reported
  1.1778 +** by sqlite3OsSectorSize().  The minimum sector size is 512.
  1.1779 +*/
  1.1780 +static void setSectorSize(Pager *pPager){
  1.1781 +  assert(pPager->fd->pMethods||pPager->tempFile);
  1.1782 +  if( !pPager->tempFile ){
  1.1783 +    /* Sector size doesn't matter for temporary files. Also, the file
  1.1784 +    ** may not have been opened yet, in whcih case the OsSectorSize()
  1.1785 +    ** call will segfault.
  1.1786 +    */
  1.1787 +    pPager->sectorSize = sqlite3OsSectorSize(pPager->fd);
  1.1788 +  }
  1.1789 +  if( pPager->sectorSize<512 ){
  1.1790 +    pPager->sectorSize = 512;
  1.1791 +  }
  1.1792 +}
  1.1793 +
/*
** Playback the journal and thus restore the database file to
** the state it was in before we started making changes.  
**
** The journal file format is as follows: 
**
**  (1)  8 byte prefix.  A copy of aJournalMagic[].
**  (2)  4 byte big-endian integer which is the number of valid page records
**       in the journal.  If this value is 0xffffffff, then compute the
**       number of page records from the journal size.
**  (3)  4 byte big-endian integer which is the initial value for the 
**       sanity checksum.
**  (4)  4 byte integer which is the number of pages to truncate the
**       database to during a rollback.
**  (5)  4 byte big-endian integer which is the sector size.  The header
**       is this many bytes in size.
**  (6)  4 byte big-endian integer which is the page size.  (Earlier text
**       read "page case", presumably a typo -- confirm against
**       readJournalHdr().)
**  (7)  4 byte integer which is the number of bytes in the master journal
**       name.  The value may be zero (indicate that there is no master
**       journal.)
**  (8)  N bytes of the master journal name.  The name will be nul-terminated
**       and might be shorter than the value read from (7).  If the first byte
**       of the name is \000 then there is no master journal.  The master
**       journal name is stored in UTF-8.
**  (9)  Zero or more pages instances, each as follows:
**        +  4 byte page number.
**        +  pPager->pageSize bytes of data.
**        +  4 byte checksum
**
** When we speak of the journal header, we mean the first 8 items above.
** Each entry in the journal is an instance of the 9th item.
**
** Call the value from the second bullet "nRec".  nRec is the number of
** valid page entries in the journal.  In most cases, you can compute the
** value of nRec from the size of the journal file.  But if a power
** failure occurred while the journal was being written, it could be the
** case that the size of the journal file had already been increased but
** the extra entries had not yet made it safely to disk.  In such a case,
** the value of nRec computed from the file size would be too large.  For
** that reason, we always use the nRec value in the header.
**
** If the nRec value is 0xffffffff it means that nRec should be computed
** from the file size.  This value is used when the user selects the
** no-sync option for the journal.  A power failure could lead to corruption
** in this case.  But for things like temporary table (which will be
** deleted when the power is restored) we don't care.  
**
** If the file opened as the journal file is not a well-formed
** journal file then all pages up to the first corrupted page are rolled
** back (or no pages if the journal header is corrupted). The journal file
** is then deleted and SQLITE_OK returned, just as if no corruption had
** been encountered.
**
** If an I/O or malloc() error occurs, the journal-file is not deleted
** and an error code is returned.
**
** Parameter isHot is zero when the journal being rolled back belongs to
** a transaction created by this process.  It only affects how a header
** with nRec==0 is interpreted (see below).
*/
static int pager_playback(Pager *pPager, int isHot){
  sqlite3_vfs *pVfs = pPager->pVfs;
  i64 szJ;                 /* Size of the journal file in bytes */
  u32 nRec;                /* Number of Records in the journal */
  u32 u;                   /* Unsigned loop counter */
  Pgno mxPg = 0;           /* Size of the original file in pages */
  int rc;                  /* Result code of a subroutine */
  int res = 1;             /* Value returned by sqlite3OsAccess() */
  char *zMaster = 0;       /* Name of master journal file if any */

  /* Figure out how many records are in the journal.  Abort early if
  ** the journal is empty.
  */
  assert( pPager->journalOpen );
  rc = sqlite3OsFileSize(pPager->jfd, &szJ);
  if( rc!=SQLITE_OK || szJ==0 ){
    goto end_playback;
  }

  /* Read the master journal name from the journal, if it is present.
  ** If a master journal file name is specified, but the file is not
  ** present on disk, then the journal is not hot and does not need to be
  ** played back.
  */
  zMaster = pPager->pTmpSpace;
  rc = readMasterJournal(pPager->jfd, zMaster, pPager->pVfs->mxPathname+1);
  if( rc==SQLITE_OK && zMaster[0] ){
    rc = sqlite3OsAccess(pVfs, zMaster, SQLITE_ACCESS_EXISTS, &res);
  }
  /* Pager.pTmpSpace is reused as the page buffer by
  ** pager_playback_one_page(), so the name read above does not survive
  ** the loop below.  It is read again at end_playback.
  */
  zMaster = 0;
  if( rc!=SQLITE_OK || !res ){
    goto end_playback;
  }
  pPager->journalOff = 0;

  /* This loop terminates either when the readJournalHdr() call returns
  ** SQLITE_DONE or an IO error occurs. */
  while( 1 ){

    /* Read the next journal header from the journal file.  If there are
    ** not enough bytes left in the journal file for a complete header, or
    ** it is corrupted, then a process must have failed while writing it.
    ** This indicates nothing more needs to be rolled back.
    */
    rc = readJournalHdr(pPager, szJ, &nRec, &mxPg);
    if( rc!=SQLITE_OK ){ 
      if( rc==SQLITE_DONE ){
        rc = SQLITE_OK;
      }
      goto end_playback;
    }

    /* If nRec is 0xffffffff, then this journal was created by a process
    ** working in no-sync mode. This means that the rest of the journal
    ** file consists of pages, there are no more journal headers. Compute
    ** the value of nRec based on this assumption.
    */
    if( nRec==0xffffffff ){
      assert( pPager->journalOff==JOURNAL_HDR_SZ(pPager) );
      nRec = (szJ - JOURNAL_HDR_SZ(pPager))/JOURNAL_PG_SZ(pPager);
    }

    /* If nRec is 0 and this rollback is of a transaction created by this
    ** process and if this is the final header in the journal, then it means
    ** that this part of the journal was being filled but has not yet been
    ** synced to disk.  Compute the number of pages based on the remaining
    ** size of the file.
    **
    ** The third term of the test was added to fix ticket #2565.
    */
    if( nRec==0 && !isHot &&
        pPager->journalHdr+JOURNAL_HDR_SZ(pPager)==pPager->journalOff ){
      nRec = (szJ - pPager->journalOff) / JOURNAL_PG_SZ(pPager);
    }

    /* If this is the first header read from the journal, truncate the
    ** database file back to its original size.
    */
    if( pPager->journalOff==JOURNAL_HDR_SZ(pPager) ){
      rc = pager_truncate(pPager, mxPg);
      if( rc!=SQLITE_OK ){
        goto end_playback;
      }
    }

    /* Copy original pages out of the journal and back into the database file.
    ** SQLITE_DONE from pager_playback_one_page() means the record failed
    ** a sanity check: stop replaying, and move journalOff to the end of
    ** the journal so that the next readJournalHdr() call is made at
    ** end-of-file.
    */
    for(u=0; u<nRec; u++){
      rc = pager_playback_one_page(pPager, pPager->jfd, pPager->journalOff, 1);
      if( rc!=SQLITE_OK ){
        if( rc==SQLITE_DONE ){
          rc = SQLITE_OK;
          pPager->journalOff = szJ;
          break;
        }else{
          goto end_playback;
        }
      }
    }
  }
  /*NOTREACHED*/
  assert( 0 );

end_playback:
  /* Each step below runs only if everything before it succeeded, so
  ** zMaster is always non-NULL wherever it is dereferenced.
  */
  if( rc==SQLITE_OK ){
    zMaster = pPager->pTmpSpace;
    rc = readMasterJournal(pPager->jfd, zMaster, pPager->pVfs->mxPathname+1);
  }
  if( rc==SQLITE_OK ){
    rc = pager_end_transaction(pPager, zMaster[0]!='\0');
  }
  if( rc==SQLITE_OK && zMaster[0] ){
    /* If there was a master journal and this routine will return success,
    ** see if it is possible to delete the master journal.
    */
    rc = pager_delmaster(pPager, zMaster);
  }

  /* The Pager.sectorSize variable may have been updated while rolling
  ** back a journal created by a process with a different sector size
  ** value. Reset it to the correct value for this process.
  */
  setSectorSize(pPager);
  return rc;
}
  1.1975 +
  1.1976 +/*
  1.1977 +** Playback the statement journal.
  1.1978 +**
  1.1979 +** This is similar to playing back the transaction journal but with
  1.1980 +** a few extra twists.
  1.1981 +**
  1.1982 +**    (1)  The number of pages in the database file at the start of
  1.1983 +**         the statement is stored in pPager->stmtSize, not in the
  1.1984 +**         journal file itself.
  1.1985 +**
  1.1986 +**    (2)  In addition to playing back the statement journal, also
  1.1987 +**         playback all pages of the transaction journal beginning
  1.1988 +**         at offset pPager->stmtJSize.
  1.1989 +*/
  1.1990 +static int pager_stmt_playback(Pager *pPager){
  1.1991 +  i64 szJ;                 /* Size of the full journal */
  1.1992 +  i64 hdrOff;
  1.1993 +  int nRec;                /* Number of Records */
  1.1994 +  int i;                   /* Loop counter */
  1.1995 +  int rc;
  1.1996 +
  1.1997 +  szJ = pPager->journalOff;
  1.1998 +
  1.1999 +  /* Set hdrOff to be the offset just after the end of the last journal
  1.2000 +  ** page written before the first journal-header for this statement
  1.2001 +  ** transaction was written, or the end of the file if no journal
  1.2002 +  ** header was written.
  1.2003 +  */
  1.2004 +  hdrOff = pPager->stmtHdrOff;
  1.2005 +  assert( pPager->fullSync || !hdrOff );
  1.2006 +  if( !hdrOff ){
  1.2007 +    hdrOff = szJ;
  1.2008 +  }
  1.2009 +  
  1.2010 +  /* Truncate the database back to its original size.
  1.2011 +  */
  1.2012 +  rc = pager_truncate(pPager, pPager->stmtSize);
  1.2013 +  assert( pPager->state>=PAGER_SHARED );
  1.2014 +
  1.2015 +  /* Figure out how many records are in the statement journal.
  1.2016 +  */
  1.2017 +  assert( pPager->stmtInUse && pPager->journalOpen );
  1.2018 +  nRec = pPager->stmtNRec;
  1.2019 +  
  1.2020 +  /* Copy original pages out of the statement journal and back into the
  1.2021 +  ** database file.  Note that the statement journal omits checksums from
  1.2022 +  ** each record since power-failure recovery is not important to statement
  1.2023 +  ** journals.
  1.2024 +  */
  1.2025 +  for(i=0; i<nRec; i++){
  1.2026 +    i64 offset = i*(4+pPager->pageSize);
  1.2027 +    rc = pager_playback_one_page(pPager, pPager->stfd, offset, 0);
  1.2028 +    assert( rc!=SQLITE_DONE );
  1.2029 +    if( rc!=SQLITE_OK ) goto end_stmt_playback;
  1.2030 +  }
  1.2031 +
  1.2032 +  /* Now roll some pages back from the transaction journal. Pager.stmtJSize
  1.2033 +  ** was the size of the journal file when this statement was started, so
  1.2034 +  ** everything after that needs to be rolled back, either into the
  1.2035 +  ** database, the memory cache, or both.
  1.2036 +  **
  1.2037 +  ** If it is not zero, then Pager.stmtHdrOff is the offset to the start
  1.2038 +  ** of the first journal header written during this statement transaction.
  1.2039 +  */
  1.2040 +  pPager->journalOff = pPager->stmtJSize;
  1.2041 +  pPager->cksumInit = pPager->stmtCksum;
  1.2042 +  while( pPager->journalOff < hdrOff ){
  1.2043 +    rc = pager_playback_one_page(pPager, pPager->jfd, pPager->journalOff, 1);
  1.2044 +    assert( rc!=SQLITE_DONE );
  1.2045 +    if( rc!=SQLITE_OK ) goto end_stmt_playback;
  1.2046 +  }
  1.2047 +
  1.2048 +  while( pPager->journalOff < szJ ){
  1.2049 +    u32 nJRec;         /* Number of Journal Records */
  1.2050 +    u32 dummy;
  1.2051 +    rc = readJournalHdr(pPager, szJ, &nJRec, &dummy);
  1.2052 +    if( rc!=SQLITE_OK ){
  1.2053 +      assert( rc!=SQLITE_DONE );
  1.2054 +      goto end_stmt_playback;
  1.2055 +    }
  1.2056 +    if( nJRec==0 ){
  1.2057 +      nJRec = (szJ - pPager->journalOff) / (pPager->pageSize+8);
  1.2058 +    }
  1.2059 +    for(i=nJRec-1; i>=0 && pPager->journalOff < szJ; i--){
  1.2060 +      rc = pager_playback_one_page(pPager, pPager->jfd, pPager->journalOff, 1);
  1.2061 +      assert( rc!=SQLITE_DONE );
  1.2062 +      if( rc!=SQLITE_OK ) goto end_stmt_playback;
  1.2063 +    }
  1.2064 +  }
  1.2065 +
  1.2066 +  pPager->journalOff = szJ;
  1.2067 +  
  1.2068 +end_stmt_playback:
  1.2069 +  if( rc==SQLITE_OK) {
  1.2070 +    pPager->journalOff = szJ;
  1.2071 +    /* pager_reload_cache(pPager); */
  1.2072 +  }
  1.2073 +  return rc;
  1.2074 +}
  1.2075 +
  1.2076 +/*
  1.2077 +** Set the upper bound on the number of pages held in memory
  1.2078 +** (Pager.mxPage) to mxPage.  Any request of 10 pages or fewer is
  1.2079 +** replaced by a floor of 10 pages.
  1.2080 +*/
  1.2081 +void sqlite3PagerSetCachesize(Pager *pPager, int mxPage){
  1.2082 +  int nLimit = mxPage;             /* Value actually stored */
  1.2083 +  if( nLimit<=10 ) nLimit = 10;    /* Enforce the 10-page floor */
  1.2084 +  pPager->mxPage = nLimit;
  1.2085 +}
  1.2086 +
  1.2087 +/*
  1.2088 +** Set the journal-sync policy that protects the database against OS
  1.2089 +** crashes and power failures.  level selects one of three modes:
  1.2090 +**
  1.2091 +**    OFF       sqlite3OsSync() is never called.  This is the default
  1.2092 +**              for temporary and transient files.
  1.2093 +**
  1.2094 +**    NORMAL    The journal is synced once before writes begin on the
  1.2095 +**              database.  This is normally adequate protection, but
  1.2096 +**              it is theoretically possible, though very unlikely,
  1.2097 +**              that an inopportune power failure could leave the
  1.2098 +**              journal in a state which would cause damage to the
  1.2099 +**              database when it is rolled back.
  1.2100 +**
  1.2101 +**    FULL      The journal is synced twice before writes begin on the
  1.2102 +**              database, with the nRec field of the journal header
  1.2103 +**              written between the two syncs.  Assuming that writing
  1.2104 +**              a single disk sector is atomic, this mode ensures the
  1.2105 +**              journal cannot be corrupted to the point of damaging
  1.2106 +**              the database during rollback.
  1.2107 +**
  1.2108 +** The numeric values of level are OFF==1, NORMAL==2 and FULL==3.  A
  1.2109 +** temporary file is always treated as OFF.  A non-zero full_fsync
  1.2110 +** selects SQLITE_SYNC_FULL for sync_flags; zero, SQLITE_SYNC_NORMAL.
  1.2111 +*/
  1.2112 +#ifndef SQLITE_OMIT_PAGER_PRAGMAS
  1.2113 +void sqlite3PagerSetSafetyLevel(Pager *pPager, int level, int full_fsync){
  1.2114 +  const int isTemp = (pPager->tempFile!=0);   /* Temp files never sync */
  1.2115 +  pPager->noSync = (isTemp || level==1);
  1.2116 +  pPager->fullSync = (!isTemp && level==3);
  1.2117 +  pPager->sync_flags = full_fsync ? SQLITE_SYNC_FULL : SQLITE_SYNC_NORMAL;
  1.2118 +  if( pPager->noSync ){ pPager->needSync = 0; }
  1.2119 +}
  1.2120 +#endif
  1.2121 +
  1.2122 +/*
  1.2123 +** Count of attempts to open a temporary file.  It is incremented by
  1.2124 +** sqlite3PagerOpentemp() and exists only in SQLITE_TEST builds, where
  1.2125 +** it is used for testing and analysis.
  1.2126 +*/
  1.2127 +#ifdef SQLITE_TEST
  1.2128 +int sqlite3_opentemp_count = 0;
  1.2129 +#endif
  1.2130 +
  1.2131 +/*
  1.2132 +** Open a temporary file using the pager's VFS.
  1.2133 +**
  1.2134 +** The open file handle is written into *pFile.  SQLITE_OK is returned on
  1.2135 +** success, otherwise the error code from sqlite3OsOpen().  The file is
  1.2136 +** opened with SQLITE_OPEN_DELETEONCLOSE, so it is removed when closed.
  1.2137 +*/
  1.2138 +static int sqlite3PagerOpentemp(
  1.2139 +  Pager *pPager,        /* Pager whose VFS opens the file */
  1.2140 +  sqlite3_file *pFile,  /* OUT: handle of the opened file */
  1.2141 +  int vfsFlags          /* Caller flags forwarded to the VFS */
  1.2142 +){
  1.2143 +  int status;                 /* Result of sqlite3OsOpen() */
  1.2144 +  const int mTempFlags =      /* Flags every temp file is opened with */
  1.2145 +      SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE |
  1.2146 +      SQLITE_OPEN_EXCLUSIVE | SQLITE_OPEN_DELETEONCLOSE;
  1.2147 +
  1.2148 +#ifdef SQLITE_TEST
  1.2149 +  sqlite3_opentemp_count++;   /* Statistics for the test harness */
  1.2150 +#endif
  1.2151 +  status = sqlite3OsOpen(pPager->pVfs, 0, pFile, vfsFlags|mTempFlags, 0);
  1.2152 +  assert( status!=SQLITE_OK || pFile->pMethods );
  1.2153 +  return status;
  1.2154 +}
  1.2155 +
  1.2156 +/*
  1.2157 +** Create a new page cache and put a pointer to the page cache in *ppPager.
  1.2158 +** The file to be cached need not exist.  The file is not locked until
  1.2159 +** the first call to sqlite3PagerGet() and is only held open until the
  1.2160 +** last page is released using sqlite3PagerUnref().
  1.2161 +**
  1.2162 +** If zFilename is NULL or "" then a randomly-named temporary file is
  1.2163 +** used; it is opened lazily and deleted automatically when closed.
  1.2164 +** If zFilename is ":memory:" then all information is held in cache and
  1.2165 +** never written to disk.  This implements an in-memory database.
  1.2166 +**
  1.2167 +** Returns SQLITE_OK with *ppPager set, or an error code (SQLITE_NOMEM,
  1.2168 +** SQLITE_CANTOPEN, or a VFS error) with *ppPager left as NULL.
  1.2169 +*/
  1.2170 +int sqlite3PagerOpen(
  1.2171 +  sqlite3_vfs *pVfs,       /* The virtual file system to use */
  1.2172 +  Pager **ppPager,         /* Return the Pager structure here */
  1.2173 +  const char *zFilename,   /* Name of the database file to open */
  1.2174 +  int nExtra,              /* Extra bytes append to each in-memory page */
  1.2175 +  int flags,               /* flags controlling this file */
  1.2176 +  int vfsFlags             /* flags passed through to sqlite3_vfs.xOpen() */
  1.2177 +){
  1.2178 +  u8 *pPtr;
  1.2179 +  Pager *pPager = 0;
  1.2180 +  int rc = SQLITE_OK;
  1.2181 +  int i;
  1.2182 +  int tempFile = 0;
  1.2183 +  int memDb = 0;
  1.2184 +  int readOnly = 0;
  1.2185 +  int useJournal = (flags & PAGER_OMIT_JOURNAL)==0;
  1.2186 +  int noReadlock = (flags & PAGER_NO_READLOCK)!=0;
  1.2187 +  int journalFileSize = sqlite3JournalSize(pVfs);
  1.2188 +  int szPageDflt = SQLITE_DEFAULT_PAGE_SIZE;
  1.2189 +  char *zPathname = 0;
  1.2190 +  int nPathname = 0;
  1.2191 +
  1.2192 +  /* The default return is a NULL pointer */
  1.2193 +  *ppPager = 0;
  1.2194 +
  1.2195 +  /* Compute and store the full pathname in an allocated buffer pointed
  1.2196 +  ** to by zPathname, length nPathname. Or, if this is a temporary file,
  1.2197 +  ** leave both nPathname and zPathname set to 0.
  1.2198 +  */
  1.2199 +  if( zFilename && zFilename[0] ){
  1.2200 +    nPathname = pVfs->mxPathname+1;
  1.2201 +    zPathname = sqlite3Malloc(nPathname*2);
  1.2202 +    if( zPathname==0 ){
  1.2203 +      return SQLITE_NOMEM;
  1.2204 +    }
  1.2205 +#ifndef SQLITE_OMIT_MEMORYDB
  1.2206 +    if( strcmp(zFilename,":memory:")==0 ){
  1.2207 +      memDb = 1;
  1.2208 +      zPathname[0] = 0;
  1.2209 +    }else
  1.2210 +#endif
  1.2211 +    {
  1.2212 +      rc = sqlite3OsFullPathname(pVfs, zFilename, nPathname, zPathname);
  1.2213 +    }
  1.2214 +    if( rc!=SQLITE_OK ){
  1.2215 +      sqlite3_free(zPathname);
  1.2216 +      return rc;
  1.2217 +    }
  1.2218 +    nPathname = strlen(zPathname);
  1.2219 +  }
  1.2220 +
  1.2221 +  /* Allocate memory for the pager structure */
  1.2222 +  pPager = sqlite3MallocZero(
  1.2223 +    sizeof(*pPager) +           /* Pager structure */
  1.2224 +    journalFileSize +           /* The journal file structure */
  1.2225 +    pVfs->szOsFile * 3 +        /* The main db and two journal files */
  1.2226 +    3*nPathname + 40            /* zFilename, zDirectory, zJournal */
  1.2227 +  );
  1.2228 +  if( !pPager ){
  1.2229 +    sqlite3_free(zPathname);
  1.2230 +    return SQLITE_NOMEM;
  1.2231 +  }
  1.2232 +  pPtr = (u8 *)&pPager[1];
  1.2233 +  pPager->vfsFlags = vfsFlags;
  1.2234 +  pPager->fd = (sqlite3_file*)&pPtr[pVfs->szOsFile*0];
  1.2235 +  pPager->stfd = (sqlite3_file*)&pPtr[pVfs->szOsFile*1];
  1.2236 +  pPager->jfd = (sqlite3_file*)&pPtr[pVfs->szOsFile*2];
  1.2237 +  pPager->zFilename = (char*)&pPtr[pVfs->szOsFile*2+journalFileSize];
  1.2238 +  pPager->zDirectory = &pPager->zFilename[nPathname+1];
  1.2239 +  pPager->zJournal = &pPager->zDirectory[nPathname+1];
  1.2240 +  pPager->pVfs = pVfs;
  1.2241 +  if( zPathname ){
  1.2242 +    memcpy(pPager->zFilename, zPathname, nPathname+1);
  1.2243 +    sqlite3_free(zPathname);
  1.2244 +  }
  1.2245 +
  1.2246 +  /* Open the pager file.
  1.2247 +  */
  1.2248 +  if( zFilename && zFilename[0] && !memDb ){
  1.2249 +    if( nPathname>(pVfs->mxPathname - sizeof("-journal")) ){
  1.2250 +      rc = SQLITE_CANTOPEN;
  1.2251 +    }else{
  1.2252 +      int fout = 0;
  1.2253 +      rc = sqlite3OsOpen(pVfs, pPager->zFilename, pPager->fd,
  1.2254 +                         pPager->vfsFlags, &fout);
  1.2255 +      readOnly = (fout&SQLITE_OPEN_READONLY);
  1.2256 +
  1.2257 +      /* If the file was successfully opened for read/write access,
  1.2258 +      ** choose a default page size in case we have to create the
  1.2259 +      ** database file. The default page size is the maximum of:
  1.2260 +      **
  1.2261 +      **    + SQLITE_DEFAULT_PAGE_SIZE,
  1.2262 +      **    + The value returned by sqlite3OsSectorSize()
  1.2263 +      **    + The largest page size that can be written atomically.
  1.2264 +      */
  1.2265 +      if( rc==SQLITE_OK && !readOnly ){
  1.2266 +        int iSectorSize = sqlite3OsSectorSize(pPager->fd);
  1.2267 +        if( szPageDflt<iSectorSize ){
  1.2268 +          szPageDflt = iSectorSize;
  1.2269 +        }
  1.2270 +#ifdef SQLITE_ENABLE_ATOMIC_WRITE
  1.2271 +        {
  1.2272 +          int iDc = sqlite3OsDeviceCharacteristics(pPager->fd);
  1.2273 +          int ii;
  1.2274 +          assert(SQLITE_IOCAP_ATOMIC512==(512>>8));
  1.2275 +          assert(SQLITE_IOCAP_ATOMIC64K==(65536>>8));
  1.2276 +          assert(SQLITE_MAX_DEFAULT_PAGE_SIZE<=65536);
  1.2277 +          for(ii=szPageDflt; ii<=SQLITE_MAX_DEFAULT_PAGE_SIZE; ii=ii*2){
  1.2278 +            if( iDc&(SQLITE_IOCAP_ATOMIC|(ii>>8)) ) szPageDflt = ii;
  1.2279 +          }
  1.2280 +        }
  1.2281 +#endif
  1.2282 +        if( szPageDflt>SQLITE_MAX_DEFAULT_PAGE_SIZE ){
  1.2283 +          szPageDflt = SQLITE_MAX_DEFAULT_PAGE_SIZE;
  1.2284 +        }
  1.2285 +      }
  1.2286 +    }
  1.2287 +  }else if( !memDb ){
  1.2288 +    /* If a temporary file is requested, it is not opened immediately.
  1.2289 +    ** In this case we accept the default page size and delay actually
  1.2290 +    ** opening the file until the first call to OsWrite().
  1.2291 +    */
  1.2292 +    tempFile = 1;
  1.2293 +    pPager->state = PAGER_EXCLUSIVE;
  1.2294 +  }
  1.2295 +
  1.2296 +  if( rc==SQLITE_OK ){
  1.2297 +    pPager->pTmpSpace = sqlite3PageMalloc(szPageDflt);
  1.2298 +  }
  1.2299 +
  1.2300 +  /* pTmpSpace is NULL here if an error occurred in the block above (the
  1.2301 +  ** Pager was zero-filled when allocated) or if sqlite3PageMalloc()
  1.2302 +  ** failed.  Either way, close the file and free the Pager structure.
  1.2303 +  ** Since the pager is never handed to the caller there is no need to
  1.2304 +  ** set any Pager.errMask variables. */
  1.2305 +  if( !pPager->pTmpSpace ){
  1.2306 +    sqlite3OsClose(pPager->fd);
  1.2307 +    sqlite3_free(pPager);
  1.2308 +    return ((rc==SQLITE_OK)?SQLITE_NOMEM:rc);
  1.2309 +  }
  1.2310 +
  1.2311 +  PAGERTRACE3("OPEN %d %s\n", FILEHANDLEID(pPager->fd), pPager->zFilename);
  1.2312 +  IOTRACE(("OPEN %p %s\n", pPager, pPager->zFilename))
  1.2313 +
  1.2314 +  /* Fill in Pager.zDirectory[] */
  1.2315 +  memcpy(pPager->zDirectory, pPager->zFilename, nPathname+1);
  1.2316 +  for(i=strlen(pPager->zDirectory); i>0 && pPager->zDirectory[i-1]!='/'; i--){}
  1.2317 +  if( i>0 ) pPager->zDirectory[i-1] = 0;
  1.2318 +  /* Fill in Pager.zJournal[].  zPathname has already been freed, so test
  1.2319 +  ** zFilename, which is non-empty exactly when zPathname was allocated. */
  1.2320 +  if( zFilename && zFilename[0] ){
  1.2321 +    memcpy(pPager->zJournal, pPager->zFilename, nPathname);
  1.2322 +    memcpy(&pPager->zJournal[nPathname], "-journal", 9);
  1.2323 +  }else{
  1.2324 +    pPager->zJournal = 0;
  1.2325 +  }
  1.2326 +
  1.2327 +  /* pPager->journalOpen = 0; */
  1.2328 +  pPager->useJournal = useJournal && !memDb;
  1.2329 +  pPager->noReadlock = noReadlock && readOnly;
  1.2330 +  /* pPager->stmtOpen = 0; */
  1.2331 +  /* pPager->stmtInUse = 0; */
  1.2332 +  /* pPager->nRef = 0; */
  1.2333 +  pPager->dbSize = memDb-1;
  1.2334 +  pPager->pageSize = szPageDflt;
  1.2335 +  /* pPager->stmtSize = 0; */
  1.2336 +  /* pPager->stmtJSize = 0; */
  1.2337 +  /* pPager->nPage = 0; */
  1.2338 +  pPager->mxPage = 100;
  1.2339 +  pPager->mxPgno = SQLITE_MAX_PAGE_COUNT;
  1.2340 +  /* pPager->state = PAGER_UNLOCK; */
  1.2341 +  assert( pPager->state == (tempFile ? PAGER_EXCLUSIVE : PAGER_UNLOCK) );
  1.2342 +  /* pPager->errMask = 0; */
  1.2343 +  pPager->tempFile = tempFile;
  1.2344 +  assert( tempFile==PAGER_LOCKINGMODE_NORMAL
  1.2345 +          || tempFile==PAGER_LOCKINGMODE_EXCLUSIVE );
  1.2346 +  assert( PAGER_LOCKINGMODE_EXCLUSIVE==1 );
  1.2347 +  pPager->exclusiveMode = tempFile;
  1.2348 +  pPager->memDb = memDb;
  1.2349 +  pPager->readOnly = readOnly;
  1.2350 +  /* pPager->needSync = 0; */
  1.2351 +  pPager->noSync = pPager->tempFile || !useJournal;
  1.2352 +  pPager->fullSync = (pPager->noSync?0:1);
  1.2353 +  pPager->sync_flags = SQLITE_SYNC_NORMAL;
  1.2354 +  /* pPager->pFirst = 0; */
  1.2355 +  /* pPager->pFirstSynced = 0; */
  1.2356 +  /* pPager->pLast = 0; */
  1.2357 +  pPager->nExtra = FORCE_ALIGNMENT(nExtra);
  1.2358 +  pPager->journalSizeLimit = SQLITE_DEFAULT_JOURNAL_SIZE_LIMIT;
  1.2359 +  assert(pPager->fd->pMethods||memDb||tempFile);
  1.2360 +  if( !memDb ){
  1.2361 +    setSectorSize(pPager);
  1.2362 +  }
  1.2363 +  /* pPager->pBusyHandler = 0; */
  1.2364 +  /* memset(pPager->aHash, 0, sizeof(pPager->aHash)); */
  1.2365 +  *ppPager = pPager;
  1.2366 +#ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
  1.2367 +  pPager->iInUseMM = 0;
  1.2368 +  pPager->iInUseDB = 0;
  1.2369 +  if( !memDb ){
  1.2370 +#ifndef SQLITE_MUTEX_NOOP
  1.2371 +    sqlite3_mutex *mutex = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MEM2);
  1.2372 +#endif
  1.2373 +    sqlite3_mutex_enter(mutex);
  1.2374 +    pPager->pNext = sqlite3PagerList;
  1.2375 +    if( sqlite3PagerList ){
  1.2376 +      assert( sqlite3PagerList->pPrev==0 );
  1.2377 +      sqlite3PagerList->pPrev = pPager;
  1.2378 +    }
  1.2379 +    pPager->pPrev = 0;
  1.2380 +    sqlite3PagerList = pPager;
  1.2381 +    sqlite3_mutex_leave(mutex);
  1.2382 +  }
  1.2383 +#endif
  1.2384 +  return SQLITE_OK;
  1.2385 +}
  1.2386 +
  1.2387 +/* Store the busy handler for this pager.  pager_wait_on_lock() passes it
  1.2388 +** to sqlite3InvokeBusyHandler() whenever a lock attempt returns SQLITE_BUSY.
  1.2389 +*/
  1.2390 +void sqlite3PagerSetBusyhandler(Pager *pPager, BusyHandler *pBusyHandler){
  1.2391 +  pPager->pBusyHandler = pBusyHandler;
  1.2392 +}
  1.2393 +
  1.2394 +/*
  1.2395 +** Set the destructor for this pager.  If not NULL, the destructor is called
  1.2396 +** when the reference count on each page reaches zero.  The destructor can
  1.2397 +** be used to clean up information in the extra segment appended to each page.
  1.2398 +**
  1.2399 +** The destructor is not called as a result of sqlite3PagerClose().
  1.2400 +** Destructors are only called by sqlite3PagerUnref().
  1.2401 +*/
  1.2402 +void sqlite3PagerSetDestructor(Pager *pPager, void (*xDesc)(DbPage*,int)){
  1.2403 +  pPager->xDestructor = xDesc;
  1.2404 +}
  1.2405 +
  1.2406 +/*
  1.2407 +** Set the reinitializer for this pager (stored in Pager.xReiniter).  If
  1.2408 +** not NULL, the reinitializer is called when the content of a page in
  1.2409 +** cache is restored to its original value as a result of a rollback.
  1.2410 +** The callback gives higher-level code an opportunity to restore the
  1.2411 +** EXTRA section to agree with the restored page data.
  1.2412 +*/
  1.2413 +void sqlite3PagerSetReiniter(Pager *pPager, void (*xReinit)(DbPage*,int)){
  1.2414 +  pPager->xReiniter = xReinit;
  1.2415 +}
  1.2416 +
  1.2417 +/*
  1.2418 +** Change the page size to *pPageSize.  The request is ignored if it is
  1.2419 +** zero or equals the current size, for an in-memory database, or while
  1.2420 +** Pager.nRef is non-zero.  On return *pPageSize holds the size in effect.
  1.2421 +** Returns SQLITE_OK, or SQLITE_NOMEM if the new pTmpSpace allocation fails.
  1.2422 +*/
  1.2423 +int sqlite3PagerSetPagesize(Pager *pPager, u16 *pPageSize){
  1.2424 +  int rc = SQLITE_OK;
  1.2425 +  const u16 szNew = *pPageSize;     /* Requested size; 0 means no change */
  1.2426 +  assert( szNew==0 || (szNew>=512 && szNew<=SQLITE_MAX_PAGE_SIZE) );
  1.2427 +  if( szNew!=0 && szNew!=pPager->pageSize
  1.2428 +   && pPager->memDb==0 && pPager->nRef==0 ){
  1.2429 +    char *pBuf = (char *)sqlite3PageMalloc(szNew);
  1.2430 +    if( pBuf!=0 ){
  1.2431 +      pagerEnter(pPager);
  1.2432 +      pager_reset(pPager);
  1.2433 +      pPager->pageSize = szNew;
  1.2434 +      setSectorSize(pPager);
  1.2435 +      sqlite3PageFree(pPager->pTmpSpace);
  1.2436 +      pPager->pTmpSpace = pBuf;
  1.2437 +      pagerLeave(pPager);
  1.2438 +    }else{
  1.2439 +      rc = SQLITE_NOMEM;
  1.2440 +    }
  1.2441 +  }
  1.2442 +  *pPageSize = pPager->pageSize;
  1.2443 +  return rc;
  1.2444 +}
  1.2445 +
  1.2446 +/*
  1.2447 +** Return a pointer to the "temporary page" buffer held internally
  1.2448 +** by the pager (Pager.pTmpSpace).  This is a buffer that is big enough
  1.2449 +** to hold the entire content of a database page.  This buffer is used
  1.2450 +** internally during rollback and will be overwritten whenever a rollback
  1.2451 +** occurs.  But other modules are free to use it too, as long as
  1.2452 +** no rollbacks are happening.
  1.2453 +*/
  1.2454 +void *sqlite3PagerTempSpace(Pager *pPager){
  1.2455 +  return pPager->pTmpSpace;
  1.2456 +}
  1.2457 +
  1.2458 +/*
  1.2459 +** Set the maximum database page count (Pager.mxPgno) to mxPage when
  1.2460 +** mxPage is positive; a zero or negative mxPage changes nothing.  The
  1.2461 +** limit is then passed through sqlite3PagerPagecount(), which raises
  1.2462 +** it again if it is below the current size of the database.
  1.2463 +**
  1.2464 +** Regardless of mxPage, return the current maximum page count.
  1.2465 +*/
  1.2466 +int sqlite3PagerMaxPageCount(Pager *pPager, int mxPage){
  1.2467 +  if( mxPage>=1 ) pPager->mxPgno = mxPage;
  1.2468 +  /* Pagecount bumps mxPgno up to the database size when necessary */
  1.2469 +  (void)sqlite3PagerPagecount(pPager, 0);
  1.2470 +  return pPager->mxPgno;
  1.2471 +}
  1.2472 +
  1.2473 +/*
  1.2474 +** The following routines suspend and resume the simulated I/O error
  1.2475 +** mechanism around code where errors do not matter.  The disable routine
  1.2476 +** saves sqlite3_io_error_pending and sets it to -1; the enable routine
  1.2477 +** restores the saved value.  Only one value is saved, so a nested disable
  1.2478 +** would overwrite it.  Unless -DSQLITE_TEST=1 is used, these routines
  1.2479 +** are all no-ops and generate no code.
  1.2480 +*/
  1.2481 +#ifdef SQLITE_TEST
  1.2482 +extern int sqlite3_io_error_pending;
  1.2483 +extern int sqlite3_io_error_hit;
  1.2484 +static int saved_cnt;      /* sqlite3_io_error_pending at the last disable */
  1.2485 +void disable_simulated_io_errors(void){
  1.2486 +  saved_cnt = sqlite3_io_error_pending;
  1.2487 +  sqlite3_io_error_pending = -1;
  1.2488 +}
  1.2489 +void enable_simulated_io_errors(void){
  1.2490 +  sqlite3_io_error_pending = saved_cnt;
  1.2491 +}
  1.2492 +#else
  1.2493 +# define disable_simulated_io_errors()
  1.2494 +# define enable_simulated_io_errors()
  1.2495 +#endif
  1.2496 +
  1.2497 +/*
  1.2498 +** Read the first N bytes from the beginning of the file into memory
  1.2499 +** that pDest points to.
  1.2500 +**
  1.2501 +** pDest is zeroed before the read, so any bytes that are not read
  1.2502 +** remain zero.  If the database file has no pMethods (the assert
  1.2503 +** implies this is an in-memory or temporary database), nothing is
  1.2504 +** read and SQLITE_OK is returned.
  1.2505 +**
  1.2506 +** This function may be called even if the file does not exist or does
  1.2507 +** not contain a header, so SQLITE_IOERR_SHORT_READ is not treated as
  1.2508 +** an error: it is converted to SQLITE_OK.  Any other result of
  1.2509 +** sqlite3OsRead() is returned to the caller unchanged.
  1.2510 +*/
  1.2511 +int sqlite3PagerReadFileheader(Pager *pPager, int N, unsigned char *pDest){
  1.2512 +  int rc;
  1.2513 +  memset(pDest, 0, N);
  1.2514 +  assert(MEMDB||pPager->fd->pMethods||pPager->tempFile);
  1.2515 +  if( pPager->fd->pMethods==0 ) return SQLITE_OK;   /* Nothing to read */
  1.2516 +  IOTRACE(("DBHDR %p 0 %d\n", pPager, N))
  1.2517 +  rc = sqlite3OsRead(pPager->fd, pDest, N, 0);
  1.2518 +  return (rc==SQLITE_IOERR_SHORT_READ) ? SQLITE_OK : rc;
  1.2519 +}
  1.2520 +
  1.2521 +/*
  1.2522 +** Return the total number of pages in the disk file associated with
  1.2523 +** pPager.  The count is written to *pnPage when pnPage is not NULL.
  1.2524 +**
  1.2525 +** If the PENDING_BYTE lies on the page directly after the end of the
  1.2526 +** file, then consider this page part of the file too. For example, if
  1.2527 +** PENDING_BYTE is byte 4096 (the first byte of page 5) and the size of the
  1.2528 +** file is 4096 bytes, 5 is returned instead of 4.
  1.2529 +**
  1.2530 +** Pager.mxPgno is raised if it is smaller than the page count.  The
  1.2531 +** return value is SQLITE_OK, the pager's existing error code if it is
  1.2532 +** already in an error state, or the error from sqlite3OsFileSize().
  1.2533 +*/
  1.2534 +int sqlite3PagerPagecount(Pager *pPager, int *pnPage){
  1.2535 +  i64 nPg = 0;              /* Page count (briefly the file size in bytes) */
  1.2536 +  int rc;
  1.2537 +  assert( pPager!=0 );
  1.2538 +  if( pPager->errCode ){
  1.2539 +    return pPager->errCode;
  1.2540 +  }
  1.2541 +  if( pPager->dbSize<0 ){
  1.2542 +    /* The size is not cached, so work it out from the size of the file */
  1.2543 +    assert(pPager->fd->pMethods||pPager->tempFile);
  1.2544 +    if( pPager->fd->pMethods ){
  1.2545 +      rc = sqlite3OsFileSize(pPager->fd, &nPg);
  1.2546 +      if( rc!=SQLITE_OK ){
  1.2547 +        pPager->nRef++;
  1.2548 +        pager_error(pPager, rc);
  1.2549 +        pPager->nRef--;
  1.2550 +        return rc;
  1.2551 +      }
  1.2552 +    }
  1.2553 +    if( nPg>0 && nPg<pPager->pageSize ){
  1.2554 +      nPg = 1;              /* A partial first page counts as one page */
  1.2555 +    }else{
  1.2556 +      nPg /= pPager->pageSize;
  1.2557 +    }
  1.2558 +    if( pPager->state!=PAGER_UNLOCK ) pPager->dbSize = nPg;
  1.2559 +  }else{
  1.2560 +    nPg = pPager->dbSize;
  1.2561 +  }
  1.2562 +  /* A PENDING_BYTE page that directly follows the file is counted too */
  1.2563 +  if( nPg==(PENDING_BYTE/pPager->pageSize) ) nPg++;
  1.2564 +  if( nPg>pPager->mxPgno ) pPager->mxPgno = nPg;
  1.2565 +  if( pnPage ) *pnPage = nPg;
  1.2566 +  return SQLITE_OK;
  1.2567 +}
  1.2568 +
  1.2569 +
  1.2570 +#ifndef SQLITE_OMIT_MEMORYDB
  1.2571 +/*
  1.2572 +** Release the pOrig and pStmt buffers of a PgHistory and zero both pointers.
  1.2573 +*/
  1.2574 +static void clearHistory(PgHistory *pHist){
  1.2575 +  sqlite3PageFree(pHist->pOrig);
  1.2576 +  pHist->pOrig = 0;
  1.2577 +  sqlite3PageFree(pHist->pStmt);
  1.2578 +  pHist->pStmt = 0;
  1.2579 +}
  1.2580 +#else
  1.2581 +#define clearHistory(x)
  1.2582 +#endif
  1.2583 +
  1.2584 +/*
  1.2585 +** Forward declaration
  1.2586 +*/
  1.2587 +static int syncJournal(Pager*);
  1.2588 +
  1.2589 +/*
  1.2590 +** Remove pPg from its pgno hash chain and set its page number to 0 to
  1.2591 +** mark it as being in no chain.  The marker is needed because
  1.2592 +** sqlite3PagerMovepage() can leave a page on the pNextFree/pPrevFree
  1.2593 +** list that is in no hash chain.  If MEMDB, its PgHistory is cleared.
  1.2594 +*/
  1.2595 +static void unlinkHashChain(Pager *pPager, PgHdr *pPg){
  1.2596 +  PgHdr *pAfter = pPg->pNextHash;    /* Successor on the hash chain */
  1.2597 +  PgHdr *pBefore = pPg->pPrevHash;   /* Predecessor on the hash chain */
  1.2598 +  if( pPg->pgno==0 ){
  1.2599 +    assert( pAfter==0 && pBefore==0 );
  1.2600 +    return;
  1.2601 +  }
  1.2602 +  if( pAfter ) pAfter->pPrevHash = pBefore;
  1.2603 +  if( pBefore ){
  1.2604 +    assert( pPager->aHash[pPg->pgno & (pPager->nHash-1)]!=pPg );
  1.2605 +    pBefore->pNextHash = pAfter;
  1.2606 +  }else{
  1.2607 +    /* pPg heads its bucket, so the bucket must now point at the successor */
  1.2608 +    pPager->aHash[pPg->pgno & (pPager->nHash-1)] = pAfter;
  1.2609 +  }
  1.2610 +  if( MEMDB ){
  1.2611 +    clearHistory(PGHDR_TO_HIST(pPg, pPager));
  1.2612 +  }
  1.2613 +  pPg->pgno = 0;
  1.2614 +  pPg->pNextHash = pPg->pPrevHash = 0;
  1.2615 +}
  1.2616 +
  1.2617 +/*
  1.2618 +** Detach a page from both structures that track it while it is
  1.2619 +** unreferenced:
  1.2620 +**
  1.2621 +**   *  the free list (the list of all pages where nRef==0), and
  1.2622 +**   *  its hash collision chain.
  1.2623 +*/
  1.2624 +static void unlinkPage(PgHdr *pPg){
  1.2625 +  Pager *pOwner = pPg->pPager;       /* Pager that owns this page */
  1.2626 +
  1.2627 +  lruListRemove(pPg);                /* Drop it from the free-page list */
  1.2628 +  unlinkHashChain(pOwner, pPg);      /* Drop it from the pgno hash table */
  1.2629 +}
  1.2630 +
  1.2631 +/*
  1.2632 +** This routine is used to truncate the cache when a database
  1.2633 +** is truncated.  Drop from the cache all pages whose pgno is
  1.2634 +** larger than pPager->dbSize and that are unreferenced.
  1.2635 +**
  1.2636 +** Referenced pages larger than pPager->dbSize are zeroed.
  1.2637 +**
  1.2638 +** Actually, at the point this routine is called, it would be
  1.2639 +** an error to have a referenced page.  But rather than delete
  1.2640 +** that page and guarantee a subsequent segfault, it seems better
  1.2641 +** to zero it and hope that we error out sanely.
  1.2642 +*/
  1.2643 +static void pager_truncate_cache(Pager *pPager){
  1.2644 +  PgHdr *pPg;                   /* Page currently being examined */
  1.2645 +  PgHdr **ppPg;                 /* The link that points at pPg */
  1.2646 +  int dbSize = pPager->dbSize;
  1.2647 +  /* Walk the pAll list through ppPg so a page can be unlinked in place */
  1.2648 +  ppPg = &pPager->pAll;
  1.2649 +  while( (pPg = *ppPg)!=0 ){
  1.2650 +    if( pPg->pgno<=dbSize ){          /* Still inside the database: keep */
  1.2651 +      ppPg = &pPg->pNextAll;
  1.2652 +    }else if( pPg->nRef>0 ){          /* Referenced: zero it, but keep it */
  1.2653 +      memset(PGHDR_TO_DATA(pPg), 0, pPager->pageSize);
  1.2654 +      ppPg = &pPg->pNextAll;
  1.2655 +    }else{                            /* Unreferenced: unlink and free */
  1.2656 +      *ppPg = pPg->pNextAll;
  1.2657 +#ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
  1.2658 +      if( *ppPg ){
  1.2659 +        (*ppPg)->pPrevAll = pPg->pPrevAll;
  1.2660 +      }
  1.2661 +#endif
  1.2662 +      IOTRACE(("PGFREE %p %d\n", pPager, pPg->pgno));
  1.2663 +      PAGER_INCR(sqlite3_pager_pgfree_count);
  1.2664 +      unlinkPage(pPg);
  1.2665 +      makeClean(pPg);
  1.2666 +      sqlite3PageFree(pPg->pData);
  1.2667 +      sqlite3_free(pPg);
  1.2668 +      pPager->nPage--;
  1.2669 +    }
  1.2670 +  }
  1.2671 +}
  1.2672 +
  1.2673 +/*
  1.2674 +** Try to obtain a lock of type locktype on the database file.  While
  1.2675 +** the attempt returns SQLITE_BUSY, invoke the busy handler and try
  1.2676 +** again, until the handler returns false or the attempt ends otherwise.
  1.2677 +**
  1.2678 +** Return SQLITE_OK if a lock at least as strong as locktype is held on
  1.2679 +** return, otherwise the error code from the last sqlite3OsLock() call.
  1.2680 +*/
  1.2681 +static int pager_wait_on_lock(Pager *pPager, int locktype){
  1.2682 +  int rc;                     /* Result of the most recent lock attempt */
  1.2683 +
  1.2684 +  /* Pager lock states are numerically equal to the OS lock levels */
  1.2685 +  assert( PAGER_SHARED==SHARED_LOCK );
  1.2686 +  assert( PAGER_RESERVED==RESERVED_LOCK );
  1.2687 +  assert( PAGER_EXCLUSIVE==EXCLUSIVE_LOCK );
  1.2688 +  /* An unlocked file implies that the database size is not yet known */
  1.2689 +  assert( pPager->state>=PAGER_SHARED || pPager->dbSize<0 || MEMDB );
  1.2690 +
  1.2691 +  if( pPager->state>=locktype ) return SQLITE_OK;   /* Already locked */
  1.2692 +  if( pPager->pBusyHandler ){
  1.2693 +    pPager->pBusyHandler->nBusy = 0;
  1.2694 +  }
  1.2695 +  for(;;){
  1.2696 +    rc = sqlite3OsLock(pPager->fd, locktype);
  1.2697 +    if( rc!=SQLITE_BUSY ) break;
  1.2698 +    if( !sqlite3InvokeBusyHandler(pPager->pBusyHandler) ) break;
  1.2699 +  }
  1.2700 +  if( rc==SQLITE_OK ){
  1.2701 +    pPager->state = locktype;
  1.2702 +    IOTRACE(("LOCK %p %d\n", pPager, locktype))
  1.2703 +  }
  1.2704 +  return rc;
  1.2705 +}
  1.2706 +
  1.2707 +/*
  1.2708 +** Truncate the file to the number of pages specified.
  1.2709 +**
  1.2710 +** Nothing is done if the database already has nPage pages or fewer.
  1.2711 +** Returns SQLITE_OK on success, the pager's error code if the pager is
  1.2712 +** in an error state, or the first error met while truncating.
  1.2713 +*/
  1.2714 +int sqlite3PagerTruncate(Pager *pPager, Pgno nPage){
  1.2715 +  int rc;
  1.2716 +  assert( pPager->state>=PAGER_SHARED || MEMDB );
  1.2717 +  sqlite3PagerPagecount(pPager, 0);
  1.2718 +  if( pPager->errCode ) return pPager->errCode;
  1.2719 +  if( nPage>=(unsigned)pPager->dbSize ) return SQLITE_OK;
  1.2720 +  if( MEMDB ){
  1.2721 +    pPager->dbSize = nPage;
  1.2722 +    pager_truncate_cache(pPager);
  1.2723 +    return SQLITE_OK;
  1.2724 +  }
  1.2725 +
  1.2726 +  /* Step 1: the journal must be synced before the file is changed */
  1.2727 +  pagerEnter(pPager);
  1.2728 +  rc = syncJournal(pPager);
  1.2729 +  pagerLeave(pPager);
  1.2730 +
  1.2731 +  /* Step 2: get an exclusive lock on the database before truncating */
  1.2732 +  if( rc==SQLITE_OK ){
  1.2733 +    pagerEnter(pPager);
  1.2734 +    rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK);
  1.2735 +    pagerLeave(pPager);
  1.2736 +  }
  1.2737 +
  1.2738 +  /* Step 3: truncate the file itself */
  1.2739 +  if( rc==SQLITE_OK ){
  1.2740 +    rc = pager_truncate(pPager, nPage);
  1.2741 +  }
  1.2742 +  return rc;
  1.2743 +}
  1.2744 +
  1.2745 +/*
  1.2746 +** Shutdown the page cache.  Free all memory and close all files.
  1.2747 +**
  1.2748 +** If a transaction was in progress when this routine is called, that
  1.2749 +** transaction is rolled back.  All outstanding pages are invalidated
  1.2750 +** and their memory is freed.  Any attempt to use a page associated
  1.2751 +** with this page cache after this function returns will likely
  1.2752 +** result in a coredump.
  1.2753 +**
  1.2754 +** This function always succeeds and returns SQLITE_OK. If a transaction
  1.2755 +** is active an attempt is made to roll it back. If an error occurs during
  1.2756 +** the rollback a hot journal may be left in the filesystem but no error
  1.2757 +** is returned to the caller.
  1.2758 +*/
  1.2759 +int sqlite3PagerClose(Pager *pPager){
  1.2760 +#ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
  1.2761 +  if( !MEMDB ){   /* Unlink this pager from the global sqlite3PagerList */
  1.2762 +#ifndef SQLITE_MUTEX_NOOP
  1.2763 +    sqlite3_mutex *mutex = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MEM2);
  1.2764 +#endif
  1.2765 +    sqlite3_mutex_enter(mutex);
  1.2766 +    if( pPager->pPrev ){
  1.2767 +      pPager->pPrev->pNext = pPager->pNext;
  1.2768 +    }else{
  1.2769 +      sqlite3PagerList = pPager->pNext;
  1.2770 +    }
  1.2771 +    if( pPager->pNext ){
  1.2772 +      pPager->pNext->pPrev = pPager->pPrev;
  1.2773 +    }
  1.2774 +    sqlite3_mutex_leave(mutex);
  1.2775 +  }
  1.2776 +#endif
  1.2777 +  /* Reset and roll back with simulated I/O errors off and mallocs benign */
  1.2778 +  disable_simulated_io_errors();
  1.2779 +  sqlite3BeginBenignMalloc();
  1.2780 +  pPager->errCode = 0;
  1.2781 +  pPager->exclusiveMode = 0;
  1.2782 +  pager_reset(pPager);
  1.2783 +  pagerUnlockAndRollback(pPager);
  1.2784 +  enable_simulated_io_errors();
  1.2785 +  sqlite3EndBenignMalloc();
  1.2786 +  PAGERTRACE2("CLOSE %d\n", PAGERID(pPager));
  1.2787 +  IOTRACE(("CLOSE %p\n", pPager))
  1.2788 +  if( pPager->journalOpen ){
  1.2789 +    sqlite3OsClose(pPager->jfd);
  1.2790 +  }
  1.2791 +  sqlite3BitvecDestroy(pPager->pInJournal);
  1.2792 +  if( pPager->stmtOpen ){
  1.2793 +    sqlite3OsClose(pPager->stfd);
  1.2794 +  }
  1.2795 +  sqlite3OsClose(pPager->fd);
  1.2796 +  /* Temp files are automatically deleted by the OS
  1.2797 +  ** if( pPager->tempFile ){
  1.2798 +  **   sqlite3OsDelete(pPager->zFilename);
  1.2799 +  ** }
  1.2800 +  */
  1.2801 +
  1.2802 +  sqlite3_free(pPager->aHash);
  1.2803 +  sqlite3PageFree(pPager->pTmpSpace);
  1.2804 +  sqlite3_free(pPager);   /* Frees the Pager itself; pPager is invalid now */
  1.2805 +  return SQLITE_OK;
  1.2806 +}
  1.2807 +
  1.2808 +#if !defined(NDEBUG) || defined(SQLITE_TEST)
  1.2809 +/* Return the page number (pgno) of page p.  This routine exists only in
  1.2810 +** builds where NDEBUG is not defined or SQLITE_TEST is defined.
  1.2811 +*/
  1.2812 +Pgno sqlite3PagerPagenumber(DbPage *p){
  1.2813 +  return p->pgno;
  1.2814 +}
  1.2815 +#endif
  1.2816 +
  1.2817 +/*
  1.2818 +** page_ref() increments the reference count of a page.  If the count
  1.2819 +** was zero the page is on the freelist, so _page_ref() first removes it
  1.2820 +** from the freelist and increments the pager's nRef as well.
  1.2821 +**
  1.2822 +** In SQLITE_DEBUG builds page_ref() is a real function, so that
  1.2823 +** breakpoints can be set on it and it can be traced.  Otherwise it is
  1.2824 +** a macro that calls _page_ref() only if the reference count is zero.
  1.2825 +*/
  1.2826 +static void _page_ref(PgHdr *pPg){
  1.2827 +  const int bFirst = (pPg->nRef==0);  /* True if page is on the freelist */
  1.2828 +  if( bFirst ){
  1.2829 +    lruListRemove(pPg);
  1.2830 +    pPg->pPager->nRef++;
  1.2831 +  }
  1.2832 +  pPg->nRef++;
  1.2833 +}
  1.2834 +#ifdef SQLITE_DEBUG
  1.2835 +  static void page_ref(PgHdr *pPg){
  1.2836 +    if( pPg->nRef==0 ){
  1.2837 +      _page_ref(pPg);
  1.2838 +    }else{
  1.2839 +      pPg->nRef++;
  1.2840 +    }
  1.2841 +  }
  1.2842 +#else
  1.2843 +# define page_ref(P)   ((P)->nRef==0?_page_ref(P):(void)(P)->nRef++)
  1.2844 +#endif
  1.2845 +
  1.2846 +/*
  1.2847 +** Increment the reference count for page pPg.  The work is bracketed by
  1.2848 +** pagerEnter()/pagerLeave() on the page's pager.  Always returns SQLITE_OK.
  1.2849 +*/
  1.2850 +int sqlite3PagerRef(DbPage *pPg){
  1.2851 +  pagerEnter(pPg->pPager);
  1.2852 +  page_ref(pPg);
  1.2853 +  pagerLeave(pPg->pPager);
  1.2854 +  return SQLITE_OK;
  1.2855 +}
  1.2856 +
  1.2857 +/*
  1.2858 +** Sync the journal.  In other words, make sure all the pages that have
  1.2859 +** been written to the journal have actually reached the surface of the
  1.2860 +** disk.  It is not safe to modify the original database file until after
  1.2861 +** the journal has been synced.  If the original database is modified before
  1.2862 +** the journal is synced and a power failure occurs, the unsynced journal
  1.2863 +** data would be lost and we would be unable to completely rollback the
  1.2864 +** database changes.  Database corruption would occur.
  1.2865 +** 
  1.2866 +** This routine also updates the nRec field in the header of the journal.
  1.2867 +** (See comments on the pager_playback() routine for additional information.)
  1.2868 +** If the sync mode is FULL, two syncs will occur.  First the whole journal
  1.2869 +** is synced, then the nRec field is updated, then a second sync occurs.
  1.2870 +**
  1.2871 +** For temporary databases, we do not care if we are able to rollback
  1.2872 +** after a power failure, so no sync occurs.
  1.2873 +**
  1.2874 +** If the IOCAP_SEQUENTIAL flag is set for the persistent media on which
  1.2875 +** the database is stored, then OsSync() is never called on the journal
  1.2876 +** file. In this case all that is required is to update the nRec field in
  1.2877 +** the journal header.
  1.2878 +**
  1.2879 +** This routine clears the needSync field of every page currently held in
  1.2880 +** memory.  Returns SQLITE_OK, or the error from OsSync() or write32bits().
  1.2881 +*/
  1.2882 +static int syncJournal(Pager *pPager){
  1.2883 +  PgHdr *pPg;
  1.2884 +  int rc = SQLITE_OK;
  1.2885 +
  1.2886 +  /* Sync the journal before modifying the main database
  1.2887 +  ** (assuming there is a journal and it needs to be synced.)
  1.2888 +  */
  1.2889 +  if( pPager->needSync ){
  1.2890 +    if( !pPager->tempFile ){
  1.2891 +      int iDc = sqlite3OsDeviceCharacteristics(pPager->fd);
  1.2892 +      assert( pPager->journalOpen );
  1.2893 +
  1.2894 +      if( 0==(iDc&SQLITE_IOCAP_SAFE_APPEND) ){
  1.2895 +        /* Write the nRec value into the journal file header. If in
  1.2896 +        ** full-synchronous mode, sync the journal first. This ensures that
  1.2897 +        ** all data has really hit the disk before nRec is updated to mark
  1.2898 +        ** it as a candidate for rollback.
  1.2899 +        **
  1.2900 +        ** This is not required if the persistent media supports the
  1.2901 +        ** SAFE_APPEND property. Because in this case it is not possible 
  1.2902 +        ** for garbage data to be appended to the file, the nRec field
  1.2903 +        ** is populated with 0xFFFFFFFF when the journal header is written
  1.2904 +        ** and never needs to be updated.
  1.2905 +        */
  1.2906 +        i64 jrnlOff;
  1.2907 +        if( pPager->fullSync && 0==(iDc&SQLITE_IOCAP_SEQUENTIAL) ){
  1.2908 +          PAGERTRACE2("SYNC journal of %d\n", PAGERID(pPager));
  1.2909 +          IOTRACE(("JSYNC %p\n", pPager))
  1.2910 +          rc = sqlite3OsSync(pPager->jfd, pPager->sync_flags);
  1.2911 +          if( rc!=0 ) return rc;   /* early exit: needSync flags stay set */
  1.2912 +        }
  1.2913 +
  1.2914 +        jrnlOff = pPager->journalHdr + sizeof(aJournalMagic);
  1.2915 +        IOTRACE(("JHDR %p %lld %d\n", pPager, jrnlOff, 4));
  1.2916 +        rc = write32bits(pPager->jfd, jrnlOff, pPager->nRec);
  1.2917 +        if( rc ) return rc;
  1.2918 +      }
  1.2919 +      if( 0==(iDc&SQLITE_IOCAP_SEQUENTIAL) ){
  1.2920 +        PAGERTRACE2("SYNC journal of %d\n", PAGERID(pPager));
  1.2921 +        IOTRACE(("JSYNC %p\n", pPager))
  1.2922 +        rc = sqlite3OsSync(pPager->jfd, pPager->sync_flags| 
  1.2923 +          (pPager->sync_flags==SQLITE_SYNC_FULL?SQLITE_SYNC_DATAONLY:0)
  1.2924 +        );
  1.2925 +        if( rc!=0 ) return rc;
  1.2926 +      }
  1.2927 +      pPager->journalStarted = 1;
  1.2928 +    }
  1.2929 +    pPager->needSync = 0;
  1.2930 +
  1.2931 +    /* Erase the needSync flag from every page.
  1.2932 +    */
  1.2933 +    for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
  1.2934 +      pPg->needSync = 0;
  1.2935 +    }
  1.2936 +    lruListSetFirstSynced(pPager);
  1.2937 +  }
  1.2938 +
  1.2939 +#ifndef NDEBUG
  1.2940 +  /* If the Pager.needSync flag is clear then the PgHdr.needSync
  1.2941 +  ** flag must also be clear for all pages.  Verify that this
  1.2942 +  ** invariant is true.
  1.2943 +  */
  1.2944 +  else{   /* pairs with if( pPager->needSync ) above; absent when NDEBUG is set */
  1.2945 +    for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
  1.2946 +      assert( pPg->needSync==0 );
  1.2947 +    }
  1.2948 +    assert( pPager->lru.pFirstSynced==pPager->lru.pFirst );
  1.2949 +  }
  1.2950 +#endif
  1.2951 +
  1.2952 +  return rc;
  1.2953 +}
  1.2954 +
  1.2955 +/*
  1.2956 +** Merge two lists of pages connected by pDirty and in pgno order.
  1.2957 +** Do not bother fixing the pPrevDirty pointers.
  1.2958 +*/
  1.2959 +static PgHdr *merge_pagelist(PgHdr *pA, PgHdr *pB){
  1.2960 +  PgHdr result, *pTail;   /* result is a dummy head; only result.pDirty is used */
  1.2961 +  pTail = &result;
  1.2962 +  while( pA && pB ){
  1.2963 +    if( pA->pgno<pB->pgno ){
  1.2964 +      pTail->pDirty = pA;
  1.2965 +      pTail = pA;
  1.2966 +      pA = pA->pDirty;
  1.2967 +    }else{                /* on equal pgno the element from pB is taken first */
  1.2968 +      pTail->pDirty = pB;
  1.2969 +      pTail = pB;
  1.2970 +      pB = pB->pDirty;
  1.2971 +    }
  1.2972 +  }
  1.2973 +  if( pA ){               /* append whichever input still has elements */
  1.2974 +    pTail->pDirty = pA;
  1.2975 +  }else if( pB ){
  1.2976 +    pTail->pDirty = pB;
  1.2977 +  }else{
  1.2978 +    pTail->pDirty = 0;    /* both inputs exhausted (or both were empty) */
  1.2979 +  }
  1.2980 +  return result.pDirty;
  1.2981 +}
  1.2982 +
  1.2983 +/*
  1.2984 +** Sort the list of pages in ascending order by pgno.  Pages are
  1.2985 +** connected by pDirty pointers.  The pPrevDirty pointers are
  1.2986 +** corrupted by this sort.
  1.2987 +*/
  1.2988 +#define N_SORT_BUCKET_ALLOC 25
  1.2989 +#define N_SORT_BUCKET       25
  1.2990 +#ifdef SQLITE_TEST
  1.2991 +  int sqlite3_pager_n_sort_bucket = 0;
  1.2992 +  #undef N_SORT_BUCKET
  1.2993 +  #define N_SORT_BUCKET \
  1.2994 +   (sqlite3_pager_n_sort_bucket?sqlite3_pager_n_sort_bucket:N_SORT_BUCKET_ALLOC)
  1.2995 +#endif
  1.2996 +static PgHdr *sort_pagelist(PgHdr *pIn){
  1.2997 +  PgHdr *a[N_SORT_BUCKET_ALLOC], *p;   /* a[i] is 0 or an already-merged sublist */
  1.2998 +  int i;
  1.2999 +  memset(a, 0, sizeof(a));
  1.3000 +  while( pIn ){
  1.3001 +    p = pIn;              /* detach the head of pIn as a one-element list */
  1.3002 +    pIn = p->pDirty;
  1.3003 +    p->pDirty = 0;
  1.3004 +    for(i=0; i<N_SORT_BUCKET-1; i++){
  1.3005 +      if( a[i]==0 ){
  1.3006 +        a[i] = p;         /* first empty bucket takes the accumulated list */
  1.3007 +        break;
  1.3008 +      }else{
  1.3009 +        p = merge_pagelist(a[i], p);   /* fold the occupied bucket into p */
  1.3010 +        a[i] = 0;
  1.3011 +      }
  1.3012 +    }
  1.3013 +    if( i==N_SORT_BUCKET-1 ){
  1.3014 +      /* Coverage: To get here, there need to be 2^(N_SORT_BUCKET) 
  1.3015 +      ** elements in the input list. This is possible, but impractical.
  1.3016 +      ** Testing this line is the point of global variable
  1.3017 +      ** sqlite3_pager_n_sort_bucket.
  1.3018 +      */
  1.3019 +      a[i] = merge_pagelist(a[i], p);
  1.3020 +    }
  1.3021 +  }
  1.3022 +  p = a[0];
  1.3023 +  for(i=1; i<N_SORT_BUCKET; i++){      /* merge all buckets into one list */
  1.3024 +    p = merge_pagelist(p, a[i]);
  1.3025 +  }
  1.3026 +  return p;
  1.3027 +}
  1.3028 +
  1.3029 +/*
  1.3030 +** Write every page on the list pList (connected by the PgHdr.pDirty pointer)
  1.3031 +** out to the database file and mark them all as clean.  Returns SQLITE_OK
  1.3032 +** or the first error from locking, opening the temp file, or writing.
  1.3033 +*/
  1.3034 +static int pager_write_pagelist(PgHdr *pList){
  1.3035 +  Pager *pPager;
  1.3036 +  PgHdr *p;
  1.3037 +  int rc;
  1.3038 +
  1.3039 +  if( pList==0 ) return SQLITE_OK;
  1.3040 +  pPager = pList->pPager;
  1.3041 +
  1.3042 +  /* At this point there may be either a RESERVED or EXCLUSIVE lock on the
  1.3043 +  ** database file. If there is already an EXCLUSIVE lock, the following
  1.3044 +  ** calls to sqlite3OsLock() are no-ops.
  1.3045 +  **
  1.3046 +  ** Moving the lock from RESERVED to EXCLUSIVE actually involves going
  1.3047 +  ** through an intermediate state PENDING.   A PENDING lock prevents new
  1.3048 +  ** readers from attaching to the database but is insufficient for us to
  1.3049 +  ** write.  The idea of a PENDING lock is to prevent new readers from
  1.3050 +  ** coming in while we wait for existing readers to clear.
  1.3051 +  **
  1.3052 +  ** While the pager is in the RESERVED state, the original database file
  1.3053 +  ** is unchanged and we can rollback without having to playback the
  1.3054 +  ** journal into the original database file.  Once we transition to
  1.3055 +  ** EXCLUSIVE, it means the database file has been changed and any rollback
  1.3056 +  ** will require a journal playback.
  1.3057 +  */
  1.3058 +  rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK);
  1.3059 +  if( rc!=SQLITE_OK ){
  1.3060 +    return rc;
  1.3061 +  }
  1.3062 +
  1.3063 +  pList = sort_pagelist(pList);
  1.3064 +  for(p=pList; p; p=p->pDirty){
  1.3065 +    assert( p->dirty );
  1.3066 +    p->dirty = 0;         /* flags are cleared before any write is attempted */
  1.3067 +  }
  1.3068 +
  1.3069 +  /* If the file has not yet been opened, open it now. */
  1.3070 +  if( !pPager->fd->pMethods ){
  1.3071 +    assert(pPager->tempFile);
  1.3072 +	rc = sqlite3PagerOpentemp(pPager, pPager->fd, pPager->vfsFlags);
  1.3073 +	if( rc ) return rc;
  1.3074 +  }
  1.3075 +
  1.3076 +  while( pList ){
  1.3077 +    /* If there are dirty pages in the page cache with page numbers greater
  1.3078 +    ** than Pager.dbSize, this means sqlite3PagerTruncate() was called to
  1.3079 +    ** make the file smaller (presumably by auto-vacuum code). Do not write
  1.3080 +    ** any such pages to the file.
  1.3081 +    */
  1.3082 +    if( pList->pgno<=pPager->dbSize ){
  1.3083 +      i64 offset = (pList->pgno-1)*(i64)pPager->pageSize;
  1.3084 +      char *pData = CODEC2(pPager, PGHDR_TO_DATA(pList), pList->pgno, 6);
  1.3085 +      PAGERTRACE4("STORE %d page %d hash(%08x)\n",
  1.3086 +                   PAGERID(pPager), pList->pgno, pager_pagehash(pList));
  1.3087 +      IOTRACE(("PGOUT %p %d\n", pPager, pList->pgno));
  1.3088 +      rc = sqlite3OsWrite(pPager->fd, pData, pPager->pageSize, offset);
  1.3089 +      PAGER_INCR(sqlite3_pager_writedb_count);
  1.3090 +      PAGER_INCR(pPager->nWrite);
  1.3091 +      if( pList->pgno==1 ){
  1.3092 +        memcpy(&pPager->dbFileVers, &pData[24], sizeof(pPager->dbFileVers));
  1.3093 +      }
  1.3094 +    }
  1.3095 +#ifndef NDEBUG
  1.3096 +    else{
  1.3097 +      PAGERTRACE3("NOSTORE %d page %d\n", PAGERID(pPager), pList->pgno);
  1.3098 +    }
  1.3099 +#endif
  1.3100 +    if( rc ) return rc;   /* stop at the first failed write */
  1.3101 +#ifdef SQLITE_CHECK_PAGES
  1.3102 +    pList->pageHash = pager_pagehash(pList);
  1.3103 +#endif
  1.3104 +    pList = pList->pDirty;
  1.3105 +  }
  1.3106 +  return SQLITE_OK;
  1.3107 +}
  1.3108 +
  1.3109 +/*
  1.3110 +** Return the head of the pager's dirty list (Pager.pDirty).  Unless NDEBUG
  1.3111 +** is defined, first check that the number of pages flagged dirty on the pAll
  1.3112 +** list equals the length of the dirty list.  In-use pages are not excluded.
  1.3113 +*/
  1.3114 +static PgHdr *pager_get_all_dirty_pages(Pager *pPager){
  1.3115 +
  1.3116 +#ifndef NDEBUG
  1.3117 +  /* Verify the sanity of the dirty list when we are running
  1.3118 +  ** in debugging mode.  This is expensive, so do not
  1.3119 +  ** do this on a normal build. */
  1.3120 +  int n1 = 0;             /* pages on pAll with the dirty flag set */
  1.3121 +  int n2 = 0;             /* pages reachable through the pDirty chain */
  1.3122 +  PgHdr *p;
  1.3123 +  for(p=pPager->pAll; p; p=p->pNextAll){ if( p->dirty ) n1++; }
  1.3124 +  for(p=pPager->pDirty; p; p=p->pDirty){ n2++; }
  1.3125 +  assert( n1==n2 );
  1.3126 +#endif
  1.3127 +
  1.3128 +  return pPager->pDirty;
  1.3129 +}
  1.3130 +
  1.3131 +/*
  1.3132 +** Set *pExists to 1 if there is a hot journal on the given pager, else to 0.
  1.3133 +** A hot journal is one that needs to be played back.
  1.3134 +**
  1.3135 +** If the current size of the database file is 0 but a journal file
  1.3136 +** exists, that is probably an old journal left over from a prior
  1.3137 +** database with the same name.  Just delete the journal.
  1.3138 +**
  1.3139 +** Return SQLITE_OK, or an error code if the journal status cannot be determined.
  1.3140 +**
  1.3141 +** This routine does not open the journal file to examine its
  1.3142 +** content.  Hence, the journal might contain the name of a master
  1.3143 +** journal file that has been deleted, and hence not be hot.  Or
  1.3144 +** the header of the journal might be zeroed out.  This routine
  1.3145 +** does not discover these cases of a non-hot journal - if the
  1.3146 +** journal file exists and is not empty this routine assumes it
  1.3147 +** is hot.  The pager_playback() routine will discover that the
  1.3148 +** journal file is not really hot and will no-op.
  1.3149 +*/
  1.3150 +static int hasHotJournal(Pager *pPager, int *pExists){
  1.3151 +  sqlite3_vfs *pVfs = pPager->pVfs;
  1.3152 +  int rc = SQLITE_OK;
  1.3153 +  *pExists = 0;           /* stays 0 on every path except the one below */
  1.3154 +  if( pPager->useJournal && pPager->fd->pMethods ){
  1.3155 +    int exists;
  1.3156 +    int locked;           /* only written, and only read, when exists is true */
  1.3157 +
  1.3158 +    rc = sqlite3OsAccess(pVfs, pPager->zJournal, SQLITE_ACCESS_EXISTS, &exists);
  1.3159 +    if( rc==SQLITE_OK && exists ){
  1.3160 +      rc = sqlite3OsCheckReservedLock(pPager->fd, &locked);
  1.3161 +    }
  1.3162 +
  1.3163 +    if( rc==SQLITE_OK && exists && !locked ){
  1.3164 +      int nPage;
  1.3165 +      rc = sqlite3PagerPagecount(pPager, &nPage);
  1.3166 +      if( rc==SQLITE_OK ){
  1.3167 +        if( nPage==0 ){
  1.3168 +          sqlite3OsDelete(pVfs, pPager->zJournal, 0);   /* result is not checked */
  1.3169 +        }else{
  1.3170 +          *pExists = 1;
  1.3171 +        }
  1.3172 +      }
  1.3173 +    }
  1.3174 +  }
  1.3175 +
  1.3176 +  return rc;
  1.3177 +}
  1.3178 +
  1.3179 +/*
  1.3180 +** Try to find a page in the cache to recycle.  On success *ppPg is that page.
  1.3181 +**
  1.3182 +** This routine may return SQLITE_IOERR, SQLITE_FULL or SQLITE_OK. It 
  1.3183 +** does not set the pPager->errCode variable.
  1.3184 +*/
  1.3185 +static int pager_recycle(Pager *pPager, PgHdr **ppPg){
  1.3186 +  PgHdr *pPg;
  1.3187 +  *ppPg = 0;              /* stays 0 on every error return */
  1.3188 +
  1.3189 +  /* It is illegal to call this function unless the pager object
  1.3190 +  ** pointed to by pPager has at least one free page (page with nRef==0).
  1.3191 +  */ 
  1.3192 +  assert(!MEMDB);
  1.3193 +  assert(pPager->lru.pFirst);
  1.3194 +
  1.3195 +  /* Find a page to recycle.  Try to locate a page that does not
  1.3196 +  ** require us to do an fsync() on the journal.
  1.3197 +  */
  1.3198 +  pPg = pPager->lru.pFirstSynced;
  1.3199 +
  1.3200 +  /* If we could not find a page that does not require an fsync()
  1.3201 +  ** on the journal file then fsync the journal file.  This is a
  1.3202 +  ** very slow operation, so we work hard to avoid it.  But sometimes
  1.3203 +  ** it can't be helped.
  1.3204 +  */
  1.3205 +  if( pPg==0 && pPager->lru.pFirst ){
  1.3206 +    if( !pPager->errCode ){
  1.3207 +      int iDc = sqlite3OsDeviceCharacteristics(pPager->fd);
  1.3208 +      int rc = syncJournal(pPager);
  1.3209 +      if( rc!=0 ){
  1.3210 +        return rc;
  1.3211 +      }
  1.3212 +      if( pPager->fullSync && 0==(iDc&SQLITE_IOCAP_SAFE_APPEND) ){
  1.3213 +        /* If in full-sync mode, write a new journal header into the
  1.3214 +        ** journal file. This is done to avoid ever modifying a journal
  1.3215 +        ** header that is involved in the rollback of pages that have
  1.3216 +        ** already been written to the database (in case the header is
  1.3217 +        ** trashed when the nRec field is updated).
  1.3218 +        */
  1.3219 +        pPager->nRec = 0;
  1.3220 +        assert( pPager->journalOff > 0 );
  1.3221 +        assert( pPager->doNotSync==0 );
  1.3222 +        rc = writeJournalHdr(pPager);
  1.3223 +        if( rc!=0 ){
  1.3224 +          return rc;
  1.3225 +        }
  1.3226 +      }
  1.3227 +    }
  1.3228 +    pPg = pPager->lru.pFirst;
  1.3229 +  }
  1.3230 +
  1.3231 +  assert( pPg->nRef==0 );
  1.3232 +
  1.3233 +  /* Write the page to the database file if it is dirty.
  1.3234 +  */
  1.3235 +  if( pPg->dirty && !pPager->errCode ){
  1.3236 +    int rc;
  1.3237 +    assert( pPg->needSync==0 );
  1.3238 +    makeClean(pPg);
  1.3239 +    pPg->dirty = 1;       /* set again: pager_write_pagelist() asserts p->dirty */
  1.3240 +    pPg->pDirty = 0;      /* hand over a one-element list */
  1.3241 +    rc = pager_write_pagelist( pPg );
  1.3242 +    pPg->dirty = 0;
  1.3243 +    if( rc!=SQLITE_OK ){
  1.3244 +      return rc;
  1.3245 +    }
  1.3246 +  }
  1.3247 +  assert( pPg->dirty==0 || pPager->errCode );
  1.3248 +
  1.3249 +  /* If the page we are recycling is marked as alwaysRollback, then
  1.3250 +  ** set the global alwaysRollback flag, thus disabling the
  1.3251 +  ** sqlite3PagerDontRollback() optimization for the rest of this transaction.
  1.3252 +  ** It is necessary to do this because the page marked alwaysRollback
  1.3253 +  ** might be reloaded at a later time but at that point we won't remember
  1.3254 +  ** that it was marked alwaysRollback.  This means that all pages must
  1.3255 +  ** be marked as alwaysRollback from here on out.
  1.3256 +  */
  1.3257 +  if( pPg->alwaysRollback ){
  1.3258 +    IOTRACE(("ALWAYS_ROLLBACK %p\n", pPager))
  1.3259 +    pPager->alwaysRollback = 1;
  1.3260 +  }
  1.3261 +
  1.3262 +  /* Unlink the old page from the free list and the hash table
  1.3263 +  */
  1.3264 +  unlinkPage(pPg);
  1.3265 +  assert( pPg->pgno==0 );
  1.3266 +
  1.3267 +  *ppPg = pPg;
  1.3268 +  return SQLITE_OK;
  1.3269 +}
  1.3270 +
  1.3271 +#ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
  1.3272 +/*
  1.3273 +** This function is called to free superfluous dynamically allocated memory
  1.3274 +** held by the pager system. Memory in use by any SQLite pager allocated
  1.3275 +** by the current thread may be sqlite3_free()ed.
  1.3276 +**
  1.3277 +** nReq is the number of bytes of memory required. Once this much has
  1.3278 +** been released, the function returns. The return value is the total number 
  1.3279 +** of bytes of memory released.
  1.3280 +*/
  1.3281 +int sqlite3PagerReleaseMemory(int nReq){
  1.3282 +  int nReleased = 0;          /* Bytes of memory released so far */
  1.3283 +  Pager *pPager;              /* For looping over pagers */
  1.3284 +  BusyHandler *savedBusy;     /* Saved copy of the busy handler */
  1.3285 +  int rc = SQLITE_OK;
  1.3286 +
  1.3287 +  /* Acquire the memory-management mutex
  1.3288 +  */
  1.3289 +#ifndef SQLITE_MUTEX_NOOP
  1.3290 +  sqlite3_mutex *mutex;       /* The MEM2 mutex */
  1.3291 +  mutex = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MEM2);
  1.3292 +#endif
  1.3293 +  sqlite3_mutex_enter(mutex);
  1.3294 +
  1.3295 +  /* Signal all database connections that memory management wants
  1.3296 +  ** to have access to the pagers.
  1.3297 +  */
  1.3298 +  for(pPager=sqlite3PagerList; pPager; pPager=pPager->pNext){
  1.3299 +     pPager->iInUseMM = 1;
  1.3300 +  }
  1.3301 +
  1.3302 +  while( rc==SQLITE_OK && (nReq<0 || nReleased<nReq) ){   /* nReq<0: no byte cap */
  1.3303 +    PgHdr *pPg;
  1.3304 +    PgHdr *pRecycled;
  1.3305 + 
  1.3306 +    /* Try to find a page to recycle that does not require a sync(). If
  1.3307 +    ** this is not possible, find one that does require a sync().
  1.3308 +    */
  1.3309 +    sqlite3_mutex_enter(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_LRU));
  1.3310 +    pPg = sqlite3LruPageList.pFirstSynced;
  1.3311 +    while( pPg && (pPg->needSync || pPg->pPager->iInUseDB) ){
  1.3312 +      pPg = pPg->gfree.pNext;
  1.3313 +    }
  1.3314 +    if( !pPg ){
  1.3315 +      pPg = sqlite3LruPageList.pFirst;
  1.3316 +      while( pPg && pPg->pPager->iInUseDB ){
  1.3317 +        pPg = pPg->gfree.pNext;
  1.3318 +      }
  1.3319 +    }
  1.3320 +    sqlite3_mutex_leave(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_LRU));
  1.3321 +
  1.3322 +    /* If pPg==0, then the block above has failed to find a page to
  1.3323 +    ** recycle. In this case return early - no further memory will
  1.3324 +    ** be released.
  1.3325 +    */
  1.3326 +    if( !pPg ) break;
  1.3327 +
  1.3328 +    pPager = pPg->pPager;
  1.3329 +    assert(!pPg->needSync || pPg==pPager->lru.pFirst);
  1.3330 +    assert(pPg->needSync || pPg==pPager->lru.pFirstSynced);
  1.3331 +  
  1.3332 +    savedBusy = pPager->pBusyHandler;
  1.3333 +    pPager->pBusyHandler = 0;   /* busy handler is cleared for the recycle call */
  1.3334 +    rc = pager_recycle(pPager, &pRecycled);
  1.3335 +    pPager->pBusyHandler = savedBusy;
  1.3336 +    assert(pRecycled==pPg || rc!=SQLITE_OK);
  1.3337 +    if( rc==SQLITE_OK ){
  1.3338 +      /* We've found a page to free. At this point the page has been 
  1.3339 +      ** removed from the page hash-table, free-list and synced-list 
  1.3340 +      ** (pFirstSynced). It is still in the all pages (pAll) list. 
  1.3341 +      ** Remove it from this list before freeing.
  1.3342 +      **
  1.3343 +      ** Todo: Check the Pager.pStmt list to make sure this is Ok. It 
  1.3344 +      ** probably is though.
  1.3345 +      */
  1.3346 +      PgHdr *pTmp;
  1.3347 +      assert( pPg );
  1.3348 +      if( pPg==pPager->pAll ){
  1.3349 +         assert(pPg->pPrevAll==0);
  1.3350 +         assert(pPg->pNextAll==0 || pPg->pNextAll->pPrevAll==pPg);
  1.3351 +         pPager->pAll = pPg->pNextAll;
  1.3352 +         if( pPager->pAll ){
  1.3353 +           pPager->pAll->pPrevAll = 0;
  1.3354 +         }
  1.3355 +      }else{
  1.3356 +         assert(pPg->pPrevAll);
  1.3357 +         assert(pPg->pPrevAll->pNextAll==pPg);
  1.3358 +         pTmp = pPg->pPrevAll;
  1.3359 +         pTmp->pNextAll = pPg->pNextAll;
  1.3360 +         if( pTmp->pNextAll ){
  1.3361 +           pTmp->pNextAll->pPrevAll = pTmp;
  1.3362 +         }
  1.3363 +      }
  1.3364 +      nReleased += (
  1.3365 +          sizeof(*pPg) + pPager->pageSize
  1.3366 +          + sizeof(u32) + pPager->nExtra
  1.3367 +          + MEMDB*sizeof(PgHistory) 
  1.3368 +      );
  1.3369 +      IOTRACE(("PGFREE %p %d *\n", pPager, pPg->pgno));
  1.3370 +      PAGER_INCR(sqlite3_pager_pgfree_count);
  1.3371 +      sqlite3PageFree(pPg->pData);
  1.3372 +      sqlite3_free(pPg);
  1.3373 +      pPager->nPage--;
  1.3374 +    }else{
  1.3375 +      /* An error occurred whilst writing to the database file or 
  1.3376 +      ** journal in pager_recycle(). The error is not returned to the 
  1.3377 +      ** caller of this function. Instead, set the Pager.errCode variable.
  1.3378 +      ** The error will be returned to the user (or users, in the case 
  1.3379 +      ** of a shared pager cache) of the pager for which the error occurred.
  1.3380 +      */
  1.3381 +      assert(
  1.3382 +          (rc&0xff)==SQLITE_IOERR ||
  1.3383 +          rc==SQLITE_FULL ||
  1.3384 +          rc==SQLITE_BUSY
  1.3385 +      );
  1.3386 +      assert( pPager->state>=PAGER_RESERVED );
  1.3387 +      pager_error(pPager, rc);   /* rc stays non-OK, so the while loop ends */
  1.3388 +    }
  1.3389 +  }
  1.3390 +
  1.3391 +  /* Clear the memory management flags and release the mutex
  1.3392 +  */
  1.3393 +  for(pPager=sqlite3PagerList; pPager; pPager=pPager->pNext){
  1.3394 +     pPager->iInUseMM = 0;
  1.3395 +  }
  1.3396 +  sqlite3_mutex_leave(mutex);
  1.3397 +
  1.3398 +  /* Return the number of bytes released
  1.3399 +  */
  1.3400 +  return nReleased;
  1.3401 +}
  1.3402 +#endif /* SQLITE_ENABLE_MEMORY_MANAGEMENT */
  1.3403 +
  1.3404 +/*
  1.3405 +** Read the content of page number pgno out of the database file into pPg.
  1.3406 +*/
  1.3407 +static int readDbPage(Pager *pPager, PgHdr *pPg, Pgno pgno){
  1.3408 +  int rc;
  1.3409 +  i64 offset;
  1.3410 +  assert( MEMDB==0 );
  1.3411 +  assert(pPager->fd->pMethods||pPager->tempFile);
  1.3412 +  if( !pPager->fd->pMethods ){
  1.3413 +    return SQLITE_IOERR_SHORT_READ;   /* no open handle; per the assert, a temp file */
  1.3414 +  }
  1.3415 +  offset = (pgno-1)*(i64)pPager->pageSize;   /* page numbers are 1-based */
  1.3416 +  rc = sqlite3OsRead(pPager->fd, PGHDR_TO_DATA(pPg), pPager->pageSize, offset);
  1.3417 +  PAGER_INCR(sqlite3_pager_readdb_count);
  1.3418 +  PAGER_INCR(pPager->nRead);
  1.3419 +  IOTRACE(("PGIN %p %d\n", pPager, pgno));
  1.3420 +  if( pgno==1 ){   /* NOTE(review): this copy is not gated on rc - confirm intent */
  1.3421 +    memcpy(&pPager->dbFileVers, &((u8*)PGHDR_TO_DATA(pPg))[24],
  1.3422 +                                              sizeof(pPager->dbFileVers));
  1.3423 +  }
  1.3424 +  CODEC1(pPager, PGHDR_TO_DATA(pPg), pPg->pgno, 3);
  1.3425 +  PAGERTRACE4("FETCH %d page %d hash(%08x)\n",
  1.3426 +               PAGERID(pPager), pPg->pgno, pager_pagehash(pPg));
  1.3427 +  return rc;       /* result of sqlite3OsRead() */
  1.3428 +}
  1.3429 +
  1.3430 +
  1.3431 +/*
  1.3432 +** This function is called to obtain the shared lock required before
  1.3433 +** data may be read from the pager cache. If the shared lock has already
  1.3434 +** been obtained, this function is a no-op.
  1.3435 +**
  1.3436 +** Immediately after obtaining the shared lock (if required), this function
  1.3437 +** checks for a hot-journal file. If one is found, an emergency rollback
  1.3438 +** is performed immediately.  Returns SQLITE_OK or an error code.
  1.3439 +*/
  1.3440 +static int pagerSharedLock(Pager *pPager){
  1.3441 +  int rc = SQLITE_OK;
  1.3442 +  int isErrorReset = 0;
  1.3443 +
  1.3444 +  /* If this database is opened for exclusive access, has no outstanding 
  1.3445 +  ** page references and is in an error-state, now is the chance to clear
  1.3446 +  ** the error. Discard the contents of the pager-cache and treat any
  1.3447 +  ** open journal file as a hot-journal.
  1.3448 +  */
  1.3449 +  if( !MEMDB && pPager->exclusiveMode && pPager->nRef==0 && pPager->errCode ){
  1.3450 +    if( pPager->journalOpen ){
  1.3451 +      isErrorReset = 1;
  1.3452 +    }
  1.3453 +    pPager->errCode = SQLITE_OK;
  1.3454 +    pager_reset(pPager);
  1.3455 +  }
  1.3456 +
  1.3457 +  /* If the pager is still in an error state, do not proceed. The error 
  1.3458 +  ** state will be cleared at some point in the future when all page 
  1.3459 +  ** references are dropped and the cache can be discarded.
  1.3460 +  */
  1.3461 +  if( pPager->errCode && pPager->errCode!=SQLITE_FULL ){
  1.3462 +    return pPager->errCode;
  1.3463 +  }
  1.3464 +
  1.3465 +  if( pPager->state==PAGER_UNLOCK || isErrorReset ){
  1.3466 +    sqlite3_vfs *pVfs = pPager->pVfs;
  1.3467 +    if( !MEMDB ){
  1.3468 +      int isHotJournal;   /* set by hasHotJournal(); not read when isErrorReset */
  1.3469 +      assert( pPager->nRef==0 );
  1.3470 +      if( !pPager->noReadlock ){
  1.3471 +        rc = pager_wait_on_lock(pPager, SHARED_LOCK);
  1.3472 +        if( rc!=SQLITE_OK ){
  1.3473 +          assert( pPager->state==PAGER_UNLOCK );
  1.3474 +          return pager_error(pPager, rc);
  1.3475 +        }
  1.3476 +        assert( pPager->state>=SHARED_LOCK );
  1.3477 +      }
  1.3478 +  
  1.3479 +      /* If a journal file exists, and there is no RESERVED lock on the
  1.3480 +      ** database file, then it either needs to be played back or deleted.
  1.3481 +      */
  1.3482 +      if( !isErrorReset ){
  1.3483 +        rc = hasHotJournal(pPager, &isHotJournal);
  1.3484 +        if( rc!=SQLITE_OK ){
  1.3485 +          goto failed;
  1.3486 +        }
  1.3487 +      }
  1.3488 +      if( isErrorReset || isHotJournal ){
  1.3489 +        /* Get an EXCLUSIVE lock on the database file. At this point it is
  1.3490 +        ** important that a RESERVED lock is not obtained on the way to the
  1.3491 +        ** EXCLUSIVE lock. If it were, another process might open the
  1.3492 +        ** database file, detect the RESERVED lock, and conclude that the
  1.3493 +        ** database is safe to read while this process is still rolling it 
  1.3494 +        ** back.
  1.3495 +        ** 
  1.3496 +        ** Because the intermediate RESERVED lock is not requested, the
  1.3497 +        ** second process will get to this point in the code and fail to
  1.3498 +        ** obtain its own EXCLUSIVE lock on the database file.
  1.3499 +        */
  1.3500 +        if( pPager->state<EXCLUSIVE_LOCK ){
  1.3501 +          rc = sqlite3OsLock(pPager->fd, EXCLUSIVE_LOCK);
  1.3502 +          if( rc!=SQLITE_OK ){
  1.3503 +            rc = pager_error(pPager, rc);
  1.3504 +            goto failed;
  1.3505 +          }
  1.3506 +          pPager->state = PAGER_EXCLUSIVE;
  1.3507 +        }
  1.3508 + 
  1.3509 +        /* Open the journal for read/write access. This is because in 
  1.3510 +        ** exclusive-access mode the file descriptor will be kept open and
  1.3511 +        ** possibly used for a transaction later on. On some systems, the
  1.3512 +        ** OsTruncate() call used in exclusive-access mode also requires
  1.3513 +        ** a read/write file handle.
  1.3514 +        */
  1.3515 +        if( !isErrorReset && pPager->journalOpen==0 ){
  1.3516 +          int res;
  1.3517 +          rc = sqlite3OsAccess(pVfs,pPager->zJournal,SQLITE_ACCESS_EXISTS,&res);
  1.3518 +          if( rc==SQLITE_OK ){
  1.3519 +            if( res ){
  1.3520 +              int fout = 0;
  1.3521 +              int f = SQLITE_OPEN_READWRITE|SQLITE_OPEN_MAIN_JOURNAL;
  1.3522 +              assert( !pPager->tempFile );
  1.3523 +              rc = sqlite3OsOpen(pVfs, pPager->zJournal, pPager->jfd, f, &fout);
  1.3524 +              assert( rc!=SQLITE_OK || pPager->jfd->pMethods );
  1.3525 +              if( fout&SQLITE_OPEN_READONLY ){
  1.3526 +                rc = SQLITE_BUSY;
  1.3527 +                sqlite3OsClose(pPager->jfd);
  1.3528 +              }
  1.3529 +            }else{
  1.3530 +              /* If the journal does not exist, that means some other process
  1.3531 +              ** has already rolled it back */
  1.3532 +              rc = SQLITE_BUSY;
  1.3533 +            }
  1.3534 +          }
  1.3535 +        }
  1.3536 +        if( rc!=SQLITE_OK ){
  1.3537 +          if( rc!=SQLITE_NOMEM && rc!=SQLITE_IOERR_UNLOCK 
  1.3538 +           && rc!=SQLITE_IOERR_NOMEM 
  1.3539 +          ){
  1.3540 +            rc = SQLITE_BUSY;   /* every other failure is reported as BUSY */
  1.3541 +          }
  1.3542 +          goto failed;
  1.3543 +        }
  1.3544 +        pPager->journalOpen = 1;
  1.3545 +        pPager->journalStarted = 0;
  1.3546 +        pPager->journalOff = 0;
  1.3547 +        pPager->setMaster = 0;
  1.3548 +        pPager->journalHdr = 0;
  1.3549 + 
  1.3550 +        /* Playback and delete the journal.  Drop the database write
  1.3551 +        ** lock and reacquire the read lock.
  1.3552 +        */
  1.3553 +        rc = pager_playback(pPager, 1);
  1.3554 +        if( rc!=SQLITE_OK ){
  1.3555 +          rc = pager_error(pPager, rc);
  1.3556 +          goto failed;
  1.3557 +        }
  1.3558 +        assert(pPager->state==PAGER_SHARED || 
  1.3559 +            (pPager->exclusiveMode && pPager->state>PAGER_SHARED)
  1.3560 +        );
  1.3561 +      }
  1.3562 +
  1.3563 +      if( pPager->pAll ){
  1.3564 +        /* The shared-lock has just been acquired on the database file
  1.3565 +        ** and there are already pages in the cache (from a previous
  1.3566 +        ** read or write transaction).  Check to see if the database
  1.3567 +        ** has been modified.  If the database has changed, flush the
  1.3568 +        ** cache.
  1.3569 +        **
  1.3570 +        ** Database changes are detected by looking at the sizeof(dbFileVers)
  1.3571 +        ** bytes beginning at offset 24 into the file.  The first 4 of these
  1.3572 +        ** bytes are a 32-bit counter that is incremented with each change.
  1.3573 +        ** The other bytes change randomly with each file change when
  1.3574 +        ** a codec is in use.
  1.3575 +        ** 
  1.3576 +        ** There is a vanishingly small chance that a change will not be 
  1.3577 +        ** detected.  The chance of an undetected change is so small that
  1.3578 +        ** it can be neglected.
  1.3579 +        */
  1.3580 +        char dbFileVers[sizeof(pPager->dbFileVers)];
  1.3581 +        sqlite3PagerPagecount(pPager, 0);   /* outcome is read from errCode below */
  1.3582 +
  1.3583 +        if( pPager->errCode ){
  1.3584 +          rc = pPager->errCode;
  1.3585 +          goto failed;
  1.3586 +        }
  1.3587 +
  1.3588 +        if( pPager->dbSize>0 ){
  1.3589 +          IOTRACE(("CKVERS %p %d\n", pPager, (int)sizeof(dbFileVers)));
  1.3590 +          rc = sqlite3OsRead(pPager->fd, &dbFileVers, sizeof(dbFileVers), 24);
  1.3591 +          if( rc!=SQLITE_OK ){
  1.3592 +            goto failed;
  1.3593 +          }
  1.3594 +        }else{
  1.3595 +          memset(dbFileVers, 0, sizeof(dbFileVers));
  1.3596 +        }
  1.3597 +
  1.3598 +        if( memcmp(pPager->dbFileVers, dbFileVers, sizeof(dbFileVers))!=0 ){
  1.3599 +          pager_reset(pPager);
  1.3600 +        }
  1.3601 +      }
  1.3602 +    }
  1.3603 +    assert( pPager->exclusiveMode || pPager->state<=PAGER_SHARED );
  1.3604 +    if( pPager->state==PAGER_UNLOCK ){
  1.3605 +      pPager->state = PAGER_SHARED;
  1.3606 +    }
  1.3607 +  }
  1.3608 +
  1.3609 + failed:
  1.3610 +  if( rc!=SQLITE_OK ){
  1.3611 +    /* pager_unlock() is a no-op for exclusive mode and in-memory databases. */
  1.3612 +    pager_unlock(pPager);
  1.3613 +  }
  1.3614 +  return rc;
  1.3615 +}
  1.3616 +
  1.3617 +/*
  1.3618 +** Allocate a PgHdr object.   Either create a new one or reuse
  1.3619 +** an existing one that is not otherwise in use.
  1.3620 +**
  1.3621 +** A new PgHdr structure is created if any of the following are
  1.3622 +** true:
  1.3623 +**
  1.3624 +**     (1)  We have not exceeded our maximum allocated cache size
  1.3625 +**          as set by the "PRAGMA cache_size" command.
  1.3626 +**
  1.3627 +**     (2)  There are no unused PgHdr objects available at this time.
  1.3628 +**
  1.3629 +**     (3)  This is an in-memory database.
  1.3630 +**
  1.3631 +**     (4)  There are no PgHdr objects that do not require a journal
  1.3632 +**          file sync and a sync of the journal file is currently
  1.3633 +**          prohibited.
  1.3634 +**
  1.3635 +** Otherwise, reuse an existing PgHdr.  In other words, reuse an
  1.3636 +** existing PgHdr if all of the following are true:
  1.3637 +**
  1.3638 +**     (1)  We have reached or exceeded the maximum cache size
  1.3639 +**          allowed by "PRAGMA cache_size".
  1.3640 +**
  1.3641 +**     (2)  There is a PgHdr available with PgHdr->nRef==0
  1.3642 +**
  1.3643 +**     (3)  We are not in an in-memory database
  1.3644 +**
  1.3645 +**     (4)  Either there is an available PgHdr that does not need
  1.3646 +**          to be synced to disk or else disk syncing is currently
  1.3647 +**          allowed.
  1.3648 +*/
  1.3649 +static int pagerAllocatePage(Pager *pPager, PgHdr **ppPg){
  1.3650 +  int rc = SQLITE_OK;
  1.3651 +  PgHdr *pPg;
  1.3652 +  int nByteHdr;
  1.3653 +
  1.3654 +  /* Create a new PgHdr if any of the four conditions defined 
  1.3655 +  ** above are met: */
  1.3656 +  if( pPager->nPage<pPager->mxPage
  1.3657 +   || pPager->lru.pFirst==0 
  1.3658 +   || MEMDB
  1.3659 +   || (pPager->lru.pFirstSynced==0 && pPager->doNotSync)
  1.3660 +  ){
  1.3661 +    void *pData = 0;                   /* Initialized to placate warning */
  1.3662 +    if( pPager->nPage>=pPager->nHash ){
  1.3663 +      pager_resize_hash_table(pPager,
  1.3664 +         pPager->nHash<256 ? 256 : pPager->nHash*2);
  1.3665 +      if( pPager->nHash==0 ){
  1.3666 +        rc = SQLITE_NOMEM;
  1.3667 +        goto pager_allocate_out;
  1.3668 +      }
  1.3669 +    }
  1.3670 +    pagerLeave(pPager);
  1.3671 +    nByteHdr = sizeof(*pPg) + sizeof(u32) + pPager->nExtra
  1.3672 +              + MEMDB*sizeof(PgHistory);
  1.3673 +    pPg = sqlite3Malloc( nByteHdr );
  1.3674 +    if( pPg ){
  1.3675 +      pData = sqlite3PageMalloc( pPager->pageSize );
  1.3676 +      if( pData==0 ){
  1.3677 +        sqlite3_free(pPg);
  1.3678 +        pPg = 0;
  1.3679 +      }
  1.3680 +    }
  1.3681 +    pagerEnter(pPager);
  1.3682 +    if( pPg==0 ){
  1.3683 +      rc = SQLITE_NOMEM;
  1.3684 +      goto pager_allocate_out;
  1.3685 +    }
  1.3686 +    memset(pPg, 0, nByteHdr);
  1.3687 +    pPg->pData = pData;
  1.3688 +    pPg->pPager = pPager;
  1.3689 +    pPg->pNextAll = pPager->pAll;
  1.3690 +#ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
  1.3691 +    if( pPg->pNextAll ){
  1.3692 +      pPg->pNextAll->pPrevAll = pPg;
  1.3693 +    }
  1.3694 +#endif
  1.3695 +    pPager->pAll = pPg;
  1.3696 +    pPager->nPage++;
  1.3697 +  }else{
  1.3698 +    /* Recycle an existing page with a zero ref-count. */
  1.3699 +    rc = pager_recycle(pPager, &pPg);
  1.3700 +    if( rc==SQLITE_BUSY ){
  1.3701 +      rc = SQLITE_IOERR_BLOCKED;
  1.3702 +    }
  1.3703 +    if( rc!=SQLITE_OK ){
  1.3704 +      goto pager_allocate_out;
  1.3705 +    }
  1.3706 +    assert( pPager->state>=SHARED_LOCK );
  1.3707 +    assert(pPg);
  1.3708 +  }
  1.3709 +  *ppPg = pPg;
  1.3710 +
  1.3711 +pager_allocate_out:
  1.3712 +  return rc;
  1.3713 +}
  1.3714 +
  1.3715 +/*
  1.3716 +** Make sure we have the content for a page.  If the page was
  1.3717 +** previously acquired with noContent==1, then the content was
  1.3718 +** just initialized to zeros instead of being read from disk.
  1.3719 +** But now we need the real data off of disk.  So make sure we
  1.3720 +** have it.  Read it in if we do not have it already.
  1.3721 +*/
  1.3722 +static int pager_get_content(PgHdr *pPg){
  1.3723 +  if( pPg->needRead ){
  1.3724 +    int rc = readDbPage(pPg->pPager, pPg, pPg->pgno);
  1.3725 +    if( rc==SQLITE_OK ){
  1.3726 +      pPg->needRead = 0;
  1.3727 +    }else{
  1.3728 +      return rc;
  1.3729 +    }
  1.3730 +  }
  1.3731 +  return SQLITE_OK;
  1.3732 +}
  1.3733 +
  1.3734 +/*
  1.3735 +** Acquire a page.
  1.3736 +**
  1.3737 +** A read lock on the disk file is obtained when the first page is acquired. 
  1.3738 +** This read lock is dropped when the last page is released.
  1.3739 +**
  1.3740 +** This routine works for any page number greater than 0.  If the database
  1.3741 +** file is smaller than the requested page, then no actual disk
  1.3742 +** read occurs and the memory image of the page is initialized to
  1.3743 +** all zeros.  The extra data appended to a page is always initialized
  1.3744 +** to zeros the first time a page is loaded into memory.
  1.3745 +**
  1.3746 +** The acquisition might fail for several reasons.  In all cases,
  1.3747 +** an appropriate error code is returned and *ppPage is set to NULL.
  1.3748 +**
  1.3749 +** See also sqlite3PagerLookup().  Both this routine and Lookup() attempt
  1.3750 +** to find a page in the in-memory cache first.  If the page is not already
  1.3751 +** in memory, this routine goes to disk to read it in whereas Lookup()
  1.3752 +** just returns 0.  This routine acquires a read-lock the first time it
  1.3753 +** has to go to disk, and could also playback an old journal if necessary.
  1.3754 +** Since Lookup() never goes to disk, it never has to deal with locks
  1.3755 +** or journal files.
  1.3756 +**
  1.3757 +** If noContent is false, the page contents are actually read from disk.
  1.3758 +** If noContent is true, it means that we do not care about the contents
  1.3759 +** of the page at this time, so do not do a disk read.  Just fill in the
  1.3760 +** page content with zeros.  But mark the fact that we have not read the
  1.3761 +** content by setting the PgHdr.needRead flag.  Later on, if 
  1.3762 +** sqlite3PagerWrite() is called on this page or if this routine is
  1.3763 +** called again with noContent==0, that means that the content is needed
  1.3764 +** and the disk read should occur at that point.
  1.3765 +*/
  1.3766 +static int pagerAcquire(
  1.3767 +  Pager *pPager,      /* The pager open on the database file */
  1.3768 +  Pgno pgno,          /* Page number to fetch */
  1.3769 +  DbPage **ppPage,    /* Write a pointer to the page here */
  1.3770 +  int noContent       /* Do not bother reading content from disk if true */
  1.3771 +){
  1.3772 +  PgHdr *pPg;
  1.3773 +  int rc;
  1.3774 +
  1.3775 +  assert( pPager->state==PAGER_UNLOCK || pPager->nRef>0 || pgno==1 );
  1.3776 +
  1.3777 +  /* The maximum page number is 2^31. Return SQLITE_CORRUPT if a page
  1.3778 +  ** number greater than this, or zero, is requested.
  1.3779 +  */
  1.3780 +  if( pgno>PAGER_MAX_PGNO || pgno==0 || pgno==PAGER_MJ_PGNO(pPager) ){
  1.3781 +    return SQLITE_CORRUPT_BKPT;
  1.3782 +  }
  1.3783 +
  1.3784 +  /* Make sure we have not hit any critical errors.
  1.3785 +  */ 
  1.3786 +  assert( pPager!=0 );
  1.3787 +  *ppPage = 0;
  1.3788 +
  1.3789 +  /* If this is the first page accessed, then get a SHARED lock
  1.3790 +  ** on the database file. pagerSharedLock() is a no-op if 
  1.3791 +  ** a database lock is already held.
  1.3792 +  */
  1.3793 +  rc = pagerSharedLock(pPager);
  1.3794 +  if( rc!=SQLITE_OK ){
  1.3795 +    return rc;
  1.3796 +  }
  1.3797 +  assert( pPager->state!=PAGER_UNLOCK );
  1.3798 +
  1.3799 +  pPg = pager_lookup(pPager, pgno);
  1.3800 +  if( pPg==0 ){
  1.3801 +    /* The requested page is not in the page cache. */
  1.3802 +    int nMax;
  1.3803 +    int h;
  1.3804 +    PAGER_INCR(pPager->nMiss);
  1.3805 +    rc = pagerAllocatePage(pPager, &pPg);
  1.3806 +    if( rc!=SQLITE_OK ){
  1.3807 +      return rc;
  1.3808 +    }
  1.3809 +
  1.3810 +    pPg->pgno = pgno;
  1.3811 +    assert( !MEMDB || pgno>pPager->stmtSize );
  1.3812 +    pPg->inJournal = sqlite3BitvecTest(pPager->pInJournal, pgno);
  1.3813 +    pPg->needSync = 0;
  1.3814 +
  1.3815 +    makeClean(pPg);
  1.3816 +    pPg->nRef = 1;
  1.3817 +
  1.3818 +    pPager->nRef++;
  1.3819 +    if( pPager->nExtra>0 ){
  1.3820 +      memset(PGHDR_TO_EXTRA(pPg, pPager), 0, pPager->nExtra);
  1.3821 +    }
  1.3822 +    rc = sqlite3PagerPagecount(pPager, &nMax);
  1.3823 +    if( rc!=SQLITE_OK ){
  1.3824 +      sqlite3PagerUnref(pPg);
  1.3825 +      return rc;
  1.3826 +    }
  1.3827 +
  1.3828 +    /* Populate the page with data, either by reading from the database
  1.3829 +    ** file, or by setting the entire page to zero.
  1.3830 +    */
  1.3831 +    if( nMax<(int)pgno || MEMDB || (noContent && !pPager->alwaysRollback) ){
  1.3832 +      if( pgno>pPager->mxPgno ){
  1.3833 +        sqlite3PagerUnref(pPg);
  1.3834 +        return SQLITE_FULL;
  1.3835 +      }
  1.3836 +      memset(PGHDR_TO_DATA(pPg), 0, pPager->pageSize);
  1.3837 +      pPg->needRead = noContent && !pPager->alwaysRollback;
  1.3838 +      IOTRACE(("ZERO %p %d\n", pPager, pgno));
  1.3839 +    }else{
  1.3840 +      rc = readDbPage(pPager, pPg, pgno);
  1.3841 +      if( rc!=SQLITE_OK && rc!=SQLITE_IOERR_SHORT_READ ){
  1.3842 +        pPg->pgno = 0;
  1.3843 +        sqlite3PagerUnref(pPg);
  1.3844 +        return rc;
  1.3845 +      }
  1.3846 +      pPg->needRead = 0;
  1.3847 +    }
  1.3848 +
  1.3849 +    /* Link the page into the page hash table */
  1.3850 +    h = pgno & (pPager->nHash-1);
  1.3851 +    assert( pgno!=0 );
  1.3852 +    pPg->pNextHash = pPager->aHash[h];
  1.3853 +    pPager->aHash[h] = pPg;
  1.3854 +    if( pPg->pNextHash ){
  1.3855 +      assert( pPg->pNextHash->pPrevHash==0 );
  1.3856 +      pPg->pNextHash->pPrevHash = pPg;
  1.3857 +    }
  1.3858 +
  1.3859 +#ifdef SQLITE_CHECK_PAGES
  1.3860 +    pPg->pageHash = pager_pagehash(pPg);
  1.3861 +#endif
  1.3862 +  }else{
  1.3863 +    /* The requested page is in the page cache. */
  1.3864 +    assert(pPager->nRef>0 || pgno==1);
  1.3865 +    PAGER_INCR(pPager->nHit);
  1.3866 +    if( !noContent ){
  1.3867 +      rc = pager_get_content(pPg);
  1.3868 +      if( rc ){
  1.3869 +        return rc;
  1.3870 +      }
  1.3871 +    }
  1.3872 +    page_ref(pPg);
  1.3873 +  }
  1.3874 +  *ppPage = pPg;
  1.3875 +  return SQLITE_OK;
  1.3876 +}
  1.3877 +int sqlite3PagerAcquire(
  1.3878 +  Pager *pPager,      /* The pager open on the database file */
  1.3879 +  Pgno pgno,          /* Page number to fetch */
  1.3880 +  DbPage **ppPage,    /* Write a pointer to the page here */
  1.3881 +  int noContent       /* Do not bother reading content from disk if true */
  1.3882 +){
  1.3883 +  int rc;
  1.3884 +  pagerEnter(pPager);
  1.3885 +  rc = pagerAcquire(pPager, pgno, ppPage, noContent);
  1.3886 +  pagerLeave(pPager);
  1.3887 +  return rc;
  1.3888 +}
  1.3889 +
  1.3890 +
  1.3891 +/*
  1.3892 +** Acquire a page if it is already in the in-memory cache.  Do
  1.3893 +** not read the page from disk.  Return a pointer to the page,
  1.3894 +** or 0 if the page is not in cache.
  1.3895 +**
  1.3896 +** See also sqlite3PagerGet().  The difference between this routine
  1.3897 +** and sqlite3PagerGet() is that _get() will go to the disk and read
  1.3898 +** in the page if the page is not already in cache.  This routine
  1.3899 +** returns NULL if the page is not in cache or if a disk I/O error 
  1.3900 +** has ever happened.
  1.3901 +*/
  1.3902 +DbPage *sqlite3PagerLookup(Pager *pPager, Pgno pgno){
  1.3903 +  PgHdr *pPg = 0;
  1.3904 +
  1.3905 +  assert( pPager!=0 );
  1.3906 +  assert( pgno!=0 );
  1.3907 +
  1.3908 +  pagerEnter(pPager);
  1.3909 +  if( pPager->state==PAGER_UNLOCK ){
  1.3910 +    assert( !pPager->pAll || pPager->exclusiveMode );
  1.3911 +  }else if( pPager->errCode && pPager->errCode!=SQLITE_FULL ){
  1.3912 +    /* Do nothing */
  1.3913 +  }else if( (pPg = pager_lookup(pPager, pgno))!=0 ){
  1.3914 +    page_ref(pPg);
  1.3915 +  }
  1.3916 +  pagerLeave(pPager);
  1.3917 +  return pPg;
  1.3918 +}
  1.3919 +
  1.3920 +/*
  1.3921 +** Release a page.
  1.3922 +**
  1.3923 +** If the number of references to the page drop to zero, then the
  1.3924 +** page is added to the LRU list.  When all references to all pages
  1.3925 +** are released, a rollback occurs and the lock on the database is
  1.3926 +** removed.
  1.3927 +*/
  1.3928 +int sqlite3PagerUnref(DbPage *pPg){
  1.3929 +  Pager *pPager;
  1.3930 +
  1.3931 +  if( pPg==0 ) return SQLITE_OK;
  1.3932 +  pPager = pPg->pPager;
  1.3933 +
  1.3934 +  /* Decrement the reference count for this page
  1.3935 +  */
  1.3936 +  assert( pPg->nRef>0 );
  1.3937 +  pagerEnter(pPg->pPager);
  1.3938 +  pPg->nRef--;
  1.3939 +
  1.3940 +  CHECK_PAGE(pPg);
  1.3941 +
  1.3942 +  /* When the number of references to a page reach 0, call the
  1.3943 +  ** destructor and add the page to the freelist.
  1.3944 +  */
  1.3945 +  if( pPg->nRef==0 ){
  1.3946 +
  1.3947 +    lruListAdd(pPg);
  1.3948 +    if( pPager->xDestructor ){
  1.3949 +      pPager->xDestructor(pPg, pPager->pageSize);
  1.3950 +    }
  1.3951 +  
  1.3952 +    /* When all pages reach the freelist, drop the read lock from
  1.3953 +    ** the database file.
  1.3954 +    */
  1.3955 +    pPager->nRef--;
  1.3956 +    assert( pPager->nRef>=0 );
  1.3957 +    if( pPager->nRef==0 && (!pPager->exclusiveMode || pPager->journalOff>0) ){
  1.3958 +      pagerUnlockAndRollback(pPager);
  1.3959 +    }
  1.3960 +  }
  1.3961 +  pagerLeave(pPager);
  1.3962 +  return SQLITE_OK;
  1.3963 +}
  1.3964 +
  1.3965 +/*
  1.3966 +** Create a journal file for pPager.  There should already be a RESERVED
  1.3967 +** or EXCLUSIVE lock on the database file when this routine is called.
  1.3968 +**
  1.3969 +** Return SQLITE_OK if everything.  Return an error code and release the
  1.3970 +** write lock if anything goes wrong.
  1.3971 +*/
  1.3972 +static int pager_open_journal(Pager *pPager){
  1.3973 +  sqlite3_vfs *pVfs = pPager->pVfs;
  1.3974 +  int flags = (SQLITE_OPEN_READWRITE|SQLITE_OPEN_EXCLUSIVE|SQLITE_OPEN_CREATE);
  1.3975 +
  1.3976 +  int rc;
  1.3977 +  assert( !MEMDB );
  1.3978 +  assert( pPager->state>=PAGER_RESERVED );
  1.3979 +  assert( pPager->useJournal );
  1.3980 +  assert( pPager->pInJournal==0 );
  1.3981 +  sqlite3PagerPagecount(pPager, 0);
  1.3982 +  pagerLeave(pPager);
  1.3983 +  pPager->pInJournal = sqlite3BitvecCreate(pPager->dbSize);
  1.3984 +  pagerEnter(pPager);
  1.3985 +  if( pPager->pInJournal==0 ){
  1.3986 +    rc = SQLITE_NOMEM;
  1.3987 +    goto failed_to_open_journal;
  1.3988 +  }
  1.3989 +
  1.3990 +  if( pPager->journalOpen==0 ){
  1.3991 +    if( pPager->tempFile ){
  1.3992 +      flags |= (SQLITE_OPEN_DELETEONCLOSE|SQLITE_OPEN_TEMP_JOURNAL);
  1.3993 +    }else{
  1.3994 +      flags |= (SQLITE_OPEN_MAIN_JOURNAL);
  1.3995 +    }
  1.3996 +#ifdef SQLITE_ENABLE_ATOMIC_WRITE
  1.3997 +    rc = sqlite3JournalOpen(
  1.3998 +        pVfs, pPager->zJournal, pPager->jfd, flags, jrnlBufferSize(pPager)
  1.3999 +    );
  1.4000 +#else
  1.4001 +    rc = sqlite3OsOpen(pVfs, pPager->zJournal, pPager->jfd, flags, 0);
  1.4002 +#endif
  1.4003 +    assert( rc!=SQLITE_OK || pPager->jfd->pMethods );
  1.4004 +    pPager->journalOff = 0;
  1.4005 +    pPager->setMaster = 0;
  1.4006 +    pPager->journalHdr = 0;
  1.4007 +    if( rc!=SQLITE_OK ){
  1.4008 +      if( rc==SQLITE_NOMEM ){
  1.4009 +        sqlite3OsDelete(pVfs, pPager->zJournal, 0);
  1.4010 +      }
  1.4011 +      goto failed_to_open_journal;
  1.4012 +    }
  1.4013 +  }
  1.4014 +  pPager->journalOpen = 1;
  1.4015 +  pPager->journalStarted = 0;
  1.4016 +  pPager->needSync = 0;
  1.4017 +  pPager->alwaysRollback = 0;
  1.4018 +  pPager->nRec = 0;
  1.4019 +  if( pPager->errCode ){
  1.4020 +    rc = pPager->errCode;
  1.4021 +    goto failed_to_open_journal;
  1.4022 +  }
  1.4023 +  pPager->origDbSize = pPager->dbSize;
  1.4024 +
  1.4025 +  rc = writeJournalHdr(pPager);
  1.4026 +
  1.4027 +  if( pPager->stmtAutoopen && rc==SQLITE_OK ){
  1.4028 +    rc = sqlite3PagerStmtBegin(pPager);
  1.4029 +  }
  1.4030 +  if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM && rc!=SQLITE_IOERR_NOMEM ){
  1.4031 +    rc = pager_end_transaction(pPager, 0);
  1.4032 +    if( rc==SQLITE_OK ){
  1.4033 +      rc = SQLITE_FULL;
  1.4034 +    }
  1.4035 +  }
  1.4036 +  return rc;
  1.4037 +
  1.4038 +failed_to_open_journal:
  1.4039 +  sqlite3BitvecDestroy(pPager->pInJournal);
  1.4040 +  pPager->pInJournal = 0;
  1.4041 +  return rc;
  1.4042 +}
  1.4043 +
  1.4044 +/*
  1.4045 +** Acquire a write-lock on the database.  The lock is removed when
  1.4046 +** the any of the following happen:
  1.4047 +**
  1.4048 +**   *  sqlite3PagerCommitPhaseTwo() is called.
  1.4049 +**   *  sqlite3PagerRollback() is called.
  1.4050 +**   *  sqlite3PagerClose() is called.
  1.4051 +**   *  sqlite3PagerUnref() is called to on every outstanding page.
  1.4052 +**
  1.4053 +** The first parameter to this routine is a pointer to any open page of the
  1.4054 +** database file.  Nothing changes about the page - it is used merely to
  1.4055 +** acquire a pointer to the Pager structure and as proof that there is
  1.4056 +** already a read-lock on the database.
  1.4057 +**
  1.4058 +** The second parameter indicates how much space in bytes to reserve for a
  1.4059 +** master journal file-name at the start of the journal when it is created.
  1.4060 +**
  1.4061 +** A journal file is opened if this is not a temporary file.  For temporary
  1.4062 +** files, the opening of the journal file is deferred until there is an
  1.4063 +** actual need to write to the journal.
  1.4064 +**
  1.4065 +** If the database is already reserved for writing, this routine is a no-op.
  1.4066 +**
  1.4067 +** If exFlag is true, go ahead and get an EXCLUSIVE lock on the file
  1.4068 +** immediately instead of waiting until we try to flush the cache.  The
  1.4069 +** exFlag is ignored if a transaction is already active.
  1.4070 +*/
  1.4071 +int sqlite3PagerBegin(DbPage *pPg, int exFlag){
  1.4072 +  Pager *pPager = pPg->pPager;
  1.4073 +  int rc = SQLITE_OK;
  1.4074 +  pagerEnter(pPager);
  1.4075 +  assert( pPg->nRef>0 );
  1.4076 +  assert( pPager->state!=PAGER_UNLOCK );
  1.4077 +  if( pPager->state==PAGER_SHARED ){
  1.4078 +    assert( pPager->pInJournal==0 );
  1.4079 +    if( MEMDB ){
  1.4080 +      pPager->state = PAGER_EXCLUSIVE;
  1.4081 +      pPager->origDbSize = pPager->dbSize;
  1.4082 +    }else{
  1.4083 +      rc = sqlite3OsLock(pPager->fd, RESERVED_LOCK);
  1.4084 +      if( rc==SQLITE_OK ){
  1.4085 +        pPager->state = PAGER_RESERVED;
  1.4086 +        if( exFlag ){
  1.4087 +          rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK);
  1.4088 +        }
  1.4089 +      }
  1.4090 +      if( rc!=SQLITE_OK ){
  1.4091 +        pagerLeave(pPager);
  1.4092 +        return rc;
  1.4093 +      }
  1.4094 +      pPager->dirtyCache = 0;
  1.4095 +      PAGERTRACE2("TRANSACTION %d\n", PAGERID(pPager));
  1.4096 +      if( pPager->useJournal && !pPager->tempFile
  1.4097 +             && pPager->journalMode!=PAGER_JOURNALMODE_OFF ){
  1.4098 +        rc = pager_open_journal(pPager);
  1.4099 +      }
  1.4100 +    }
  1.4101 +  }else if( pPager->journalOpen && pPager->journalOff==0 ){
  1.4102 +    /* This happens when the pager was in exclusive-access mode the last
  1.4103 +    ** time a (read or write) transaction was successfully concluded
  1.4104 +    ** by this connection. Instead of deleting the journal file it was 
  1.4105 +    ** kept open and either was truncated to 0 bytes or its header was
  1.4106 +    ** overwritten with zeros.
  1.4107 +    */
  1.4108 +    assert( pPager->nRec==0 );
  1.4109 +    assert( pPager->origDbSize==0 );
  1.4110 +    assert( pPager->pInJournal==0 );
  1.4111 +    sqlite3PagerPagecount(pPager, 0);
  1.4112 +    pagerLeave(pPager);
  1.4113 +    pPager->pInJournal = sqlite3BitvecCreate( pPager->dbSize );
  1.4114 +    pagerEnter(pPager);
  1.4115 +    if( !pPager->pInJournal ){
  1.4116 +      rc = SQLITE_NOMEM;
  1.4117 +    }else{
  1.4118 +      pPager->origDbSize = pPager->dbSize;
  1.4119 +      rc = writeJournalHdr(pPager);
  1.4120 +    }
  1.4121 +  }
  1.4122 +  assert( !pPager->journalOpen || pPager->journalOff>0 || rc!=SQLITE_OK );
  1.4123 +  pagerLeave(pPager);
  1.4124 +  return rc;
  1.4125 +}
  1.4126 +
  1.4127 +/*
  1.4128 +** Make a page dirty.  Set its dirty flag and add it to the dirty
  1.4129 +** page list.
  1.4130 +*/
  1.4131 +static void makeDirty(PgHdr *pPg){
  1.4132 +  if( pPg->dirty==0 ){
  1.4133 +    Pager *pPager = pPg->pPager;
  1.4134 +    pPg->dirty = 1;
  1.4135 +    pPg->pDirty = pPager->pDirty;
  1.4136 +    if( pPager->pDirty ){
  1.4137 +      pPager->pDirty->pPrevDirty = pPg;
  1.4138 +    }
  1.4139 +    pPg->pPrevDirty = 0;
  1.4140 +    pPager->pDirty = pPg;
  1.4141 +  }
  1.4142 +}
  1.4143 +
  1.4144 +/*
  1.4145 +** Make a page clean.  Clear its dirty bit and remove it from the
  1.4146 +** dirty page list.
  1.4147 +*/
  1.4148 +static void makeClean(PgHdr *pPg){
  1.4149 +  if( pPg->dirty ){
  1.4150 +    pPg->dirty = 0;
  1.4151 +    if( pPg->pDirty ){
  1.4152 +      assert( pPg->pDirty->pPrevDirty==pPg );
  1.4153 +      pPg->pDirty->pPrevDirty = pPg->pPrevDirty;
  1.4154 +    }
  1.4155 +    if( pPg->pPrevDirty ){
  1.4156 +      assert( pPg->pPrevDirty->pDirty==pPg );
  1.4157 +      pPg->pPrevDirty->pDirty = pPg->pDirty;
  1.4158 +    }else{
  1.4159 +      assert( pPg->pPager->pDirty==pPg );
  1.4160 +      pPg->pPager->pDirty = pPg->pDirty;
  1.4161 +    }
  1.4162 +  }
  1.4163 +}
  1.4164 +
  1.4165 +
  1.4166 +/*
  1.4167 +** Mark a data page as writeable.  The page is written into the journal 
  1.4168 +** if it is not there already.  This routine must be called before making
  1.4169 +** changes to a page.
  1.4170 +**
  1.4171 +** The first time this routine is called, the pager creates a new
  1.4172 +** journal and acquires a RESERVED lock on the database.  If the RESERVED
  1.4173 +** lock could not be acquired, this routine returns SQLITE_BUSY.  The
  1.4174 +** calling routine must check for that return value and be careful not to
  1.4175 +** change any page data until this routine returns SQLITE_OK.
  1.4176 +**
  1.4177 +** If the journal file could not be written because the disk is full,
  1.4178 +** then this routine returns SQLITE_FULL and does an immediate rollback.
  1.4179 +** All subsequent write attempts also return SQLITE_FULL until there
  1.4180 +** is a call to sqlite3PagerCommit() or sqlite3PagerRollback() to
  1.4181 +** reset.
  1.4182 +*/
  1.4183 +static int pager_write(PgHdr *pPg){
  1.4184 +  void *pData = PGHDR_TO_DATA(pPg);
  1.4185 +  Pager *pPager = pPg->pPager;
  1.4186 +  int rc = SQLITE_OK;
  1.4187 +
  1.4188 +  /* Check for errors
  1.4189 +  */
  1.4190 +  if( pPager->errCode ){ 
  1.4191 +    return pPager->errCode;
  1.4192 +  }
  1.4193 +  if( pPager->readOnly ){
  1.4194 +    return SQLITE_PERM;
  1.4195 +  }
  1.4196 +
  1.4197 +  assert( !pPager->setMaster );
  1.4198 +
  1.4199 +  CHECK_PAGE(pPg);
  1.4200 +
  1.4201 +  /* If this page was previously acquired with noContent==1, that means
  1.4202 +  ** we didn't really read in the content of the page.  This can happen
  1.4203 +  ** (for example) when the page is being moved to the freelist.  But
  1.4204 +  ** now we are (perhaps) moving the page off of the freelist for
  1.4205 +  ** reuse and we need to know its original content so that content
  1.4206 +  ** can be stored in the rollback journal.  So do the read at this
  1.4207 +  ** time.
  1.4208 +  */
  1.4209 +  rc = pager_get_content(pPg);
  1.4210 +  if( rc ){
  1.4211 +    return rc;
  1.4212 +  }
  1.4213 +
  1.4214 +  /* Mark the page as dirty.  If the page has already been written
  1.4215 +  ** to the journal then we can return right away.
  1.4216 +  */
  1.4217 +  makeDirty(pPg);
  1.4218 +  if( pPg->inJournal && (pageInStatement(pPg) || pPager->stmtInUse==0) ){
  1.4219 +    pPager->dirtyCache = 1;
  1.4220 +    pPager->dbModified = 1;
  1.4221 +  }else{
  1.4222 +
  1.4223 +    /* If we get this far, it means that the page needs to be
  1.4224 +    ** written to the transaction journal or the ckeckpoint journal
  1.4225 +    ** or both.
  1.4226 +    **
  1.4227 +    ** First check to see that the transaction journal exists and
  1.4228 +    ** create it if it does not.
  1.4229 +    */
  1.4230 +    assert( pPager->state!=PAGER_UNLOCK );
  1.4231 +    rc = sqlite3PagerBegin(pPg, 0);
  1.4232 +    if( rc!=SQLITE_OK ){
  1.4233 +      return rc;
  1.4234 +    }
  1.4235 +    assert( pPager->state>=PAGER_RESERVED );
  1.4236 +    if( !pPager->journalOpen && pPager->useJournal
  1.4237 +          && pPager->journalMode!=PAGER_JOURNALMODE_OFF ){
  1.4238 +      rc = pager_open_journal(pPager);
  1.4239 +      if( rc!=SQLITE_OK ) return rc;
  1.4240 +    }
  1.4241 +    pPager->dirtyCache = 1;
  1.4242 +    pPager->dbModified = 1;
  1.4243 +  
  1.4244 +    /* The transaction journal now exists and we have a RESERVED or an
  1.4245 +    ** EXCLUSIVE lock on the main database file.  Write the current page to
  1.4246 +    ** the transaction journal if it is not there already.
  1.4247 +    */
  1.4248 +    if( !pPg->inJournal && (pPager->journalOpen || MEMDB) ){
  1.4249 +      if( (int)pPg->pgno <= pPager->origDbSize ){
  1.4250 +        if( MEMDB ){
  1.4251 +          PgHistory *pHist = PGHDR_TO_HIST(pPg, pPager);
  1.4252 +          PAGERTRACE3("JOURNAL %d page %d\n", PAGERID(pPager), pPg->pgno);
  1.4253 +          assert( pHist->pOrig==0 );
  1.4254 +          pHist->pOrig = sqlite3PageMalloc( pPager->pageSize );
  1.4255 +          if( !pHist->pOrig ){
  1.4256 +            return SQLITE_NOMEM;
  1.4257 +          }
  1.4258 +          memcpy(pHist->pOrig, PGHDR_TO_DATA(pPg), pPager->pageSize);
  1.4259 +        }else{
  1.4260 +          u32 cksum;
  1.4261 +          char *pData2;
  1.4262 +
  1.4263 +          /* We should never write to the journal file the page that
  1.4264 +          ** contains the database locks.  The following assert verifies
  1.4265 +          ** that we do not. */
  1.4266 +          assert( pPg->pgno!=PAGER_MJ_PGNO(pPager) );
  1.4267 +          pData2 = CODEC2(pPager, pData, pPg->pgno, 7);
  1.4268 +          cksum = pager_cksum(pPager, (u8*)pData2);
  1.4269 +          rc = write32bits(pPager->jfd, pPager->journalOff, pPg->pgno);
  1.4270 +          if( rc==SQLITE_OK ){
  1.4271 +            rc = sqlite3OsWrite(pPager->jfd, pData2, pPager->pageSize,
  1.4272 +                                pPager->journalOff + 4);
  1.4273 +            pPager->journalOff += pPager->pageSize+4;
  1.4274 +          }
  1.4275 +          if( rc==SQLITE_OK ){
  1.4276 +            rc = write32bits(pPager->jfd, pPager->journalOff, cksum);
  1.4277 +            pPager->journalOff += 4;
  1.4278 +          }
  1.4279 +          IOTRACE(("JOUT %p %d %lld %d\n", pPager, pPg->pgno, 
  1.4280 +                   pPager->journalOff, pPager->pageSize));
  1.4281 +          PAGER_INCR(sqlite3_pager_writej_count);
  1.4282 +          PAGERTRACE5("JOURNAL %d page %d needSync=%d hash(%08x)\n",
  1.4283 +               PAGERID(pPager), pPg->pgno, pPg->needSync, pager_pagehash(pPg));
  1.4284 +
  1.4285 +          /* An error has occured writing to the journal file. The 
  1.4286 +          ** transaction will be rolled back by the layer above.
  1.4287 +          */
  1.4288 +          if( rc!=SQLITE_OK ){
  1.4289 +            return rc;
  1.4290 +          }
  1.4291 +
  1.4292 +          pPager->nRec++;
  1.4293 +          assert( pPager->pInJournal!=0 );
  1.4294 +          sqlite3BitvecSet(pPager->pInJournal, pPg->pgno);
  1.4295 +          pPg->needSync = !pPager->noSync;
  1.4296 +          if( pPager->stmtInUse ){
  1.4297 +            sqlite3BitvecSet(pPager->pInStmt, pPg->pgno);
  1.4298 +          }
  1.4299 +        }
  1.4300 +      }else{
  1.4301 +        pPg->needSync = !pPager->journalStarted && !pPager->noSync;
  1.4302 +        PAGERTRACE4("APPEND %d page %d needSync=%d\n",
  1.4303 +                PAGERID(pPager), pPg->pgno, pPg->needSync);
  1.4304 +      }
  1.4305 +      if( pPg->needSync ){
  1.4306 +        pPager->needSync = 1;
  1.4307 +      }
  1.4308 +      pPg->inJournal = 1;
  1.4309 +    }
  1.4310 +  
  1.4311 +    /* If the statement journal is open and the page is not in it,
  1.4312 +    ** then write the current page to the statement journal.  Note that
  1.4313 +    ** the statement journal format differs from the standard journal format
  1.4314 +    ** in that it omits the checksums and the header.
  1.4315 +    */
  1.4316 +    if( pPager->stmtInUse 
  1.4317 +     && !pageInStatement(pPg) 
  1.4318 +     && (int)pPg->pgno<=pPager->stmtSize 
  1.4319 +    ){
  1.4320 +      assert( pPg->inJournal || (int)pPg->pgno>pPager->origDbSize );
  1.4321 +      if( MEMDB ){
  1.4322 +        PgHistory *pHist = PGHDR_TO_HIST(pPg, pPager);
  1.4323 +        assert( pHist->pStmt==0 );
  1.4324 +        pHist->pStmt = sqlite3PageMalloc( pPager->pageSize );
  1.4325 +        if( pHist->pStmt ){
  1.4326 +          memcpy(pHist->pStmt, PGHDR_TO_DATA(pPg), pPager->pageSize);
  1.4327 +        }
  1.4328 +        PAGERTRACE3("STMT-JOURNAL %d page %d\n", PAGERID(pPager), pPg->pgno);
  1.4329 +        page_add_to_stmt_list(pPg);
  1.4330 +      }else{
  1.4331 +        i64 offset = pPager->stmtNRec*(4+pPager->pageSize);
  1.4332 +        char *pData2 = CODEC2(pPager, pData, pPg->pgno, 7);
  1.4333 +        rc = write32bits(pPager->stfd, offset, pPg->pgno);
  1.4334 +        if( rc==SQLITE_OK ){
  1.4335 +          rc = sqlite3OsWrite(pPager->stfd, pData2, pPager->pageSize, offset+4);
  1.4336 +        }
  1.4337 +        PAGERTRACE3("STMT-JOURNAL %d page %d\n", PAGERID(pPager), pPg->pgno);
  1.4338 +        if( rc!=SQLITE_OK ){
  1.4339 +          return rc;
  1.4340 +        }
  1.4341 +        pPager->stmtNRec++;
  1.4342 +        assert( pPager->pInStmt!=0 );
  1.4343 +        sqlite3BitvecSet(pPager->pInStmt, pPg->pgno);
  1.4344 +      }
  1.4345 +    }
  1.4346 +  }
  1.4347 +
  1.4348 +  /* Update the database size and return.
  1.4349 +  */
  1.4350 +  assert( pPager->state>=PAGER_SHARED );
  1.4351 +  if( pPager->dbSize<(int)pPg->pgno ){
  1.4352 +    pPager->dbSize = pPg->pgno;
  1.4353 +    if( !MEMDB && pPager->dbSize==PENDING_BYTE/pPager->pageSize ){
  1.4354 +      pPager->dbSize++;
  1.4355 +    }
  1.4356 +  }
  1.4357 +  return rc;
  1.4358 +}
  1.4359 +
  1.4360 +/*
  1.4361 +** Mark the data-page pDbPage as writable. pager_write() does the work
  1.4362 +** of opening the journal file (if it is not already open) and writing
  1.4363 +** the page to the journal.
  1.4364 +**
  1.4365 +** Unlike pager_write(), this function also deals with the special case
  1.4366 +** where 2 or more pages fit on a single disk sector: all co-resident
  1.4367 +** pages must have been written to the journal before returning.  Returns
  1.4368 +** SQLITE_OK, or the first error from sqlite3PagerGet() or pager_write().
  1.4369 +*/
  1.4370 +int sqlite3PagerWrite(DbPage *pDbPage){
  1.4371 +  int rc = SQLITE_OK;
  1.4372 +
  1.4373 +  PgHdr *pPg = pDbPage;
  1.4374 +  Pager *pPager = pPg->pPager;
  1.4375 +  Pgno nPagePerSector = (pPager->sectorSize/pPager->pageSize);
  1.4376 +
  1.4377 +  pagerEnter(pPager);
  1.4378 +  if( !MEMDB && nPagePerSector>1 ){  /* Several pages share one sector */
  1.4379 +    Pgno nPageCount;          /* Total number of pages in database file */
  1.4380 +    Pgno pg1;                 /* First page of the sector pPg is located on. */
  1.4381 +    int nPage;                /* Number of pages starting at pg1 to journal */
  1.4382 +    int ii;                   /* Loop counter over the nPage pages */
  1.4383 +    int needSync = 0;         /* True if any of the pages has needSync set */
  1.4384 +
  1.4385 +    /* Set the doNotSync flag to 1. This is because we cannot allow a journal
  1.4386 +    ** header to be written between the pages journaled by this function.
  1.4387 +    */
  1.4388 +    assert( pPager->doNotSync==0 );
  1.4389 +    pPager->doNotSync = 1;
  1.4390 +
  1.4391 +    /* This trick assumes that both the page-size and sector-size are
  1.4392 +    ** an integer power of 2. It sets variable pg1 to the identifier
  1.4393 +    ** of the first page of the sector pPg is located on.
  1.4394 +    */
  1.4395 +    pg1 = ((pPg->pgno-1) & ~(nPagePerSector-1)) + 1;
  1.4396 +
  1.4397 +    sqlite3PagerPagecount(pPager, (int *)&nPageCount);  /* NOTE(review): return value ignored */
  1.4398 +    if( pPg->pgno>nPageCount ){
  1.4399 +      nPage = (pPg->pgno - pg1)+1;      /* pPg is past the end: stop at pPg */
  1.4400 +    }else if( (pg1+nPagePerSector-1)>nPageCount ){
  1.4401 +      nPage = nPageCount+1-pg1;         /* Sector extends past the last page */
  1.4402 +    }else{
  1.4403 +      nPage = nPagePerSector;           /* The whole sector lies in the file */
  1.4404 +    }
  1.4405 +    assert(nPage>0);
  1.4406 +    assert(pg1<=pPg->pgno);
  1.4407 +    assert((pg1+nPage)>pPg->pgno);
  1.4408 +
  1.4409 +    for(ii=0; ii<nPage && rc==SQLITE_OK; ii++){
  1.4410 +      Pgno pg = pg1+ii;
  1.4411 +      PgHdr *pPage;
  1.4412 +      if( pg==pPg->pgno || !sqlite3BitvecTest(pPager->pInJournal, pg) ){
  1.4413 +        if( pg!=PAGER_MJ_PGNO(pPager) ){  /* The PAGER_MJ_PGNO page is skipped */
  1.4414 +          rc = sqlite3PagerGet(pPager, pg, &pPage);
  1.4415 +          if( rc==SQLITE_OK ){
  1.4416 +            rc = pager_write(pPage);
  1.4417 +            if( pPage->needSync ){
  1.4418 +              needSync = 1;
  1.4419 +            }
  1.4420 +            sqlite3PagerUnref(pPage);
  1.4421 +          }
  1.4422 +        }
  1.4423 +      }else if( (pPage = pager_lookup(pPager, pg))!=0 ){  /* Bit already set in pInJournal */
  1.4424 +        if( pPage->needSync ){
  1.4425 +          needSync = 1;
  1.4426 +        }
  1.4427 +      }
  1.4428 +    }
  1.4429 +
  1.4430 +    /* If the PgHdr.needSync flag is set for any of the nPage pages
  1.4431 +    ** starting at pg1, then it needs to be set for all of them. Because
  1.4432 +    ** writing to any of these nPage pages may damage the others, the
  1.4433 +    ** journal file must contain sync()ed copies of all of them
  1.4434 +    ** before any of them can be written out to the database file.
  1.4435 +    */
  1.4436 +    if( needSync ){
  1.4437 +      for(ii=0; ii<nPage && needSync; ii++){
  1.4438 +        PgHdr *pPage = pager_lookup(pPager, pg1+ii);
  1.4439 +        if( pPage ) pPage->needSync = 1;
  1.4440 +      }
  1.4441 +      assert(pPager->needSync);
  1.4442 +    }
  1.4443 +
  1.4444 +    assert( pPager->doNotSync==1 );
  1.4445 +    pPager->doNotSync = 0;   /* Cleared on error paths too: the loop exits rather than returns */
  1.4446 +  }else{
  1.4447 +    rc = pager_write(pDbPage);
  1.4448 +  }
  1.4449 +  pagerLeave(pPager);
  1.4450 +  return rc;
  1.4451 +}
  1.4452 +
  1.4453 +/*
  1.4454 +** Return TRUE if the page given in the argument was previously passed
  1.4455 +** to sqlite3PagerWrite(), i.e. if it is ok to change the content of the
  1.4456 +** page.  The test is simply the page's dirty flag.
  1.4457 +*/
  1.4458 +#ifndef NDEBUG   /* Only compiled when NDEBUG is not defined */
  1.4459 +int sqlite3PagerIswriteable(DbPage *pPg){
  1.4460 +  return pPg->dirty;
  1.4461 +}
  1.4462 +#endif
  1.4463 +
  1.4464 +/*
  1.4465 +** A call to this routine tells the pager that it is not necessary to
  1.4466 +** write the information on page pPg back to the disk, even though
  1.4467 +** that page might be marked as dirty.
  1.4468 +**
  1.4469 +** The overlying software layer calls this routine when all of the data
  1.4470 +** on the given page is unused.  The pager marks the page as clean so
  1.4471 +** that it does not get written to disk.
  1.4472 +**
  1.4473 +** Tests show that this optimization, together with the
  1.4474 +** sqlite3PagerDontRollback() below, more than doubles the speed
  1.4475 +** of large INSERT operations and quadruples the speed of large DELETEs.
  1.4476 +**
  1.4477 +** When this routine is called, set the alwaysRollback flag to true.
  1.4478 +** Subsequent calls to sqlite3PagerDontRollback() for the same page
  1.4479 +** will thereafter be ignored.  This is necessary to avoid a problem
  1.4480 +** where a page with data is added to the freelist during one part of
  1.4481 +** a transaction then removed from the freelist during a later part
  1.4482 +** of the same transaction and reused for some other purpose.  When it
  1.4483 +** is first added to the freelist, this routine is called.  When reused,
  1.4484 +** the sqlite3PagerDontRollback() routine is called.  But because the
  1.4485 +** page contains critical data, we still need to be sure it gets
  1.4486 +** rolled back in spite of the sqlite3PagerDontRollback() call.
  1.4487 +*/
  1.4488 +void sqlite3PagerDontWrite(DbPage *pDbPage){
  1.4489 +  PgHdr *pPg = pDbPage;
  1.4490 +  Pager *pPager = pPg->pPager;
  1.4491 +
  1.4492 +  if( MEMDB ) return;          /* No-op for an in-memory database */
  1.4493 +  pagerEnter(pPager);
  1.4494 +  pPg->alwaysRollback = 1;     /* Set even if the page is left dirty below */
  1.4495 +  if( pPg->dirty && !pPager->stmtInUse ){
  1.4496 +    assert( pPager->state>=PAGER_SHARED );
  1.4497 +    if( pPager->dbSize==(int)pPg->pgno && pPager->origDbSize<pPager->dbSize ){
  1.4498 +      /* If this page is the last page in the file and the file has grown
  1.4499 +      ** during the current transaction, then do NOT mark the page as clean.
  1.4500 +      ** When the database file grows, we must make sure that the last page
  1.4501 +      ** gets written at least once so that the disk file will be the correct
  1.4502 +      ** size. If you do not write this page and the size of the file
  1.4503 +      ** on the disk ends up being too small, that can lead to database
  1.4504 +      ** corruption during the next transaction.
  1.4505 +      */
  1.4506 +    }else{
  1.4507 +      PAGERTRACE3("DONT_WRITE page %d of %d\n", pPg->pgno, PAGERID(pPager));
  1.4508 +      IOTRACE(("CLEAN %p %d\n", pPager, pPg->pgno))
  1.4509 +      makeClean(pPg);
  1.4510 +#ifdef SQLITE_CHECK_PAGES
  1.4511 +      pPg->pageHash = pager_pagehash(pPg);  /* Refresh the stored page hash */
  1.4512 +#endif
  1.4513 +    }
  1.4514 +  }
  1.4515 +  pagerLeave(pPager);
  1.4516 +}
  1.4517 +
  1.4518 +/*
  1.4519 +** A call to this routine tells the pager that if a rollback occurs,
  1.4520 +** it is not necessary to restore the data on the given page.  This
  1.4521 +** means that the pager does not have to record the given page in the
  1.4522 +** rollback journal.
  1.4523 +**
  1.4524 +** If we have not yet actually read the content of this page (if
  1.4525 +** the PgHdr.needRead flag is set) then this routine acts as a promise
  1.4526 +** that we will never need to read the page content in the future,
  1.4527 +** so the needRead flag can be cleared at this point.
  1.4528 +*/
  1.4529 +void sqlite3PagerDontRollback(DbPage *pPg){
  1.4530 +  Pager *pPager = pPg->pPager;
  1.4531 +
  1.4532 +  pagerEnter(pPager);
  1.4533 +  assert( pPager->state>=PAGER_RESERVED );
  1.4534 +
  1.4535 +  /* If the journal file is not open, or DontWrite() has been called on
  1.4536 +  ** this page (DontWrite() sets the alwaysRollback flag), or the pager-wide
  1.4537 +  ** alwaysRollback flag is set, then this function is a no-op.
  1.4538 +  */
  1.4539 +  if( pPager->journalOpen==0 || pPg->alwaysRollback || pPager->alwaysRollback ){
  1.4540 +    pagerLeave(pPager);
  1.4541 +    return;
  1.4542 +  }
  1.4543 +  assert( !MEMDB );    /* For a memdb, pPager->journalOpen is always 0 */
  1.4544 +
  1.4545 +#ifdef SQLITE_SECURE_DELETE
  1.4546 +  if( pPg->inJournal || (int)pPg->pgno > pPager->origDbSize ){
  1.4547 +    pagerLeave(pPager);  /* Every exit path must balance pagerEnter() */
  1.4548 +    return;
  1.4549 +  }
  1.4550 +#endif
  1.4551 +  /* If SECURE_DELETE is disabled, then there is no way that this
  1.4552 +  ** routine can be called on a page for which sqlite3PagerDontWrite()
  1.4553 +  ** has not been previously called during the same transaction.
  1.4554 +  ** And if DontWrite() has previously been called, the following
  1.4555 +  ** conditions must be met.
  1.4556 +  **
  1.4557 +  ** (Later:)  Not true.  If the database is corrupted by having duplicate
  1.4558 +  ** pages on the freelist (ex: corrupt9.test) then the following is not
  1.4559 +  ** necessarily true:
  1.4560 +  */
  1.4561 +  /* assert( !pPg->inJournal && (int)pPg->pgno <= pPager->origDbSize ); */
  1.4562 +
  1.4563 +  assert( pPager->pInJournal!=0 );
  1.4564 +  sqlite3BitvecSet(pPager->pInJournal, pPg->pgno);  /* Flag as journalled; nothing is written */
  1.4565 +  pPg->inJournal = 1;
  1.4566 +  pPg->needRead = 0;
  1.4567 +  if( pPager->stmtInUse ){
  1.4568 +    assert( pPager->stmtSize >= pPager->origDbSize );
  1.4569 +    sqlite3BitvecSet(pPager->pInStmt, pPg->pgno);   /* Likewise for the statement journal */
  1.4570 +  }
  1.4571 +  PAGERTRACE3("DONT_ROLLBACK page %d of %d\n", pPg->pgno, PAGERID(pPager));
  1.4572 +  IOTRACE(("GARBAGE %p %d\n", pPager, pPg->pgno))
  1.4573 +  pagerLeave(pPager);
  1.4574 +}
  1.4575 +
  1.4576 +
  1.4577 +/*
  1.4578 +** Increment the database file change-counter (4 bytes at offset 24 of page 1),
  1.4579 +** unless changeCountDone is already set.  isDirect!=0 skips sqlite3PagerWrite().
  1.4580 +*/
  1.4581 +static int pager_incr_changecounter(Pager *pPager, int isDirect){
  1.4582 +  PgHdr *pPgHdr;
  1.4583 +  u32 change_counter;
  1.4584 +  int rc = SQLITE_OK;
  1.4585 +
  1.4586 +#ifndef SQLITE_ENABLE_ATOMIC_WRITE
  1.4587 +  assert( isDirect==0 );  /* isDirect is only true for atomic writes */
  1.4588 +#endif
  1.4589 +  if( !pPager->changeCountDone ){
  1.4590 +    /* Open page 1 of the file for writing. */
  1.4591 +    rc = sqlite3PagerGet(pPager, 1, &pPgHdr);
  1.4592 +    if( rc!=SQLITE_OK ) return rc;
  1.4593 +
  1.4594 +    if( !isDirect ){
  1.4595 +      rc = sqlite3PagerWrite(pPgHdr);
  1.4596 +      if( rc!=SQLITE_OK ){
  1.4597 +        sqlite3PagerUnref(pPgHdr);   /* Drop the reference taken by sqlite3PagerGet() */
  1.4598 +        return rc;
  1.4599 +      }
  1.4600 +    }
  1.4601 +
  1.4602 +    /* Increment the cached counter value (pPager->dbFileVers) and store it at byte 24. */
  1.4603 +    change_counter = sqlite3Get4byte((u8*)pPager->dbFileVers);
  1.4604 +    change_counter++;
  1.4605 +    put32bits(((char*)PGHDR_TO_DATA(pPgHdr))+24, change_counter);
  1.4606 +
  1.4607 +#ifdef SQLITE_ENABLE_ATOMIC_WRITE
  1.4608 +    if( isDirect && pPager->fd->pMethods ){
  1.4609 +      const void *zBuf = PGHDR_TO_DATA(pPgHdr);
  1.4610 +      rc = sqlite3OsWrite(pPager->fd, zBuf, pPager->pageSize, 0);  /* Page 1 lives at offset 0 */
  1.4611 +    }
  1.4612 +#endif
  1.4613 +
  1.4614 +    /* Release the page reference. */
  1.4615 +    sqlite3PagerUnref(pPgHdr);
  1.4616 +    pPager->changeCountDone = 1;  /* NOTE(review): set even if the direct write above failed */
  1.4617 +  }
  1.4618 +  return rc;
  1.4619 +}
  1.4620 +
  1.4621 +/*
  1.4622 +** Sync the database file (pPager->fd) to disk; returns the sqlite3OsSync() result.
  1.4623 +*/
  1.4624 +int sqlite3PagerSync(Pager *pPager){
  1.4625 +  int rc;
  1.4626 +  pagerEnter(pPager);
  1.4627 +  rc = sqlite3OsSync(pPager->fd, pPager->sync_flags);  /* Unconditional: pPager->noSync is not consulted */
  1.4628 +  pagerLeave(pPager);
  1.4629 +  return rc;
  1.4630 +}
  1.4631 +
  1.4632 +/*
  1.4633 +** Sync the database file for the pager pPager. zMaster points to the name
  1.4634 +** of a master journal file that should be written into the individual
  1.4635 +** journal file. zMaster may be NULL, which is interpreted as no master
  1.4636 +** journal (a single database transaction).
  1.4637 +**
  1.4638 +** This routine ensures that the journal is synced, all dirty pages written
  1.4639 +** to the database file and the database file synced. The only thing that
  1.4640 +** remains to commit the transaction is to delete the journal file (or
  1.4641 +** master journal file if specified).
  1.4642 +**
  1.4643 +** Note that if zMaster==NULL, this does not overwrite a previous value
  1.4644 +** passed to an sqlite3PagerCommitPhaseOne() call.
  1.4645 +**
  1.4646 +** If parameter nTrunc is non-zero, then the pager file is truncated to
  1.4647 +** nTrunc pages (this is used by auto-vacuum databases).
  1.4648 +**
  1.4649 +** If the final parameter - noSync - is true, then the database file itself
  1.4650 +** is not synced. The caller must call sqlite3PagerSync() directly to
  1.4651 +** sync the database file before calling CommitPhaseTwo() to delete the
  1.4652 +** journal file in this case.
  1.4653 +*/
  1.4654 +int sqlite3PagerCommitPhaseOne(
  1.4655 +  Pager *pPager, 
  1.4656 +  const char *zMaster, 
  1.4657 +  Pgno nTrunc,
  1.4658 +  int noSync
  1.4659 +){
  1.4660 +  int rc = SQLITE_OK;
  1.4661 +
  1.4662 +  if( pPager->errCode ){      /* Pager is already in an error state */
  1.4663 +    return pPager->errCode;
  1.4664 +  }
  1.4665 +
  1.4666 +  /* If no changes have been made, we can leave the transaction early.
  1.4667 +  */
  1.4668 +  if( pPager->dbModified==0 &&
  1.4669 +        (pPager->journalMode!=PAGER_JOURNALMODE_DELETE ||
  1.4670 +          pPager->exclusiveMode!=0) ){
  1.4671 +    assert( pPager->dirtyCache==0 || pPager->journalOpen==0 );
  1.4672 +    return SQLITE_OK;
  1.4673 +  }
  1.4674 +
  1.4675 +  PAGERTRACE4("DATABASE SYNC: File=%s zMaster=%s nTrunc=%d\n", 
  1.4676 +      pPager->zFilename, zMaster, nTrunc);
  1.4677 +  pagerEnter(pPager);         /* All exits from here on go through sync_exit or fall to it */
  1.4678 +
  1.4679 +  /* If this is an in-memory db, or no pages have been written to, or this
  1.4680 +  ** function has already been called, it is a no-op.
  1.4681 +  */
  1.4682 +  if( pPager->state!=PAGER_SYNCED && !MEMDB && pPager->dirtyCache ){
  1.4683 +    PgHdr *pPg;
  1.4684 +
  1.4685 +#ifdef SQLITE_ENABLE_ATOMIC_WRITE
  1.4686 +    /* The atomic-write optimization can be used if all of the
  1.4687 +    ** following are true:
  1.4688 +    **
  1.4689 +    **    + The file-system supports the atomic-write property for
  1.4690 +    **      blocks of size page-size, and
  1.4691 +    **    + This commit is not part of a multi-file transaction, and
  1.4692 +    **    + Exactly one page has been modified and stored in the journal file.
  1.4693 +    **
  1.4694 +    ** If the optimization can be used, then the journal file will never
  1.4695 +    ** be created for this transaction.
  1.4696 +    */
  1.4697 +    int useAtomicWrite = (
  1.4698 +        !zMaster && 
  1.4699 +        pPager->journalOpen &&
  1.4700 +        pPager->journalOff==jrnlBufferSize(pPager) && 
  1.4701 +        nTrunc==0 && 
  1.4702 +        (0==pPager->pDirty || 0==pPager->pDirty->pDirty)
  1.4703 +    );
  1.4704 +    assert( pPager->journalOpen || pPager->journalMode==PAGER_JOURNALMODE_OFF );
  1.4705 +    if( useAtomicWrite ){
  1.4706 +      /* Update the nRec field in the journal file. */
  1.4707 +      int offset = pPager->journalHdr + sizeof(aJournalMagic);
  1.4708 +      assert(pPager->nRec==1);
  1.4709 +      rc = write32bits(pPager->jfd, offset, pPager->nRec);
  1.4710 +
  1.4711 +      /* Update the db file change counter. The following call will modify
  1.4712 +      ** the in-memory representation of page 1 to include the updated
  1.4713 +      ** change counter and then write page 1 directly to the database
  1.4714 +      ** file. Because of the atomic-write property of the host file-system,
  1.4715 +      ** this is safe.
  1.4716 +      */
  1.4717 +      if( rc==SQLITE_OK ){
  1.4718 +        rc = pager_incr_changecounter(pPager, 1);
  1.4719 +      }
  1.4720 +    }else{
  1.4721 +      rc = sqlite3JournalCreate(pPager->jfd);
  1.4722 +    }
  1.4723 +
  1.4724 +    if( !useAtomicWrite && rc==SQLITE_OK )  /* Guards the if-statement that follows the #endif */
  1.4725 +#endif
  1.4726 +
  1.4727 +    /* If a master journal file name has already been written to the
  1.4728 +    ** journal file, then no sync is required. This happens when it is
  1.4729 +    ** written, then the process fails to upgrade from a RESERVED to an
  1.4730 +    ** EXCLUSIVE lock. The next time the process tries to commit the
  1.4731 +    ** transaction the m-j name will have already been written.
  1.4732 +    */
  1.4733 +    if( !pPager->setMaster ){
  1.4734 +      rc = pager_incr_changecounter(pPager, 0);
  1.4735 +      if( rc!=SQLITE_OK ) goto sync_exit;
  1.4736 +      if( pPager->journalMode!=PAGER_JOURNALMODE_OFF ){
  1.4737 +#ifndef SQLITE_OMIT_AUTOVACUUM
  1.4738 +        if( nTrunc!=0 ){
  1.4739 +          /* If this transaction has made the database smaller, then all pages
  1.4740 +          ** being discarded by the truncation must be written to the journal
  1.4741 +          ** file.
  1.4742 +          */
  1.4743 +          Pgno i;
  1.4744 +          int iSkip = PAGER_MJ_PGNO(pPager);  /* This page number is never journalled here */
  1.4745 +          for( i=nTrunc+1; i<=pPager->origDbSize; i++ ){
  1.4746 +            if( !sqlite3BitvecTest(pPager->pInJournal, i) && i!=iSkip ){
  1.4747 +              rc = sqlite3PagerGet(pPager, i, &pPg);
  1.4748 +              if( rc!=SQLITE_OK ) goto sync_exit;
  1.4749 +              rc = sqlite3PagerWrite(pPg);
  1.4750 +              sqlite3PagerUnref(pPg);         /* Unref before testing rc so no reference leaks */
  1.4751 +              if( rc!=SQLITE_OK ) goto sync_exit;
  1.4752 +            }
  1.4753 +          } 
  1.4754 +        }
  1.4755 +#endif
  1.4756 +        rc = writeMasterJournal(pPager, zMaster);
  1.4757 +        if( rc!=SQLITE_OK ) goto sync_exit;
  1.4758 +        rc = syncJournal(pPager);
  1.4759 +      }
  1.4760 +    }
  1.4761 +    if( rc!=SQLITE_OK ) goto sync_exit;
  1.4762 +
  1.4763 +#ifndef SQLITE_OMIT_AUTOVACUUM
  1.4764 +    if( nTrunc!=0 ){
  1.4765 +      rc = sqlite3PagerTruncate(pPager, nTrunc);
  1.4766 +      if( rc!=SQLITE_OK ) goto sync_exit;
  1.4767 +    }
  1.4768 +#endif
  1.4769 +
  1.4770 +    /* Write all dirty pages to the database file */
  1.4771 +    pPg = pager_get_all_dirty_pages(pPager);
  1.4772 +    rc = pager_write_pagelist(pPg);
  1.4773 +    if( rc!=SQLITE_OK ){
  1.4774 +      assert( rc!=SQLITE_IOERR_BLOCKED );
  1.4775 +      /* The error might have left the dirty list all fouled up here,
  1.4776 +      ** but that does not matter because if the dirty list did
  1.4777 +      ** get corrupted, then the transaction will roll back and
  1.4778 +      ** discard the dirty list.  There is an assert in
  1.4779 +      ** pager_get_all_dirty_pages() that verifies that no attempt
  1.4780 +      ** is made to use an invalid dirty list.
  1.4781 +      */
  1.4782 +      goto sync_exit;
  1.4783 +    }
  1.4784 +    pPager->pDirty = 0;
  1.4785 +
  1.4786 +    /* Sync the database file, unless disabled pager-wide or by the caller. */
  1.4787 +    if( !pPager->noSync && !noSync ){
  1.4788 +      rc = sqlite3OsSync(pPager->fd, pPager->sync_flags);
  1.4789 +    }
  1.4790 +    IOTRACE(("DBSYNC %p\n", pPager))
  1.4791 +
  1.4792 +    pPager->state = PAGER_SYNCED;   /* Set regardless of the sync result in rc */
  1.4793 +  }else if( MEMDB && nTrunc!=0 ){
  1.4794 +    rc = sqlite3PagerTruncate(pPager, nTrunc);
  1.4795 +  }
  1.4796 +
  1.4797 +sync_exit:
  1.4798 +  if( rc==SQLITE_IOERR_BLOCKED ){
  1.4799 +    /* pager_incr_changecounter() may attempt to obtain an exclusive
  1.4800 +     * lock to spill the cache and return IOERR_BLOCKED. But since
  1.4801 +     * there is no chance the cache is inconsistent, it is
  1.4802 +     * better to return SQLITE_BUSY.
  1.4803 +     */
  1.4804 +    rc = SQLITE_BUSY;
  1.4805 +  }
  1.4806 +  pagerLeave(pPager);
  1.4807 +  return rc;
  1.4808 +}
  1.4809 +
  1.4810 +
  1.4811 +/*
  1.4812 +** Commit all changes to the database and release the write lock.
  1.4813 +**
  1.4814 +** If the commit fails for any reason, a rollback attempt is made
  1.4815 +** and an error code is returned.  If the commit worked, SQLITE_OK
  1.4816 +** is returned.
  1.4817 +*/
  1.4818 +int sqlite3PagerCommitPhaseTwo(Pager *pPager){
  1.4819 +  int rc;
  1.4820 +  PgHdr *pPg;
  1.4821 +
  1.4822 +  if( pPager->errCode ){      /* Pager is already in an error state */
  1.4823 +    return pPager->errCode;
  1.4824 +  }
  1.4825 +  if( pPager->state<PAGER_RESERVED ){
  1.4826 +    return SQLITE_ERROR;
  1.4827 +  }
  1.4828 +  if( pPager->dbModified==0 &&    /* Same early-out test as in CommitPhaseOne() */
  1.4829 +        (pPager->journalMode!=PAGER_JOURNALMODE_DELETE ||
  1.4830 +          pPager->exclusiveMode!=0) ){
  1.4831 +    assert( pPager->dirtyCache==0 || pPager->journalOpen==0 );
  1.4832 +    return SQLITE_OK;
  1.4833 +  }
  1.4834 +  pagerEnter(pPager);
  1.4835 +  PAGERTRACE2("COMMIT %d\n", PAGERID(pPager));
  1.4836 +  if( MEMDB ){                /* In-memory database: discard the saved page history */
  1.4837 +    pPg = pager_get_all_dirty_pages(pPager);
  1.4838 +    while( pPg ){
  1.4839 +      PgHistory *pHist = PGHDR_TO_HIST(pPg, pPager);
  1.4840 +      clearHistory(pHist);
  1.4841 +      pPg->dirty = 0;
  1.4842 +      pPg->inJournal = 0;
  1.4843 +      pHist->inStmt = 0;
  1.4844 +      pPg->needSync = 0;
  1.4845 +      pHist->pPrevStmt = pHist->pNextStmt = 0;
  1.4846 +      pPg = pPg->pDirty;
  1.4847 +    }
  1.4848 +    pPager->pDirty = 0;
  1.4849 +#ifndef NDEBUG
  1.4850 +    for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){  /* Debug check over every page */
  1.4851 +      PgHistory *pHist = PGHDR_TO_HIST(pPg, pPager);
  1.4852 +      assert( !pPg->alwaysRollback );
  1.4853 +      assert( !pHist->pOrig );
  1.4854 +      assert( !pHist->pStmt );
  1.4855 +    }
  1.4856 +#endif
  1.4857 +    pPager->pStmt = 0;
  1.4858 +    pPager->state = PAGER_SHARED;
  1.4859 +    pagerLeave(pPager);
  1.4860 +    return SQLITE_OK;
  1.4861 +  }
  1.4862 +  assert( pPager->state==PAGER_SYNCED || !pPager->dirtyCache );
  1.4863 +  rc = pager_end_transaction(pPager, pPager->setMaster);
  1.4864 +  rc = pager_error(pPager, rc);   /* Pass the result through pager_error() before returning */
  1.4865 +  pagerLeave(pPager);
  1.4866 +  return rc;
  1.4867 +}
  1.4868 +
  1.4869 +/*
  1.4870 +** Rollback all changes.  The database falls back to PAGER_SHARED mode.
  1.4871 +** All in-memory cache pages revert to their original data contents.
  1.4872 +** The journal is deleted.
  1.4873 +**
  1.4874 +** This routine cannot fail unless some other process is not following
  1.4875 +** the correct locking protocol or unless some other
  1.4876 +** process is writing trash into the journal file (SQLITE_CORRUPT) or
  1.4877 +** unless a prior malloc() failed (SQLITE_NOMEM).  Appropriate error
  1.4878 +** codes are returned for all these occasions.  Otherwise,
  1.4879 +** SQLITE_OK is returned.
  1.4880 +*/
  1.4881 +int sqlite3PagerRollback(Pager *pPager){
  1.4882 +  int rc;
  1.4883 +  PAGERTRACE2("ROLLBACK %d\n", PAGERID(pPager));
  1.4884 +  if( MEMDB ){                /* In-memory database: restore pages from PgHistory */
  1.4885 +    PgHdr *p;
  1.4886 +    for(p=pPager->pAll; p; p=p->pNextAll){
  1.4887 +      PgHistory *pHist;
  1.4888 +      assert( !p->alwaysRollback );
  1.4889 +      if( !p->dirty ){        /* Clean pages carry no saved copies; skip them */
  1.4890 +        assert( !((PgHistory *)PGHDR_TO_HIST(p, pPager))->pOrig );
  1.4891 +        assert( !((PgHistory *)PGHDR_TO_HIST(p, pPager))->pStmt );
  1.4892 +        continue;
  1.4893 +      }
  1.4894 +
  1.4895 +      pHist = PGHDR_TO_HIST(p, pPager);
  1.4896 +      if( pHist->pOrig ){     /* Copy the saved original content back over the page */
  1.4897 +        memcpy(PGHDR_TO_DATA(p), pHist->pOrig, pPager->pageSize);
  1.4898 +        PAGERTRACE3("ROLLBACK-PAGE %d of %d\n", p->pgno, PAGERID(pPager));
  1.4899 +      }else{
  1.4900 +        PAGERTRACE3("PAGE %d is clean on %d\n", p->pgno, PAGERID(pPager));
  1.4901 +      }
  1.4902 +      clearHistory(pHist);
  1.4903 +      p->dirty = 0;
  1.4904 +      p->inJournal = 0;
  1.4905 +      pHist->inStmt = 0;
  1.4906 +      pHist->pPrevStmt = pHist->pNextStmt = 0;
  1.4907 +      if( pPager->xReiniter ){
  1.4908 +        pPager->xReiniter(p, pPager->pageSize);   /* Optional callback, run once per restored page */
  1.4909 +      }
  1.4910 +    }
  1.4911 +    pPager->pDirty = 0;
  1.4912 +    pPager->pStmt = 0;
  1.4913 +    pPager->dbSize = pPager->origDbSize;
  1.4914 +    pager_truncate_cache(pPager);
  1.4915 +    pPager->stmtInUse = 0;
  1.4916 +    pPager->state = PAGER_SHARED;
  1.4917 +    return SQLITE_OK;         /* pagerEnter() has not been called on this path */
  1.4918 +  }
  1.4919 +
  1.4920 +  pagerEnter(pPager);
  1.4921 +  if( !pPager->dirtyCache || !pPager->journalOpen ){  /* End the transaction without playback */
  1.4922 +    rc = pager_end_transaction(pPager, pPager->setMaster);
  1.4923 +    pagerLeave(pPager);
  1.4924 +    return rc;
  1.4925 +  }
  1.4926 +
  1.4927 +  if( pPager->errCode && pPager->errCode!=SQLITE_FULL ){
  1.4928 +    if( pPager->state>=PAGER_EXCLUSIVE ){
  1.4929 +      pager_playback(pPager, 0);    /* Result ignored; the existing errCode is returned */
  1.4930 +    }
  1.4931 +    pagerLeave(pPager);
  1.4932 +    return pPager->errCode;
  1.4933 +  }
  1.4934 +  if( pPager->state==PAGER_RESERVED ){
  1.4935 +    int rc2;
  1.4936 +    rc = pager_playback(pPager, 0);
  1.4937 +    rc2 = pager_end_transaction(pPager, pPager->setMaster);
  1.4938 +    if( rc==SQLITE_OK ){      /* A playback error takes precedence over rc2 */
  1.4939 +      rc = rc2;
  1.4940 +    }
  1.4941 +  }else{
  1.4942 +    rc = pager_playback(pPager, 0);
  1.4943 +  }
  1.4944 +  /* pager_reset(pPager); */
  1.4945 +  pPager->dbSize = -1;        /* NOTE(review): presumably marks the size as unknown - confirm */
  1.4946 +
  1.4947 +  /* If an error occurs during a ROLLBACK, we can no longer trust the pager
  1.4948 +  ** cache. So call pager_error() on the way out to make any error
  1.4949 +  ** persistent.
  1.4950 +  */
  1.4951 +  rc = pager_error(pPager, rc);
  1.4952 +  pagerLeave(pPager);
  1.4953 +  return rc;
  1.4954 +}
  1.4955 +
  1.4956 +/*
  1.4957 +** Return TRUE if the database file is opened read-only.  Return FALSE
  1.4958 +** if the database is (in theory) writable.  The value is pPager->readOnly.
  1.4959 +*/
  1.4960 +int sqlite3PagerIsreadonly(Pager *pPager){
  1.4961 +  return pPager->readOnly;
  1.4962 +}
  1.4963 +
  1.4964 +/*
  1.4965 +** Return the number of references to the pager (the pPager->nRef counter).
  1.4966 +*/
  1.4967 +int sqlite3PagerRefcount(Pager *pPager){
  1.4968 +  return pPager->nRef;
  1.4969 +}
  1.4970 +
  1.4971 +#ifdef SQLITE_TEST
  1.4972 +/*
  1.4973 +** Testing and analysis only: return an array of 11 pager statistics.
  1.4974 +*/
  1.4975 +int *sqlite3PagerStats(Pager *pPager){
  1.4976 +  static int a[11];          /* Static buffer: overwritten by the next call */
  1.4977 +  a[0] = pPager->nRef;
  1.4978 +  a[1] = pPager->nPage;
  1.4979 +  a[2] = pPager->mxPage;
  1.4980 +  a[3] = pPager->dbSize;
  1.4981 +  a[4] = pPager->state;
  1.4982 +  a[5] = pPager->errCode;
  1.4983 +  a[6] = pPager->nHit;
  1.4984 +  a[7] = pPager->nMiss;
  1.4985 +  a[8] = 0;  /* Used to be pPager->nOvfl */
  1.4986 +  a[9] = pPager->nRead;
  1.4987 +  a[10] = pPager->nWrite;
  1.4988 +  return a;
  1.4989 +}
  1.4990 +int sqlite3PagerIsMemdb(Pager *pPager){  /* Returns the MEMDB macro; also SQLITE_TEST only */
  1.4991 +  return MEMDB;
  1.4992 +}
  1.4993 +#endif
  1.4994 +
  1.4995 +/*
  1.4996 +** Set the statement rollback point.
  1.4997 +**
  1.4998 +** This routine should be called with the transaction journal already
  1.4999 +** open.  A new statement journal is created that can be used to rollback
  1.5000 +** changes of a single SQL command within a larger transaction.
  1.5001 +*/
  1.5002 +static int pagerStmtBegin(Pager *pPager){
  1.5003 +  int rc;
  1.5004 +  assert( !pPager->stmtInUse );
  1.5005 +  assert( pPager->state>=PAGER_SHARED );
  1.5006 +  assert( pPager->dbSize>=0 );
  1.5007 +  PAGERTRACE2("STMT-BEGIN %d\n", PAGERID(pPager));
  1.5008 +  if( MEMDB ){                /* In-memory database: no statement journal file */
  1.5009 +    pPager->stmtInUse = 1;
  1.5010 +    pPager->stmtSize = pPager->dbSize;
  1.5011 +    return SQLITE_OK;
  1.5012 +  }
  1.5013 +  if( !pPager->journalOpen ){ /* Journal not open: only set the stmtAutoopen flag */
  1.5014 +    pPager->stmtAutoopen = 1;
  1.5015 +    return SQLITE_OK;
  1.5016 +  }
  1.5017 +  assert( pPager->journalOpen );
  1.5018 +  pagerLeave(pPager);         /* Leave the pager while the bitvec is allocated */
  1.5019 +  assert( pPager->pInStmt==0 );
  1.5020 +  pPager->pInStmt = sqlite3BitvecCreate(pPager->dbSize);
  1.5021 +  pagerEnter(pPager);
  1.5022 +  if( pPager->pInStmt==0 ){
  1.5023 +    /* sqlite3OsLock(pPager->fd, SHARED_LOCK); */
  1.5024 +    return SQLITE_NOMEM;
  1.5025 +  }
  1.5026 +  pPager->stmtJSize = pPager->journalOff;   /* Snapshot journal offset and database size */
  1.5027 +  pPager->stmtSize = pPager->dbSize;
  1.5028 +  pPager->stmtHdrOff = 0;
  1.5029 +  pPager->stmtCksum = pPager->cksumInit;
  1.5030 +  if( !pPager->stmtOpen ){    /* Open the statement journal file on first use */
  1.5031 +    rc = sqlite3PagerOpentemp(pPager, pPager->stfd, SQLITE_OPEN_SUBJOURNAL);
  1.5032 +    if( rc ){
  1.5033 +      goto stmt_begin_failed;
  1.5034 +    }
  1.5035 +    pPager->stmtOpen = 1;
  1.5036 +    pPager->stmtNRec = 0;
  1.5037 +  }
  1.5038 +  pPager->stmtInUse = 1;
  1.5039 +  return SQLITE_OK;
  1.5040 + 
  1.5041 +stmt_begin_failed:            /* Free the bitvec allocated above and report rc */
  1.5042 +  if( pPager->pInStmt ){
  1.5043 +    sqlite3BitvecDestroy(pPager->pInStmt);
  1.5044 +    pPager->pInStmt = 0;
  1.5045 +  }
  1.5046 +  return rc;
  1.5047 +}
  1.5048 +int sqlite3PagerStmtBegin(Pager *pPager){  /* Public wrapper around pagerStmtBegin() */
  1.5049 +  int rc;
  1.5050 +  pagerEnter(pPager);
  1.5051 +  rc = pagerStmtBegin(pPager);   /* Result is returned unchanged */
  1.5052 +  pagerLeave(pPager);
  1.5053 +  return rc;
  1.5054 +}
  1.5055 +
  1.5056 +/*
  1.5057 +** Commit a statement: discard the statement rollback state.  Always returns SQLITE_OK.
  1.5058 +*/
  1.5059 +int sqlite3PagerStmtCommit(Pager *pPager){
  1.5060 +  pagerEnter(pPager);
  1.5061 +  if( pPager->stmtInUse ){
  1.5062 +    PgHdr *pPg, *pNext;
  1.5063 +    PAGERTRACE2("STMT-COMMIT %d\n", PAGERID(pPager));
  1.5064 +    if( !MEMDB ){
  1.5065 +      /* sqlite3OsTruncate(pPager->stfd, 0); */
  1.5066 +      sqlite3BitvecDestroy(pPager->pInStmt);
  1.5067 +      pPager->pInStmt = 0;
  1.5068 +    }else{
  1.5069 +      for(pPg=pPager->pStmt; pPg; pPg=pNext){
  1.5070 +        PgHistory *pHist = PGHDR_TO_HIST(pPg, pPager);
  1.5071 +        pNext = pHist->pNextStmt;   /* Save the link before it is cleared below */
  1.5072 +        assert( pHist->inStmt );
  1.5073 +        pHist->inStmt = 0;
  1.5074 +        pHist->pPrevStmt = pHist->pNextStmt = 0;
  1.5075 +        sqlite3PageFree(pHist->pStmt);
  1.5076 +        pHist->pStmt = 0;
  1.5077 +      }
  1.5078 +    }
  1.5079 +    pPager->stmtNRec = 0;
  1.5080 +    pPager->stmtInUse = 0;
  1.5081 +    pPager->pStmt = 0;
  1.5082 +  }
  1.5083 +  pPager->stmtAutoopen = 0;   /* Cleared whether or not a statement was in use */
  1.5084 +  pagerLeave(pPager);
  1.5085 +  return SQLITE_OK;
  1.5086 +}
  1.5087 +
  1.5088 +/*
  1.5089 +** Rollback a statement.  Returns SQLITE_OK, or the pager_stmt_playback() error.
  1.5090 +*/
  1.5091 +int sqlite3PagerStmtRollback(Pager *pPager){
  1.5092 +  int rc;
  1.5093 +  pagerEnter(pPager);
  1.5094 +  if( pPager->stmtInUse ){
  1.5095 +    PAGERTRACE2("STMT-ROLLBACK %d\n", PAGERID(pPager));
  1.5096 +    if( MEMDB ){              /* In-memory database: restore from the pStmt copies */
  1.5097 +      PgHdr *pPg;
  1.5098 +      PgHistory *pHist;
  1.5099 +      for(pPg=pPager->pStmt; pPg; pPg=pHist->pNextStmt){
  1.5100 +        pHist = PGHDR_TO_HIST(pPg, pPager);
  1.5101 +        if( pHist->pStmt ){
  1.5102 +          memcpy(PGHDR_TO_DATA(pPg), pHist->pStmt, pPager->pageSize);
  1.5103 +          sqlite3PageFree(pHist->pStmt);
  1.5104 +          pHist->pStmt = 0;
  1.5105 +        }
  1.5106 +      }
  1.5107 +      pPager->dbSize = pPager->stmtSize;   /* Size recorded by pagerStmtBegin() */
  1.5108 +      pager_truncate_cache(pPager);
  1.5109 +      rc = SQLITE_OK;
  1.5110 +    }else{
  1.5111 +      rc = pager_stmt_playback(pPager);
  1.5112 +    }
  1.5113 +    sqlite3PagerStmtCommit(pPager);   /* Clears statement state even if playback failed */
  1.5114 +  }else{
  1.5115 +    rc = SQLITE_OK;
  1.5116 +  }
  1.5117 +  pPager->stmtAutoopen = 0;
  1.5118 +  pagerLeave(pPager);
  1.5119 +  return rc;
  1.5120 +}
  1.5121 +
  1.5122 +/*
  1.5123 +** Return the full pathname of the database file.
         +**
         +** The pointer returned is Pager.zFilename itself, not a copy.
  1.5124 +*/
  1.5125 +const char *sqlite3PagerFilename(Pager *pPager){
  1.5126 +  return pPager->zFilename;
  1.5127 +}
  1.5128 +
  1.5129 +/*
  1.5130 +** Return the VFS structure for the pager.
         +**
         +** The pointer returned is Pager.pVfs, exposed through a pointer-to-const.
  1.5131 +*/
  1.5132 +const sqlite3_vfs *sqlite3PagerVfs(Pager *pPager){
  1.5133 +  return pPager->pVfs;
  1.5134 +}
  1.5135 +
  1.5136 +/*
  1.5137 +** Return the file handle for the database file associated
  1.5138 +** with the pager.  This might return NULL if the file has
  1.5139 +** not yet been opened.
         +**
         +** The value returned is Pager.fd; no check is made here as to whether
         +** the handle is open.
  1.5140 +*/
  1.5141 +sqlite3_file *sqlite3PagerFile(Pager *pPager){
  1.5142 +  return pPager->fd;
  1.5143 +}
  1.5144 +
  1.5145 +/*
  1.5146 +** Return the directory of the database file.
         +**
         +** The pointer returned is Pager.zDirectory itself, not a copy.
  1.5147 +*/
  1.5148 +const char *sqlite3PagerDirname(Pager *pPager){
  1.5149 +  return pPager->zDirectory;
  1.5150 +}
  1.5151 +
  1.5152 +/*
  1.5153 +** Return the full pathname of the journal file.
         +**
         +** The pointer returned is Pager.zJournal itself, not a copy.
  1.5154 +*/
  1.5155 +const char *sqlite3PagerJournalname(Pager *pPager){
  1.5156 +  return pPager->zJournal;
  1.5157 +}
  1.5158 +
  1.5159 +/*
  1.5160 +** Return true if fsync() calls are disabled for this pager.  Return FALSE
  1.5161 +** if fsync()s are executed normally.
         +**
         +** The value returned is the Pager.noSync flag, converted to int.
  1.5162 +*/
  1.5163 +int sqlite3PagerNosync(Pager *pPager){
  1.5164 +  return pPager->noSync;
  1.5165 +}
  1.5166 +
  1.5167 +#ifdef SQLITE_HAS_CODEC
  1.5168 +/*
  1.5169 +** Set the codec for this pager
         +**
         +** This routine only exists when SQLITE_HAS_CODEC is defined.  It stores
         +** the callback xCodec in Pager.xCodec and the opaque pointer pCodecArg in
         +** Pager.pCodecArg.  Neither argument is validated, and any previously
         +** stored values are simply overwritten.
  1.5170 +*/
  1.5171 +void sqlite3PagerSetCodec(
  1.5172 +  Pager *pPager,
  1.5173 +  void *(*xCodec)(void*,void*,Pgno,int),
  1.5174 +  void *pCodecArg
  1.5175 +){
  1.5176 +  pPager->xCodec = xCodec;
  1.5177 +  pPager->pCodecArg = pCodecArg;
  1.5178 +}
  1.5179 +#endif
  1.5180 +
  1.5181 +#ifndef SQLITE_OMIT_AUTOVACUUM
  1.5182 +/*
  1.5183 +** Move the page pPg to location pgno in the file.
  1.5184 +**
  1.5185 +** There must be no references to the page previously located at
  1.5186 +** pgno (which we call pPgOld) though that page is allowed to be
  1.5187 +** in cache.  If the page previously located at pgno is not already
  1.5188 +** in the rollback journal, it is not put there by this routine.
  1.5189 +**
  1.5190 +** References to the page pPg remain valid. Updating any
  1.5191 +** meta-data associated with pPg (i.e. data stored in the nExtra bytes
  1.5192 +** allocated along with the page) is the responsibility of the caller.
  1.5193 +**
  1.5194 +** A transaction must be active when this routine is called. It used to be
  1.5195 +** required that a statement transaction was not active, but this restriction
  1.5196 +** has been removed (CREATE INDEX needs to move a page when a statement
  1.5197 +** transaction is active).
  1.5198 +**
  1.5199 +** If the fourth argument, isCommit, is non-zero, then this page is being
  1.5200 +** moved as part of a database reorganization just before the transaction 
  1.5201 +** is being committed. In this case, it is guaranteed that the database page 
  1.5202 +** pPg refers to will not be written to again within this transaction.
         +**
         +** Returns SQLITE_OK on success.  The only failure path is the call to
         +** sqlite3PagerGet() that re-loads the page at the original page number
         +** when the journal must be synced before that location is overwritten;
         +** if that call fails its error code is returned.  By that point pPg has
         +** already been renumbered and relinked into the hash table, and that
         +** work is not undone here.
         +**
         +** This routine is compiled only when SQLITE_OMIT_AUTOVACUUM is not defined.
  1.5203 +*/
  1.5204 +int sqlite3PagerMovepage(Pager *pPager, DbPage *pPg, Pgno pgno, int isCommit){
  1.5205 +  PgHdr *pPgOld;  /* The page being overwritten. */
  1.5206 +  int h;  /* Hash bucket index for the new page number */
  1.5207 +  Pgno needSyncPgno = 0;  /* Original pgno of pPg if it still needs a journal sync, else 0 */
  1.5208 +
  1.5209 +  pagerEnter(pPager);
  1.5210 +  assert( pPg->nRef>0 );
  1.5211 +
  1.5212 +  PAGERTRACE5("MOVE %d page %d (needSync=%d) moves to %d\n", 
  1.5213 +      PAGERID(pPager), pPg->pgno, pPg->needSync, pgno);
  1.5214 +  IOTRACE(("MOVE %p %d %d\n", pPager, pPg->pgno, pgno))
  1.5215 +
  1.5216 +  pager_get_content(pPg);
  1.5217 +
  1.5218 +  /* If the journal needs to be sync()ed before page pPg->pgno can
  1.5219 +  ** be written to, store pPg->pgno in local variable needSyncPgno.
  1.5220 +  **
  1.5221 +  ** If the isCommit flag is set, there is no need to remember that
  1.5222 +  ** the journal needs to be sync()ed before database page pPg->pgno 
  1.5223 +  ** can be written to. The caller has already promised not to write to it.
  1.5224 +  */
  1.5225 +  if( pPg->needSync && !isCommit ){
  1.5226 +    needSyncPgno = pPg->pgno;
  1.5227 +    assert( pPg->inJournal || (int)pgno>pPager->origDbSize );
  1.5228 +    assert( pPg->dirty );
  1.5229 +    assert( pPager->needSync );
  1.5230 +  }
  1.5231 +
  1.5232 +  /* Unlink pPg from its hash-chain */
  1.5233 +  unlinkHashChain(pPager, pPg);
  1.5234 +
  1.5235 +  /* If the cache contains a page with page-number pgno, remove it
  1.5236 +  ** from its hash chain. Also, if the PgHdr.needSync was set for 
  1.5237 +  ** page pgno before the 'move' operation, it needs to be retained 
  1.5238 +  ** for the page moved there.
  1.5239 +  */
  1.5240 +  pPg->needSync = 0;  /* NOTE(review): redundant store; both branches below assign needSync again */
  1.5241 +  pPgOld = pager_lookup(pPager, pgno);
  1.5242 +  if( pPgOld ){
  1.5243 +    assert( pPgOld->nRef==0 );
  1.5244 +    unlinkHashChain(pPager, pPgOld);
  1.5245 +    makeClean(pPgOld);
  1.5246 +    pPg->needSync = pPgOld->needSync;
  1.5247 +  }else{
  1.5248 +    pPg->needSync = 0;
  1.5249 +  }
  1.5250 +  pPg->inJournal = sqlite3BitvecTest(pPager->pInJournal, pgno);  /* journal status now follows the destination page number */
  1.5251 +
  1.5252 +  /* Change the page number for pPg and insert it into the new hash-chain. */
  1.5253 +  assert( pgno!=0 );
  1.5254 +  pPg->pgno = pgno;
  1.5255 +  h = pgno & (pPager->nHash-1);  /* presumably nHash is a power of two so this acts as a modulo; TODO confirm */
  1.5256 +  if( pPager->aHash[h] ){
  1.5257 +    assert( pPager->aHash[h]->pPrevHash==0 );
  1.5258 +    pPager->aHash[h]->pPrevHash = pPg;
  1.5259 +  }
  1.5260 +  pPg->pNextHash = pPager->aHash[h];  /* pPg becomes the new head of bucket h */
  1.5261 +  pPager->aHash[h] = pPg;
  1.5262 +  pPg->pPrevHash = 0;
  1.5263 +
  1.5264 +  makeDirty(pPg);
  1.5265 +  pPager->dirtyCache = 1;
  1.5266 +  pPager->dbModified = 1;
  1.5267 +
  1.5268 +  if( needSyncPgno ){
  1.5269 +    /* If needSyncPgno is non-zero, then the journal file needs to be 
  1.5270 +    ** sync()ed before any data is written to database file page needSyncPgno.
  1.5271 +    ** Currently, no such page exists in the page-cache and the 
  1.5272 +    ** "is journaled" bitvec flag has been set. This needs to be remedied by
  1.5273 +    ** loading the page into the pager-cache and setting the PgHdr.needSync 
  1.5274 +    ** flag.
  1.5275 +    **
  1.5276 +    ** If the attempt to load the page into the page-cache fails, (due
  1.5277 +    ** to a malloc() or IO failure), clear the bit in the pInJournal[]
  1.5278 +    ** array. Otherwise, if the page is loaded and written again in
  1.5279 +    ** this transaction, it may be written to the database file before
  1.5280 +    ** it is synced into the journal file. This way, it may end up in
  1.5281 +    ** the journal file twice, but that is not a problem.
  1.5282 +    **
  1.5283 +    ** The sqlite3PagerGet() call may cause the journal to sync. So make
  1.5284 +    ** sure the Pager.needSync flag is set too.
  1.5285 +    */
  1.5286 +    int rc;
  1.5287 +    PgHdr *pPgHdr;
  1.5288 +    assert( pPager->needSync );
  1.5289 +    rc = sqlite3PagerGet(pPager, needSyncPgno, &pPgHdr);
  1.5290 +    if( rc!=SQLITE_OK ){
  1.5291 +      if( pPager->pInJournal && (int)needSyncPgno<=pPager->origDbSize ){
  1.5292 +        sqlite3BitvecClear(pPager->pInJournal, needSyncPgno);
  1.5293 +      }
  1.5294 +      pagerLeave(pPager);  /* early return: the move of pPg performed above is left in place */
  1.5295 +      return rc;
  1.5296 +    }
  1.5297 +    pPager->needSync = 1;
  1.5298 +    pPgHdr->needSync = 1;
  1.5299 +    pPgHdr->inJournal = 1;
  1.5300 +    makeDirty(pPgHdr);
  1.5301 +    sqlite3PagerUnref(pPgHdr);  /* drop the reference taken by sqlite3PagerGet() above */
  1.5302 +  }
  1.5303 +
  1.5304 +  pagerLeave(pPager);
  1.5305 +  return SQLITE_OK;
  1.5306 +}
  1.5307 +#endif
  1.5308 +
  1.5309 +/*
  1.5310 +** Return a pointer to the data for the specified page.
         +**
         +** The address is computed from the page header with PGHDR_TO_DATA();
         +** pPg is not checked for NULL.
  1.5311 +*/
  1.5312 +void *sqlite3PagerGetData(DbPage *pPg){
  1.5313 +  return PGHDR_TO_DATA(pPg);
  1.5314 +}
  1.5315 +
  1.5316 +/*
  1.5317 +** Return a pointer to the Pager.nExtra bytes of "extra" space 
  1.5318 +** allocated along with the specified page.
         +**
         +** The address is computed with PGHDR_TO_EXTRA(), which needs the owning
         +** Pager.  If pPg->pPager is NULL the routine returns 0 instead.
  1.5319 +*/
  1.5320 +void *sqlite3PagerGetExtra(DbPage *pPg){
  1.5321 +  Pager *pPager = pPg->pPager;
  1.5322 +  return (pPager?PGHDR_TO_EXTRA(pPg, pPager):0);
  1.5323 +}
  1.5324 +
  1.5325 +/*
  1.5326 +** Get/set the locking-mode for this pager. Parameter eMode must be one
  1.5327 +** of PAGER_LOCKINGMODE_QUERY, PAGER_LOCKINGMODE_NORMAL or 
  1.5328 +** PAGER_LOCKINGMODE_EXCLUSIVE. If the parameter is not _QUERY, then
  1.5329 +** the locking-mode is set to the value specified.
         +**
         +** The one exception is a pager with Pager.tempFile set: a request to
         +** change the mode of such a pager is ignored and the existing mode is
         +** reported back unchanged.
  1.5330 +**
  1.5331 +** The returned value is either PAGER_LOCKINGMODE_NORMAL or
  1.5332 +** PAGER_LOCKINGMODE_EXCLUSIVE, indicating the current (possibly updated)
  1.5333 +** locking-mode.
  1.5334 +*/
  1.5335 +int sqlite3PagerLockingMode(Pager *pPager, int eMode){
  1.5336 +  assert( eMode==PAGER_LOCKINGMODE_QUERY
  1.5337 +            || eMode==PAGER_LOCKINGMODE_NORMAL
  1.5338 +            || eMode==PAGER_LOCKINGMODE_EXCLUSIVE );
  1.5339 +  assert( PAGER_LOCKINGMODE_QUERY<0 );
  1.5340 +  assert( PAGER_LOCKINGMODE_NORMAL>=0 && PAGER_LOCKINGMODE_EXCLUSIVE>=0 );
  1.5341 +  if( eMode>=0 && !pPager->tempFile ){  /* eMode>=0 means a set request: _QUERY is negative per the asserts above */
  1.5342 +    pPager->exclusiveMode = eMode;
  1.5343 +  }
  1.5344 +  return (int)pPager->exclusiveMode;
  1.5345 +}
  1.5346 +
  1.5347 +/*
  1.5348 +** Get/set the journal-mode for this pager. Parameter eMode must be one
  1.5349 +** of PAGER_JOURNALMODE_QUERY, PAGER_JOURNALMODE_DELETE,
  1.5350 +** PAGER_JOURNALMODE_PERSIST or PAGER_JOURNALMODE_OFF. If the parameter
  1.5351 +** is not _QUERY, then the journal-mode is set to the value specified.
  1.5352 +**
  1.5353 +** The returned value is the current (possibly updated) journal-mode as
  1.5354 +** stored in Pager.journalMode, i.e. whichever of the non-_QUERY values
  1.5355 +** listed above was most recently stored.
         +**
         +** NOTE(review): the asserts below establish that _QUERY is negative and
         +** that _DELETE and _PERSIST are non-negative, but say nothing about the
         +** sign of _OFF.  _OFF is stored only if it is also >= 0; confirm against
         +** the definitions of the PAGER_JOURNALMODE_* constants.
  1.5356 +*/
  1.5357 +int sqlite3PagerJournalMode(Pager *pPager, int eMode){
  1.5358 +  assert( eMode==PAGER_JOURNALMODE_QUERY
  1.5359 +            || eMode==PAGER_JOURNALMODE_DELETE
  1.5360 +            || eMode==PAGER_JOURNALMODE_PERSIST
  1.5361 +            || eMode==PAGER_JOURNALMODE_OFF );
  1.5362 +  assert( PAGER_JOURNALMODE_QUERY<0 );
  1.5363 +  assert( PAGER_JOURNALMODE_DELETE>=0 && PAGER_JOURNALMODE_PERSIST>=0 );
  1.5364 +  if( eMode>=0 ){  /* any non-negative eMode is a set request; a negative one is a pure query */
  1.5365 +    pPager->journalMode = eMode;
  1.5366 +  }
  1.5367 +  return (int)pPager->journalMode;
  1.5368 +}
  1.5369 +
  1.5370 +/*
  1.5371 +** Get/set the size-limit used for persistent journal files.
         +**
         +** If iLimit is -1 or greater it is stored as the new value of
         +** Pager.journalSizeLimit.  Any value less than -1 leaves the stored
         +** limit untouched, so such a value can be passed to query the setting.
         +** In either case the (possibly updated) limit is returned.
  1.5372 +*/
  1.5373 +i64 sqlite3PagerJournalSizeLimit(Pager *pPager, i64 iLimit){
  1.5374 +  if( iLimit>=-1 ){
  1.5375 +    pPager->journalSizeLimit = iLimit;
  1.5376 +  }
  1.5377 +  return pPager->journalSizeLimit;
  1.5378 +}
  1.5379 +
  1.5380 +#endif /* SQLITE_OMIT_DISKIO */