os/persistentdata/persistentstorage/sqlite3api/SQLite/pager.c
changeset 0 bde4ae8d615e
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/os/persistentdata/persistentstorage/sqlite3api/SQLite/pager.c	Fri Jun 15 03:10:57 2012 +0200
     1.3 @@ -0,0 +1,4260 @@
     1.4 +/*
     1.5 +** 2001 September 15
     1.6 +**
     1.7 +** The author disclaims copyright to this source code.  In place of
     1.8 +** a legal notice, here is a blessing:
     1.9 +**
    1.10 +**    May you do good and not evil.
    1.11 +**    May you find forgiveness for yourself and forgive others.
    1.12 +**    May you share freely, never taking more than you give.
    1.13 +**
    1.14 +*************************************************************************
    1.15 +** This is the implementation of the page cache subsystem or "pager".
    1.16 +** 
    1.17 +** The pager is used to access a database disk file.  It implements
    1.18 +** atomic commit and rollback through the use of a journal file that
    1.19 +** is separate from the database file.  The pager also implements file
    1.20 +** locking to prevent two processes from writing the same database
    1.21 +** file simultaneously, or one process from reading the database while
    1.22 +** another is writing.
    1.23 +**
    1.24 +** @(#) $Id: pager.c,v 1.496 2008/09/29 11:49:48 danielk1977 Exp $
    1.25 +*/
    1.26 +#ifndef SQLITE_OMIT_DISKIO
    1.27 +#include "sqliteInt.h"
    1.28 +
    1.29 +/*
    1.30 +** Macros for troubleshooting.  Normally turned off
    1.31 +*/
    1.32 +#if 0  /* Change to 1 to send pager trace output to printf */
    1.33 +#define sqlite3DebugPrintf printf
    1.34 +#define PAGERTRACE1(X)       sqlite3DebugPrintf(X)
    1.35 +#define PAGERTRACE2(X,Y)     sqlite3DebugPrintf(X,Y)
    1.36 +#define PAGERTRACE3(X,Y,Z)   sqlite3DebugPrintf(X,Y,Z)
    1.37 +#define PAGERTRACE4(X,Y,Z,W) sqlite3DebugPrintf(X,Y,Z,W)
    1.38 +#define PAGERTRACE5(X,Y,Z,W,V) sqlite3DebugPrintf(X,Y,Z,W,V)
    1.39 +#else  /* Tracing off: every PAGERTRACEn() expands to nothing */
    1.40 +#define PAGERTRACE1(X)
    1.41 +#define PAGERTRACE2(X,Y)
    1.42 +#define PAGERTRACE3(X,Y,Z)
    1.43 +#define PAGERTRACE4(X,Y,Z,W)
    1.44 +#define PAGERTRACE5(X,Y,Z,W,V)
    1.45 +#endif /* pager tracing */
    1.46 +
    1.47 +/*
    1.48 +** The following two macros are used within the PAGERTRACEX() macros above
    1.49 +** to print out file-descriptors. 
    1.50 +**
    1.51 +** PAGERID() takes a pointer to a Pager struct as its argument. The
    1.52 +** associated file-descriptor is returned. FILEHANDLEID() takes an sqlite3_file
    1.53 +** struct as its argument.
    1.54 +*/
    1.55 +#define PAGERID(p) ((int)((p)->fd))   /* p parenthesized: any Pager* expression works */
    1.56 +#define FILEHANDLEID(fd) ((int)(fd))  /* fd parenthesized for the same reason */
    1.57 +
    1.58 +/*
    1.59 +** The page cache as a whole is always in one of the following
    1.60 +** states:
    1.61 +**
    1.62 +**   PAGER_UNLOCK        The page cache is not currently reading or 
    1.63 +**                       writing the database file.  There is no
    1.64 +**                       data held in memory.  This is the initial
    1.65 +**                       state.
    1.66 +**
    1.67 +**   PAGER_SHARED        The page cache is reading the database.
    1.68 +**                       Writing is not permitted.  There can be
    1.69 +**                       multiple readers accessing the same database
    1.70 +**                       file at the same time.
    1.71 +**
    1.72 +**   PAGER_RESERVED      This process has reserved the database for writing
    1.73 +**                       but has not yet made any changes.  Only one process
    1.74 +**                       at a time can reserve the database.  The original
    1.75 +**                       database file has not been modified so other
    1.76 +**                       processes may still be reading the on-disk
    1.77 +**                       database file.
    1.78 +**
    1.79 +**   PAGER_EXCLUSIVE     The page cache is writing the database.
    1.80 +**                       Access is exclusive.  No other processes or
    1.81 +**                       threads can be reading or writing while one
    1.82 +**                       process is writing.
    1.83 +**
    1.84 +**   PAGER_SYNCED        The pager moves to this state from PAGER_EXCLUSIVE
    1.85 +**                       after all dirty pages have been written to the
    1.86 +**                       database file and the file has been synced to
    1.87 +**                       disk. All that remains to do is to remove or
    1.88 +**                       truncate the journal file and the transaction 
    1.89 +**                       will be committed.
    1.90 +**
    1.91 +** The page cache comes up in PAGER_UNLOCK.  The first time a
    1.92 +** sqlite3PagerGet() occurs, the state transitions to PAGER_SHARED.
    1.93 +** After all pages have been released using sqlite_page_unref(),
    1.94 +** the state transitions back to PAGER_UNLOCK.  The first time
    1.95 +** that sqlite3PagerWrite() is called, the state transitions to
    1.96 +** PAGER_RESERVED.  (Note that sqlite3PagerWrite() can only be
    1.97 +** called on an outstanding page which means that the pager must
    1.98 +** be in PAGER_SHARED before it transitions to PAGER_RESERVED.)
    1.99 +** PAGER_RESERVED means that there is an open rollback journal.
   1.100 +** The transition to PAGER_EXCLUSIVE occurs before any changes
   1.101 +** are made to the database file, though writes to the rollback
   1.102 +** journal occur with just PAGER_RESERVED.  After an sqlite3PagerRollback()
   1.103 +** or sqlite3PagerCommitPhaseTwo(), the state can go back to PAGER_SHARED,
   1.104 +** or it can stay at PAGER_EXCLUSIVE if we are in exclusive access mode.
   1.105 +*/
   1.106 +#define PAGER_UNLOCK      0   /* initial state; no data held in memory */
   1.107 +#define PAGER_SHARED      1   /* same as SHARED_LOCK */
   1.108 +#define PAGER_RESERVED    2   /* same as RESERVED_LOCK */
   1.109 +#define PAGER_EXCLUSIVE   4   /* same as EXCLUSIVE_LOCK */
   1.110 +#define PAGER_SYNCED      5   /* db file synced; journal not yet finalized */
   1.111 +
   1.112 +/*
   1.113 +** If the SQLITE_BUSY_RESERVED_LOCK macro is set to true at compile-time,
   1.114 +** then failed attempts to get a reserved lock will invoke the busy callback.
   1.115 +** This is off by default.  To see why, consider the following scenario:
   1.116 +** 
   1.117 +** Suppose thread A already has a shared lock and wants a reserved lock.
   1.118 +** Thread B already has a reserved lock and wants an exclusive lock.  If
   1.119 +** both threads are using their busy callbacks, it might be a long time
   1.120 +** before one of the threads gives up and allows the other to proceed.
   1.121 +** But if the thread trying to get the reserved lock gives up quickly
   1.122 +** (if it never invokes its busy callback) then the contention will be
   1.123 +** resolved quickly.
   1.124 +*/
   1.125 +#ifndef SQLITE_BUSY_RESERVED_LOCK     /* May be overridden at compile-time */
   1.126 +# define SQLITE_BUSY_RESERVED_LOCK 0   /* Default: off */
   1.127 +#endif
   1.128 +
   1.129 +/*
   1.130 +** This macro rounds values up so that if the value is an address it
   1.131 +** is guaranteed to be an address that is aligned to an 8-byte boundary.
   1.132 +*/
   1.133 +#define FORCE_ALIGNMENT(X)   (((X)+7)&~7)   /* e.g. 1..8 -> 8, 9 -> 16 */
   1.134 +
   1.135 +/*
   1.136 +** A macro used for invoking the codec if there is one
   1.137 +*/
   1.138 +#ifdef SQLITE_HAS_CODEC  /* CODEC1 is a bare if-statement; CODEC2 yields a char* */
   1.139 +# define CODEC1(P,D,N,X) if( P->xCodec!=0 ){ P->xCodec(P->pCodecArg,D,N,X); }
   1.140 +# define CODEC2(P,D,N,X) ((char*)(P->xCodec!=0?P->xCodec(P->pCodecArg,D,N,X):D))
   1.141 +#else                    /* No codec support: CODEC2 only casts D */
   1.142 +# define CODEC1(P,D,N,X) /* NO-OP */
   1.143 +# define CODEC2(P,D,N,X) ((char*)D)
   1.144 +#endif                   /* SQLITE_HAS_CODEC */
   1.145 +
   1.146 +/*
   1.147 +** An open page cache is an instance of the following structure.
   1.148 +**
   1.149 +** Pager.errCode may be set to SQLITE_IOERR, SQLITE_CORRUPT, or
   1.150 +** SQLITE_FULL. Once one of the first two errors occurs, it persists
   1.151 +** and is returned as the result of every major pager API call.  The
   1.152 +** SQLITE_FULL return code is slightly different. It persists only until the
   1.153 +** next successful rollback is performed on the pager cache. Also,
   1.154 +** SQLITE_FULL does not affect the sqlite3PagerGet() and sqlite3PagerLookup()
   1.155 +** APIs, they may still be used successfully.
   1.156 +*/
   1.157 +struct Pager {
   1.158 +  sqlite3_vfs *pVfs;          /* OS functions to use for IO */
   1.159 +  u8 journalOpen;             /* True if the journal file descriptor is valid */
   1.160 +  u8 journalStarted;          /* True if header of journal is synced */
   1.161 +  u8 useJournal;              /* Use a rollback journal on this file */
   1.162 +  u8 noReadlock;              /* Do not bother to obtain readlocks */
   1.163 +  u8 stmtOpen;                /* True if the statement subjournal is open */
   1.164 +  u8 stmtInUse;               /* True if we are in a statement subtransaction */
   1.165 +  u8 stmtAutoopen;            /* Open stmt journal when main journal is opened */
   1.166 +  u8 noSync;                  /* Do not sync the journal if true */
   1.167 +  u8 fullSync;                /* Do extra syncs of the journal for robustness */
   1.168 +  u8 sync_flags;              /* One of SYNC_NORMAL or SYNC_FULL */
   1.169 +  u8 state;                   /* PAGER_UNLOCK, _SHARED, _RESERVED, etc. */
   1.170 +  u8 tempFile;                /* zFilename is a temporary file */
   1.171 +  u8 readOnly;                /* True for a read-only database */
   1.172 +  u8 needSync;                /* True if an fsync() is needed on the journal */
   1.173 +  u8 dirtyCache;              /* True if cached pages have changed */
   1.174 +  u8 alwaysRollback;          /* Disable DontRollback() for all pages */
   1.175 +  u8 memDb;                   /* True to inhibit all file I/O */
   1.176 +  u8 setMaster;               /* True if a m-j name has been written to jrnl */
   1.177 +  u8 doNotSync;               /* Boolean. While true, do not spill the cache */
   1.178 +  u8 exclusiveMode;           /* Boolean. True if locking_mode==EXCLUSIVE */
   1.179 +  u8 journalMode;             /* One of the PAGER_JOURNALMODE_* values */
   1.180 +  u8 dbModified;              /* True if there are any changes to the Db */
   1.181 +  u8 changeCountDone;         /* Set after incrementing the change-counter */
   1.182 +  u32 vfsFlags;               /* Flags for sqlite3_vfs.xOpen() */
   1.183 +  int errCode;                /* One of several kinds of errors */
   1.184 +  int dbSize;                 /* Number of pages in the file */
   1.185 +  int origDbSize;             /* dbSize before the current change */
   1.186 +  int stmtSize;               /* Size of database (in pages) at stmt_begin() */
   1.187 +  int nRec;                   /* Number of pages written to the journal */
   1.188 +  u32 cksumInit;              /* Quasi-random value added to every checksum */
   1.189 +  int stmtNRec;               /* Number of records in stmt subjournal */
   1.190 +  int nExtra;                 /* Add this many bytes to each in-memory page */
   1.191 +  int pageSize;               /* Number of bytes in a page */
   1.192 +  int nPage;                  /* Total number of in-memory pages */
   1.193 +  int mxPage;                 /* Maximum number of pages to hold in cache */
   1.194 +  Pgno mxPgno;                /* Maximum allowed size of the database */
   1.195 +  Bitvec *pInJournal;         /* One bit for each page in the database file */
   1.196 +  Bitvec *pInStmt;            /* One bit for each page in the database */
   1.197 +  Bitvec *pAlwaysRollback;    /* One bit for each page marked always-rollback */
   1.198 +  char *zFilename;            /* Name of the database file */
   1.199 +  char *zJournal;             /* Name of the journal file */
   1.200 +  char *zDirectory;           /* Directory holding the database and journal files */
   1.201 +  sqlite3_file *fd, *jfd;     /* File descriptors for database and journal */
   1.202 +  sqlite3_file *stfd;         /* File descriptor for the statement subjournal */
   1.203 +  BusyHandler *pBusyHandler;  /* Pointer to sqlite.busyHandler */
   1.204 +  i64 journalOff;             /* Current byte offset in the journal file */
   1.205 +  i64 journalHdr;             /* Byte offset to previous journal header */
   1.206 +  i64 stmtHdrOff;             /* First journal header written this statement */
   1.207 +  i64 stmtCksum;              /* cksumInit when statement was started */
   1.208 +  i64 stmtJSize;              /* Size of journal at stmt_begin() */
   1.209 +  int sectorSize;             /* Assumed sector size during rollback */
   1.210 +#ifdef SQLITE_TEST
   1.211 +  int nHit, nMiss;            /* Cache hits and misses */
   1.212 +  int nRead, nWrite;          /* Database pages read/written */
   1.213 +#endif
   1.214 +  void (*xReiniter)(DbPage*); /* Call this routine when reloading pages */
   1.215 +#ifdef SQLITE_HAS_CODEC
   1.216 +  void *(*xCodec)(void*,void*,Pgno,int); /* Routine for en/decoding data */
   1.217 +  void *pCodecArg;            /* First argument to xCodec() */
   1.218 +#endif
   1.219 +  char *pTmpSpace;            /* Pager.pageSize bytes of space for tmp use */
   1.220 +  char dbFileVers[16];        /* Changes whenever database file changes */
   1.221 +  i64 journalSizeLimit;       /* Size limit for persistent journal files */
   1.222 +  PCache *pPCache;            /* Pointer to page cache object */
   1.223 +};
   1.224 +
   1.225 +/*
   1.226 +** The following global variables hold counters used for
   1.227 +** testing purposes only.  These variables do not exist in
   1.228 +** a non-testing build.  These variables are not thread-safe.
   1.229 +*/
   1.230 +#ifdef SQLITE_TEST
   1.231 +int sqlite3_pager_readdb_count = 0;    /* Number of full pages read from DB */
   1.232 +int sqlite3_pager_writedb_count = 0;   /* Number of full pages written to DB */
   1.233 +int sqlite3_pager_writej_count = 0;    /* Number of pages written to journal */
   1.234 +# define PAGER_INCR(v)  v++   /* Post-increment the named counter */
   1.235 +#else                         /* Non-test build: counters do not exist */
   1.236 +# define PAGER_INCR(v)        /* Expands to nothing */
   1.237 +#endif                        /* SQLITE_TEST */
   1.238 +
   1.239 +
   1.240 +
   1.241 +/*
   1.242 +** Journal files begin with the following magic string.  The data
   1.243 +** was obtained from /dev/random.  It is used only as a sanity check.
   1.244 +**
   1.245 +** Since version 2.8.0, the journal format contains additional sanity
   1.246 +** checking information.  If the power fails while the journal is being
   1.247 +** written, semi-random garbage data might appear in the journal
   1.248 +** file after power is restored.  If an attempt is then made
   1.249 +** to roll the journal back, the database could be corrupted.  The additional
   1.250 +** sanity checking data is an attempt to discover the garbage in the
   1.251 +** journal and ignore it.
   1.252 +**
   1.253 +** The sanity checking information for the new journal format consists
   1.254 +** of a 32-bit checksum on each page of data.  The checksum covers both
   1.255 +** the page number and the pPager->pageSize bytes of data for the page.
   1.256 +** This cksum is initialized to a 32-bit random value that appears in the
   1.257 +** journal file right after the header.  The random initializer is important,
   1.258 +** because garbage data that appears at the end of a journal is likely
   1.259 +** data that was once in other files that have now been deleted.  If the
   1.260 +** garbage data came from an obsolete journal file, the checksums might
   1.261 +** be correct.  But by initializing the checksum to random value which
   1.262 +** is different for every journal, we minimize that risk.
   1.263 +*/
   1.264 +static const unsigned char aJournalMagic[] = {   /* 8 bytes in total */
   1.265 +  0xd9, 0xd5, 0x05, 0xf9, 0x20, 0xa1, 0x63, 0xd7,
   1.266 +};
   1.267 +
   1.268 +/*
   1.269 +** The size of the header and of each page in the journal is determined
   1.270 +** by the following macros.
   1.271 +*/
   1.272 +#define JOURNAL_PG_SZ(pPager)  (((pPager)->pageSize) + 8)  /* bytes per journaled page */
   1.273 +
   1.274 +/*
   1.275 +** The journal header size for this pager. In the future, this could be
   1.276 +** set to some value read from the disk controller. The important
   1.277 +** characteristic is that it is the same size as a disk sector.
   1.278 +*/
   1.279 +#define JOURNAL_HDR_SZ(pPager) ((pPager)->sectorSize)  /* argument parenthesized */
   1.280 +
   1.281 +/*
   1.282 +** The macro MEMDB is true if we are dealing with an in-memory database.
   1.283 +** We do this as a macro so that if the SQLITE_OMIT_MEMORYDB macro is set,
   1.284 +** the value of MEMDB will be a constant and the compiler will optimize
   1.285 +** out code that would never execute.
   1.286 +*/
   1.287 +#ifdef SQLITE_OMIT_MEMORYDB
   1.288 +# define MEMDB 0               /* Constant, so dead branches can be optimized out */
   1.289 +#else
   1.290 +# define MEMDB pPager->memDb   /* Requires a variable named pPager in scope */
   1.291 +#endif
   1.292 +
   1.293 +/*
   1.294 +** Page number PAGER_MJ_PGNO is never used in an SQLite database (it is
   1.295 +** reserved for working around a windows/posix incompatibility). It is
   1.296 +** used in the journal to signify that the remainder of the journal file 
   1.297 +** is devoted to storing a master journal name - there are no more pages to
   1.298 +** roll back. See comments for function writeMasterJournal() for details.
   1.299 +*/
   1.300 +/* #define PAGER_MJ_PGNO(x) (PENDING_BYTE/((x)->pageSize)) */
   1.301 +#define PAGER_MJ_PGNO(x) ((PENDING_BYTE/((x)->pageSize))+1)  /* x: pointer with a pageSize field */
   1.302 +
   1.303 +/*
   1.304 +** The maximum legal page number is (2^31 - 1).
   1.305 +*/
   1.306 +#define PAGER_MAX_PGNO 2147483647   /* 0x7fffffff */
   1.307 +
   1.308 +/*
   1.309 +** Return true if page *pPg has already been written to the statement
   1.310 +** journal (or statement snapshot has been created, if *pPg is part
   1.311 +** of an in-memory database).
   1.312 +*/
   1.313 +static int pageInStatement(PgHdr *pPg){
   1.314 +  Pager *pPager = pPg->pPager;   /* MEMDB refers to this local by name */
   1.315 +  if( !MEMDB ){
   1.316 +    /* File-backed database: test the page's bit in the statement bitvec */
   1.317 +    return sqlite3BitvecTest(pPager->pInStmt, pPg->pgno);
   1.318 +  }
   1.319 +  return pPg->apSave[1]!=0;      /* In-memory database: snapshot present? */
   1.320 +}
   1.321 +
   1.322 +/*
   1.323 +** Read a 32-bit integer from the given file descriptor.  Store the integer
   1.324 +** that is read in *pRes.  Return SQLITE_OK if everything worked, or an
   1.325 +** error code if something goes wrong.
   1.326 +**
   1.327 +** All values are stored on disk as big-endian.
   1.328 +*/
   1.329 +static int read32bits(sqlite3_file *fd, i64 offset, u32 *pRes){
   1.330 +  unsigned char aBuf[4];               /* Raw bytes as stored on disk */
   1.331 +  int rc;                              /* Result of the read */
   1.332 +  rc = sqlite3OsRead(fd, aBuf, sizeof(aBuf), offset);
   1.333 +  if( rc!=SQLITE_OK ) return rc;       /* *pRes is left untouched on error */
   1.334 +  *pRes = sqlite3Get4byte(aBuf);       /* Decode the big-endian value */
   1.335 +  return SQLITE_OK;
   1.336 +}
   1.337 +
   1.338 +/*
   1.339 +** Write a 32-bit integer into a string buffer in big-endian byte order.
   1.340 +*/
   1.341 +#define put32bits(A,B)  sqlite3Put4byte((u8*)(A),(B))  /* arguments parenthesized */
   1.342 +
   1.343 +/*
   1.344 +** Write a 32-bit integer into the given file descriptor.  Return SQLITE_OK
   1.345 +** on success or an error code if something goes wrong.
   1.346 +*/
   1.347 +static int write32bits(sqlite3_file *fd, i64 offset, u32 val){
   1.348 +  char aBuf[4];                        /* val encoded big-endian */
   1.349 +  sqlite3Put4byte((u8*)aBuf, val);
   1.350 +  return sqlite3OsWrite(fd, aBuf, sizeof(aBuf), offset);
   1.351 +}
   1.352 +
   1.353 +/*
   1.354 +** If file pFd is open, call sqlite3OsUnlock() on it.
   1.355 +*/
   1.356 +static int osUnlock(sqlite3_file *pFd, int eLock){
   1.357 +  int rc = SQLITE_OK;            /* Result when the file is not open */
   1.358 +  if( pFd->pMethods ){
   1.359 +    rc = sqlite3OsUnlock(pFd, eLock);
   1.360 +  }
   1.361 +  return rc;
   1.362 +}
   1.362 +
   1.363 +/*
   1.364 +** This function determines whether or not the atomic-write optimization
   1.365 +** can be used with this pager. The optimization can be used if:
   1.366 +**
   1.367 +**  (a) the value returned by OsDeviceCharacteristics() indicates that
   1.368 +**      a database page may be written atomically, and
   1.369 +**  (b) the value returned by OsSectorSize() is less than or equal
   1.370 +**      to the page size.
   1.371 +**
   1.372 +** If the optimization cannot be used, 0 is returned. If it can be used,
   1.373 +** then the value returned is the size of the journal file when it
   1.374 +** contains rollback data for exactly one page.
   1.375 +*/
   1.376 +#ifdef SQLITE_ENABLE_ATOMIC_WRITE
   1.377 +static int jrnlBufferSize(Pager *pPager){
   1.378 +  sqlite3_file *pDbFd = pPager->fd;   /* Database file handle */
   1.379 +  int isUsable = 1;                   /* Remains 1 if the file is not open */
   1.380 +
   1.381 +  if( pDbFd->pMethods ){
   1.382 +    /* File is open: combine device characteristics and sector size */
   1.383 +    int devChar = sqlite3OsDeviceCharacteristics(pDbFd);
   1.384 +    int nSectorBytes = sqlite3OsSectorSize(pDbFd);
   1.385 +    int nPageBytes = pPager->pageSize;
   1.386 +    int atomicBits = SQLITE_IOCAP_ATOMIC|(nPageBytes>>8);
   1.387 +    isUsable = (devChar & atomicBits) && nSectorBytes<=nPageBytes;
   1.388 +  }
   1.389 +
   1.390 +  /* The mask above relies on each ATOMICnnn flag being equal to nnn>>8 */
   1.391 +  assert(SQLITE_IOCAP_ATOMIC512==(512>>8));
   1.392 +  assert(SQLITE_IOCAP_ATOMIC64K==(65536>>8));
   1.393 +
   1.394 +  if( !isUsable ) return 0;
   1.395 +  /* Journal size with exactly one page: one header plus one record */
   1.396 +  return JOURNAL_HDR_SZ(pPager) + JOURNAL_PG_SZ(pPager);
   1.397 +}
   1.398 +#endif
   1.399 +
   1.400 +/*
   1.401 +** This function should be called when an error occurs within the pager
   1.402 +** code. The first argument is a pointer to the pager structure, the
   1.403 +** second the error-code about to be returned by a pager API function. 
   1.404 +** The value returned is a copy of the second argument to this function. 
   1.405 +**
   1.406 +** If the second argument is SQLITE_IOERR, SQLITE_CORRUPT, or SQLITE_FULL
   1.407 +** the error becomes persistent. Until the persistent error is cleared,
   1.408 +** subsequent API calls on this Pager will immediately return the same 
   1.409 +** error code.
   1.410 +**
   1.411 +** A persistent error indicates that the contents of the pager-cache 
   1.412 +** cannot be trusted. This state can be cleared by completely discarding 
   1.413 +** the contents of the pager-cache. If a transaction was active when
   1.414 +** the persistent error occurred, then the rollback journal may need
   1.415 +** to be replayed.
   1.416 +*/
   1.417 +static void pager_unlock(Pager *pPager);
   1.418 +static int pager_error(Pager *pPager, int rc){
   1.419 +  /* Any error recorded earlier must be SQLITE_FULL or an IOERR code */
   1.420 +  assert(
   1.421 +       pPager->errCode==SQLITE_OK ||
   1.422 +       pPager->errCode==SQLITE_FULL ||
   1.423 +       (pPager->errCode & 0xff)==SQLITE_IOERR
   1.424 +  );
   1.425 +  /* Persistence is decided by the low 8 bits of rc */
   1.426 +  switch( rc & 0xff ){
   1.427 +    case SQLITE_FULL:
   1.428 +    case SQLITE_IOERR:
   1.429 +    case SQLITE_CORRUPT:
   1.430 +      pPager->errCode = rc;       /* Store the code exactly as passed in */
   1.431 +      if( pPager->state==PAGER_UNLOCK
   1.432 +       && sqlite3PcacheRefCount(pPager->pPCache)==0
   1.433 +      ){
   1.434 +        /* Pager is unlocked with no page references: call pager_unlock()
   1.435 +        ** to clear the error state and empty the pager-cache. */
   1.436 +        pager_unlock(pPager);
   1.437 +      }
   1.438 +      break;
   1.439 +    default: break;               /* Any other code is not made persistent */
   1.440 +  }
   1.441 +  return rc;
   1.442 +}
   1.443 +
   1.444 +/*
   1.445 +** If SQLITE_CHECK_PAGES is defined then we do some sanity checking
   1.446 +** on the cache using a hash function.  This is used for testing
   1.447 +** and debugging only.
   1.448 +*/
   1.449 +#ifdef SQLITE_CHECK_PAGES
   1.450 +/*
   1.451 +** Return a 32-bit hash of the page data for pPage.
   1.452 +*/
   1.453 +static u32 pager_datahash(int nByte, unsigned char *pData){
   1.454 +  u32 h = 0;                 /* Accumulated hash; 0 when nByte<=0 */
   1.455 +  int k = 0;                 /* Index of the next byte to fold in */
   1.456 +  while( k<nByte ){
   1.457 +    h = (h*1039) + pData[k++];
   1.458 +  }
   1.459 +  return h;
   1.460 +}
   1.461 +static u32 pager_pagehash(PgHdr *pPage){
   1.462 +  return pager_datahash(pPage->pPager->pageSize, (unsigned char*)(pPage->pData));
   1.463 +}
   1.464 +static u32 pager_set_pagehash(PgHdr *pPage){
   1.465 +  return pPage->pageHash = pager_pagehash(pPage);  /* Store the hash and return it */
   1.466 +}
   1.467 +
   1.468 +/*
   1.469 +** The CHECK_PAGE macro takes a PgHdr* as an argument. If SQLITE_CHECK_PAGES
   1.470 +** is defined, and NDEBUG is not defined, an assert() statement checks
   1.471 +** that the page is either dirty or still matches the calculated page-hash.
   1.472 +*/
   1.473 +#define CHECK_PAGE(x) checkPage(x)   /* Real check only in this branch */
   1.474 +static void checkPage(PgHdr *pPg){   /* Asserts only; no other effect */
   1.475 +  Pager *pPager = pPg->pPager;       /* MEMDB refers to this local by name */
   1.476 +  assert( !pPg->pageHash || pPager->errCode || MEMDB 
   1.477 +      || (pPg->flags&PGHDR_DIRTY) || pPg->pageHash==pager_pagehash(pPg) );
   1.478 +}
   1.479 +
   1.480 +#else   /* SQLITE_CHECK_PAGES not defined: hashing and checks compile away */
   1.481 +#define pager_datahash(X,Y)  0
   1.482 +#define pager_pagehash(X)  0
   1.483 +#define CHECK_PAGE(x)
   1.484 +#endif  /* SQLITE_CHECK_PAGES */
   1.485 +
   1.486 +/*
   1.487 +** When this is called the journal file for pager pPager must be open.
   1.488 +** The master journal file name is read from the end of the file and 
   1.489 +** written into memory supplied by the caller. 
   1.490 +**
   1.491 +** zMaster must point to a buffer of at least nMaster bytes allocated by
   1.492 +** the caller. This should be sqlite3_vfs.mxPathname+1 (to ensure there is
   1.493 +** enough space to write the master journal name). If the master journal
   1.494 +** name in the journal is longer than nMaster bytes (including a
   1.495 +** nul-terminator), then this is handled as if no master journal name
   1.496 +** were present in the journal.
   1.497 +**
   1.498 +** If no master journal file name is present zMaster[0] is set to 0 and
   1.499 +** SQLITE_OK returned.
   1.500 +*/
   1.501 +static int readMasterJournal(sqlite3_file *pJrnl, char *zMaster, int nMaster){
   1.502 +  int rc;                  /* Result of the most recent I/O call */
   1.503 +  u32 len;                 /* Length of the name as stored in the journal */
   1.504 +  i64 szJ;                 /* Total size of the journal file, in bytes */
   1.505 +  u32 cksum;               /* Checksum stored alongside the name */
   1.506 +  u32 u;                   /* Unsigned loop counter */
   1.507 +  unsigned char aMagic[8]; /* A buffer to hold the magic header */
   1.508 +
   1.509 +  zMaster[0] = '\0';       /* Default answer: no master journal name */
   1.510 +
   1.511 +  rc = sqlite3OsFileSize(pJrnl, &szJ);
   1.512 +  if( rc!=SQLITE_OK || szJ<16 ) return rc;
   1.513 +
   1.514 +  rc = read32bits(pJrnl, szJ-16, &len);
   1.515 +  if( rc!=SQLITE_OK ) return rc;
   1.516 +
   1.517 +  /* Ignore a name too long for zMaster, or one whose stored length
   1.518 +  ** exceeds the bytes preceding the 16-byte trailer: the latter would
   1.519 +  ** make the read offset (szJ-16-len) used below negative. */
   1.520 +  if( len>=nMaster || len>szJ-16 ) return SQLITE_OK;
   1.521 +
   1.522 +  rc = read32bits(pJrnl, szJ-12, &cksum);
   1.523 +  if( rc!=SQLITE_OK ) return rc;
   1.524 +
   1.525 +  /* The file must end with the journal magic, else report no name */
   1.526 +  rc = sqlite3OsRead(pJrnl, aMagic, 8, szJ-8);
   1.527 +  if( rc!=SQLITE_OK || memcmp(aMagic, aJournalMagic, 8) ) return rc;
   1.528 +
   1.529 +  rc = sqlite3OsRead(pJrnl, zMaster, len, szJ-16-len);
   1.530 +  if( rc!=SQLITE_OK ) return rc;
   1.531 +  zMaster[len] = '\0';
   1.532 +
   1.533 +  /* See if the checksum matches the master journal name */
   1.534 +  for(u=0; u<len; u++){
   1.535 +    cksum -= zMaster[u];
   1.536 +  }
   1.537 +  if( cksum ){
   1.538 +    /* If the checksum doesn't add up, then one or more of the disk sectors
   1.539 +    ** containing the master journal filename is corrupted. This means
   1.540 +    ** definitely roll back, so just return SQLITE_OK and report a (nul)
   1.541 +    ** master-journal filename.
   1.542 +    */
   1.543 +    zMaster[0] = '\0';
   1.544 +  }
   1.545 +
   1.546 +  return SQLITE_OK;
   1.547 +}
   1.548 +
   1.549 +/*
   1.550 +** Seek the journal file descriptor to the next sector boundary where a
   1.551 +** journal header may be read or written. Pager.journalOff is updated with
   1.552 +** the new seek offset.
   1.553 +**
   1.554 +** e.g. for a sector size of 512:
   1.555 +**
   1.556 +** Input Offset              Output Offset
   1.557 +** ---------------------------------------
   1.558 +** 0                         0
   1.559 +** 512                       512
   1.560 +** 100                       512
   1.561 +** 2000                      2048
   1.562 +** 
   1.563 +*/
   1.564 +static void seekJournalHdr(Pager *pPager){
   1.565 +  i64 iCur = pPager->journalOff;          /* Journal offset on entry */
   1.566 +  i64 szHdr = JOURNAL_HDR_SZ(pPager);     /* Size of one journal header */
   1.567 +  /* Round a non-zero offset up to a multiple of szHdr; zero stays zero */
   1.568 +  i64 iNext = iCur ? ((iCur-1)/szHdr + 1)*szHdr : 0;
   1.569 +
   1.570 +  assert( iNext%szHdr==0 );
   1.571 +  assert( iNext>=iCur );
   1.572 +  assert( (iNext-iCur)<szHdr );
   1.573 +  pPager->journalOff = iNext;
   1.574 +}
   1.575 +
   1.576 +/*
   1.577 +** Write zeros over the header of the journal file.  This has the
   1.578 +** effect of invalidating the journal file and committing the
   1.579 +** transaction.
   1.580 +*/
   1.581 +static int zeroJournalHdr(Pager *pPager, int doTruncate){
   1.582 +  int rc = SQLITE_OK;                   /* Returned as-is if journalOff is 0 */
   1.583 +  static const char zeroHdr[28] = {0};  /* 28 zero bytes written over the header */
   1.584 +  /* Nothing at all is done when the journal offset is still zero */
   1.585 +  if( pPager->journalOff ){
   1.586 +    i64 iLimit = pPager->journalSizeLimit;
   1.587 +    /* Either truncate the journal to zero bytes or zero its first 28 bytes */
   1.588 +    IOTRACE(("JZEROHDR %p\n", pPager))
   1.589 +    if( doTruncate || iLimit==0 ){
   1.590 +      rc = sqlite3OsTruncate(pPager->jfd, 0);
   1.591 +    }else{
   1.592 +      rc = sqlite3OsWrite(pPager->jfd, zeroHdr, sizeof(zeroHdr), 0);
   1.593 +    }
   1.594 +    if( rc==SQLITE_OK && !pPager->noSync ){   /* Sync is skipped in no-sync mode */
   1.595 +      rc = sqlite3OsSync(pPager->jfd, SQLITE_SYNC_DATAONLY|pPager->sync_flags);
   1.596 +    }
   1.597 +
   1.598 +    /* At this point the transaction is committed but the write lock 
   1.599 +    ** is still held on the file. If there is a size limit configured for 
   1.600 +    ** the persistent journal and the journal file currently consumes more
   1.601 +    ** space than that limit allows for, truncate it now. There is no need
   1.602 +    ** to sync the file following this operation.
   1.603 +    */
   1.604 +    if( rc==SQLITE_OK && iLimit>0 ){
   1.605 +      i64 sz;                                 /* Current journal size in bytes */
   1.606 +      rc = sqlite3OsFileSize(pPager->jfd, &sz);
   1.607 +      if( rc==SQLITE_OK && sz>iLimit ){
   1.608 +        rc = sqlite3OsTruncate(pPager->jfd, iLimit);
   1.609 +      }
   1.610 +    }
   1.611 +  }
   1.612 +  return rc;
   1.613 +}
   1.614 +
   1.615 +/*
   1.616 +** The journal file must be open when this routine is called. A journal
   1.617 +** header (JOURNAL_HDR_SZ bytes) is written into the journal file at the
   1.618 +** current location.
   1.619 +**
   1.620 +** The format for the journal header is as follows:
   1.621 +** - 8 bytes: Magic identifying journal format.
   1.622 +** - 4 bytes: Number of records in journal, or -1 if no-sync mode is on.
   1.623 +** - 4 bytes: Random number used for page hash.
   1.624 +** - 4 bytes: Initial database page count.
   1.625 +** - 4 bytes: Sector size used by the process that wrote this journal.
   1.626 +** - 4 bytes: Database page size.
   1.627 +** 
   1.628 +** Followed by (JOURNAL_HDR_SZ - 28) bytes of unused space.
   1.629 +*/
   1.630 +static int writeJournalHdr(Pager *pPager){
   1.631 +  int rc = SQLITE_OK;
   1.632 +  char *zHeader = pPager->pTmpSpace;   /* Header is assembled in the tmp buffer */
   1.633 +  int nHeader = pPager->pageSize;      /* Bytes per write; capped just below */
   1.634 +  int nWrite;                          /* Header bytes written so far */
   1.635 +  /* Never write more than JOURNAL_HDR_SZ bytes in a single call */
   1.636 +  if( nHeader>JOURNAL_HDR_SZ(pPager) ){
   1.637 +    nHeader = JOURNAL_HDR_SZ(pPager);
   1.638 +  }
   1.639 +  /* Record stmtHdrOff only while it is still zero */
   1.640 +  if( pPager->stmtHdrOff==0 ){
   1.641 +    pPager->stmtHdrOff = pPager->journalOff;
   1.642 +  }
   1.643 +  /* Move journalOff to the next sector boundary; the header is written there */
   1.644 +  seekJournalHdr(pPager);
   1.645 +  pPager->journalHdr = pPager->journalOff;
   1.646 +
   1.647 +  memcpy(zHeader, aJournalMagic, sizeof(aJournalMagic));
   1.648 +
   1.649 +  /* 
   1.650 +  ** Write the nRec Field - the number of page records that follow this
   1.651 +  ** journal header. Normally, zero is written to this value at this time.
   1.652 +  ** After the records are added to the journal (and the journal synced, 
   1.653 +  ** if in full-sync mode), the zero is overwritten with the true number
   1.654 +  ** of records (see syncJournal()).
   1.655 +  **
   1.656 +  ** A faster alternative is to write 0xFFFFFFFF to the nRec field. When
   1.657 +  ** reading the journal this value tells SQLite to assume that the
   1.658 +  ** rest of the journal file contains valid page records. This assumption
   1.659 +  ** is dangerous, as if a failure occurred whilst writing to the journal
   1.660 +  ** file it may contain some garbage data. There are two scenarios
   1.661 +  ** where this risk can be ignored:
   1.662 +  **
   1.663 +  **   * When the pager is in no-sync mode. Corruption can follow a
   1.664 +  **     power failure in this case anyway.
   1.665 +  **
   1.666 +  **   * When the SQLITE_IOCAP_SAFE_APPEND flag is set. This guarantees
   1.667 +  **     that garbage data is never appended to the journal file.
   1.668 +  */
   1.669 +  assert(pPager->fd->pMethods||pPager->noSync);
   1.670 +  if( (pPager->noSync) 
   1.671 +   || (sqlite3OsDeviceCharacteristics(pPager->fd)&SQLITE_IOCAP_SAFE_APPEND) 
   1.672 +  ){
   1.673 +    put32bits(&zHeader[sizeof(aJournalMagic)], 0xffffffff);
   1.674 +  }else{
   1.675 +    put32bits(&zHeader[sizeof(aJournalMagic)], 0);
   1.676 +  }
   1.677 +
   1.678 +  /* The random check-hash initialiser */ 
   1.679 +  sqlite3_randomness(sizeof(pPager->cksumInit), &pPager->cksumInit);
   1.680 +  put32bits(&zHeader[sizeof(aJournalMagic)+4], pPager->cksumInit);
   1.681 +  /* The initial database size */
   1.682 +  put32bits(&zHeader[sizeof(aJournalMagic)+8], pPager->dbSize);
   1.683 +  /* The assumed sector size for this process */
   1.684 +  put32bits(&zHeader[sizeof(aJournalMagic)+12], pPager->sectorSize);
   1.685 +  if( pPager->journalHdr==0 ){
   1.686 +    /* The page size -- stored only in the header at offset 0 */
   1.687 +    put32bits(&zHeader[sizeof(aJournalMagic)+16], pPager->pageSize);
   1.688 +  }
   1.689 +  /* Write nHeader-byte pieces; journalOff advances even if a write fails */
   1.690 +  for(nWrite=0; rc==SQLITE_OK&&nWrite<JOURNAL_HDR_SZ(pPager); nWrite+=nHeader){
   1.691 +    IOTRACE(("JHDR %p %lld %d\n", pPager, pPager->journalHdr, nHeader))
   1.692 +    rc = sqlite3OsWrite(pPager->jfd, zHeader, nHeader, pPager->journalOff);
   1.693 +    pPager->journalOff += nHeader;
   1.694 +  }
   1.695 +
   1.696 +  return rc;
   1.697 +}
   1.698 +
   1.699 +/*
   1.700 +** The journal file must be open when this is called. A journal header file
   1.701 +** (JOURNAL_HDR_SZ bytes) is read from the current location in the journal
   1.702 +** file. See comments above function writeJournalHdr() for a description of
   1.703 +** the journal header format.
   1.704 +**
   1.705 +** If the header is read successfully, *nRec is set to the number of
   1.706 +** page records following this header and *dbSize is set to the size of the
   1.707 +** database before the transaction began, in pages. Also, pPager->cksumInit
   1.708 +** is set to the value read from the journal header. SQLITE_OK is returned
   1.709 +** in this case.
   1.710 +**
   1.711 +** If the journal header file appears to be corrupted, SQLITE_DONE is
   1.712 +** returned and *nRec and *dbSize are not set.  If JOURNAL_HDR_SZ bytes
   1.713 +** cannot be read from the journal file an error code is returned.
   1.714 +*/
   1.715 +static int readJournalHdr(
   1.716 +  Pager *pPager, 
   1.717 +  i64 journalSize,
   1.718 +  u32 *pNRec, 
   1.719 +  u32 *pDbSize
   1.720 +){
   1.721 +  int rc;
   1.722 +  unsigned char aMagic[8]; /* A buffer to hold the magic header */
   1.723 +  i64 jrnlOff;
   1.724 +  int iPageSize;
   1.725 +
   1.726 +  seekJournalHdr(pPager);
   1.727 +  if( pPager->journalOff+JOURNAL_HDR_SZ(pPager) > journalSize ){
   1.728 +    return SQLITE_DONE;
   1.729 +  }
   1.730 +  jrnlOff = pPager->journalOff;
   1.731 +
   1.732 +  rc = sqlite3OsRead(pPager->jfd, aMagic, sizeof(aMagic), jrnlOff);
   1.733 +  if( rc ) return rc;
   1.734 +  jrnlOff += sizeof(aMagic);
   1.735 +
   1.736 +  if( memcmp(aMagic, aJournalMagic, sizeof(aMagic))!=0 ){
   1.737 +    return SQLITE_DONE;
   1.738 +  }
   1.739 +
   1.740 +  rc = read32bits(pPager->jfd, jrnlOff, pNRec);
   1.741 +  if( rc ) return rc;
   1.742 +
   1.743 +  rc = read32bits(pPager->jfd, jrnlOff+4, &pPager->cksumInit);
   1.744 +  if( rc ) return rc;
   1.745 +
   1.746 +  rc = read32bits(pPager->jfd, jrnlOff+8, pDbSize);
   1.747 +  if( rc ) return rc;
   1.748 +
   1.749 +  rc = read32bits(pPager->jfd, jrnlOff+16, (u32 *)&iPageSize);
   1.750 +  if( rc==SQLITE_OK 
   1.751 +   && iPageSize>=512 
   1.752 +   && iPageSize<=SQLITE_MAX_PAGE_SIZE 
   1.753 +   && ((iPageSize-1)&iPageSize)==0 
   1.754 +  ){
   1.755 +    u16 pagesize = iPageSize;
   1.756 +    rc = sqlite3PagerSetPagesize(pPager, &pagesize);
   1.757 +  }
   1.758 +  if( rc ) return rc;
   1.759 +
   1.760 +  /* Update the assumed sector-size to match the value used by 
   1.761 +  ** the process that created this journal. If this journal was
   1.762 +  ** created by a process other than this one, then this routine
   1.763 +  ** is being called from within pager_playback(). The local value
   1.764 +  ** of Pager.sectorSize is restored at the end of that routine.
   1.765 +  */
   1.766 +  rc = read32bits(pPager->jfd, jrnlOff+12, (u32 *)&pPager->sectorSize);
   1.767 +  if( rc ) return rc;
   1.768 +
   1.769 +  pPager->journalOff += JOURNAL_HDR_SZ(pPager);
   1.770 +  return SQLITE_OK;
   1.771 +}
   1.772 +
   1.773 +
   1.774 +/*
   1.775 +** Write the supplied master journal name into the journal file for pager
   1.776 +** pPager at the current location. The master journal name must be the last
   1.777 +** thing written to a journal file. If the pager is in full-sync mode, the
   1.778 +** journal file descriptor is advanced to the next sector boundary before
   1.779 +** anything is written. The format is:
   1.780 +**
   1.781 +** + 4 bytes: PAGER_MJ_PGNO.
   1.782 +** + N bytes: length of master journal name.
   1.783 +** + 4 bytes: N
   1.784 +** + 4 bytes: Master journal name checksum.
   1.785 +** + 8 bytes: aJournalMagic[].
   1.786 +**
   1.787 +** The master journal page checksum is the sum of the bytes in the master
   1.788 +** journal name.
   1.789 +**
   1.790 +** If zMaster is a NULL pointer (occurs for a single database transaction), 
   1.791 +** this call is a no-op.
   1.792 +*/
   1.793 +static int writeMasterJournal(Pager *pPager, const char *zMaster){
   1.794 +  int rc;
   1.795 +  int len; 
   1.796 +  int i; 
   1.797 +  i64 jrnlOff;
   1.798 +  i64 jrnlSize;
   1.799 +  u32 cksum = 0;
   1.800 +  char zBuf[sizeof(aJournalMagic)+2*4];
   1.801 +
   1.802 +  if( !zMaster || pPager->setMaster) return SQLITE_OK;
   1.803 +  pPager->setMaster = 1;
   1.804 +
   1.805 +  len = strlen(zMaster);
   1.806 +  for(i=0; i<len; i++){
   1.807 +    cksum += zMaster[i];
   1.808 +  }
   1.809 +
   1.810 +  /* If in full-sync mode, advance to the next disk sector before writing
   1.811 +  ** the master journal name. This is in case the previous page written to
   1.812 +  ** the journal has already been synced.
   1.813 +  */
   1.814 +  if( pPager->fullSync ){
   1.815 +    seekJournalHdr(pPager);
   1.816 +  }
   1.817 +  jrnlOff = pPager->journalOff;
   1.818 +  pPager->journalOff += (len+20);
   1.819 +
   1.820 +  rc = write32bits(pPager->jfd, jrnlOff, PAGER_MJ_PGNO(pPager));
   1.821 +  if( rc!=SQLITE_OK ) return rc;
   1.822 +  jrnlOff += 4;
   1.823 +
   1.824 +  rc = sqlite3OsWrite(pPager->jfd, zMaster, len, jrnlOff);
   1.825 +  if( rc!=SQLITE_OK ) return rc;
   1.826 +  jrnlOff += len;
   1.827 +
   1.828 +  put32bits(zBuf, len);
   1.829 +  put32bits(&zBuf[4], cksum);
   1.830 +  memcpy(&zBuf[8], aJournalMagic, sizeof(aJournalMagic));
   1.831 +  rc = sqlite3OsWrite(pPager->jfd, zBuf, 8+sizeof(aJournalMagic), jrnlOff);
   1.832 +  jrnlOff += 8+sizeof(aJournalMagic);
   1.833 +  pPager->needSync = !pPager->noSync;
   1.834 +
   1.835 +  /* If the pager is in peristent-journal mode, then the physical 
   1.836 +  ** journal-file may extend past the end of the master-journal name
   1.837 +  ** and 8 bytes of magic data just written to the file. This is 
   1.838 +  ** dangerous because the code to rollback a hot-journal file
   1.839 +  ** will not be able to find the master-journal name to determine 
   1.840 +  ** whether or not the journal is hot. 
   1.841 +  **
   1.842 +  ** Easiest thing to do in this scenario is to truncate the journal 
   1.843 +  ** file to the required size.
   1.844 +  */ 
   1.845 +  if( (rc==SQLITE_OK)
   1.846 +   && (rc = sqlite3OsFileSize(pPager->jfd, &jrnlSize))==SQLITE_OK
   1.847 +   && jrnlSize>jrnlOff
   1.848 +  ){
   1.849 +    rc = sqlite3OsTruncate(pPager->jfd, jrnlOff);
   1.850 +  }
   1.851 +  return rc;
   1.852 +}
   1.853 +
   1.854 +/*
   1.855 +** Find a page in the hash table given its page number.  Return
   1.856 +** a pointer to the page or NULL if not found.
   1.857 +*/
   1.858 +static PgHdr *pager_lookup(Pager *pPager, Pgno pgno){
   1.859 +  PgHdr *p;
   1.860 +  sqlite3PcacheFetch(pPager->pPCache, pgno, 0, &p);
   1.861 +  return p;
   1.862 +}
   1.863 +
   1.864 +/*
   1.865 +** Clear the in-memory cache.  This routine
   1.866 +** sets the state of the pager back to what it was when it was first
   1.867 +** opened.  Any outstanding pages are invalidated and subsequent attempts
   1.868 +** to access those pages will likely result in a coredump.
   1.869 +*/
   1.870 +static void pager_reset(Pager *pPager){
   1.871 +  if( pPager->errCode ) return;
   1.872 +  sqlite3PcacheClear(pPager->pPCache);
   1.873 +}
   1.874 +
   1.875 +/*
   1.876 +** Unlock the database file. 
   1.877 +**
   1.878 +** If the pager is currently in error state, discard the contents of 
   1.879 +** the cache and reset the Pager structure internal state. If there is
   1.880 +** an open journal-file, then the next time a shared-lock is obtained
   1.881 +** on the pager file (by this or any other process), it will be
   1.882 +** treated as a hot-journal and rolled back.
   1.883 +*/
   1.884 +static void pager_unlock(Pager *pPager){
   1.885 +  if( !pPager->exclusiveMode ){
   1.886 +    if( !MEMDB ){
   1.887 +      int rc = osUnlock(pPager->fd, NO_LOCK);
   1.888 +      if( rc ) pPager->errCode = rc;
   1.889 +      pPager->dbSize = -1;
   1.890 +      IOTRACE(("UNLOCK %p\n", pPager))
   1.891 +
   1.892 +      /* Always close the journal file when dropping the database lock.
   1.893 +      ** Otherwise, another connection with journal_mode=delete might
   1.894 +      ** delete the file out from under us.
   1.895 +      */
   1.896 +      if( pPager->journalOpen ){
   1.897 +        sqlite3OsClose(pPager->jfd);
   1.898 +        pPager->journalOpen = 0;
   1.899 +        sqlite3BitvecDestroy(pPager->pInJournal);
   1.900 +        pPager->pInJournal = 0;
   1.901 +        sqlite3BitvecDestroy(pPager->pAlwaysRollback);
   1.902 +        pPager->pAlwaysRollback = 0;
   1.903 +      }
   1.904 +
   1.905 +      /* If Pager.errCode is set, the contents of the pager cache cannot be
   1.906 +      ** trusted. Now that the pager file is unlocked, the contents of the
   1.907 +      ** cache can be discarded and the error code safely cleared.
   1.908 +      */
   1.909 +      if( pPager->errCode ){
   1.910 +        if( rc==SQLITE_OK ) pPager->errCode = SQLITE_OK;
   1.911 +        pager_reset(pPager);
   1.912 +        if( pPager->stmtOpen ){
   1.913 +          sqlite3OsClose(pPager->stfd);
   1.914 +          sqlite3BitvecDestroy(pPager->pInStmt);
   1.915 +          pPager->pInStmt = 0;
   1.916 +        }
   1.917 +        pPager->stmtOpen = 0;
   1.918 +        pPager->stmtInUse = 0;
   1.919 +        pPager->journalOff = 0;
   1.920 +        pPager->journalStarted = 0;
   1.921 +        pPager->stmtAutoopen = 0;
   1.922 +        pPager->origDbSize = 0;
   1.923 +      }
   1.924 +    }
   1.925 +
   1.926 +    if( !MEMDB || pPager->errCode==SQLITE_OK ){
   1.927 +      pPager->state = PAGER_UNLOCK;
   1.928 +      pPager->changeCountDone = 0;
   1.929 +    }
   1.930 +  }
   1.931 +}
   1.932 +
   1.933 +/*
   1.934 +** Execute a rollback if a transaction is active and unlock the 
   1.935 +** database file. If the pager has already entered the error state, 
   1.936 +** do not attempt the rollback.
   1.937 +*/
   1.938 +static void pagerUnlockAndRollback(Pager *p){
   1.939 +  if( p->errCode==SQLITE_OK && p->state>=PAGER_RESERVED ){
   1.940 +    sqlite3BeginBenignMalloc();
   1.941 +    sqlite3PagerRollback(p);
   1.942 +    sqlite3EndBenignMalloc();
   1.943 +  }
   1.944 +  pager_unlock(p);
   1.945 +}
   1.946 +
   1.947 +/*
   1.948 +** This routine ends a transaction.  A transaction is ended by either
   1.949 +** a COMMIT or a ROLLBACK.
   1.950 +**
   1.951 +** When this routine is called, the pager has the journal file open and
   1.952 +** a RESERVED or EXCLUSIVE lock on the database.  This routine will release
   1.953 +** the database lock and acquires a SHARED lock in its place if that is
   1.954 +** the appropriate thing to do.  Release locks usually is appropriate,
   1.955 +** unless we are in exclusive access mode or unless this is a 
   1.956 +** COMMIT AND BEGIN or ROLLBACK AND BEGIN operation.
   1.957 +**
   1.958 +** The journal file is either deleted or truncated.
   1.959 +**
   1.960 +** TODO: Consider keeping the journal file open for temporary databases.
   1.961 +** This might give a performance improvement on windows where opening
   1.962 +** a file is an expensive operation.
   1.963 +*/
   1.964 +static int pager_end_transaction(Pager *pPager, int hasMaster){
   1.965 +  int rc = SQLITE_OK;
   1.966 +  int rc2 = SQLITE_OK;
   1.967 +  assert( !MEMDB );
   1.968 +  if( pPager->state<PAGER_RESERVED ){
   1.969 +    return SQLITE_OK;
   1.970 +  }
   1.971 +  sqlite3PagerStmtCommit(pPager);
   1.972 +  if( pPager->stmtOpen && !pPager->exclusiveMode ){
   1.973 +    sqlite3OsClose(pPager->stfd);
   1.974 +    pPager->stmtOpen = 0;
   1.975 +  }
   1.976 +  if( pPager->journalOpen ){
   1.977 +    if( pPager->journalMode==PAGER_JOURNALMODE_TRUNCATE
   1.978 +         && (rc = sqlite3OsTruncate(pPager->jfd, 0))==SQLITE_OK ){
   1.979 +      pPager->journalOff = 0;
   1.980 +      pPager->journalStarted = 0;
   1.981 +    }else if( pPager->exclusiveMode 
   1.982 +     || pPager->journalMode==PAGER_JOURNALMODE_PERSIST
   1.983 +    ){
   1.984 +      rc = zeroJournalHdr(pPager, hasMaster);
   1.985 +      pager_error(pPager, rc);
   1.986 +      pPager->journalOff = 0;
   1.987 +      pPager->journalStarted = 0;
   1.988 +    }else{
   1.989 +      assert( pPager->journalMode==PAGER_JOURNALMODE_DELETE || rc );
   1.990 +      sqlite3OsClose(pPager->jfd);
   1.991 +      pPager->journalOpen = 0;
   1.992 +      if( rc==SQLITE_OK && !pPager->tempFile ){
   1.993 +        rc = sqlite3OsDelete(pPager->pVfs, pPager->zJournal, 0);
   1.994 +      }
   1.995 +    }
   1.996 +    sqlite3BitvecDestroy(pPager->pInJournal);
   1.997 +    pPager->pInJournal = 0;
   1.998 +    sqlite3BitvecDestroy(pPager->pAlwaysRollback);
   1.999 +    pPager->pAlwaysRollback = 0;
  1.1000 +    sqlite3PcacheCleanAll(pPager->pPCache);
  1.1001 +#ifdef SQLITE_CHECK_PAGES
  1.1002 +    sqlite3PcacheIterate(pPager->pPCache, pager_set_pagehash);
  1.1003 +#endif
  1.1004 +    sqlite3PcacheClearFlags(pPager->pPCache,
  1.1005 +       PGHDR_IN_JOURNAL | PGHDR_NEED_SYNC
  1.1006 +    );
  1.1007 +    pPager->dirtyCache = 0;
  1.1008 +    pPager->nRec = 0;
  1.1009 +  }else{
  1.1010 +    assert( pPager->pInJournal==0 );
  1.1011 +  }
  1.1012 +
  1.1013 +  if( !pPager->exclusiveMode ){
  1.1014 +    rc2 = osUnlock(pPager->fd, SHARED_LOCK);
  1.1015 +    pPager->state = PAGER_SHARED;
  1.1016 +  }else if( pPager->state==PAGER_SYNCED ){
  1.1017 +    pPager->state = PAGER_EXCLUSIVE;
  1.1018 +  }
  1.1019 +  pPager->origDbSize = 0;
  1.1020 +  pPager->setMaster = 0;
  1.1021 +  pPager->needSync = 0;
  1.1022 +  /* lruListSetFirstSynced(pPager); */
  1.1023 +  pPager->dbSize = -1;
  1.1024 +  pPager->dbModified = 0;
  1.1025 +
  1.1026 +  return (rc==SQLITE_OK?rc2:rc);
  1.1027 +}
  1.1028 +
  1.1029 +/*
  1.1030 +** Compute and return a checksum for the page of data.
  1.1031 +**
  1.1032 +** This is not a real checksum.  It is really just the sum of the 
  1.1033 +** random initial value and the page number.  We experimented with
  1.1034 +** a checksum of the entire data, but that was found to be too slow.
  1.1035 +**
  1.1036 +** Note that the page number is stored at the beginning of data and
  1.1037 +** the checksum is stored at the end.  This is important.  If journal
  1.1038 +** corruption occurs due to a power failure, the most likely scenario
  1.1039 +** is that one end or the other of the record will be changed.  It is
  1.1040 +** much less likely that the two ends of the journal record will be
  1.1041 +** correct and the middle be corrupt.  Thus, this "checksum" scheme,
  1.1042 +** though fast and simple, catches the mostly likely kind of corruption.
  1.1043 +**
  1.1044 +** FIX ME:  Consider adding every 200th (or so) byte of the data to the
  1.1045 +** checksum.  That way if a single page spans 3 or more disk sectors and
  1.1046 +** only the middle sector is corrupt, we will still have a reasonable
  1.1047 +** chance of failing the checksum and thus detecting the problem.
  1.1048 +*/
  1.1049 +static u32 pager_cksum(Pager *pPager, const u8 *aData){
  1.1050 +  u32 cksum = pPager->cksumInit;
  1.1051 +  int i = pPager->pageSize-200;
  1.1052 +  while( i>0 ){
  1.1053 +    cksum += aData[i];
  1.1054 +    i -= 200;
  1.1055 +  }
  1.1056 +  return cksum;
  1.1057 +}
  1.1058 +
  1.1059 +/* Forward declaration */
  1.1060 +static void makeClean(PgHdr*);
  1.1061 +
  1.1062 +/*
  1.1063 +** Read a single page from the journal file opened on file descriptor
  1.1064 +** jfd.  Playback this one page.
  1.1065 +**
  1.1066 +** The isMainJrnl flag is true if this is the main rollback journal and
  1.1067 +** false for the statement journal.  The main rollback journal uses
  1.1068 +** checksums - the statement journal does not.
  1.1069 +*/
  1.1070 +static int pager_playback_one_page(
  1.1071 +  Pager *pPager,       /* The pager being played back */
  1.1072 +  sqlite3_file *jfd,   /* The file that is the journal being rolled back */
  1.1073 +  i64 offset,          /* Offset of the page within the journal */
  1.1074 +  int isMainJrnl       /* True for main rollback journal. False for Stmt jrnl */
  1.1075 +){
  1.1076 +  int rc;
  1.1077 +  PgHdr *pPg;                   /* An existing page in the cache */
  1.1078 +  Pgno pgno;                    /* The page number of a page in journal */
  1.1079 +  u32 cksum;                    /* Checksum used for sanity checking */
  1.1080 +  u8 *aData = (u8 *)pPager->pTmpSpace;   /* Temp storage for a page */
  1.1081 +
  1.1082 +  /* isMainJrnl should be true for the main journal and false for
  1.1083 +  ** statement journals.  Verify that this is always the case
  1.1084 +  */
  1.1085 +  assert( jfd == (isMainJrnl ? pPager->jfd : pPager->stfd) );
  1.1086 +  assert( aData );
  1.1087 +
  1.1088 +  rc = read32bits(jfd, offset, &pgno);
  1.1089 +  if( rc!=SQLITE_OK ) return rc;
  1.1090 +  rc = sqlite3OsRead(jfd, aData, pPager->pageSize, offset+4);
  1.1091 +  if( rc!=SQLITE_OK ) return rc;
  1.1092 +  pPager->journalOff += pPager->pageSize + 4;
  1.1093 +
  1.1094 +  /* Sanity checking on the page.  This is more important that I originally
  1.1095 +  ** thought.  If a power failure occurs while the journal is being written,
  1.1096 +  ** it could cause invalid data to be written into the journal.  We need to
  1.1097 +  ** detect this invalid data (with high probability) and ignore it.
  1.1098 +  */
  1.1099 +  if( pgno==0 || pgno==PAGER_MJ_PGNO(pPager) ){
  1.1100 +    return SQLITE_DONE;
  1.1101 +  }
  1.1102 +  if( pgno>(unsigned)pPager->dbSize ){
  1.1103 +    return SQLITE_OK;
  1.1104 +  }
  1.1105 +  if( isMainJrnl ){
  1.1106 +    rc = read32bits(jfd, offset+pPager->pageSize+4, &cksum);
  1.1107 +    if( rc ) return rc;
  1.1108 +    pPager->journalOff += 4;
  1.1109 +    if( pager_cksum(pPager, aData)!=cksum ){
  1.1110 +      return SQLITE_DONE;
  1.1111 +    }
  1.1112 +  }
  1.1113 +
  1.1114 +  assert( pPager->state==PAGER_RESERVED || pPager->state>=PAGER_EXCLUSIVE );
  1.1115 +
  1.1116 +  /* If the pager is in RESERVED state, then there must be a copy of this
  1.1117 +  ** page in the pager cache. In this case just update the pager cache,
  1.1118 +  ** not the database file. The page is left marked dirty in this case.
  1.1119 +  **
  1.1120 +  ** An exception to the above rule: If the database is in no-sync mode
  1.1121 +  ** and a page is moved during an incremental vacuum then the page may
  1.1122 +  ** not be in the pager cache. Later: if a malloc() or IO error occurs
  1.1123 +  ** during a Movepage() call, then the page may not be in the cache
  1.1124 +  ** either. So the condition described in the above paragraph is not
  1.1125 +  ** assert()able.
  1.1126 +  **
  1.1127 +  ** If in EXCLUSIVE state, then we update the pager cache if it exists
  1.1128 +  ** and the main file. The page is then marked not dirty.
  1.1129 +  **
  1.1130 +  ** Ticket #1171:  The statement journal might contain page content that is
  1.1131 +  ** different from the page content at the start of the transaction.
  1.1132 +  ** This occurs when a page is changed prior to the start of a statement
  1.1133 +  ** then changed again within the statement.  When rolling back such a
  1.1134 +  ** statement we must not write to the original database unless we know
  1.1135 +  ** for certain that original page contents are synced into the main rollback
  1.1136 +  ** journal.  Otherwise, a power loss might leave modified data in the
  1.1137 +  ** database file without an entry in the rollback journal that can
  1.1138 +  ** restore the database to its original form.  Two conditions must be
  1.1139 +  ** met before writing to the database files. (1) the database must be
  1.1140 +  ** locked.  (2) we know that the original page content is fully synced
  1.1141 +  ** in the main journal either because the page is not in cache or else
  1.1142 +  ** the page is marked as needSync==0.
  1.1143 +  **
  1.1144 +  ** 2008-04-14:  When attempting to vacuum a corrupt database file, it
  1.1145 +  ** is possible to fail a statement on a database that does not yet exist.
  1.1146 +  ** Do not attempt to write if database file has never been opened.
  1.1147 +  */
  1.1148 +  pPg = pager_lookup(pPager, pgno);
  1.1149 +  PAGERTRACE4("PLAYBACK %d page %d hash(%08x)\n",
  1.1150 +               PAGERID(pPager), pgno, pager_datahash(pPager->pageSize, aData));
  1.1151 +  if( (pPager->state>=PAGER_EXCLUSIVE)
  1.1152 +   && (pPg==0 || 0==(pPg->flags&PGHDR_NEED_SYNC))
  1.1153 +   && (pPager->fd->pMethods)
  1.1154 +  ){
  1.1155 +    i64 ofst = (pgno-1)*(i64)pPager->pageSize;
  1.1156 +    rc = sqlite3OsWrite(pPager->fd, aData, pPager->pageSize, ofst);
  1.1157 +  }
  1.1158 +  if( pPg ){
  1.1159 +    /* No page should ever be explicitly rolled back that is in use, except
  1.1160 +    ** for page 1 which is held in use in order to keep the lock on the
  1.1161 +    ** database active. However such a page may be rolled back as a result
  1.1162 +    ** of an internal error resulting in an automatic call to
  1.1163 +    ** sqlite3PagerRollback().
  1.1164 +    */
  1.1165 +    void *pData;
  1.1166 +    pData = pPg->pData;
  1.1167 +    memcpy(pData, aData, pPager->pageSize);
  1.1168 +    if( pPager->xReiniter ){
  1.1169 +      pPager->xReiniter(pPg);
  1.1170 +    }
  1.1171 +    if( isMainJrnl ) makeClean(pPg);
  1.1172 +#ifdef SQLITE_CHECK_PAGES
  1.1173 +    pPg->pageHash = pager_pagehash(pPg);
  1.1174 +#endif
  1.1175 +    /* If this was page 1, then restore the value of Pager.dbFileVers.
  1.1176 +    ** Do this before any decoding. */
  1.1177 +    if( pgno==1 ){
  1.1178 +      memcpy(&pPager->dbFileVers, &((u8*)pData)[24],sizeof(pPager->dbFileVers));
  1.1179 +    }
  1.1180 +
  1.1181 +    /* Decode the page just read from disk */
  1.1182 +    CODEC1(pPager, pData, pPg->pgno, 3);
  1.1183 +    sqlite3PcacheRelease(pPg);
  1.1184 +  }
  1.1185 +  return rc;
  1.1186 +}
  1.1187 +
  1.1188 +/*
  1.1189 +** Parameter zMaster is the name of a master journal file. A single journal
  1.1190 +** file that referred to the master journal file has just been rolled back.
  1.1191 +** This routine checks if it is possible to delete the master journal file,
  1.1192 +** and does so if it is.
  1.1193 +**
  1.1194 +** Argument zMaster may point to Pager.pTmpSpace. So that buffer is not 
  1.1195 +** available for use within this function.
  1.1196 +**
  1.1197 +**
  1.1198 +** The master journal file contains the names of all child journals.
  1.1199 +** To tell if a master journal can be deleted, check to each of the
  1.1200 +** children.  If all children are either missing or do not refer to
  1.1201 +** a different master journal, then this master journal can be deleted.
  1.1202 +*/
  1.1203 +static int pager_delmaster(Pager *pPager, const char *zMaster){
  1.1204 +  sqlite3_vfs *pVfs = pPager->pVfs;
  1.1205 +  int rc;
  1.1206 +  int master_open = 0;
  1.1207 +  sqlite3_file *pMaster;
  1.1208 +  sqlite3_file *pJournal;
  1.1209 +  char *zMasterJournal = 0; /* Contents of master journal file */
  1.1210 +  i64 nMasterJournal;       /* Size of master journal file */
  1.1211 +
  1.1212 +  /* Open the master journal file exclusively in case some other process
  1.1213 +  ** is running this routine also. Not that it makes too much difference.
  1.1214 +  */
  1.1215 +  pMaster = (sqlite3_file *)sqlite3Malloc(pVfs->szOsFile * 2);
  1.1216 +  pJournal = (sqlite3_file *)(((u8 *)pMaster) + pVfs->szOsFile);
  1.1217 +  if( !pMaster ){
  1.1218 +    rc = SQLITE_NOMEM;
  1.1219 +  }else{
  1.1220 +    int flags = (SQLITE_OPEN_READONLY|SQLITE_OPEN_MASTER_JOURNAL);
  1.1221 +    rc = sqlite3OsOpen(pVfs, zMaster, pMaster, flags, 0);
  1.1222 +  }
  1.1223 +  if( rc!=SQLITE_OK ) goto delmaster_out;
  1.1224 +  master_open = 1;
  1.1225 +
  1.1226 +  rc = sqlite3OsFileSize(pMaster, &nMasterJournal);
  1.1227 +  if( rc!=SQLITE_OK ) goto delmaster_out;
  1.1228 +
  1.1229 +  if( nMasterJournal>0 ){
  1.1230 +    char *zJournal;
  1.1231 +    char *zMasterPtr = 0;
  1.1232 +    int nMasterPtr = pPager->pVfs->mxPathname+1;
  1.1233 +
  1.1234 +    /* Load the entire master journal file into space obtained from
  1.1235 +    ** sqlite3_malloc() and pointed to by zMasterJournal. 
  1.1236 +    */
  1.1237 +    zMasterJournal = (char *)sqlite3Malloc(nMasterJournal + nMasterPtr);
  1.1238 +    if( !zMasterJournal ){
  1.1239 +      rc = SQLITE_NOMEM;
  1.1240 +      goto delmaster_out;
  1.1241 +    }
  1.1242 +    zMasterPtr = &zMasterJournal[nMasterJournal];
  1.1243 +    rc = sqlite3OsRead(pMaster, zMasterJournal, nMasterJournal, 0);
  1.1244 +    if( rc!=SQLITE_OK ) goto delmaster_out;
  1.1245 +
  1.1246 +    zJournal = zMasterJournal;
  1.1247 +    while( (zJournal-zMasterJournal)<nMasterJournal ){
  1.1248 +      int exists;
  1.1249 +      rc = sqlite3OsAccess(pVfs, zJournal, SQLITE_ACCESS_EXISTS, &exists);
  1.1250 +      if( rc!=SQLITE_OK ){
  1.1251 +        goto delmaster_out;
  1.1252 +      }
  1.1253 +      if( exists ){
  1.1254 +        /* One of the journals pointed to by the master journal exists.
  1.1255 +        ** Open it and check if it points at the master journal. If
  1.1256 +        ** so, return without deleting the master journal file.
  1.1257 +        */
  1.1258 +        int c;
  1.1259 +        int flags = (SQLITE_OPEN_READONLY|SQLITE_OPEN_MAIN_JOURNAL);
  1.1260 +        rc = sqlite3OsOpen(pVfs, zJournal, pJournal, flags, 0);
  1.1261 +        if( rc!=SQLITE_OK ){
  1.1262 +          goto delmaster_out;
  1.1263 +        }
  1.1264 +
  1.1265 +        rc = readMasterJournal(pJournal, zMasterPtr, nMasterPtr);
  1.1266 +        sqlite3OsClose(pJournal);
  1.1267 +        if( rc!=SQLITE_OK ){
  1.1268 +          goto delmaster_out;
  1.1269 +        }
  1.1270 +
  1.1271 +        c = zMasterPtr[0]!=0 && strcmp(zMasterPtr, zMaster)==0;
  1.1272 +        if( c ){
  1.1273 +          /* We have a match. Do not delete the master journal file. */
  1.1274 +          goto delmaster_out;
  1.1275 +        }
  1.1276 +      }
  1.1277 +      zJournal += (strlen(zJournal)+1);
  1.1278 +    }
  1.1279 +  }
  1.1280 +  
  1.1281 +  rc = sqlite3OsDelete(pVfs, zMaster, 0);
  1.1282 +
  1.1283 +delmaster_out:
  1.1284 +  if( zMasterJournal ){
  1.1285 +    sqlite3_free(zMasterJournal);
  1.1286 +  }  
  1.1287 +  if( master_open ){
  1.1288 +    sqlite3OsClose(pMaster);
  1.1289 +  }
  1.1290 +  sqlite3_free(pMaster);
  1.1291 +  return rc;
  1.1292 +}
  1.1293 +
  1.1294 +
  1.1295 +static void pager_truncate_cache(Pager *pPager);
  1.1296 +
  1.1297 +/*
  1.1298 +** Truncate the main file of the given pager to the number of pages
  1.1299 +** indicated. Also truncate the cached representation of the file.
  1.1300 +**
  1.1301 +** Might might be the case that the file on disk is smaller than nPage.
  1.1302 +** This can happen, for example, if we are in the middle of a transaction
  1.1303 +** which has extended the file size and the new pages are still all held
  1.1304 +** in cache, then an INSERT or UPDATE does a statement rollback.  Some
  1.1305 +** operating system implementations can get confused if you try to
  1.1306 +** truncate a file to some size that is larger than it currently is,
  1.1307 +** so detect this case and write a single zero byte to the end of the new
  1.1308 +** file instead.
  1.1309 +*/
  1.1310 +static int pager_truncate(Pager *pPager, int nPage){
  1.1311 +  int rc = SQLITE_OK;
  1.1312 +  if( pPager->state>=PAGER_EXCLUSIVE && pPager->fd->pMethods ){
  1.1313 +    i64 currentSize, newSize;
  1.1314 +    rc = sqlite3OsFileSize(pPager->fd, &currentSize);
  1.1315 +    newSize = pPager->pageSize*(i64)nPage;
  1.1316 +    if( rc==SQLITE_OK && currentSize!=newSize ){
  1.1317 +      if( currentSize>newSize ){
  1.1318 +        rc = sqlite3OsTruncate(pPager->fd, newSize);
  1.1319 +      }else{
  1.1320 +        rc = sqlite3OsWrite(pPager->fd, "", 1, newSize-1);
  1.1321 +      }
  1.1322 +    }
  1.1323 +  }
  1.1324 +  if( rc==SQLITE_OK ){
  1.1325 +    pPager->dbSize = nPage;
  1.1326 +    pager_truncate_cache(pPager);
  1.1327 +  }
  1.1328 +  return rc;
  1.1329 +}
  1.1330 +
  1.1331 +/*
  1.1332 +** Set the sectorSize for the given pager.
  1.1333 +**
  1.1334 +** The sector size is at least as big as the sector size reported
  1.1335 +** by sqlite3OsSectorSize().  The minimum sector size is 512.
  1.1336 +*/
  1.1337 +static void setSectorSize(Pager *pPager){
  1.1338 +  assert(pPager->fd->pMethods||pPager->tempFile);
  1.1339 +  if( !pPager->tempFile ){
  1.1340 +    /* Sector size doesn't matter for temporary files. Also, the file
  1.1341 +    ** may not have been opened yet, in whcih case the OsSectorSize()
  1.1342 +    ** call will segfault.
  1.1343 +    */
  1.1344 +    pPager->sectorSize = sqlite3OsSectorSize(pPager->fd);
  1.1345 +  }
  1.1346 +  if( pPager->sectorSize<512 ){
  1.1347 +    pPager->sectorSize = 512;
  1.1348 +  }
  1.1349 +}
  1.1350 +
  1.1351 +/*
  1.1352 +** Playback the journal and thus restore the database file to
  1.1353 +** the state it was in before we started making changes.  
  1.1354 +**
  1.1355 +** The journal file format is as follows: 
  1.1356 +**
  1.1357 +**  (1)  8 byte prefix.  A copy of aJournalMagic[].
  1.1358 +**  (2)  4 byte big-endian integer which is the number of valid page records
  1.1359 +**       in the journal.  If this value is 0xffffffff, then compute the
  1.1360 +**       number of page records from the journal size.
  1.1361 +**  (3)  4 byte big-endian integer which is the initial value for the 
  1.1362 +**       sanity checksum.
  1.1363 +**  (4)  4 byte integer which is the number of pages to truncate the
  1.1364 +**       database to during a rollback.
  1.1365 +**  (5)  4 byte big-endian integer which is the sector size.  The header
  1.1366 +**       is this many bytes in size.
**  (6)  4 byte big-endian integer which is the page size.
  1.1368 +**  (7)  4 byte integer which is the number of bytes in the master journal
  1.1369 +**       name.  The value may be zero (indicate that there is no master
  1.1370 +**       journal.)
  1.1371 +**  (8)  N bytes of the master journal name.  The name will be nul-terminated
**       and might be shorter than the value read from (7).  If the first byte
  1.1373 +**       of the name is \000 then there is no master journal.  The master
  1.1374 +**       journal name is stored in UTF-8.
  1.1375 +**  (9)  Zero or more pages instances, each as follows:
  1.1376 +**        +  4 byte page number.
  1.1377 +**        +  pPager->pageSize bytes of data.
  1.1378 +**        +  4 byte checksum
  1.1379 +**
  1.1380 +** When we speak of the journal header, we mean the first 8 items above.
  1.1381 +** Each entry in the journal is an instance of the 9th item.
  1.1382 +**
  1.1383 +** Call the value from the second bullet "nRec".  nRec is the number of
  1.1384 +** valid page entries in the journal.  In most cases, you can compute the
  1.1385 +** value of nRec from the size of the journal file.  But if a power
  1.1386 +** failure occurred while the journal was being written, it could be the
  1.1387 +** case that the size of the journal file had already been increased but
  1.1388 +** the extra entries had not yet made it safely to disk.  In such a case,
  1.1389 +** the value of nRec computed from the file size would be too large.  For
  1.1390 +** that reason, we always use the nRec value in the header.
  1.1391 +**
  1.1392 +** If the nRec value is 0xffffffff it means that nRec should be computed
  1.1393 +** from the file size.  This value is used when the user selects the
  1.1394 +** no-sync option for the journal.  A power failure could lead to corruption
  1.1395 +** in this case.  But for things like temporary table (which will be
  1.1396 +** deleted when the power is restored) we don't care.  
  1.1397 +**
  1.1398 +** If the file opened as the journal file is not a well-formed
  1.1399 +** journal file then all pages up to the first corrupted page are rolled
  1.1400 +** back (or no pages if the journal header is corrupted). The journal file
  1.1401 +** is then deleted and SQLITE_OK returned, just as if no corruption had
  1.1402 +** been encountered.
  1.1403 +**
  1.1404 +** If an I/O or malloc() error occurs, the journal-file is not deleted
  1.1405 +** and an error code is returned.
  1.1406 +*/
static int pager_playback(Pager *pPager, int isHot){
  /* Parameters:
  **   pPager  The pager whose journal (pPager->jfd) is to be played back.
  **           The journal must already be open (see the assert below).
  **   isHot   Only consulted when a header reports nRec==0: the record
  **           count is recomputed from the file size only if isHot is zero.
  **
  ** Returns SQLITE_OK or the error code of the first step that failed.
  ** pPager->pTmpSpace is used as scratch space for the master journal
  ** name, so its previous content is overwritten.
  */
  sqlite3_vfs *pVfs = pPager->pVfs;
  i64 szJ;                 /* Size of the journal file in bytes */
  u32 nRec;                /* Number of Records in the journal */
  u32 u;                   /* Unsigned loop counter */
  Pgno mxPg = 0;           /* Size of the original file in pages */
  int rc;                  /* Result code of a subroutine */
  int res = 1;             /* Value returned by sqlite3OsAccess() */
  char *zMaster = 0;       /* Name of master journal file if any */

  /* Figure out how many records are in the journal.  Abort early if
  ** the journal is empty.
  */
  assert( pPager->journalOpen );
  rc = sqlite3OsFileSize(pPager->jfd, &szJ);
  if( rc!=SQLITE_OK || szJ==0 ){
    goto end_playback;
  }

  /* Read the master journal name from the journal, if it is present.
  ** If a master journal file name is specified, but the file is not
  ** present on disk, then the journal is not hot and does not need to be
  ** played back.
  **
  ** "res" starts out as 1, so when no master journal name is stored the
  ** sqlite3OsAccess() call is skipped and playback proceeds.
  */
  zMaster = pPager->pTmpSpace;
  rc = readMasterJournal(pPager->jfd, zMaster, pPager->pVfs->mxPathname+1);
  if( rc==SQLITE_OK && zMaster[0] ){
    rc = sqlite3OsAccess(pVfs, zMaster, SQLITE_ACCESS_EXISTS, &res);
  }
  /* Reset to NULL: zMaster is only dereferenced at end_playback after it
  ** has been re-assigned there, which happens only when rc==SQLITE_OK.
  */
  zMaster = 0;
  if( rc!=SQLITE_OK || !res ){
    goto end_playback;
  }
  pPager->journalOff = 0;

  /* This loop terminates either when the readJournalHdr() call returns
  ** SQLITE_DONE or an IO error occurs. */
  while( 1 ){

    /* Read the next journal header from the journal file.  If there are
    ** not enough bytes left in the journal file for a complete header, or
    ** it is corrupted, then a process must have failed while writing it.
    ** This indicates nothing more needs to be rolled back.
    */
    rc = readJournalHdr(pPager, szJ, &nRec, &mxPg);
    if( rc!=SQLITE_OK ){
      if( rc==SQLITE_DONE ){
        rc = SQLITE_OK;
      }
      goto end_playback;
    }

    /* If nRec is 0xffffffff, then this journal was created by a process
    ** working in no-sync mode. This means that the rest of the journal
    ** file consists of pages, there are no more journal headers. Compute
    ** the value of nRec based on this assumption.
    */
    if( nRec==0xffffffff ){
      assert( pPager->journalOff==JOURNAL_HDR_SZ(pPager) );
      nRec = (szJ - JOURNAL_HDR_SZ(pPager))/JOURNAL_PG_SZ(pPager);
    }

    /* If nRec is 0 and this rollback is of a transaction created by this
    ** process and if this is the final header in the journal, then it means
    ** that this part of the journal was being filled but has not yet been
    ** synced to disk.  Compute the number of pages based on the remaining
    ** size of the file.
    **
    ** The third term of the test was added to fix ticket #2565.
    */
    if( nRec==0 && !isHot &&
        pPager->journalHdr+JOURNAL_HDR_SZ(pPager)==pPager->journalOff ){
      nRec = (szJ - pPager->journalOff) / JOURNAL_PG_SZ(pPager);
    }

    /* If this is the first header read from the journal, truncate the
    ** database file back to its original size.
    */
    if( pPager->journalOff==JOURNAL_HDR_SZ(pPager) ){
      rc = pager_truncate(pPager, mxPg);
      if( rc!=SQLITE_OK ){
        goto end_playback;
      }
    }

    /* Copy original pages out of the journal and back into the database file.
    ** NOTE(review): the final argument (1) presumably tells
    ** pager_playback_one_page() that these records carry checksums; the
    ** statement-journal caller passes 0.  Confirm against that function.
    */
    for(u=0; u<nRec; u++){
      rc = pager_playback_one_page(pPager, pPager->jfd, pPager->journalOff, 1);
      if( rc!=SQLITE_OK ){
        if( rc==SQLITE_DONE ){
          /* SQLITE_DONE is not treated as an error: stop replaying this
          ** segment and move the offset to the end of the journal, so the
          ** next readJournalHdr() call is made with no bytes remaining.
          */
          rc = SQLITE_OK;
          pPager->journalOff = szJ;
          break;
        }else{
          /* If we are unable to rollback, then the database is probably
          ** going to end up being corrupt.  It is corrupt to us, anyhow.
          ** Perhaps the next process to come along can fix it....
          */
          rc = SQLITE_CORRUPT_BKPT;
          goto end_playback;
        }
      }
    }
  }
  /*NOTREACHED*/
  assert( 0 );

end_playback:
  /* Every exit path funnels through here.  The three steps below run
  ** only while rc is still SQLITE_OK: re-read the master journal name
  ** (pTmpSpace may have been overwritten during playback), end the
  ** transaction, and then try to delete the master journal if one was
  ** named.
  */
  if( rc==SQLITE_OK ){
    zMaster = pPager->pTmpSpace;
    rc = readMasterJournal(pPager->jfd, zMaster, pPager->pVfs->mxPathname+1);
  }
  if( rc==SQLITE_OK ){
    rc = pager_end_transaction(pPager, zMaster[0]!='\0');
  }
  if( rc==SQLITE_OK && zMaster[0] ){
    /* If there was a master journal and this routine will return success,
    ** see if it is possible to delete the master journal.
    */
    rc = pager_delmaster(pPager, zMaster);
  }

  /* The Pager.sectorSize variable may have been updated while rolling
  ** back a journal created by a process with a different sector size
  ** value. Reset it to the correct value for this process.
  */
  setSectorSize(pPager);
  return rc;
}
  1.1537 +
  1.1538 +/*
  1.1539 +** Playback the statement journal.
  1.1540 +**
  1.1541 +** This is similar to playing back the transaction journal but with
  1.1542 +** a few extra twists.
  1.1543 +**
  1.1544 +**    (1)  The number of pages in the database file at the start of
  1.1545 +**         the statement is stored in pPager->stmtSize, not in the
  1.1546 +**         journal file itself.
  1.1547 +**
  1.1548 +**    (2)  In addition to playing back the statement journal, also
  1.1549 +**         playback all pages of the transaction journal beginning
  1.1550 +**         at offset pPager->stmtJSize.
  1.1551 +*/
  1.1552 +static int pager_stmt_playback(Pager *pPager){
  1.1553 +  i64 szJ;                 /* Size of the full journal */
  1.1554 +  i64 hdrOff;
  1.1555 +  int nRec;                /* Number of Records */
  1.1556 +  int i;                   /* Loop counter */
  1.1557 +  int rc;
  1.1558 +
  1.1559 +  szJ = pPager->journalOff;
  1.1560 +
  1.1561 +  /* Set hdrOff to be the offset just after the end of the last journal
  1.1562 +  ** page written before the first journal-header for this statement
  1.1563 +  ** transaction was written, or the end of the file if no journal
  1.1564 +  ** header was written.
  1.1565 +  */
  1.1566 +  hdrOff = pPager->stmtHdrOff;
  1.1567 +  assert( pPager->fullSync || !hdrOff );
  1.1568 +  if( !hdrOff ){
  1.1569 +    hdrOff = szJ;
  1.1570 +  }
  1.1571 +  
  1.1572 +  /* Truncate the database back to its original size.
  1.1573 +  */
  1.1574 +  rc = pager_truncate(pPager, pPager->stmtSize);
  1.1575 +  assert( pPager->state>=PAGER_SHARED );
  1.1576 +
  1.1577 +  /* Figure out how many records are in the statement journal.
  1.1578 +  */
  1.1579 +  assert( pPager->stmtInUse && pPager->journalOpen );
  1.1580 +  nRec = pPager->stmtNRec;
  1.1581 +  
  1.1582 +  /* Copy original pages out of the statement journal and back into the
  1.1583 +  ** database file.  Note that the statement journal omits checksums from
  1.1584 +  ** each record since power-failure recovery is not important to statement
  1.1585 +  ** journals.
  1.1586 +  */
  1.1587 +  for(i=0; i<nRec; i++){
  1.1588 +    i64 offset = i*(4+pPager->pageSize);
  1.1589 +    rc = pager_playback_one_page(pPager, pPager->stfd, offset, 0);
  1.1590 +    assert( rc!=SQLITE_DONE );
  1.1591 +    if( rc!=SQLITE_OK ) goto end_stmt_playback;
  1.1592 +  }
  1.1593 +
  1.1594 +  /* Now roll some pages back from the transaction journal. Pager.stmtJSize
  1.1595 +  ** was the size of the journal file when this statement was started, so
  1.1596 +  ** everything after that needs to be rolled back, either into the
  1.1597 +  ** database, the memory cache, or both.
  1.1598 +  **
  1.1599 +  ** If it is not zero, then Pager.stmtHdrOff is the offset to the start
  1.1600 +  ** of the first journal header written during this statement transaction.
  1.1601 +  */
  1.1602 +  pPager->journalOff = pPager->stmtJSize;
  1.1603 +  pPager->cksumInit = pPager->stmtCksum;
  1.1604 +  while( pPager->journalOff < hdrOff ){
  1.1605 +    rc = pager_playback_one_page(pPager, pPager->jfd, pPager->journalOff, 1);
  1.1606 +    assert( rc!=SQLITE_DONE );
  1.1607 +    if( rc!=SQLITE_OK ) goto end_stmt_playback;
  1.1608 +  }
  1.1609 +
  1.1610 +  while( pPager->journalOff < szJ ){
  1.1611 +    u32 nJRec;         /* Number of Journal Records */
  1.1612 +    u32 dummy;
  1.1613 +    rc = readJournalHdr(pPager, szJ, &nJRec, &dummy);
  1.1614 +    if( rc!=SQLITE_OK ){
  1.1615 +      assert( rc!=SQLITE_DONE );
  1.1616 +      goto end_stmt_playback;
  1.1617 +    }
  1.1618 +    if( nJRec==0 ){
  1.1619 +      nJRec = (szJ - pPager->journalOff) / (pPager->pageSize+8);
  1.1620 +    }
  1.1621 +    for(i=nJRec-1; i>=0 && pPager->journalOff < szJ; i--){
  1.1622 +      rc = pager_playback_one_page(pPager, pPager->jfd, pPager->journalOff, 1);
  1.1623 +      assert( rc!=SQLITE_DONE );
  1.1624 +      if( rc!=SQLITE_OK ) goto end_stmt_playback;
  1.1625 +    }
  1.1626 +  }
  1.1627 +
  1.1628 +  pPager->journalOff = szJ;
  1.1629 +  
  1.1630 +end_stmt_playback:
  1.1631 +  if( rc==SQLITE_OK) {
  1.1632 +    pPager->journalOff = szJ;
  1.1633 +    /* pager_reload_cache(pPager); */
  1.1634 +  }
  1.1635 +  return rc;
  1.1636 +}
  1.1637 +
  1.1638 +/*
  1.1639 +** Change the maximum number of in-memory pages that are allowed.
  1.1640 +*/
void sqlite3PagerSetCachesize(Pager *pPager, int mxPage){
  /* The limit is not stored in the Pager itself; it is forwarded
  ** unchanged to the page cache object owned by this pager.
  */
  sqlite3PcacheSetCachesize(pPager->pPCache, mxPage);
}
  1.1644 +
  1.1645 +/*
  1.1646 +** Adjust the robustness of the database to damage due to OS crashes
** or power failures by changing the number of sync()s when writing
  1.1648 +** the rollback journal.  There are three levels:
  1.1649 +**
  1.1650 +**    OFF       sqlite3OsSync() is never called.  This is the default
  1.1651 +**              for temporary and transient files.
  1.1652 +**
  1.1653 +**    NORMAL    The journal is synced once before writes begin on the
  1.1654 +**              database.  This is normally adequate protection, but
  1.1655 +**              it is theoretically possible, though very unlikely,
**              that an inopportune power failure could leave the journal
  1.1657 +**              in a state which would cause damage to the database
  1.1658 +**              when it is rolled back.
  1.1659 +**
  1.1660 +**    FULL      The journal is synced twice before writes begin on the
  1.1661 +**              database (with some additional information - the nRec field
  1.1662 +**              of the journal header - being written in between the two
  1.1663 +**              syncs).  If we assume that writing a
  1.1664 +**              single disk sector is atomic, then this mode provides
  1.1665 +**              assurance that the journal will not be corrupted to the
  1.1666 +**              point of causing damage to the database during rollback.
  1.1667 +**
  1.1668 +** Numeric values associated with these states are OFF==1, NORMAL=2,
  1.1669 +** and FULL=3.
  1.1670 +*/
  1.1671 +#ifndef SQLITE_OMIT_PAGER_PRAGMAS
  1.1672 +void sqlite3PagerSetSafetyLevel(Pager *pPager, int level, int bFullFsync){
  1.1673 +  pPager->noSync =  level==1 || pPager->tempFile || MEMDB;
  1.1674 +  pPager->fullSync = level==3 && !pPager->tempFile;
  1.1675 +  pPager->sync_flags = (bFullFsync?SQLITE_SYNC_FULL:SQLITE_SYNC_NORMAL);
  1.1676 +  if( pPager->noSync ) pPager->needSync = 0;
  1.1677 +}
  1.1678 +#endif
  1.1679 +
  1.1680 +/*
  1.1681 +** The following global variable is incremented whenever the library
  1.1682 +** attempts to open a temporary file.  This information is used for
  1.1683 +** testing and analysis only.  
  1.1684 +*/
#ifdef SQLITE_TEST
int sqlite3_opentemp_count = 0;  /* Incremented by sqlite3PagerOpentemp() */
#endif
  1.1688 +
  1.1689 +/*
  1.1690 +** Open a temporary file. 
  1.1691 +**
  1.1692 +** Write the file descriptor into *fd.  Return SQLITE_OK on success or some
  1.1693 +** other error code if we fail. The OS will automatically delete the temporary
  1.1694 +** file when it is closed.
  1.1695 +*/
  1.1696 +static int sqlite3PagerOpentemp(
  1.1697 +  Pager *pPager,        /* The pager object */
  1.1698 +  sqlite3_file *pFile,  /* Write the file descriptor here */
  1.1699 +  int vfsFlags          /* Flags passed through to the VFS */
  1.1700 +){
  1.1701 +  int rc;
  1.1702 +
  1.1703 +#ifdef SQLITE_TEST
  1.1704 +  sqlite3_opentemp_count++;  /* Used for testing and analysis only */
  1.1705 +#endif
  1.1706 +
  1.1707 +  vfsFlags |=  SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE |
  1.1708 +            SQLITE_OPEN_EXCLUSIVE | SQLITE_OPEN_DELETEONCLOSE;
  1.1709 +  rc = sqlite3OsOpen(pPager->pVfs, 0, pFile, vfsFlags, 0);
  1.1710 +  assert( rc!=SQLITE_OK || pFile->pMethods );
  1.1711 +  return rc;
  1.1712 +}
  1.1713 +
/* Forward declaration.  sqlite3PagerOpen() passes this function to
** sqlite3PcacheOpen() as a callback for databases that are not in-memory.
*/
static int pagerStress(void *,PgHdr *);
  1.1715 +
  1.1716 +/*
  1.1717 +** Create a new page cache and put a pointer to the page cache in *ppPager.
  1.1718 +** The file to be cached need not exist.  The file is not locked until
  1.1719 +** the first call to sqlite3PagerGet() and is only held open until the
  1.1720 +** last page is released using sqlite3PagerUnref().
  1.1721 +**
  1.1722 +** If zFilename is NULL then a randomly-named temporary file is created
  1.1723 +** and used as the file to be cached.  The file will be deleted
  1.1724 +** automatically when it is closed.
  1.1725 +**
  1.1726 +** If zFilename is ":memory:" then all information is held in cache.
  1.1727 +** It is never written to disk.  This can be used to implement an
  1.1728 +** in-memory database.
  1.1729 +*/
  1.1730 +int sqlite3PagerOpen(
  1.1731 +  sqlite3_vfs *pVfs,       /* The virtual file system to use */
  1.1732 +  Pager **ppPager,         /* Return the Pager structure here */
  1.1733 +  const char *zFilename,   /* Name of the database file to open */
  1.1734 +  int nExtra,              /* Extra bytes append to each in-memory page */
  1.1735 +  int flags,               /* flags controlling this file */
  1.1736 +  int vfsFlags             /* flags passed through to sqlite3_vfs.xOpen() */
  1.1737 +){
  1.1738 +  u8 *pPtr;
  1.1739 +  Pager *pPager = 0;
  1.1740 +  int rc = SQLITE_OK;
  1.1741 +  int i;
  1.1742 +  int tempFile = 0;
  1.1743 +  int memDb = 0;
  1.1744 +  int readOnly = 0;
  1.1745 +  int useJournal = (flags & PAGER_OMIT_JOURNAL)==0;
  1.1746 +  int noReadlock = (flags & PAGER_NO_READLOCK)!=0;
  1.1747 +  int journalFileSize = sqlite3JournalSize(pVfs);
  1.1748 +  int pcacheSize = sqlite3PcacheSize();
  1.1749 +  int szPageDflt = SQLITE_DEFAULT_PAGE_SIZE;
  1.1750 +  char *zPathname = 0;
  1.1751 +  int nPathname = 0;
  1.1752 +
  1.1753 +  /* The default return is a NULL pointer */
  1.1754 +  *ppPager = 0;
  1.1755 +
  1.1756 +  /* Compute and store the full pathname in an allocated buffer pointed
  1.1757 +  ** to by zPathname, length nPathname. Or, if this is a temporary file,
  1.1758 +  ** leave both nPathname and zPathname set to 0.
  1.1759 +  */
  1.1760 +  if( zFilename && zFilename[0] ){
  1.1761 +    nPathname = pVfs->mxPathname+1;
  1.1762 +    zPathname = sqlite3Malloc(nPathname*2);
  1.1763 +    if( zPathname==0 ){
  1.1764 +      return SQLITE_NOMEM;
  1.1765 +    }
  1.1766 +#ifndef SQLITE_OMIT_MEMORYDB
  1.1767 +    if( strcmp(zFilename,":memory:")==0 ){
  1.1768 +      memDb = 1;
  1.1769 +      zPathname[0] = 0;
  1.1770 +      useJournal = 0;
  1.1771 +    }else
  1.1772 +#endif
  1.1773 +    {
  1.1774 +      rc = sqlite3OsFullPathname(pVfs, zFilename, nPathname, zPathname);
  1.1775 +    }
  1.1776 +    if( rc!=SQLITE_OK ){
  1.1777 +      sqlite3_free(zPathname);
  1.1778 +      return rc;
  1.1779 +    }
  1.1780 +    nPathname = strlen(zPathname);
  1.1781 +  }
  1.1782 +
  1.1783 +  /* Allocate memory for the pager structure */
  1.1784 +  pPager = sqlite3MallocZero(
  1.1785 +    sizeof(*pPager) +           /* Pager structure */
  1.1786 +    pcacheSize      +           /* PCache object */
  1.1787 +    journalFileSize +           /* The journal file structure */ 
  1.1788 +    pVfs->szOsFile * 3 +        /* The main db and two journal files */ 
  1.1789 +    3*nPathname + 40            /* zFilename, zDirectory, zJournal */
  1.1790 +  );
  1.1791 +  if( !pPager ){
  1.1792 +    sqlite3_free(zPathname);
  1.1793 +    return SQLITE_NOMEM;
  1.1794 +  }
  1.1795 +  pPager->pPCache = (PCache *)&pPager[1];
  1.1796 +  pPtr = ((u8 *)&pPager[1]) + pcacheSize;
  1.1797 +  pPager->vfsFlags = vfsFlags;
  1.1798 +  pPager->fd = (sqlite3_file*)&pPtr[pVfs->szOsFile*0];
  1.1799 +  pPager->stfd = (sqlite3_file*)&pPtr[pVfs->szOsFile*1];
  1.1800 +  pPager->jfd = (sqlite3_file*)&pPtr[pVfs->szOsFile*2];
  1.1801 +  pPager->zFilename = (char*)&pPtr[pVfs->szOsFile*2+journalFileSize];
  1.1802 +  pPager->zDirectory = &pPager->zFilename[nPathname+1];
  1.1803 +  pPager->zJournal = &pPager->zDirectory[nPathname+1];
  1.1804 +  pPager->pVfs = pVfs;
  1.1805 +  if( zPathname ){
  1.1806 +    memcpy(pPager->zFilename, zPathname, nPathname+1);
  1.1807 +    sqlite3_free(zPathname);
  1.1808 +  }
  1.1809 +
  1.1810 +  /* Open the pager file.
  1.1811 +  */
  1.1812 +  if( zFilename && zFilename[0] && !memDb ){
  1.1813 +    if( nPathname>(pVfs->mxPathname - sizeof("-journal")) ){
  1.1814 +      rc = SQLITE_CANTOPEN;
  1.1815 +    }else{
  1.1816 +      int fout = 0;
  1.1817 +      rc = sqlite3OsOpen(pVfs, pPager->zFilename, pPager->fd,
  1.1818 +                         pPager->vfsFlags, &fout);
  1.1819 +      readOnly = (fout&SQLITE_OPEN_READONLY);
  1.1820 +
  1.1821 +      /* If the file was successfully opened for read/write access,
  1.1822 +      ** choose a default page size in case we have to create the
  1.1823 +      ** database file. The default page size is the maximum of:
  1.1824 +      **
  1.1825 +      **    + SQLITE_DEFAULT_PAGE_SIZE,
  1.1826 +      **    + The value returned by sqlite3OsSectorSize()
  1.1827 +      **    + The largest page size that can be written atomically.
  1.1828 +      */
  1.1829 +      if( rc==SQLITE_OK && !readOnly ){
  1.1830 +        int iSectorSize = sqlite3OsSectorSize(pPager->fd);
  1.1831 +        if( szPageDflt<iSectorSize ){
  1.1832 +          szPageDflt = iSectorSize;
  1.1833 +        }
  1.1834 +#ifdef SQLITE_ENABLE_ATOMIC_WRITE
  1.1835 +        {
  1.1836 +          int iDc = sqlite3OsDeviceCharacteristics(pPager->fd);
  1.1837 +          int ii;
  1.1838 +          assert(SQLITE_IOCAP_ATOMIC512==(512>>8));
  1.1839 +          assert(SQLITE_IOCAP_ATOMIC64K==(65536>>8));
  1.1840 +          assert(SQLITE_MAX_DEFAULT_PAGE_SIZE<=65536);
  1.1841 +          for(ii=szPageDflt; ii<=SQLITE_MAX_DEFAULT_PAGE_SIZE; ii=ii*2){
  1.1842 +            if( iDc&(SQLITE_IOCAP_ATOMIC|(ii>>8)) ) szPageDflt = ii;
  1.1843 +          }
  1.1844 +        }
  1.1845 +#endif
  1.1846 +        if( szPageDflt>SQLITE_MAX_DEFAULT_PAGE_SIZE ){
  1.1847 +          szPageDflt = SQLITE_MAX_DEFAULT_PAGE_SIZE;
  1.1848 +        }
  1.1849 +      }
  1.1850 +    }
  1.1851 +  }else if( !memDb ){
  1.1852 +    /* If a temporary file is requested, it is not opened immediately.
  1.1853 +    ** In this case we accept the default page size and delay actually
  1.1854 +    ** opening the file until the first call to OsWrite().
  1.1855 +    */ 
  1.1856 +    tempFile = 1;
  1.1857 +    pPager->state = PAGER_EXCLUSIVE;
  1.1858 +  }
  1.1859 +
  1.1860 +  if( pPager && rc==SQLITE_OK ){
  1.1861 +    pPager->pTmpSpace = sqlite3PageMalloc(szPageDflt);
  1.1862 +  }
  1.1863 +
  1.1864 +  /* If an error occured in either of the blocks above.
  1.1865 +  ** Free the Pager structure and close the file.
  1.1866 +  ** Since the pager is not allocated there is no need to set 
  1.1867 +  ** any Pager.errMask variables.
  1.1868 +  */
  1.1869 +  if( !pPager || !pPager->pTmpSpace ){
  1.1870 +    sqlite3OsClose(pPager->fd);
  1.1871 +    sqlite3_free(pPager);
  1.1872 +    return ((rc==SQLITE_OK)?SQLITE_NOMEM:rc);
  1.1873 +  }
  1.1874 +  nExtra = FORCE_ALIGNMENT(nExtra);
  1.1875 +  sqlite3PcacheOpen(szPageDflt, nExtra, !memDb,
  1.1876 +                    !memDb?pagerStress:0, (void *)pPager, pPager->pPCache);
  1.1877 +
  1.1878 +  PAGERTRACE3("OPEN %d %s\n", FILEHANDLEID(pPager->fd), pPager->zFilename);
  1.1879 +  IOTRACE(("OPEN %p %s\n", pPager, pPager->zFilename))
  1.1880 +
  1.1881 +  /* Fill in Pager.zDirectory[] */
  1.1882 +  memcpy(pPager->zDirectory, pPager->zFilename, nPathname+1);
  1.1883 +  for(i=strlen(pPager->zDirectory); i>0 && pPager->zDirectory[i-1]!='/'; i--){}
  1.1884 +  if( i>0 ) pPager->zDirectory[i-1] = 0;
  1.1885 +
  1.1886 +  /* Fill in Pager.zJournal[] */
  1.1887 +  if( zPathname ){
  1.1888 +    memcpy(pPager->zJournal, pPager->zFilename, nPathname);
  1.1889 +    memcpy(&pPager->zJournal[nPathname], "-journal", 9);
  1.1890 +  }else{
  1.1891 +    pPager->zJournal = 0;
  1.1892 +  }
  1.1893 +
  1.1894 +  /* pPager->journalOpen = 0; */
  1.1895 +  pPager->useJournal = useJournal;
  1.1896 +  pPager->noReadlock = noReadlock && readOnly;
  1.1897 +  /* pPager->stmtOpen = 0; */
  1.1898 +  /* pPager->stmtInUse = 0; */
  1.1899 +  /* pPager->nRef = 0; */
  1.1900 +  pPager->dbSize = memDb-1;
  1.1901 +  pPager->pageSize = szPageDflt;
  1.1902 +  /* pPager->stmtSize = 0; */
  1.1903 +  /* pPager->stmtJSize = 0; */
  1.1904 +  /* pPager->nPage = 0; */
  1.1905 +  pPager->mxPage = 100;
  1.1906 +  pPager->mxPgno = SQLITE_MAX_PAGE_COUNT;
  1.1907 +  /* pPager->state = PAGER_UNLOCK; */
  1.1908 +  assert( pPager->state == (tempFile ? PAGER_EXCLUSIVE : PAGER_UNLOCK) );
  1.1909 +  /* pPager->errMask = 0; */
  1.1910 +  pPager->tempFile = tempFile;
  1.1911 +  assert( tempFile==PAGER_LOCKINGMODE_NORMAL 
  1.1912 +          || tempFile==PAGER_LOCKINGMODE_EXCLUSIVE );
  1.1913 +  assert( PAGER_LOCKINGMODE_EXCLUSIVE==1 );
  1.1914 +  pPager->exclusiveMode = tempFile; 
  1.1915 +  pPager->memDb = memDb;
  1.1916 +  pPager->readOnly = readOnly;
  1.1917 +  /* pPager->needSync = 0; */
  1.1918 +  pPager->noSync = pPager->tempFile || !useJournal;
  1.1919 +  pPager->fullSync = (pPager->noSync?0:1);
  1.1920 +  pPager->sync_flags = SQLITE_SYNC_NORMAL;
  1.1921 +  /* pPager->pFirst = 0; */
  1.1922 +  /* pPager->pFirstSynced = 0; */
  1.1923 +  /* pPager->pLast = 0; */
  1.1924 +  pPager->nExtra = nExtra;
  1.1925 +  pPager->journalSizeLimit = SQLITE_DEFAULT_JOURNAL_SIZE_LIMIT;
  1.1926 +  assert(pPager->fd->pMethods||memDb||tempFile);
  1.1927 +  if( !memDb ){
  1.1928 +    setSectorSize(pPager);
  1.1929 +  }
  1.1930 +  /* pPager->pBusyHandler = 0; */
  1.1931 +  /* memset(pPager->aHash, 0, sizeof(pPager->aHash)); */
  1.1932 +  *ppPager = pPager;
  1.1933 +  return SQLITE_OK;
  1.1934 +}
  1.1935 +
  1.1936 +/*
  1.1937 +** Set the busy handler function.
  1.1938 +*/
void sqlite3PagerSetBusyhandler(Pager *pPager, BusyHandler *pBusyHandler){
  /* Only the pointer is stored; the BusyHandler object is not copied. */
  pPager->pBusyHandler = pBusyHandler;
}
  1.1942 +
  1.1943 +/*
  1.1944 +** Set the reinitializer for this pager.  If not NULL, the reinitializer
  1.1945 +** is called when the content of a page in cache is restored to its original
  1.1946 +** value as a result of a rollback.  The callback gives higher-level code
  1.1947 +** an opportunity to restore the EXTRA section to agree with the restored
  1.1948 +** page data.
  1.1949 +*/
void sqlite3PagerSetReiniter(Pager *pPager, void (*xReinit)(DbPage*)){
  /* Stored unconditionally; passing NULL clears any previous callback. */
  pPager->xReiniter = xReinit;
}
  1.1953 +
  1.1954 +/*
  1.1955 +** Set the page size to *pPageSize. If the suggest new page size is
  1.1956 +** inappropriate, then an alternative page size is set to that
  1.1957 +** value before returning.
  1.1958 +*/
  1.1959 +int sqlite3PagerSetPagesize(Pager *pPager, u16 *pPageSize){
  1.1960 +  int rc = pPager->errCode;
  1.1961 +  if( rc==SQLITE_OK ){
  1.1962 +    u16 pageSize = *pPageSize;
  1.1963 +    assert( pageSize==0 || (pageSize>=512 && pageSize<=SQLITE_MAX_PAGE_SIZE) );
  1.1964 +    if( pageSize && pageSize!=pPager->pageSize 
  1.1965 +     && (pPager->memDb==0 || pPager->dbSize==0)
  1.1966 +     && sqlite3PcacheRefCount(pPager->pPCache)==0 
  1.1967 +    ){
  1.1968 +      char *pNew = (char *)sqlite3PageMalloc(pageSize);
  1.1969 +      if( !pNew ){
  1.1970 +        rc = SQLITE_NOMEM;
  1.1971 +      }else{
  1.1972 +        pager_reset(pPager);
  1.1973 +        pPager->pageSize = pageSize;
  1.1974 +        if( !pPager->memDb ) setSectorSize(pPager);
  1.1975 +        sqlite3PageFree(pPager->pTmpSpace);
  1.1976 +        pPager->pTmpSpace = pNew;
  1.1977 +        sqlite3PcacheSetPageSize(pPager->pPCache, pageSize);
  1.1978 +      }
  1.1979 +    }
  1.1980 +    *pPageSize = pPager->pageSize;
  1.1981 +  }
  1.1982 +  return rc;
  1.1983 +}
  1.1984 +
  1.1985 +/*
  1.1986 +** Return a pointer to the "temporary page" buffer held internally
  1.1987 +** by the pager.  This is a buffer that is big enough to hold the
  1.1988 +** entire content of a database page.  This buffer is used internally
  1.1989 +** during rollback and will be overwritten whenever a rollback
  1.1990 +** occurs.  But other modules are free to use it too, as long as
  1.1991 +** no rollbacks are happening.
  1.1992 +*/
void *sqlite3PagerTempSpace(Pager *pPager){
  /* Returns the pager's own buffer, not a copy.  sqlite3PagerSetPagesize()
  ** frees and replaces this buffer, so a pointer obtained here does not
  ** stay valid across a page-size change.
  */
  return pPager->pTmpSpace;
}
  1.1996 +
  1.1997 +/*
  1.1998 +** Attempt to set the maximum database page count if mxPage is positive. 
  1.1999 +** Make no changes if mxPage is zero or negative.  And never reduce the
  1.2000 +** maximum page count below the current size of the database.
  1.2001 +**
  1.2002 +** Regardless of mxPage, return the current maximum page count.
  1.2003 +*/
  1.2004 +int sqlite3PagerMaxPageCount(Pager *pPager, int mxPage){
  1.2005 +  if( mxPage>0 ){
  1.2006 +    pPager->mxPgno = mxPage;
  1.2007 +  }
  1.2008 +  sqlite3PagerPagecount(pPager, 0);
  1.2009 +  return pPager->mxPgno;
  1.2010 +}
  1.2011 +
  1.2012 +/*
  1.2013 +** The following set of routines are used to disable the simulated
  1.2014 +** I/O error mechanism.  These routines are used to avoid simulated
  1.2015 +** errors in places where we do not care about errors.
  1.2016 +**
  1.2017 +** Unless -DSQLITE_TEST=1 is used, these routines are all no-ops
  1.2018 +** and generate no code.
  1.2019 +*/
#ifdef SQLITE_TEST
extern int sqlite3_io_error_pending;
extern int sqlite3_io_error_hit;
/* Value of sqlite3_io_error_pending saved by the most recent disable
** call.  There is a single slot, so disable/enable pairs do not nest.
*/
static int saved_cnt;
/* Remember the current pending counter and overwrite it with -1. */
void disable_simulated_io_errors(void){
  saved_cnt = sqlite3_io_error_pending;
  sqlite3_io_error_pending = -1;
}
/* Put back the counter remembered by disable_simulated_io_errors(). */
void enable_simulated_io_errors(void){
  sqlite3_io_error_pending = saved_cnt;
}
#else
/* Outside of test builds both calls expand to nothing. */
# define disable_simulated_io_errors()
# define enable_simulated_io_errors()
#endif
  1.2035 +
  1.2036 +/*
  1.2037 +** Read the first N bytes from the beginning of the file into memory
  1.2038 +** that pDest points to. 
  1.2039 +**
  1.2040 +** No error checking is done. The rational for this is that this function 
  1.2041 +** may be called even if the file does not exist or contain a header. In 
  1.2042 +** these cases sqlite3OsRead() will return an error, to which the correct 
  1.2043 +** response is to zero the memory at pDest and continue.  A real IO error 
  1.2044 +** will presumably recur and be picked up later (Todo: Think about this).
  1.2045 +*/
  1.2046 +int sqlite3PagerReadFileheader(Pager *pPager, int N, unsigned char *pDest){
  1.2047 +  int rc = SQLITE_OK;
  1.2048 +  memset(pDest, 0, N);
  1.2049 +  assert(MEMDB||pPager->fd->pMethods||pPager->tempFile);
  1.2050 +  if( pPager->fd->pMethods ){
  1.2051 +    IOTRACE(("DBHDR %p 0 %d\n", pPager, N))
  1.2052 +    rc = sqlite3OsRead(pPager->fd, pDest, N, 0);
  1.2053 +    if( rc==SQLITE_IOERR_SHORT_READ ){
  1.2054 +      rc = SQLITE_OK;
  1.2055 +    }
  1.2056 +  }
  1.2057 +  return rc;
  1.2058 +}
  1.2059 +
  1.2060 +/*
  1.2061 +** Return the total number of pages in the disk file associated with
  1.2062 +** pPager. 
  1.2063 +**
  1.2064 +** If the PENDING_BYTE lies on the page directly after the end of the
  1.2065 +** file, then consider this page part of the file too. For example, if
  1.2066 +** PENDING_BYTE is byte 4096 (the first byte of page 5) and the size of the
  1.2067 +** file is 4096 bytes, 5 is returned instead of 4.
  1.2068 +*/
  1.2069 +int sqlite3PagerPagecount(Pager *pPager, int *pnPage){
  1.2070 +  i64 n = 0;
  1.2071 +  int rc;
  1.2072 +  assert( pPager!=0 );
  1.2073 +  if( pPager->errCode ){
  1.2074 +    rc = pPager->errCode;
  1.2075 +    return rc;
  1.2076 +  }
  1.2077 +  if( pPager->dbSize>=0 ){
  1.2078 +    n = pPager->dbSize;
  1.2079 +  } else {
  1.2080 +    assert(pPager->fd->pMethods||pPager->tempFile);
  1.2081 +    if( (pPager->fd->pMethods)
  1.2082 +     && (rc = sqlite3OsFileSize(pPager->fd, &n))!=SQLITE_OK ){
  1.2083 +      pager_error(pPager, rc);
  1.2084 +      return rc;
  1.2085 +    }
  1.2086 +    if( n>0 && n<pPager->pageSize ){
  1.2087 +      n = 1;
  1.2088 +    }else{
  1.2089 +      n /= pPager->pageSize;
  1.2090 +    }
  1.2091 +    if( pPager->state!=PAGER_UNLOCK ){
  1.2092 +      pPager->dbSize = n;
  1.2093 +    }
  1.2094 +  }
  1.2095 +  if( n==(PENDING_BYTE/pPager->pageSize) ){
  1.2096 +    n++;
  1.2097 +  }
  1.2098 +  if( n>pPager->mxPgno ){
  1.2099 +    pPager->mxPgno = n;
  1.2100 +  }
  1.2101 +  if( pnPage ){
  1.2102 +    *pnPage = n;
  1.2103 +  }
  1.2104 +  return SQLITE_OK;
  1.2105 +}
  1.2106 +
  1.2107 +/*
  1.2108 +** Forward declaration
  1.2109 +*/
  1.2110 +static int syncJournal(Pager*);
  1.2111 +
/*
** This routine is used to truncate the cache when a database
** is truncated.  It forwards to sqlite3PcacheTruncate(), passing
** pPager->dbSize as the new size of the database in pages.
**
** Intended effect (NOTE(review): carried over from the original
** comment; the behaviour lives in sqlite3PcacheTruncate() - confirm
** there):
**
**   * Unreferenced pages whose pgno is larger than pPager->dbSize are
**     dropped from the cache.
**   * Referenced pages larger than pPager->dbSize are zeroed.
**
** Actually, at the point this routine is called, it would be
** an error to have a referenced page.  But rather than delete
** that page and guarantee a subsequent segfault, it seems better
** to zero it and hope that we error out sanely.
*/
static void pager_truncate_cache(Pager *pPager){
  sqlite3PcacheTruncate(pPager->pPCache, pPager->dbSize);
}
  1.2127 +
  1.2128 +/*
  1.2129 +** Try to obtain a lock on a file.  Invoke the busy callback if the lock
  1.2130 +** is currently not available.  Repeat until the busy callback returns
  1.2131 +** false or until the lock succeeds.
  1.2132 +**
  1.2133 +** Return SQLITE_OK on success and an error code if we cannot obtain
  1.2134 +** the lock.
  1.2135 +*/
  1.2136 +static int pager_wait_on_lock(Pager *pPager, int locktype){
  1.2137 +  int rc;
  1.2138 +
  1.2139 +  /* The OS lock values must be the same as the Pager lock values */
  1.2140 +  assert( PAGER_SHARED==SHARED_LOCK );
  1.2141 +  assert( PAGER_RESERVED==RESERVED_LOCK );
  1.2142 +  assert( PAGER_EXCLUSIVE==EXCLUSIVE_LOCK );
  1.2143 +
  1.2144 +  /* If the file is currently unlocked then the size must be unknown */
  1.2145 +  assert( pPager->state>=PAGER_SHARED || pPager->dbSize<0 || MEMDB );
  1.2146 +
  1.2147 +  if( pPager->state>=locktype ){
  1.2148 +    rc = SQLITE_OK;
  1.2149 +  }else{
  1.2150 +    if( pPager->pBusyHandler ) pPager->pBusyHandler->nBusy = 0;
  1.2151 +    do {
  1.2152 +      rc = sqlite3OsLock(pPager->fd, locktype);
  1.2153 +    }while( rc==SQLITE_BUSY && sqlite3InvokeBusyHandler(pPager->pBusyHandler) );
  1.2154 +    if( rc==SQLITE_OK ){
  1.2155 +      pPager->state = locktype;
  1.2156 +      IOTRACE(("LOCK %p %d\n", pPager, locktype))
  1.2157 +    }
  1.2158 +  }
  1.2159 +  return rc;
  1.2160 +}
  1.2161 +
  1.2162 +/*
  1.2163 +** Truncate the file to the number of pages specified.
  1.2164 +*/
  1.2165 +int sqlite3PagerTruncate(Pager *pPager, Pgno nPage){
  1.2166 +  int rc = SQLITE_OK;
  1.2167 +  assert( pPager->state>=PAGER_SHARED || MEMDB );
  1.2168 +
  1.2169 +
  1.2170 +  sqlite3PagerPagecount(pPager, 0);
  1.2171 +  if( pPager->errCode ){
  1.2172 +    rc = pPager->errCode;
  1.2173 +  }else if( nPage<(unsigned)pPager->dbSize ){
  1.2174 +    if( MEMDB ){
  1.2175 +      pPager->dbSize = nPage;
  1.2176 +      pager_truncate_cache(pPager);
  1.2177 +    }else{
  1.2178 +      rc = syncJournal(pPager);
  1.2179 +      if( rc==SQLITE_OK ){
  1.2180 +        /* Get an exclusive lock on the database before truncating. */
  1.2181 +        rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK);
  1.2182 +      }
  1.2183 +      if( rc==SQLITE_OK ){
  1.2184 +        rc = pager_truncate(pPager, nPage);
  1.2185 +      }
  1.2186 +    }
  1.2187 +  }
  1.2188 +
  1.2189 +  return rc;
  1.2190 +}
  1.2191 +
/*
** Shutdown the page cache.  Free all memory and close all files.
**
** If a transaction was in progress when this routine is called, that
** transaction is rolled back.  All outstanding pages are invalidated
** and their memory is freed.  Any attempt to use a page associated
** with this page cache after this function returns will likely
** result in a coredump.
**
** This function always succeeds (it unconditionally returns SQLITE_OK).
** If a transaction is active an attempt is made to roll it back. If an
** error occurs during the rollback a hot journal may be left in the
** filesystem but no error is returned to the caller.
**
** pPager itself is freed; the pointer must not be used afterwards.
*/
int sqlite3PagerClose(Pager *pPager){

  /* The reset/rollback below runs with simulated IO errors disabled and
  ** inside a benign-malloc section; both are restored right after. */
  disable_simulated_io_errors();
  sqlite3BeginBenignMalloc();
  /* Clear any error state and leave exclusive mode before resetting and
  ** unlocking.  NOTE(review): presumably so that the unlock/rollback is
  ** not skipped for exclusive-mode or errored pagers - confirm. */
  pPager->errCode = 0;
  pPager->exclusiveMode = 0;
  pager_reset(pPager);
  pagerUnlockAndRollback(pPager);
  enable_simulated_io_errors();
  sqlite3EndBenignMalloc();
  PAGERTRACE2("CLOSE %d\n", PAGERID(pPager));
  IOTRACE(("CLOSE %p\n", pPager))
  /* Close the journal file if it is still open. */
  if( pPager->journalOpen ){
    sqlite3OsClose(pPager->jfd);
  }
  sqlite3BitvecDestroy(pPager->pInJournal);
  sqlite3BitvecDestroy(pPager->pAlwaysRollback);
  /* Close the statement journal if it is still open. */
  if( pPager->stmtOpen ){
    sqlite3OsClose(pPager->stfd);
  }
  sqlite3OsClose(pPager->fd);
  /* Temp files are automatically deleted by the OS
  ** if( pPager->tempFile ){
  **   sqlite3OsDelete(pPager->zFilename);
  ** }
  */

  /* Release the scratch page, the page cache and finally the Pager. */
  sqlite3PageFree(pPager->pTmpSpace);
  sqlite3PcacheClose(pPager->pPCache);
  sqlite3_free(pPager);
  return SQLITE_OK;
}
  1.2238 +
#if !defined(NDEBUG) || defined(SQLITE_TEST)
/*
** Return the page number for the given page data.
**
** This simply reads the pgno field of the page header p.  The routine
** is only compiled when assertions are enabled (NDEBUG not defined) or
** when SQLITE_TEST is defined.
*/
Pgno sqlite3PagerPagenumber(DbPage *p){
  return p->pgno;
}
#endif
  1.2247 +
/*
** Increment the reference count for a page.  The input pointer is
** a reference to the page data.
**
** The work is delegated to sqlite3PcacheRef().  This routine always
** returns SQLITE_OK.
*/
int sqlite3PagerRef(DbPage *pPg){
  sqlite3PcacheRef(pPg);
  return SQLITE_OK;
}
  1.2256 +
  1.2257 +/*
  1.2258 +** Sync the journal.  In other words, make sure all the pages that have
  1.2259 +** been written to the journal have actually reached the surface of the
  1.2260 +** disk.  It is not safe to modify the original database file until after
  1.2261 +** the journal has been synced.  If the original database is modified before
  1.2262 +** the journal is synced and a power failure occurs, the unsynced journal
  1.2263 +** data would be lost and we would be unable to completely rollback the
  1.2264 +** database changes.  Database corruption would occur.
  1.2265 +** 
  1.2266 +** This routine also updates the nRec field in the header of the journal.
  1.2267 +** (See comments on the pager_playback() routine for additional information.)
  1.2268 +** If the sync mode is FULL, two syncs will occur.  First the whole journal
  1.2269 +** is synced, then the nRec field is updated, then a second sync occurs.
  1.2270 +**
  1.2271 +** For temporary databases, we do not care if we are able to rollback
  1.2272 +** after a power failure, so no sync occurs.
  1.2273 +**
  1.2274 +** If the IOCAP_SEQUENTIAL flag is set for the persistent media on which
  1.2275 +** the database is stored, then OsSync() is never called on the journal
  1.2276 +** file. In this case all that is required is to update the nRec field in
  1.2277 +** the journal header.
  1.2278 +**
  1.2279 +** This routine clears the needSync field of every page current held in
  1.2280 +** memory.
  1.2281 +*/
  1.2282 +static int syncJournal(Pager *pPager){
  1.2283 +  int rc = SQLITE_OK;
  1.2284 +
  1.2285 +  /* Sync the journal before modifying the main database
  1.2286 +  ** (assuming there is a journal and it needs to be synced.)
  1.2287 +  */
  1.2288 +  if( pPager->needSync ){
  1.2289 +    if( !pPager->tempFile ){
  1.2290 +      int iDc = sqlite3OsDeviceCharacteristics(pPager->fd);
  1.2291 +      assert( pPager->journalOpen );
  1.2292 +
  1.2293 +      if( 0==(iDc&SQLITE_IOCAP_SAFE_APPEND) ){
  1.2294 +        /* Write the nRec value into the journal file header. If in
  1.2295 +        ** full-synchronous mode, sync the journal first. This ensures that
  1.2296 +        ** all data has really hit the disk before nRec is updated to mark
  1.2297 +        ** it as a candidate for rollback.
  1.2298 +        **
  1.2299 +        ** This is not required if the persistent media supports the
  1.2300 +        ** SAFE_APPEND property. Because in this case it is not possible 
  1.2301 +        ** for garbage data to be appended to the file, the nRec field
  1.2302 +        ** is populated with 0xFFFFFFFF when the journal header is written
  1.2303 +        ** and never needs to be updated.
  1.2304 +        */
  1.2305 +        i64 jrnlOff;
  1.2306 +        if( pPager->fullSync && 0==(iDc&SQLITE_IOCAP_SEQUENTIAL) ){
  1.2307 +          PAGERTRACE2("SYNC journal of %d\n", PAGERID(pPager));
  1.2308 +          IOTRACE(("JSYNC %p\n", pPager))
  1.2309 +          rc = sqlite3OsSync(pPager->jfd, pPager->sync_flags);
  1.2310 +          if( rc!=0 ) return rc;
  1.2311 +        }
  1.2312 +
  1.2313 +        jrnlOff = pPager->journalHdr + sizeof(aJournalMagic);
  1.2314 +        IOTRACE(("JHDR %p %lld %d\n", pPager, jrnlOff, 4));
  1.2315 +        rc = write32bits(pPager->jfd, jrnlOff, pPager->nRec);
  1.2316 +        if( rc ) return rc;
  1.2317 +      }
  1.2318 +      if( 0==(iDc&SQLITE_IOCAP_SEQUENTIAL) ){
  1.2319 +        PAGERTRACE2("SYNC journal of %d\n", PAGERID(pPager));
  1.2320 +        IOTRACE(("JSYNC %p\n", pPager))
  1.2321 +        rc = sqlite3OsSync(pPager->jfd, pPager->sync_flags| 
  1.2322 +          (pPager->sync_flags==SQLITE_SYNC_FULL?SQLITE_SYNC_DATAONLY:0)
  1.2323 +        );
  1.2324 +        if( rc!=0 ) return rc;
  1.2325 +      }
  1.2326 +      pPager->journalStarted = 1;
  1.2327 +    }
  1.2328 +    pPager->needSync = 0;
  1.2329 +
  1.2330 +    /* Erase the needSync flag from every page.
  1.2331 +    */
  1.2332 +    sqlite3PcacheClearFlags(pPager->pPCache, PGHDR_NEED_SYNC);
  1.2333 +  }
  1.2334 +
  1.2335 +#ifndef NDEBUG
  1.2336 +  /* If the Pager.needSync flag is clear then the PgHdr.needSync
  1.2337 +  ** flag must also be clear for all pages.  Verify that this
  1.2338 +  ** invariant is true.
  1.2339 +  */
  1.2340 +  else{
  1.2341 +    sqlite3PcacheAssertFlags(pPager->pPCache, 0, PGHDR_NEED_SYNC);
  1.2342 +  }
  1.2343 +#endif
  1.2344 +
  1.2345 +  return rc;
  1.2346 +}
  1.2347 +
  1.2348 +/*
  1.2349 +** Given a list of pages (connected by the PgHdr.pDirty pointer) write
  1.2350 +** every one of those pages out to the database file. No calls are made
  1.2351 +** to the page-cache to mark the pages as clean. It is the responsibility
  1.2352 +** of the caller to use PcacheCleanAll() or PcacheMakeClean() to mark
  1.2353 +** the pages as clean.
  1.2354 +*/
  1.2355 +static int pager_write_pagelist(PgHdr *pList){
  1.2356 +  Pager *pPager;
  1.2357 +  int rc;
  1.2358 +
  1.2359 +  if( pList==0 ) return SQLITE_OK;
  1.2360 +  pPager = pList->pPager;
  1.2361 +
  1.2362 +  /* At this point there may be either a RESERVED or EXCLUSIVE lock on the
  1.2363 +  ** database file. If there is already an EXCLUSIVE lock, the following
  1.2364 +  ** calls to sqlite3OsLock() are no-ops.
  1.2365 +  **
  1.2366 +  ** Moving the lock from RESERVED to EXCLUSIVE actually involves going
  1.2367 +  ** through an intermediate state PENDING.   A PENDING lock prevents new
  1.2368 +  ** readers from attaching to the database but is unsufficient for us to
  1.2369 +  ** write.  The idea of a PENDING lock is to prevent new readers from
  1.2370 +  ** coming in while we wait for existing readers to clear.
  1.2371 +  **
  1.2372 +  ** While the pager is in the RESERVED state, the original database file
  1.2373 +  ** is unchanged and we can rollback without having to playback the
  1.2374 +  ** journal into the original database file.  Once we transition to
  1.2375 +  ** EXCLUSIVE, it means the database file has been changed and any rollback
  1.2376 +  ** will require a journal playback.
  1.2377 +  */
  1.2378 +  rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK);
  1.2379 +  if( rc!=SQLITE_OK ){
  1.2380 +    return rc;
  1.2381 +  }
  1.2382 +
  1.2383 +  while( pList ){
  1.2384 +
  1.2385 +    /* If the file has not yet been opened, open it now. */
  1.2386 +    if( !pPager->fd->pMethods ){
  1.2387 +      assert(pPager->tempFile);
  1.2388 +      rc = sqlite3PagerOpentemp(pPager, pPager->fd, pPager->vfsFlags);
  1.2389 +      if( rc ) return rc;
  1.2390 +    }
  1.2391 +
  1.2392 +    /* If there are dirty pages in the page cache with page numbers greater
  1.2393 +    ** than Pager.dbSize, this means sqlite3PagerTruncate() was called to
  1.2394 +    ** make the file smaller (presumably by auto-vacuum code). Do not write
  1.2395 +    ** any such pages to the file.
  1.2396 +    */
  1.2397 +    if( pList->pgno<=pPager->dbSize && 0==(pList->flags&PGHDR_DONT_WRITE) ){
  1.2398 +      i64 offset = (pList->pgno-1)*(i64)pPager->pageSize;
  1.2399 +      char *pData = CODEC2(pPager, pList->pData, pList->pgno, 6);
  1.2400 +      PAGERTRACE4("STORE %d page %d hash(%08x)\n",
  1.2401 +                   PAGERID(pPager), pList->pgno, pager_pagehash(pList));
  1.2402 +      IOTRACE(("PGOUT %p %d\n", pPager, pList->pgno));
  1.2403 +      rc = sqlite3OsWrite(pPager->fd, pData, pPager->pageSize, offset);
  1.2404 +      PAGER_INCR(sqlite3_pager_writedb_count);
  1.2405 +      PAGER_INCR(pPager->nWrite);
  1.2406 +      if( pList->pgno==1 ){
  1.2407 +        memcpy(&pPager->dbFileVers, &pData[24], sizeof(pPager->dbFileVers));
  1.2408 +      }
  1.2409 +    }
  1.2410 +#ifndef NDEBUG
  1.2411 +    else{
  1.2412 +      PAGERTRACE3("NOSTORE %d page %d\n", PAGERID(pPager), pList->pgno);
  1.2413 +    }
  1.2414 +#endif
  1.2415 +    if( rc ) return rc;
  1.2416 +#ifdef SQLITE_CHECK_PAGES
  1.2417 +    pList->pageHash = pager_pagehash(pList);
  1.2418 +#endif
  1.2419 +    pList = pList->pDirty;
  1.2420 +  }
  1.2421 +
  1.2422 +  return SQLITE_OK;
  1.2423 +}
  1.2424 +
  1.2425 +/*
  1.2426 +** This function is called by the pcache layer when it has reached some
  1.2427 +** soft memory limit. The argument is a pointer to a purgeable Pager 
  1.2428 +** object. This function attempts to make a single dirty page that has no
  1.2429 +** outstanding references (if one exists) clean so that it can be recycled 
  1.2430 +** by the pcache layer.
  1.2431 +*/
  1.2432 +static int pagerStress(void *p, PgHdr *pPg){
  1.2433 +  Pager *pPager = (Pager *)p;
  1.2434 +  int rc = SQLITE_OK;
  1.2435 +
  1.2436 +  if( pPager->doNotSync ){
  1.2437 +    return SQLITE_OK;
  1.2438 +  }
  1.2439 +
  1.2440 +  assert( pPg->flags&PGHDR_DIRTY );
  1.2441 +  if( pPager->errCode==SQLITE_OK ){
  1.2442 +    if( pPg->flags&PGHDR_NEED_SYNC ){
  1.2443 +      rc = syncJournal(pPager);
  1.2444 +      if( rc==SQLITE_OK && pPager->fullSync && 
  1.2445 +        !(sqlite3OsDeviceCharacteristics(pPager->fd)&SQLITE_IOCAP_SAFE_APPEND)
  1.2446 +      ){
  1.2447 +        pPager->nRec = 0;
  1.2448 +        rc = writeJournalHdr(pPager);
  1.2449 +      }
  1.2450 +    }
  1.2451 +    if( rc==SQLITE_OK ){
  1.2452 +      pPg->pDirty = 0;
  1.2453 +      rc = pager_write_pagelist(pPg);
  1.2454 +    }
  1.2455 +    if( rc!=SQLITE_OK ){
  1.2456 +      pager_error(pPager, rc);
  1.2457 +    }
  1.2458 +  }
  1.2459 +
  1.2460 +  if( rc==SQLITE_OK ){
  1.2461 +    sqlite3PcacheMakeClean(pPg);
  1.2462 +  }
  1.2463 +  return rc;
  1.2464 +}
  1.2465 +
  1.2466 +
  1.2467 +/*
  1.2468 +** Return 1 if there is a hot journal on the given pager.
  1.2469 +** A hot journal is one that needs to be played back.
  1.2470 +**
  1.2471 +** If the current size of the database file is 0 but a journal file
  1.2472 +** exists, that is probably an old journal left over from a prior
  1.2473 +** database with the same name.  Just delete the journal.
  1.2474 +**
  1.2475 +** Return negative if unable to determine the status of the journal.
  1.2476 +**
  1.2477 +** This routine does not open the journal file to examine its
  1.2478 +** content.  Hence, the journal might contain the name of a master
  1.2479 +** journal file that has been deleted, and hence not be hot.  Or
  1.2480 +** the header of the journal might be zeroed out.  This routine
  1.2481 +** does not discover these cases of a non-hot journal - if the
  1.2482 +** journal file exists and is not empty this routine assumes it
  1.2483 +** is hot.  The pager_playback() routine will discover that the
  1.2484 +** journal file is not really hot and will no-op.
  1.2485 +*/
  1.2486 +static int hasHotJournal(Pager *pPager, int *pExists){
  1.2487 +  sqlite3_vfs *pVfs = pPager->pVfs;
  1.2488 +  int rc = SQLITE_OK;
  1.2489 +  int exists;
  1.2490 +  int locked;
  1.2491 +  assert( pPager!=0 );
  1.2492 +  assert( pPager->useJournal );
  1.2493 +  assert( pPager->fd->pMethods );
  1.2494 +  *pExists = 0;
  1.2495 +  rc = sqlite3OsAccess(pVfs, pPager->zJournal, SQLITE_ACCESS_EXISTS, &exists);
  1.2496 +  if( rc==SQLITE_OK && exists ){
  1.2497 +    rc = sqlite3OsCheckReservedLock(pPager->fd, &locked);
  1.2498 +  }
  1.2499 +  if( rc==SQLITE_OK && exists && !locked ){
  1.2500 +    int nPage;
  1.2501 +    rc = sqlite3PagerPagecount(pPager, &nPage);
  1.2502 +    if( rc==SQLITE_OK ){
  1.2503 +     if( nPage==0 ){
  1.2504 +        sqlite3OsDelete(pVfs, pPager->zJournal, 0);
  1.2505 +      }else{
  1.2506 +        *pExists = 1;
  1.2507 +      }
  1.2508 +    }
  1.2509 +  }
  1.2510 +  return rc;
  1.2511 +}
  1.2512 +
  1.2513 +/*
  1.2514 +** Read the content of page pPg out of the database file.
  1.2515 +*/
  1.2516 +static int readDbPage(Pager *pPager, PgHdr *pPg, Pgno pgno){
  1.2517 +  int rc;
  1.2518 +  i64 offset;
  1.2519 +  assert( MEMDB==0 );
  1.2520 +  assert(pPager->fd->pMethods||pPager->tempFile);
  1.2521 +  if( !pPager->fd->pMethods ){
  1.2522 +    return SQLITE_IOERR_SHORT_READ;
  1.2523 +  }
  1.2524 +  offset = (pgno-1)*(i64)pPager->pageSize;
  1.2525 +  rc = sqlite3OsRead(pPager->fd, pPg->pData, pPager->pageSize, offset);
  1.2526 +  PAGER_INCR(sqlite3_pager_readdb_count);
  1.2527 +  PAGER_INCR(pPager->nRead);
  1.2528 +  IOTRACE(("PGIN %p %d\n", pPager, pgno));
  1.2529 +  if( pgno==1 ){
  1.2530 +    memcpy(&pPager->dbFileVers, &((u8*)pPg->pData)[24],
  1.2531 +                                              sizeof(pPager->dbFileVers));
  1.2532 +  }
  1.2533 +  CODEC1(pPager, pPg->pData, pPg->pgno, 3);
  1.2534 +  PAGERTRACE4("FETCH %d page %d hash(%08x)\n",
  1.2535 +               PAGERID(pPager), pPg->pgno, pager_pagehash(pPg));
  1.2536 +  return rc;
  1.2537 +}
  1.2538 +
  1.2539 +
/*
** This function is called to obtain the shared lock required before
** data may be read from the pager cache. If the shared lock has already
** been obtained, this function is a no-op.
**
** Immediately after obtaining the shared lock (if required), this function
** checks for a hot-journal file. If one is found, an emergency rollback
** is performed immediately.
**
** Returns SQLITE_OK on success.  On failure an error code is returned
** and pager_unlock() is invoked before returning (see the "failed"
** label at the bottom).  If the pager is in an error state other than
** SQLITE_FULL that cannot be cleared here, that error code is returned
** straight away.
*/
static int pagerSharedLock(Pager *pPager){
  int rc = SQLITE_OK;
  int isErrorReset = 0;   /* True if an error state was cleared while a
                          ** journal was open; forces a playback below */

  /* If this database is opened for exclusive access, has no outstanding 
  ** page references and is in an error-state, now is the chance to clear
  ** the error. Discard the contents of the pager-cache and treat any
  ** open journal file as a hot-journal.
  */
  if( !MEMDB && pPager->exclusiveMode 
   && sqlite3PcacheRefCount(pPager->pPCache)==0 && pPager->errCode 
  ){
    if( pPager->journalOpen ){
      isErrorReset = 1;
    }
    pPager->errCode = SQLITE_OK;
    pager_reset(pPager);
  }

  /* If the pager is still in an error state, do not proceed. The error 
  ** state will be cleared at some point in the future when all page 
  ** references are dropped and the cache can be discarded.
  */
  if( pPager->errCode && pPager->errCode!=SQLITE_FULL ){
    return pPager->errCode;
  }

  if( pPager->state==PAGER_UNLOCK || isErrorReset ){
    sqlite3_vfs *pVfs = pPager->pVfs;
    if( !MEMDB ){
      /* isHotJournal is only assigned when isErrorReset is false; the
      ** test below reads it only in that case because of the
      ** short-circuit in "isErrorReset || isHotJournal". */
      int isHotJournal;
      assert( sqlite3PcacheRefCount(pPager->pPCache)==0 );
      if( !pPager->noReadlock ){
        rc = pager_wait_on_lock(pPager, SHARED_LOCK);
        if( rc!=SQLITE_OK ){
          assert( pPager->state==PAGER_UNLOCK );
          return pager_error(pPager, rc);
        }
        assert( pPager->state>=SHARED_LOCK );
      }
  
      /* If a journal file exists, and there is no RESERVED lock on the
      ** database file, then it either needs to be played back or deleted.
      */
      if( !isErrorReset ){
        rc = hasHotJournal(pPager, &isHotJournal);
        if( rc!=SQLITE_OK ){
          goto failed;
        }
      }
      if( isErrorReset || isHotJournal ){
        /* Get an EXCLUSIVE lock on the database file. At this point it is
        ** important that a RESERVED lock is not obtained on the way to the
        ** EXCLUSIVE lock. If it were, another process might open the
        ** database file, detect the RESERVED lock, and conclude that the
        ** database is safe to read while this process is still rolling it 
        ** back.
        ** 
        ** Because the intermediate RESERVED lock is not requested, the
        ** second process will get to this point in the code and fail to
        ** obtain its own EXCLUSIVE lock on the database file.
        */
        if( pPager->state<EXCLUSIVE_LOCK ){
          rc = sqlite3OsLock(pPager->fd, EXCLUSIVE_LOCK);
          if( rc!=SQLITE_OK ){
            rc = pager_error(pPager, rc);
            goto failed;
          }
          pPager->state = PAGER_EXCLUSIVE;
        }
 
        /* Open the journal for read/write access. This is because in 
        ** exclusive-access mode the file descriptor will be kept open and
        ** possibly used for a transaction later on. On some systems, the
        ** OsTruncate() call used in exclusive-access mode also requires
        ** a read/write file handle.
        */
        if( !isErrorReset && pPager->journalOpen==0 ){
          int res;
          rc = sqlite3OsAccess(pVfs,pPager->zJournal,SQLITE_ACCESS_EXISTS,&res);
          if( rc==SQLITE_OK ){
            if( res ){
              int fout = 0;
              int f = SQLITE_OPEN_READWRITE|SQLITE_OPEN_MAIN_JOURNAL;
              assert( !pPager->tempFile );
              rc = sqlite3OsOpen(pVfs, pPager->zJournal, pPager->jfd, f, &fout);
              assert( rc!=SQLITE_OK || pPager->jfd->pMethods );
              /* A journal that could only be opened read-only is of no
              ** use here: report busy and close the handle again. */
              if( fout&SQLITE_OPEN_READONLY ){
                rc = SQLITE_BUSY;
                sqlite3OsClose(pPager->jfd);
              }
            }else{
              /* If the journal does not exist, that means some other process
              ** has already rolled it back */
              rc = SQLITE_BUSY;
            }
          }
        }
        /* Any failure other than the out-of-memory and unlock errors
        ** listed here is reported to the caller as SQLITE_BUSY. */
        if( rc!=SQLITE_OK ){
          if( rc!=SQLITE_NOMEM && rc!=SQLITE_IOERR_UNLOCK 
           && rc!=SQLITE_IOERR_NOMEM 
          ){
            rc = SQLITE_BUSY;
          }
          goto failed;
        }
        /* The journal is open; reset the journal bookkeeping so that
        ** playback starts from the beginning of the file. */
        pPager->journalOpen = 1;
        pPager->journalStarted = 0;
        pPager->journalOff = 0;
        pPager->setMaster = 0;
        pPager->journalHdr = 0;
 
        /* Playback and delete the journal.  Drop the database write
        ** lock and reacquire the read lock.
        */
        rc = pager_playback(pPager, 1);
        if( rc!=SQLITE_OK ){
          rc = pager_error(pPager, rc);
          goto failed;
        }
        assert(pPager->state==PAGER_SHARED || 
            (pPager->exclusiveMode && pPager->state>PAGER_SHARED)
        );
      }

      if( sqlite3PcachePagecount(pPager->pPCache)>0 ){
        /* The shared-lock has just been acquired on the database file
        ** and there are already pages in the cache (from a previous
        ** read or write transaction).  Check to see if the database
        ** has been modified.  If the database has changed, flush the
        ** cache.
        **
        ** Database changes are detected by looking at 16 bytes beginning
        ** at offset 24 into the file.  The first 4 of these 16 bytes are
        ** a 32-bit counter that is incremented with each change.  The
        ** other bytes change randomly with each file change when
        ** a codec is in use.
        ** 
        ** There is a vanishingly small chance that a change will not be 
        ** detected.  The chance of an undetected change is so small that
        ** it can be neglected.
        */
        char dbFileVers[sizeof(pPager->dbFileVers)];
        /* Called for its side effects; the outcome is checked through
        ** pPager->errCode just below. */
        sqlite3PagerPagecount(pPager, 0);

        if( pPager->errCode ){
          rc = pPager->errCode;
          goto failed;
        }

        if( pPager->dbSize>0 ){
          IOTRACE(("CKVERS %p %d\n", pPager, sizeof(dbFileVers)));
          rc = sqlite3OsRead(pPager->fd, &dbFileVers, sizeof(dbFileVers), 24);
          if( rc!=SQLITE_OK ){
            goto failed;
          }
        }else{
          /* Empty database: compare against an all-zero version. */
          memset(dbFileVers, 0, sizeof(dbFileVers));
        }

        if( memcmp(pPager->dbFileVers, dbFileVers, sizeof(dbFileVers))!=0 ){
          pager_reset(pPager);
        }
      }
    }
    assert( pPager->exclusiveMode || pPager->state<=PAGER_SHARED );
    if( pPager->state==PAGER_UNLOCK ){
      pPager->state = PAGER_SHARED;
    }
  }

 failed:
  if( rc!=SQLITE_OK ){
    /* pager_unlock() is a no-op for exclusive mode and in-memory databases. */
    pager_unlock(pPager);
  }
  return rc;
}
  1.2727 +
  1.2728 +/*
  1.2729 +** Make sure we have the content for a page.  If the page was
  1.2730 +** previously acquired with noContent==1, then the content was
  1.2731 +** just initialized to zeros instead of being read from disk.
  1.2732 +** But now we need the real data off of disk.  So make sure we
  1.2733 +** have it.  Read it in if we do not have it already.
  1.2734 +*/
  1.2735 +static int pager_get_content(PgHdr *pPg){
  1.2736 +  if( pPg->flags&PGHDR_NEED_READ ){
  1.2737 +    int rc = readDbPage(pPg->pPager, pPg, pPg->pgno);
  1.2738 +    if( rc==SQLITE_OK ){
  1.2739 +      pPg->flags &= ~PGHDR_NEED_READ;
  1.2740 +    }else{
  1.2741 +      return rc;
  1.2742 +    }
  1.2743 +  }
  1.2744 +  return SQLITE_OK;
  1.2745 +}
  1.2746 +
  1.2747 +/*
  1.2748 +** If the reference count has reached zero, and the pager is not in the
  1.2749 +** middle of a write transaction or opened in exclusive mode, unlock it.
  1.2750 +*/ 
  1.2751 +static void pagerUnlockIfUnused(Pager *pPager){
  1.2752 +  if( (sqlite3PcacheRefCount(pPager->pPCache)==0)
  1.2753 +    && (!pPager->exclusiveMode || pPager->journalOff>0) 
  1.2754 +  ){
  1.2755 +    pagerUnlockAndRollback(pPager);
  1.2756 +  }
  1.2757 +}
  1.2758 +
/*
** Drop a page from the cache using sqlite3PcacheDrop().
**
** If this means there are now no pages with references to them, a rollback
** occurs and the lock on the database is removed (subject to the
** conditions checked by pagerUnlockIfUnused()).
*/
static void pagerDropPage(DbPage *pPg){
  /* Fetch the owning pager before the page is dropped; pPg is not
  ** touched again after sqlite3PcacheDrop(). */
  Pager *pPager = pPg->pPager;
  sqlite3PcacheDrop(pPg);
  pagerUnlockIfUnused(pPager);
}
  1.2770 +
  1.2771 +/*
  1.2772 +** Acquire a page.
  1.2773 +**
  1.2774 +** A read lock on the disk file is obtained when the first page is acquired. 
  1.2775 +** This read lock is dropped when the last page is released.
  1.2776 +**
  1.2777 +** This routine works for any page number greater than 0.  If the database
  1.2778 +** file is smaller than the requested page, then no actual disk
  1.2779 +** read occurs and the memory image of the page is initialized to
  1.2780 +** all zeros.  The extra data appended to a page is always initialized
  1.2781 +** to zeros the first time a page is loaded into memory.
  1.2782 +**
  1.2783 +** The acquisition might fail for several reasons.  In all cases,
  1.2784 +** an appropriate error code is returned and *ppPage is set to NULL.
  1.2785 +**
  1.2786 +** See also sqlite3PagerLookup().  Both this routine and Lookup() attempt
  1.2787 +** to find a page in the in-memory cache first.  If the page is not already
  1.2788 +** in memory, this routine goes to disk to read it in whereas Lookup()
  1.2789 +** just returns 0.  This routine acquires a read-lock the first time it
  1.2790 +** has to go to disk, and could also playback an old journal if necessary.
  1.2791 +** Since Lookup() never goes to disk, it never has to deal with locks
  1.2792 +** or journal files.
  1.2793 +**
  1.2794 +** If noContent is false, the page contents are actually read from disk.
  1.2795 +** If noContent is true, it means that we do not care about the contents
  1.2796 +** of the page at this time, so do not do a disk read.  Just fill in the
  1.2797 +** page content with zeros.  But mark the fact that we have not read the
  1.2798 +** content by setting the PgHdr.needRead flag.  Later on, if 
  1.2799 +** sqlite3PagerWrite() is called on this page or if this routine is
  1.2800 +** called again with noContent==0, that means that the content is needed
  1.2801 +** and the disk read should occur at that point.
  1.2802 +*/
  1.2803 +int sqlite3PagerAcquire(
  1.2804 +  Pager *pPager,      /* The pager open on the database file */
  1.2805 +  Pgno pgno,          /* Page number to fetch */
  1.2806 +  DbPage **ppPage,    /* Write a pointer to the page here */
  1.2807 +  int noContent       /* Do not bother reading content from disk if true */
  1.2808 +){
  1.2809 +  PgHdr *pPg = 0;
  1.2810 +  int rc;
  1.2811 +
  1.2812 +  assert( pPager->state==PAGER_UNLOCK 
  1.2813 +       || sqlite3PcacheRefCount(pPager->pPCache)>0 
  1.2814 +       || pgno==1 
  1.2815 +  );
  1.2816 +
  1.2817 +  /* The maximum page number is 2^31. Return SQLITE_CORRUPT if a page
  1.2818 +  ** number greater than this, or zero, is requested.
  1.2819 +  */
  1.2820 +  if( pgno>PAGER_MAX_PGNO || pgno==0 || pgno==PAGER_MJ_PGNO(pPager) ){
  1.2821 +    return SQLITE_CORRUPT_BKPT;
  1.2822 +  }
  1.2823 +
  1.2824 +  /* Make sure we have not hit any critical errors.
  1.2825 +  */ 
  1.2826 +  assert( pPager!=0 );
  1.2827 +  *ppPage = 0;
  1.2828 +
  1.2829 +  /* If this is the first page accessed, then get a SHARED lock
  1.2830 +  ** on the database file. pagerSharedLock() is a no-op if 
  1.2831 +  ** a database lock is already held.
  1.2832 +  */
  1.2833 +  rc = pagerSharedLock(pPager);
  1.2834 +  if( rc!=SQLITE_OK ){
  1.2835 +    return rc;
  1.2836 +  }
  1.2837 +  assert( pPager->state!=PAGER_UNLOCK );
  1.2838 +
  1.2839 +  rc = sqlite3PcacheFetch(pPager->pPCache, pgno, 1, &pPg);
  1.2840 +  if( rc!=SQLITE_OK ){
  1.2841 +    return rc;
  1.2842 +  }
  1.2843 +  if( pPg->pPager==0 ){
  1.2844 +    /* The pager cache has created a new page. Its content needs to 
  1.2845 +    ** be initialized.
  1.2846 +    */
  1.2847 +    int nMax;
  1.2848 +    PAGER_INCR(pPager->nMiss);
  1.2849 +    pPg->pPager = pPager;
  1.2850 +    if( sqlite3BitvecTest(pPager->pInJournal, pgno) ){
  1.2851 +      assert( !MEMDB );
  1.2852 +      pPg->flags |= PGHDR_IN_JOURNAL;
  1.2853 +    }
  1.2854 +    memset(pPg->pExtra, 0, pPager->nExtra);
  1.2855 +
  1.2856 +    rc = sqlite3PagerPagecount(pPager, &nMax);
  1.2857 +    if( rc!=SQLITE_OK ){
  1.2858 +      sqlite3PagerUnref(pPg);
  1.2859 +      return rc;
  1.2860 +    }
  1.2861 +
  1.2862 +    if( nMax<(int)pgno || MEMDB || noContent ){
  1.2863 +      if( pgno>pPager->mxPgno ){
  1.2864 +        sqlite3PagerUnref(pPg);
  1.2865 +        return SQLITE_FULL;
  1.2866 +      }
  1.2867 +      memset(pPg->pData, 0, pPager->pageSize);
  1.2868 +      if( noContent ){
  1.2869 +        pPg->flags |= PGHDR_NEED_READ;
  1.2870 +      }
  1.2871 +      IOTRACE(("ZERO %p %d\n", pPager, pgno));
  1.2872 +    }else{
  1.2873 +      rc = readDbPage(pPager, pPg, pgno);
  1.2874 +      if( rc!=SQLITE_OK && rc!=SQLITE_IOERR_SHORT_READ ){
  1.2875 +        /* sqlite3PagerUnref(pPg); */
  1.2876 +        pagerDropPage(pPg);
  1.2877 +        return rc;
  1.2878 +      }
  1.2879 +    }
  1.2880 +#ifdef SQLITE_CHECK_PAGES
  1.2881 +    pPg->pageHash = pager_pagehash(pPg);
  1.2882 +#endif
  1.2883 +  }else{
  1.2884 +    /* The requested page is in the page cache. */
  1.2885 +    assert(sqlite3PcacheRefCount(pPager->pPCache)>0 || pgno==1);
  1.2886 +    PAGER_INCR(pPager->nHit);
  1.2887 +    if( !noContent ){
  1.2888 +      rc = pager_get_content(pPg);
  1.2889 +      if( rc ){
  1.2890 +        sqlite3PagerUnref(pPg);
  1.2891 +        return rc;
  1.2892 +      }
  1.2893 +    }
  1.2894 +  }
  1.2895 +
  1.2896 +  *ppPage = pPg;
  1.2897 +  return SQLITE_OK;
  1.2898 +}
  1.2899 +
  1.2900 +/*
  1.2901 +** Acquire a page if it is already in the in-memory cache.  Do
  1.2902 +** not read the page from disk.  Return a pointer to the page,
  1.2903 +** or 0 if the page is not in cache.
  1.2904 +**
  1.2905 +** See also sqlite3PagerGet().  The difference between this routine
  1.2906 +** and sqlite3PagerGet() is that _get() will go to the disk and read
  1.2907 +** in the page if the page is not already in cache.  This routine
  1.2908 +** returns NULL if the page is not in cache or if a disk I/O error 
  1.2909 +** has ever happened.
  1.2910 +*/
  1.2911 +DbPage *sqlite3PagerLookup(Pager *pPager, Pgno pgno){
  1.2912 +  PgHdr *pPg = 0;
  1.2913 +  assert( pPager!=0 );
  1.2914 +  assert( pgno!=0 );
  1.2915 +
  1.2916 +  if( (pPager->state!=PAGER_UNLOCK)
  1.2917 +   && (pPager->errCode==SQLITE_OK || pPager->errCode==SQLITE_FULL)
  1.2918 +  ){
  1.2919 +    sqlite3PcacheFetch(pPager->pPCache, pgno, 0, &pPg);
  1.2920 +  }
  1.2921 +
  1.2922 +  return pPg;
  1.2923 +}
  1.2924 +
  1.2925 +/*
  1.2926 +** Release a page.
  1.2927 +**
  1.2928 +** If the number of references to the page drop to zero, then the
  1.2929 +** page is added to the LRU list.  When all references to all pages
  1.2930 +** are released, a rollback occurs and the lock on the database is
  1.2931 +** removed.
  1.2932 +*/
  1.2933 +int sqlite3PagerUnref(DbPage *pPg){
  1.2934 +  if( pPg ){
  1.2935 +    Pager *pPager = pPg->pPager;
  1.2936 +    sqlite3PcacheRelease(pPg);
  1.2937 +    pagerUnlockIfUnused(pPager);
  1.2938 +  }
  1.2939 +  return SQLITE_OK;
  1.2940 +}
  1.2941 +
  1.2942 +/*
  1.2943 +** Create a journal file for pPager.  There should already be a RESERVED
  1.2944 +** or EXCLUSIVE lock on the database file when this routine is called.
  1.2945 +**
  1.2946 +** Return SQLITE_OK if everything.  Return an error code and release the
  1.2947 +** write lock if anything goes wrong.
  1.2948 +*/
  1.2949 +static int pager_open_journal(Pager *pPager){
  1.2950 +  sqlite3_vfs *pVfs = pPager->pVfs;
  1.2951 +  int flags = (SQLITE_OPEN_READWRITE|SQLITE_OPEN_EXCLUSIVE|SQLITE_OPEN_CREATE);
  1.2952 +
  1.2953 +  int rc;
  1.2954 +  assert( !MEMDB );
  1.2955 +  assert( pPager->state>=PAGER_RESERVED );
  1.2956 +  assert( pPager->useJournal );
  1.2957 +  assert( pPager->pInJournal==0 );
  1.2958 +  sqlite3PagerPagecount(pPager, 0);
  1.2959 +  pPager->pInJournal = sqlite3BitvecCreate(pPager->dbSize);
  1.2960 +  if( pPager->pInJournal==0 ){
  1.2961 +    rc = SQLITE_NOMEM;
  1.2962 +    goto failed_to_open_journal;
  1.2963 +  }
  1.2964 +
  1.2965 +  if( pPager->journalOpen==0 ){
  1.2966 +    if( pPager->tempFile ){
  1.2967 +      flags |= (SQLITE_OPEN_DELETEONCLOSE|SQLITE_OPEN_TEMP_JOURNAL);
  1.2968 +    }else{
  1.2969 +      flags |= (SQLITE_OPEN_MAIN_JOURNAL);
  1.2970 +    }
  1.2971 +#ifdef SQLITE_ENABLE_ATOMIC_WRITE
  1.2972 +    rc = sqlite3JournalOpen(
  1.2973 +        pVfs, pPager->zJournal, pPager->jfd, flags, jrnlBufferSize(pPager)
  1.2974 +    );
  1.2975 +#else
  1.2976 +    rc = sqlite3OsOpen(pVfs, pPager->zJournal, pPager->jfd, flags, 0);
  1.2977 +#endif
  1.2978 +    assert( rc!=SQLITE_OK || pPager->jfd->pMethods );
  1.2979 +    pPager->journalOff = 0;
  1.2980 +    pPager->setMaster = 0;
  1.2981 +    pPager->journalHdr = 0;
  1.2982 +    if( rc!=SQLITE_OK ){
  1.2983 +      if( rc==SQLITE_NOMEM ){
  1.2984 +        sqlite3OsDelete(pVfs, pPager->zJournal, 0);
  1.2985 +      }
  1.2986 +      goto failed_to_open_journal;
  1.2987 +    }
  1.2988 +  }
  1.2989 +  pPager->journalOpen = 1;
  1.2990 +  pPager->journalStarted = 0;
  1.2991 +  pPager->needSync = 0;
  1.2992 +  pPager->nRec = 0;
  1.2993 +  if( pPager->errCode ){
  1.2994 +    rc = pPager->errCode;
  1.2995 +    goto failed_to_open_journal;
  1.2996 +  }
  1.2997 +  pPager->origDbSize = pPager->dbSize;
  1.2998 +
  1.2999 +  rc = writeJournalHdr(pPager);
  1.3000 +
  1.3001 +  if( pPager->stmtAutoopen && rc==SQLITE_OK ){
  1.3002 +    rc = sqlite3PagerStmtBegin(pPager);
  1.3003 +  }
  1.3004 +  if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM && rc!=SQLITE_IOERR_NOMEM ){
  1.3005 +    rc = pager_end_transaction(pPager, 0);
  1.3006 +    if( rc==SQLITE_OK ){
  1.3007 +      rc = SQLITE_FULL;
  1.3008 +    }
  1.3009 +  }
  1.3010 +  return rc;
  1.3011 +
  1.3012 +failed_to_open_journal:
  1.3013 +  sqlite3BitvecDestroy(pPager->pInJournal);
  1.3014 +  pPager->pInJournal = 0;
  1.3015 +  return rc;
  1.3016 +}
  1.3017 +
  1.3018 +/*
  1.3019 +** Acquire a write-lock on the database.  The lock is removed when
  1.3020 +** the any of the following happen:
  1.3021 +**
  1.3022 +**   *  sqlite3PagerCommitPhaseTwo() is called.
  1.3023 +**   *  sqlite3PagerRollback() is called.
  1.3024 +**   *  sqlite3PagerClose() is called.
  1.3025 +**   *  sqlite3PagerUnref() is called to on every outstanding page.
  1.3026 +**
  1.3027 +** The first parameter to this routine is a pointer to any open page of the
  1.3028 +** database file.  Nothing changes about the page - it is used merely to
  1.3029 +** acquire a pointer to the Pager structure and as proof that there is
  1.3030 +** already a read-lock on the database.
  1.3031 +**
  1.3032 +** The second parameter indicates how much space in bytes to reserve for a
  1.3033 +** master journal file-name at the start of the journal when it is created.
  1.3034 +**
  1.3035 +** A journal file is opened if this is not a temporary file.  For temporary
  1.3036 +** files, the opening of the journal file is deferred until there is an
  1.3037 +** actual need to write to the journal.
  1.3038 +**
  1.3039 +** If the database is already reserved for writing, this routine is a no-op.
  1.3040 +**
  1.3041 +** If exFlag is true, go ahead and get an EXCLUSIVE lock on the file
  1.3042 +** immediately instead of waiting until we try to flush the cache.  The
  1.3043 +** exFlag is ignored if a transaction is already active.
  1.3044 +*/
  1.3045 +int sqlite3PagerBegin(DbPage *pPg, int exFlag){
  1.3046 +  Pager *pPager = pPg->pPager;
  1.3047 +  int rc = SQLITE_OK;
  1.3048 +  assert( pPg->nRef>0 );
  1.3049 +  assert( pPager->state!=PAGER_UNLOCK );
  1.3050 +  if( pPager->state==PAGER_SHARED ){
  1.3051 +    assert( pPager->pInJournal==0 );
  1.3052 +    sqlite3PcacheAssertFlags(pPager->pPCache, 0, PGHDR_IN_JOURNAL);
  1.3053 +    if( MEMDB ){
  1.3054 +      pPager->state = PAGER_EXCLUSIVE;
  1.3055 +      pPager->origDbSize = pPager->dbSize;
  1.3056 +    }else{
  1.3057 +      rc = sqlite3OsLock(pPager->fd, RESERVED_LOCK);
  1.3058 +      if( rc==SQLITE_OK ){
  1.3059 +        pPager->state = PAGER_RESERVED;
  1.3060 +        if( exFlag ){
  1.3061 +          rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK);
  1.3062 +        }
  1.3063 +      }
  1.3064 +      if( rc!=SQLITE_OK ){
  1.3065 +        return rc;
  1.3066 +      }
  1.3067 +      pPager->dirtyCache = 0;
  1.3068 +      PAGERTRACE2("TRANSACTION %d\n", PAGERID(pPager));
  1.3069 +      if( pPager->useJournal && !pPager->tempFile
  1.3070 +             && pPager->journalMode!=PAGER_JOURNALMODE_OFF ){
  1.3071 +        rc = pager_open_journal(pPager);
  1.3072 +      }
  1.3073 +    }
  1.3074 +  }else if( pPager->journalOpen && pPager->journalOff==0 ){
  1.3075 +    /* This happens when the pager was in exclusive-access mode the last
  1.3076 +    ** time a (read or write) transaction was successfully concluded
  1.3077 +    ** by this connection. Instead of deleting the journal file it was 
  1.3078 +    ** kept open and either was truncated to 0 bytes or its header was
  1.3079 +    ** overwritten with zeros.
  1.3080 +    */
  1.3081 +    assert( pPager->nRec==0 );
  1.3082 +    assert( pPager->origDbSize==0 );
  1.3083 +    assert( pPager->pInJournal==0 );
  1.3084 +    sqlite3PagerPagecount(pPager, 0);
  1.3085 +    pPager->pInJournal = sqlite3BitvecCreate( pPager->dbSize );
  1.3086 +    if( !pPager->pInJournal ){
  1.3087 +      rc = SQLITE_NOMEM;
  1.3088 +    }else{
  1.3089 +      pPager->origDbSize = pPager->dbSize;
  1.3090 +      rc = writeJournalHdr(pPager);
  1.3091 +    }
  1.3092 +  }
  1.3093 +  assert( !pPager->journalOpen || pPager->journalOff>0 || rc!=SQLITE_OK );
  1.3094 +  return rc;
  1.3095 +}
  1.3096 +
  1.3097 +/*
  1.3098 +** Make a page dirty.  Set its dirty flag and add it to the dirty
  1.3099 +** page list.
  1.3100 +*/
  1.3101 +static void makeDirty(PgHdr *pPg){
  1.3102 +  sqlite3PcacheMakeDirty(pPg);
  1.3103 +}
  1.3104 +
  1.3105 +/*
  1.3106 +** Make a page clean.  Clear its dirty bit and remove it from the
  1.3107 +** dirty page list.
  1.3108 +*/
  1.3109 +static void makeClean(PgHdr *pPg){
  1.3110 +  sqlite3PcacheMakeClean(pPg);
  1.3111 +}
  1.3112 +
  1.3113 +
  1.3114 +/*
  1.3115 +** Mark a data page as writeable.  The page is written into the journal 
  1.3116 +** if it is not there already.  This routine must be called before making
  1.3117 +** changes to a page.
  1.3118 +**
  1.3119 +** The first time this routine is called, the pager creates a new
  1.3120 +** journal and acquires a RESERVED lock on the database.  If the RESERVED
  1.3121 +** lock could not be acquired, this routine returns SQLITE_BUSY.  The
  1.3122 +** calling routine must check for that return value and be careful not to
  1.3123 +** change any page data until this routine returns SQLITE_OK.
  1.3124 +**
  1.3125 +** If the journal file could not be written because the disk is full,
  1.3126 +** then this routine returns SQLITE_FULL and does an immediate rollback.
  1.3127 +** All subsequent write attempts also return SQLITE_FULL until there
  1.3128 +** is a call to sqlite3PagerCommit() or sqlite3PagerRollback() to
  1.3129 +** reset.
  1.3130 +*/
  1.3131 +static int pager_write(PgHdr *pPg){
  1.3132 +  void *pData = pPg->pData;
  1.3133 +  Pager *pPager = pPg->pPager;
  1.3134 +  int rc = SQLITE_OK;
  1.3135 +
  1.3136 +  /* Check for errors
  1.3137 +  */
  1.3138 +  if( pPager->errCode ){ 
  1.3139 +    return pPager->errCode;
  1.3140 +  }
  1.3141 +  if( pPager->readOnly ){
  1.3142 +    return SQLITE_PERM;
  1.3143 +  }
  1.3144 +
  1.3145 +  assert( !pPager->setMaster );
  1.3146 +
  1.3147 +  CHECK_PAGE(pPg);
  1.3148 +
  1.3149 +  /* If this page was previously acquired with noContent==1, that means
  1.3150 +  ** we didn't really read in the content of the page.  This can happen
  1.3151 +  ** (for example) when the page is being moved to the freelist.  But
  1.3152 +  ** now we are (perhaps) moving the page off of the freelist for
  1.3153 +  ** reuse and we need to know its original content so that content
  1.3154 +  ** can be stored in the rollback journal.  So do the read at this
  1.3155 +  ** time.
  1.3156 +  */
  1.3157 +  rc = pager_get_content(pPg);
  1.3158 +  if( rc ){
  1.3159 +    return rc;
  1.3160 +  }
  1.3161 +
  1.3162 +  /* Mark the page as dirty.  If the page has already been written
  1.3163 +  ** to the journal then we can return right away.
  1.3164 +  */
  1.3165 +  makeDirty(pPg);
  1.3166 +  if( (pPg->flags&PGHDR_IN_JOURNAL)
  1.3167 +   && (pageInStatement(pPg) || pPager->stmtInUse==0) 
  1.3168 +  ){
  1.3169 +    pPager->dirtyCache = 1;
  1.3170 +    pPager->dbModified = 1;
  1.3171 +  }else{
  1.3172 +
  1.3173 +    /* If we get this far, it means that the page needs to be
  1.3174 +    ** written to the transaction journal or the ckeckpoint journal
  1.3175 +    ** or both.
  1.3176 +    **
  1.3177 +    ** First check to see that the transaction journal exists and
  1.3178 +    ** create it if it does not.
  1.3179 +    */
  1.3180 +    assert( pPager->state!=PAGER_UNLOCK );
  1.3181 +    rc = sqlite3PagerBegin(pPg, 0);
  1.3182 +    if( rc!=SQLITE_OK ){
  1.3183 +      return rc;
  1.3184 +    }
  1.3185 +    assert( pPager->state>=PAGER_RESERVED );
  1.3186 +    if( !pPager->journalOpen && pPager->useJournal
  1.3187 +          && pPager->journalMode!=PAGER_JOURNALMODE_OFF ){
  1.3188 +      rc = pager_open_journal(pPager);
  1.3189 +      if( rc!=SQLITE_OK ) return rc;
  1.3190 +    }
  1.3191 +    pPager->dirtyCache = 1;
  1.3192 +    pPager->dbModified = 1;
  1.3193 +  
  1.3194 +    /* The transaction journal now exists and we have a RESERVED or an
  1.3195 +    ** EXCLUSIVE lock on the main database file.  Write the current page to
  1.3196 +    ** the transaction journal if it is not there already.
  1.3197 +    */
  1.3198 +    if( !(pPg->flags&PGHDR_IN_JOURNAL) && (pPager->journalOpen || MEMDB) ){
  1.3199 +      if( (int)pPg->pgno <= pPager->origDbSize ){
  1.3200 +        if( MEMDB ){
  1.3201 +          PAGERTRACE3("JOURNAL %d page %d\n", PAGERID(pPager), pPg->pgno);
  1.3202 +          rc = sqlite3PcachePreserve(pPg, 0);
  1.3203 +          if( rc!=SQLITE_OK ){
  1.3204 +            return rc;
  1.3205 +          }
  1.3206 +        }else{
  1.3207 +          u32 cksum;
  1.3208 +          char *pData2;
  1.3209 +
  1.3210 +          /* We should never write to the journal file the page that
  1.3211 +          ** contains the database locks.  The following assert verifies
  1.3212 +          ** that we do not. */
  1.3213 +          assert( pPg->pgno!=PAGER_MJ_PGNO(pPager) );
  1.3214 +          pData2 = CODEC2(pPager, pData, pPg->pgno, 7);
  1.3215 +          cksum = pager_cksum(pPager, (u8*)pData2);
  1.3216 +          rc = write32bits(pPager->jfd, pPager->journalOff, pPg->pgno);
  1.3217 +          if( rc==SQLITE_OK ){
  1.3218 +            rc = sqlite3OsWrite(pPager->jfd, pData2, pPager->pageSize,
  1.3219 +                                pPager->journalOff + 4);
  1.3220 +            pPager->journalOff += pPager->pageSize+4;
  1.3221 +          }
  1.3222 +          if( rc==SQLITE_OK ){
  1.3223 +            rc = write32bits(pPager->jfd, pPager->journalOff, cksum);
  1.3224 +            pPager->journalOff += 4;
  1.3225 +          }
  1.3226 +          IOTRACE(("JOUT %p %d %lld %d\n", pPager, pPg->pgno, 
  1.3227 +                   pPager->journalOff, pPager->pageSize));
  1.3228 +          PAGER_INCR(sqlite3_pager_writej_count);
  1.3229 +          PAGERTRACE5("JOURNAL %d page %d needSync=%d hash(%08x)\n",
  1.3230 +               PAGERID(pPager), pPg->pgno, 
  1.3231 +               ((pPg->flags&PGHDR_NEED_SYNC)?1:0), pager_pagehash(pPg));
  1.3232 +
  1.3233 +          /* An error has occured writing to the journal file. The 
  1.3234 +          ** transaction will be rolled back by the layer above.
  1.3235 +          */
  1.3236 +          if( rc!=SQLITE_OK ){
  1.3237 +            return rc;
  1.3238 +          }
  1.3239 +
  1.3240 +          pPager->nRec++;
  1.3241 +          assert( pPager->pInJournal!=0 );
  1.3242 +          sqlite3BitvecSet(pPager->pInJournal, pPg->pgno);
  1.3243 +          if( !pPager->noSync ){
  1.3244 +            pPg->flags |= PGHDR_NEED_SYNC;
  1.3245 +          }
  1.3246 +          if( pPager->stmtInUse ){
  1.3247 +            sqlite3BitvecSet(pPager->pInStmt, pPg->pgno);
  1.3248 +          }
  1.3249 +        }
  1.3250 +      }else{
  1.3251 +        if( !pPager->journalStarted && !pPager->noSync ){
  1.3252 +          pPg->flags |= PGHDR_NEED_SYNC;
  1.3253 +        }
  1.3254 +        PAGERTRACE4("APPEND %d page %d needSync=%d\n",
  1.3255 +                PAGERID(pPager), pPg->pgno,
  1.3256 +               ((pPg->flags&PGHDR_NEED_SYNC)?1:0));
  1.3257 +      }
  1.3258 +      if( pPg->flags&PGHDR_NEED_SYNC ){
  1.3259 +        pPager->needSync = 1;
  1.3260 +      }
  1.3261 +      pPg->flags |= PGHDR_IN_JOURNAL;
  1.3262 +    }
  1.3263 +  
  1.3264 +    /* If the statement journal is open and the page is not in it,
  1.3265 +    ** then write the current page to the statement journal.  Note that
  1.3266 +    ** the statement journal format differs from the standard journal format
  1.3267 +    ** in that it omits the checksums and the header.
  1.3268 +    */
  1.3269 +    if( pPager->stmtInUse 
  1.3270 +     && !pageInStatement(pPg) 
  1.3271 +     && (int)pPg->pgno<=pPager->stmtSize 
  1.3272 +    ){
  1.3273 +      assert( (pPg->flags&PGHDR_IN_JOURNAL) 
  1.3274 +                 || (int)pPg->pgno>pPager->origDbSize );
  1.3275 +      if( MEMDB ){
  1.3276 +        rc = sqlite3PcachePreserve(pPg, 1);
  1.3277 +        if( rc!=SQLITE_OK ){
  1.3278 +          return rc;
  1.3279 +        }
  1.3280 +        PAGERTRACE3("STMT-JOURNAL %d page %d\n", PAGERID(pPager), pPg->pgno);
  1.3281 +      }else{
  1.3282 +        i64 offset = pPager->stmtNRec*(4+pPager->pageSize);
  1.3283 +        char *pData2 = CODEC2(pPager, pData, pPg->pgno, 7);
  1.3284 +        rc = write32bits(pPager->stfd, offset, pPg->pgno);
  1.3285 +        if( rc==SQLITE_OK ){
  1.3286 +          rc = sqlite3OsWrite(pPager->stfd, pData2, pPager->pageSize, offset+4);
  1.3287 +        }
  1.3288 +        PAGERTRACE3("STMT-JOURNAL %d page %d\n", PAGERID(pPager), pPg->pgno);
  1.3289 +        if( rc!=SQLITE_OK ){
  1.3290 +          return rc;
  1.3291 +        }
  1.3292 +        pPager->stmtNRec++;
  1.3293 +        assert( pPager->pInStmt!=0 );
  1.3294 +        sqlite3BitvecSet(pPager->pInStmt, pPg->pgno);
  1.3295 +      }
  1.3296 +    }
  1.3297 +  }
  1.3298 +
  1.3299 +  /* Update the database size and return.
  1.3300 +  */
  1.3301 +  assert( pPager->state>=PAGER_SHARED );
  1.3302 +  if( pPager->dbSize<(int)pPg->pgno ){
  1.3303 +    pPager->dbSize = pPg->pgno;
  1.3304 +    if( !MEMDB && pPager->dbSize==PENDING_BYTE/pPager->pageSize ){
  1.3305 +      pPager->dbSize++;
  1.3306 +    }
  1.3307 +  }
  1.3308 +  return rc;
  1.3309 +}
  1.3310 +
  1.3311 +/*
  1.3312 +** This function is used to mark a data-page as writable. It uses 
  1.3313 +** pager_write() to open a journal file (if it is not already open)
  1.3314 +** and write the page *pData to the journal.
  1.3315 +**
  1.3316 +** The difference between this function and pager_write() is that this
  1.3317 +** function also deals with the special case where 2 or more pages
  1.3318 +** fit on a single disk sector. In this case all co-resident pages
  1.3319 +** must have been written to the journal file before returning.
  1.3320 +*/
  1.3321 +int sqlite3PagerWrite(DbPage *pDbPage){
  1.3322 +  int rc = SQLITE_OK;
  1.3323 +
  1.3324 +  PgHdr *pPg = pDbPage;
  1.3325 +  Pager *pPager = pPg->pPager;
  1.3326 +  Pgno nPagePerSector = (pPager->sectorSize/pPager->pageSize);
  1.3327 +
  1.3328 +  if( !MEMDB && nPagePerSector>1 ){
  1.3329 +    Pgno nPageCount;          /* Total number of pages in database file */
  1.3330 +    Pgno pg1;                 /* First page of the sector pPg is located on. */
  1.3331 +    int nPage;                /* Number of pages starting at pg1 to journal */
  1.3332 +    int ii;
  1.3333 +    int needSync = 0;
  1.3334 +
  1.3335 +    /* Set the doNotSync flag to 1. This is because we cannot allow a journal
  1.3336 +    ** header to be written between the pages journaled by this function.
  1.3337 +    */
  1.3338 +    assert( pPager->doNotSync==0 );
  1.3339 +    pPager->doNotSync = 1;
  1.3340 +
  1.3341 +    /* This trick assumes that both the page-size and sector-size are
  1.3342 +    ** an integer power of 2. It sets variable pg1 to the identifier
  1.3343 +    ** of the first page of the sector pPg is located on.
  1.3344 +    */
  1.3345 +    pg1 = ((pPg->pgno-1) & ~(nPagePerSector-1)) + 1;
  1.3346 +
  1.3347 +    sqlite3PagerPagecount(pPager, (int *)&nPageCount);
  1.3348 +    if( pPg->pgno>nPageCount ){
  1.3349 +      nPage = (pPg->pgno - pg1)+1;
  1.3350 +    }else if( (pg1+nPagePerSector-1)>nPageCount ){
  1.3351 +      nPage = nPageCount+1-pg1;
  1.3352 +    }else{
  1.3353 +      nPage = nPagePerSector;
  1.3354 +    }
  1.3355 +    assert(nPage>0);
  1.3356 +    assert(pg1<=pPg->pgno);
  1.3357 +    assert((pg1+nPage)>pPg->pgno);
  1.3358 +
  1.3359 +    for(ii=0; ii<nPage && rc==SQLITE_OK; ii++){
  1.3360 +      Pgno pg = pg1+ii;
  1.3361 +      PgHdr *pPage;
  1.3362 +      if( pg==pPg->pgno || !sqlite3BitvecTest(pPager->pInJournal, pg) ){
  1.3363 +        if( pg!=PAGER_MJ_PGNO(pPager) ){
  1.3364 +          rc = sqlite3PagerGet(pPager, pg, &pPage);
  1.3365 +          if( rc==SQLITE_OK ){
  1.3366 +            rc = pager_write(pPage);
  1.3367 +            if( pPage->flags&PGHDR_NEED_SYNC ){
  1.3368 +              needSync = 1;
  1.3369 +            }
  1.3370 +            sqlite3PagerUnref(pPage);
  1.3371 +          }
  1.3372 +        }
  1.3373 +      }else if( (pPage = pager_lookup(pPager, pg))!=0 ){
  1.3374 +        if( pPage->flags&PGHDR_NEED_SYNC ){
  1.3375 +          needSync = 1;
  1.3376 +        }
  1.3377 +        sqlite3PagerUnref(pPage);
  1.3378 +      }
  1.3379 +    }
  1.3380 +
  1.3381 +    /* If the PgHdr.needSync flag is set for any of the nPage pages 
  1.3382 +    ** starting at pg1, then it needs to be set for all of them. Because
  1.3383 +    ** writing to any of these nPage pages may damage the others, the
  1.3384 +    ** journal file must contain sync()ed copies of all of them
  1.3385 +    ** before any of them can be written out to the database file.
  1.3386 +    */
  1.3387 +    if( needSync ){
  1.3388 +      assert( !MEMDB && pPager->noSync==0 );
  1.3389 +      for(ii=0; ii<nPage && needSync; ii++){
  1.3390 +        PgHdr *pPage = pager_lookup(pPager, pg1+ii);
  1.3391 +        if( pPage ) pPage->flags |= PGHDR_NEED_SYNC;
  1.3392 +        sqlite3PagerUnref(pPage);
  1.3393 +      }
  1.3394 +      assert(pPager->needSync);
  1.3395 +    }
  1.3396 +
  1.3397 +    assert( pPager->doNotSync==1 );
  1.3398 +    pPager->doNotSync = 0;
  1.3399 +  }else{
  1.3400 +    rc = pager_write(pDbPage);
  1.3401 +  }
  1.3402 +  return rc;
  1.3403 +}
  1.3404 +
  1.3405 +/*
  1.3406 +** Return TRUE if the page given in the argument was previously passed
  1.3407 +** to sqlite3PagerWrite().  In other words, return TRUE if it is ok
  1.3408 +** to change the content of the page.
  1.3409 +*/
  1.3410 +#ifndef NDEBUG
  1.3411 +int sqlite3PagerIswriteable(DbPage *pPg){
  1.3412 +  return pPg->flags&PGHDR_DIRTY;
  1.3413 +}
  1.3414 +#endif
  1.3415 +
  1.3416 +/*
  1.3417 +** A call to this routine tells the pager that it is not necessary to
  1.3418 +** write the information on page pPg back to the disk, even though
  1.3419 +** that page might be marked as dirty.
  1.3420 +**
  1.3421 +** The overlying software layer calls this routine when all of the data
  1.3422 +** on the given page is unused.  The pager marks the page as clean so
  1.3423 +** that it does not get written to disk.
  1.3424 +**
  1.3425 +** Tests show that this optimization, together with the
  1.3426 +** sqlite3PagerDontRollback() below, more than double the speed
  1.3427 +** of large INSERT operations and quadruple the speed of large DELETEs.
  1.3428 +**
  1.3429 +** When this routine is called, set the alwaysRollback flag to true.
  1.3430 +** Subsequent calls to sqlite3PagerDontRollback() for the same page
  1.3431 +** will thereafter be ignored.  This is necessary to avoid a problem
  1.3432 +** where a page with data is added to the freelist during one part of
  1.3433 +** a transaction then removed from the freelist during a later part
  1.3434 +** of the same transaction and reused for some other purpose.  When it
  1.3435 +** is first added to the freelist, this routine is called.  When reused,
  1.3436 +** the sqlite3PagerDontRollback() routine is called.  But because the
  1.3437 +** page contains critical data, we still need to be sure it gets
  1.3438 +** rolled back in spite of the sqlite3PagerDontRollback() call.
  1.3439 +*/
  1.3440 +int sqlite3PagerDontWrite(DbPage *pDbPage){
  1.3441 +  PgHdr *pPg = pDbPage;
  1.3442 +  Pager *pPager = pPg->pPager;
  1.3443 +  int rc;
  1.3444 +
  1.3445 +  if( MEMDB || pPg->pgno>pPager->origDbSize ){
  1.3446 +    return SQLITE_OK;
  1.3447 +  }
  1.3448 +  if( pPager->pAlwaysRollback==0 ){
  1.3449 +    assert( pPager->pInJournal );
  1.3450 +    pPager->pAlwaysRollback = sqlite3BitvecCreate(pPager->origDbSize);
  1.3451 +    if( !pPager->pAlwaysRollback ){
  1.3452 +      return SQLITE_NOMEM;
  1.3453 +    }
  1.3454 +  }
  1.3455 +  rc = sqlite3BitvecSet(pPager->pAlwaysRollback, pPg->pgno);
  1.3456 +
  1.3457 +  if( rc==SQLITE_OK && (pPg->flags&PGHDR_DIRTY) && !pPager->stmtInUse ){
  1.3458 +    assert( pPager->state>=PAGER_SHARED );
  1.3459 +    if( pPager->dbSize==(int)pPg->pgno && pPager->origDbSize<pPager->dbSize ){
  1.3460 +      /* If this pages is the last page in the file and the file has grown
  1.3461 +      ** during the current transaction, then do NOT mark the page as clean.
  1.3462 +      ** When the database file grows, we must make sure that the last page
  1.3463 +      ** gets written at least once so that the disk file will be the correct
  1.3464 +      ** size. If you do not write this page and the size of the file
  1.3465 +      ** on the disk ends up being too small, that can lead to database
  1.3466 +      ** corruption during the next transaction.
  1.3467 +      */
  1.3468 +    }else{
  1.3469 +      PAGERTRACE3("DONT_WRITE page %d of %d\n", pPg->pgno, PAGERID(pPager));
  1.3470 +      IOTRACE(("CLEAN %p %d\n", pPager, pPg->pgno))
  1.3471 +      pPg->flags |= PGHDR_DONT_WRITE;
  1.3472 +#ifdef SQLITE_CHECK_PAGES
  1.3473 +      pPg->pageHash = pager_pagehash(pPg);
  1.3474 +#endif
  1.3475 +    }
  1.3476 +  }
  1.3477 +  return rc;
  1.3478 +}
  1.3479 +
  1.3480 +/*
  1.3481 +** A call to this routine tells the pager that if a rollback occurs,
  1.3482 +** it is not necessary to restore the data on the given page.  This
  1.3483 +** means that the pager does not have to record the given page in the
  1.3484 +** rollback journal.
  1.3485 +**
  1.3486 +** If we have not yet actually read the content of this page (if
  1.3487 +** the PgHdr.needRead flag is set) then this routine acts as a promise
  1.3488 +** that we will never need to read the page content in the future.
  1.3489 +** so the needRead flag can be cleared at this point.
  1.3490 +*/
  1.3491 +void sqlite3PagerDontRollback(DbPage *pPg){
  1.3492 +  Pager *pPager = pPg->pPager;
  1.3493 +
  1.3494 +  assert( pPager->state>=PAGER_RESERVED );
  1.3495 +
  1.3496 +  /* If the journal file is not open, or DontWrite() has been called on
  1.3497 +  ** this page (DontWrite() sets the alwaysRollback flag), then this
  1.3498 +  ** function is a no-op.
  1.3499 +  */
  1.3500 +  if( pPager->journalOpen==0 
  1.3501 +   || sqlite3BitvecTest(pPager->pAlwaysRollback, pPg->pgno)
  1.3502 +   || pPg->pgno>pPager->origDbSize
  1.3503 +  ){
  1.3504 +    return;
  1.3505 +  }
  1.3506 +  assert( !MEMDB );    /* For a memdb, pPager->journalOpen is always 0 */
  1.3507 +
  1.3508 +#ifdef SQLITE_SECURE_DELETE
  1.3509 +  if( (pPg->flags & PGHDR_IN_JOURNAL)!=0 || (int)pPg->pgno>pPager->origDbSize ){
  1.3510 +    return;
  1.3511 +  }
  1.3512 +#endif
  1.3513 +
  1.3514 +  /* If SECURE_DELETE is disabled, then there is no way that this
  1.3515 +  ** routine can be called on a page for which sqlite3PagerDontWrite()
  1.3516 +  ** has not been previously called during the same transaction.
  1.3517 +  ** And if DontWrite() has previously been called, the following
  1.3518 +  ** conditions must be met.
  1.3519 +  **
  1.3520 +  ** (Later:)  Not true.  If the database is corrupted by having duplicate
  1.3521 +  ** pages on the freelist (ex: corrupt9.test) then the following is not
  1.3522 +  ** necessarily true:
  1.3523 +  */
  1.3524 +  /* assert( !pPg->inJournal && (int)pPg->pgno <= pPager->origDbSize ); */
  1.3525 +
  1.3526 +  assert( pPager->pInJournal!=0 );
  1.3527 +  sqlite3BitvecSet(pPager->pInJournal, pPg->pgno);
  1.3528 +  pPg->flags |= PGHDR_IN_JOURNAL;
  1.3529 +  pPg->flags &= ~PGHDR_NEED_READ;
  1.3530 +  if( pPager->stmtInUse ){
  1.3531 +    assert( pPager->stmtSize >= pPager->origDbSize );
  1.3532 +    sqlite3BitvecSet(pPager->pInStmt, pPg->pgno);
  1.3533 +  }
  1.3534 +  PAGERTRACE3("DONT_ROLLBACK page %d of %d\n", pPg->pgno, PAGERID(pPager));
  1.3535 +  IOTRACE(("GARBAGE %p %d\n", pPager, pPg->pgno))
  1.3536 +}
  1.3537 +
  1.3538 +
  1.3539 +/*
  1.3540 +** Increment the database file change-counter, stored at byte 24 of page 1
  1.3541 +** of the pager file.  Nothing is done if Pager.changeCountDone is already set.
  1.3542 +*/
  1.3543 +static int pager_incr_changecounter(Pager *pPager, int isDirect){
  1.3544 +  PgHdr *pPgHdr;          /* Reference to page 1 */
  1.3545 +  u32 change_counter;     /* Counter value to be stored on page 1 */
  1.3546 +  int rc = SQLITE_OK;     /* Result code returned to the caller */
  1.3547 +  /* isDirect!=0: skip sqlite3PagerWrite() and push page 1 out with sqlite3OsWrite() */
  1.3548 +#ifndef SQLITE_ENABLE_ATOMIC_WRITE
  1.3549 +  assert( isDirect==0 );  /* isDirect is only true for atomic writes */
  1.3550 +#endif
  1.3551 +  if( !pPager->changeCountDone ){
  1.3552 +    /* Open page 1 of the file for writing. */
  1.3553 +    rc = sqlite3PagerGet(pPager, 1, &pPgHdr);
  1.3554 +    if( rc!=SQLITE_OK ) return rc;
  1.3555 +
  1.3556 +    if( !isDirect ){
  1.3557 +      rc = sqlite3PagerWrite(pPgHdr);
  1.3558 +      if( rc!=SQLITE_OK ){
  1.3559 +        sqlite3PagerUnref(pPgHdr);   /* do not leak the page reference */
  1.3560 +        return rc;
  1.3561 +      }
  1.3562 +    }
  1.3563 +
  1.3564 +    /* Take the counter cached in dbFileVers, add one, store it at byte 24. */
  1.3565 +    change_counter = sqlite3Get4byte((u8*)pPager->dbFileVers);
  1.3566 +    change_counter++;
  1.3567 +    put32bits(((char*)pPgHdr->pData)+24, change_counter);
  1.3568 +
  1.3569 +#ifdef SQLITE_ENABLE_ATOMIC_WRITE
  1.3570 +    if( isDirect && pPager->fd->pMethods ){
  1.3571 +      const void *zBuf = pPgHdr->pData;
  1.3572 +      rc = sqlite3OsWrite(pPager->fd, zBuf, pPager->pageSize, 0);
  1.3573 +    }
  1.3574 +#endif
  1.3575 +
  1.3576 +    /* Drop the page reference.  A failed direct write must stay retryable. */
  1.3577 +    sqlite3PagerUnref(pPgHdr);
  1.3578 +    if( rc==SQLITE_OK ) pPager->changeCountDone = 1;
  1.3579 +  }
  1.3580 +  return rc;
  1.3581 +}
  1.3582 +
  1.3583 +/*
  1.3584 +** Flush the database file of pPager to disk using the pager's sync flags.
  1.3585 +*/
  1.3586 +int sqlite3PagerSync(Pager *pPager){
  1.3587 +  /* For a memdb pager no OS-level sync is issued; report success at once. */
  1.3588 +  if( MEMDB ){
  1.3589 +    return SQLITE_OK;
  1.3590 +  }
  1.3591 +
  1.3592 +  /* Otherwise hand the request to the OS layer and pass its result back. */
  1.3593 +  return sqlite3OsSync(pPager->fd, pPager->sync_flags);
  1.3594 +}
  1.3595 +
  1.3596 +/*
  1.3597 +** Sync the database file for the pager pPager. zMaster points to the name
  1.3598 +** of a master journal file that should be written into the individual
  1.3599 +** journal file. zMaster may be NULL, which is interpreted as no master
  1.3600 +** journal (a single database transaction).
  1.3601 +**
  1.3602 +** This routine ensures that the journal is synced, all dirty pages written
  1.3603 +** to the database file and the database file synced. The only thing that
  1.3604 +** remains to commit the transaction is to delete the journal file (or
  1.3605 +** master journal file if specified).
  1.3606 +**
  1.3607 +** Note that if zMaster==NULL, this does not overwrite a previous value
  1.3608 +** passed to an sqlite3PagerCommitPhaseOne() call.
  1.3609 +**
  1.3610 +** If parameter nTrunc is non-zero, then the pager file is truncated to
  1.3611 +** nTrunc pages (this is used by auto-vacuum databases).
  1.3612 +**
  1.3613 +** If the final parameter - noSync - is true, then the database file itself
  1.3614 +** is not synced. The caller must call sqlite3PagerSync() directly to
  1.3615 +** sync the database file before calling CommitPhaseTwo() to delete the
  1.3616 +** journal file in this case.  SQLITE_IOERR_BLOCKED is reported as SQLITE_BUSY.
  1.3617 +*/
  1.3618 +int sqlite3PagerCommitPhaseOne(
  1.3619 +  Pager *pPager, 
  1.3620 +  const char *zMaster, 
  1.3621 +  Pgno nTrunc,
  1.3622 +  int noSync
  1.3623 +){
  1.3624 +  int rc = SQLITE_OK;    /* Result code; returned via the sync_exit label */
  1.3625 +  /* A pager that already holds an error code simply reports it again. */
  1.3626 +  if( pPager->errCode ){
  1.3627 +    return pPager->errCode;
  1.3628 +  }
  1.3629 +
  1.3630 +  /* If no changes have been made, we can leave the transaction early.
  1.3631 +  */
  1.3632 +  if( pPager->dbModified==0 &&
  1.3633 +        (pPager->journalMode!=PAGER_JOURNALMODE_DELETE ||
  1.3634 +          pPager->exclusiveMode!=0) ){
  1.3635 +    assert( pPager->dirtyCache==0 || pPager->journalOpen==0 );
  1.3636 +    return SQLITE_OK;
  1.3637 +  }
  1.3638 +
  1.3639 +  PAGERTRACE4("DATABASE SYNC: File=%s zMaster=%s nTrunc=%d\n", 
  1.3640 +      pPager->zFilename, zMaster, nTrunc);
  1.3641 +
  1.3642 +  /* If this is an in-memory db, or no pages have been written to, or this
  1.3643 +  ** function has already been called, it is a no-op.
  1.3644 +  */
  1.3645 +  if( pPager->state!=PAGER_SYNCED && !MEMDB && pPager->dirtyCache ){
  1.3646 +    PgHdr *pPg;          /* Head of the dirty list, or a page being journalled */
  1.3647 +
  1.3648 +#ifdef SQLITE_ENABLE_ATOMIC_WRITE
  1.3649 +    /* The atomic-write optimization can be used if all of the
  1.3650 +    ** following are true:
  1.3651 +    **
  1.3652 +    **    + The file-system supports the atomic-write property for
  1.3653 +    **      blocks of size page-size, and
  1.3654 +    **    + This commit is not part of a multi-file transaction, and
  1.3655 +    **    + Exactly one page has been modified and stored in the journal file.
  1.3656 +    **
  1.3657 +    ** If the optimization can be used, then the journal file will never
  1.3658 +    ** be created for this transaction.
  1.3659 +    */
  1.3660 +    int useAtomicWrite;
  1.3661 +    pPg = sqlite3PcacheDirtyList(pPager->pPCache);
  1.3662 +    useAtomicWrite = (
  1.3663 +        !zMaster && 
  1.3664 +        pPager->journalOpen &&
  1.3665 +        pPager->journalOff==jrnlBufferSize(pPager) && 
  1.3666 +        nTrunc==0 && 
  1.3667 +        (pPg==0 || pPg->pDirty==0)
  1.3668 +    );
  1.3669 +    assert( pPager->journalOpen || pPager->journalMode==PAGER_JOURNALMODE_OFF );
  1.3670 +    if( useAtomicWrite ){
  1.3671 +      /* Update the nRec field in the journal file. */
  1.3672 +      int offset = pPager->journalHdr + sizeof(aJournalMagic);
  1.3673 +      assert(pPager->nRec==1);
  1.3674 +      rc = write32bits(pPager->jfd, offset, pPager->nRec);
  1.3675 +
  1.3676 +      /* Update the db file change counter. The following call will modify
  1.3677 +      ** the in-memory representation of page 1 to include the updated
  1.3678 +      ** change counter and then write page 1 directly to the database
  1.3679 +      ** file. Because of the atomic-write property of the host file-system, 
  1.3680 +      ** this is safe.
  1.3681 +      */
  1.3682 +      if( rc==SQLITE_OK ){
  1.3683 +        rc = pager_incr_changecounter(pPager, 1);
  1.3684 +      }
  1.3685 +    }else{
  1.3686 +      rc = sqlite3JournalCreate(pPager->jfd);
  1.3687 +    }
  1.3688 +
  1.3689 +    if( !useAtomicWrite && rc==SQLITE_OK )  /* governs the if-block after #endif */
  1.3690 +#endif
  1.3691 +
  1.3692 +    /* If a master journal file name has already been written to the
  1.3693 +    ** journal file, then no sync is required. This happens when it is
  1.3694 +    ** written, then the process fails to upgrade from a RESERVED to an
  1.3695 +    ** EXCLUSIVE lock. The next time the process tries to commit the
  1.3696 +    ** transaction the m-j name will have already been written.
  1.3697 +    */
  1.3698 +    if( !pPager->setMaster ){
  1.3699 +      rc = pager_incr_changecounter(pPager, 0);
  1.3700 +      if( rc!=SQLITE_OK ) goto sync_exit;
  1.3701 +      if( pPager->journalMode!=PAGER_JOURNALMODE_OFF ){
  1.3702 +#ifndef SQLITE_OMIT_AUTOVACUUM
  1.3703 +        if( nTrunc!=0 ){
  1.3704 +          /* If this transaction has made the database smaller, then all pages
  1.3705 +          ** being discarded by the truncation must be written to the journal
  1.3706 +          ** file.
  1.3707 +          */
  1.3708 +          Pgno i;
  1.3709 +          int iSkip = PAGER_MJ_PGNO(pPager);   /* page number left out of the loop */
  1.3710 +          for( i=nTrunc+1; i<=pPager->origDbSize; i++ ){
  1.3711 +            if( !sqlite3BitvecTest(pPager->pInJournal, i) && i!=iSkip ){
  1.3712 +              rc = sqlite3PagerGet(pPager, i, &pPg);
  1.3713 +              if( rc!=SQLITE_OK ) goto sync_exit;
  1.3714 +              rc = sqlite3PagerWrite(pPg);
  1.3715 +              sqlite3PagerUnref(pPg);          /* released before rc is tested */
  1.3716 +              if( rc!=SQLITE_OK ) goto sync_exit;
  1.3717 +            }
  1.3718 +          } 
  1.3719 +        }
  1.3720 +#endif
  1.3721 +        rc = writeMasterJournal(pPager, zMaster);
  1.3722 +        if( rc!=SQLITE_OK ) goto sync_exit;
  1.3723 +        rc = syncJournal(pPager);
  1.3724 +      }
  1.3725 +    }
  1.3726 +    if( rc!=SQLITE_OK ) goto sync_exit;  /* covers both the atomic and journal paths */
  1.3727 +
  1.3728 +#ifndef SQLITE_OMIT_AUTOVACUUM
  1.3729 +    if( nTrunc!=0 ){
  1.3730 +      rc = sqlite3PagerTruncate(pPager, nTrunc);
  1.3731 +      if( rc!=SQLITE_OK ) goto sync_exit;
  1.3732 +    }
  1.3733 +#endif
  1.3734 +
  1.3735 +    /* Write all dirty pages to the database file */
  1.3736 +    pPg = sqlite3PcacheDirtyList(pPager->pPCache);
  1.3737 +    rc = pager_write_pagelist(pPg);
  1.3738 +    if( rc!=SQLITE_OK ){
  1.3739 +      assert( rc!=SQLITE_IOERR_BLOCKED );
  1.3740 +      /* The error might have left the dirty list all fouled up here,
  1.3741 +      ** but that does not matter because if the dirty list did
  1.3742 +      ** get corrupted, then the transaction will roll back and
  1.3743 +      ** discard the dirty list.  There is an assert in
  1.3744 +      ** pager_get_all_dirty_pages() that verifies that no attempt
  1.3745 +      ** is made to use an invalid dirty list.
  1.3746 +      */
  1.3747 +      goto sync_exit;
  1.3748 +    }
  1.3749 +    sqlite3PcacheCleanAll(pPager->pPCache);
  1.3750 +
  1.3751 +    /* Sync the database file. */
  1.3752 +    if( !pPager->noSync && !noSync ){
  1.3753 +      rc = sqlite3OsSync(pPager->fd, pPager->sync_flags);
  1.3754 +    }
  1.3755 +    IOTRACE(("DBSYNC %p\n", pPager))
  1.3756 +    /* NOTE(review): the state is advanced even if the sync above failed; rc keeps the error. */
  1.3757 +    pPager->state = PAGER_SYNCED;
  1.3758 +  }else if( MEMDB && nTrunc!=0 ){
  1.3759 +    rc = sqlite3PagerTruncate(pPager, nTrunc);
  1.3760 +  }
  1.3761 +
  1.3762 +sync_exit:
  1.3763 +  if( rc==SQLITE_IOERR_BLOCKED ){
  1.3764 +    /* pager_incr_changecounter() may attempt to obtain an exclusive
  1.3765 +     * lock to spill the cache and return IOERR_BLOCKED. But since 
  1.3766 +     * there is no chance the cache is inconsistent, it is
  1.3767 +     * better to return SQLITE_BUSY.
  1.3768 +     */
  1.3769 +    rc = SQLITE_BUSY;
  1.3770 +  }
  1.3771 +  return rc;
  1.3772 +}
  1.3773 +
  1.3774 +
  1.3775 +/*
  1.3776 +** Commit all changes to the database and release the write lock.
  1.3777 +**
  1.3778 +** If the commit fails for any reason, a rollback attempt is made
  1.3779 +** and an error code is returned.  If the commit worked, SQLITE_OK
  1.3780 +** is returned.
  1.3781 +*/
  1.3782 +int sqlite3PagerCommitPhaseTwo(Pager *pPager){
  1.3783 +  int rc = SQLITE_OK;
  1.3784 +  /* Early exits: stored error, state below PAGER_RESERVED, or nothing modified. */
  1.3785 +  if( pPager->errCode ){
  1.3786 +    return pPager->errCode;
  1.3787 +  }
  1.3788 +  if( pPager->state<PAGER_RESERVED ){
  1.3789 +    return SQLITE_ERROR;
  1.3790 +  }
  1.3791 +  if( pPager->dbModified==0 &&
  1.3792 +        (pPager->journalMode!=PAGER_JOURNALMODE_DELETE ||
  1.3793 +          pPager->exclusiveMode!=0) ){
  1.3794 +    assert( pPager->dirtyCache==0 || pPager->journalOpen==0 );
  1.3795 +    return SQLITE_OK;
  1.3796 +  }
  1.3797 +  PAGERTRACE2("COMMIT %d\n", PAGERID(pPager));
  1.3798 +  if( MEMDB ){
  1.3799 +    sqlite3PcacheCommit(pPager->pPCache, 0);
  1.3800 +    sqlite3PcacheCleanAll(pPager->pPCache);
  1.3801 +    sqlite3PcacheAssertFlags(pPager->pPCache, 0, PGHDR_IN_JOURNAL);
  1.3802 +    pPager->state = PAGER_SHARED;
  1.3803 +  }else{
  1.3804 +    assert( pPager->state==PAGER_SYNCED || !pPager->dirtyCache );
  1.3805 +    rc = pager_end_transaction(pPager, pPager->setMaster);
  1.3806 +    rc = pager_error(pPager, rc);   /* result is filtered through pager_error() */
  1.3807 +  }
  1.3808 +  return rc;
  1.3809 +}
  1.3810 +
  1.3811 +/*
  1.3812 +** Rollback all changes.  The database falls back to PAGER_SHARED mode.
  1.3813 +** All in-memory cache pages revert to their original data contents.
  1.3814 +** The journal is deleted.
  1.3815 +**
  1.3816 +** This routine cannot fail unless some other process is not following
  1.3817 +** the correct locking protocol or unless some other
  1.3818 +** process is writing trash into the journal file (SQLITE_CORRUPT) or
  1.3819 +** unless a prior malloc() failed (SQLITE_NOMEM).  Appropriate error
  1.3820 +** codes are returned for all these occasions.  Otherwise,
  1.3821 +** SQLITE_OK is returned.
  1.3822 +*/
  1.3823 +int sqlite3PagerRollback(Pager *pPager){
  1.3824 +  int rc = SQLITE_OK;
  1.3825 +  PAGERTRACE2("ROLLBACK %d\n", PAGERID(pPager));
  1.3826 +  if( MEMDB ){
  1.3827 +    sqlite3PcacheRollback(pPager->pPCache, 1, pPager->xReiniter);
  1.3828 +    sqlite3PcacheRollback(pPager->pPCache, 0, pPager->xReiniter);
  1.3829 +    sqlite3PcacheCleanAll(pPager->pPCache);
  1.3830 +    sqlite3PcacheAssertFlags(pPager->pPCache, 0, PGHDR_IN_JOURNAL);
  1.3831 +    pPager->dbSize = pPager->origDbSize;
  1.3832 +    pager_truncate_cache(pPager);
  1.3833 +    pPager->stmtInUse = 0;
  1.3834 +    pPager->state = PAGER_SHARED;
  1.3835 +  }else if( !pPager->dirtyCache || !pPager->journalOpen ){
  1.3836 +    rc = pager_end_transaction(pPager, pPager->setMaster);  /* no playback on this path */
  1.3837 +  }else if( pPager->errCode && pPager->errCode!=SQLITE_FULL ){
  1.3838 +    if( pPager->state>=PAGER_EXCLUSIVE ){
  1.3839 +      pager_playback(pPager, 0);   /* result ignored: the stored error is returned */
  1.3840 +    }
  1.3841 +    rc = pPager->errCode;
  1.3842 +  }else{
  1.3843 +    if( pPager->state==PAGER_RESERVED ){
  1.3844 +      int rc2;
  1.3845 +      rc = pager_playback(pPager, 0);
  1.3846 +      rc2 = pager_end_transaction(pPager, pPager->setMaster);
  1.3847 +      if( rc==SQLITE_OK ){
  1.3848 +        rc = rc2;                  /* the playback error, if any, takes priority */
  1.3849 +      }
  1.3850 +    }else{
  1.3851 +      rc = pager_playback(pPager, 0);
  1.3852 +    }
  1.3853 +    /* NOTE(review): -1 presumably marks the cached page count as unknown - confirm. */
  1.3854 +    pPager->dbSize = -1;
  1.3855 +
  1.3856 +    /* If an error occurs during a ROLLBACK, we can no longer trust the pager
  1.3857 +    ** cache. So call pager_error() on the way out to make any error 
  1.3858 +    ** persistent.
  1.3859 +    */
  1.3860 +    rc = pager_error(pPager, rc);
  1.3861 +  }
  1.3862 +  return rc;
  1.3863 +}
  1.3864 +
  1.3865 +/*
  1.3866 +** Return TRUE if the database file is opened read-only.  Return FALSE
  1.3867 +** if the database is (in theory) writable.
  1.3868 +*/
  1.3869 +int sqlite3PagerIsreadonly(Pager *pPager){
  1.3870 +  return pPager->readOnly;   /* the pager's readOnly flag, returned unchanged */
  1.3871 +}
  1.3872 +
  1.3873 +/*
  1.3874 +** Return the number of references to the pager, as counted by its page cache.
  1.3875 +*/
  1.3876 +int sqlite3PagerRefcount(Pager *pPager){
  1.3877 +  return sqlite3PcacheRefCount(pPager->pPCache);
  1.3878 +}
  1.3879 +
  1.3880 +/*
  1.3881 +** Return the number of references to the specified page (a page-cache query).
  1.3882 +*/
  1.3883 +int sqlite3PagerPageRefcount(DbPage *pPage){
  1.3884 +  return sqlite3PcachePageRefcount(pPage);
  1.3885 +}
  1.3886 +
  1.3887 +#ifdef SQLITE_TEST
  1.3888 +/*
  1.3889 +** Testing/analysis only: fill a static 11-int array (reused by every call) and return it.
  1.3890 +*/
  1.3891 +int *sqlite3PagerStats(Pager *pPager){
  1.3892 +  static int a[11];   /* shared by all callers; overwritten on each call */
  1.3893 +  a[0] = sqlite3PcacheRefCount(pPager->pPCache);       /* page-cache reference count */
  1.3894 +  a[1] = sqlite3PcachePagecount(pPager->pPCache);      /* page-cache page count */
  1.3895 +  a[2] = sqlite3PcacheGetCachesize(pPager->pPCache);   /* configured cache size */
  1.3896 +  a[3] = pPager->dbSize;
  1.3897 +  a[4] = pPager->state;
  1.3898 +  a[5] = pPager->errCode;
  1.3899 +  a[6] = pPager->nHit;
  1.3900 +  a[7] = pPager->nMiss;
  1.3901 +  a[8] = 0;  /* Used to be pPager->nOvfl */
  1.3902 +  a[9] = pPager->nRead;
  1.3903 +  a[10] = pPager->nWrite;
  1.3904 +  return a;
  1.3905 +}
  1.3906 +int sqlite3PagerIsMemdb(Pager *pPager){
  1.3907 +  return MEMDB;   /* test-only query: value of the MEMDB macro for this pager */
  1.3908 +}
  1.3909 +#endif
  1.3910 +
  1.3911 +/*
  1.3912 +** Set the statement rollback point.
  1.3913 +**
  1.3914 +** This routine should be called with the transaction journal already
  1.3915 +** open.  A new statement journal is created that can be used to rollback
  1.3916 +** changes of a single SQL command within a larger transaction.
  1.3917 +*/
  1.3918 +static int pagerStmtBegin(Pager *pPager){
  1.3919 +  int rc;                /* Only assigned on the sqlite3PagerOpentemp() path */
  1.3920 +  assert( !pPager->stmtInUse );
  1.3921 +  assert( pPager->state>=PAGER_SHARED );
  1.3922 +  assert( pPager->dbSize>=0 );
  1.3923 +  PAGERTRACE2("STMT-BEGIN %d\n", PAGERID(pPager));
  1.3924 +  if( MEMDB ){
  1.3925 +    pPager->stmtInUse = 1;
  1.3926 +    pPager->stmtSize = pPager->dbSize;   /* size to restore on statement rollback */
  1.3927 +    return SQLITE_OK;
  1.3928 +  }
  1.3929 +  if( !pPager->journalOpen ){
  1.3930 +    pPager->stmtAutoopen = 1;            /* journal not open: only set the flag */
  1.3931 +    return SQLITE_OK;
  1.3932 +  }
  1.3933 +  assert( pPager->journalOpen );
  1.3934 +  assert( pPager->pInStmt==0 );
  1.3935 +  pPager->pInStmt = sqlite3BitvecCreate(pPager->dbSize);
  1.3936 +  if( pPager->pInStmt==0 ){
  1.3937 +    /* sqlite3OsLock(pPager->fd, SHARED_LOCK); */
  1.3938 +    return SQLITE_NOMEM;
  1.3939 +  }
  1.3940 +  pPager->stmtJSize = pPager->journalOff;
  1.3941 +  pPager->stmtSize = pPager->dbSize;
  1.3942 +  pPager->stmtHdrOff = 0;
  1.3943 +  pPager->stmtCksum = pPager->cksumInit;
  1.3944 +  if( !pPager->stmtOpen ){
  1.3945 +    rc = sqlite3PagerOpentemp(pPager, pPager->stfd, SQLITE_OPEN_SUBJOURNAL);
  1.3946 +    if( rc ){
  1.3947 +      goto stmt_begin_failed;
  1.3948 +    }
  1.3949 +    pPager->stmtOpen = 1;
  1.3950 +    pPager->stmtNRec = 0;
  1.3951 +  }
  1.3952 +  pPager->stmtInUse = 1;
  1.3953 +  return SQLITE_OK;
  1.3954 + 
  1.3955 +stmt_begin_failed:
  1.3956 +  if( pPager->pInStmt ){
  1.3957 +    sqlite3BitvecDestroy(pPager->pInStmt);   /* undo the bitvec allocated above */
  1.3958 +    pPager->pInStmt = 0;
  1.3959 +  }
  1.3960 +  return rc;
  1.3961 +}
  1.3962 +int sqlite3PagerStmtBegin(Pager *pPager){
  1.3963 +  /* Public entry point: the work is done by the file-local worker and
  1.3964 +  ** its result code is handed straight back to the caller. */
  1.3965 +  return pagerStmtBegin(pPager);
  1.3966 +}
  1.3967 +
  1.3968 +/*
  1.3969 +** End the statement transaction, if one is in use.  Always returns SQLITE_OK.
  1.3970 +*/
  1.3971 +int sqlite3PagerStmtCommit(Pager *pPager){
  1.3972 +  if( pPager->stmtInUse ){
  1.3973 +    PAGERTRACE2("STMT-COMMIT %d\n", PAGERID(pPager));
  1.3974 +    if( MEMDB ){
  1.3975 +      sqlite3PcacheCommit(pPager->pPCache, 1);    /* memdb: commit in the page cache */
  1.3976 +    }else{
  1.3977 +      sqlite3BitvecDestroy(pPager->pInStmt);      /* file-backed: drop the bitvec */
  1.3978 +      pPager->pInStmt = 0;
  1.3979 +    }
  1.3980 +    pPager->stmtInUse = 0;
  1.3981 +    pPager->stmtNRec = 0;
  1.3982 +  }
  1.3983 +  pPager->stmtAutoopen = 0;   /* cleared whether or not a statement was active */
  1.3984 +  return SQLITE_OK;
  1.3985 +}
  1.3986 +
  1.3987 +/*
  1.3988 +** Undo the changes made by the statement transaction, if one is in use.
  1.3989 +*/
  1.3990 +int sqlite3PagerStmtRollback(Pager *pPager){
  1.3991 +  int rc = SQLITE_OK;   /* Changes only if the statement playback fails */
  1.3992 +  if( pPager->stmtInUse ){
  1.3993 +    PAGERTRACE2("STMT-ROLLBACK %d\n", PAGERID(pPager));
  1.3994 +    if( !MEMDB ){
  1.3995 +      /* File-backed pager: the statement playback does the restoring. */
  1.3996 +      rc = pager_stmt_playback(pPager);
  1.3997 +    }else{
  1.3998 +      /* Memdb: roll the cache back, then restore the saved statement size. */
  1.3999 +      sqlite3PcacheRollback(pPager->pPCache, 1, pPager->xReiniter);
  1.4000 +      pPager->dbSize = pPager->stmtSize;
  1.4001 +      pager_truncate_cache(pPager);
  1.4002 +    }
  1.4003 +    /* Closing the statement is shared with the commit path. */
  1.4004 +    sqlite3PagerStmtCommit(pPager);
  1.4005 +  }
  1.4006 +  pPager->stmtAutoopen = 0;
  1.4007 +  return rc;
  1.4008 +}
  1.4009 +
  1.4010 +/*
  1.4011 +** Return the full pathname of the database file (the pager's zFilename string).
  1.4012 +*/
  1.4013 +const char *sqlite3PagerFilename(Pager *pPager){
  1.4014 +  return pPager->zFilename;   /* pointer into the pager; not a copy */
  1.4015 +}
  1.4016 +
  1.4017 +/*
  1.4018 +** Return the VFS structure for the pager (the pager's pVfs pointer).
  1.4019 +*/
  1.4020 +const sqlite3_vfs *sqlite3PagerVfs(Pager *pPager){
  1.4021 +  return pPager->pVfs;
  1.4022 +}
  1.4023 +
  1.4024 +/*
  1.4025 +** Return the file handle for the database file associated
  1.4026 +** with the pager.  This might return NULL if the file has
  1.4027 +** not yet been opened.
  1.4028 +*/
  1.4029 +sqlite3_file *sqlite3PagerFile(Pager *pPager){
  1.4030 +  return pPager->fd;   /* the pager's own handle; nothing is opened here */
  1.4031 +}
  1.4032 +
  1.4033 +/*
  1.4034 +** Return the directory of the database file (the pager's zDirectory string).
  1.4035 +*/
  1.4036 +const char *sqlite3PagerDirname(Pager *pPager){
  1.4037 +  return pPager->zDirectory;   /* pointer into the pager; not a copy */
  1.4038 +}
  1.4039 +
  1.4040 +/*
  1.4041 +** Return the full pathname of the journal file (the pager's zJournal string).
  1.4042 +*/
  1.4043 +const char *sqlite3PagerJournalname(Pager *pPager){
  1.4044 +  return pPager->zJournal;   /* pointer into the pager; not a copy */
  1.4045 +}
  1.4046 +
  1.4047 +/*
  1.4048 +** Return true if fsync() calls are disabled for this pager.  Return FALSE
  1.4049 +** if fsync()s are executed normally.
  1.4050 +*/
  1.4051 +int sqlite3PagerNosync(Pager *pPager){
  1.4052 +  return pPager->noSync;   /* the pager's noSync flag, returned unchanged */
  1.4053 +}
  1.4054 +
  1.4055 +#ifdef SQLITE_HAS_CODEC
  1.4056 +/*
  1.4057 +** Set the codec for this pager: store the callback and its argument in the Pager.
  1.4058 +*/
  1.4059 +void sqlite3PagerSetCodec(
  1.4060 +  Pager *pPager,
  1.4061 +  void *(*xCodec)(void*,void*,Pgno,int),
  1.4062 +  void *pCodecArg
  1.4063 +){
  1.4064 +  pPager->xCodec = xCodec;         /* replaces any callback stored earlier */
  1.4065 +  pPager->pCodecArg = pCodecArg;   /* stored as given; not interpreted here */
  1.4066 +}
  1.4067 +#endif
  1.4068 +
  1.4069 +#ifndef SQLITE_OMIT_AUTOVACUUM
  1.4070 +/*
  1.4071 +** Move the page pPg to location pgno in the file.
  1.4072 +**
  1.4073 +** There must be no references to the page previously located at
  1.4074 +** pgno (which we call pPgOld) though that page is allowed to be
  1.4075 +** in cache.  If the page previously located at pgno is not already
  1.4076 +** in the rollback journal, it is not put there by this routine.
  1.4077 +**
  1.4078 +** References to the page pPg remain valid. Updating any
  1.4079 +** meta-data associated with pPg (i.e. data stored in the nExtra bytes
  1.4080 +** allocated along with the page) is the responsibility of the caller.
  1.4081 +**
  1.4082 +** A transaction must be active when this routine is called. It used to be
  1.4083 +** required that a statement transaction was not active, but this restriction
  1.4084 +** has been removed (CREATE INDEX needs to move a page when a statement
  1.4085 +** transaction is active).
  1.4086 +**
  1.4087 +** If the fourth argument, isCommit, is non-zero, then this page is being
  1.4088 +** moved as part of a database reorganization just before the transaction 
  1.4089 +** is being committed. In this case, it is guaranteed that the database page 
  1.4090 +** pPg refers to will not be written to again within this transaction.
  1.4091 +*/
  1.4092 +int sqlite3PagerMovepage(Pager *pPager, DbPage *pPg, Pgno pgno, int isCommit){
  1.4093 +  PgHdr *pPgOld;  /* The page being overwritten. */
  1.4094 +  Pgno needSyncPgno = 0;   /* Old page number of pPg if it still needs a journal sync */
  1.4095 +
  1.4096 +  assert( pPg->nRef>0 );
  1.4097 +
  1.4098 +  PAGERTRACE5("MOVE %d page %d (needSync=%d) moves to %d\n", 
  1.4099 +      PAGERID(pPager), pPg->pgno, (pPg->flags&PGHDR_NEED_SYNC)?1:0, pgno);
  1.4100 +  IOTRACE(("MOVE %p %d %d\n", pPager, pPg->pgno, pgno))
  1.4101 +
  1.4102 +  pager_get_content(pPg);
  1.4103 +
  1.4104 +  /* If the journal needs to be sync()ed before page pPg->pgno can
  1.4105 +  ** be written to, store pPg->pgno in local variable needSyncPgno.
  1.4106 +  **
  1.4107 +  ** If the isCommit flag is set, there is no need to remember that
  1.4108 +  ** the journal needs to be sync()ed before database page pPg->pgno 
  1.4109 +  ** can be written to. The caller has already promised not to write to it.
  1.4110 +  */
  1.4111 +  if( (pPg->flags&PGHDR_NEED_SYNC) && !isCommit ){
  1.4112 +    needSyncPgno = pPg->pgno;
  1.4113 +    assert( (pPg->flags&PGHDR_IN_JOURNAL) || (int)pgno>pPager->origDbSize );
  1.4114 +    assert( pPg->flags&PGHDR_DIRTY );
  1.4115 +    assert( pPager->needSync );
  1.4116 +  }
  1.4117 +
  1.4118 +  /* If the cache contains a page with page-number pgno, remove it
  1.4119 +  ** from its hash chain. Also, if the PgHdr.needSync was set for 
  1.4120 +  ** page pgno before the 'move' operation, it needs to be retained 
  1.4121 +  ** for the page moved there.
  1.4122 +  */
  1.4123 +  pPg->flags &= ~(PGHDR_NEED_SYNC|PGHDR_IN_JOURNAL);
  1.4124 +  pPgOld = pager_lookup(pPager, pgno);
  1.4125 +  assert( !pPgOld || pPgOld->nRef==1 );
  1.4126 +  if( pPgOld ){
  1.4127 +    pPg->flags |= (pPgOld->flags&PGHDR_NEED_SYNC);   /* inherit only NEED_SYNC */
  1.4128 +  }
  1.4129 +  if( sqlite3BitvecTest(pPager->pInJournal, pgno) ){
  1.4130 +    assert( !MEMDB );
  1.4131 +    pPg->flags |= PGHDR_IN_JOURNAL;
  1.4132 +  }
  1.4133 +
  1.4134 +  sqlite3PcacheMove(pPg, pgno);
  1.4135 +  if( pPgOld ){
  1.4136 +    sqlite3PcacheMove(pPgOld, 0);     /* old page is renumbered 0, then released */
  1.4137 +    sqlite3PcacheRelease(pPgOld);
  1.4138 +  }
  1.4139 +
  1.4140 +  makeDirty(pPg);
  1.4141 +  pPager->dirtyCache = 1;
  1.4142 +  pPager->dbModified = 1;
  1.4143 +
  1.4144 +  if( needSyncPgno ){
  1.4145 +    /* If needSyncPgno is non-zero, then the journal file needs to be 
  1.4146 +    ** sync()ed before any data is written to database file page needSyncPgno.
  1.4147 +    ** Currently, no such page exists in the page-cache and the 
  1.4148 +    ** "is journaled" bitvec flag has been set. This needs to be remedied by
  1.4149 +    ** loading the page into the pager-cache and setting the PgHdr.needSync 
  1.4150 +    ** flag.
  1.4151 +    **
  1.4152 +    ** If the attempt to load the page into the page-cache fails, (due
  1.4153 +    ** to a malloc() or IO failure), clear the bit in the pInJournal[]
  1.4154 +    ** array. Otherwise, if the page is loaded and written again in
  1.4155 +    ** this transaction, it may be written to the database file before
  1.4156 +    ** it is synced into the journal file. This way, it may end up in
  1.4157 +    ** the journal file twice, but that is not a problem.
  1.4158 +    **
  1.4159 +    ** The sqlite3PagerGet() call may cause the journal to sync. So make
  1.4160 +    ** sure the Pager.needSync flag is set too.
  1.4161 +    */
  1.4162 +    int rc;
  1.4163 +    PgHdr *pPgHdr;                    /* Freshly loaded page at the old page number */
  1.4164 +    assert( pPager->needSync );
  1.4165 +    rc = sqlite3PagerGet(pPager, needSyncPgno, &pPgHdr);
  1.4166 +    if( rc!=SQLITE_OK ){
  1.4167 +      if( pPager->pInJournal && (int)needSyncPgno<=pPager->origDbSize ){
  1.4168 +        sqlite3BitvecClear(pPager->pInJournal, needSyncPgno);
  1.4169 +      }
  1.4170 +      return rc;                      /* the move of pPg itself has already been done */
  1.4171 +    }
  1.4172 +    pPager->needSync = 1;
  1.4173 +    assert( pPager->noSync==0 && !MEMDB );
  1.4174 +    pPgHdr->flags |= PGHDR_NEED_SYNC;
  1.4175 +    pPgHdr->flags |= PGHDR_IN_JOURNAL;
  1.4176 +    makeDirty(pPgHdr);
  1.4177 +    sqlite3PagerUnref(pPgHdr);
  1.4178 +  }
  1.4179 +
  1.4180 +  return SQLITE_OK;
  1.4181 +}
  1.4182 +#endif
  1.4183 +
  1.4184 +/*
  1.4185 +** Return a pointer to the data for the specified page (its pData buffer).
  1.4186 +*/
  1.4187 +void *sqlite3PagerGetData(DbPage *pPg){
  1.4188 +  assert( pPg->nRef>0 || pPg->pPager->memDb );   /* referenced, or owned by a memdb pager */
  1.4189 +  return pPg->pData;
  1.4190 +}
  1.4191 +
  1.4192 +/*
  1.4193 +** Fetch the "extra" space attached to page pPg.  A page whose pPager
  1.4194 +** back-pointer is null yields a null result instead.
  1.4195 +*/
  1.4196 +void *sqlite3PagerGetExtra(DbPage *pPg){
  1.4197 +  if( pPg->pPager==0 ) return 0;
  1.4198 +  return pPg->pExtra;
  1.4199 +}
  1.4200 +
  1.4201 +/*
  1.4202 +** Query or change the locking-mode of this pager.  eMode is one of
  1.4203 +** PAGER_LOCKINGMODE_QUERY, PAGER_LOCKINGMODE_NORMAL or
  1.4204 +** PAGER_LOCKINGMODE_EXCLUSIVE.  Any value other than _QUERY becomes
  1.4205 +** the new locking-mode.
  1.4206 +**
  1.4207 +** The result is PAGER_LOCKINGMODE_NORMAL or
  1.4208 +** PAGER_LOCKINGMODE_EXCLUSIVE: the locking-mode in force once the
  1.4209 +** call has finished.
  1.4210 +*/
  1.4211 +int sqlite3PagerLockingMode(Pager *pPager, int eMode){
  1.4212 +  /* The sign test further down relies on _QUERY being the only negative mode. */
  1.4213 +  assert( PAGER_LOCKINGMODE_QUERY<0 );
  1.4214 +  assert( PAGER_LOCKINGMODE_NORMAL>=0 && PAGER_LOCKINGMODE_EXCLUSIVE>=0 );
  1.4215 +  assert( eMode==PAGER_LOCKINGMODE_EXCLUSIVE
  1.4216 +            || eMode==PAGER_LOCKINGMODE_NORMAL
  1.4217 +            || eMode==PAGER_LOCKINGMODE_QUERY );
  1.4218 +  /* A pager with tempFile set keeps its current mode; a query changes nothing. */
  1.4219 +  if( !pPager->tempFile && eMode>=0 ) pPager->exclusiveMode = eMode;
  1.4220 +  return (int)pPager->exclusiveMode;
  1.4221 +}
  1.4222 +
  1.4223 +/*
  1.4224 +** Query or change the journal-mode of this pager.  eMode must be one of:
  1.4225 +**
  1.4226 +**    PAGER_JOURNALMODE_QUERY
  1.4227 +**    PAGER_JOURNALMODE_DELETE
  1.4228 +**    PAGER_JOURNALMODE_TRUNCATE
  1.4229 +**    PAGER_JOURNALMODE_PERSIST
  1.4230 +**    PAGER_JOURNALMODE_OFF
  1.4231 +**
  1.4232 +** Any value other than _QUERY is stored as the new journal-mode.
  1.4233 +**
  1.4234 +**
  1.4235 +** The returned value indicates the current (possibly updated)
  1.4236 +** journal-mode.
  1.4237 +*/
  1.4238 +int sqlite3PagerJournalMode(Pager *pPager, int eMode){
  1.4239 +  assert( PAGER_JOURNALMODE_QUERY<0 );
  1.4240 +  assert( eMode==PAGER_JOURNALMODE_OFF
  1.4241 +            || eMode==PAGER_JOURNALMODE_PERSIST
  1.4242 +            || eMode==PAGER_JOURNALMODE_TRUNCATE
  1.4243 +            || eMode==PAGER_JOURNALMODE_DELETE
  1.4244 +            || eMode==PAGER_JOURNALMODE_QUERY );
  1.4245 +  if( eMode<0 ){
  1.4246 +    assert( eMode==PAGER_JOURNALMODE_QUERY );
  1.4247 +  }else{
  1.4248 +    pPager->journalMode = eMode;
  1.4249 +  }
  1.4250 +  return (int)pPager->journalMode;
  1.4251 +}
  1.4252 +
  1.4253 +/*
  1.4254 +** Query or change the size-limit used for persistent journal files.
  1.4255 +*/
  1.4256 +i64 sqlite3PagerJournalSizeLimit(Pager *pPager, i64 iLimit){
  1.4257 +  /* An iLimit of -1 or more replaces the stored limit; anything smaller
  1.4258 +  ** is a pure query.  Either way the limit now in force is returned. */
  1.4259 +  if( !(iLimit<-1) ) pPager->journalSizeLimit = iLimit;
  1.4260 +  return pPager->journalSizeLimit;
  1.4261 +}
  1.4262 +
  1.4263 +#endif /* SQLITE_OMIT_DISKIO */