os/persistentdata/persistentstorage/sql/SQLite/os_unix.c
changeset 0 bde4ae8d615e
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/os/persistentdata/persistentstorage/sql/SQLite/os_unix.c	Fri Jun 15 03:10:57 2012 +0200
     1.3 @@ -0,0 +1,2750 @@
     1.4 +/*
     1.5 +** 2004 May 22
     1.6 +**
     1.7 +** The author disclaims copyright to this source code.  In place of
     1.8 +** a legal notice, here is a blessing:
     1.9 +**
    1.10 +**    May you do good and not evil.
    1.11 +**    May you find forgiveness for yourself and forgive others.
    1.12 +**    May you share freely, never taking more than you give.
    1.13 +**
    1.14 +******************************************************************************
    1.15 +**
    1.16 +** This file contains code that is specific to Unix systems.
    1.17 +**
    1.18 +** $Id: os_unix.c,v 1.195 2008/07/30 17:28:04 drh Exp $
    1.19 +*/
    1.20 +#include "sqliteInt.h"
    1.21 +#if SQLITE_OS_UNIX              /* This file is used on unix only */
    1.22 +
    1.23 +/*
    1.24 +** If SQLITE_ENABLE_LOCKING_STYLE is defined, then several different 
    1.25 +** locking implementations are provided:
    1.26 +**
    1.27 +**   * POSIX locking (the default),
    1.28 +**   * No locking,
    1.29 +**   * Dot-file locking,
    1.30 +**   * flock() locking,
    1.31 +**   * AFP locking (OSX only).
    1.32 +*/
    1.33 +/* #define SQLITE_ENABLE_LOCKING_STYLE 0 */
    1.34 +
    1.35 +/*
    1.36 +** These #defines should enable >2GB file support on Posix if the
    1.37 +** underlying operating system supports it.  If the OS lacks
    1.38 +** large file support, these should be no-ops.
    1.39 +**
    1.40 +** Large file support can be disabled using the -DSQLITE_DISABLE_LFS switch
    1.41 +** on the compiler command line.  This is necessary if you are compiling
    1.42 +** on a recent machine (ex: RedHat 7.2) but you want your code to work
    1.43 +** on an older machine (ex: RedHat 6.0).  If you compile on RedHat 7.2
    1.44 +** without this option, LFS is enabled.  But LFS does not exist in the kernel
    1.45 +** in RedHat 6.0, so the code won't work.  Hence, for maximum binary
    1.46 +** portability you should omit LFS.
    1.47 +*/
    1.48 +#ifndef SQLITE_DISABLE_LFS
    1.49 +# define _LARGE_FILE       1
    1.50 +# ifndef _FILE_OFFSET_BITS
    1.51 +#   define _FILE_OFFSET_BITS 64
    1.52 +# endif
    1.53 +# define _LARGEFILE_SOURCE 1
    1.54 +#endif
    1.55 +
    1.56 +/*
    1.57 +** standard include files.
    1.58 +*/
    1.59 +#include <sys/types.h>
    1.60 +#include <sys/stat.h>
    1.61 +#include <fcntl.h>
    1.62 +#include <unistd.h>
    1.63 +#include <time.h>
    1.64 +#include <sys/time.h>
    1.65 +#include <errno.h>
    1.66 +
    1.67 +#ifdef SQLITE_ENABLE_LOCKING_STYLE
    1.68 +#include <sys/ioctl.h>
    1.69 +#include <sys/param.h>
    1.70 +#include <sys/mount.h>
    1.71 +#endif /* SQLITE_ENABLE_LOCKING_STYLE */
    1.72 +
    1.73 +/*
    1.74 +** If we are to be thread-safe, include the pthreads header and define
    1.75 +** the SQLITE_UNIX_THREADS macro.
    1.76 +*/
    1.77 +#if SQLITE_THREADSAFE
    1.78 +# include <pthread.h>
    1.79 +# define SQLITE_UNIX_THREADS 1
    1.80 +#endif
    1.81 +
    1.82 +/*
    1.83 +** Default permissions when creating a new file
    1.84 +*/
    1.85 +#ifndef SQLITE_DEFAULT_FILE_PERMISSIONS
    1.86 +# define SQLITE_DEFAULT_FILE_PERMISSIONS 0644
    1.87 +#endif
    1.88 +
    1.89 +/*
    1.90 +** Maximum supported path-length.
    1.91 +*/
    1.92 +#define MAX_PATHNAME 512
    1.93 +
    1.94 +
    1.95 +/*
    1.96 +** The unixFile structure is a subclass of sqlite3_file specific to the
    1.97 +** unix portability layer.  pMethod must stay the first member.
    1.98 +*/
    1.99 +typedef struct unixFile unixFile;
   1.100 +struct unixFile {
   1.101 +  sqlite3_io_methods const *pMethod;  /* Always the first entry */
   1.102 +#ifdef SQLITE_TEST
   1.103 +  /* In test mode, increase the size of this structure a bit so that
   1.104 +  ** it is larger than the struct CrashFile defined in test6.c.
   1.105 +  */
   1.106 +  char aPadding[32];
   1.107 +#endif
   1.108 +  struct openCnt *pOpen;    /* Per-inode record of open fd's and deferred closes */
   1.109 +  struct lockInfo *pLock;   /* Internal record of the locks on this inode */
   1.110 +#ifdef SQLITE_ENABLE_LOCKING_STYLE
   1.111 +  void *lockingContext;     /* Locking style specific state */
   1.112 +#endif
   1.113 +  int h;                    /* The file descriptor */
   1.114 +  unsigned char locktype;   /* The type of lock held on this fd */
   1.115 +  int dirfd;                /* File descriptor for the directory */
   1.116 +#if SQLITE_THREADSAFE
   1.117 +  pthread_t tid;            /* Owning thread; see SET_THREADID/CHECK_THREADID */
   1.118 +#endif
   1.119 +};
   1.120 +
   1.121 +/*
   1.122 +** Include code that is common to all os_*.c files
   1.123 +*/
   1.124 +#include "os_common.h"
   1.125 +
   1.126 +/*
   1.127 +** Define various macros that are missing from some systems.
   1.128 +*/
   1.129 +#ifndef O_LARGEFILE
   1.130 +# define O_LARGEFILE 0
   1.131 +#endif
   1.132 +#ifdef SQLITE_DISABLE_LFS
   1.133 +# undef O_LARGEFILE
   1.134 +# define O_LARGEFILE 0
   1.135 +#endif
   1.136 +#ifndef O_NOFOLLOW
   1.137 +# define O_NOFOLLOW 0
   1.138 +#endif
   1.139 +#ifndef O_BINARY
   1.140 +# define O_BINARY 0
   1.141 +#endif
   1.142 +
   1.143 +/*
   1.144 +** The DJGPP compiler environment looks mostly like Unix, but it
   1.145 +** lacks the fcntl() system call.  So redefine fcntl() to be something
   1.146 +** that always succeeds.  This means that locking does not occur under
   1.147 +** DJGPP.  But it is DOS - what did you expect?
   1.148 +*/
   1.149 +#ifdef __DJGPP__
   1.150 +# define fcntl(A,B,C) 0
   1.151 +#endif
   1.152 +
   1.153 +/*
   1.154 +** The threadid macro resolves to the thread-id or to 0.  Used for
   1.155 +** testing and debugging only.
   1.156 +*/
   1.157 +#if SQLITE_THREADSAFE
   1.158 +#define threadid pthread_self()
   1.159 +#else
   1.160 +#define threadid 0
   1.161 +#endif
   1.162 +
   1.163 +/*
   1.164 +** Set or check the unixFile.tid field.  This field is set when a unixFile
   1.165 +** is first opened.  All subsequent uses of the unixFile verify that the
   1.166 +** same thread is operating on the unixFile.  Some operating systems do
   1.167 +** not allow locks to be overridden by other threads and that restriction
   1.168 +** means that sqlite3* database handles cannot be moved from one thread
   1.169 +** to another.  CHECK_THREADID(X) is true when threadsOverrideEachOthersLocks
   1.170 +** is 0 and the caller is not the thread recorded in X->tid.
   1.171 +**
   1.172 +** Version 3.3.1 (2006-01-15):  unixFile can be moved from one thread to
   1.173 +** another as long as we are running on a system that supports threads
   1.174 +** overriding each others locks (which is now the most common behavior)
   1.175 +** or if no locks are held.  But the unixFile.pLock field needs to be
   1.176 +** recomputed because its key includes the thread-id.  Without
   1.177 +** SQLITE_THREADSAFE, SET_THREADID is empty and CHECK_THREADID is 0.
   1.178 +*/
   1.179 +#if SQLITE_THREADSAFE
   1.180 +# define SET_THREADID(X)   (X)->tid = pthread_self()
   1.181 +# define CHECK_THREADID(X) (threadsOverrideEachOthersLocks==0 && \
   1.182 +                            !pthread_equal((X)->tid, pthread_self()))
   1.183 +#else
   1.184 +# define SET_THREADID(X)
   1.185 +# define CHECK_THREADID(X) 0
   1.186 +#endif
   1.187 +
   1.188 +/*
   1.189 +** Here is the dirt on POSIX advisory locks:  ANSI STD 1003.1 (1996)
   1.190 +** section 6.5.2.2 lines 483 through 490 specify that when a process
   1.191 +** sets or clears a lock, that operation overrides any prior locks set
   1.192 +** by the same process.  It does not explicitly say so, but this implies
   1.193 +** that it overrides locks set by the same process using a different
   1.194 +** file descriptor.  Suppose fd1 is already open on "./file1" and then:
   1.195 +**       int fd2 = open("./file2", O_RDWR|O_CREAT, 0644);
   1.196 +**
   1.197 +** Suppose ./file1 and ./file2 are really the same file (because
   1.198 +** one is a hard or symbolic link to the other) then if you set
   1.199 +** an exclusive lock on fd1, then try to get an exclusive lock
   1.200 +** on fd2, it works.  I would have expected the second lock to
   1.201 +** fail since there was already a lock on the file due to fd1.
   1.202 +** But not so.  Since both locks came from the same process, the
   1.203 +** second overrides the first, even though they were on different
   1.204 +** file descriptors opened on different file names.
   1.205 +**
   1.206 +** Bummer.  If you ask me, this is broken.  Badly broken.  It means
   1.207 +** that we cannot use POSIX locks to synchronize file access among
   1.208 +** competing threads of the same process.  POSIX locks will work fine
   1.209 +** to synchronize access for threads in separate processes, but not
   1.210 +** threads within the same process.
   1.211 +**
   1.212 +** To work around the problem, SQLite has to manage file locks internally
   1.213 +** on its own.  Whenever a new database is opened, we have to find the
   1.214 +** specific inode of the database file (the inode is determined by the
   1.215 +** st_dev and st_ino fields of the stat structure that fstat() fills in)
   1.216 +** and check for locks already existing on that inode.  When locks are
   1.217 +** created or removed, we have to look at our own internal record of the
   1.218 +** locks to see if another thread has previously set a lock on that same
   1.219 +** inode.
   1.220 +**
   1.221 +** The sqlite3_file structure for POSIX is no longer just an integer file
   1.222 +** descriptor.  It is now a structure that holds the integer file
   1.223 +** descriptor and a pointer to a structure that describes the internal
   1.224 +** locks on the corresponding inode.  There is one locking structure
   1.225 +** per inode, so if the same inode is opened twice, both unixFile structures
   1.226 +** point to the same locking structure.  The locking structure keeps
   1.227 +** a reference count (so we will know when to delete it) and a "cnt"
   1.228 +** field that tells us its internal lock status.  cnt==0 means the
   1.229 +** file is unlocked.  cnt==-1 means the file has an exclusive lock.
   1.230 +** cnt>0 means there are cnt shared locks on the file.
   1.231 +**
   1.232 +** Any attempt to lock or unlock a file first checks the locking
   1.233 +** structure.  The fcntl() system call is only invoked to set a 
   1.234 +** POSIX lock if the internal lock structure transitions between
   1.235 +** a locked and an unlocked state.
   1.236 +**
   1.237 +** 2004-Jan-11:
   1.238 +** More recent discoveries about POSIX advisory locks.  (The more
   1.239 +** I discover, the more I realize that POSIX advisory locks are
   1.240 +** an abomination.)
   1.241 +**
   1.242 +** If you close a file descriptor that points to a file that has locks,
   1.243 +** all locks on that file that are owned by the current process are
   1.244 +** released.  To work around this problem, each unixFile structure contains
   1.245 +** a pointer to an openCnt structure.  There is one openCnt structure
   1.246 +** per open inode, which means that multiple unixFile can point to a single
   1.247 +** openCnt.  When an attempt is made to close an unixFile, if there are
   1.248 +** other unixFile open on the same inode that are holding locks, the call
   1.249 +** to close() the file descriptor is deferred until all of the locks clear.
   1.250 +** The openCnt structure keeps a list of file descriptors that need to
   1.251 +** be closed and that list is walked (and cleared) when the last lock
   1.252 +** clears.
   1.253 +**
   1.254 +** First, under Linux threads, because each thread has a separate
   1.255 +** process ID, lock operations in one thread do not override locks
   1.256 +** to the same file in other threads.  Linux threads behave like
   1.257 +** separate processes in this respect.  But, if you close a file
   1.258 +** descriptor in linux threads, all locks are cleared, even locks
   1.259 +** on other threads and even though the other threads have different
   1.260 +** process IDs.  Linux threads is inconsistent in this respect.
   1.261 +** (I'm beginning to think that linux threads is an abomination too.)
   1.262 +** The consequence of this all is that the hash table for the lockInfo
   1.263 +** structure has to include the process id as part of its key because
   1.264 +** locks in different threads are treated as distinct.  But the 
   1.265 +** openCnt structure should not include the process id in its
   1.266 +** key because close() clears lock on all threads, not just the current
   1.267 +** thread.  Were it not for this goofiness in linux threads, we could
   1.268 +** combine the lockInfo and openCnt structures into a single structure.
   1.269 +**
   1.270 +** 2004-Jun-28:
   1.271 +** On some versions of linux, threads can override each others locks.
   1.272 +** On others not.  Sometimes you can change the behavior on the same
   1.273 +** system by setting the LD_ASSUME_KERNEL environment variable.  The
   1.274 +** POSIX standard is silent as to which behavior is correct, as far
   1.275 +** as I can tell, so other versions of unix might show the same
   1.276 +** inconsistency.  There is little doubt in my mind that posix
   1.277 +** advisory locks and linux threads are profoundly broken.
   1.278 +**
   1.279 +** To work around the inconsistencies, we have to test at runtime 
   1.280 +** whether or not threads can override each others locks.  This test
   1.281 +** is run once, the first time any lock is attempted.  A static 
   1.282 +** variable is set to record the results of this test for future
   1.283 +** use.
   1.284 +*/
   1.285 +
   1.286 +/*
   1.287 +** An instance of the following structure serves as the key used
   1.288 +** to locate a particular lockInfo structure given its inode.  Keys are
   1.289 +** compared bytewise with memcmp(), so findLockInfo() zeroes the whole
   1.290 +** structure before filling in the fields.
   1.291 +**
   1.292 +** tid holds the thread ID when threads cannot override each others
   1.293 +** locks and zero when they can.  It is omitted without threading support.
   1.294 +*/
   1.295 +struct lockKey {
   1.296 +  dev_t dev;       /* Device number (st_dev from fstat()) */
   1.297 +  ino_t ino;       /* Inode number (st_ino from fstat()) */
   1.298 +#if SQLITE_THREADSAFE
   1.299 +  pthread_t tid;   /* Thread ID or zero if threads can override each other */
   1.300 +#endif
   1.301 +};
   1.302 +
   1.303 +/*
   1.304 +** An instance of the following structure is allocated for each open
   1.305 +** inode on each thread with a different process ID.  (Threads have
   1.306 +** different process IDs on linux, but not on most other unixes.)
   1.307 +**
   1.308 +** A single inode can have multiple file descriptors, so each unixFile
   1.309 +** points at one of these objects.  nRef counts those pointers; when
   1.310 +** releaseLockInfo() drops it to zero the object leaves lockList and is freed.
   1.311 +*/
   1.312 +struct lockInfo {
   1.313 +  struct lockKey key;  /* The lookup key */
   1.314 +  int cnt;             /* Number of SHARED locks held */
   1.315 +  int locktype;        /* One of SHARED_LOCK, RESERVED_LOCK etc. */
   1.316 +  int nRef;            /* Number of pointers to this structure */
   1.317 +  struct lockInfo *pNext, *pPrev;   /* Doubly-linked list rooted at lockList */
   1.318 +};
   1.319 +
   1.320 +/*
   1.321 +** Lookup key for an openCnt structure: the device and inode numbers of
   1.322 +** the file.  Same as lockKey except that the thread ID is omitted.
   1.323 +** findLockInfo() zeroes the key and matches it with memcmp().
   1.324 +*/
   1.325 +struct openKey {
   1.326 +  dev_t dev;   /* Device number (st_dev from fstat()) */
   1.327 +  ino_t ino;   /* Inode number (st_ino from fstat()) */
   1.328 +};
   1.329 +
   1.330 +/*
   1.331 +** An instance of the following structure is allocated for each open
   1.332 +** inode.  This structure keeps track of the number of locks on that
   1.333 +** inode.  If a close is attempted against an inode that is holding
   1.334 +** locks, the close is deferred until all locks clear by adding the
   1.335 +** file descriptor to be closed to the aPending list.
   1.336 +*/
   1.337 +struct openCnt {
   1.338 +  struct openKey key;   /* The lookup key */
   1.339 +  int nRef;             /* Number of pointers to this structure */
   1.340 +  int nLock;            /* Number of outstanding locks */
   1.341 +  int nPending;         /* Number of pending close() operations */
   1.342 +  int *aPending;        /* Malloced space holding fd's awaiting a close() */
   1.343 +  struct openCnt *pNext, *pPrev;   /* Doubly-linked list rooted at openList */
   1.344 +};
   1.345 +
   1.346 +/*
   1.347 +** List of all lockInfo and openCnt objects.  This used to be a hash
   1.348 +** table.  But the number of objects is rarely more than a dozen and
   1.349 +** never exceeds a few thousand.  And lookup is not on a critical
   1.350 +** path so a simple linked list will suffice.
   1.351 +*/
   1.352 +static struct lockInfo *lockList = 0;
   1.353 +static struct openCnt *openList = 0;
   1.354 +
   1.355 +/*
   1.356 +** The locking styles are associated with the different file locking
   1.357 +** capabilities supported by different file systems.  
   1.358 +**
   1.359 +** POSIX locking style fully supports shared and exclusive byte-range locks 
   1.360 +** AFP locking only supports exclusive byte-range locks
   1.361 +** FLOCK only supports a single file-global exclusive lock
   1.362 +** DOTLOCK isn't a true locking style, it refers to the use of a special
   1.363 +**   file named the same as the database file with a '.lock' extension, this
   1.364 +**   can be used on file systems that do not offer any reliable file locking
   1.365 +** NO locking means that no locking will be attempted, this is only used for
   1.366 +**   read-only file systems currently
   1.367 +** UNSUPPORTED means that no locking will be attempted, this is only used for
   1.368 +**   file systems that are known to be unsupported
   1.369 +*/
   1.370 +#define LOCKING_STYLE_POSIX        1
   1.371 +#define LOCKING_STYLE_NONE         2
   1.372 +#define LOCKING_STYLE_DOTFILE      3
   1.373 +#define LOCKING_STYLE_FLOCK        4
   1.374 +#define LOCKING_STYLE_AFP          5
   1.375 +
   1.376 +/*
   1.377 +** Enter and leave the mutex that sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER) returns.
   1.378 +*/
   1.379 +static void enterMutex(){
   1.380 +  sqlite3_mutex_enter(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER));
   1.381 +}
   1.382 +static void leaveMutex(){
   1.383 +  sqlite3_mutex_leave(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER));
   1.384 +}
   1.385 +
   1.386 +#if SQLITE_THREADSAFE
   1.387 +/*
   1.388 +** This variable records whether or not threads can override each others
   1.389 +** locks.
   1.390 +**
   1.391 +**    0:  No.  Threads cannot override each others locks.
   1.392 +**    1:  Yes.  Threads can override each others locks.
   1.393 +**   -1:  We don't know yet.
   1.394 +**
   1.395 +** On some systems, we know at compile-time if threads can override each
   1.396 +** others locks.  On those systems, the SQLITE_THREAD_OVERRIDE_LOCK macro
   1.397 +** will be set appropriately.  On other systems, we have to check at
   1.398 +** runtime.  On these latter systems, SQLITE_THREAD_OVERRIDE_LOCK is
   1.399 +** undefined.
   1.400 +**
   1.401 +** This variable normally has file scope only.  But during testing, we make
   1.402 +** it a global so that the test code can change its value in order to verify
   1.403 +** that the right stuff happens in either case.
   1.404 +*/
   1.405 +#ifndef SQLITE_THREAD_OVERRIDE_LOCK
   1.406 +# define SQLITE_THREAD_OVERRIDE_LOCK -1
   1.407 +#endif
   1.408 +#ifdef SQLITE_TEST
   1.409 +int threadsOverrideEachOthersLocks = SQLITE_THREAD_OVERRIDE_LOCK;
   1.410 +#else
   1.411 +static int threadsOverrideEachOthersLocks = SQLITE_THREAD_OVERRIDE_LOCK;
   1.412 +#endif
   1.413 +
   1.414 +/*
   1.415 +** Argument block handed to each threadLockingTest() thread by
   1.416 +** testThreadLockingBehavior(); the thread stores its outcome in result.
   1.417 +*/
   1.418 +struct threadTestData {
   1.419 +  int fd;                /* File to be locked */
   1.420 +  struct flock lock;     /* The locking operation */
   1.421 +  int result;            /* Value returned by fcntl(F_SETLK) for this request */
   1.422 +};
   1.423 +
   1.424 +#ifdef SQLITE_LOCK_TRACE
   1.425 +/*
   1.426 +** Tracing wrapper around fcntl(): perform the requested operation, print
   1.427 +** a description of it with sqlite3DebugPrintf(), and return fcntl()'s
   1.428 +** result.  For F_GETLK/F_SETLK errno is restored after printing.  A failed
   1.429 +** F_SETLK for a read or write lock is followed by an F_GETLK query whose
   1.430 +** answer is printed too.  Compiled only with -DSQLITE_LOCK_TRACE, when
   1.431 +** the #define after this routine redirects later fcntl() calls to it.
   1.432 +*/
   1.433 +static int lockTrace(int fd, int op, struct flock *p){
   1.434 +  char *zOpName, *zType;
   1.435 +  int s;
   1.436 +  int savedErrno;
   1.437 +  if( op==F_GETLK ){
   1.438 +    zOpName = "GETLK";
   1.439 +  }else if( op==F_SETLK ){
   1.440 +    zOpName = "SETLK";
   1.441 +  }else{
   1.442 +    s = fcntl(fd, op, p);     /* any other op: pass through, log, return */
   1.443 +    sqlite3DebugPrintf("fcntl unknown %d %d %d\n", fd, op, s);
   1.444 +    return s;
   1.445 +  }
   1.446 +  if( p->l_type==F_RDLCK ){
   1.447 +    zType = "RDLCK";
   1.448 +  }else if( p->l_type==F_WRLCK ){
   1.449 +    zType = "WRLCK";
   1.450 +  }else if( p->l_type==F_UNLCK ){
   1.451 +    zType = "UNLCK";
   1.452 +  }else{
   1.453 +    assert( 0 );   /* NOTE(review): zType stays unset if assert() is compiled out */
   1.454 +  }
   1.455 +  assert( p->l_whence==SEEK_SET );
   1.456 +  s = fcntl(fd, op, p);
   1.457 +  savedErrno = errno;         /* the calls below may overwrite errno */
   1.458 +  sqlite3DebugPrintf("fcntl %d %d %s %s %d %d %d %d\n",
   1.459 +     threadid, fd, zOpName, zType, (int)p->l_start, (int)p->l_len,
   1.460 +     (int)p->l_pid, s);
   1.461 +  if( s==(-1) && op==F_SETLK && (p->l_type==F_RDLCK || p->l_type==F_WRLCK) ){
   1.462 +    struct flock l2;
   1.463 +    l2 = *p;                  /* query with a copy; the caller's request is untouched */
   1.464 +    fcntl(fd, F_GETLK, &l2);
   1.465 +    if( l2.l_type==F_RDLCK ){
   1.466 +      zType = "RDLCK";
   1.467 +    }else if( l2.l_type==F_WRLCK ){
   1.468 +      zType = "WRLCK";
   1.469 +    }else if( l2.l_type==F_UNLCK ){
   1.470 +      zType = "UNLCK";
   1.471 +    }else{
   1.472 +      assert( 0 );
   1.473 +    }
   1.474 +    sqlite3DebugPrintf("fcntl-failure-reason: %s %d %d %d\n",
   1.475 +       zType, (int)l2.l_start, (int)l2.l_len, (int)l2.l_pid);
   1.476 +  }
   1.477 +  errno = savedErrno;         /* hand back the errno of the traced fcntl() */
   1.478 +  return s;
   1.479 +}
   1.480 +#define fcntl lockTrace
   1.481 +#endif /* SQLITE_LOCK_TRACE */
   1.482 +
   1.483 +/*
   1.484 +** Thread entry point used by testThreadLockingBehavior().  pArg points to
   1.485 +** a struct threadTestData: the lock it describes is requested on its fd
   1.486 +** with F_SETLK and fcntl()'s return value is stored in its result field.
   1.487 +** The argument pointer is returned as the thread's exit value.
   1.488 +*/
   1.489 +static void *threadLockingTest(void *pArg){
   1.490 +  struct threadTestData *pTest = pArg;
   1.491 +  int rc = fcntl(pTest->fd, F_SETLK, &pTest->lock);
   1.492 +  pTest->result = rc;
   1.493 +  return pTest;
   1.494 +}
   1.495 +
   1.496 +/*
   1.497 +** Determine at runtime whether threads can override each others locks
   1.498 +** and record the answer in threadsOverrideEachOthersLocks.  The variable
   1.499 +** is left untouched if fd_orig cannot be dup()ed or a thread fails to start.
   1.500 +*/
   1.501 +static void testThreadLockingBehavior(int fd_orig){
   1.502 +  int fd, rc0, rc1;
   1.503 +  struct threadTestData d[2];
   1.504 +  pthread_t t[2];
   1.505 +  fd = dup(fd_orig);
   1.506 +  if( fd<0 ) return;
   1.507 +  memset(d, 0, sizeof(d));
   1.508 +  d[0].fd = fd;
   1.509 +  d[0].lock.l_type = F_RDLCK;
   1.510 +  d[0].lock.l_len = 1;
   1.511 +  d[0].lock.l_start = 0;
   1.512 +  d[0].lock.l_whence = SEEK_SET;
   1.513 +  d[1] = d[0];
   1.514 +  d[1].lock.l_type = F_WRLCK;
   1.515 +  rc0 = pthread_create(&t[0], 0, threadLockingTest, &d[0]);
   1.516 +  rc1 = pthread_create(&t[1], 0, threadLockingTest, &d[1]);
   1.517 +  if( rc0==0 ) pthread_join(t[0], 0);  /* never join a thread that did not start */
   1.518 +  if( rc1==0 ) pthread_join(t[1], 0);
   1.519 +  close(fd);
   1.520 +  if( rc0!=0 || rc1!=0 ) return;       /* zeroed results would read as success */
   1.521 +  threadsOverrideEachOthersLocks =  d[0].result==0 && d[1].result==0;
   1.522 +}
   1.523 +#endif /* SQLITE_THREADSAFE */
   1.524 +
   1.525 +/*
   1.526 +** Drop a reference to pLock (NULL allowed); on the last one unlink and free it.
   1.527 +*/
   1.528 +static void releaseLockInfo(struct lockInfo *pLock){
   1.529 +  struct lockInfo *pBefore, *pAfter;
   1.530 +  if( pLock==0 ) return;
   1.531 +  if( --pLock->nRef!=0 ) return;      /* other unixFiles still point here */
   1.532 +  pBefore = pLock->pPrev;
   1.533 +  pAfter = pLock->pNext;
   1.534 +  if( pBefore==0 ){
   1.535 +    assert( lockList==pLock );        /* no predecessor: must be the head */
   1.536 +    lockList = pAfter;
   1.537 +  }else{
   1.538 +    assert( pBefore->pNext==pLock );
   1.539 +    pBefore->pNext = pAfter;
   1.540 +  }
   1.541 +  if( pAfter!=0 ){
   1.542 +    assert( pAfter->pPrev==pLock );
   1.543 +    pAfter->pPrev = pBefore;
   1.544 +  }
   1.545 +  sqlite3_free(pLock);
   1.546 +}
   1.547 +
   1.548 +/*
   1.549 +** Drop a reference to pOpen (NULL allowed); on the last one unlink and free it.
   1.550 +*/
   1.551 +static void releaseOpenCnt(struct openCnt *pOpen){
   1.552 +  struct openCnt *pLeft, *pRight;
   1.553 +  if( pOpen==0 ) return;
   1.554 +  if( --pOpen->nRef!=0 ) return;      /* still referenced elsewhere */
   1.555 +  pLeft = pOpen->pPrev;
   1.556 +  pRight = pOpen->pNext;
   1.557 +  if( pLeft==0 ){
   1.558 +    assert( openList==pOpen );        /* no predecessor: must be the head */
   1.559 +    openList = pRight;
   1.560 +  }else{
   1.561 +    assert( pLeft->pNext==pOpen );
   1.562 +    pLeft->pNext = pRight;
   1.563 +  }
   1.564 +  if( pRight!=0 ){
   1.565 +    assert( pRight->pPrev==pOpen );
   1.566 +    pRight->pPrev = pLeft;
   1.567 +  }
   1.568 +  sqlite3_free(pOpen->aPending);      /* the pending-close array goes too */
   1.569 +  sqlite3_free(pOpen);
   1.570 +}
   1.571 +
   1.572 +#ifdef SQLITE_ENABLE_LOCKING_STYLE
   1.573 +/*
   1.574 +** Probe fd with an fcntl(F_GETLK) byte-range query.  Returns
   1.575 +** LOCKING_STYLE_POSIX if the query succeeds, else LOCKING_STYLE_DOTFILE.
   1.576 +*/
   1.577 +static int testLockingStyle(int fd){
   1.578 +  struct flock probe;
   1.579 +  int rc;
   1.580 +
   1.581 +  /* Describe a read lock on the first byte of the file and ask whether
   1.582 +  ** it could be placed.  Only these four fields are set before the query.
   1.583 +  */
   1.584 +  probe.l_type = F_RDLCK;
   1.585 +  probe.l_whence = SEEK_SET;
   1.586 +  probe.l_start = 0;
   1.587 +  probe.l_len = 1;
   1.588 +  rc = fcntl(fd, F_GETLK, &probe);
   1.589 +
   1.590 +  /* flock() is not probed because testing for it can give false
   1.591 +  ** positives, so a failed query selects dot-file locking.
   1.592 +  */
   1.593 +  if( rc==-1 ) return LOCKING_STYLE_DOTFILE;
   1.594 +  return LOCKING_STYLE_POSIX;
   1.595 +}
   1.596 +#endif
   1.597 +
   1.598 +/*
   1.599 +** Pick the locking style for the database file filePath, open on fd.
   1.600 +**
   1.601 +** With SQLITE_ENABLE_LOCKING_STYLE: a NULL filePath gives
   1.602 +** LOCKING_STYLE_NONE and a non-zero pVfs->pAppData is returned cast to
   1.603 +** int.  Otherwise statfs() is consulted: a read-only mount gives
   1.604 +** LOCKING_STYLE_NONE and f_fstypename is looked up in a table based on
   1.605 +** Darwin/OSX behavior (not thoroughly tested elsewhere).  Anything else,
   1.606 +** including a statfs() failure, is decided by testLockingStyle().
   1.607 +** Without SQLITE_ENABLE_LOCKING_STYLE the result is LOCKING_STYLE_POSIX.
   1.608 +*/
   1.609 +static int detectLockingStyle(
   1.610 +  sqlite3_vfs *pVfs,
   1.611 +  const char *filePath, 
   1.612 +  int fd
   1.613 +){
   1.614 +#ifdef SQLITE_ENABLE_LOCKING_STYLE
   1.615 +  struct Mapping {
   1.616 +    const char *zFilesystem;
   1.617 +    int eLockingStyle;
   1.618 +  } aMap[] = {
   1.619 +    { "hfs",    LOCKING_STYLE_POSIX },
   1.620 +    { "ufs",    LOCKING_STYLE_POSIX },
   1.621 +    { "afpfs",  LOCKING_STYLE_AFP },
   1.622 +    { "smbfs",  LOCKING_STYLE_FLOCK },
   1.623 +    { "msdos",  LOCKING_STYLE_DOTFILE },
   1.624 +    { "webdav", LOCKING_STYLE_NONE },
   1.625 +    { 0, 0 }
   1.626 +  };
   1.627 +  struct Mapping *pMap;
   1.628 +  struct statfs fsInfo;
   1.629 +
   1.630 +  /* Cases that need no look at the file system. */
   1.631 +  if( filePath==0 ) return LOCKING_STYLE_NONE;
   1.632 +  if( pVfs->pAppData!=0 ) return (int)pVfs->pAppData;
   1.633 +
   1.634 +  if( statfs(filePath, &fsInfo) != -1 ){
   1.635 +    /* Read-only mounts get no locking. */
   1.636 +    if( fsInfo.f_flags & MNT_RDONLY ) return LOCKING_STYLE_NONE;
   1.637 +
   1.638 +    /* The table is terminated by an entry with a NULL name. */
   1.639 +    for(pMap=aMap; pMap->zFilesystem!=0; pMap++){
   1.640 +      if( strcmp(fsInfo.f_fstypename, pMap->zFilesystem)==0 ){
   1.641 +        return pMap->eLockingStyle;
   1.642 +      }
   1.643 +    }
   1.644 +  }
   1.645 +
   1.646 +  /* Unlisted file systems ("nfs" among others) are probed directly. */
   1.647 +  return testLockingStyle(fd);
   1.648 +#else
   1.649 +  /* Only the POSIX style is available in this build. */
   1.650 +  return LOCKING_STYLE_POSIX;
   1.651 +#endif
   1.652 +}
   1.653 +
   1.654 +/*
   1.655 +** Given a file descriptor, locate the lockInfo and (if ppOpen is not NULL)
   1.656 +** openCnt structures for its inode, creating them if necessary.  Each
   1.657 +** structure handed back has had its nRef incremented.  On error the
   1.658 +** values in *ppLock and *ppOpen must not be used.
   1.659 +** Returns 0 on success, else SQLITE_NOLFS, SQLITE_IOERR or SQLITE_NOMEM.
   1.660 +*/
   1.661 +static int findLockInfo(
   1.662 +  int fd,                      /* The file descriptor used in the key */
   1.663 +  struct lockInfo **ppLock,    /* Return the lockInfo structure here */
   1.664 +  struct openCnt **ppOpen      /* Return the openCnt structure here; may be NULL */
   1.665 +){
   1.666 +  int rc;
   1.667 +  struct lockKey key1;
   1.668 +  struct openKey key2;
   1.669 +  struct stat statbuf;
   1.670 +  struct lockInfo *pLock;
   1.671 +  struct openCnt *pOpen;
   1.672 +  rc = fstat(fd, &statbuf);
   1.673 +  if( rc!=0 ){
   1.674 +#ifdef EOVERFLOW
   1.675 +    if( errno==EOVERFLOW ) return SQLITE_NOLFS;
   1.676 +#endif
   1.677 +    return SQLITE_IOERR;
   1.678 +  }
   1.679 +
   1.680 +  /* On OS X on an msdos filesystem, the inode number is reported
   1.681 +  ** incorrectly for zero-size files.  See ticket #3260.  To work
   1.682 +  ** around this problem (we consider it a bug in OS X, not SQLite)
   1.683 +  ** we always increase the file size to 1 by writing a single byte
   1.684 +  ** prior to accessing the inode number.  The one byte written is
   1.685 +  ** an ASCII 'S' character which also happens to be the first byte
   1.686 +  ** in the header of every SQLite database.  In this way, if there
   1.687 +  ** is a race condition such that another thread has already populated
   1.688 +  ** the first page of the database, no damage is done.
   1.689 +  */
   1.690 +  if( statbuf.st_size==0 ){
   1.691 +    write(fd, "S", 1);         /* NOTE(review): the result of write() is not checked */
   1.692 +    rc = fstat(fd, &statbuf);  /* re-read the inode data after the write */
   1.693 +    if( rc!=0 ){
   1.694 +      return SQLITE_IOERR;
   1.695 +    }
   1.696 +  }
   1.697 +
   1.698 +  memset(&key1, 0, sizeof(key1));  /* keys are matched with memcmp(), so clear every byte */
   1.699 +  key1.dev = statbuf.st_dev;
   1.700 +  key1.ino = statbuf.st_ino;
   1.701 +#if SQLITE_THREADSAFE
   1.702 +  if( threadsOverrideEachOthersLocks<0 ){
   1.703 +    testThreadLockingBehavior(fd);
   1.704 +  }
   1.705 +  key1.tid = threadsOverrideEachOthersLocks ? 0 : pthread_self();  /* any non-zero value, -1 included, gives 0 */
   1.706 +#endif
   1.707 +  memset(&key2, 0, sizeof(key2));
   1.708 +  key2.dev = statbuf.st_dev;
   1.709 +  key2.ino = statbuf.st_ino;
   1.710 +  pLock = lockList;
   1.711 +  while( pLock && memcmp(&key1, &pLock->key, sizeof(key1)) ){
   1.712 +    pLock = pLock->pNext;
   1.713 +  }
   1.714 +  if( pLock==0 ){
   1.715 +    pLock = sqlite3_malloc( sizeof(*pLock) );
   1.716 +    if( pLock==0 ){
   1.717 +      rc = SQLITE_NOMEM;
   1.718 +      goto exit_findlockinfo;
   1.719 +    }
   1.720 +    pLock->key = key1;
   1.721 +    pLock->nRef = 1;
   1.722 +    pLock->cnt = 0;
   1.723 +    pLock->locktype = 0;
   1.724 +    pLock->pNext = lockList;   /* new entries go on the front of lockList */
   1.725 +    pLock->pPrev = 0;
   1.726 +    if( lockList ) lockList->pPrev = pLock;
   1.727 +    lockList = pLock;
   1.728 +  }else{
   1.729 +    pLock->nRef++;
   1.730 +  }
   1.731 +  *ppLock = pLock;             /* stored before the openCnt lookup below */
   1.732 +  if( ppOpen!=0 ){
   1.733 +    pOpen = openList;
   1.734 +    while( pOpen && memcmp(&key2, &pOpen->key, sizeof(key2)) ){
   1.735 +      pOpen = pOpen->pNext;
   1.736 +    }
   1.737 +    if( pOpen==0 ){
   1.738 +      pOpen = sqlite3_malloc( sizeof(*pOpen) );
   1.739 +      if( pOpen==0 ){
   1.740 +        releaseLockInfo(pLock);  /* give back the reference taken above; *ppLock is now stale */
   1.741 +        rc = SQLITE_NOMEM;
   1.742 +        goto exit_findlockinfo;
   1.743 +      }
   1.744 +      pOpen->key = key2;
   1.745 +      pOpen->nRef = 1;
   1.746 +      pOpen->nLock = 0;
   1.747 +      pOpen->nPending = 0;
   1.748 +      pOpen->aPending = 0;
   1.749 +      pOpen->pNext = openList;   /* new entries go on the front of openList */
   1.750 +      pOpen->pPrev = 0;
   1.751 +      if( openList ) openList->pPrev = pOpen;
   1.752 +      openList = pOpen;
   1.753 +    }else{
   1.754 +      pOpen->nRef++;
   1.755 +    }
   1.756 +    *ppOpen = pOpen;
   1.757 +  }
   1.758 +
   1.759 +exit_findlockinfo:
   1.760 +  return rc;
   1.761 +}
   1.762 +
   1.763 +#ifdef SQLITE_DEBUG
   1.764 +/*
   1.765 +** Debug-build tracing helper: translate an integer lock level into a
   1.766 +** short human-readable name.  A value that matches none of the five
   1.767 +** lock-level constants tested below yields "ERROR".
   1.768 +*/
   1.769 +static const char *locktypeName(int locktype){
   1.770 +  const char *zName;
   1.771 +  if( locktype==NO_LOCK ){ zName = "NONE"; }
   1.772 +  else if( locktype==SHARED_LOCK ){ zName = "SHARED"; }
   1.773 +  else if( locktype==RESERVED_LOCK ){ zName = "RESERVED"; }
   1.774 +  else if( locktype==PENDING_LOCK ){ zName = "PENDING"; }
   1.775 +  else if( locktype==EXCLUSIVE_LOCK ){ zName = "EXCLUSIVE"; }
   1.776 +  else{ zName = "ERROR"; }
   1.777 +  return zName;
   1.778 +}
   1.779 +#endif
   1.780 +
   1.781 +/*
   1.782 +** If the calling thread is not the thread that currently owns pFile,
   1.783 +** transfer ownership of pFile to the calling thread.
   1.784 +**
   1.785 +** A unixFile is only owned by a thread on systems where one thread is
   1.786 +** unable to override locks created by a different thread.  RedHat9 is
   1.787 +** an example of such a system.
   1.788 +**
   1.789 +** Returns SQLITE_OK when no transfer is needed or the transfer succeeds,
   1.790 +** SQLITE_MISUSE when pFile still holds a lock (ownership cannot change
   1.791 +** while locked), or the error code from findLockInfo().  On failure
   1.792 +** pFile->tid and pFile->pLock are left unchanged.
   1.793 +*/
   1.794 +#if SQLITE_THREADSAFE
   1.795 +static int transferOwnership(unixFile *pFile){
   1.796 +  struct lockInfo *pNew = 0;  /* lockInfo keyed on the calling thread */
   1.797 +  pthread_t hSelf;
   1.798 +  int rc;
   1.799 +  if( threadsOverrideEachOthersLocks ){
   1.800 +    return SQLITE_OK;  /* Ownership transfers not needed on this system */
   1.801 +  }
   1.802 +  hSelf = pthread_self();
   1.803 +  if( pthread_equal(pFile->tid, hSelf) ){
   1.804 +    OSTRACE1("No-transfer, same thread\n");
   1.805 +    return SQLITE_OK;
   1.806 +  }
   1.807 +  if( pFile->locktype!=NO_LOCK ){
   1.808 +    return SQLITE_MISUSE;  /* Cannot change ownership while holding a lock */
   1.809 +  }
   1.810 +  OSTRACE4("Transfer ownership of %d from %d to %d\n",
   1.811 +            pFile->h, pFile->tid, hSelf);
   1.812 +  if( pFile->pLock!=NULL ){
   1.813 +    /* Look up the new lockInfo before dropping the old one, so that a
   1.814 +    ** failure never leaves pFile->pLock pointing at a released object. */
   1.815 +    rc = findLockInfo(pFile->h, &pNew, 0);
   1.816 +    if( rc!=SQLITE_OK ) return rc;
   1.817 +    releaseLockInfo(pFile->pLock);
   1.818 +    pFile->pLock = pNew;
   1.819 +    OSTRACE5("LOCK    %d is now %s(%s,%d)\n", pFile->h,
   1.820 +           locktypeName(pFile->locktype),
   1.821 +           locktypeName(pFile->pLock->locktype), pFile->pLock->cnt);
   1.822 +  }
   1.823 +  pFile->tid = hSelf;
   1.824 +  return SQLITE_OK;
   1.825 +}
   1.826 +#else
   1.827 +  /* On single-threaded builds, ownership transfer is a no-op */
   1.828 +# define transferOwnership(X) SQLITE_OK
   1.829 +#endif
   1.830 +
   1.831 +/*
   1.832 +** Read up to cnt bytes from file id, starting at byte position offset,
   1.833 +** into pBuf.  The return value is the number of bytes actually read, or
   1.834 +** -1 if the seek or the read fails.
   1.835 +**
   1.836 +** NB:  If you define USE_PREAD or USE_PREAD64, then it might also
   1.837 +** be necessary to define _XOPEN_SOURCE to be 500.  This varies from
   1.838 +** one system to another.  Since SQLite does not define USE_PREAD
   1.839 +** in any form by default, we will not attempt to define _XOPEN_SOURCE.
   1.840 +** See tickets #2741 and #2681.
   1.841 +*/
   1.842 +static int seekAndRead(unixFile *id, sqlite3_int64 offset, void *pBuf, int cnt){
   1.843 +  int nRead;                 /* result of the read primitive */
   1.844 +  i64 iPos;                  /* file position reported by lseek() */
   1.845 +  TIMER_START;
   1.846 +#if defined(USE_PREAD)
   1.847 +  nRead = pread(id->h, pBuf, cnt, offset);
   1.848 +  SimulateIOError( nRead = -1 );
   1.849 +#elif defined(USE_PREAD64)
   1.850 +  nRead = pread64(id->h, pBuf, cnt, offset);
   1.851 +  SimulateIOError( nRead = -1 );
   1.852 +#else
   1.853 +  /* No positioned-read primitive selected: seek first, then read */
   1.854 +  iPos = lseek(id->h, offset, SEEK_SET);
   1.855 +  SimulateIOError( iPos-- );
   1.856 +  if( iPos!=offset ) return -1;   /* could not position the file */
   1.857 +  nRead = read(id->h, pBuf, cnt);
   1.858 +#endif
   1.859 +  TIMER_END;
   1.860 +  OSTRACE5("READ    %-3d %5d %7lld %llu\n", id->h, nRead, offset, TIMER_ELAPSED);
   1.861 +  return nRead;
   1.862 +}
   1.863 +
   1.864 +/*
   1.865 +** Read amt bytes from file id, starting at byte position offset, into
   1.866 +** pBuf.  Returns SQLITE_OK if exactly amt bytes were read and
   1.867 +** SQLITE_IOERR_READ if seekAndRead() reports an error.  On a short read
   1.868 +** the unread tail of pBuf is zero-filled and SQLITE_IOERR_SHORT_READ is
   1.869 +** returned.
   1.870 +*/
   1.871 +static int unixRead(
   1.872 +  sqlite3_file *id,
   1.873 +  void *pBuf,
   1.874 +  int amt,
   1.875 +  sqlite3_int64 offset
   1.876 +){
   1.877 +  int nGot;
   1.878 +  assert( id );
   1.879 +  nGot = seekAndRead((unixFile*)id, offset, pBuf, amt);
   1.880 +  if( nGot==amt ) return SQLITE_OK;
   1.881 +  if( nGot<0 ) return SQLITE_IOERR_READ;
   1.882 +
   1.883 +  /* Short read: zero the part of the buffer that was not filled */
   1.884 +  memset((char*)pBuf + nGot, 0, amt-nGot);
   1.885 +  return SQLITE_IOERR_SHORT_READ;
   1.886 +}
   1.887 +
   1.888 +/*
   1.889 +** Write up to cnt bytes from pBuf into file id, starting at byte
   1.890 +** position offset.  Return the number of bytes actually written, or
   1.891 +** -1 if the seek or the write fails.
   1.892 +*/
   1.893 +static int seekAndWrite(unixFile *id, i64 offset, const void *pBuf, int cnt){
   1.894 +  int nWritten;              /* result of the write primitive */
   1.895 +  i64 iPos;                  /* file position reported by lseek() */
   1.896 +  TIMER_START;
   1.897 +#if defined(USE_PREAD)
   1.898 +  nWritten = pwrite(id->h, pBuf, cnt, offset);
   1.899 +#elif defined(USE_PREAD64)
   1.900 +  nWritten = pwrite64(id->h, pBuf, cnt, offset);
   1.901 +#else
   1.902 +  /* No positioned-write primitive selected: seek first, then write */
   1.903 +  iPos = lseek(id->h, offset, SEEK_SET);
   1.904 +  if( iPos!=offset ) return -1;   /* could not position the file */
   1.905 +  nWritten = write(id->h, pBuf, cnt);
   1.906 +#endif
   1.907 +  TIMER_END;
   1.908 +  OSTRACE5("WRITE   %-3d %5d %7lld %llu\n", id->h, nWritten, offset, TIMER_ELAPSED);
   1.909 +  return nWritten;
   1.910 +}
   1.911 +
   1.912 +
   1.913 +/*
   1.914 +** Write amt bytes from pBuf into file id, starting at byte position
   1.915 +** offset.  A partial write is continued from where it stopped until all
   1.916 +** bytes are written or seekAndWrite() returns zero or less.  Returns
   1.917 +** SQLITE_OK if everything was written, SQLITE_IOERR_WRITE if the final
   1.918 +** seekAndWrite() reported an error, or SQLITE_FULL if it wrote nothing.
   1.919 +*/
   1.920 +static int unixWrite(
   1.921 +  sqlite3_file *id,
   1.922 +  const void *pBuf,
   1.923 +  int amt,
   1.924 +  sqlite3_int64 offset
   1.925 +){
   1.926 +  unixFile *pFile = (unixFile*)id;
   1.927 +  int nOut = 0;                    /* result of the latest seekAndWrite() */
   1.928 +  assert( id );
   1.929 +  assert( amt>0 );
   1.930 +  while( amt>0 ){
   1.931 +    nOut = seekAndWrite(pFile, offset, pBuf, amt);
   1.932 +    if( nOut<=0 ) break;           /* error, or no progress was made */
   1.933 +    amt -= nOut;
   1.934 +    offset += nOut;
   1.935 +    pBuf = &((char*)pBuf)[nOut];
   1.936 +  }
   1.937 +  SimulateIOError(( nOut=(-1), amt=1 ));
   1.938 +  SimulateDiskfullError(( nOut=0, amt=1 ));
   1.939 +  if( amt<=0 ) return SQLITE_OK;
   1.940 +  return nOut<0 ? SQLITE_IOERR_WRITE : SQLITE_FULL;
   1.941 +}
   1.942 +
   1.943 +#ifdef SQLITE_TEST
   1.944 +/*
   1.945 +** Count the number of fullsyncs and normal syncs.  This is used to test
   1.946 +** that syncs and fullsyncs are occurring at the right times.
   1.947 +*/
   1.948 +int sqlite3_sync_count = 0;      /* bumped on every full_fsync() call */
   1.949 +int sqlite3_fullsync_count = 0;  /* bumped when full_fsync() gets fullSync!=0 */
   1.950 +#endif
   1.951 +
   1.952 +/*
   1.953 +** Use the fdatasync() API only if the HAVE_FDATASYNC macro is defined.
   1.954 +** Otherwise every use of the name fdatasync below is replaced by fsync.
   1.955 +*/
   1.956 +#ifndef HAVE_FDATASYNC
   1.957 +# define fdatasync fsync
   1.958 +#endif
   1.959 +
   1.960 +/*
   1.961 +** Define HAVE_FULLFSYNC to 0 or 1 depending on whether or not
   1.962 +** the F_FULLFSYNC macro is defined.  F_FULLFSYNC is currently
   1.963 +** only available on Mac OS X.  But that could change.
   1.964 +*/
   1.965 +#ifdef F_FULLFSYNC
   1.966 +# define HAVE_FULLFSYNC 1
   1.967 +#else
   1.968 +# define HAVE_FULLFSYNC 0
   1.969 +#endif
   1.970 +
   1.971 +
   1.972 +/*
   1.973 +** The fsync() system call does not work as advertised on many unix
   1.974 +** systems.  This procedure is an attempt to make it work better.  It
   1.975 +** syncs descriptor fd and returns the result of the last sync primitive
   1.976 +** called (zero means success).  fullSync requests F_FULLFSYNC where it
   1.977 +** is available; dataOnly selects fdatasync() on builds without it.
   1.978 +**
   1.979 +** The SQLITE_NO_SYNC macro disables all fsync()s; it is meant only for
   1.980 +** quick test runs, since an OS crash or power failure will then likely
   1.981 +** corrupt the database file.
   1.982 +*/
   1.983 +static int full_fsync(int fd, int fullSync, int dataOnly){
   1.984 +  int rc;
   1.985 +
   1.986 +  /* Record the number of times that we do a normal fsync() and
   1.987 +  ** FULLSYNC.  This is used during testing to verify that this procedure
   1.988 +  ** gets called with the correct arguments.
   1.989 +  */
   1.990 +#ifdef SQLITE_TEST
   1.991 +  if( fullSync ) sqlite3_fullsync_count++;
   1.992 +  sqlite3_sync_count++;
   1.993 +#endif
   1.994 +
   1.995 +  /* If we compiled with the SQLITE_NO_SYNC flag, then syncing is a
   1.996 +  ** no-op
   1.997 +  */
   1.998 +#ifdef SQLITE_NO_SYNC
   1.999 +  rc = SQLITE_OK;
  1.1000 +#else
  1.1001 +
  1.1002 +#if HAVE_FULLFSYNC
  1.1003 +  if( fullSync ){
  1.1004 +    rc = fcntl(fd, F_FULLFSYNC, 0);
  1.1005 +  }else{
  1.1006 +    rc = 1;  /* non-zero so that the plain fsync() below is executed */
  1.1007 +  }
  1.1008 +  /* If the FULLFSYNC failed, fall back to attempting an fsync().
  1.1009 +   * It shouldn't be possible for fullfsync to fail on the local
  1.1010 +   * file system (on OSX), so failure indicates that FULLFSYNC
  1.1011 +   * isn't supported for this file system. So, attempt an fsync
  1.1012 +   * and (for now) ignore the overhead of a superfluous fcntl call.
  1.1013 +   * It'd be better to detect fullfsync support once and avoid
  1.1014 +   * the fcntl call every time sync is called.
  1.1015 +   */
  1.1016 +  if( rc ) rc = fsync(fd);
  1.1017 +
  1.1018 +#else 
  1.1019 +  if( dataOnly ){
  1.1020 +    rc = fdatasync(fd);
  1.1021 +  }else{
  1.1022 +    rc = fsync(fd);
  1.1023 +  }
  1.1024 +#endif /* HAVE_FULLFSYNC */
  1.1025 +#endif /* !defined(SQLITE_NO_SYNC) */
  1.1026 +
  1.1027 +  return rc;
  1.1028 +}
  1.1029 +
  1.1030 +/*
  1.1031 +** Make sure all writes to a particular file are committed to disk.
  1.1032 +** Returns SQLITE_OK, or SQLITE_IOERR_FSYNC if syncing the file fails.
  1.1033 +**
  1.1034 +** If flags contains SQLITE_SYNC_DATAONLY then only the file data is
  1.1035 +** synced; otherwise the file's metadata (file size, access time, etc)
  1.1036 +** is synced as well.
  1.1037 +**
  1.1038 +** Under Unix, also make sure that the directory entry for the file
  1.1039 +** has been created by fsync-ing the directory that contains the file.
  1.1040 +** If we do not do this and we encounter a power failure, the directory
  1.1041 +** entry for the journal might not exist after we reboot.  The next
  1.1042 +** SQLite to access the file will not know that the journal exists and
  1.1043 +** the transaction will not roll back - possibly leading to corruption.
  1.1044 +*/
  1.1045 +static int unixSync(sqlite3_file *id, int flags){
  1.1046 +  unixFile *pFile = (unixFile*)id;
  1.1047 +  int syncKind = (flags&0x0F);
  1.1048 +  int wantDataOnly = (flags&SQLITE_SYNC_DATAONLY);
  1.1049 +  int wantFull = (syncKind==SQLITE_SYNC_FULL);
  1.1050 +  int rc;
  1.1051 +
  1.1052 +  /* Check that one of SQLITE_SYNC_NORMAL or FULL was passed */
  1.1053 +  assert( syncKind==SQLITE_SYNC_NORMAL || syncKind==SQLITE_SYNC_FULL );
  1.1054 +  assert( pFile );
  1.1055 +
  1.1056 +  OSTRACE2("SYNC    %-3d\n", pFile->h);
  1.1057 +  rc = full_fsync(pFile->h, wantFull, wantDataOnly);
  1.1058 +  SimulateIOError( rc=1 );
  1.1059 +  if( rc ){
  1.1060 +    return SQLITE_IOERR_FSYNC;
  1.1061 +  }
  1.1062 +  if( pFile->dirfd<0 ){
  1.1063 +    return SQLITE_OK;       /* No directory sync is pending */
  1.1064 +  }
  1.1065 +
  1.1066 +  OSTRACE4("DIRSYNC %-3d (have_fullfsync=%d fullsync=%d)\n", pFile->dirfd,
  1.1067 +          HAVE_FULLFSYNC, wantFull);
  1.1068 +#ifndef SQLITE_DISABLE_DIRSYNC
  1.1069 +  /* The directory sync is only attempted if full_fsync is
  1.1070 +  ** turned off or unavailable.  If a full_fsync occurred above,
  1.1071 +  ** then the directory sync is superfluous.
  1.1072 +  **
  1.1073 +  ** We have received multiple reports of fsync() returning
  1.1074 +  ** errors when applied to directories on certain file systems.
  1.1075 +  ** A failed directory sync is not a big deal.  So it seems
  1.1076 +  ** better to ignore the error.  Ticket #1657
  1.1077 +  */
  1.1078 +  if( !HAVE_FULLFSYNC || !wantFull ){
  1.1079 +    (void)full_fsync(pFile->dirfd, 0, 0);
  1.1080 +  }
  1.1081 +#endif
  1.1082 +  /* Only need to sync once, so close the directory when we are done. */
  1.1083 +  close(pFile->dirfd);
  1.1084 +  pFile->dirfd = -1;
  1.1085 +  return SQLITE_OK;
  1.1086 +}
  1.1087 +
  1.1088 +/*
  1.1089 +** Truncate an open file to nByte bytes via ftruncate().  Returns
  1.1090 +** SQLITE_OK on success, or SQLITE_IOERR_TRUNCATE if ftruncate() reports
  1.1091 +** an error.
  1.1092 +*/
  1.1093 +static int unixTruncate(sqlite3_file *id, i64 nByte){
  1.1094 +  unixFile *pFile = (unixFile*)id;
  1.1095 +  assert( id );
  1.1096 +  SimulateIOError( return SQLITE_IOERR_TRUNCATE );
  1.1097 +  if( ftruncate(pFile->h, (off_t)nByte) ){
  1.1098 +    return SQLITE_IOERR_TRUNCATE;
  1.1099 +  }
  1.1100 +  return SQLITE_OK;
  1.1101 +}
  1.1102 +
  1.1103 +/*
  1.1104 +** Store the current size of file id, in bytes, in *pSize.  Returns
  1.1105 +** SQLITE_OK on success or SQLITE_IOERR_FSTAT if fstat() fails, in which
  1.1106 +** case *pSize is not written.
  1.1107 +**
  1.1108 +** When opening a zero-size database, the findLockInfo() procedure
  1.1109 +** writes a single byte into that file in order to work around a bug
  1.1110 +** in the OS-X msdos filesystem.  In order to avoid problems with upper
  1.1111 +** layers, a file whose real size is 1 is therefore reported as having
  1.1112 +** size zero.  Ticket #3260.
  1.1113 +*/
  1.1114 +static int unixFileSize(sqlite3_file *id, i64 *pSize){
  1.1115 +  struct stat st;
  1.1116 +  i64 nByte;
  1.1117 +  int rc;
  1.1118 +  assert( id );
  1.1119 +  rc = fstat(((unixFile*)id)->h, &st);
  1.1120 +  SimulateIOError( rc=1 );
  1.1121 +  if( rc!=0 ){
  1.1122 +    return SQLITE_IOERR_FSTAT;
  1.1123 +  }
  1.1124 +  nByte = st.st_size;
  1.1125 +  *pSize = (nByte==1) ? 0 : nByte;
  1.1126 +  return SQLITE_OK;
  1.1127 +}
  1.1128 +
  1.1129 +/*
  1.1130 +** Check whether a RESERVED lock is held on the specified file by this
  1.1131 +** or any other process.  *pResOut is set to 1 if such a lock is held and
  1.1132 +** to 0 if the file is unlocked or holds only SHARED locks.
  1.1133 +**
  1.1134 +** Returns SQLITE_OK, or SQLITE_IOERR_CHECKRESERVEDLOCK (with *pResOut
  1.1135 +** set to 0) if the operating system cannot report the lock state.
  1.1136 +*/
  1.1137 +static int unixCheckReservedLock(sqlite3_file *id, int *pResOut){
  1.1138 +  int rc = SQLITE_OK;
  1.1139 +  int r = 0;
  1.1140 +  unixFile *pFile = (unixFile*)id;
  1.1141 +
  1.1142 +  SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; );
  1.1143 +
  1.1144 +  assert( pFile );
  1.1145 +  enterMutex(); /* Because pFile->pLock is shared across threads */
  1.1146 +
  1.1147 +  if( pFile->pLock->locktype>SHARED_LOCK ){
  1.1148 +    /* A thread in this process holds such a lock */
  1.1149 +    r = 1;
  1.1150 +  }else{
  1.1151 +    /* Otherwise ask the OS whether some other process holds it */
  1.1152 +    struct flock lock;
  1.1153 +    lock.l_whence = SEEK_SET;
  1.1154 +    lock.l_start = RESERVED_BYTE;
  1.1155 +    lock.l_len = 1;
  1.1156 +    lock.l_type = F_WRLCK;
  1.1157 +    if( fcntl(pFile->h, F_GETLK, &lock)==(-1) ){
  1.1158 +      /* lock.l_type was not updated, so it says nothing about the file */
  1.1159 +      rc = SQLITE_IOERR_CHECKRESERVEDLOCK;
  1.1160 +    }else if( lock.l_type!=F_UNLCK ){
  1.1161 +      r = 1;
  1.1162 +    }
  1.1163 +  }
  1.1164 +  leaveMutex();
  1.1165 +  OSTRACE3("TEST WR-LOCK %d %d\n", pFile->h, r);
  1.1166 +  *pResOut = r;
  1.1167 +  return rc;
  1.1168 +}
  1.1169 +
  1.1170 +/*
  1.1171 +** Raise the lock held on file id to locktype, which is one of:
  1.1172 +**
  1.1173 +**     (1) SHARED_LOCK
  1.1174 +**     (2) RESERVED_LOCK
  1.1175 +**     (3) PENDING_LOCK
  1.1176 +**     (4) EXCLUSIVE_LOCK
  1.1177 +**
  1.1178 +** Sometimes when requesting one lock state, additional lock states
  1.1179 +** are inserted in between.  The locking might fail on one of the later
  1.1180 +** transitions leaving the lock state different from what it started but
  1.1181 +** still short of its goal.  The following chart shows the allowed
  1.1182 +** transitions and the inserted intermediate states:
  1.1183 +**
  1.1184 +**    UNLOCKED -> SHARED
  1.1185 +**    SHARED -> RESERVED
  1.1186 +**    SHARED -> (PENDING) -> EXCLUSIVE
  1.1187 +**    RESERVED -> (PENDING) -> EXCLUSIVE
  1.1188 +**    PENDING -> EXCLUSIVE
  1.1189 +**
  1.1190 +** This routine only raises a lock; use sqlite3OsUnlock() to lower one.
  1.1191 +** Returns SQLITE_OK on success, otherwise SQLITE_BUSY, SQLITE_NOLFS,
  1.1192 +** SQLITE_IOERR_UNLOCK or the error reported by transferOwnership().
  1.1193 +*/
  1.1194 +static int unixLock(sqlite3_file *id, int locktype){
  1.1195 +  /* The following describes the implementation of the various locks and
  1.1196 +  ** lock transitions in terms of the POSIX advisory shared and exclusive
  1.1197 +  ** lock primitives (called read-locks and write-locks below, to avoid
  1.1198 +  ** confusion with SQLite lock names). The algorithms are complicated
  1.1199 +  ** slightly in order to be compatible with windows systems simultaneously
  1.1200 +  ** accessing the same database file, in case that is ever required.
  1.1201 +  **
  1.1202 +  ** Symbols defined in os.h identify the 'pending byte' and the 'reserved
  1.1203 +  ** byte', each single bytes at well known offsets, and the 'shared byte
  1.1204 +  ** range', a range of 510 bytes at a well known offset.
  1.1205 +  **
  1.1206 +  ** To obtain a SHARED lock, a read-lock is obtained on the 'pending
  1.1207 +  ** byte'.  If this is successful, a random byte from the 'shared byte
  1.1208 +  ** range' is read-locked and the lock on the 'pending byte' released.
  1.1209 +  **
  1.1210 +  ** A process may only obtain a RESERVED lock after it has a SHARED lock.
  1.1211 +  ** A RESERVED lock is implemented by grabbing a write-lock on the
  1.1212 +  ** 'reserved byte'.
  1.1213 +  **
  1.1214 +  ** A process may only obtain a PENDING lock after it has obtained a
  1.1215 +  ** SHARED lock. A PENDING lock is implemented by obtaining a write-lock
  1.1216 +  ** on the 'pending byte'. This ensures that no new SHARED locks can be
  1.1217 +  ** obtained, but existing SHARED locks are allowed to persist. A process
  1.1218 +  ** does not have to obtain a RESERVED lock on the way to a PENDING lock.
  1.1219 +  ** This property is used by the algorithm for rolling back a journal file
  1.1220 +  ** after a crash.
  1.1221 +  **
  1.1222 +  ** An EXCLUSIVE lock, obtained after a PENDING lock is held, is
  1.1223 +  ** implemented by obtaining a write-lock on the entire 'shared byte
  1.1224 +  ** range'. Since all other locks require a read-lock on one of the bytes
  1.1225 +  ** within this range, this ensures that no other locks are held on the
  1.1226 +  ** database.
  1.1227 +  **
  1.1228 +  ** The reason a single byte cannot be used instead of the 'shared byte
  1.1229 +  ** range' is that some versions of windows do not support read-locks. By
  1.1230 +  ** locking a random byte from a range, concurrent SHARED locks may exist
  1.1231 +  ** even if the locking primitive used is always a write-lock.
  1.1232 +  */
  1.1233 +  int rc = SQLITE_OK;
  1.1234 +  unixFile *pFile = (unixFile*)id;
  1.1235 +  struct lockInfo *pLock = pFile->pLock;
  1.1236 +  struct flock lock;
  1.1237 +  int s;
  1.1238 +  int tErrno = 0;  /* errno captured right after a failed SHARED-range lock */
  1.1239 +  assert( pFile );
  1.1240 +  OSTRACE7("LOCK    %d %s was %s(%s,%d) pid=%d\n", pFile->h,
  1.1241 +      locktypeName(locktype), locktypeName(pFile->locktype),
  1.1242 +      locktypeName(pLock->locktype), pLock->cnt , getpid());
  1.1243 +
  1.1244 +  /* If there is already a lock of this type or more restrictive on the
  1.1245 +  ** unixFile, do nothing. Don't use the end_lock: exit path, as
  1.1246 +  ** enterMutex() hasn't been called yet.
  1.1247 +  */
  1.1248 +  if( pFile->locktype>=locktype ){
  1.1249 +    OSTRACE3("LOCK    %d %s ok (already held)\n", pFile->h,
  1.1250 +            locktypeName(locktype));
  1.1251 +    return SQLITE_OK;
  1.1252 +  }
  1.1253 +
  1.1254 +  /* Make sure the locking sequence is correct
  1.1255 +  */
  1.1256 +  assert( pFile->locktype!=NO_LOCK || locktype==SHARED_LOCK );
  1.1257 +  assert( locktype!=PENDING_LOCK );
  1.1258 +  assert( locktype!=RESERVED_LOCK || pFile->locktype==SHARED_LOCK );
  1.1259 +
  1.1260 +  /* This mutex is needed because pFile->pLock is shared across threads
  1.1261 +  */
  1.1262 +  enterMutex();
  1.1263 +
  1.1264 +  /* Make sure the current thread owns the pFile.
  1.1265 +  */
  1.1266 +  rc = transferOwnership(pFile);
  1.1267 +  if( rc!=SQLITE_OK ){
  1.1268 +    leaveMutex();
  1.1269 +    return rc;
  1.1270 +  }
  1.1271 +  pLock = pFile->pLock;
  1.1272 +
  1.1273 +  /* If some thread using this PID has a lock via a different unixFile*
  1.1274 +  ** handle that precludes the requested lock, return BUSY.
  1.1275 +  */
  1.1276 +  if( (pFile->locktype!=pLock->locktype && 
  1.1277 +          (pLock->locktype>=PENDING_LOCK || locktype>SHARED_LOCK))
  1.1278 +  ){
  1.1279 +    rc = SQLITE_BUSY;
  1.1280 +    goto end_lock;
  1.1281 +  }
  1.1282 +
  1.1283 +  /* If a SHARED lock is requested, and some thread using this PID already
  1.1284 +  ** has a SHARED or RESERVED lock, then increment reference counts and
  1.1285 +  ** return SQLITE_OK.
  1.1286 +  */
  1.1287 +  if( locktype==SHARED_LOCK && 
  1.1288 +      (pLock->locktype==SHARED_LOCK || pLock->locktype==RESERVED_LOCK) ){
  1.1289 +    assert( locktype==SHARED_LOCK );
  1.1290 +    assert( pFile->locktype==0 );
  1.1291 +    assert( pLock->cnt>0 );
  1.1292 +    pFile->locktype = SHARED_LOCK;
  1.1293 +    pLock->cnt++;
  1.1294 +    pFile->pOpen->nLock++;
  1.1295 +    goto end_lock;
  1.1296 +  }
  1.1297 +
  1.1298 +  lock.l_len = 1L;
  1.1299 +
  1.1300 +  lock.l_whence = SEEK_SET;
  1.1301 +
  1.1302 +  /* A PENDING lock is needed before acquiring a SHARED lock and before
  1.1303 +  ** acquiring an EXCLUSIVE lock.  For the SHARED lock, the PENDING will
  1.1304 +  ** be released.
  1.1305 +  */
  1.1306 +  if( locktype==SHARED_LOCK 
  1.1307 +      || (locktype==EXCLUSIVE_LOCK && pFile->locktype<PENDING_LOCK)
  1.1308 +  ){
  1.1309 +    lock.l_type = (locktype==SHARED_LOCK?F_RDLCK:F_WRLCK);
  1.1310 +    lock.l_start = PENDING_BYTE;
  1.1311 +    s = fcntl(pFile->h, F_SETLK, &lock);
  1.1312 +    if( s==(-1) ){
  1.1313 +      rc = (errno==EINVAL) ? SQLITE_NOLFS : SQLITE_BUSY;
  1.1314 +      goto end_lock;
  1.1315 +    }
  1.1316 +  }
  1.1317 +
  1.1318 +
  1.1319 +  /* If control gets to this point, then actually go ahead and make
  1.1320 +  ** operating system calls for the specified lock.
  1.1321 +  */
  1.1322 +  if( locktype==SHARED_LOCK ){
  1.1323 +    assert( pLock->cnt==0 );
  1.1324 +    assert( pLock->locktype==0 );
  1.1325 +
  1.1326 +    /* Now get the read-lock */
  1.1327 +    lock.l_start = SHARED_FIRST;
  1.1328 +    lock.l_len = SHARED_SIZE;
  1.1329 +    s = fcntl(pFile->h, F_SETLK, &lock);
  1.1330 +    if( s==(-1) ) tErrno = errno;  /* the fcntl() below may change errno */
  1.1331 +    /* Drop the temporary PENDING lock */
  1.1332 +    lock.l_start = PENDING_BYTE;
  1.1333 +    lock.l_len = 1L;
  1.1334 +    lock.l_type = F_UNLCK;
  1.1335 +    if( fcntl(pFile->h, F_SETLK, &lock)!=0 ){
  1.1336 +      rc = SQLITE_IOERR_UNLOCK;  /* This should never happen */
  1.1337 +      goto end_lock;
  1.1338 +    }
  1.1339 +    if( s==(-1) ){
  1.1340 +      rc = (tErrno==EINVAL) ? SQLITE_NOLFS : SQLITE_BUSY;
  1.1341 +    }else{
  1.1342 +      pFile->locktype = SHARED_LOCK;
  1.1343 +      pFile->pOpen->nLock++;
  1.1344 +      pLock->cnt = 1;
  1.1345 +    }
  1.1346 +  }else if( locktype==EXCLUSIVE_LOCK && pLock->cnt>1 ){
  1.1347 +    /* We are trying for an exclusive lock but another thread in this
  1.1348 +    ** same process is still holding a shared lock. */
  1.1349 +    rc = SQLITE_BUSY;
  1.1350 +  }else{
  1.1351 +    /* The request was for a RESERVED or EXCLUSIVE lock.  It is
  1.1352 +    ** assumed that there is a SHARED or greater lock on the file
  1.1353 +    ** already.
  1.1354 +    */
  1.1355 +    assert( 0!=pFile->locktype );
  1.1356 +    lock.l_type = F_WRLCK;
  1.1357 +    switch( locktype ){
  1.1358 +      case RESERVED_LOCK:
  1.1359 +        lock.l_start = RESERVED_BYTE;
  1.1360 +        break;
  1.1361 +      case EXCLUSIVE_LOCK:
  1.1362 +        lock.l_start = SHARED_FIRST;
  1.1363 +        lock.l_len = SHARED_SIZE;
  1.1364 +        break;
  1.1365 +      default:
  1.1366 +        assert(0);
  1.1367 +    }
  1.1368 +    s = fcntl(pFile->h, F_SETLK, &lock);
  1.1369 +    if( s==(-1) ){
  1.1370 +      rc = (errno==EINVAL) ? SQLITE_NOLFS : SQLITE_BUSY;
  1.1371 +    }
  1.1372 +  }
  1.1373 +
  1.1374 +  if( rc==SQLITE_OK ){
  1.1375 +    pFile->locktype = locktype;
  1.1376 +    pLock->locktype = locktype;
  1.1377 +  }else if( locktype==EXCLUSIVE_LOCK ){
  1.1378 +    pFile->locktype = PENDING_LOCK;
  1.1379 +    pLock->locktype = PENDING_LOCK;
  1.1380 +  }
  1.1381 +
  1.1382 +end_lock:
  1.1383 +  leaveMutex();
  1.1384 +  OSTRACE4("LOCK    %d %s %s\n", pFile->h, locktypeName(locktype), 
  1.1385 +      rc==SQLITE_OK ? "ok" : "failed");
  1.1386 +  return rc;
  1.1387 +}
  1.1388 +
  1.1389 +/*
  1.1390 +** Lower the locking level on file descriptor pFile to locktype.  locktype
  1.1391 +** must be either NO_LOCK or SHARED_LOCK.  If the locking level is already
  1.1392 +** at or below the requested level, this routine is a no-op.
  1.1393 +** Returns SQLITE_OK on success, SQLITE_MISUSE if CHECK_THREADID(pFile)
  1.1394 +** is true, or SQLITE_IOERR_RDLOCK / SQLITE_IOERR_UNLOCK if fcntl() fails.
  1.1395 +*/
  1.1396 +static int unixUnlock(sqlite3_file *id, int locktype){
  1.1397 +  struct lockInfo *pLock;
  1.1398 +  struct flock lock;
  1.1399 +  int rc = SQLITE_OK;
  1.1400 +  unixFile *pFile = (unixFile*)id;
  1.1401 +  int h;  /* descriptor passed to fcntl(); copied from pFile->h below */
  1.1402 +
  1.1403 +  assert( pFile );
  1.1404 +  OSTRACE7("UNLOCK  %d %d was %d(%d,%d) pid=%d\n", pFile->h, locktype,
  1.1405 +      pFile->locktype, pFile->pLock->locktype, pFile->pLock->cnt, getpid());
  1.1406 +
  1.1407 +  assert( locktype<=SHARED_LOCK );
  1.1408 +  if( pFile->locktype<=locktype ){
  1.1409 +    return SQLITE_OK;
  1.1410 +  }
  1.1411 +  if( CHECK_THREADID(pFile) ){
  1.1412 +    return SQLITE_MISUSE;
  1.1413 +  }
  1.1414 +  enterMutex();
  1.1415 +  h = pFile->h;
  1.1416 +  pLock = pFile->pLock;
  1.1417 +  assert( pLock->cnt!=0 );
  1.1418 +  if( pFile->locktype>SHARED_LOCK ){
  1.1419 +    assert( pLock->locktype==pFile->locktype );
  1.1420 +    SimulateIOErrorBenign(1);
  1.1421 +    SimulateIOError( h=(-1) )
  1.1422 +    SimulateIOErrorBenign(0);
  1.1423 +    if( locktype==SHARED_LOCK ){
  1.1424 +      lock.l_type = F_RDLCK;  /* turn the shared range back into a read-lock */
  1.1425 +      lock.l_whence = SEEK_SET;
  1.1426 +      lock.l_start = SHARED_FIRST;
  1.1427 +      lock.l_len = SHARED_SIZE;
  1.1428 +      if( fcntl(h, F_SETLK, &lock)==(-1) ){
  1.1429 +        rc = SQLITE_IOERR_RDLOCK;
  1.1430 +      }
  1.1431 +    }
  1.1432 +    lock.l_type = F_UNLCK;
  1.1433 +    lock.l_whence = SEEK_SET;
  1.1434 +    lock.l_start = PENDING_BYTE;  /* 2 bytes: the pending and reserved bytes */
  1.1435 +    lock.l_len = 2L;  assert( PENDING_BYTE+1==RESERVED_BYTE );
  1.1436 +    if( fcntl(h, F_SETLK, &lock)!=(-1) ){
  1.1437 +      pLock->locktype = SHARED_LOCK;
  1.1438 +    }else{
  1.1439 +      rc = SQLITE_IOERR_UNLOCK;
  1.1440 +    }
  1.1441 +  }
  1.1442 +  if( locktype==NO_LOCK ){
  1.1443 +    struct openCnt *pOpen;
  1.1444 +
  1.1445 +    /* Decrement the shared lock counter.  Release the lock using an
  1.1446 +    ** OS call only when all threads in this same process have released
  1.1447 +    ** the lock.
  1.1448 +    */
  1.1449 +    pLock->cnt--;
  1.1450 +    if( pLock->cnt==0 ){
  1.1451 +      lock.l_type = F_UNLCK;
  1.1452 +      lock.l_whence = SEEK_SET;
  1.1453 +      lock.l_start = lock.l_len = 0L;  /* start 0, length 0: the whole file */
  1.1454 +      SimulateIOErrorBenign(1);
  1.1455 +      SimulateIOError( h=(-1) )
  1.1456 +      SimulateIOErrorBenign(0);
  1.1457 +      if( fcntl(h, F_SETLK, &lock)!=(-1) ){
  1.1458 +        pLock->locktype = NO_LOCK;
  1.1459 +      }else{
  1.1460 +        rc = SQLITE_IOERR_UNLOCK;
  1.1461 +        pLock->cnt = 1;  /* unlock failed: undo the decrement above */
  1.1462 +      }
  1.1463 +    }
  1.1464 +
  1.1465 +    /* Decrement the count of locks against this same file.  When the
  1.1466 +    ** count reaches zero, close any other file descriptors whose close
  1.1467 +    ** was deferred because of outstanding locks.
  1.1468 +    */
  1.1469 +    if( rc==SQLITE_OK ){
  1.1470 +      pOpen = pFile->pOpen;
  1.1471 +      pOpen->nLock--;
  1.1472 +      assert( pOpen->nLock>=0 );
  1.1473 +      if( pOpen->nLock==0 && pOpen->nPending>0 ){
  1.1474 +        int i;
  1.1475 +        for(i=0; i<pOpen->nPending; i++){
  1.1476 +          close(pOpen->aPending[i]);
  1.1477 +        }
  1.1478 +        sqlite3_free(pOpen->aPending);
  1.1479 +        pOpen->nPending = 0;
  1.1480 +        pOpen->aPending = 0;
  1.1481 +      }
  1.1482 +    }
  1.1483 +  }
  1.1484 +  leaveMutex();
  1.1485 +  if( rc==SQLITE_OK ) pFile->locktype = locktype;  /* done after leaveMutex() */
  1.1486 +  return rc;
  1.1487 +}
  1.1488 +
  1.1489 +/*
  1.1490 +** Perform the part of the "close file" operation that is common to all
  1.1491 +** locking schemes: close the directory descriptor and the file
  1.1492 +** descriptor when they are valid (non-negative), then set every field
  1.1493 +** of the unixFile structure to 0.
  1.1494 +**
  1.1495 +** A NULL id is accepted and ignored.  Always returns SQLITE_OK; the
  1.1496 +** results of the close() calls are not checked.
  1.1497 +*/
  1.1498 +static int closeUnixFile(sqlite3_file *id){
  1.1499 +  unixFile *pFile = (unixFile*)id;
  1.1500 +  if( pFile==0 ){
  1.1501 +    return SQLITE_OK;
  1.1502 +  }
  1.1503 +  if( pFile->dirfd>=0 ) close(pFile->dirfd);
  1.1504 +  if( pFile->h>=0 ) close(pFile->h);
  1.1505 +  OSTRACE2("CLOSE   %-3d\n", pFile->h);
  1.1506 +  OpenCounter(-1);
  1.1507 +  memset(pFile, 0, sizeof(*pFile));
  1.1508 +  return SQLITE_OK;
  1.1509 +}
  1.1510 +
  1.1511 +/*
  1.1512 +** Close a file: drop its locks, release its lockInfo and openCnt
  1.1513 +** references, and close or queue its descriptor.  Returns SQLITE_OK.
  1.1514 +*/
  1.1515 +static int unixClose(sqlite3_file *id){
  1.1516 +  if( id ){
  1.1517 +    unixFile *pFile = (unixFile *)id;
  1.1518 +    unixUnlock(id, NO_LOCK);
  1.1519 +    enterMutex();
  1.1520 +    if( pFile->pOpen && pFile->pOpen->nLock ){
  1.1521 +      /* There are outstanding locks, so the file must not be closed yet:
  1.1522 +      ** that would clear those locks.  Queue the descriptor on
  1.1523 +      ** pOpen->aPending; it is closed when the last lock is cleared.
  1.1524 +      */
  1.1525 +      int *aNew;
  1.1526 +      struct openCnt *pOpen = pFile->pOpen;
  1.1527 +      aNew = sqlite3_realloc(pOpen->aPending, (pOpen->nPending+1)*sizeof(int) );
  1.1528 +      if( aNew!=0 ){
  1.1529 +        pOpen->aPending = aNew;
  1.1530 +        pOpen->aPending[pOpen->nPending] = pFile->h;
  1.1531 +        pOpen->nPending++;
  1.1532 +      }
  1.1533 +      /* Never close h here.  If the malloc failed, h is deliberately leaked
  1.1534 +      ** rather than closed, because close() would clear the locks above. */
  1.1535 +      pFile->h = -1;
  1.1536 +    }
  1.1537 +    releaseLockInfo(pFile->pLock);
  1.1538 +    releaseOpenCnt(pFile->pOpen);
  1.1539 +    closeUnixFile(id);
  1.1540 +    leaveMutex();
  1.1541 +  }
  1.1542 +  return SQLITE_OK;
  1.1543 +}
  1.1544 +
  1.1545 +
  1.1546 +#ifdef SQLITE_ENABLE_LOCKING_STYLE
  1.1547 +#pragma mark AFP Support
  1.1548 +
  1.1549 +/*
  1.1550 + ** Per-file state for AFP locking; a pointer to it is kept in unixFile.lockingContext
  1.1551 + */
  1.1552 +typedef struct afpLockingContext afpLockingContext;
  1.1553 +struct afpLockingContext {
  1.1554 +  unsigned long long sharedLockByte;  /* offset past SHARED_FIRST of the byte locked for SHARED */
  1.1555 +  const char *filePath;               /* path given to fsctl(); set from the caller's file name, not a copy */
  1.1556 +};
  1.1557 +
  1.1558 +struct ByteRangeLockPB2   /* request block that _AFPFSSetLock() passes to fsctl() */
  1.1559 +{
  1.1560 +  unsigned long long offset;        /* offset to first byte to lock */
  1.1561 +  unsigned long long length;        /* number of bytes to lock */
  1.1562 +  unsigned long long retRangeStart; /* number of 1st byte locked if successful; not set by _AFPFSSetLock() */
  1.1563 +  unsigned char unLockFlag;         /* 1 = unlock, 0 = lock */
  1.1564 +  unsigned char startEndFlag;       /* 1 = relative to end of fork, 0 = relative to start */
  1.1565 +  int fd;                           /* file descriptor to associate this lock with */
  1.1566 +};
  1.1567 +
  1.1568 +#define afpfsByteRangeLock2FSCTL        _IOWR('z', 23, struct ByteRangeLockPB2)  /* fsctl() request code carrying a ByteRangeLockPB2 */
  1.1569 +
  1.1570 +/*
  1.1571 +** Set (setLockFlag!=0) or clear (setLockFlag==0) an AFP byte-range lock
  1.1572 +** covering "length" bytes from "offset", by issuing the
  1.1573 +** afpfsByteRangeLock2FSCTL request on "path" with "fd" in the block.
  1.1574 +**
  1.1575 +** Returns 0 when fsctl() succeeds and 1 when it returns -1.  The result
  1.1576 +** does not say whether the file was busy or the file system failed.
  1.1577 +*/
  1.1578 +static int _AFPFSSetLock(
  1.1579 +  const char *path,              /* File the fsctl() is issued against */
  1.1580 +  int fd,                        /* Descriptor recorded in the request */
  1.1581 +  unsigned long long offset,     /* First byte of the range */
  1.1582 +  unsigned long long length,     /* Number of bytes in the range */
  1.1583 +  int setLockFlag                /* Non-zero to lock, zero to unlock */
  1.1584 +){
  1.1585 +  struct ByteRangeLockPB2 req;
  1.1586 +
  1.1587 +  req.offset = offset;
  1.1588 +  req.length = length;
  1.1589 +  req.unLockFlag = setLockFlag ? 0 : 1;
  1.1590 +  req.startEndFlag = 0;
  1.1591 +  req.fd = fd;
  1.1592 +  OSTRACE5("AFPLOCK setting lock %s for %d in range %llx:%llx\n", 
  1.1593 +    (setLockFlag?"ON":"OFF"), fd, offset, length);
  1.1594 +
  1.1595 +  if( fsctl(path, afpfsByteRangeLock2FSCTL, &req, 0)!=-1 ){
  1.1596 +    return 0;
  1.1597 +  }
  1.1598 +  OSTRACE4("AFPLOCK failed to fsctl() '%s' %d %s\n", path, errno, 
  1.1599 +    strerror(errno));
  1.1600 +  return 1;
  1.1601 +}
  1.1602 +
  1.1603 +/*
  1.1604 + ** xCheckReservedLock for AFP locking.
  1.1605 + **
  1.1606 + ** Sets *pResOut to non-zero when a RESERVED (or stronger) lock is held
  1.1607 + ** through this handle or appears to be held elsewhere, and to zero
  1.1608 + ** otherwise.  "Elsewhere" is detected by a probe: the reserved byte is
  1.1609 + ** locked and, if that works, unlocked again.  Always returns SQLITE_OK.
  1.1610 + **
  1.1611 + ** id must be non-NULL (asserted); its lockingContext supplies the path.
  1.1612 + */
  1.1613 +static int afpCheckReservedLock(sqlite3_file *id, int *pResOut){
  1.1614 +  unixFile *pFile = (unixFile*)id;
  1.1615 +  afpLockingContext *pCtx;
  1.1616 +  int isReserved;
  1.1617 +
  1.1618 +  assert( pFile );
  1.1619 +  pCtx = (afpLockingContext *) pFile->lockingContext;
  1.1620 +
  1.1621 +  if( pFile->locktype>SHARED_LOCK ){
  1.1622 +    /* This handle itself is at RESERVED or above. */
  1.1623 +    isReserved = 1;
  1.1624 +  }else if( _AFPFSSetLock(pCtx->filePath, pFile->h, RESERVED_BYTE, 1, 1) ){
  1.1625 +    /* The probe was refused.  Any fsctl() failure, whatever its cause,
  1.1626 +    ** is read as "someone else holds the byte". */
  1.1627 +    isReserved = 1;
  1.1628 +  }else{
  1.1629 +    /* The probe took the byte, so nobody had it.  Give it back to
  1.1630 +    ** restore the original state; the result of that is not checked. */
  1.1631 +    _AFPFSSetLock(pCtx->filePath, pFile->h, RESERVED_BYTE, 1, 0);
  1.1632 +    isReserved = 0;
  1.1633 +  }
  1.1634 +
  1.1635 +  OSTRACE3("TEST WR-LOCK %d %d\n", pFile->h, isReserved);
  1.1636 +
  1.1637 +  *pResOut = isReserved;
  1.1638 +  return SQLITE_OK;
  1.1639 +}
  1.1640 +
  1.1641 +/* AFP-style locking following the behavior of unixLock, see the unixLock function comments for details of lock management.
  1.1642 +** Returns SQLITE_OK, SQLITE_BUSY if a byte-range lock is refused, an SQLITE_IOERR_* code if undoing a lock fails, or the transferOwnership() error. */
  1.1643 +static int afpLock(sqlite3_file *id, int locktype){
  1.1644 +  int rc = SQLITE_OK;
  1.1645 +  unixFile *pFile = (unixFile*)id;
  1.1646 +  afpLockingContext *context = (afpLockingContext *) pFile->lockingContext;  /* NOTE(review): reads pFile before the assert below */
  1.1647 +  
  1.1648 +  assert( pFile );
  1.1649 +  OSTRACE5("LOCK    %d %s was %s pid=%d\n", pFile->h,
  1.1650 +         locktypeName(locktype), locktypeName(pFile->locktype), getpid());
  1.1651 +
  1.1652 +  /* If there is already a lock of this type or more restrictive on the
  1.1653 +  ** unixFile, do nothing. Don't use the afp_end_lock: exit path, as
  1.1654 +  ** enterMutex() hasn't been called yet.
  1.1655 +  */
  1.1656 +  if( pFile->locktype>=locktype ){
  1.1657 +    OSTRACE3("LOCK    %d %s ok (already held)\n", pFile->h,
  1.1658 +           locktypeName(locktype));
  1.1659 +    return SQLITE_OK;
  1.1660 +  }
  1.1661 +
  1.1662 +  /* Make sure the locking sequence is correct: the first lock taken must be SHARED, PENDING is never
  1.1663 +  ** requested directly, and RESERVED is only requested from SHARED. */
  1.1664 +  assert( pFile->locktype!=NO_LOCK || locktype==SHARED_LOCK );
  1.1665 +  assert( locktype!=PENDING_LOCK );
  1.1666 +  assert( locktype!=RESERVED_LOCK || pFile->locktype==SHARED_LOCK );
  1.1667 +  
  1.1668 +  /* This mutex is needed because pFile->pLock is shared across threads
  1.1669 +  */
  1.1670 +  enterMutex();
  1.1671 +
  1.1672 +  /* Make sure the current thread owns the pFile.
  1.1673 +  */
  1.1674 +  rc = transferOwnership(pFile);
  1.1675 +  if( rc!=SQLITE_OK ){
  1.1676 +    leaveMutex();
  1.1677 +    return rc;
  1.1678 +  }
  1.1679 +    
  1.1680 +  /* A PENDING lock is needed before acquiring a SHARED lock and before
  1.1681 +  ** acquiring an EXCLUSIVE lock.  For the SHARED lock, the PENDING will
  1.1682 +  ** be released.  For EXCLUSIVE it is not released anywhere in this function.
  1.1683 +  */
  1.1684 +  if( locktype==SHARED_LOCK 
  1.1685 +      || (locktype==EXCLUSIVE_LOCK && pFile->locktype<PENDING_LOCK)
  1.1686 +  ){
  1.1687 +    int failed;
  1.1688 +    failed = _AFPFSSetLock(context->filePath, pFile->h, PENDING_BYTE, 1, 1);
  1.1689 +    if (failed) {
  1.1690 +      rc = SQLITE_BUSY;
  1.1691 +      goto afp_end_lock;  /* nothing was acquired, so the recorded lock level is unchanged */
  1.1692 +    }
  1.1693 +  }
  1.1694 +  
  1.1695 +  /* If control gets to this point, then actually go ahead and make
  1.1696 +  ** operating system calls for the specified lock.
  1.1697 +  */
  1.1698 +  if( locktype==SHARED_LOCK ){
  1.1699 +    int lk, failed;
  1.1700 +    
  1.1701 +    /* Now get the read-lock */
  1.1702 +    /* note that the quality of the randomness doesn't matter that much */
  1.1703 +    lk = random(); 
  1.1704 +    context->sharedLockByte = (lk & 0x7fffffff)%(SHARED_SIZE - 1);  /* one byte offset in 0..SHARED_SIZE-2 */
  1.1705 +    failed = _AFPFSSetLock(context->filePath, pFile->h, 
  1.1706 +      SHARED_FIRST+context->sharedLockByte, 1, 1);
  1.1707 +    
  1.1708 +    /* Drop the temporary PENDING lock */
  1.1709 +    if (_AFPFSSetLock(context->filePath, pFile->h, PENDING_BYTE, 1, 0)) {
  1.1710 +      rc = SQLITE_IOERR_UNLOCK;  /* This should never happen */
  1.1711 +      goto afp_end_lock;  /* NOTE(review): if failed==0 the shared byte stays locked while locktype is not updated */
  1.1712 +    }
  1.1713 +    
  1.1714 +    if( failed ){
  1.1715 +      rc = SQLITE_BUSY;
  1.1716 +    } else {
  1.1717 +      pFile->locktype = SHARED_LOCK;  /* assigned again, to the same value, by the rc==SQLITE_OK branch below */
  1.1718 +    }
  1.1719 +  }else{
  1.1720 +    /* The request was for a RESERVED or EXCLUSIVE lock.  It is
  1.1721 +    ** assumed that there is a SHARED or greater lock on the file
  1.1722 +    ** already.
  1.1723 +    */
  1.1724 +    int failed = 0;
  1.1725 +    assert( 0!=pFile->locktype );
  1.1726 +    if (locktype >= RESERVED_LOCK && pFile->locktype < RESERVED_LOCK) {
  1.1727 +        /* Acquire a RESERVED lock */
  1.1728 +        failed = _AFPFSSetLock(context->filePath, pFile->h, RESERVED_BYTE, 1,1);
  1.1729 +    }
  1.1730 +    if (!failed && locktype == EXCLUSIVE_LOCK) {
  1.1731 +      /* Acquire an EXCLUSIVE lock */
  1.1732 +        
  1.1733 +      /* Remove the shared lock before trying the range.  We'll need to
  1.1734 +      ** re-establish the shared lock if we can't get the exclusive range.
  1.1735 +      */
  1.1736 +      if (!_AFPFSSetLock(context->filePath, pFile->h, SHARED_FIRST +
  1.1737 +                         context->sharedLockByte, 1, 0)) {
  1.1738 +        /* now attempt to get the exclusive lock range */
  1.1739 +        failed = _AFPFSSetLock(context->filePath, pFile->h, SHARED_FIRST, 
  1.1740 +                               SHARED_SIZE, 1);
  1.1741 +        if (failed && _AFPFSSetLock(context->filePath, pFile->h, SHARED_FIRST +
  1.1742 +                                    context->sharedLockByte, 1, 1)) {
  1.1743 +          rc = SQLITE_IOERR_RDLOCK; /* this should never happen */
  1.1744 +        }
  1.1745 +      } else {
  1.1746 +        /* our own shared byte could not be unlocked */
  1.1747 +        rc = SQLITE_IOERR_UNLOCK; /* this should never happen */
  1.1748 +      }
  1.1749 +    }
  1.1750 +    if( failed && rc == SQLITE_OK){
  1.1751 +      rc = SQLITE_BUSY;
  1.1752 +    }
  1.1753 +  }
  1.1754 +  
  1.1755 +  if( rc==SQLITE_OK ){
  1.1756 +    pFile->locktype = locktype;
  1.1757 +  }else if( locktype==EXCLUSIVE_LOCK ){
  1.1758 +    pFile->locktype = PENDING_LOCK;  /* a failed EXCLUSIVE request leaves the handle recorded at PENDING */
  1.1759 +  }
  1.1760 +  
  1.1761 +afp_end_lock:
  1.1762 +  leaveMutex();
  1.1763 +  OSTRACE4("LOCK    %d %s %s\n", pFile->h, locktypeName(locktype), 
  1.1764 +         rc==SQLITE_OK ? "ok" : "failed");
  1.1765 +  return rc;
  1.1766 +}
  1.1767 +
  1.1768 +/*
  1.1769 +** Lower the locking level on file descriptor pFile to locktype.  locktype
  1.1770 +** must be either NO_LOCK or SHARED_LOCK.  Returns SQLITE_OK on success,
  1.1771 +** SQLITE_MISUSE when CHECK_THREADID(pFile) is true, or an SQLITE_IOERR_*
  1.1772 +** code if a byte-range lock cannot be released or re-established.  If the
  1.1773 +** level is already at or below the requested level, this is a no-op.
  1.1774 +*/
  1.1775 +static int afpUnlock(sqlite3_file *id, int locktype) {
  1.1776 +  int rc = SQLITE_OK;
  1.1777 +  unixFile *pFile = (unixFile*)id;
  1.1778 +  afpLockingContext *context = (afpLockingContext *) pFile->lockingContext;  /* NOTE(review): reads pFile before the assert below */
  1.1779 +
  1.1780 +  assert( pFile );
  1.1781 +  OSTRACE5("UNLOCK  %d %d was %d pid=%d\n", pFile->h, locktype,
  1.1782 +         pFile->locktype, getpid());
  1.1783 +  
  1.1784 +  assert( locktype<=SHARED_LOCK );
  1.1785 +  if( pFile->locktype<=locktype ){
  1.1786 +    return SQLITE_OK;  /* already at or below the requested level */
  1.1787 +  }
  1.1788 +  if( CHECK_THREADID(pFile) ){
  1.1789 +    return SQLITE_MISUSE;
  1.1790 +  }
  1.1791 +  enterMutex();
  1.1792 +  if( pFile->locktype>SHARED_LOCK ){
  1.1793 +    if( locktype==SHARED_LOCK ){
  1.1794 +      int failed = 0;
  1.1795 +
  1.1796 +      /* unlock the exclusive range - then re-establish the shared lock */
  1.1797 +      if (pFile->locktype==EXCLUSIVE_LOCK) {
  1.1798 +        failed = _AFPFSSetLock(context->filePath, pFile->h, SHARED_FIRST, 
  1.1799 +                                 SHARED_SIZE, 0);
  1.1800 +        if (!failed) {
  1.1801 +          /* successfully removed the exclusive lock */
  1.1802 +          if (_AFPFSSetLock(context->filePath, pFile->h, SHARED_FIRST+
  1.1803 +                            context->sharedLockByte, 1, 1)) {
  1.1804 +            /* failed to re-establish our shared lock */
  1.1805 +            rc = SQLITE_IOERR_RDLOCK; /* This should never happen */
  1.1806 +          }
  1.1807 +        } else {
  1.1808 +          /* This should never happen - failed to unlock the exclusive range */
  1.1809 +          rc = SQLITE_IOERR_UNLOCK;
  1.1810 +        } 
  1.1811 +      }
  1.1812 +    }
  1.1813 +    if (rc == SQLITE_OK && pFile->locktype>=PENDING_LOCK) {  /* each later step is skipped once rc holds an error */
  1.1814 +      if (_AFPFSSetLock(context->filePath, pFile->h, PENDING_BYTE, 1, 0)){
  1.1815 +        /* failed to release the pending lock */
  1.1816 +        rc = SQLITE_IOERR_UNLOCK; /* This should never happen */
  1.1817 +      }
  1.1818 +    } 
  1.1819 +    if (rc == SQLITE_OK && pFile->locktype>=RESERVED_LOCK) {
  1.1820 +      if (_AFPFSSetLock(context->filePath, pFile->h, RESERVED_BYTE, 1, 0)) {
  1.1821 +        /* failed to release the reserved lock */
  1.1822 +        rc = SQLITE_IOERR_UNLOCK;  /* This should never happen */
  1.1823 +      }
  1.1824 +    } 
  1.1825 +  }
  1.1826 +  if( locktype==NO_LOCK ){  /* NOTE(review): from EXCLUSIVE_LOCK only this single shared byte is unlocked, not the SHARED_SIZE range */
  1.1827 +    int failed = _AFPFSSetLock(context->filePath, pFile->h, 
  1.1828 +                               SHARED_FIRST + context->sharedLockByte, 1, 0);
  1.1829 +    if (failed) {
  1.1830 +      rc = SQLITE_IOERR_UNLOCK;  /* This should never happen */
  1.1831 +    }
  1.1832 +  }
  1.1833 +  if (rc == SQLITE_OK)
  1.1834 +    pFile->locktype = locktype;  /* the recorded level only changes when every step succeeded */
  1.1835 +  leaveMutex();
  1.1836 +  return rc;
  1.1837 +}
  1.1838 +
  1.1839 +/*
  1.1840 +** xClose for AFP: unlock fully, free the locking context, then close.
  1.1841 +*/
  1.1842 +static int afpClose(sqlite3_file *id) {
  1.1843 +  unixFile *pFile = (unixFile*)id;
  1.1844 +  if( pFile!=0 ){
  1.1845 +    afpUnlock(id, NO_LOCK);
  1.1846 +    sqlite3_free(pFile->lockingContext);
  1.1847 +  }
  1.1848 +  return closeUnixFile(id);
  1.1849 +}
  1.1850 +
  1.1851 +
  1.1852 +#pragma mark flock() style locking
  1.1853 +
  1.1854 +/*
  1.1855 +** flock() locking keeps no per-file context, so the context type is void
  1.1856 +*/
  1.1857 +typedef void flockLockingContext;
  1.1858 +
  1.1859 +/* xCheckReservedLock for flock() locking.  *pResOut is set to 1 if this
  1.1860 +** handle is at RESERVED or above, or if the probe for the whole-file
  1.1861 +** lock is refused, and to 0 otherwise.  Always returns SQLITE_OK. */
  1.1862 +static int flockCheckReservedLock(sqlite3_file *id, int *pResOut){
  1.1863 +  unixFile *pFile = (unixFile*)id;
  1.1864 +  int r = 1;
  1.1865 +  if( pFile->locktype>NO_LOCK ){
  1.1866 +    /* We already own the exclusive flock().  Probing here would succeed
  1.1867 +    ** on our own descriptor and the LOCK_UN would drop our own lock. */
  1.1868 +    r = pFile->locktype>SHARED_LOCK;
  1.1869 +  }else if( flock(pFile->h, LOCK_EX | LOCK_NB)==0 ){
  1.1870 +    flock(pFile->h, LOCK_UN);  /* probe succeeded: undo it */
  1.1871 +    r = 0;                     /* no one has it reserved */
  1.1872 +  }
  1.1873 +  *pResOut = r;
  1.1874 +  return SQLITE_OK;
  1.1875 +}
  1.1876 +
  1.1877 +/* xLock for flock() locking.  Every level is backed by the same
  1.1878 +** exclusive, whole-file flock(); only the recorded level differs.
  1.1879 +** Returns SQLITE_OK, or SQLITE_BUSY if flock() does not grant the lock.
  1.1880 +*/
  1.1881 +static int flockLock(sqlite3_file *id, int locktype) {
  1.1882 +  unixFile *pFile = (unixFile*)id;
  1.1883 +
  1.1884 +  if( pFile->locktype<=NO_LOCK ){
  1.1885 +    /* Nothing is held yet: ask for the flock() without blocking.  Any
  1.1886 +    ** failure, whatever its cause, is reported as busy. */
  1.1887 +    if( flock(pFile->h, LOCK_EX | LOCK_NB)!=0 ){
  1.1888 +      return SQLITE_BUSY;
  1.1889 +    }
  1.1890 +  }
  1.1891 +
  1.1892 +  /* The lock was either just obtained or already in place.  In both
  1.1893 +  ** cases only the bookkeeping remains; the recorded level is simply
  1.1894 +  ** overwritten with the requested one. */
  1.1895 +  pFile->locktype = locktype;
  1.1896 +  return SQLITE_OK;
  1.1897 +}
  1.1898 +
  1.1899 +/* xUnlock for flock() locking.  locktype must be NO_LOCK or SHARED_LOCK.
  1.1900 +** Going to SHARED_LOCK only changes the recorded level, because the
  1.1901 +** exclusive flock() stays in place; going to NO_LOCK releases it.
  1.1902 +** Returns SQLITE_OK, or SQLITE_IOERR_UNLOCK if flock(LOCK_UN) fails.
  1.1903 +*/
  1.1904 +static int flockUnlock(sqlite3_file *id, int locktype) {
  1.1905 +  unixFile *pFile = (unixFile*)id;
  1.1906 +
  1.1907 +  assert( locktype<=SHARED_LOCK );
  1.1908 +
  1.1909 +  if( pFile->locktype!=locktype ){
  1.1910 +    if( locktype==SHARED_LOCK ){
  1.1911 +      /* Bookkeeping only. */
  1.1912 +      pFile->locktype = SHARED_LOCK;
  1.1913 +    }else{
  1.1914 +      /* A real unlock.  The recorded level is left untouched if the
  1.1915 +      ** system call fails. */
  1.1916 +      if( flock(pFile->h, LOCK_UN)!=0 ){
  1.1917 +        return SQLITE_IOERR_UNLOCK;
  1.1918 +      }
  1.1919 +      pFile->locktype = NO_LOCK;
  1.1920 +    }
  1.1921 +  }
  1.1922 +  return SQLITE_OK;
  1.1923 +}
  1.1924 +
  1.1925 +/*
  1.1926 +** xClose for flock() locking: release the lock, then do the common close.
  1.1927 +** The result of the unlock is ignored; closeUnixFile()'s is returned.
  1.1928 +*/
  1.1929 +static int flockClose(sqlite3_file *id) {
  1.1930 +  if( id!=0 ) flockUnlock(id, NO_LOCK);
  1.1931 +
  1.1932 +  return closeUnixFile(id);
  1.1933 +}
  1.1934 +
  1.1935 +#pragma mark Old-School .lock file based locking
  1.1936 +
  1.1937 +/* xCheckReservedLock for dot-file locking.  Reports 1 when this handle is
  1.1938 +** at RESERVED_LOCK or when lstat() on the lock file succeeds, else 0.
  1.1939 +** Always returns SQLITE_OK. */
  1.1940 +static int dotlockCheckReservedLock(sqlite3_file *id, int *pResOut) {
  1.1941 +  unixFile *pFile = (unixFile*)id;
  1.1942 +  const char *zLock = (const char *)pFile->lockingContext;
  1.1943 +  struct stat sb;
  1.1944 +
  1.1945 +  if( pFile->locktype==RESERVED_LOCK ){
  1.1946 +    *pResOut = 1;
  1.1947 +  }else{
  1.1948 +    /* Any lstat() failure is taken to mean the lock file is absent. */
  1.1949 +    *pResOut = (lstat(zLock, &sb)==0);
  1.1950 +  }
  1.1951 +  return SQLITE_OK;
  1.1952 +}
  1.1953 +
  1.1954 +/* xLock for dot-file locking.  Holding any lock means having created
  1.1955 +** the file named by lockingContext, so every level is in effect
  1.1956 +** exclusive.  Returns SQLITE_OK, or SQLITE_BUSY if the lock file already
  1.1957 +** exists or cannot be created.
  1.1958 +*/
  1.1959 +static int dotlockLock(sqlite3_file *id, int locktype) {
  1.1960 +  unixFile *pFile = (unixFile*)id;
  1.1961 +  char *zLockFile = (char *)pFile->lockingContext;
  1.1962 +  struct stat statBuf;
  1.1963 +  int fd;
  1.1964 +
  1.1965 +  if( pFile->locktype<=NO_LOCK ){
  1.1966 +    /* Not locked yet.  If the lock file is already there, somebody else
  1.1967 +    ** owns the lock. */
  1.1968 +    if( lstat(zLockFile, &statBuf)==0 ){
  1.1969 +      return SQLITE_BUSY;
  1.1970 +    }
  1.1971 +
  1.1972 +    /* O_EXCL makes the creation fail if the file appeared after the
  1.1973 +    ** lstat() above.  Only the file's existence matters, so the
  1.1974 +    ** descriptor is closed straight away. */
  1.1975 +    fd = open(zLockFile,O_RDONLY|O_CREAT|O_EXCL,0600);
  1.1976 +    if( fd<0 ){
  1.1977 +      return SQLITE_BUSY;
  1.1978 +    }
  1.1979 +    close(fd);
  1.1980 +  }else{
  1.1981 +    /* Already the owner: just refresh the lock file's timestamp. */
  1.1982 +    utimes(zLockFile, NULL);
  1.1983 +  }
  1.1984 +  pFile->locktype = locktype;
  1.1985 +  return SQLITE_OK;
  1.1986 +}
  1.1987 +
  1.1988 +/* xUnlock for dot-file locking.  locktype must be NO_LOCK or SHARED_LOCK.
  1.1989 +** SHARED_LOCK only changes the recorded level; NO_LOCK removes the lock
  1.1990 +** file.  Returns SQLITE_OK, or SQLITE_IOERR_UNLOCK if removal fails. */
  1.1991 +static int dotlockUnlock(sqlite3_file *id, int locktype) {
  1.1992 +  unixFile *pFile = (unixFile*)id;
  1.1993 +  char *zLockFile = (char *)pFile->lockingContext;
  1.1994 +
  1.1995 +  assert( locktype<=SHARED_LOCK );
  1.1996 +  if( pFile->locktype==locktype ) return SQLITE_OK;   /* nothing to do */
  1.1997 +
  1.1998 +  if( locktype==SHARED_LOCK ){
  1.1999 +    pFile->locktype = locktype;   /* the lock file stays in place */
  1.2000 +    return SQLITE_OK;
  1.2001 +  }
  1.2002 +  if( unlink(zLockFile)!=0 && errno!=ENOENT ){
  1.2003 +    /* The lock file is still there, so the lock is still held: do not
  1.2004 +    ** record NO_LOCK.  A file that is already gone counts as unlocked. */
  1.2005 +    return SQLITE_IOERR_UNLOCK;
  1.2006 +  }
  1.2007 +  pFile->locktype = NO_LOCK;
  1.2008 +  return SQLITE_OK;
  1.2009 +}
  1.2010 +
  1.2011 +/*
  1.2012 + ** xClose for dot-file locking: unlock, free the lock-file name, close.
  1.2013 + */
  1.2014 +static int dotlockClose(sqlite3_file *id) {
  1.2015 +  unixFile *pFile = (unixFile*)id;
  1.2016 +  if( pFile!=0 ){
  1.2017 +    dotlockUnlock(id, NO_LOCK);
  1.2018 +    sqlite3_free(pFile->lockingContext);
  1.2019 +  }
  1.2020 +  return closeUnixFile(id);
  1.2021 +}
  1.2022 +
  1.2023 +
  1.2024 +#endif /* SQLITE_ENABLE_LOCKING_STYLE */
  1.2025 +
  1.2026 +/*
  1.2027 +** The "no locking" style keeps no per-file context, so nolockLockingContext is void
  1.2028 +*/
  1.2029 +typedef void nolockLockingContext;
  1.2030 +/* xCheckReservedLock with locking disabled: always stores 0 in *pResOut; id is not used. */
  1.2031 +static int nolockCheckReservedLock(sqlite3_file *id, int *pResOut) {
  1.2032 +  *pResOut = 0;
  1.2033 +  return SQLITE_OK;
  1.2034 +}
  1.2035 +/* xLock with locking disabled: does nothing and reports success for any locktype. */
  1.2036 +static int nolockLock(sqlite3_file *id, int locktype) {
  1.2037 +  return SQLITE_OK;
  1.2038 +}
  1.2039 +/* xUnlock with locking disabled: does nothing and reports success for any locktype. */
  1.2040 +static int nolockUnlock(sqlite3_file *id, int locktype) {
  1.2041 +  return SQLITE_OK;
  1.2042 +}
  1.2043 +
  1.2044 +/*
  1.2045 +** Close a file.  There is no lock to release, so this is just the common close.
  1.2046 +*/
  1.2047 +static int nolockClose(sqlite3_file *id) {
  1.2048 +  return closeUnixFile(id);
  1.2049 +}
  1.2050 +
  1.2051 +
  1.2052 +/*
  1.2053 +** xFileControl.  SQLITE_FCNTL_LOCKSTATE stores the handle's lock level in
  1.2054 +** the int at pArg and returns SQLITE_OK; any other op is SQLITE_ERROR.
  1.2055 +*/
  1.2056 +static int unixFileControl(sqlite3_file *id, int op, void *pArg){
  1.2057 +  if( op==SQLITE_FCNTL_LOCKSTATE ){
  1.2058 +    int *piLevel = (int*)pArg;
  1.2059 +    *piLevel = ((unixFile*)id)->locktype;
  1.2060 +    return SQLITE_OK;
  1.2061 +  }
  1.2062 +  return SQLITE_ERROR;
  1.2063 +}
  1.2064 +
  1.2065 +/*
  1.2066 +** Return the sector size in bytes of the underlying block device for
  1.2067 +** the specified file.  This implementation does not query the device:
  1.2068 +** id is ignored and SQLITE_DEFAULT_SECTOR_SIZE is always returned.
  1.2069 +**
  1.2070 +** SQLite code assumes this function cannot fail. It also assumes that
  1.2071 +** if two files are created in the same file-system directory (i.e.
  1.2072 +** a database and its journal file) that the sector size will be the
  1.2073 +** same for both.
  1.2074 +*/
  1.2075 +static int unixSectorSize(sqlite3_file *id){
  1.2076 +  return SQLITE_DEFAULT_SECTOR_SIZE;
  1.2077 +}
  1.2078 +
  1.2079 +/*
  1.2080 +** Return the device characteristics for the file. This is always 0; id is not examined.
  1.2081 +*/
  1.2082 +static int unixDeviceCharacteristics(sqlite3_file *id){
  1.2083 +  return 0;
  1.2084 +}
  1.2085 +
  1.2086 +/*
  1.2087 +** Initialize the contents of the unixFile structure pointed to by pId.
  1.2088 +**
  1.2089 +** The locking style is LOCKING_STYLE_NONE when noLock is true, otherwise
  1.2090 +** whatever detectLockingStyle() reports; it selects the method table and
  1.2091 +** the style-specific lockingContext built here.  On success returns
  1.2092 +** SQLITE_OK; on failure closes h (and dirfd if >=0) and returns the error.
  1.2093 +*/
  1.2094 +static int fillInUnixFile(
  1.2095 +  sqlite3_vfs *pVfs,      /* Pointer to vfs object */
  1.2096 +  int h,                  /* Open file descriptor of file being opened */
  1.2097 +  int dirfd,              /* Directory file descriptor */
  1.2098 +  sqlite3_file *pId,      /* Write to the unixFile structure here */
  1.2099 +  const char *zFilename,  /* Name of the file being opened */
  1.2100 +  int noLock              /* Omit locking if true */
  1.2101 +){
  1.2102 +  int eLockingStyle;                 /* one of the LOCKING_STYLE_* values */
  1.2103 +  unixFile *pNew = (unixFile *)pId;
  1.2104 +  int rc = SQLITE_OK;
  1.2105 +
  1.2106 +  /* Macro to define the static contents of an sqlite3_io_methods 
  1.2107 +  ** structure for a unix backend file. Different locking methods
  1.2108 +  ** require different functions for the xClose, xLock, xUnlock and
  1.2109 +  ** xCheckReservedLock methods.
  1.2110 +  */
  1.2111 +  #define IOMETHODS(xClose, xLock, xUnlock, xCheckReservedLock) {    \
  1.2112 +    1,                          /* iVersion */                           \
  1.2113 +    xClose,                     /* xClose */                             \
  1.2114 +    unixRead,                   /* xRead */                              \
  1.2115 +    unixWrite,                  /* xWrite */                             \
  1.2116 +    unixTruncate,               /* xTruncate */                          \
  1.2117 +    unixSync,                   /* xSync */                              \
  1.2118 +    unixFileSize,               /* xFileSize */                          \
  1.2119 +    xLock,                      /* xLock */                              \
  1.2120 +    xUnlock,                    /* xUnlock */                            \
  1.2121 +    xCheckReservedLock,         /* xCheckReservedLock */                 \
  1.2122 +    unixFileControl,            /* xFileControl */                       \
  1.2123 +    unixSectorSize,             /* xSectorSize */                        \
  1.2124 +    unixDeviceCharacteristics   /* xDeviceCapabilities */                \
  1.2125 +  }
  1.2126 +  static sqlite3_io_methods aIoMethod[] = {
  1.2127 +    IOMETHODS(unixClose, unixLock, unixUnlock, unixCheckReservedLock) 
  1.2128 +   ,IOMETHODS(nolockClose, nolockLock, nolockUnlock, nolockCheckReservedLock)
  1.2129 +#ifdef SQLITE_ENABLE_LOCKING_STYLE
  1.2130 +   ,IOMETHODS(dotlockClose, dotlockLock, dotlockUnlock,dotlockCheckReservedLock)
  1.2131 +   ,IOMETHODS(flockClose, flockLock, flockUnlock, flockCheckReservedLock)
  1.2132 +   ,IOMETHODS(afpClose, afpLock, afpUnlock, afpCheckReservedLock)
  1.2133 +#endif
  1.2134 +  };
  1.2135 +  /* The order of the IOMETHODS macros above is important.  It must be the
  1.2136 +  ** same order as the LOCKING_STYLE numbers, because the table is indexed
  1.2137 +  ** with eLockingStyle-1 at the end of this function. */
  1.2138 +  assert(LOCKING_STYLE_POSIX==1);
  1.2139 +  assert(LOCKING_STYLE_NONE==2);
  1.2140 +  assert(LOCKING_STYLE_DOTFILE==3);
  1.2141 +  assert(LOCKING_STYLE_FLOCK==4);
  1.2142 +  assert(LOCKING_STYLE_AFP==5);
  1.2143 +
  1.2144 +  assert( pNew->pLock==NULL );  /* pLock and pOpen must still be NULL on entry */
  1.2145 +  assert( pNew->pOpen==NULL );
  1.2146 +
  1.2147 +  OSTRACE3("OPEN    %-3d %s\n", h, zFilename);    
  1.2148 +  pNew->h = h;
  1.2149 +  pNew->dirfd = dirfd;
  1.2150 +  SET_THREADID(pNew);
  1.2151 +
  1.2152 +  if( noLock ){
  1.2153 +    eLockingStyle = LOCKING_STYLE_NONE;
  1.2154 +  }else{
  1.2155 +    eLockingStyle = detectLockingStyle(pVfs, zFilename, h);
  1.2156 +  }
  1.2157 +
  1.2158 +  switch( eLockingStyle ){  /* there is no default case: a style without a case gets no per-style setup */
  1.2159 +
  1.2160 +    case LOCKING_STYLE_POSIX: {
  1.2161 +      enterMutex();
  1.2162 +      rc = findLockInfo(h, &pNew->pLock, &pNew->pOpen);  /* presumably looks up or creates the shared records for h; confirm in findLockInfo */
  1.2163 +      leaveMutex();
  1.2164 +      break;
  1.2165 +    }
  1.2166 +
  1.2167 +#ifdef SQLITE_ENABLE_LOCKING_STYLE
  1.2168 +    case LOCKING_STYLE_AFP: {
  1.2169 +      /* AFP locking uses the file path so it needs to be included in
  1.2170 +      ** the afpLockingContext.
  1.2171 +      */
  1.2172 +      afpLockingContext *pCtx;
  1.2173 +      pNew->lockingContext = pCtx = sqlite3_malloc( sizeof(*pCtx) );
  1.2174 +      if( pCtx==0 ){
  1.2175 +        rc = SQLITE_NOMEM;
  1.2176 +      }else{
  1.2177 +        /* NB: zFilename exists and remains valid until the file is closed
  1.2178 +        ** according to requirement F11141.  So we do not need to make a
  1.2179 +        ** copy of the filename. */
  1.2180 +        pCtx->filePath = zFilename;
  1.2181 +        srandomdev();  /* presumably seeds random(), which afpLock() uses to pick its shared byte */
  1.2182 +      }
  1.2183 +      break;
  1.2184 +    }
  1.2185 +
  1.2186 +    case LOCKING_STYLE_DOTFILE: {
  1.2187 +      /* Dotfile locking uses the file path so it needs to be included in
  1.2188 +      ** the dotlockLockingContext 
  1.2189 +      */
  1.2190 +      char *zLockFile;
  1.2191 +      int nFilename;
  1.2192 +      nFilename = strlen(zFilename) + 6;  /* room for ".lock" plus the terminating NUL */
  1.2193 +      zLockFile = (char *)sqlite3_malloc(nFilename);
  1.2194 +      if( zLockFile==0 ){
  1.2195 +        rc = SQLITE_NOMEM;
  1.2196 +      }else{
  1.2197 +        sqlite3_snprintf(nFilename, zLockFile, "%s.lock", zFilename);
  1.2198 +      }
  1.2199 +      pNew->lockingContext = zLockFile;  /* NULL here if the allocation failed */
  1.2200 +      break;
  1.2201 +    }
  1.2202 +
  1.2203 +    case LOCKING_STYLE_FLOCK: 
  1.2204 +    case LOCKING_STYLE_NONE:   /* neither style needs a lockingContext */
  1.2205 +      break;
  1.2206 +#endif
  1.2207 +  }
  1.2208 +
  1.2209 +  if( rc!=SQLITE_OK ){
  1.2210 +    if( dirfd>=0 ) close(dirfd);
  1.2211 +    close(h);  /* NOTE(review): pNew->h and pNew->dirfd still hold the closed descriptor numbers */
  1.2212 +  }else{
  1.2213 +    pNew->pMethod = &aIoMethod[eLockingStyle-1];
  1.2214 +    OpenCounter(+1);
  1.2215 +  }
  1.2216 +  return rc;
  1.2217 +}
  1.2218 +
  1.2219 +/*
  1.2220 +** Open a file descriptor to the directory containing file zFilename.
  1.2221 +** On success *pFd receives the descriptor and SQLITE_OK is returned;
  1.2222 +** the caller must then close() it.  If the directory cannot be opened,
  1.2223 +** SQLITE_CANTOPEN is returned and *pFd is set to -1.
  1.2224 +**
  1.2225 +** A name directly under the root uses "/"; a name without '/' uses ".".
  1.2226 +*/
  1.2227 +static int openDirectory(const char *zFilename, int *pFd){
  1.2228 +  int ii;
  1.2229 +  int fd;
  1.2230 +  char zDirname[MAX_PATHNAME+1];
  1.2231 +  sqlite3_snprintf(MAX_PATHNAME, zDirname, "%s", zFilename);
  1.2232 +  for(ii=(int)strlen(zDirname); ii>0 && zDirname[ii]!='/'; ii--);
  1.2233 +  if( ii>0 ){
  1.2234 +    zDirname[ii] = '\0';                        /* cut at the last '/' */
  1.2235 +  }else{
  1.2236 +    if( zDirname[0]!='/' ) zDirname[0] = '.';   /* no directory part */
  1.2237 +    zDirname[1] = '\0';                         /* leaves "/" or "." */
  1.2238 +  }
  1.2239 +  fd = open(zDirname, O_RDONLY|O_BINARY, 0);
  1.2240 +  if( fd>=0 ){
  1.2241 +#ifdef FD_CLOEXEC
  1.2242 +    fcntl(fd, F_SETFD, fcntl(fd, F_GETFD, 0) | FD_CLOEXEC);
  1.2243 +#endif
  1.2244 +    OSTRACE3("OPENDIR %-3d %s\n", fd, zDirname);
  1.2245 +  }
  1.2246 +  *pFd = fd;
  1.2247 +  return (fd>=0?SQLITE_OK:SQLITE_CANTOPEN);
  1.2248 +}
  1.2249 +
  1.2250 +/*
  1.2251 +** Write the name of a temporary file that does not exist yet into zBuf,
  1.2252 +** a caller-supplied buffer of nBuf bytes.  The directory is the first
  1.2253 +** usable one of sqlite3_temp_directory, /var/tmp, /usr/tmp, /tmp and ".";
  1.2254 +** the name is SQLITE_TEMP_FILE_PREFIX plus 15 random letters and digits.
  1.2255 +** Returns SQLITE_OK, or SQLITE_ERROR if nBuf is too small.
  1.2256 +*/
  1.2257 +static int getTempname(int nBuf, char *zBuf){
  1.2258 +  static const char *azDirs[] = {
  1.2259 +     0,          /* slot for sqlite3_temp_directory, filled in below */
  1.2260 +     "/var/tmp",
  1.2261 +     "/usr/tmp",
  1.2262 +     "/tmp",
  1.2263 +     ".",
  1.2264 +  };
  1.2265 +  static const unsigned char zChars[] =
  1.2266 +    "abcdefghijklmnopqrstuvwxyz"
  1.2267 +    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
  1.2268 +    "0123456789";
  1.2269 +  const char *zDir = ".";
  1.2270 +  struct stat sStat;
  1.2271 +  int iDir, iTail, k;
  1.2272 +
  1.2273 +  /* Lets the io-error test machinery make this function fail. */
  1.2274 +  SimulateIOError( return SQLITE_IOERR );
  1.2275 +
  1.2276 +  /* Take the first candidate that is a directory and passes access(07). */
  1.2277 +  azDirs[0] = sqlite3_temp_directory;
  1.2278 +  for(iDir=0; iDir<sizeof(azDirs)/sizeof(azDirs[0]); iDir++){
  1.2279 +    const char *zTry = azDirs[iDir];
  1.2280 +    if( zTry!=0 && stat(zTry, &sStat)==0 && S_ISDIR(sStat.st_mode)
  1.2281 +     && access(zTry, 07)==0 ){
  1.2282 +      zDir = zTry;
  1.2283 +      break;
  1.2284 +    }
  1.2285 +  }
  1.2286 +
  1.2287 +  /* 17 = the '/' + 15 random characters + the terminating NUL. */
  1.2288 +  if( (strlen(zDir) + strlen(SQLITE_TEMP_FILE_PREFIX) + 17) >= nBuf ){
  1.2289 +    return SQLITE_ERROR;
  1.2290 +  }
  1.2291 +
  1.2292 +  /* Keep generating names until one does not exist yet. */
  1.2293 +  do{
  1.2294 +    sqlite3_snprintf(nBuf-17, zBuf, "%s/"SQLITE_TEMP_FILE_PREFIX, zDir);
  1.2295 +    iTail = strlen(zBuf);
  1.2296 +    sqlite3_randomness(15, &zBuf[iTail]);
  1.2297 +    for(k=0; k<15; k++){
  1.2298 +      unsigned char c = (unsigned char)zBuf[iTail+k];
  1.2299 +      zBuf[iTail+k] = (char)zChars[ c%(sizeof(zChars)-1) ];
  1.2300 +    }
  1.2301 +    zBuf[iTail+15] = 0;
  1.2302 +  }while( access(zBuf,0)==0 );
  1.2303 +  return SQLITE_OK;
  1.2304 +}
  1.2305 +
  1.2306 +
  1.2307 +/*
  1.2308 +** Open the file zPath.
  1.2309 +** 
  1.2310 +** Previously, the SQLite OS layer used three functions in place of this
  1.2311 +** one:
  1.2312 +**
  1.2313 +**     sqlite3OsOpenReadWrite();
  1.2314 +**     sqlite3OsOpenReadOnly();
  1.2315 +**     sqlite3OsOpenExclusive();
  1.2316 +**
  1.2317 +** These calls correspond to the following combinations of flags:
  1.2318 +**
  1.2319 +**     ReadWrite() ->     (READWRITE | CREATE)
  1.2320 +**     ReadOnly()  ->     (READONLY) 
  1.2321 +**     OpenExclusive() -> (READWRITE | CREATE | EXCLUSIVE)
  1.2322 +**
  1.2323 +** The old OpenExclusive() accepted a boolean argument - "delFlag". If
  1.2324 +** true, the file was configured to be automatically deleted when the
  1.2325 +** file handle closed. To achieve the same effect using this new 
  1.2326 +** interface, add the DELETEONCLOSE flag to those specified above for 
  1.2327 +** OpenExclusive().
  1.2328 +*/
  1.2329 +static int unixOpen(
  1.2330 +  sqlite3_vfs *pVfs, 
  1.2331 +  const char *zPath, 
  1.2332 +  sqlite3_file *pFile,
  1.2333 +  int flags,
  1.2334 +  int *pOutFlags
  1.2335 +){
  1.2336 +  int fd = 0;                    /* File descriptor returned by open() */
  1.2337 +  int dirfd = -1;                /* Directory file descriptor */
  1.2338 +  int oflags = 0;                /* Flags to pass to open() */
  1.2339 +  int eType = flags&0xFFFFFF00;  /* Type of file to open */
  1.2340 +  int noLock;                    /* True to omit locking primitives */
  1.2341 +
  1.2342 +  int isExclusive  = (flags & SQLITE_OPEN_EXCLUSIVE);
  1.2343 +  int isDelete     = (flags & SQLITE_OPEN_DELETEONCLOSE);
  1.2344 +  int isCreate     = (flags & SQLITE_OPEN_CREATE);
  1.2345 +  int isReadonly   = (flags & SQLITE_OPEN_READONLY);
  1.2346 +  int isReadWrite  = (flags & SQLITE_OPEN_READWRITE);
  1.2347 +
  1.2348 +  /* If creating a master or main-file journal, this function will open
  1.2349 +  ** a file-descriptor on the directory too. The first time unixSync()
  1.2350 +  ** is called the directory file descriptor will be fsync()ed and close()d.
  1.2351 +  */
  1.2352 +  int isOpenDirectory = (isCreate && 
  1.2353 +      (eType==SQLITE_OPEN_MASTER_JOURNAL || eType==SQLITE_OPEN_MAIN_JOURNAL)
  1.2354 +  );
  1.2355 +
  1.2356 +  /* If argument zPath is a NULL pointer, this function is required to open
  1.2357 +  ** a temporary file. Use this buffer to store the file name in.
  1.2358 +  */
  1.2359 +  char zTmpname[MAX_PATHNAME+1];
  1.2360 +  const char *zName = zPath;
  1.2361 +
  1.2362 +  /* Check the following statements are true: 
  1.2363 +  **
  1.2364 +  **   (a) Exactly one of the READWRITE and READONLY flags must be set, and 
  1.2365 +  **   (b) if CREATE is set, then READWRITE must also be set, and
  1.2366 +  **   (c) if EXCLUSIVE is set, then CREATE must also be set.
  1.2367 +  **   (d) if DELETEONCLOSE is set, then CREATE must also be set.
  1.2368 +  */
  1.2369 +  assert((isReadonly==0 || isReadWrite==0) && (isReadWrite || isReadonly));
  1.2370 +  assert(isCreate==0 || isReadWrite);
  1.2371 +  assert(isExclusive==0 || isCreate);
  1.2372 +  assert(isDelete==0 || isCreate);
  1.2373 +
  1.2374 +  /* The main DB, main journal, and master journal are never automatically
  1.2375 +  ** deleted
  1.2376 +  */
  1.2377 +  assert( eType!=SQLITE_OPEN_MAIN_DB || !isDelete );
  1.2378 +  assert( eType!=SQLITE_OPEN_MAIN_JOURNAL || !isDelete );
  1.2379 +  assert( eType!=SQLITE_OPEN_MASTER_JOURNAL || !isDelete );
  1.2380 +
  1.2381 +  /* Assert that the upper layer has set one of the "file-type" flags. */
  1.2382 +  assert( eType==SQLITE_OPEN_MAIN_DB      || eType==SQLITE_OPEN_TEMP_DB 
  1.2383 +       || eType==SQLITE_OPEN_MAIN_JOURNAL || eType==SQLITE_OPEN_TEMP_JOURNAL 
  1.2384 +       || eType==SQLITE_OPEN_SUBJOURNAL   || eType==SQLITE_OPEN_MASTER_JOURNAL 
  1.2385 +       || eType==SQLITE_OPEN_TRANSIENT_DB
  1.2386 +  );
  1.2387 +
  1.2388 +  memset(pFile, 0, sizeof(unixFile));
  1.2389 +
  1.2390 +  if( !zName ){
  1.2391 +    int rc;
  1.2392 +    assert(isDelete && !isOpenDirectory);
  1.2393 +    rc = getTempname(MAX_PATHNAME+1, zTmpname);
  1.2394 +    if( rc!=SQLITE_OK ){
  1.2395 +      return rc;
  1.2396 +    }
  1.2397 +    zName = zTmpname;
  1.2398 +  }
  1.2399 +
  1.2400 +  if( isReadonly )  oflags |= O_RDONLY;
  1.2401 +  if( isReadWrite ) oflags |= O_RDWR;
  1.2402 +  if( isCreate )    oflags |= O_CREAT;
  1.2403 +  if( isExclusive ) oflags |= (O_EXCL|O_NOFOLLOW);
  1.2404 +  oflags |= (O_LARGEFILE|O_BINARY);
  1.2405 +
  1.2406 +  fd = open(zName, oflags, isDelete?0600:SQLITE_DEFAULT_FILE_PERMISSIONS);
  1.2407 +  if( fd<0 && errno!=EISDIR && isReadWrite && !isExclusive ){
  1.2408 +    /* Failed to open the file for read/write access. Try read-only. */
  1.2409 +    flags &= ~(SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE);
  1.2410 +    flags |= SQLITE_OPEN_READONLY;
  1.2411 +    return unixOpen(pVfs, zPath, pFile, flags, pOutFlags);
  1.2412 +  }
  1.2413 +  if( fd<0 ){
  1.2414 +    return SQLITE_CANTOPEN;
  1.2415 +  }
  1.2416 +  if( isDelete ){
  1.2417 +    unlink(zName);
  1.2418 +  }
  1.2419 +  if( pOutFlags ){
  1.2420 +    *pOutFlags = flags;
  1.2421 +  }
  1.2422 +
  1.2423 +  assert(fd!=0);
  1.2424 +  if( isOpenDirectory ){
  1.2425 +    int rc = openDirectory(zPath, &dirfd);
  1.2426 +    if( rc!=SQLITE_OK ){
  1.2427 +      close(fd);
  1.2428 +      return rc;
  1.2429 +    }
  1.2430 +  }
  1.2431 +
  1.2432 +#ifdef FD_CLOEXEC
  1.2433 +  fcntl(fd, F_SETFD, fcntl(fd, F_GETFD, 0) | FD_CLOEXEC);
  1.2434 +#endif
  1.2435 +
  1.2436 +  noLock = eType!=SQLITE_OPEN_MAIN_DB;
  1.2437 +  return fillInUnixFile(pVfs, fd, dirfd, pFile, zPath, noLock);
  1.2438 +}
  1.2439 +
  1.2440 +/*
  1.2441 +** Delete the file at zPath. If the dirSync argument is true, fsync()
  1.2442 +** the directory after deleting the file.
  1.2443 +*/
  1.2444 +static int unixDelete(sqlite3_vfs *pVfs, const char *zPath, int dirSync){
  1.2445 +  int rc = SQLITE_OK;
  1.2446 +  SimulateIOError(return SQLITE_IOERR_DELETE);
  1.2447 +  unlink(zPath);
  1.2448 +  if( dirSync ){
  1.2449 +    int fd;
  1.2450 +    rc = openDirectory(zPath, &fd);
  1.2451 +    if( rc==SQLITE_OK ){
  1.2452 +      if( fsync(fd) ){
  1.2453 +        rc = SQLITE_IOERR_DIR_FSYNC;
  1.2454 +      }
  1.2455 +      close(fd);
  1.2456 +    }
  1.2457 +  }
  1.2458 +  return rc;
  1.2459 +}
  1.2460 +
  1.2461 +/*
  1.2462 +** Test the existance of or access permissions of file zPath. The
  1.2463 +** test performed depends on the value of flags:
  1.2464 +**
  1.2465 +**     SQLITE_ACCESS_EXISTS: Return 1 if the file exists
  1.2466 +**     SQLITE_ACCESS_READWRITE: Return 1 if the file is read and writable.
  1.2467 +**     SQLITE_ACCESS_READONLY: Return 1 if the file is readable.
  1.2468 +**
  1.2469 +** Otherwise return 0.
  1.2470 +*/
  1.2471 +static int unixAccess(
  1.2472 +  sqlite3_vfs *pVfs, 
  1.2473 +  const char *zPath, 
  1.2474 +  int flags, 
  1.2475 +  int *pResOut
  1.2476 +){
  1.2477 +  int amode = 0;
  1.2478 +  SimulateIOError( return SQLITE_IOERR_ACCESS; );
  1.2479 +  switch( flags ){
  1.2480 +    case SQLITE_ACCESS_EXISTS:
  1.2481 +      amode = F_OK;
  1.2482 +      break;
  1.2483 +    case SQLITE_ACCESS_READWRITE:
  1.2484 +      amode = W_OK|R_OK;
  1.2485 +      break;
  1.2486 +    case SQLITE_ACCESS_READ:
  1.2487 +      amode = R_OK;
  1.2488 +      break;
  1.2489 +
  1.2490 +    default:
  1.2491 +      assert(!"Invalid flags argument");
  1.2492 +  }
  1.2493 +  *pResOut = (access(zPath, amode)==0);
  1.2494 +  return SQLITE_OK;
  1.2495 +}
  1.2496 +
  1.2497 +
  1.2498 +/*
  1.2499 +** Turn a relative pathname into a full pathname. The relative path
  1.2500 +** is stored as a nul-terminated string in the buffer pointed to by
  1.2501 +** zPath. 
  1.2502 +**
  1.2503 +** zOut points to a buffer of at least sqlite3_vfs.mxPathname bytes 
  1.2504 +** (in this case, MAX_PATHNAME bytes). The full-path is written to
  1.2505 +** this buffer before returning.
  1.2506 +*/
  1.2507 +static int unixFullPathname(
  1.2508 +  sqlite3_vfs *pVfs,            /* Pointer to vfs object */
  1.2509 +  const char *zPath,            /* Possibly relative input path */
  1.2510 +  int nOut,                     /* Size of output buffer in bytes */
  1.2511 +  char *zOut                    /* Output buffer */
  1.2512 +){
  1.2513 +
  1.2514 +  /* It's odd to simulate an io-error here, but really this is just
  1.2515 +  ** using the io-error infrastructure to test that SQLite handles this
  1.2516 +  ** function failing. This function could fail if, for example, the
  1.2517 +  ** current working directly has been unlinked.
  1.2518 +  */
  1.2519 +  SimulateIOError( return SQLITE_ERROR );
  1.2520 +
  1.2521 +  assert( pVfs->mxPathname==MAX_PATHNAME );
  1.2522 +  zOut[nOut-1] = '\0';
  1.2523 +  if( zPath[0]=='/' ){
  1.2524 +    sqlite3_snprintf(nOut, zOut, "%s", zPath);
  1.2525 +  }else{
  1.2526 +    int nCwd;
  1.2527 +    if( getcwd(zOut, nOut-1)==0 ){
  1.2528 +      return SQLITE_CANTOPEN;
  1.2529 +    }
  1.2530 +    nCwd = strlen(zOut);
  1.2531 +    sqlite3_snprintf(nOut-nCwd, &zOut[nCwd], "/%s", zPath);
  1.2532 +  }
  1.2533 +  return SQLITE_OK;
  1.2534 +
  1.2535 +#if 0
  1.2536 +  /*
  1.2537 +  ** Remove "/./" path elements and convert "/A/./" path elements
  1.2538 +  ** to just "/".
  1.2539 +  */
  1.2540 +  if( zFull ){
  1.2541 +    int i, j;
  1.2542 +    for(i=j=0; zFull[i]; i++){
  1.2543 +      if( zFull[i]=='/' ){
  1.2544 +        if( zFull[i+1]=='/' ) continue;
  1.2545 +        if( zFull[i+1]=='.' && zFull[i+2]=='/' ){
  1.2546 +          i += 1;
  1.2547 +          continue;
  1.2548 +        }
  1.2549 +        if( zFull[i+1]=='.' && zFull[i+2]=='.' && zFull[i+3]=='/' ){
  1.2550 +          while( j>0 && zFull[j-1]!='/' ){ j--; }
  1.2551 +          i += 3;
  1.2552 +          continue;
  1.2553 +        }
  1.2554 +      }
  1.2555 +      zFull[j++] = zFull[i];
  1.2556 +    }
  1.2557 +    zFull[j] = 0;
  1.2558 +  }
  1.2559 +#endif
  1.2560 +}
  1.2561 +
  1.2562 +
  1.2563 +#ifndef SQLITE_OMIT_LOAD_EXTENSION
  1.2564 +/*
  1.2565 +** Interfaces for opening a shared library, finding entry points
  1.2566 +** within the shared library, and closing the shared library.
  1.2567 +*/
  1.2568 +#include <dlfcn.h>
  1.2569 +static void *unixDlOpen(sqlite3_vfs *pVfs, const char *zFilename){
  1.2570 +  return dlopen(zFilename, RTLD_NOW | RTLD_GLOBAL);
  1.2571 +}
  1.2572 +
  1.2573 +/*
  1.2574 +** SQLite calls this function immediately after a call to unixDlSym() or
  1.2575 +** unixDlOpen() fails (returns a null pointer). If a more detailed error
  1.2576 +** message is available, it is written to zBufOut. If no error message
  1.2577 +** is available, zBufOut is left unmodified and SQLite uses a default
  1.2578 +** error message.
  1.2579 +*/
  1.2580 +static void unixDlError(sqlite3_vfs *pVfs, int nBuf, char *zBufOut){
  1.2581 +  char *zErr;
  1.2582 +  enterMutex();
  1.2583 +  zErr = dlerror();
  1.2584 +  if( zErr ){
  1.2585 +    sqlite3_snprintf(nBuf, zBufOut, "%s", zErr);
  1.2586 +  }
  1.2587 +  leaveMutex();
  1.2588 +}
  1.2589 +static void *unixDlSym(sqlite3_vfs *pVfs, void *pHandle, const char *zSymbol){
  1.2590 +  return dlsym(pHandle, zSymbol);
  1.2591 +}
  1.2592 +static void unixDlClose(sqlite3_vfs *pVfs, void *pHandle){
  1.2593 +  dlclose(pHandle);
  1.2594 +}
  1.2595 +#else /* if SQLITE_OMIT_LOAD_EXTENSION is defined: */
  1.2596 +  #define unixDlOpen  0
  1.2597 +  #define unixDlError 0
  1.2598 +  #define unixDlSym   0
  1.2599 +  #define unixDlClose 0
  1.2600 +#endif
  1.2601 +
  1.2602 +/*
  1.2603 +** Write nBuf bytes of random data to the supplied buffer zBuf.
  1.2604 +*/
  1.2605 +static int unixRandomness(sqlite3_vfs *pVfs, int nBuf, char *zBuf){
  1.2606 +
  1.2607 +  assert(nBuf>=(sizeof(time_t)+sizeof(int)));
  1.2608 +
  1.2609 +  /* We have to initialize zBuf to prevent valgrind from reporting
  1.2610 +  ** errors.  The reports issued by valgrind are incorrect - we would
  1.2611 +  ** prefer that the randomness be increased by making use of the
  1.2612 +  ** uninitialized space in zBuf - but valgrind errors tend to worry
  1.2613 +  ** some users.  Rather than argue, it seems easier just to initialize
  1.2614 +  ** the whole array and silence valgrind, even if that means less randomness
  1.2615 +  ** in the random seed.
  1.2616 +  **
  1.2617 +  ** When testing, initializing zBuf[] to zero is all we do.  That means
  1.2618 +  ** that we always use the same random number sequence.  This makes the
  1.2619 +  ** tests repeatable.
  1.2620 +  */
  1.2621 +  memset(zBuf, 0, nBuf);
  1.2622 +#if !defined(SQLITE_TEST)
  1.2623 +  {
  1.2624 +    int pid, fd;
  1.2625 +    fd = open("/dev/urandom", O_RDONLY);
  1.2626 +    if( fd<0 ){
  1.2627 +      time_t t;
  1.2628 +      time(&t);
  1.2629 +      memcpy(zBuf, &t, sizeof(t));
  1.2630 +      pid = getpid();
  1.2631 +      memcpy(&zBuf[sizeof(t)], &pid, sizeof(pid));
  1.2632 +    }else{
  1.2633 +      read(fd, zBuf, nBuf);
  1.2634 +      close(fd);
  1.2635 +    }
  1.2636 +  }
  1.2637 +#endif
  1.2638 +  return SQLITE_OK;
  1.2639 +}
  1.2640 +
  1.2641 +
  1.2642 +/*
  1.2643 +** Sleep for a little while.  Return the amount of time slept.
  1.2644 +** The argument is the number of microseconds we want to sleep.
  1.2645 +** The return value is the number of microseconds of sleep actually
  1.2646 +** requested from the underlying operating system, a number which
  1.2647 +** might be greater than or equal to the argument, but not less
  1.2648 +** than the argument.
  1.2649 +*/
  1.2650 +static int unixSleep(sqlite3_vfs *pVfs, int microseconds){
  1.2651 +#if defined(HAVE_USLEEP) && HAVE_USLEEP
  1.2652 +  usleep(microseconds);
  1.2653 +  return microseconds;
  1.2654 +#else
  1.2655 +  int seconds = (microseconds+999999)/1000000;
  1.2656 +  sleep(seconds);
  1.2657 +  return seconds*1000000;
  1.2658 +#endif
  1.2659 +}
  1.2660 +
  1.2661 +/*
  1.2662 +** The following variable, if set to a non-zero value, becomes the result
  1.2663 +** returned from sqlite3OsCurrentTime().  This is used for testing.
  1.2664 +*/
  1.2665 +#ifdef SQLITE_TEST
  1.2666 +int sqlite3_current_time = 0;
  1.2667 +#endif
  1.2668 +
  1.2669 +/*
  1.2670 +** Find the current time (in Universal Coordinated Time).  Write the
  1.2671 +** current time and date as a Julian Day number into *prNow and
  1.2672 +** return 0.  Return 1 if the time and date cannot be found.
  1.2673 +*/
  1.2674 +static int unixCurrentTime(sqlite3_vfs *pVfs, double *prNow){
  1.2675 +#ifdef NO_GETTOD
  1.2676 +  time_t t;
  1.2677 +  time(&t);
  1.2678 +  *prNow = t/86400.0 + 2440587.5;
  1.2679 +#else
  1.2680 +  struct timeval sNow;
  1.2681 +  gettimeofday(&sNow, 0);
  1.2682 +  *prNow = 2440587.5 + sNow.tv_sec/86400.0 + sNow.tv_usec/86400000000.0;
  1.2683 +#endif
  1.2684 +#ifdef SQLITE_TEST
  1.2685 +  if( sqlite3_current_time ){
  1.2686 +    *prNow = sqlite3_current_time/86400.0 + 2440587.5;
  1.2687 +  }
  1.2688 +#endif
  1.2689 +  return 0;
  1.2690 +}
  1.2691 +
  1.2692 +static int unixGetLastError(sqlite3_vfs *pVfs, int nBuf, char *zBuf){
  1.2693 +  return 0;
  1.2694 +}
  1.2695 +
  1.2696 +/*
  1.2697 +** Initialize the operating system interface.
  1.2698 +*/
  1.2699 +int sqlite3_os_init(void){ 
  1.2700 +  /* Macro to define the static contents of an sqlite3_vfs structure for
  1.2701 +  ** the unix backend. The two parameters are the values to use for
  1.2702 +  ** the sqlite3_vfs.zName and sqlite3_vfs.pAppData fields, respectively.
  1.2703 +  ** 
  1.2704 +  */
  1.2705 +  #define UNIXVFS(zVfsName, pVfsAppData) {                  \
  1.2706 +    1,                    /* iVersion */                    \
  1.2707 +    sizeof(unixFile),     /* szOsFile */                    \
  1.2708 +    MAX_PATHNAME,         /* mxPathname */                  \
  1.2709 +    0,                    /* pNext */                       \
  1.2710 +    zVfsName,             /* zName */                       \
  1.2711 +    (void *)pVfsAppData,  /* pAppData */                    \
  1.2712 +    unixOpen,             /* xOpen */                       \
  1.2713 +    unixDelete,           /* xDelete */                     \
  1.2714 +    unixAccess,           /* xAccess */                     \
  1.2715 +    unixFullPathname,     /* xFullPathname */               \
  1.2716 +    unixDlOpen,           /* xDlOpen */                     \
  1.2717 +    unixDlError,          /* xDlError */                    \
  1.2718 +    unixDlSym,            /* xDlSym */                      \
  1.2719 +    unixDlClose,          /* xDlClose */                    \
  1.2720 +    unixRandomness,       /* xRandomness */                 \
  1.2721 +    unixSleep,            /* xSleep */                      \
  1.2722 +    unixCurrentTime,      /* xCurrentTime */                \
  1.2723 +    unixGetLastError      /* xGetLastError */               \
  1.2724 +  }
  1.2725 +
  1.2726 +  static sqlite3_vfs unixVfs = UNIXVFS("unix", 0);
  1.2727 +#ifdef SQLITE_ENABLE_LOCKING_STYLE
  1.2728 +#if 0
  1.2729 +  int i;
  1.2730 +  static sqlite3_vfs aVfs[] = {
  1.2731 +    UNIXVFS("unix-posix",   LOCKING_STYLE_POSIX), 
  1.2732 +    UNIXVFS("unix-afp",     LOCKING_STYLE_AFP), 
  1.2733 +    UNIXVFS("unix-flock",   LOCKING_STYLE_FLOCK), 
  1.2734 +    UNIXVFS("unix-dotfile", LOCKING_STYLE_DOTFILE), 
  1.2735 +    UNIXVFS("unix-none",    LOCKING_STYLE_NONE)
  1.2736 +  };
  1.2737 +  for(i=0; i<(sizeof(aVfs)/sizeof(sqlite3_vfs)); i++){
  1.2738 +    sqlite3_vfs_register(&aVfs[i], 0);
  1.2739 +  }
  1.2740 +#endif
  1.2741 +#endif
  1.2742 +  sqlite3_vfs_register(&unixVfs, 1);
  1.2743 +  return SQLITE_OK; 
  1.2744 +}
  1.2745 +
  1.2746 +/*
  1.2747 +** Shutdown the operating system interface. This is a no-op for unix.
  1.2748 +*/
  1.2749 +int sqlite3_os_end(void){ 
  1.2750 +  return SQLITE_OK; 
  1.2751 +}
  1.2752 + 
  1.2753 +#endif /* SQLITE_OS_UNIX */