Update contrib.
4 ** The author disclaims copyright to this source code. In place of
5 ** a legal notice, here is a blessing:
7 ** May you do good and not evil.
8 ** May you find forgiveness for yourself and forgive others.
9 ** May you share freely, never taking more than you give.
11 ******************************************************************************
13 ** This file contains code that is specific to Unix systems.
15 ** $Id: os_unix.c,v 1.204 2008/09/24 09:12:47 danielk1977 Exp $
17 #include "sqliteInt.h"
18 #if SQLITE_OS_UNIX /* This file is used on unix only */
21 ** If SQLITE_ENABLE_LOCKING_STYLE is defined and is non-zero, then several
22 ** alternative locking implementations are provided:
24 ** * POSIX locking (the default),
26 ** * Dot-file locking,
28 ** * AFP locking (OSX only).
30 ** SQLITE_ENABLE_LOCKING_STYLE only works on a Mac. It is turned on by
31 ** default on a Mac and disabled on all other posix platforms.
/*
** Choose the default for SQLITE_ENABLE_LOCKING_STYLE when the build has
** not set it explicitly: 1 when __DARWIN__ is defined, 0 everywhere else.
** The two defaults sit in opposite branches of one conditional so the
** macro is defined exactly once.
*/
#if !defined(SQLITE_ENABLE_LOCKING_STYLE)
#  if defined(__DARWIN__)
#    define SQLITE_ENABLE_LOCKING_STYLE 1
#  else
#    define SQLITE_ENABLE_LOCKING_STYLE 0
#  endif
#endif
42 ** These #defines should enable >2GB file support on Posix if the
43 ** underlying operating system supports it. If the OS lacks
44 ** large file support, these should be no-ops.
46 ** Large file support can be disabled using the -DSQLITE_DISABLE_LFS switch
47 ** on the compiler command line. This is necessary if you are compiling
48 ** on a recent machine (ex: RedHat 7.2) but you want your code to work
49 ** on an older machine (ex: RedHat 6.0). If you compile on RedHat 7.2
50 ** without this option, LFS is enabled. But LFS does not exist in the kernel
51 ** in RedHat 6.0, so the code won't work. Hence, for maximum binary
52 ** portability you should omit LFS.
/*
** Request large-file (>2GB) support from the C library unless the build
** opted out with -DSQLITE_DISABLE_LFS.  A _FILE_OFFSET_BITS value already
** chosen by the build is respected; _LARGE_FILE and _LARGEFILE_SOURCE are
** set whenever LFS is not disabled.
*/
#ifndef SQLITE_DISABLE_LFS
#  define _LARGE_FILE 1
#  ifndef _FILE_OFFSET_BITS
#    define _FILE_OFFSET_BITS 64
#  endif
#  define _LARGEFILE_SOURCE 1
#endif
63 ** standard include files.
65 #include <sys/types.h>
73 #if SQLITE_ENABLE_LOCKING_STYLE
74 #include <sys/ioctl.h>
75 #include <sys/param.h>
76 #include <sys/mount.h>
77 #endif /* SQLITE_ENABLE_LOCKING_STYLE */
80 ** If we are to be thread-safe, include the pthreads header and define
81 ** the SQLITE_UNIX_THREADS macro.
85 # define SQLITE_UNIX_THREADS 1
89 ** Default permissions when creating a new file
/*
** Default permission bits for newly created files.  A build may override
** the value by defining SQLITE_DEFAULT_FILE_PERMISSIONS beforehand.
*/
#ifndef SQLITE_DEFAULT_FILE_PERMISSIONS
#  define SQLITE_DEFAULT_FILE_PERMISSIONS 0644
#endif
96 ** Maximum supported path-length.
98 #define MAX_PATHNAME 512
102 ** The unixFile structure is a subclass of sqlite3_file specific to the unix
103 ** portability layer.
105 typedef struct unixFile unixFile;
107 sqlite3_io_methods const *pMethod; /* Always the first entry */
109 /* In test mode, increase the size of this structure a bit so that
110 ** it is larger than the struct CrashFile defined in test6.c.
114 struct openCnt *pOpen; /* Info about all open fd's on this inode */
115 struct lockInfo *pLock; /* Info about locks on this inode */
116 #if SQLITE_ENABLE_LOCKING_STYLE
117 void *lockingContext; /* Locking style specific state */
119 int h; /* The file descriptor */
120 unsigned char locktype; /* The type of lock held on this fd */
121 int dirfd; /* File descriptor for the directory */
122 #if SQLITE_THREADSAFE
123 pthread_t tid; /* The thread that "owns" this unixFile */
125 int lastErrno; /* The unix errno from the last I/O error */
129 ** Include code that is common to all os_*.c files
131 #include "os_common.h"
134 ** Define various macros that are missing from some systems.
137 # define O_LARGEFILE 0
139 #ifdef SQLITE_DISABLE_LFS
141 # define O_LARGEFILE 0
144 # define O_NOFOLLOW 0
151 ** The DJGPP compiler environment looks mostly like Unix, but it
152 ** lacks the fcntl() system call. So redefine fcntl() to be something
153 ** that always succeeds. This means that locking does not occur under
154 ** DJGPP. But it is DOS - what did you expect?
157 # define fcntl(A,B,C) 0
161 ** The threadid macro resolves to the thread-id or to 0. Used for
162 ** testing and debugging only.
/*
** threadid expands to the calling thread's id in thread-safe builds and
** to 0 otherwise, so that trace output can reference it unconditionally.
*/
#if SQLITE_THREADSAFE
#  define threadid pthread_self()
#else
#  define threadid 0
#endif
171 ** Set or check the unixFile.tid field. This field is set when an unixFile
172 ** is first opened. All subsequent uses of the unixFile verify that the
173 ** same thread is operating on the unixFile. Some operating systems do
174 ** not allow locks to be overridden by other threads and that restriction
175 ** means that sqlite3* database handles cannot be moved from one thread
176 ** to another. This logic makes sure a user does not try to do that
179 ** Version 3.3.1 (2006-01-15): unixFile can be moved from one thread to
180 ** another as long as we are running on a system that supports threads
181 ** overriding each others locks (which is now the most common behavior)
182 ** or if no locks are held. But the unixFile.pLock field needs to be
183 ** recomputed because its key includes the thread-id. See the
184 ** transferOwnership() function below for additional information
/*
** SET_THREADID(X) records the calling thread in (X)->tid.
** CHECK_THREADID(X) is true when threads cannot override each others
** locks and the calling thread is not the one recorded in (X)->tid.
**
** In builds without thread support SET_THREADID() expands to nothing
** and CHECK_THREADID() is always 0.
*/
#if SQLITE_THREADSAFE
#  define SET_THREADID(X)   ((X)->tid = pthread_self())
#  define CHECK_THREADID(X) (threadsOverrideEachOthersLocks==0 && \
                             !pthread_equal((X)->tid, pthread_self()))
#else
#  define SET_THREADID(X)
#  define CHECK_THREADID(X) 0
#endif
196 ** Here is the dirt on POSIX advisory locks: ANSI STD 1003.1 (1996)
197 ** section 6.5.2.2 lines 483 through 490 specify that when a process
198 ** sets or clears a lock, that operation overrides any prior locks set
199 ** by the same process. It does not explicitly say so, but this implies
200 ** that it overrides locks set by the same process using a different
201 ** file descriptor. Consider this test case:
202 ** int fd2 = open("./file2", O_RDWR|O_CREAT, 0644);
204 ** Suppose ./file1 and ./file2 are really the same file (because
205 ** one is a hard or symbolic link to the other) then if you set
206 ** an exclusive lock on fd1, then try to get an exclusive lock
207 ** on fd2, it works. I would have expected the second lock to
208 ** fail since there was already a lock on the file due to fd1.
209 ** But not so. Since both locks came from the same process, the
210 ** second overrides the first, even though they were on different
211 ** file descriptors opened on different file names.
213 ** Bummer. If you ask me, this is broken. Badly broken. It means
214 ** that we cannot use POSIX locks to synchronize file access among
215 ** competing threads of the same process. POSIX locks will work fine
216 ** to synchronize access for threads in separate processes, but not
217 ** threads within the same process.
219 ** To work around the problem, SQLite has to manage file locks internally
220 ** on its own. Whenever a new database is opened, we have to find the
221 ** specific inode of the database file (the inode is determined by the
222 ** st_dev and st_ino fields of the stat structure that fstat() fills in)
223 ** and check for locks already existing on that inode. When locks are
224 ** created or removed, we have to look at our own internal record of the
225 ** locks to see if another thread has previously set a lock on that same
228 ** The sqlite3_file structure for POSIX is no longer just an integer file
229 ** descriptor. It is now a structure that holds the integer file
230 ** descriptor and a pointer to a structure that describes the internal
231 ** locks on the corresponding inode. There is one locking structure
232 ** per inode, so if the same inode is opened twice, both unixFile structures
233 ** point to the same locking structure. The locking structure keeps
234 ** a reference count (so we will know when to delete it) and a "cnt"
235 ** field that tells us its internal lock status. cnt==0 means the
236 ** file is unlocked. cnt==-1 means the file has an exclusive lock.
237 ** cnt>0 means there are cnt shared locks on the file.
239 ** Any attempt to lock or unlock a file first checks the locking
240 ** structure. The fcntl() system call is only invoked to set a
241 ** POSIX lock if the internal lock structure transitions between
242 ** a locked and an unlocked state.
245 ** More recent discoveries about POSIX advisory locks. (The more
246 ** I discover, the more I realize that POSIX advisory locks are
249 ** If you close a file descriptor that points to a file that has locks,
250 ** all locks on that file that are owned by the current process are
251 ** released. To work around this problem, each unixFile structure contains
252 ** a pointer to an openCnt structure. There is one openCnt structure
253 ** per open inode, which means that multiple unixFile can point to a single
254 ** openCnt. When an attempt is made to close an unixFile, if there are
255 ** other unixFile open on the same inode that are holding locks, the call
256 ** to close() the file descriptor is deferred until all of the locks clear.
257 ** The openCnt structure keeps a list of file descriptors that need to
258 ** be closed and that list is walked (and cleared) when the last lock
261 ** First, under Linux threads, because each thread has a separate
262 ** process ID, lock operations in one thread do not override locks
263 ** to the same file in other threads. Linux threads behave like
264 ** separate processes in this respect. But, if you close a file
265 ** descriptor in linux threads, all locks are cleared, even locks
266 ** on other threads and even though the other threads have different
267 ** process IDs. Linux threads is inconsistent in this respect.
268 ** (I'm beginning to think that linux threads is an abomination too.)
269 ** The consequence of this all is that the hash table for the lockInfo
270 ** structure has to include the process id as part of its key because
271 ** locks in different threads are treated as distinct. But the
272 ** openCnt structure should not include the process id in its
273 ** key because close() clears lock on all threads, not just the current
274 ** thread. Were it not for this goofiness in linux threads, we could
275 ** combine the lockInfo and openCnt structures into a single structure.
278 ** On some versions of linux, threads can override each others locks.
279 ** On others not. Sometimes you can change the behavior on the same
280 ** system by setting the LD_ASSUME_KERNEL environment variable. The
281 ** POSIX standard is silent as to which behavior is correct, as far
282 ** as I can tell, so other versions of unix might show the same
283 ** inconsistency. There is little doubt in my mind that posix
284 ** advisory locks and linux threads are profoundly broken.
286 ** To work around the inconsistencies, we have to test at runtime
287 ** whether or not threads can override each others locks. This test
288 ** is run once, the first time any lock is attempted. A static
289 ** variable is set to record the results of this test for future
294 ** An instance of the following structure serves as the key used
295 ** to locate a particular lockInfo structure given its inode.
297 ** If threads cannot override each others locks, then we set the
298 ** lockKey.tid field to the thread ID. If threads can override
299 ** each others locks then tid is always set to zero. tid is omitted
300 ** if we compile without threading support.
303 dev_t dev; /* Device number */
304 ino_t ino; /* Inode number */
305 #if SQLITE_THREADSAFE
306 pthread_t tid; /* Thread ID or zero if threads can override each other */
311 ** An instance of the following structure is allocated for each open
312 ** inode on each thread with a different process ID. (Threads have
313 ** different process IDs on linux, but not on most other unixes.)
315 ** A single inode can have multiple file descriptors, so each unixFile
316 ** structure contains a pointer to an instance of this object and this
317 ** object keeps a count of the number of unixFile pointing to it.
320 struct lockKey key; /* The lookup key */
321 int cnt; /* Number of SHARED locks held */
322 int locktype; /* One of SHARED_LOCK, RESERVED_LOCK etc. */
323 int nRef; /* Number of pointers to this structure */
324 struct lockInfo *pNext, *pPrev; /* List of all lockInfo objects */
328 ** An instance of the following structure serves as the key used
329 ** to locate a particular openCnt structure given its inode. This
330 ** is the same as the lockKey except that the thread ID is omitted.
333 dev_t dev; /* Device number */
334 ino_t ino; /* Inode number */
338 ** An instance of the following structure is allocated for each open
339 ** inode. This structure keeps track of the number of locks on that
340 ** inode. If a close is attempted against an inode that is holding
341 ** locks, the close is deferred until all locks clear by adding the
342 ** file descriptor to be closed to the pending list.
345 struct openKey key; /* The lookup key */
346 int nRef; /* Number of pointers to this structure */
347 int nLock; /* Number of outstanding locks */
348 int nPending; /* Number of pending close() operations */
349 int *aPending; /* Malloced space holding fd's awaiting a close() */
350 struct openCnt *pNext, *pPrev; /* List of all openCnt objects */
354 ** List of all lockInfo and openCnt objects. This used to be a hash
355 ** table. But the number of objects is rarely more than a dozen and
356 ** never exceeds a few thousand. And lookup is not on a critical
357 ** path so a simple linked list will suffice.
359 static struct lockInfo *lockList = 0;
360 static struct openCnt *openList = 0;
363 ** The locking styles are associated with the different file locking
364 ** capabilities supported by different file systems.
366 ** POSIX locking style fully supports shared and exclusive byte-range locks
367 ** AFP locking only supports exclusive byte-range locks
368 ** FLOCK only supports a single file-global exclusive lock
369 ** DOTLOCK isn't a true locking style, it refers to the use of a special
370 ** file named the same as the database file with a '.lock' extension, this
371 ** can be used on file systems that do not offer any reliable file locking
372 ** NO locking means that no locking will be attempted, this is only used for
373 ** read-only file systems currently
374 ** UNSUPPORTED means that no locking will be attempted, this is only used for
375 ** file systems that are known to be unsupported
377 #define LOCKING_STYLE_POSIX 1
378 #define LOCKING_STYLE_NONE 2
379 #define LOCKING_STYLE_DOTFILE 3
380 #define LOCKING_STYLE_FLOCK 4
381 #define LOCKING_STYLE_AFP 5
384 ** Only set the lastErrno if the error code is a real error and not
385 ** a normal expected return code of SQLITE_BUSY or SQLITE_OK
/*
** True when x is a genuine locking failure, i.e. any result code other
** than SQLITE_OK or SQLITE_BUSY.  The argument is parenthesized so that
** compound arguments such as "rc & 0xff" or a ?: expression are compared
** as a whole.  Note that x is evaluated twice, so it must be free of
** side effects.
*/
#define IS_LOCK_ERROR(x) (((x) != SQLITE_OK) && ((x) != SQLITE_BUSY))
390 ** Helper functions to obtain and relinquish the global mutex.
392 static void enterMutex(void){
393 sqlite3_mutex_enter(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER));
395 static void leaveMutex(void){
396 sqlite3_mutex_leave(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER));
399 #if SQLITE_THREADSAFE
401 ** This variable records whether or not threads can override each others
404 ** 0: No. Threads cannot override each others locks.
405 ** 1: Yes. Threads can override each others locks.
406 ** -1: We don't know yet.
408 ** On some systems, we know at compile-time if threads can override each
409 ** others locks. On those systems, the SQLITE_THREAD_OVERRIDE_LOCK macro
410 ** will be set appropriately. On other systems, we have to check at
411 ** runtime. On these latter systems, SQLITE_THREAD_OVERRIDE_LOCK is
414 ** This variable normally has file scope only. But during testing, we make
415 ** it a global so that the test code can change its value in order to verify
416 ** that the right stuff happens in either case.
418 #ifndef SQLITE_THREAD_OVERRIDE_LOCK
419 # define SQLITE_THREAD_OVERRIDE_LOCK -1
422 int threadsOverrideEachOthersLocks = SQLITE_THREAD_OVERRIDE_LOCK;
424 static int threadsOverrideEachOthersLocks = SQLITE_THREAD_OVERRIDE_LOCK;
428 ** This structure holds information passed into individual test
429 ** threads by the testThreadLockingBehavior() routine.
431 struct threadTestData {
432 int fd; /* File to be locked */
433 struct flock lock; /* The locking operation */
434 int result; /* Result of the locking operation */
437 #ifdef SQLITE_LOCK_TRACE
439 ** Print out information about all locking operations.
441 ** This routine is used for troubleshooting locks on multithreaded
442 ** platforms. Enable by compiling with the -DSQLITE_LOCK_TRACE
443 ** command-line option on the compiler. This code is normally
446 static int lockTrace(int fd, int op, struct flock *p){
447 char *zOpName, *zType;
452 }else if( op==F_SETLK ){
455 s = fcntl(fd, op, p);
456 sqlite3DebugPrintf("fcntl unknown %d %d %d\n", fd, op, s);
459 if( p->l_type==F_RDLCK ){
461 }else if( p->l_type==F_WRLCK ){
463 }else if( p->l_type==F_UNLCK ){
468 assert( p->l_whence==SEEK_SET );
469 s = fcntl(fd, op, p);
471 sqlite3DebugPrintf("fcntl %d %d %s %s %d %d %d %d\n",
472 threadid, fd, zOpName, zType, (int)p->l_start, (int)p->l_len,
474 if( s==(-1) && op==F_SETLK && (p->l_type==F_RDLCK || p->l_type==F_WRLCK) ){
477 fcntl(fd, F_GETLK, &l2);
478 if( l2.l_type==F_RDLCK ){
480 }else if( l2.l_type==F_WRLCK ){
482 }else if( l2.l_type==F_UNLCK ){
487 sqlite3DebugPrintf("fcntl-failure-reason: %s %d %d %d\n",
488 zType, (int)l2.l_start, (int)l2.l_len, (int)l2.l_pid);
493 #define fcntl lockTrace
494 #endif /* SQLITE_LOCK_TRACE */
497 ** The testThreadLockingBehavior() routine launches two separate
498 ** threads on this routine. This routine attempts to lock a file
499 ** descriptor then returns. The success or failure of that attempt
500 ** allows the testThreadLockingBehavior() procedure to determine
501 ** whether or not threads can override each others locks.
503 static void *threadLockingTest(void *pArg){
504 struct threadTestData *pData = (struct threadTestData*)pArg;
505 pData->result = fcntl(pData->fd, F_SETLK, &pData->lock);
510 ** This procedure attempts to determine whether or not threads
511 ** can override each others locks then sets the
512 ** threadsOverrideEachOthersLocks variable appropriately.
514 static void testThreadLockingBehavior(int fd_orig){
516 struct threadTestData d[2];
521 memset(d, 0, sizeof(d));
523 d[0].lock.l_type = F_RDLCK;
525 d[0].lock.l_start = 0;
526 d[0].lock.l_whence = SEEK_SET;
528 d[1].lock.l_type = F_WRLCK;
529 pthread_create(&t[0], 0, threadLockingTest, &d[0]);
530 pthread_create(&t[1], 0, threadLockingTest, &d[1]);
531 pthread_join(t[0], 0);
532 pthread_join(t[1], 0);
534 threadsOverrideEachOthersLocks = d[0].result==0 && d[1].result==0;
536 #endif /* SQLITE_THREADSAFE */
539 ** Release a lockInfo structure previously allocated by findLockInfo().
541 static void releaseLockInfo(struct lockInfo *pLock){
544 if( pLock->nRef==0 ){
546 assert( pLock->pPrev->pNext==pLock );
547 pLock->pPrev->pNext = pLock->pNext;
549 assert( lockList==pLock );
550 lockList = pLock->pNext;
553 assert( pLock->pNext->pPrev==pLock );
554 pLock->pNext->pPrev = pLock->pPrev;
562 ** Release an openCnt structure previously allocated by findLockInfo().
564 static void releaseOpenCnt(struct openCnt *pOpen){
567 if( pOpen->nRef==0 ){
569 assert( pOpen->pPrev->pNext==pOpen );
570 pOpen->pPrev->pNext = pOpen->pNext;
572 assert( openList==pOpen );
573 openList = pOpen->pNext;
576 assert( pOpen->pNext->pPrev==pOpen );
577 pOpen->pNext->pPrev = pOpen->pPrev;
579 sqlite3_free(pOpen->aPending);
585 #if SQLITE_ENABLE_LOCKING_STYLE
587 ** Tests a byte-range locking query to see if byte range locks are
588 ** supported, if not we fall back to dotlockLockingStyle.
590 static int testLockingStyle(int fd){
591 struct flock lockInfo;
593 /* Test byte-range lock using fcntl(). If the call succeeds,
594 ** assume that the file-system supports POSIX style locks.
597 lockInfo.l_start = 0;
598 lockInfo.l_whence = SEEK_SET;
599 lockInfo.l_type = F_RDLCK;
600 if( fcntl(fd, F_GETLK, &lockInfo)!=-1 ) {
601 return LOCKING_STYLE_POSIX;
604 /* Testing for flock() can give false positives. So if the above
605 ** test fails, then we fall back to using dot-file style locking.
607 return LOCKING_STYLE_DOTFILE;
612 ** If SQLITE_ENABLE_LOCKING_STYLE is defined, this function examines the
613 ** f_fstypename entry in the statfs structure as returned by statfs() for
614 ** the file system hosting the database file and selects the appropriate
615 ** locking style based on its value. These values and assignments are
616 ** based on Darwin/OSX behavior and have not been thoroughly tested on
619 ** If SQLITE_ENABLE_LOCKING_STYLE is not defined, this function always
620 ** returns LOCKING_STYLE_POSIX.
622 static int detectLockingStyle(
624 const char *filePath,
627 #if SQLITE_ENABLE_LOCKING_STYLE
629 const char *zFilesystem;
632 { "hfs", LOCKING_STYLE_POSIX },
633 { "ufs", LOCKING_STYLE_POSIX },
634 { "afpfs", LOCKING_STYLE_AFP },
635 #ifdef SQLITE_ENABLE_AFP_LOCKING_SMB
636 { "smbfs", LOCKING_STYLE_AFP },
638 { "smbfs", LOCKING_STYLE_FLOCK },
640 { "msdos", LOCKING_STYLE_DOTFILE },
641 { "webdav", LOCKING_STYLE_NONE },
645 struct statfs fsInfo;
648 return LOCKING_STYLE_NONE;
650 if( pVfs->pAppData ){
651 return SQLITE_PTR_TO_INT(pVfs->pAppData);
654 if( statfs(filePath, &fsInfo) != -1 ){
655 if( fsInfo.f_flags & MNT_RDONLY ){
656 return LOCKING_STYLE_NONE;
658 for(i=0; aMap[i].zFilesystem; i++){
659 if( strcmp(fsInfo.f_fstypename, aMap[i].zFilesystem)==0 ){
660 return aMap[i].eLockingStyle;
665 /* Default case. Handles, amongst others, "nfs". */
666 return testLockingStyle(fd);
668 return LOCKING_STYLE_POSIX;
672 ** Given a file descriptor, locate lockInfo and openCnt structures that
673 ** describes that file descriptor. Create new ones if necessary. The
674 ** return values might be uninitialized if an error occurs.
676 ** Return an appropriate error code.
678 static int findLockInfo(
679 int fd, /* The file descriptor used in the key */
680 struct lockInfo **ppLock, /* Return the lockInfo structure here */
681 struct openCnt **ppOpen /* Return the openCnt structure here */
687 struct lockInfo *pLock;
688 struct openCnt *pOpen;
689 rc = fstat(fd, &statbuf);
692 if( errno==EOVERFLOW ) return SQLITE_NOLFS;
697 /* On OS X on an msdos filesystem, the inode number is reported
698 ** incorrectly for zero-size files. See ticket #3260. To work
699 ** around this problem (we consider it a bug in OS X, not SQLite)
700 ** we always increase the file size to 1 by writing a single byte
701 ** prior to accessing the inode number. The one byte written is
702 ** an ASCII 'S' character which also happens to be the first byte
703 ** in the header of every SQLite database. In this way, if there
704 ** is a race condition such that another thread has already populated
705 ** the first page of the database, no damage is done.
707 if( statbuf.st_size==0 ){
709 rc = fstat(fd, &statbuf);
715 memset(&key1, 0, sizeof(key1));
716 key1.dev = statbuf.st_dev;
717 key1.ino = statbuf.st_ino;
718 #if SQLITE_THREADSAFE
719 if( threadsOverrideEachOthersLocks<0 ){
720 testThreadLockingBehavior(fd);
722 key1.tid = threadsOverrideEachOthersLocks ? 0 : pthread_self();
724 memset(&key2, 0, sizeof(key2));
725 key2.dev = statbuf.st_dev;
726 key2.ino = statbuf.st_ino;
728 while( pLock && memcmp(&key1, &pLock->key, sizeof(key1)) ){
729 pLock = pLock->pNext;
732 pLock = sqlite3_malloc( sizeof(*pLock) );
735 goto exit_findlockinfo;
741 pLock->pNext = lockList;
743 if( lockList ) lockList->pPrev = pLock;
751 while( pOpen && memcmp(&key2, &pOpen->key, sizeof(key2)) ){
752 pOpen = pOpen->pNext;
755 pOpen = sqlite3_malloc( sizeof(*pOpen) );
757 releaseLockInfo(pLock);
759 goto exit_findlockinfo;
766 pOpen->pNext = openList;
768 if( openList ) openList->pPrev = pOpen;
782 ** Helper function for printing out trace information from debugging
783 ** binaries. This returns the string representation of the supplied
784 ** integer lock-type.
786 static const char *locktypeName(int locktype){
788 case NO_LOCK: return "NONE";
789 case SHARED_LOCK: return "SHARED";
790 case RESERVED_LOCK: return "RESERVED";
791 case PENDING_LOCK: return "PENDING";
792 case EXCLUSIVE_LOCK: return "EXCLUSIVE";
799 ** If we are currently in a different thread than the thread that the
800 ** unixFile argument belongs to, then transfer ownership of the unixFile
801 ** over to the current thread.
803 ** A unixFile is only owned by a thread on systems where one thread is
804 ** unable to override locks created by a different thread. RedHat9 is
805 ** an example of such a system.
807 ** Ownership transfer is only allowed if the unixFile is currently unlocked.
808 ** If the unixFile is locked and an ownership is wrong, then return
809 ** SQLITE_MISUSE. SQLITE_OK is returned if everything works.
811 #if SQLITE_THREADSAFE
812 static int transferOwnership(unixFile *pFile){
815 if( threadsOverrideEachOthersLocks ){
816 /* Ownership transfers not needed on this system */
819 hSelf = pthread_self();
820 if( pthread_equal(pFile->tid, hSelf) ){
821 /* We are still in the same thread */
822 OSTRACE1("No-transfer, same thread\n");
825 if( pFile->locktype!=NO_LOCK ){
826 /* We cannot change ownership while we are holding a lock! */
827 return SQLITE_MISUSE;
829 OSTRACE4("Transfer ownership of %d from %d to %d\n",
830 pFile->h, pFile->tid, hSelf);
832 if (pFile->pLock != NULL) {
833 releaseLockInfo(pFile->pLock);
834 rc = findLockInfo(pFile->h, &pFile->pLock, 0);
835 OSTRACE5("LOCK %d is now %s(%s,%d)\n", pFile->h,
836 locktypeName(pFile->locktype),
837 locktypeName(pFile->pLock->locktype), pFile->pLock->cnt);
844 /* On single-threaded builds, ownership transfer is a no-op */
845 # define transferOwnership(X) SQLITE_OK
849 ** Seek to the offset passed as the second argument, then read cnt
850 ** bytes into pBuf. Return the number of bytes actually read.
852 ** NB: If you define USE_PREAD or USE_PREAD64, then it might also
853 ** be necessary to define _XOPEN_SOURCE to be 500. This varies from
854 ** one system to another. Since SQLite does not define USE_PREAD
855 ** in any form by default, we will not attempt to define _XOPEN_SOURCE.
856 ** See tickets #2741 and #2681.
858 static int seekAndRead(unixFile *id, sqlite3_int64 offset, void *pBuf, int cnt){
862 #if defined(USE_PREAD)
863 got = pread(id->h, pBuf, cnt, offset);
864 SimulateIOError( got = -1 );
865 #elif defined(USE_PREAD64)
866 got = pread64(id->h, pBuf, cnt, offset);
867 SimulateIOError( got = -1 );
869 newOffset = lseek(id->h, offset, SEEK_SET);
870 SimulateIOError( newOffset-- );
871 if( newOffset!=offset ){
874 got = read(id->h, pBuf, cnt);
877 OSTRACE5("READ %-3d %5d %7lld %llu\n", id->h, got, offset, TIMER_ELAPSED);
882 ** Read data from a file into a buffer. Return SQLITE_OK if all
883 ** bytes were read successfully and SQLITE_IOERR if anything goes
894 got = seekAndRead((unixFile*)id, offset, pBuf, amt);
898 return SQLITE_IOERR_READ;
900 memset(&((char*)pBuf)[got], 0, amt-got);
901 return SQLITE_IOERR_SHORT_READ;
906 ** Seek to the offset passed as the second argument, then write cnt bytes
907 ** from pBuf. Return the number of bytes actually written.
909 static int seekAndWrite(unixFile *id, i64 offset, const void *pBuf, int cnt){
913 #if defined(USE_PREAD)
914 got = pwrite(id->h, pBuf, cnt, offset);
915 #elif defined(USE_PREAD64)
916 got = pwrite64(id->h, pBuf, cnt, offset);
918 newOffset = lseek(id->h, offset, SEEK_SET);
919 if( newOffset!=offset ){
922 got = write(id->h, pBuf, cnt);
925 OSTRACE5("WRITE %-3d %5d %7lld %llu\n", id->h, got, offset, TIMER_ELAPSED);
931 ** Write data from a buffer into a file. Return SQLITE_OK on success
932 ** or some other error code on failure.
934 static int unixWrite(
943 while( amt>0 && (wrote = seekAndWrite((unixFile*)id, offset, pBuf, amt))>0 ){
946 pBuf = &((char*)pBuf)[wrote];
948 SimulateIOError(( wrote=(-1), amt=1 ));
949 SimulateDiskfullError(( wrote=0, amt=1 ));
952 return SQLITE_IOERR_WRITE;
962 ** Count the number of fullsyncs and normal syncs. This is used to test
963 ** that syncs and fullsyncs are occurring at the right times.
965 int sqlite3_sync_count = 0;
966 int sqlite3_fullsync_count = 0;
970 ** Use the fdatasync() API only if the HAVE_FDATASYNC macro is defined.
971 ** Otherwise use fsync() in its place.
973 #ifndef HAVE_FDATASYNC
974 # define fdatasync fsync
978 ** Define HAVE_FULLFSYNC to 0 or 1 depending on whether or not
979 ** the F_FULLFSYNC macro is defined. F_FULLFSYNC is currently
980 ** only available on Mac OS X. But that could change.
983 # define HAVE_FULLFSYNC 1
985 # define HAVE_FULLFSYNC 0
990 ** The fsync() system call does not work as advertised on many
991 ** unix systems. The following procedure is an attempt to make
994 ** The SQLITE_NO_SYNC macro disables all fsync()s. This is useful
995 ** for testing when we want to run through the test suite quickly.
996 ** You are strongly advised *not* to deploy with SQLITE_NO_SYNC
997 ** enabled, however, since with SQLITE_NO_SYNC enabled, an OS crash
998 ** or power failure will likely corrupt the database file.
1000 static int full_fsync(int fd, int fullSync, int dataOnly){
1003 /* Record the number of times that we do a normal fsync() and
1004 ** FULLSYNC. This is used during testing to verify that this procedure
1005 ** gets called with the correct arguments.
1008 if( fullSync ) sqlite3_fullsync_count++;
1009 sqlite3_sync_count++;
1012 /* If we compiled with the SQLITE_NO_SYNC flag, then syncing is a
1015 #ifdef SQLITE_NO_SYNC
1021 rc = fcntl(fd, F_FULLFSYNC, 0);
1025 /* If the FULLFSYNC failed, fall back to attempting an fsync().
1026 * It shouldn't be possible for fullfsync to fail on the local
1027 * file system (on OSX), so failure indicates that FULLFSYNC
1028 * isn't supported for this file system. So, attempt an fsync
1029 * and (for now) ignore the overhead of a superfluous fcntl call.
1030 * It'd be better to detect fullfsync support once and avoid
1031 * the fcntl call every time sync is called.
1033 if( rc ) rc = fsync(fd);
1041 #endif /* HAVE_FULLFSYNC */
1042 #endif /* defined(SQLITE_NO_SYNC) */
1048 ** Make sure all writes to a particular file are committed to disk.
1050 ** If dataOnly==0 then both the file itself and its metadata (file
1051 ** size, access time, etc) are synced. If dataOnly!=0 then only the
1052 ** file data is synced.
1054 ** Under Unix, also make sure that the directory entry for the file
1055 ** has been created by fsync-ing the directory that contains the file.
1056 ** If we do not do this and we encounter a power failure, the directory
1057 ** entry for the journal might not exist after we reboot. The next
1058 ** SQLite to access the file will not know that the journal exists (because
1059 ** the directory entry for the journal was never created) and the transaction
1060 ** will not roll back - possibly leading to database corruption.
1062 static int unixSync(sqlite3_file *id, int flags){
1064 unixFile *pFile = (unixFile*)id;
1066 int isDataOnly = (flags&SQLITE_SYNC_DATAONLY);
1067 int isFullsync = (flags&0x0F)==SQLITE_SYNC_FULL;
1069 /* Check that one of SQLITE_SYNC_NORMAL or FULL was passed */
1070 assert((flags&0x0F)==SQLITE_SYNC_NORMAL
1071 || (flags&0x0F)==SQLITE_SYNC_FULL
1074 /* Unix cannot, but some systems may return SQLITE_FULL from here. This
1075 ** line is to test that doing so does not cause any problems.
1077 SimulateDiskfullError( return SQLITE_FULL );
1080 OSTRACE2("SYNC %-3d\n", pFile->h);
1081 rc = full_fsync(pFile->h, isFullsync, isDataOnly);
1082 SimulateIOError( rc=1 );
1084 return SQLITE_IOERR_FSYNC;
1086 if( pFile->dirfd>=0 ){
1087 OSTRACE4("DIRSYNC %-3d (have_fullfsync=%d fullsync=%d)\n", pFile->dirfd,
1088 HAVE_FULLFSYNC, isFullsync);
1089 #ifndef SQLITE_DISABLE_DIRSYNC
1090 /* The directory sync is only attempted if full_fsync is
1091 ** turned off or unavailable. If a full_fsync occurred above,
1092 ** then the directory sync is superfluous.
1094 if( (!HAVE_FULLFSYNC || !isFullsync) && full_fsync(pFile->dirfd,0,0) ){
1096 ** We have received multiple reports of fsync() returning
1097 ** errors when applied to directories on certain file systems.
1098 ** A failed directory sync is not a big deal. So it seems
1099 ** better to ignore the error. Ticket #1657
1101 /* return SQLITE_IOERR; */
1104 close(pFile->dirfd); /* Only need to sync once, so close the directory */
1105 pFile->dirfd = -1; /* when we are done. */
1111 ** Truncate an open file to a specified size
1113 static int unixTruncate(sqlite3_file *id, i64 nByte){
1116 SimulateIOError( return SQLITE_IOERR_TRUNCATE );
1117 rc = ftruncate(((unixFile*)id)->h, (off_t)nByte);
1119 return SQLITE_IOERR_TRUNCATE;
1126 ** Determine the current size of a file in bytes
1128 static int unixFileSize(sqlite3_file *id, i64 *pSize){
1132 rc = fstat(((unixFile*)id)->h, &buf);
1133 SimulateIOError( rc=1 );
1135 return SQLITE_IOERR_FSTAT;
1137 *pSize = buf.st_size;
1139 /* When opening a zero-size database, the findLockInfo() procedure
1140 ** writes a single byte into that file in order to work around a bug
1141 ** in the OS-X msdos filesystem. In order to avoid problems with upper
1142 ** layers, we need to report this file size as zero even though it is
1143 ** really 1. Ticket #3260.
1145 if( *pSize==1 ) *pSize = 0;
1152 ** This routine translates a standard POSIX errno code into something
1153 ** useful to the clients of the sqlite3 functions. Specifically, it is
1154 ** intended to translate a variety of "try again" errors into SQLITE_BUSY
1155 ** and a variety of "please close the file descriptor NOW" errors into
1158 ** Errors during initialization of locks, or file system support for locks,
1159 ** should handle ENOLCK, ENOTSUP, EOPNOTSUPP separately.
1161 static int sqliteErrorFromPosixError(int posixError, int sqliteIOErr) {
1162 switch (posixError) {
1171 /* random NFS retry error, unless during file system support
1172 * introspection, in which it actually means what it says */
1176 /* EACCES is like EAGAIN during locking operations, but not any other time*/
1177 if( (sqliteIOErr == SQLITE_IOERR_LOCK) ||
1178 (sqliteIOErr == SQLITE_IOERR_UNLOCK) ||
1179 (sqliteIOErr == SQLITE_IOERR_RDLOCK) ||
1180 (sqliteIOErr == SQLITE_IOERR_CHECKRESERVEDLOCK) ){
1183 /* else fall through */
1188 return SQLITE_IOERR_BLOCKED;
1190 #if EOPNOTSUPP!=ENOTSUP
1192 /* something went terribly awry, unless during file system support
1193 * introspection, in which it actually means what it says */
1197 /* invalid fd, unless during file system support introspection, in which
1198 * it actually means what it says */
1209 /* these should force the client to close the file and reconnect */
1217 ** This routine checks if there is a RESERVED lock held on the specified
1218 ** file by this or any other process. If such a lock is held, set *pResOut
1219 ** to a non-zero value otherwise *pResOut is set to zero. The return value
1220 ** is set to SQLITE_OK unless an I/O error occurs during lock checking.
1222 static int unixCheckReservedLock(sqlite3_file *id, int *pResOut){
1225 unixFile *pFile = (unixFile*)id;
1227 SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; );
1230 enterMutex(); /* Because pFile->pLock is shared across threads */
1232 /* Check if a thread in this process holds such a lock */
1233 if( pFile->pLock->locktype>SHARED_LOCK ){
1237 /* Otherwise see if some other process holds it.
1241 lock.l_whence = SEEK_SET;
1242 lock.l_start = RESERVED_BYTE;
1244 lock.l_type = F_WRLCK;
1245 if (-1 == fcntl(pFile->h, F_GETLK, &lock)) {
1247 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_CHECKRESERVEDLOCK);
1248 pFile->lastErrno = tErrno;
1249 } else if( lock.l_type!=F_UNLCK ){
1255 OSTRACE4("TEST WR-LOCK %d %d %d\n", pFile->h, rc, reserved);
1257 *pResOut = reserved;
1262 ** Lock the file with the lock specified by parameter locktype - one
1263 ** of the following:
1266 ** (2) RESERVED_LOCK
1268 ** (4) EXCLUSIVE_LOCK
1270 ** Sometimes when requesting one lock state, additional lock states
1271 ** are inserted in between. The locking might fail on one of the later
1272 ** transitions leaving the lock state different from what it started but
1273 ** still short of its goal. The following chart shows the allowed
1274 ** transitions and the inserted intermediate states:
1276 ** UNLOCKED -> SHARED
1277 ** SHARED -> RESERVED
1278 ** SHARED -> (PENDING) -> EXCLUSIVE
1279 ** RESERVED -> (PENDING) -> EXCLUSIVE
1280 ** PENDING -> EXCLUSIVE
1282 ** This routine will only increase a lock. Use the sqlite3OsUnlock()
1283 ** routine to lower a locking level.
1285 static int unixLock(sqlite3_file *id, int locktype){
1286 /* The following describes the implementation of the various locks and
1287 ** lock transitions in terms of the POSIX advisory shared and exclusive
1288 ** lock primitives (called read-locks and write-locks below, to avoid
1289 ** confusion with SQLite lock names). The algorithms are complicated
1290 ** slightly in order to be compatible with windows systems simultaneously
1291 ** accessing the same database file, in case that is ever required.
1293 ** Symbols defined in os.h identify the 'pending byte' and the 'reserved
1294 ** byte', each single bytes at well known offsets, and the 'shared byte
1295 ** range', a range of 510 bytes at a well known offset.
1297 ** To obtain a SHARED lock, a read-lock is obtained on the 'pending
1298 ** byte'. If this is successful, a random byte from the 'shared byte
1299 ** range' is read-locked and the lock on the 'pending byte' released.
1301 ** A process may only obtain a RESERVED lock after it has a SHARED lock.
1302 ** A RESERVED lock is implemented by grabbing a write-lock on the
1305 ** A process may only obtain a PENDING lock after it has obtained a
1306 ** SHARED lock. A PENDING lock is implemented by obtaining a write-lock
1307 ** on the 'pending byte'. This ensures that no new SHARED locks can be
1308 ** obtained, but existing SHARED locks are allowed to persist. A process
1309 ** does not have to obtain a RESERVED lock on the way to a PENDING lock.
1310 ** This property is used by the algorithm for rolling back a journal file
1313 ** An EXCLUSIVE lock, obtained after a PENDING lock is held, is
1314 ** implemented by obtaining a write-lock on the entire 'shared byte
1315 ** range'. Since all other locks require a read-lock on one of the bytes
1316 ** within this range, this ensures that no other locks are held on the
1319 ** The reason a single byte cannot be used instead of the 'shared byte
1320 ** range' is that some versions of windows do not support read-locks. By
1321 ** locking a random byte from a range, concurrent SHARED locks may exist
1322 ** even if the locking primitive used is always a write-lock.
1325 unixFile *pFile = (unixFile*)id;
1326 struct lockInfo *pLock = pFile->pLock;
1331 OSTRACE7("LOCK %d %s was %s(%s,%d) pid=%d\n", pFile->h,
1332 locktypeName(locktype), locktypeName(pFile->locktype),
1333 locktypeName(pLock->locktype), pLock->cnt , getpid());
1335 /* If there is already a lock of this type or more restrictive on the
1336 ** unixFile, do nothing. Don't use the end_lock: exit path, as
1337 ** enterMutex() hasn't been called yet.
1339 if( pFile->locktype>=locktype ){
1340 OSTRACE3("LOCK %d %s ok (already held)\n", pFile->h,
1341 locktypeName(locktype));
1345 /* Make sure the locking sequence is correct
1347 assert( pFile->locktype!=NO_LOCK || locktype==SHARED_LOCK );
1348 assert( locktype!=PENDING_LOCK );
1349 assert( locktype!=RESERVED_LOCK || pFile->locktype==SHARED_LOCK );
1351 /* This mutex is needed because pFile->pLock is shared across threads
1355 /* Make sure the current thread owns the pFile.
1357 rc = transferOwnership(pFile);
1358 if( rc!=SQLITE_OK ){
1362 pLock = pFile->pLock;
1364 /* If some thread using this PID has a lock via a different unixFile*
1365 ** handle that precludes the requested lock, return BUSY.
1367 if( (pFile->locktype!=pLock->locktype &&
1368 (pLock->locktype>=PENDING_LOCK || locktype>SHARED_LOCK))
1374 /* If a SHARED lock is requested, and some thread using this PID already
1375 ** has a SHARED or RESERVED lock, then increment reference counts and
1376 ** return SQLITE_OK.
1378 if( locktype==SHARED_LOCK &&
1379 (pLock->locktype==SHARED_LOCK || pLock->locktype==RESERVED_LOCK) ){
1380 assert( locktype==SHARED_LOCK );
1381 assert( pFile->locktype==0 );
1382 assert( pLock->cnt>0 );
1383 pFile->locktype = SHARED_LOCK;
1385 pFile->pOpen->nLock++;
1391 lock.l_whence = SEEK_SET;
1393 /* A PENDING lock is needed before acquiring a SHARED lock and before
1394 ** acquiring an EXCLUSIVE lock. For the SHARED lock, the PENDING will
1397 if( locktype==SHARED_LOCK
1398 || (locktype==EXCLUSIVE_LOCK && pFile->locktype<PENDING_LOCK)
1400 lock.l_type = (locktype==SHARED_LOCK?F_RDLCK:F_WRLCK);
1401 lock.l_start = PENDING_BYTE;
1402 s = fcntl(pFile->h, F_SETLK, &lock);
1405 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
1406 if( IS_LOCK_ERROR(rc) ){
1407 pFile->lastErrno = tErrno;
1414 /* If control gets to this point, then actually go ahead and make
1415 ** operating system calls for the specified lock.
1417 if( locktype==SHARED_LOCK ){
1419 assert( pLock->cnt==0 );
1420 assert( pLock->locktype==0 );
1422 /* Now get the read-lock */
1423 lock.l_start = SHARED_FIRST;
1424 lock.l_len = SHARED_SIZE;
1425 if( (s = fcntl(pFile->h, F_SETLK, &lock))==(-1) ){
1428 /* Drop the temporary PENDING lock */
1429 lock.l_start = PENDING_BYTE;
1431 lock.l_type = F_UNLCK;
1432 if( fcntl(pFile->h, F_SETLK, &lock)!=0 ){
1434 /* This could happen with a network mount */
1436 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_UNLOCK);
1437 if( IS_LOCK_ERROR(rc) ){
1438 pFile->lastErrno = tErrno;
1444 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
1445 if( IS_LOCK_ERROR(rc) ){
1446 pFile->lastErrno = tErrno;
1449 pFile->locktype = SHARED_LOCK;
1450 pFile->pOpen->nLock++;
1453 }else if( locktype==EXCLUSIVE_LOCK && pLock->cnt>1 ){
1454 /* We are trying for an exclusive lock but another thread in this
1455 ** same process is still holding a shared lock. */
1458 /* The request was for a RESERVED or EXCLUSIVE lock. It is
1459 ** assumed that there is a SHARED or greater lock on the file
1462 assert( 0!=pFile->locktype );
1463 lock.l_type = F_WRLCK;
1466 lock.l_start = RESERVED_BYTE;
1468 case EXCLUSIVE_LOCK:
1469 lock.l_start = SHARED_FIRST;
1470 lock.l_len = SHARED_SIZE;
1475 s = fcntl(pFile->h, F_SETLK, &lock);
1478 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
1479 if( IS_LOCK_ERROR(rc) ){
1480 pFile->lastErrno = tErrno;
1485 if( rc==SQLITE_OK ){
1486 pFile->locktype = locktype;
1487 pLock->locktype = locktype;
1488 }else if( locktype==EXCLUSIVE_LOCK ){
1489 pFile->locktype = PENDING_LOCK;
1490 pLock->locktype = PENDING_LOCK;
1495 OSTRACE4("LOCK %d %s %s\n", pFile->h, locktypeName(locktype),
1496 rc==SQLITE_OK ? "ok" : "failed");
1501 ** Lower the locking level on file descriptor pFile to locktype. locktype
1502 ** must be either NO_LOCK or SHARED_LOCK.
1504 ** If the locking level of the file descriptor is already at or below
1505 ** the requested locking level, this routine is a no-op.
1507 static int unixUnlock(sqlite3_file *id, int locktype){
1508 struct lockInfo *pLock;
1511 unixFile *pFile = (unixFile*)id;
1515 OSTRACE7("UNLOCK %d %d was %d(%d,%d) pid=%d\n", pFile->h, locktype,
1516 pFile->locktype, pFile->pLock->locktype, pFile->pLock->cnt, getpid());
1518 assert( locktype<=SHARED_LOCK );
1519 if( pFile->locktype<=locktype ){
1522 if( CHECK_THREADID(pFile) ){
1523 return SQLITE_MISUSE;
1527 pLock = pFile->pLock;
1528 assert( pLock->cnt!=0 );
1529 if( pFile->locktype>SHARED_LOCK ){
1530 assert( pLock->locktype==pFile->locktype );
1531 SimulateIOErrorBenign(1);
1532 SimulateIOError( h=(-1) )
1533 SimulateIOErrorBenign(0);
1534 if( locktype==SHARED_LOCK ){
1535 lock.l_type = F_RDLCK;
1536 lock.l_whence = SEEK_SET;
1537 lock.l_start = SHARED_FIRST;
1538 lock.l_len = SHARED_SIZE;
1539 if( fcntl(h, F_SETLK, &lock)==(-1) ){
1541 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_RDLOCK);
1542 if( IS_LOCK_ERROR(rc) ){
1543 pFile->lastErrno = tErrno;
1548 lock.l_type = F_UNLCK;
1549 lock.l_whence = SEEK_SET;
1550 lock.l_start = PENDING_BYTE;
1551 lock.l_len = 2L; assert( PENDING_BYTE+1==RESERVED_BYTE );
1552 if( fcntl(h, F_SETLK, &lock)!=(-1) ){
1553 pLock->locktype = SHARED_LOCK;
1556 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_UNLOCK);
1557 if( IS_LOCK_ERROR(rc) ){
1558 pFile->lastErrno = tErrno;
1563 if( locktype==NO_LOCK ){
1564 struct openCnt *pOpen;
1566 /* Decrement the shared lock counter. Release the lock using an
1567 ** OS call only when all threads in this same process have released
1571 if( pLock->cnt==0 ){
1572 lock.l_type = F_UNLCK;
1573 lock.l_whence = SEEK_SET;
1574 lock.l_start = lock.l_len = 0L;
1575 SimulateIOErrorBenign(1);
1576 SimulateIOError( h=(-1) )
1577 SimulateIOErrorBenign(0);
1578 if( fcntl(h, F_SETLK, &lock)!=(-1) ){
1579 pLock->locktype = NO_LOCK;
1582 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_UNLOCK);
1583 if( IS_LOCK_ERROR(rc) ){
1584 pFile->lastErrno = tErrno;
1591 /* Decrement the count of locks against this same file. When the
1592 ** count reaches zero, close any other file descriptors whose close
1593 ** was deferred because of outstanding locks.
1595 if( rc==SQLITE_OK ){
1596 pOpen = pFile->pOpen;
1598 assert( pOpen->nLock>=0 );
1599 if( pOpen->nLock==0 && pOpen->nPending>0 ){
1601 for(i=0; i<pOpen->nPending; i++){
1602 close(pOpen->aPending[i]);
1604 sqlite3_free(pOpen->aPending);
1605 pOpen->nPending = 0;
1606 pOpen->aPending = 0;
1613 if( rc==SQLITE_OK ) pFile->locktype = locktype;
1618 ** This function performs the parts of the "close file" operation
1619 ** common to all locking schemes. It closes the directory and file
1620 ** handles, if they are valid, and sets all fields of the unixFile
1623 static int closeUnixFile(sqlite3_file *id){
1624 unixFile *pFile = (unixFile*)id;
1626 if( pFile->dirfd>=0 ){
1627 close(pFile->dirfd);
1632 OSTRACE2("CLOSE %-3d\n", pFile->h);
1634 memset(pFile, 0, sizeof(unixFile));
1642 static int unixClose(sqlite3_file *id){
1644 unixFile *pFile = (unixFile *)id;
1645 unixUnlock(id, NO_LOCK);
1647 if( pFile->pOpen && pFile->pOpen->nLock ){
1648 /* If there are outstanding locks, do not actually close the file just
1649 ** yet because that would clear those locks. Instead, add the file
1650 ** descriptor to pOpen->aPending. It will be automatically closed when
1651 ** the last lock is cleared.
1654 struct openCnt *pOpen = pFile->pOpen;
1655 aNew = sqlite3_realloc(pOpen->aPending, (pOpen->nPending+1)*sizeof(int) );
1657 /* If a malloc fails, just leak the file descriptor */
1659 pOpen->aPending = aNew;
1660 pOpen->aPending[pOpen->nPending] = pFile->h;
1665 releaseLockInfo(pFile->pLock);
1666 releaseOpenCnt(pFile->pOpen);
1674 #if SQLITE_ENABLE_LOCKING_STYLE
1675 #pragma mark AFP Support
/*
** The afpLockingContext structure contains all afp lock specific state
*/
typedef struct afpLockingContext afpLockingContext;
struct afpLockingContext {
  unsigned long long sharedLockByte;  /* Offset past SHARED_FIRST of the
                                      ** byte locked for a SHARED lock */
  const char *filePath;               /* Path handed to fsctl() */
};

/*
** Parameter block passed to fsctl() with afpfsByteRangeLock2FSCTL.
*/
struct ByteRangeLockPB2
{
  unsigned long long offset;        /* offset to first byte to lock */
  unsigned long long length;        /* nbr of bytes to lock */
  unsigned long long retRangeStart; /* nbr of 1st byte locked if successful */
  unsigned char unLockFlag;         /* 1 = unlock, 0 = lock */
  unsigned char startEndFlag;       /* 1=rel to end of fork, 0=rel to start */
  int fd;                           /* file desc to assoc this lock with */
};

#define afpfsByteRangeLock2FSCTL _IOWR('z', 23, struct ByteRangeLockPB2)
1699 ** Return SQLITE_OK on success, SQLITE_BUSY on failure.
1701 static int _AFPFSSetLock(
1704 unsigned long long offset,
1705 unsigned long long length,
1708 struct ByteRangeLockPB2 pb;
1711 pb.unLockFlag = setLockFlag ? 0 : 1;
1712 pb.startEndFlag = 0;
1716 OSTRACE5("AFPLOCK setting lock %s for %d in range %llx:%llx\n",
1717 (setLockFlag?"ON":"OFF"), pFile->h, offset, length);
1718 err = fsctl(path, afpfsByteRangeLock2FSCTL, &pb, 0);
1722 OSTRACE4("AFPLOCK failed to fsctl() '%s' %d %s\n", path, tErrno, strerror(tErrno));
1723 rc = sqliteErrorFromPosixError(tErrno, setLockFlag ? SQLITE_IOERR_LOCK : SQLITE_IOERR_UNLOCK); /* error */
1724 if( IS_LOCK_ERROR(rc) ){
1725 pFile->lastErrno = tErrno;
1733 /* AFP-style reserved lock checking following the behavior of
1734 ** unixCheckReservedLock, see the unixCheckReservedLock function comments */
1735 static int afpCheckReservedLock(sqlite3_file *id, int *pResOut){
1738 unixFile *pFile = (unixFile*)id;
1740 SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; );
1743 afpLockingContext *context = (afpLockingContext *) pFile->lockingContext;
1745 /* Check if a thread in this process holds such a lock */
1746 if( pFile->locktype>SHARED_LOCK ){
1750 /* Otherwise see if some other process holds it.
1753 /* lock the RESERVED byte */
1754 int lrc = _AFPFSSetLock(context->filePath, pFile, RESERVED_BYTE, 1,1);
1755 if( SQLITE_OK==lrc ){
1756 /* if we succeeded in taking the reserved lock, unlock it to restore
1757 ** the original state */
1758 lrc = _AFPFSSetLock(context->filePath, pFile, RESERVED_BYTE, 1, 0);
1760 /* if we failed to get the lock then someone else must have it */
1763 if( IS_LOCK_ERROR(lrc) ){
1768 OSTRACE4("TEST WR-LOCK %d %d %d\n", pFile->h, rc, reserved);
1770 *pResOut = reserved;
1774 /* AFP-style locking following the behavior of unixLock, see the unixLock
1775 ** function comments for details of lock management. */
1776 static int afpLock(sqlite3_file *id, int locktype){
1778 unixFile *pFile = (unixFile*)id;
1779 afpLockingContext *context = (afpLockingContext *) pFile->lockingContext;
1782 OSTRACE5("LOCK %d %s was %s pid=%d\n", pFile->h,
1783 locktypeName(locktype), locktypeName(pFile->locktype), getpid());
1785 /* If there is already a lock of this type or more restrictive on the
1786 ** unixFile, do nothing. Don't use the afp_end_lock: exit path, as
1787 ** enterMutex() hasn't been called yet.
1789 if( pFile->locktype>=locktype ){
1790 OSTRACE3("LOCK %d %s ok (already held)\n", pFile->h,
1791 locktypeName(locktype));
1795 /* Make sure the locking sequence is correct
1797 assert( pFile->locktype!=NO_LOCK || locktype==SHARED_LOCK );
1798 assert( locktype!=PENDING_LOCK );
1799 assert( locktype!=RESERVED_LOCK || pFile->locktype==SHARED_LOCK );
1801 /* This mutex is needed because pFile->pLock is shared across threads
1805 /* Make sure the current thread owns the pFile.
1807 rc = transferOwnership(pFile);
1808 if( rc!=SQLITE_OK ){
1813 /* A PENDING lock is needed before acquiring a SHARED lock and before
1814 ** acquiring an EXCLUSIVE lock. For the SHARED lock, the PENDING will
1817 if( locktype==SHARED_LOCK
1818 || (locktype==EXCLUSIVE_LOCK && pFile->locktype<PENDING_LOCK)
1821 failed = _AFPFSSetLock(context->filePath, pFile, PENDING_BYTE, 1, 1);
1828 /* If control gets to this point, then actually go ahead and make
1829 ** operating system calls for the specified lock.
1831 if( locktype==SHARED_LOCK ){
1832 int lk, lrc1, lrc2, lrc1Errno;
1834 /* Now get the read-lock SHARED_LOCK */
1835 /* note that the quality of the randomness doesn't matter that much */
1837 context->sharedLockByte = (lk & 0x7fffffff)%(SHARED_SIZE - 1);
1838 lrc1 = _AFPFSSetLock(context->filePath, pFile,
1839 SHARED_FIRST+context->sharedLockByte, 1, 1);
1840 if( IS_LOCK_ERROR(lrc1) ){
1841 lrc1Errno = pFile->lastErrno;
1843 /* Drop the temporary PENDING lock */
1844 lrc2 = _AFPFSSetLock(context->filePath, pFile, PENDING_BYTE, 1, 0);
1846 if( IS_LOCK_ERROR(lrc1) ) {
1847 pFile->lastErrno = lrc1Errno;
1850 } else if( IS_LOCK_ERROR(lrc2) ){
1853 } else if( lrc1 != SQLITE_OK ) {
1856 pFile->locktype = SHARED_LOCK;
1859 /* The request was for a RESERVED or EXCLUSIVE lock. It is
1860 ** assumed that there is a SHARED or greater lock on the file
1864 assert( 0!=pFile->locktype );
1865 if (locktype >= RESERVED_LOCK && pFile->locktype < RESERVED_LOCK) {
1866 /* Acquire a RESERVED lock */
1867 failed = _AFPFSSetLock(context->filePath, pFile, RESERVED_BYTE, 1,1);
1869 if (!failed && locktype == EXCLUSIVE_LOCK) {
1870 /* Acquire an EXCLUSIVE lock */
1872 /* Remove the shared lock before trying the range. we'll need to
1873 ** reestablish the shared lock if we can't get the afpUnlock
1875 if (!(failed = _AFPFSSetLock(context->filePath, pFile, SHARED_FIRST +
1876 context->sharedLockByte, 1, 0))) {
1877 /* now attempt to get the exclusive lock range */
1878 failed = _AFPFSSetLock(context->filePath, pFile, SHARED_FIRST,
1880 if (failed && (failed = _AFPFSSetLock(context->filePath, pFile,
1881 SHARED_FIRST + context->sharedLockByte, 1, 1))) {
1893 if( rc==SQLITE_OK ){
1894 pFile->locktype = locktype;
1895 }else if( locktype==EXCLUSIVE_LOCK ){
1896 pFile->locktype = PENDING_LOCK;
1901 OSTRACE4("LOCK %d %s %s\n", pFile->h, locktypeName(locktype),
1902 rc==SQLITE_OK ? "ok" : "failed");
1907 ** Lower the locking level on file descriptor pFile to locktype. locktype
1908 ** must be either NO_LOCK or SHARED_LOCK.
1910 ** If the locking level of the file descriptor is already at or below
1911 ** the requested locking level, this routine is a no-op.
1913 static int afpUnlock(sqlite3_file *id, int locktype) {
1915 unixFile *pFile = (unixFile*)id;
1916 afpLockingContext *context = (afpLockingContext *) pFile->lockingContext;
1919 OSTRACE5("UNLOCK %d %d was %d pid=%d\n", pFile->h, locktype,
1920 pFile->locktype, getpid());
1922 assert( locktype<=SHARED_LOCK );
1923 if( pFile->locktype<=locktype ){
1926 if( CHECK_THREADID(pFile) ){
1927 return SQLITE_MISUSE;
1930 int failed = SQLITE_OK;
1931 if( pFile->locktype>SHARED_LOCK ){
1932 if( locktype==SHARED_LOCK ){
1934 /* unlock the exclusive range - then re-establish the shared lock */
1935 if (pFile->locktype==EXCLUSIVE_LOCK) {
1936 failed = _AFPFSSetLock(context->filePath, pFile, SHARED_FIRST,
1939 /* successfully removed the exclusive lock */
1940 if ((failed = _AFPFSSetLock(context->filePath, pFile, SHARED_FIRST+
1941 context->sharedLockByte, 1, 1))) {
1942 /* failed to re-establish our shared lock */
1950 if (rc == SQLITE_OK && pFile->locktype>=PENDING_LOCK) {
1951 if ((failed = _AFPFSSetLock(context->filePath, pFile,
1952 PENDING_BYTE, 1, 0))){
1953 /* failed to release the pending lock */
1957 if (rc == SQLITE_OK && pFile->locktype>=RESERVED_LOCK) {
1958 if ((failed = _AFPFSSetLock(context->filePath, pFile,
1959 RESERVED_BYTE, 1, 0))) {
1960 /* failed to release the reserved lock */
1965 if( locktype==NO_LOCK ){
1966 int failed = _AFPFSSetLock(context->filePath, pFile,
1967 SHARED_FIRST + context->sharedLockByte, 1, 0);
1972 if (rc == SQLITE_OK)
1973 pFile->locktype = locktype;
1979 ** Close a file & cleanup AFP specific locking context
1981 static int afpClose(sqlite3_file *id) {
1983 unixFile *pFile = (unixFile*)id;
1984 afpUnlock(id, NO_LOCK);
1985 sqlite3_free(pFile->lockingContext);
1987 return closeUnixFile(id);
1991 #pragma mark flock() style locking
1994 ** The flockLockingContext is not used
1996 typedef void flockLockingContext;
1998 /* flock-style reserved lock checking following the behavior of
1999 ** unixCheckReservedLock, see the unixCheckReservedLock function comments */
2000 static int flockCheckReservedLock(sqlite3_file *id, int *pResOut){
2003 unixFile *pFile = (unixFile*)id;
2005 SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; );
2009 /* Check if a thread in this process holds such a lock */
2010 if( pFile->locktype>SHARED_LOCK ){
2014 /* Otherwise see if some other process holds it. */
2016 /* attempt to get the lock */
2017 int lrc = flock(pFile->h, LOCK_EX | LOCK_NB);
2019 /* got the lock, unlock it */
2020 lrc = flock(pFile->h, LOCK_UN);
2023 /* unlock failed with an error */
2024 lrc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_UNLOCK);
2025 if( IS_LOCK_ERROR(lrc) ){
2026 pFile->lastErrno = tErrno;
2033 /* someone else might have it reserved */
2034 lrc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
2035 if( IS_LOCK_ERROR(lrc) ){
2036 pFile->lastErrno = tErrno;
2041 OSTRACE4("TEST WR-LOCK %d %d %d\n", pFile->h, rc, reserved);
2043 *pResOut = reserved;
2047 static int flockLock(sqlite3_file *id, int locktype) {
2049 unixFile *pFile = (unixFile*)id;
2053 /* if we already have a lock, it is exclusive.
2054 ** Just adjust level and punt on outta here. */
2055 if (pFile->locktype > NO_LOCK) {
2056 pFile->locktype = locktype;
2060 /* grab an exclusive lock */
2062 if (flock(pFile->h, LOCK_EX | LOCK_NB)) {
2064 /* didn't get, must be busy */
2065 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
2066 if( IS_LOCK_ERROR(rc) ){
2067 pFile->lastErrno = tErrno;
2070 /* got it, set the type and return ok */
2071 pFile->locktype = locktype;
2073 OSTRACE4("LOCK %d %s %s\n", pFile->h, locktypeName(locktype),
2074 rc==SQLITE_OK ? "ok" : "failed");
2078 static int flockUnlock(sqlite3_file *id, int locktype) {
2079 unixFile *pFile = (unixFile*)id;
2082 OSTRACE5("UNLOCK %d %d was %d pid=%d\n", pFile->h, locktype,
2083 pFile->locktype, getpid());
2084 assert( locktype<=SHARED_LOCK );
2086 /* no-op if possible */
2087 if( pFile->locktype==locktype ){
2091 /* shared can just be set because we always have an exclusive */
2092 if (locktype==SHARED_LOCK) {
2093 pFile->locktype = locktype;
2097 /* no, really, unlock. */
2098 int rc = flock(pFile->h, LOCK_UN);
2100 int r, tErrno = errno;
2101 r = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_UNLOCK);
2102 if( IS_LOCK_ERROR(r) ){
2103 pFile->lastErrno = tErrno;
2107 pFile->locktype = NO_LOCK;
2115 static int flockClose(sqlite3_file *id) {
2117 flockUnlock(id, NO_LOCK);
2119 return closeUnixFile(id);
2122 #pragma mark Old-School .lock file based locking
2124 /* Dotlock-style reserved lock checking following the behavior of
2125 ** unixCheckReservedLock, see the unixCheckReservedLock function comments */
2126 static int dotlockCheckReservedLock(sqlite3_file *id, int *pResOut) {
2129 unixFile *pFile = (unixFile*)id;
2131 SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; );
2135 /* Check if a thread in this process holds such a lock */
2136 if( pFile->locktype>SHARED_LOCK ){
2140 /* Otherwise see if some other process holds it. */
2142 char *zLockFile = (char *)pFile->lockingContext;
2143 struct stat statBuf;
2145 if( lstat(zLockFile, &statBuf)==0 ){
2146 /* file exists, someone else has the lock */
2149 /* file does not exist, we could have it if we want it */
2151 if( ENOENT != tErrno ){
2152 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_CHECKRESERVEDLOCK);
2153 pFile->lastErrno = tErrno;
2157 OSTRACE4("TEST WR-LOCK %d %d %d\n", pFile->h, rc, reserved);
2159 *pResOut = reserved;
2163 static int dotlockLock(sqlite3_file *id, int locktype) {
2164 unixFile *pFile = (unixFile*)id;
2166 char *zLockFile = (char *)pFile->lockingContext;
2169 /* if we already have a lock, it is exclusive.
2170 ** Just adjust level and punt on outta here. */
2171 if (pFile->locktype > NO_LOCK) {
2172 pFile->locktype = locktype;
2174 /* Always update the timestamp on the old file */
2175 utimes(zLockFile, NULL);
2177 goto dotlock_end_lock;
2180 /* check to see if lock file already exists */
2181 struct stat statBuf;
2182 if (lstat(zLockFile,&statBuf) == 0){
2183 rc = SQLITE_BUSY; /* it does, busy */
2184 goto dotlock_end_lock;
2187 /* grab an exclusive lock */
2188 fd = open(zLockFile,O_RDONLY|O_CREAT|O_EXCL,0600);
2190 /* failed to open/create the file, someone else may have stolen the lock */
2192 if( EEXIST == tErrno ){
2195 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
2196 if( IS_LOCK_ERROR(rc) ){
2197 pFile->lastErrno = tErrno;
2200 goto dotlock_end_lock;
2204 /* got it, set the type and return ok */
2205 pFile->locktype = locktype;
2211 static int dotlockUnlock(sqlite3_file *id, int locktype) {
2212 unixFile *pFile = (unixFile*)id;
2213 char *zLockFile = (char *)pFile->lockingContext;
2216 OSTRACE5("UNLOCK %d %d was %d pid=%d\n", pFile->h, locktype,
2217 pFile->locktype, getpid());
2218 assert( locktype<=SHARED_LOCK );
2220 /* no-op if possible */
2221 if( pFile->locktype==locktype ){
2225 /* shared can just be set because we always have an exclusive */
2226 if (locktype==SHARED_LOCK) {
2227 pFile->locktype = locktype;
2231 /* no, really, unlock. */
2232 if (unlink(zLockFile) ) {
2233 int rc, tErrno = errno;
2234 if( ENOENT != tErrno ){
2235 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_UNLOCK);
2237 if( IS_LOCK_ERROR(rc) ){
2238 pFile->lastErrno = tErrno;
2242 pFile->locktype = NO_LOCK;
2249 static int dotlockClose(sqlite3_file *id) {
2251 unixFile *pFile = (unixFile*)id;
2252 dotlockUnlock(id, NO_LOCK);
2253 sqlite3_free(pFile->lockingContext);
2255 return closeUnixFile(id);
2259 #endif /* SQLITE_ENABLE_LOCKING_STYLE */
2262 ** The nolockLockingContext is void
2264 typedef void nolockLockingContext;
2266 static int nolockCheckReservedLock(sqlite3_file *id, int *pResOut) {
2271 static int nolockLock(sqlite3_file *id, int locktype) {
2275 static int nolockUnlock(sqlite3_file *id, int locktype) {
2282 static int nolockClose(sqlite3_file *id) {
2283 return closeUnixFile(id);
2288 ** Information and control of an open file handle.
2290 static int unixFileControl(sqlite3_file *id, int op, void *pArg){
2292 case SQLITE_FCNTL_LOCKSTATE: {
2293 *(int*)pArg = ((unixFile*)id)->locktype;
2297 return SQLITE_ERROR;
2301 ** Return the sector size in bytes of the underlying block device for
2302 ** the specified file. This is almost always 512 bytes, but may be
2303 ** larger for some devices.
2305 ** SQLite code assumes this function cannot fail. It also assumes that
2306 ** if two files are created in the same file-system directory (i.e.
2307 ** a database and its journal file) that the sector size will be the
2310 static int unixSectorSize(sqlite3_file *id){
2311 return SQLITE_DEFAULT_SECTOR_SIZE;
2315 ** Return the device characteristics for the file. This is always 0.
2317 static int unixDeviceCharacteristics(sqlite3_file *id){
2322 ** Initialize the contents of the unixFile structure pointed to by pId.
2324 ** When locking extensions are enabled, the filepath and locking style
2325 ** are needed to determine the unixFile pMethod to use for locking operations.
2326 ** The locking-style specific lockingContext data structure is created
2327 ** and assigned here also.
2329 static int fillInUnixFile(
2330 sqlite3_vfs *pVfs, /* Pointer to vfs object */
2331 int h, /* Open file descriptor of file being opened */
2332 int dirfd, /* Directory file descriptor */
2333 sqlite3_file *pId, /* Write to the unixFile structure here */
2334 const char *zFilename, /* Name of the file being opened */
2335 int noLock /* Omit locking if true */
2338 unixFile *pNew = (unixFile *)pId;
2341 /* Macro to define the static contents of an sqlite3_io_methods
2342 ** structure for a unix backend file. Different locking methods
2343 ** require different functions for the xClose, xLock, xUnlock and
2344 ** xCheckReservedLock methods.
2346 #define IOMETHODS(xClose, xLock, xUnlock, xCheckReservedLock) { \
2348 xClose, /* xClose */ \
2349 unixRead, /* xRead */ \
2350 unixWrite, /* xWrite */ \
2351 unixTruncate, /* xTruncate */ \
2352 unixSync, /* xSync */ \
2353 unixFileSize, /* xFileSize */ \
2354 xLock, /* xLock */ \
2355 xUnlock, /* xUnlock */ \
2356 xCheckReservedLock, /* xCheckReservedLock */ \
2357 unixFileControl, /* xFileControl */ \
2358 unixSectorSize, /* xSectorSize */ \
2359 unixDeviceCharacteristics /* xDeviceCapabilities */ \
2361 static sqlite3_io_methods aIoMethod[] = {
2362 IOMETHODS(unixClose, unixLock, unixUnlock, unixCheckReservedLock)
2363 ,IOMETHODS(nolockClose, nolockLock, nolockUnlock, nolockCheckReservedLock)
2364 #if SQLITE_ENABLE_LOCKING_STYLE
2365 ,IOMETHODS(dotlockClose, dotlockLock, dotlockUnlock,dotlockCheckReservedLock)
2366 ,IOMETHODS(flockClose, flockLock, flockUnlock, flockCheckReservedLock)
2367 ,IOMETHODS(afpClose, afpLock, afpUnlock, afpCheckReservedLock)
2370 /* The order of the IOMETHODS macros above is important. It must be the
2371 ** same order as the LOCKING_STYLE numbers
2373 assert(LOCKING_STYLE_POSIX==1);
2374 assert(LOCKING_STYLE_NONE==2);
2375 assert(LOCKING_STYLE_DOTFILE==3);
2376 assert(LOCKING_STYLE_FLOCK==4);
2377 assert(LOCKING_STYLE_AFP==5);
2379 assert( pNew->pLock==NULL );
2380 assert( pNew->pOpen==NULL );
2382 OSTRACE3("OPEN %-3d %s\n", h, zFilename);
2384 pNew->dirfd = dirfd;
2388 eLockingStyle = LOCKING_STYLE_NONE;
2390 eLockingStyle = detectLockingStyle(pVfs, zFilename, h);
2393 switch( eLockingStyle ){
2395 case LOCKING_STYLE_POSIX: {
2397 rc = findLockInfo(h, &pNew->pLock, &pNew->pOpen);
2402 #if SQLITE_ENABLE_LOCKING_STYLE
2403 case LOCKING_STYLE_AFP: {
2404 /* AFP locking uses the file path so it needs to be included in
2405 ** the afpLockingContext.
2407 afpLockingContext *pCtx;
2408 pNew->lockingContext = pCtx = sqlite3_malloc( sizeof(*pCtx) );
2412 /* NB: zFilename exists and remains valid until the file is closed
2413 ** according to requirement F11141. So we do not need to make a
2414 ** copy of the filename. */
2415 pCtx->filePath = zFilename;
2421 case LOCKING_STYLE_DOTFILE: {
2422 /* Dotfile locking uses the file path so it needs to be included in
2423 ** the dotlockLockingContext
2427 nFilename = strlen(zFilename) + 6;
2428 zLockFile = (char *)sqlite3_malloc(nFilename);
2432 sqlite3_snprintf(nFilename, zLockFile, "%s.lock", zFilename);
2434 pNew->lockingContext = zLockFile;
2438 case LOCKING_STYLE_FLOCK:
2439 case LOCKING_STYLE_NONE:
2444 pNew->lastErrno = 0;
2445 if( rc!=SQLITE_OK ){
2446 if( dirfd>=0 ) close(dirfd);
2449 pNew->pMethod = &aIoMethod[eLockingStyle-1];
2456 ** Open a file descriptor to the directory containing file zFilename.
2457 ** If successful, *pFd is set to the opened file descriptor and
2458 ** SQLITE_OK is returned. If an error occurs, either SQLITE_NOMEM
2459 ** or SQLITE_CANTOPEN is returned and *pFd is set to an undefined
2462 ** If SQLITE_OK is returned, the caller is responsible for closing
2463 ** the file descriptor *pFd using close().
2465 static int openDirectory(const char *zFilename, int *pFd){
2468 char zDirname[MAX_PATHNAME+1];
2470 sqlite3_snprintf(MAX_PATHNAME, zDirname, "%s", zFilename);
2471 for(ii=strlen(zDirname); ii>=0 && zDirname[ii]!='/'; ii--);
2473 zDirname[ii] = '\0';
2474 fd = open(zDirname, O_RDONLY|O_BINARY, 0);
2477 fcntl(fd, F_SETFD, fcntl(fd, F_GETFD, 0) | FD_CLOEXEC);
2479 OSTRACE3("OPENDIR %-3d %s\n", fd, zDirname);
2483 return (fd>=0?SQLITE_OK:SQLITE_CANTOPEN);
2487 ** Create a temporary file name in zBuf. zBuf must be allocated
2488 ** by the calling process and must be big enough to hold at least
2489 ** pVfs->mxPathname bytes.
2491 static int getTempname(int nBuf, char *zBuf){
2492 static const char *azDirs[] = {
2499 static const unsigned char zChars[] =
2500 "abcdefghijklmnopqrstuvwxyz"
2501 "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
2505 const char *zDir = ".";
2507 /* It's odd to simulate an io-error here, but really this is just
2508 ** using the io-error infrastructure to test that SQLite handles this
2509 ** function failing.
2511 SimulateIOError( return SQLITE_IOERR );
2513 azDirs[0] = sqlite3_temp_directory;
2514 for(i=0; i<sizeof(azDirs)/sizeof(azDirs[0]); i++){
2515 if( azDirs[i]==0 ) continue;
2516 if( stat(azDirs[i], &buf) ) continue;
2517 if( !S_ISDIR(buf.st_mode) ) continue;
2518 if( access(azDirs[i], 07) ) continue;
2523 /* Check that the output buffer is large enough for the temporary file
2524 ** name. If it is not, return SQLITE_ERROR.
2526 if( (strlen(zDir) + strlen(SQLITE_TEMP_FILE_PREFIX) + 17) >= nBuf ){
2527 return SQLITE_ERROR;
2531 sqlite3_snprintf(nBuf-17, zBuf, "%s/"SQLITE_TEMP_FILE_PREFIX, zDir);
2533 sqlite3_randomness(15, &zBuf[j]);
2534 for(i=0; i<15; i++, j++){
2535 zBuf[j] = (char)zChars[ ((unsigned char)zBuf[j])%(sizeof(zChars)-1) ];
2538 }while( access(zBuf,0)==0 );
2544 ** Open the file zPath.
2546 ** Previously, the SQLite OS layer used three functions in place of this
2549 ** sqlite3OsOpenReadWrite();
2550 ** sqlite3OsOpenReadOnly();
2551 ** sqlite3OsOpenExclusive();
2553 ** These calls correspond to the following combinations of flags:
2555 ** ReadWrite() -> (READWRITE | CREATE)
2556 ** ReadOnly() -> (READONLY)
2557 ** OpenExclusive() -> (READWRITE | CREATE | EXCLUSIVE)
2559 ** The old OpenExclusive() accepted a boolean argument - "delFlag". If
2560 ** true, the file was configured to be automatically deleted when the
2561 ** file handle closed. To achieve the same effect using this new
2562 ** interface, add the DELETEONCLOSE flag to those specified above for
2565 static int unixOpen(
2568 sqlite3_file *pFile,
2572 int fd = 0; /* File descriptor returned by open() */
2573 int dirfd = -1; /* Directory file descriptor */
2574 int oflags = 0; /* Flags to pass to open() */
2575 int eType = flags&0xFFFFFF00; /* Type of file to open */
2576 int noLock; /* True to omit locking primitives */
2578 int isExclusive = (flags & SQLITE_OPEN_EXCLUSIVE);
2579 int isDelete = (flags & SQLITE_OPEN_DELETEONCLOSE);
2580 int isCreate = (flags & SQLITE_OPEN_CREATE);
2581 int isReadonly = (flags & SQLITE_OPEN_READONLY);
2582 int isReadWrite = (flags & SQLITE_OPEN_READWRITE);
2584 /* If creating a master or main-file journal, this function will open
2585 ** a file-descriptor on the directory too. The first time unixSync()
2586 ** is called the directory file descriptor will be fsync()ed and close()d.
2588 int isOpenDirectory = (isCreate &&
2589 (eType==SQLITE_OPEN_MASTER_JOURNAL || eType==SQLITE_OPEN_MAIN_JOURNAL)
2592 /* If argument zPath is a NULL pointer, this function is required to open
2593 ** a temporary file. Use this buffer to store the file name in.
2595 char zTmpname[MAX_PATHNAME+1];
2596 const char *zName = zPath;
2598 /* Check the following statements are true:
2600 ** (a) Exactly one of the READWRITE and READONLY flags must be set, and
2601 ** (b) if CREATE is set, then READWRITE must also be set, and
2602 ** (c) if EXCLUSIVE is set, then CREATE must also be set.
2603 ** (d) if DELETEONCLOSE is set, then CREATE must also be set.
2605 assert((isReadonly==0 || isReadWrite==0) && (isReadWrite || isReadonly));
2606 assert(isCreate==0 || isReadWrite);
2607 assert(isExclusive==0 || isCreate);
2608 assert(isDelete==0 || isCreate);
2610 /* The main DB, main journal, and master journal are never automatically
2613 assert( eType!=SQLITE_OPEN_MAIN_DB || !isDelete );
2614 assert( eType!=SQLITE_OPEN_MAIN_JOURNAL || !isDelete );
2615 assert( eType!=SQLITE_OPEN_MASTER_JOURNAL || !isDelete );
2617 /* Assert that the upper layer has set one of the "file-type" flags. */
2618 assert( eType==SQLITE_OPEN_MAIN_DB || eType==SQLITE_OPEN_TEMP_DB
2619 || eType==SQLITE_OPEN_MAIN_JOURNAL || eType==SQLITE_OPEN_TEMP_JOURNAL
2620 || eType==SQLITE_OPEN_SUBJOURNAL || eType==SQLITE_OPEN_MASTER_JOURNAL
2621 || eType==SQLITE_OPEN_TRANSIENT_DB
2624 memset(pFile, 0, sizeof(unixFile));
2628 assert(isDelete && !isOpenDirectory);
2629 rc = getTempname(MAX_PATHNAME+1, zTmpname);
2630 if( rc!=SQLITE_OK ){
2636 if( isReadonly ) oflags |= O_RDONLY;
2637 if( isReadWrite ) oflags |= O_RDWR;
2638 if( isCreate ) oflags |= O_CREAT;
2639 if( isExclusive ) oflags |= (O_EXCL|O_NOFOLLOW);
2640 oflags |= (O_LARGEFILE|O_BINARY);
2642 fd = open(zName, oflags, isDelete?0600:SQLITE_DEFAULT_FILE_PERMISSIONS);
2643 if( fd<0 && errno!=EISDIR && isReadWrite && !isExclusive ){
2644 /* Failed to open the file for read/write access. Try read-only. */
2645 flags &= ~(SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE);
2646 flags |= SQLITE_OPEN_READONLY;
2647 return unixOpen(pVfs, zPath, pFile, flags, pOutFlags);
2650 return SQLITE_CANTOPEN;
2660 if( isOpenDirectory ){
2661 int rc = openDirectory(zPath, &dirfd);
2662 if( rc!=SQLITE_OK ){
2669 fcntl(fd, F_SETFD, fcntl(fd, F_GETFD, 0) | FD_CLOEXEC);
2672 noLock = eType!=SQLITE_OPEN_MAIN_DB;
2673 return fillInUnixFile(pVfs, fd, dirfd, pFile, zPath, noLock);
2677 ** Delete the file at zPath. If the dirSync argument is true, fsync()
2678 ** the directory after deleting the file.
2680 static int unixDelete(sqlite3_vfs *pVfs, const char *zPath, int dirSync){
2682 SimulateIOError(return SQLITE_IOERR_DELETE);
2686 rc = openDirectory(zPath, &fd);
2687 if( rc==SQLITE_OK ){
2689 rc = SQLITE_IOERR_DIR_FSYNC;
2698 ** Test the existance of or access permissions of file zPath. The
2699 ** test performed depends on the value of flags:
2701 ** SQLITE_ACCESS_EXISTS: Return 1 if the file exists
2702 ** SQLITE_ACCESS_READWRITE: Return 1 if the file is read and writable.
2703 ** SQLITE_ACCESS_READONLY: Return 1 if the file is readable.
2705 ** Otherwise return 0.
2707 static int unixAccess(
2714 SimulateIOError( return SQLITE_IOERR_ACCESS; );
2716 case SQLITE_ACCESS_EXISTS:
2719 case SQLITE_ACCESS_READWRITE:
2722 case SQLITE_ACCESS_READ:
2727 assert(!"Invalid flags argument");
2729 *pResOut = (access(zPath, amode)==0);
2735 ** Turn a relative pathname into a full pathname. The relative path
2736 ** is stored as a nul-terminated string in the buffer pointed to by
2739 ** zOut points to a buffer of at least sqlite3_vfs.mxPathname bytes
2740 ** (in this case, MAX_PATHNAME bytes). The full-path is written to
2741 ** this buffer before returning.
2743 static int unixFullPathname(
2744 sqlite3_vfs *pVfs, /* Pointer to vfs object */
2745 const char *zPath, /* Possibly relative input path */
2746 int nOut, /* Size of output buffer in bytes */
2747 char *zOut /* Output buffer */
2750 /* It's odd to simulate an io-error here, but really this is just
2751 ** using the io-error infrastructure to test that SQLite handles this
2752 ** function failing. This function could fail if, for example, the
2753 ** current working directly has been unlinked.
2755 SimulateIOError( return SQLITE_ERROR );
2757 assert( pVfs->mxPathname==MAX_PATHNAME );
2758 zOut[nOut-1] = '\0';
2759 if( zPath[0]=='/' ){
2760 sqlite3_snprintf(nOut, zOut, "%s", zPath);
2763 if( getcwd(zOut, nOut-1)==0 ){
2764 return SQLITE_CANTOPEN;
2766 nCwd = strlen(zOut);
2767 sqlite3_snprintf(nOut-nCwd, &zOut[nCwd], "/%s", zPath);
2773 ** Remove "/./" path elements and convert "/A/./" path elements
2778 for(i=j=0; zFull[i]; i++){
2779 if( zFull[i]=='/' ){
2780 if( zFull[i+1]=='/' ) continue;
2781 if( zFull[i+1]=='.' && zFull[i+2]=='/' ){
2785 if( zFull[i+1]=='.' && zFull[i+2]=='.' && zFull[i+3]=='/' ){
2786 while( j>0 && zFull[j-1]!='/' ){ j--; }
2791 zFull[j++] = zFull[i];
2799 #ifndef SQLITE_OMIT_LOAD_EXTENSION
2801 ** Interfaces for opening a shared library, finding entry points
2802 ** within the shared library, and closing the shared library.
2805 static void *unixDlOpen(sqlite3_vfs *pVfs, const char *zFilename){
2806 return dlopen(zFilename, RTLD_NOW | RTLD_GLOBAL);
2810 ** SQLite calls this function immediately after a call to unixDlSym() or
2811 ** unixDlOpen() fails (returns a null pointer). If a more detailed error
2812 ** message is available, it is written to zBufOut. If no error message
2813 ** is available, zBufOut is left unmodified and SQLite uses a default
2816 static void unixDlError(sqlite3_vfs *pVfs, int nBuf, char *zBufOut){
2821 sqlite3_snprintf(nBuf, zBufOut, "%s", zErr);
2825 static void *unixDlSym(sqlite3_vfs *pVfs, void *pHandle, const char *zSymbol){
2826 return dlsym(pHandle, zSymbol);
2828 static void unixDlClose(sqlite3_vfs *pVfs, void *pHandle){
2831 #else /* if SQLITE_OMIT_LOAD_EXTENSION is defined: */
2832 #define unixDlOpen 0
2833 #define unixDlError 0
2835 #define unixDlClose 0
2839 ** Write nBuf bytes of random data to the supplied buffer zBuf.
2841 static int unixRandomness(sqlite3_vfs *pVfs, int nBuf, char *zBuf){
2843 assert(nBuf>=(sizeof(time_t)+sizeof(int)));
2845 /* We have to initialize zBuf to prevent valgrind from reporting
2846 ** errors. The reports issued by valgrind are incorrect - we would
2847 ** prefer that the randomness be increased by making use of the
2848 ** uninitialized space in zBuf - but valgrind errors tend to worry
2849 ** some users. Rather than argue, it seems easier just to initialize
2850 ** the whole array and silence valgrind, even if that means less randomness
2851 ** in the random seed.
2853 ** When testing, initializing zBuf[] to zero is all we do. That means
2854 ** that we always use the same random number sequence. This makes the
2855 ** tests repeatable.
2857 memset(zBuf, 0, nBuf);
2858 #if !defined(SQLITE_TEST)
2861 fd = open("/dev/urandom", O_RDONLY);
2865 memcpy(zBuf, &t, sizeof(t));
2867 memcpy(&zBuf[sizeof(t)], &pid, sizeof(pid));
2869 read(fd, zBuf, nBuf);
2879 ** Sleep for a little while. Return the amount of time slept.
2880 ** The argument is the number of microseconds we want to sleep.
2881 ** The return value is the number of microseconds of sleep actually
2882 ** requested from the underlying operating system, a number which
2883 ** might be greater than or equal to the argument, but not less
2884 ** than the argument.
2886 static int unixSleep(sqlite3_vfs *pVfs, int microseconds){
2887 #if defined(HAVE_USLEEP) && HAVE_USLEEP
2888 usleep(microseconds);
2889 return microseconds;
2891 int seconds = (microseconds+999999)/1000000;
2893 return seconds*1000000;
/*
** The following variable, if set to a non-zero value, becomes the result
** returned from sqlite3OsCurrentTime().  This is used for testing.
** The value is interpreted as a number of seconds since the Unix epoch.
*/
#ifdef SQLITE_TEST
int sqlite3_current_time = 0;
#endif
2906 ** Find the current time (in Universal Coordinated Time). Write the
2907 ** current time and date as a Julian Day number into *prNow and
2908 ** return 0. Return 1 if the time and date cannot be found.
2910 static int unixCurrentTime(sqlite3_vfs *pVfs, double *prNow){
2914 *prNow = t/86400.0 + 2440587.5;
2916 struct timeval sNow;
2917 gettimeofday(&sNow, 0);
2918 *prNow = 2440587.5 + sNow.tv_sec/86400.0 + sNow.tv_usec/86400000000.0;
2921 if( sqlite3_current_time ){
2922 *prNow = sqlite3_current_time/86400.0 + 2440587.5;
2928 static int unixGetLastError(sqlite3_vfs *pVfs, int nBuf, char *zBuf){
2933 ** Initialize the operating system interface.
2935 int sqlite3_os_init(void){
2936 /* Macro to define the static contents of an sqlite3_vfs structure for
2937 ** the unix backend. The two parameters are the values to use for
2938 ** the sqlite3_vfs.zName and sqlite3_vfs.pAppData fields, respectively.
2941 #define UNIXVFS(zVfsName, pVfsAppData) { \
2943 sizeof(unixFile), /* szOsFile */ \
2944 MAX_PATHNAME, /* mxPathname */ \
2946 zVfsName, /* zName */ \
2947 (void *)pVfsAppData, /* pAppData */ \
2948 unixOpen, /* xOpen */ \
2949 unixDelete, /* xDelete */ \
2950 unixAccess, /* xAccess */ \
2951 unixFullPathname, /* xFullPathname */ \
2952 unixDlOpen, /* xDlOpen */ \
2953 unixDlError, /* xDlError */ \
2954 unixDlSym, /* xDlSym */ \
2955 unixDlClose, /* xDlClose */ \
2956 unixRandomness, /* xRandomness */ \
2957 unixSleep, /* xSleep */ \
2958 unixCurrentTime, /* xCurrentTime */ \
2959 unixGetLastError /* xGetLastError */ \
2962 static sqlite3_vfs unixVfs = UNIXVFS("unix", 0);
2963 #if SQLITE_ENABLE_LOCKING_STYLE
2965 static sqlite3_vfs aVfs[] = {
2966 UNIXVFS("unix-posix", LOCKING_STYLE_POSIX),
2967 UNIXVFS("unix-afp", LOCKING_STYLE_AFP),
2968 UNIXVFS("unix-flock", LOCKING_STYLE_FLOCK),
2969 UNIXVFS("unix-dotfile", LOCKING_STYLE_DOTFILE),
2970 UNIXVFS("unix-none", LOCKING_STYLE_NONE)
2972 for(i=0; i<(sizeof(aVfs)/sizeof(sqlite3_vfs)); i++){
2973 sqlite3_vfs_register(&aVfs[i], 0);
2976 sqlite3_vfs_register(&unixVfs, 1);
2981 ** Shutdown the operating system interface. This is a no-op for unix.
2983 int sqlite3_os_end(void){
2987 #endif /* SQLITE_OS_UNIX */