


  1. @try{
  2.   char *errorMsg;
  3.   if (sqlite3_exec(_database, "BEGIN", NULL, NULL, &errorMsg)==SQLITE_OK) {
  4.     NSLog(@”启动事务成功”);
  5.     sqlite3_free(errorMsg);
  6.     sqlite3_stmt *statement;
  7.     , &statement, NULL)==SQLITE_OK) {
  8.       //绑定参数
  9.       const char *text=[@”张三” cStringUsingEncoding:NSUTF8StringEncoding];
  10.       sqlite3_bind_text(statement, index, text, strlen(text), SQLITE_STATIC);
  11.       if (sqlite3_step(statement)!=SQLITE_DONE) {
  12.         sqlite3_finalize(statement);
  13.       }
  14.     }
  15.     if (sqlite3_exec(_database, "COMMIT", NULL, NULL, &errorMsg)==SQLITE_OK) {
  16.       NSLog(@”提交事务成功”);
  17.     }
  18.     sqlite3_free(errorMsg);
  19.   }  else{
  20.     sqlite3_free(errorMsg);
  21.   }
  22. }
  23. @catch(NSException *e){
  24.   char *errorMsg;
  25.   if (sqlite3_exec(_database, "ROLLBACK", NULL, NULL, &errorMsg)==SQLITE_OK) {
  26.     NSLog(@”回滚事务成功”);
  27.   }
  28.   sqlite3_free(errorMsg);
  29. }
  30. @finally{
  31. }


  2. sqlite);
  3. sqlite);
  4. sqlite> COMMIT TRANSACTION; --显示事务被提交,数据表中的数据也发生了变化。
  5. sqlite> SELECT COUNT(*) FROM testtable;
  6. COUNT(*)
  7. ----------
  10. sqlite);
  11. sqlite> ROLLBACK TRANSACTION;  --显示事务被回滚,数据表中的数据没有发生变化。
  12. sqlite> SELECT COUNT(*) FROM testtable;
  13. COUNT(*)
  14. ----------

Page Cache之事务处理——SQLite原子提交的实现

  下面通过具体示例来分析SQLite原子提交的实现(基于Version 3.3.6的代码):

  1. CREATE TABLE episodes( id integer primary key,name text, cid int);
  2. ); --插入一条记录


  1. sqlite);
  2. );|
  3. |
  4. |
  5. |
  6. |
  7. |
  8. |
  9. |
  10. |
  11. |episodes|0b|
  12. |
  13. |
  14. |
  15. |
  16. |
  17. |
  18. |
  19. |

1、初始状态(Initial State)

2、获取读锁(Acquiring A Read Lock)
在进行读操作之前,必须先获取数据库的共享锁(shared lock),共享锁允许两个或更多的连接在同一时刻读取数据库。但是共享锁不允许其它连接对数据库进行写操作。
  注意,shared lock作用于操作系统的磁盘缓存,而不是磁盘本身。文件锁本质上只是操作系统内核中的数据结构,当操作系统崩溃或掉电时,这些内核数据会随之消失,锁也就随之失效。

3、读取数据(Reading Information Out Of The Database)
  一旦得到shared lock,就可以进行读操作。如图所示,数据先由OS从磁盘读取到OS缓存,然后再由OS移到用户进程空间。一般来说,数据库文件分为很多页,而一次读操作只读取其中一小部分页面。图中的例子是从8个页面中读取3个页面。

4、获取Reserved Lock
  在对数据进行修改操作之前,先要获取数据库文件的Reserved Lock,Reserved Lock和shared lock的相似之处在于,它们都允许其它进程对数据库文件进行读操作。Reserved Lock和Shared Lock可以共存,但是只能是一个Reserved Lock和多个Shared Lock——多个Reserved Lock不能共存。所以,在同一时刻,只能进行一个写操作。
  Reserved Lock意味着当前进程(连接)想修改数据库文件,但是还没开始修改操作,所以其它的进程可以读数据库,但不能写数据库。

5、创建恢复日志(Creating A Rollback Journal File)
  在修改数据库文件之前,SQLite先创建回滚日志文件,用来保存将被修改页面的原始内容,以便在需要时回滚。从OS的角度来看,当一个文件创建时,大多数OS(Windows、Linux、Mac OS X)不会立即向磁盘写入数据,新创建的文件此时位于磁盘缓存中,之后才会真正写入磁盘。如图,日志文件位于OS磁盘缓存中,而不是位于磁盘。


  1. //事务指令的实现
  2. //p1为数据库文件的索引号--0为main database;1为temporary tables使用的文件
  3. //p2不为0,一个写事务开始
  4. case OP_Transaction: {
  5. //数据库的索引号
  6. int i = pOp->p1;
  7. //指向数据库对应的btree
  8. Btree *pBt;
  9. assert( i>= && i<db->nDb );
  10. assert( (p->btreeMask & (<<i))!= );
  11. //设置btree指针
  12. pBt = db->aDb[i].pBt;
  13. if( pBt ){
  14. //从这里btree开始事务,主要给文件加锁,并设置btree事务状态
  15. rc = sqlite3BtreeBeginTrans(pBt, pOp->p2);
  17. if( rc==SQLITE_BUSY ){
  18. p->pc = pc;
  19. p->rc = rc = SQLITE_BUSY;
  20. goto vdbe_return;
  21. }
  22. if( rc!=SQLITE_OK && rc!=SQLITE_READONLY /* && rc!=SQLITE_BUSY */ ){
  23. goto abort_due_to_error;
  24. }
  25. }
  26. break;
  27. }
  29. //开始一个事务,如果第二个参数不为0,则一个写事务开始,否则是一个读事务
  30. //如果wrflag>=2,一个exclusive事务开始,此时别的连接不能访问数据库
  31. int sqlite3BtreeBeginTrans(Btree *p, int wrflag){
  32. BtShared *pBt = p->pBt;
  33. int rc = SQLITE_OK;
  34. btreeIntegrity(p);
  35. /* If the btree is already in a write-transaction, or it
  36. ** is already in a read-transaction and a read-transaction
  37. ** is requested, this is a no-op.
  38. */
  39. //如果b-tree处于一个写事务;或者处于一个读事务,一个读事务又请求,则返回SQLITE_OK
  40. if( p->inTrans==TRANS_WRITE || (p->inTrans==TRANS_READ && !wrflag) ){
  41. return SQLITE_OK;
  42. }
  43. /* Write transactions are not possible on a read-only database */
  44. //写事务不能访问只读数据库
  45. if( pBt->readOnly && wrflag ){
  46. return SQLITE_READONLY;
  47. }
  48. /* If another database handle has already opened a write transaction
  49. ** on this shared-btree structure and a second write transaction is
  50. ** requested, return SQLITE_BUSY.
  51. */
  52. //如果数据库已存在一个写事务,则该写事务请求时返回SQLITE_BUSY
  53. if( pBt->inTransaction==TRANS_WRITE && wrflag ){
  54. return SQLITE_BUSY;
  55. }
  56. do {
  57. //如果数据库对应btree的第一个页面还没读进内存
  58. //则把该页面读进内存,数据库也相应的加read lock
  59. ){
  60. //加read lock,并读页面到内存
  61. rc = lockBtree(pBt);
  62. }
  63. if( rc==SQLITE_OK && wrflag ){
  64. //对数据库文件加RESERVED_LOCK锁
  65. rc = sqlite3pager_begin(pBt->pPage1->aData, wrflag>);
  66. if( rc==SQLITE_OK ){
  67. rc = newDatabase(pBt);
  68. }
  69. }
  70. if( rc==SQLITE_OK ){
  71. ;
  72. }else{
  73. unlockBtreeIfUnused(pBt);
  74. }
  75. }while( rc==SQLITE_BUSY && pBt->inTransaction==TRANS_NONE && sqlite3InvokeBusyHandler(pBt->pBusyHandler) );
  76. if( rc==SQLITE_OK ){
  77. if( p->inTrans==TRANS_NONE ){
  78. //btree的事务数加1
  79. pBt->nTransaction++;
  80. }
  81. //设置btree事务状态
  82. p->inTrans = (wrflag?TRANS_WRITE:TRANS_READ);
  83. if( p->inTrans>pBt->inTransaction ){
  84. pBt->inTransaction = p->inTrans;
  85. }
  86. }
  87. btreeIntegrity(p);
  88. return rc;
  89. }
  90. /*
  91. **获取数据库的写锁,发生以下情况时去除写锁:
  92. ** * sqlite3pager_commit() is called.
  93. ** * sqlite3pager_rollback() is called.
  94. ** * sqlite3pager_close() is called.
  95. ** * sqlite3pager_unref() is called to on every outstanding page.
  96. **pData指向数据库的打开的页面,此时并不修改,仅仅只是获取
  97. **相应的pager,检查它是否处于read-lock状态
  98. **如果打开的不是临时文件,则打开日志文件.
  99. **如果数据库已经处于写状态,则do nothing
  100. */
  101. int sqlite3pager_begin(void *pData, int exFlag){
  102. PgHdr *pPg = DATA_TO_PGHDR(pData);
  103. Pager *pPager = pPg->pPager;
  104. int rc = SQLITE_OK;
  105. assert( pPg->nRef> );
  106. assert( pPager->state!=PAGER_UNLOCK );
  107. //pager已经处于share状态
  108. if( pPager->state==PAGER_SHARED ){
  109. assert( pPager->aInJournal== );
  110. if( MEMDB ){
  111. pPager->state = PAGER_EXCLUSIVE;
  112. pPager->origDbSize = pPager->dbSize;
  113. }else{
  114. //对文件加 RESERVED_LOCK
  115. rc = sqlite3OsLock(pPager->fd, RESERVED_LOCK);
  116. if( rc==SQLITE_OK ){
  117. //设置pager的状态
  118. pPager->state = PAGER_RESERVED;
  119. if( exFlag ){
  120. rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK);
  121. }
  122. }
  123. if( rc!=SQLITE_OK ){
  124. return rc;
  125. }
  126. pPager->dirtyCache = ;
  127. TRACE2("TRANSACTION %d\n", PAGERID(pPager));
  128. //使用日志,不是临时文件,则打开日志文件
  129. if( pPager->useJournal && !pPager->tempFile ){
  130. //为pager打开日志文件,pager应该处于RESERVED或EXCLUSIVE状态
  131. //会向日志文件写入header
  132. rc = pager_open_journal(pPager);
  133. }
  134. }
  135. }
  136. return rc;
  137. }
  138. //创建日志文件,pager应该处于RESERVED或EXCLUSIVE状态
  139. static int pager_open_journal(Pager *pPager){
  140. int rc;
  141. assert( !MEMDB );
  142. assert( pPager->state>=PAGER_RESERVED );
  143. assert( pPager->journalOpen== );
  144. assert( pPager->useJournal );
  145. assert( pPager->aInJournal== );
  146. sqlite3pager_pagecount(pPager);
  147. //日志文件页面位图
  148. pPager->aInJournal = sqliteMalloc( pPager->dbSize/ + );
  149. ){
  150. rc = SQLITE_NOMEM;
  151. goto failed_to_open_journal;
  152. }
  153. //打开日志文件
  154. rc = sqlite3OsOpenExclusive(pPager->zJournal, &pPager->jfd, pPager->tempFile);
  155. //日志文件的位置指针
  156. pPager->journalOff = ;
  157. pPager->setMaster = ;
  158. pPager->journalHdr = ;
  159. if( rc!=SQLITE_OK ){
  160. goto failed_to_open_journal;
  161. }
  162. /*一般来说,OS此时创建的文件位于磁盘缓存,并没有实际
  163. **存在于磁盘,下面三个操作就是为了把结果写入磁盘,而对于
  164. **windows系统来说,并没有提供相应API,所以实际上没有意义.
  165. */
  166. //fullSync操作对windows没有意义
  167. sqlite3OsSetFullSync(pPager->jfd, pPager->full_fsync);
  168. sqlite3OsSetFullSync(pPager->fd, pPager->full_fsync);
  169. /* Attempt to open a file descriptor for the directory that contains a file.
  170. **This file descriptor can be used to fsync() the directory
  171. **in order to make sure the creation of a new file is actually written to disk.
  172. */
  173. sqlite3OsOpenDirectory(pPager->jfd, pPager->zDirectory);
  174. pPager->journalOpen = ;
  175. pPager->journalStarted = ;
  176. pPager->needSync = ;
  177. pPager->alwaysRollback = ;
  178. pPager->nRec = ;
  179. if( pPager->errCode ){
  180. rc = pPager->errCode;
  181. goto failed_to_open_journal;
  182. }
  183. pPager->origDbSize = pPager->dbSize;
  184. //写入日志文件的header--24个字节
  185. rc = writeJournalHdr(pPager);
  187. if( pPager->stmtAutoopen && rc==SQLITE_OK ){
  188. rc = sqlite3pager_stmt_begin(pPager);
  189. }
  190. if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM ){
  191. rc = pager_unwritelock(pPager);
  192. if( rc==SQLITE_OK ){
  193. rc = SQLITE_FULL;
  194. }
  195. }
  196. return rc;
  198. failed_to_open_journal:
  199. sqliteFree(pPager->aInJournal);
  200. pPager->aInJournal = ;
  201. if( rc==SQLITE_NOMEM ){
  202. /* If this was a malloc() failure, then we will not be closing the pager
  203. ** file. So delete any journal file we may have just created. Otherwise,
  204. ** the system will get confused, we have a read-lock on the file and a
  205. ** mysterious journal has appeared in the filesystem.
  206. */
  207. sqlite3OsDelete(pPager->zJournal);
  208. }else{
  209. sqlite3OsUnlock(pPager->fd, NO_LOCK);
  210. pPager->state = PAGER_UNLOCK;
  211. }
  212. return rc;
  213. }
  215. /*写入日志文件头
  216. **journal header的格式如下:
  217. ** - 8 bytes: 标志日志文件的魔数
  218. ** - 4 bytes: 日志文件中记录数
  219. ** - 4 bytes: Random number used for page hash.
  220. ** - 4 bytes: 原来数据库的大小(以页为单位,即pPager->dbSize)
  221. ** - 4 bytes: 扇区大小(pPager->sectorSize,通常为512字节)
  222. */
  223. static int writeJournalHdr(Pager *pPager){
  224. //日志文件头
  225. ];
  227. int rc = seekJournalHdr(pPager);
  228. if( rc ) return rc;
  230. pPager->journalHdr = pPager->journalOff;
  231. ){
  232. pPager->stmtHdrOff = pPager->journalHdr;
  233. }
  234. //设置文件指针指向header之后
  235. pPager->journalOff += JOURNAL_HDR_SZ(pPager);
  237. /* FIX ME:
  238. **
  239. ** Possibly for a pager not in no-sync mode, the journal magic should not
  240. ** be written until nRec is filled in as part of next syncJournal().
  241. **
  242. ** Actually maybe the whole journal header should be delayed until that
  243. ** point. Think about this.
  244. */
  245. memcpy(zHeader, aJournalMagic, sizeof(aJournalMagic));
  246. /* The nRec Field. 0xFFFFFFFF for no-sync journals. */
  247. put32bits(&zHeader[);
  248. /* The random check-hash initialiser */
  249. sqlite3Randomness(sizeof(pPager->cksumInit), &pPager->cksumInit);
  250. put32bits(&zHeader[], pPager->cksumInit);
  251. /* The initial database size */
  252. put32bits(&zHeader[], pPager->dbSize);
  253. /* The assumed sector size for this process */
  254. put32bits(&zHeader[], pPager->sectorSize);
  255. //写入文件头
  256. rc = sqlite3OsWrite(pPager->jfd, zHeader, sizeof(zHeader));
  258. /* The journal header has been written successfully. Seek the journal
  259. ** file descriptor to the end of the journal header sector.
  260. */
  261. if( rc==SQLITE_OK ){
  262. rc = sqlite3OsSeek(pPager->jfd, pPager->journalOff-);
  263. if( rc==SQLITE_OK ){
  264. rc = sqlite3OsWrite(pPager->jfd, );
  265. }
  266. }
  267. return rc;
  268. }


6、修改位于用户进程空间的页面(Changing Database Pages In User Space)

7、日志文件刷入磁盘(Flushing The Rollback Journal File To Mass Storage)


  1. /*
  2. **Sync日志文件,保证所有的脏页面写入磁盘日志文件
  3. */
  4. static int syncJournal(Pager *pPager){
  5. PgHdr *pPg;
  6. int rc = SQLITE_OK;
  8. /* Sync the journal before modifying the main database
  9. ** (assuming there is a journal and it needs to be synced.)
  10. */
  11. if( pPager->needSync ){
  12. if( !pPager->tempFile ){
  13. assert( pPager->journalOpen );
  14. /* assert( !pPager->noSync ); // noSync might be set if synchronous
  15. ** was turned off after the transaction was started. Ticket #615 */
  16. #ifndef NDEBUG
  17. {
  18. /* Make sure the pPager->nRec counter we are keeping agrees
  19. ** with the nRec computed from the size of the journal file.
  20. */
  21. i64 jSz;
  22. rc = sqlite3OsFileSize(pPager->jfd, &jSz);
  23. ) return rc;
  24. assert( pPager->journalOff==jSz );
  25. }
  26. #endif
  27. {
  28. /* Write the nRec value into the journal file header. If in
  29. ** full-synchronous mode, sync the journal first. This ensures that
  30. ** all data has really hit the disk before nRec is updated to mark
  31. ** it as a candidate for rollback.
  32. */
  33. if( pPager->fullSync ){
  34. TRACE2("SYNC journal of %d\n", PAGERID(pPager));
  35.     //首先保证脏页面中所有的数据都已经写入日志文件
  36. rc = sqlite3OsSync(pPager->jfd, );
  37. ) return rc;
  38. }
  39. rc = sqlite3OsSeek(pPager->jfd,
  40. pPager->journalHdr + sizeof(aJournalMagic));
  41. if( rc ) return rc;
  42.    //页面的数目写入日志文件
  43. rc = write32bits(pPager->jfd, pPager->nRec);
  44. if( rc ) return rc;
  46. rc = sqlite3OsSeek(pPager->jfd, pPager->journalOff);
  47. if( rc ) return rc;
  48. }
  49. TRACE2("SYNC journal of %d\n", PAGERID(pPager));
  50. rc = sqlite3OsSync(pPager->jfd, pPager->full_fsync);
  51. ) return rc;
  52. pPager->journalStarted = ;
  53. }
  54. pPager->needSync = ;
  56. /* Erase the needSync flag from every page.
  57. */
  58. //清除needSync标志位
  59. for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
  60. pPg->needSync = ;
  61. }
  62. pPager->pFirstSynced = pPager->pFirst;
  63. }
  65. #ifndef NDEBUG
  66. /* If the Pager.needSync flag is clear then the PgHdr.needSync
  67. ** flag must also be clear for all pages. Verify that this
  68. ** invariant is true.
  69. */
  70. else{
  71. for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
  72. assert( pPg->needSync== );
  73. }
  74. assert( pPager->pFirstSynced==pPager->pFirst );
  75. }
  76. #endif
  77. return rc;
  78. }

8、获取排它锁(Obtaining An Exclusive Lock)
  在对数据库文件进行修改之前(注:这里指的是磁盘上的数据库文件,而不是内存中的页面),必须先得到数据库文件的排它锁(Exclusive Lock)。得到排它锁的过程分为两步:首先得到Pending lock;然后把Pending lock升级为Exclusive lock。
  Pending lock允许已经持有Shared lock的连接继续读数据库文件,但不允许再产生新的Shared lock,这样做是为了防止写操作被饿死。一旦所有已有的Shared lock都被释放,Pending lock就升级为Exclusive lock。

9、修改的页面写入文件(Writing Changes To The Database File)
  一旦得到exclusive lock,其它的进程就不能进行读操作,此时就可以把修改的页面写回数据库文件,但是通常OS都把结果暂时保存到磁盘缓存中,直到某个时刻才会真正把结果写入磁盘。


  1. //把所有的脏页面写入数据库
  2. //到这里开始获取EXCLUSIVE锁,并将页面写回操作系统文件
  3. static int pager_write_pagelist(PgHdr *pList){
  4. Pager *pPager;
  5. int rc;
  7. ) return SQLITE_OK;
  8. pPager = pList->pPager;
  10. /* At this point there may be either a RESERVED or EXCLUSIVE lock on the
  11. ** database file. If there is already an EXCLUSIVE lock, the following
  12. ** calls to sqlite3OsLock() are no-ops.
  13. **
  14. ** Moving the lock from RESERVED to EXCLUSIVE actually involves going
  15. ** through an intermediate state PENDING. A PENDING lock prevents new
  16. ** readers from attaching to the database but is unsufficient for us to
  17. ** write. The idea of a PENDING lock is to prevent new readers from
  18. ** coming in while we wait for existing readers to clear.
  19. **
  20. ** While the pager is in the RESERVED state, the original database file
  21. ** is unchanged and we can rollback without having to playback the
  22. ** journal into the original database file. Once we transition to
  23. ** EXCLUSIVE, it means the database file has been changed and any rollback
  24. ** will require a journal playback.
  25. */
  27. rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK);
  28. if( rc!=SQLITE_OK ){
  29. return rc;
  30. }
  32. while( pList ){
  33. assert( pList->dirty );
  34. rc = sqlite3OsSeek(pPager->fd, (pList->pgno-)*(i64)pPager->pageSize);
  35. if( rc ) return rc;
  36. /* If there are dirty pages in the page cache with page numbers greater
  37. ** than Pager.dbSize, this means sqlite3pager_truncate() was called to
  38. ** make the file smaller (presumably by auto-vacuum code). Do not write
  39. ** any such pages to the file.
  40. */
  41. if( pList->pgno<=pPager->dbSize ){
  42. );
  43. TRACE3("STORE %d page %d\n", PAGERID(pPager), pList->pgno);
  44. //写入文件
  45. rc = sqlite3OsWrite(pPager->fd, pData, pPager->pageSize);
  46. TEST_INCR(pPager->nWrite);
  47. }
  48. #ifndef NDEBUG
  49. else{
  50. TRACE3("NOSTORE %d page %d\n", PAGERID(pPager), pList->pgno);
  51. }
  52. #endif
  53. if( rc ) return rc;
  54. //设置dirty
  55. pList->dirty = ;
  57. pList->pageHash = pager_pagehash(pList);
  58. #endif
  59.   //指向下一个脏页面
  60. pList = pList->pDirty;
  61. }
  62. return SQLITE_OK;
  63. }

10、修改结果刷入存储设备(Flushing Changes To Mass Storage)


  1. //同步btree对应的数据库文件
  2. //该函数返回之后,只需要提交写事务,删除日志文件
  3. int sqlite3BtreeSync(Btree *p, const char *zMaster){
  4. int rc = SQLITE_OK;
  5. if( p->inTrans==TRANS_WRITE ){
  6. BtShared *pBt = p->pBt;
  7. Pgno nTrunc = ;
  9. if( pBt->autoVacuum ){
  10. rc = autoVacuumCommit(pBt, &nTrunc);
  11. if( rc!=SQLITE_OK ){
  12. return rc;
  13. }
  14. }
  15. #endif
  17.   //调用pager进行sync
  18. rc = sqlite3pager_sync(pBt->pPager, zMaster, nTrunc);
  19. }
  20. return rc;
  21. }
  23. //把pager所有脏页面写回文件
  24. int sqlite3pager_sync(Pager *pPager, const char *zMaster, Pgno nTrunc){
  25. int rc = SQLITE_OK;
  27. TRACE4("DATABASE SYNC: File=%s zMaster=%s nTrunc=%d\n",
  28. pPager->zFilename, zMaster, nTrunc);
  30. /* If this is an in-memory db, or no pages have been written to, or this
  31. ** function has already been called, it is a no-op.
  32. */
  33. //pager不处于PAGER_SYNCED状态,dirtyCache为1,
  34. //则进行sync操作
  35. if( pPager->state!=PAGER_SYNCED && !MEMDB && pPager->dirtyCache ){
  36. PgHdr *pPg;
  37. assert( pPager->journalOpen );
  39. /* If a master journal file name has already been written to the
  40. ** journal file, then no sync is required. This happens when it is
  41. ** written, then the process fails to upgrade from a RESERVED to an
  42. ** EXCLUSIVE lock. The next time the process tries to commit the
  43. ** transaction the m-j name will have already been written.
  44. */
  45. if( !pPager->setMaster ){
  46. //pager修改计数
  47. rc = pager_incr_changecounter(pPager);
  48. if( rc!=SQLITE_OK ) goto sync_exit;
  50. ){
  51. /* If this transaction has made the database smaller, then all pages
  52. ** being discarded by the truncation must be written to the journal
  53. ** file.
  54. */
  55. Pgno i;
  56. void *pPage;
  57. int iSkip = PAGER_MJ_PGNO(pPager);
  58. ; i<=pPager->origDbSize; i++ ){
  59. ] & (<<(i&))) && i!=iSkip ){
  60. rc = sqlite3pager_get(pPager, i, &pPage);
  61. if( rc!=SQLITE_OK ) goto sync_exit;
  62. rc = sqlite3pager_write(pPage);
  63. sqlite3pager_unref(pPage);
  64. if( rc!=SQLITE_OK ) goto sync_exit;
  65. }
  66. }
  67. }
  68. #endif
  69. rc = writeMasterJournal(pPager, zMaster);
  70. if( rc!=SQLITE_OK ) goto sync_exit;
  72. //sync日志文件
  73. rc = syncJournal(pPager);
  74. if( rc!=SQLITE_OK ) goto sync_exit;
  75. }
  78. ){
  79. rc = sqlite3pager_truncate(pPager, nTrunc);
  80. if( rc!=SQLITE_OK ) goto sync_exit;
  81. }
  82. #endif
  84. /* Write all dirty pages to the database file */
  85. pPg = pager_get_all_dirty_pages(pPager);
  87. //把所有脏页面写回操作系统文件
  88. rc = pager_write_pagelist(pPg);
  89. if( rc!=SQLITE_OK ) goto sync_exit;
  91. /* Sync the database file. */
  92. //sync数据库文件
  93. if( !pPager->noSync ){
  94. rc = sqlite3OsSync(pPager->fd, );
  95. }
  97. pPager->state = PAGER_SYNCED;
  98. } ){
  99. rc = sqlite3pager_truncate(pPager, nTrunc);
  100. }
  102. sync_exit:
  103. return rc;
  104. }


11、删除日志文件(Deleting The Rollback Journal)

12、释放锁(Releasing The Lock)


  1. //提交事务,至此一个事务完成.主要做两件事:
  2. //删除日志文件,释放数据库文件的写锁
  3. int sqlite3BtreeCommit(Btree *p){
  4. BtShared *pBt = p->pBt;
  5. btreeIntegrity(p);
  6. /* If the handle has a write-transaction open, commit the shared-btrees
  7. ** transaction and set the shared state to TRANS_READ.
  8. */
  9. if( p->inTrans==TRANS_WRITE ){
  10. int rc;
  11. assert( pBt->inTransaction==TRANS_WRITE );
  12. assert( pBt->nTransaction> );
  14. //调用pager,提交事务
  15. rc = sqlite3pager_commit(pBt->pPager);
  16. if( rc!=SQLITE_OK ){
  17. return rc;
  18. }
  19. pBt->inTransaction = TRANS_READ;
  20. pBt->inStmt = ;
  21. }
  22. unlockAllTables(p);
  24. /* If the handle has any kind of transaction open, decrement the transaction
  25. ** count of the shared btree. If the transaction count reaches 0, set
  26. ** the shared state to TRANS_NONE. The unlockBtreeIfUnused() call below
  27. ** will unlock the pager.
  28. */
  29. if( p->inTrans!=TRANS_NONE ){
  30. pBt->nTransaction--;
  31. ==pBt->nTransaction ){
  32. pBt->inTransaction = TRANS_NONE;
  33. }
  34. }
  35. }
  37. //提交事务,主要调用pager_unwritelock()函数
  38. int sqlite3pager_commit(Pager *pPager){
  39. int rc;
  40. PgHdr *pPg;
  42. if( pPager->errCode ){
  43. return pPager->errCode;
  44. }
  45. if( pPager->state<PAGER_RESERVED ){
  46. return SQLITE_ERROR;
  47. }
  48. TRACE2("COMMIT %d\n", PAGERID(pPager));
  49. if( MEMDB ){
  50. pPg = pager_get_all_dirty_pages(pPager);
  51. while( pPg ){
  52. clearHistory(PGHDR_TO_HIST(pPg, pPager));
  53. pPg->dirty = ;
  54. pPg->inJournal = ;
  55. pPg->inStmt = ;
  56. pPg->needSync = ;
  57. pPg->pPrevStmt = pPg->pNextStmt = ;
  58. pPg = pPg->pDirty;
  59. }
  60. pPager->pDirty = ;
  61. #ifndef NDEBUG
  62. for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
  63. PgHistory *pHist = PGHDR_TO_HIST(pPg, pPager);
  64. assert( !pPg->alwaysRollback );
  65. assert( !pHist->pOrig );
  66. assert( !pHist->pStmt );
  67. }
  68. #endif
  69. pPager->pStmt = ;
  70. pPager->state = PAGER_SHARED;
  71. return SQLITE_OK;
  72. }
  73. ){
  74. /* Exit early (without doing the time-consuming sqlite3OsSync() calls)
  75. ** if there have been no changes to the database file. */
  76. assert( pPager->needSync== );
  77. rc = pager_unwritelock(pPager);
  78. pPager->dbSize = -;
  79. return rc;
  80. }
  81. assert( pPager->journalOpen );
  82. rc = sqlite3pager_sync(pPager, , );
  84. //删除文件,释放写锁
  85. if( rc==SQLITE_OK ){
  86. rc = pager_unwritelock(pPager);
  87. pPager->dbSize = -;
  88. }
  89. return rc;
  90. }
  92. //对数据库加read lock,删除日志文件
  93. static int pager_unwritelock(Pager *pPager){
  94. PgHdr *pPg;
  95. int rc;
  96. assert( !MEMDB );
  97. if( pPager->state<PAGER_RESERVED ){
  98. return SQLITE_OK;
  99. }
  100. sqlite3pager_stmt_commit(pPager);
  101. if( pPager->stmtOpen ){
  102. sqlite3OsClose(&pPager->stfd);
  103. pPager->stmtOpen = ;
  104. }
  105. if( pPager->journalOpen ){
  107. //关闭日志文件
  108. sqlite3OsClose(&pPager->jfd);
  109. pPager->journalOpen = ;
  110. //删除日志文件
  111. sqlite3OsDelete(pPager->zJournal);
  112. sqliteFree( pPager->aInJournal );
  113. pPager->aInJournal = ;
  114. for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
  115. pPg->inJournal = ;
  116. pPg->dirty = ;
  117. pPg->needSync = ;
  118. #ifdef SQLITE_CHECK_PAGES
  119. pPg->pageHash = pager_pagehash(pPg);
  120. #endif
  121. }
  122. pPager->pDirty = ;
  123. pPager->dirtyCache = ;
  124. pPager->nRec = ;
  125. }else{
  126. assert( pPager->aInJournal== );
  127. assert( pPager->dirtyCache== || pPager->useJournal== );
  128. }
  130. //释放写锁,加读锁
  131. rc = sqlite3OsUnlock(pPager->fd, SHARED_LOCK);
  132. pPager->state = PAGER_SHARED;
  133. pPager->origDbSize = ;
  134. pPager->setMaster = ;
  135. pPager->needSync = ;
  136. pPager->pFirstSynced = pPager->pFirst;
  137. return rc;
  138. }




  1. //虚拟机停机指令
  2. case OP_Halt: { /* no-push */
  3. p->pTos = pTos;
  4. p->rc = pOp->p1;
  5. p->pc = pc;
  6. p->errorAction = pOp->p2;
  7. if( pOp->p3 ){
  8. sqlite3SetString(&p->zErrMsg, pOp->p3, ();
  9. }
  11. //并提交事务
  12. rc = sqlite3VdbeHalt(p);
  13. assert( rc==SQLITE_BUSY || rc==SQLITE_OK );
  14. if( rc==SQLITE_BUSY ){
  15. p->rc = SQLITE_BUSY;
  16. return SQLITE_BUSY;
  17. }
  18. return p->rc ? SQLITE_ERROR : SQLITE_DONE;
  19. }
  21. //当虚拟机要停机时,调用该函数,如果VDBE改变了数据库且为自动
  22. //提交模式,则提交这些改变
  23. int sqlite3VdbeHalt(Vdbe *p){
  24. sqlite3 *db = p->db;
  25. int i;
  26. ; /* Function to call on each btree backend */
  27. int isSpecialError; /* Set to true if SQLITE_NOMEM or IOERR */
  29. /* This function contains the logic that determines if a statement or
  30. ** transaction will be committed or rolled back as a result of the
  31. ** execution of this virtual machine.
  32. **
  33. ** Special errors:
  34. **
  35. ** If an SQLITE_NOMEM error has occured in a statement that writes to
  36. ** the database, then either a statement or transaction must be rolled
  37. ** back to ensure the tree-structures are in a consistent state. A
  38. ** statement transaction is rolled back if one is open, otherwise the
  39. ** entire transaction must be rolled back.
  40. **
  41. ** If an SQLITE_IOERR error has occured in a statement that writes to
  42. ** the database, then the entire transaction must be rolled back. The
  43. ** I/O error may have caused garbage to be written to the journal
  44. ** file. Were the transaction to continue and eventually be rolled
  45. ** back that garbage might end up in the database file.
  46. **
  47. ** In both of the above cases, the Vdbe.errorAction variable is
  48. ** ignored. If the sqlite3.autoCommit flag is false and a transaction
  49. ** is rolled back, it will be set to true.
  50. **
  51. ** Other errors:
  52. **
  53. ** No error:
  54. **
  55. */
  57. if( sqlite3MallocFailed() ){
  58. p->rc = SQLITE_NOMEM;
  59. }
  60. if( p->magic!=VDBE_MAGIC_RUN ){
  61. /* Already halted. Nothing to do. */
  62. assert( p->magic==VDBE_MAGIC_HALT );
  63. return SQLITE_OK;
  64. }
  65. //释放虚拟机中所有的游标
  66. closeAllCursors(p);
  67. checkActiveVdbeCnt(db);
  69. /* No commit or rollback needed if the program never started */
  70. ){
  72. /* Check for one of the special errors - SQLITE_NOMEM or SQLITE_IOERR */
  73. isSpecialError = ((p->rc==SQLITE_NOMEM || p->rc==SQLITE_IOERR)?:);
  74. if( isSpecialError ){
  75. /* This loop does static analysis of the query to see which of the
  76. ** following three categories it falls into:
  77. **
  78. ** Read-only
  79. ** Query with statement journal
  80. ** Query without statement journal
  81. **
  82. ** We could do something more elegant than this static analysis (i.e.
  83. ** store the type of query as part of the compliation phase), but
  84. ** handling malloc() or IO failure is a fairly obscure edge case so
  85. ** this is probably easier. Todo: Might be an opportunity to reduce
  86. ** code size a very small amount though
  87. */
  88. ;
  89. ;
  90. assert(p->aOp || p->nOp==);
  91. ; i<p->nOp; i++){
  92. switch( p->aOp[i].opcode ){
  93. case OP_Transaction:
  94. isReadOnly = ;
  95. break;
  96. case OP_Statement:
  97. isStatement = ;
  98. break;
  99. }
  100. }
  102. /* If the query was read-only, we need do no rollback at all. Otherwise,
  103. ** proceed with the special handling.
  104. */
  105. if( !isReadOnly ){
  106. if( p->rc==SQLITE_NOMEM && isStatement ){
  107. xFunc = sqlite3BtreeRollbackStmt;
  108. }else{
  109. /* We are forced to roll back the active transaction. Before doing
  110. ** so, abort any other statements this handle currently has active.
  111. */
  112. sqlite3AbortOtherActiveVdbes(db, p);
  113. sqlite3RollbackAll(db);
  114. db->autoCommit = ;
  115. }
  116. }
  117. }
  119. /* If the auto-commit flag is set and this is the only active vdbe, then
  120. ** we do either a commit or rollback of the current transaction.
  121. **
  122. ** Note: This block also runs if one of the special errors handled
  123. ** above has occured.
  124. */
  125. //如果自动提交事务,则提交事务
  126. ){
  127. if( p->rc==SQLITE_OK || (p->errorAction==OE_Fail && !isSpecialError) ){
  128. /* The auto-commit flag is true, and the vdbe program was
  129. ** successful or hit an 'OR FAIL' constraint. This means a commit
  130. ** is required.
  131. */
  132. //提交事务
  133. int rc = vdbeCommit(db);
  134. if( rc==SQLITE_BUSY ){
  135. return SQLITE_BUSY;
  136. }else if( rc!=SQLITE_OK ){
  137. p->rc = rc;
  138. sqlite3RollbackAll(db);
  139. }else{
  140. sqlite3CommitInternalChanges(db);
  141. }
  142. }else{
  143. sqlite3RollbackAll(db);
  144. }
  145. }else if( !xFunc ){
  146. if( p->rc==SQLITE_OK || p->errorAction==OE_Fail ){
  147. xFunc = sqlite3BtreeCommitStmt;
  148. }else if( p->errorAction==OE_Abort ){
  149. xFunc = sqlite3BtreeRollbackStmt;
  150. }else{
  151. sqlite3AbortOtherActiveVdbes(db, p);
  152. sqlite3RollbackAll(db);
  153. db->autoCommit = ;
  154. }
  155. }
  157. /* If xFunc is not NULL, then it is one of sqlite3BtreeRollbackStmt or
  158. ** sqlite3BtreeCommitStmt. Call it once on each backend. If an error occurs
  159. ** and the return code is still SQLITE_OK, set the return code to the new
  160. ** error value.
  161. */
  162. assert(!xFunc ||
  163. xFunc==sqlite3BtreeCommitStmt ||
  164. xFunc==sqlite3BtreeRollbackStmt
  165. );
  166. ; xFunc && i<db->nDb; i++){
  167. int rc;
  168. Btree *pBt = db->aDb[i].pBt;
  169. if( pBt ){
  170. rc = xFunc(pBt);
  171. if( rc && (p->rc==SQLITE_OK || p->rc==SQLITE_CONSTRAINT) ){
  172. p->rc = rc;
  173. sqlite3SetString(&p->zErrMsg, );
  174. }
  175. }
  176. }
  178. /* If this was an INSERT, UPDATE or DELETE and the statement was committed,
  179. ** set the change counter.
  180. */
  181. ){
  182. if( !xFunc || xFunc==sqlite3BtreeCommitStmt ){
  183. sqlite3VdbeSetChanges(db, p->nChange);
  184. }else{
  185. sqlite3VdbeSetChanges(db, );
  186. }
  187. p->nChange = ;
  188. }
  190. /* Rollback or commit any schema changes that occurred. */
  191. if( p->rc!=SQLITE_OK && db->flags&SQLITE_InternChanges ){
  192. sqlite3ResetInternalSchema(db, );
  193. db->flags = (db->flags | SQLITE_InternChanges);
  194. }
  195. }
  197. /* We have successfully halted and closed the VM. Record this fact. */
  198. ){
  199. db->activeVdbeCnt--;
  200. }
  201. p->magic = VDBE_MAGIC_HALT;
  202. checkActiveVdbeCnt(db);
  204. return SQLITE_OK;
  205. }
  207. //提交事务,主要调用:
  208. //sqlite3BtreeSync()--同步btree, sqlite3BtreeCommit()---提交事务
  209. static int vdbeCommit(sqlite3 *db){
  210. int i;
  211. ; /* Number of databases with an active write-transaction */
  212. int rc = SQLITE_OK;
  213. ;
  215. ; i<db->nDb; i++){
  216. Btree *pBt = db->aDb[i].pBt;
  217. if( pBt && sqlite3BtreeIsInTrans(pBt) ){
  218. needXcommit = ;
  219. ) nTrans++;
  220. }
  221. }
  223. /* If there are any write-transactions at all, invoke the commit hook */
  224. if( needXcommit && db->xCommitCallback ){
  225. sqlite3SafetyOff(db);
  226. rc = db->xCommitCallback(db->pCommitArg);
  227. sqlite3SafetyOn(db);
  228. if( rc ){
  229. return SQLITE_CONSTRAINT;
  230. }
  231. }
  233. /* The simple case - no more than one database file (not counting the
  234. ** TEMP database) has a transaction active. There is no need for the
  235. ** master-journal.
  236. **
  237. ** If the return value of sqlite3BtreeGetFilename() is a zero length
  238. ** string, it means the main database is :memory:. In that case we do
  239. ** not support atomic multi-file commits, so use the simple case then
  240. ** too.
  241. */
  242. //简单的情况,只有一个数据库文件,不需要master-journal
  243. ==strlen(sqlite3BtreeGetFilename(db->aDb[].pBt)) || nTrans<= ){
  244. ; rc==SQLITE_OK && i<db->nDb; i++){
  245. Btree *pBt = db->aDb[i].pBt;
  246. if( pBt ){
  247. //同步btree
  248. rc = sqlite3BtreeSync(pBt, );
  249. }
  250. }
  252. /* Do the commit only if all databases successfully synced */
  253. //commit事务
  254. if( rc==SQLITE_OK ){
  255. ; i<db->nDb; i++){
  256. Btree *pBt = db->aDb[i].pBt;
  257. if( pBt ){
  258. sqlite3BtreeCommit(pBt);
  259. }
  260. }
  261. }
  262. }
  264. /* The complex case - There is a multi-file write-transaction active.
  265. ** This requires a master journal file to ensure the transaction is
  266. ** committed atomicly.
  267. */
  268. #ifndef SQLITE_OMIT_DISKIO
  269. else{
  270. ;
  271. ; /* File-name for the master journal */
  272. ].pBt);
  273. OsFile *master = ;
  275. /* Select a master journal file name */
  276. do {
  277. u32 random;
  278. sqliteFree(zMaster);
  279. sqlite3Randomness(sizeof(random), &random);
  280. zMaster = sqlite3MPrintf("%s-mj%08X", zMainFile, random&0x7fffffff);
  281. if( !zMaster ){
  282. return SQLITE_NOMEM;
  283. }
  284. }while( sqlite3OsFileExists(zMaster) );
  286. /* Open the master journal. */
  287. rc = sqlite3OsOpenExclusive(zMaster, &master, );
  288. if( rc!=SQLITE_OK ){
  289. sqliteFree(zMaster);
  290. return rc;
  291. }
  293. /* Write the name of each database file in the transaction into the new
  294. ** master journal file. If an error occurs at this point close
  295. ** and delete the master journal file. All the individual journal files
  296. ** still have 'null' as the master journal pointer, so they will roll
  297. ** back independently if a failure occurs.
  298. */
  299. ; i<db->nDb; i++){
  300. Btree *pBt = db->aDb[i].pBt;
  301. ) continue; /* Ignore the TEMP database */
  302. if( pBt && sqlite3BtreeIsInTrans(pBt) ){
  303. char const *zFile = sqlite3BtreeGetJournalname(pBt);
  304. ]== ) continue; /* Ignore :memory: databases */
  305. if( !needSync && !sqlite3BtreeSyncDisabled(pBt) ){
  306. needSync = ;
  307. }
  308. rc = sqlite3OsWrite(master, zFile, strlen(zFile)+);
  309. if( rc!=SQLITE_OK ){
  310. sqlite3OsClose(&master);
  311. sqlite3OsDelete(zMaster);
  312. sqliteFree(zMaster);
  313. return rc;
  314. }
  315. }
  316. }
  318. /* Sync the master journal file. Before doing this, open the directory
  319. ** the master journal file is store in so that it gets synced too.
  320. */
  321. zMainFile = sqlite3BtreeGetDirname(db->aDb[].pBt);
  322. rc = sqlite3OsOpenDirectory(master, zMainFile);
  323. if( rc!=SQLITE_OK ||
  324. (needSync && (rc=sqlite3OsSync(master,))!=SQLITE_OK) ){
  325. sqlite3OsClose(&master);
  326. sqlite3OsDelete(zMaster);
  327. sqliteFree(zMaster);
  328. return rc;
  329. }
  331. /* Sync all the db files involved in the transaction. The same call
  332. ** sets the master journal pointer in each individual journal. If
  333. ** an error occurs here, do not delete the master journal file.
  334. **
  335. ** If the error occurs during the first call to sqlite3BtreeSync(),
  336. ** then there is a chance that the master journal file will be
  337. ** orphaned. But we cannot delete it, in case the master journal
  338. ** file name was written into the journal file before the failure
  339. ** occured.
  340. */
  341. ; i<db->nDb; i++){
  342. Btree *pBt = db->aDb[i].pBt;
  343. if( pBt && sqlite3BtreeIsInTrans(pBt) ){
  344. rc = sqlite3BtreeSync(pBt, zMaster);
  345. if( rc!=SQLITE_OK ){
  346. sqlite3OsClose(&master);
  347. sqliteFree(zMaster);
  348. return rc;
  349. }
  350. }
  351. }
  352. sqlite3OsClose(&master);
  354. /* Delete the master journal file. This commits the transaction. After
  355. ** doing this the directory is synced again before any individual
  356. ** transaction files are deleted.
  357. */
  358. rc = sqlite3OsDelete(zMaster);
  359. assert( rc==SQLITE_OK );
  360. sqliteFree(zMaster);
  361. zMaster = ;
  362. rc = sqlite3OsSyncDirectory(zMainFile);
  363. if( rc!=SQLITE_OK ){
  364. /* This is not good. The master journal file has been deleted, but
  365. ** the directory sync failed. There is no completely safe course of
  366. ** action from here. The individual journals contain the name of the
  367. ** master journal file, but there is no way of knowing if that
  368. ** master journal exists now or if it will exist after the operating
  369. ** system crash that may follow the fsync() failure.
  370. */
  371. return rc;
  372. }
  374. /* All files and directories have already been synced, so the following
  375. ** calls to sqlite3BtreeCommit() are only closing files and deleting
  376. ** journals. If something goes wrong while this is happening we don't
  377. ** really care. The integrity of the transaction is already guaranteed,
  378. ** but some stray 'cold' journals may be lying around. Returning an
  379. ** error code won't help matters.
  380. */
  381. ; i<db->nDb; i++){
  382. Btree *pBt = db->aDb[i].pBt;
  383. if( pBt ){
  384. sqlite3BtreeCommit(pBt);
  385. }
  386. }
  387. }
  388. #endif
  390. return rc;
  391. }

Page Cache之并发控制




  RESERVED锁意味着进程将要对数据库进行写操作。某一时刻只能有一个RESERVED Lock,但是RESERVED锁和SHARED锁可以共存,而且可以对数据库加新的SHARED锁。
  主要是出于并发性的考虑。由于SQLite只有库级排斥锁(EXCLUSIVE LOCK),如果写事务一开始就上EXCLUSIVE锁,然后再进行实际的数据更新,写磁盘操作,这会使得并发性大大降低。而SQLite一旦得到数据库的RESERVED锁,就可以对缓存中的数据进行修改,而与此同时,其它进程可以继续进行读操作。直到真正需要写磁盘时才对数据库加EXCLUSIVE锁。

  PENDING LOCK意味着进程已经完成缓存中的数据修改,并想立即将更新写入磁盘。它将等待此时已经存在的读锁事务完成,但是不允许对数据库加新的SHARED LOCK(这与RESERVED LOCK相区别)。
  主要是为了防止出现写饿死的情况。由于RESERVED LOCK允许其它连接继续对数据库加新的SHARED LOCK,如果没有PENDING LOCK,读事务就可能源源不断地到来,写事务永远等不到所有SHARED LOCK被释放,从而发生饿死。



  1. // Acquire a lock of type locktype on the pager's file. While the OS layer reports SQLITE_BUSY,
  2. // keep retrying until the busy handler returns false or the lock is obtained; returns the last result code.
  3. static int pager_wait_on_lock(Pager *pPager, int locktype){
  4. int rc;
  8. if( pPager->state>=locktype ){ // the pager state is already at or above the requested level
  9. rc = SQLITE_OK;
  10. }else{
  11. // Retry for as long as the lock is busy and the busy handler asks us to continue
  12. do {
  13. rc = sqlite3OsLock(pPager->fd, locktype);
  14. }while( rc==SQLITE_BUSY && sqlite3InvokeBusyHandler(pPager->pBusyHandler) );
  16. if( rc==SQLITE_OK ){
  18. // Record the newly acquired lock level in the pager state
  19. pPager->state = locktype;
  20. }
  21. }
  22. return rc;
  23. }


  1. static int winLock(OsFile *id, int locktype){ /* Try to raise the lock held on file id to locktype; yields SQLITE_OK on success, SQLITE_BUSY otherwise */
  2. int rc = SQLITE_OK; /* Return code from subroutines */
  3. ; /* Result of a windows lock call */
  4. int newLocktype; /* Set id->locktype to this value before exiting */
  5. ;/* True if we acquired a PENDING lock this time */
  6. winFile *pFile = (winFile*)id;
  8. assert( pFile!= );
  9. TRACE5("LOCK %d %d was %d(%d)\n",
  10. pFile->h, locktype, pFile->locktype, pFile->sharedLockByte);
  12. /* If there is already a lock of this type or more restrictive on the
  13. ** OsFile, do nothing. Don't use the end_lock: exit path, as
  14. ** sqlite3OsEnterMutex() hasn't been called yet.
  15. */
  16. // The lock already held is >= locktype: return immediately
  17. if( pFile->locktype>=locktype ){
  18. return SQLITE_OK;
  19. }
  21. /* Make sure the locking sequence is correct
  22. */
  23. assert( pFile->locktype!=NO_LOCK || locktype==SHARED_LOCK );
  24. assert( locktype!=PENDING_LOCK );
  25. assert( locktype!=RESERVED_LOCK || pFile->locktype==SHARED_LOCK );
  27. /* Lock the PENDING_LOCK byte if we need to acquire a PENDING lock or
  28. ** a SHARED lock. If we are acquiring a SHARED lock, the acquisition of
  29. ** the PENDING_LOCK byte is temporary.
  30. */
  31. newLocktype = pFile->locktype;
  32. /* Two cases: (1) the file is currently unlocked (acquiring a read lock --
  33. ** a stage that both read and write transactions go through first),
  34. ** (2) a RESERVED_LOCK is held and the requested lock is EXCLUSIVE_LOCK (write transaction).
  35. ** In either case, take the PENDING lock.
  36. */
  37. /////////////////////(1)///////////////////
  38. if( pFile->locktype==NO_LOCK
  39. || (locktype==EXCLUSIVE_LOCK && pFile->locktype==RESERVED_LOCK)
  40. ){
  41. ; /* NOTE(review): text appears to be missing from this listing here and on other lines with empty operands -- confirm against the original source */
  42. // Take the PENDING lock
  43. && (res = LockFile(pFile->h, PENDING_BYTE, , , ))== ){
  44. /* Try 3 times to get the pending lock. The pending lock might be
  45. ** held by another reader process who will release it momentarily.
  46. */
  47. TRACE2("could not get a PENDING lock. cnt=%d\n", cnt);
  48. Sleep();
  49. }
  50. // Remember in gotPendingLock whether the PENDING lock was obtained, so that it can be released later
  51. gotPendingLock = res;
  52. }
  54. /* Acquire a shared lock
  55. */
  56. /* Acquire the shared lock.
  57. ** At this point the transaction should hold the PENDING lock, which serves as an
  59. ** intermediate step on the way from UNLOCKED to SHARED. Effectively two locks are held
  60. ** here, PENDING and SHARED; only once the PENDING lock is released further down is the file truly in the SHARED state.
  61. */
  62. ////////////////(2)/////////////////////////////////////
  63. if( locktype==SHARED_LOCK && res ){
  64. assert( pFile->locktype==NO_LOCK );
  65. res = getReadLock(pFile);
  66. if( res ){
  67. newLocktype = SHARED_LOCK;
  68. }
  69. }
  71. /* Acquire a RESERVED lock
  72. */
  73. /* Acquire RESERVED.
  74. ** The transaction holds SHARED_LOCK at this point; the transition is SHARED->RESERVED.
  75. ** The purpose of the RESERVED lock is to improve the concurrency of the system.
  76. */
  77. ////////////////////////(3)/////////////////////////////////
  78. if( locktype==RESERVED_LOCK && res ){
  79. assert( pFile->locktype==SHARED_LOCK );
  80. // Take the RESERVED lock
  81. res = LockFile(pFile->h, RESERVED_BYTE, , , );
  82. if( res ){
  83. newLocktype = RESERVED_LOCK;
  84. }
  85. }
  87. /* Acquire a PENDING lock
  88. */
  89. /* Acquire the PENDING lock.
  91. ** The transition is RESERVED->PENDING.
  92. ** The only purpose of the PENDING state is to prevent writer starvation.
  93. ** Read transactions never execute this code, but write transactions do;
  94. ** afterwards gotPendingLock is set to 0, so the PENDING lock is not released later on.
  95. */
  96. //////////////////////////////(4)////////////////////////////////
  97. if( locktype==EXCLUSIVE_LOCK && res ){
  98. // No actual locking call is made here; only the recorded lock state is changed to PENDING
  99. newLocktype = PENDING_LOCK;
  100. // With gotPendingLock updated here, the PENDING lock will not be released below;
  101. // this amounts to holding the PENDING lock, which was really taken at the start of the function
  102. gotPendingLock = ;
  103. }
  105. /* Acquire an EXCLUSIVE lock
  106. */
  107. /* Acquire the EXCLUSIVE lock.
  108. ** A transaction executing this code should satisfy the following conditions:
  109. ** (1) the lock state is PENDING, (2) it is a write transaction.
  110. ** Transition: PENDING->EXCLUSIVE
  111. */
  112. /////////////////////////(5)///////////////////////////////////////////
  113. if( locktype==EXCLUSIVE_LOCK && res ){
  114. assert( pFile->locktype>=SHARED_LOCK );
  115. res = unlockReadLock(pFile);
  116. TRACE2("unreadlock = %d\n", res);
  117. res = LockFile(pFile->h, SHARED_FIRST, , SHARED_SIZE, );
  118. if( res ){
  119. newLocktype = EXCLUSIVE_LOCK;
  120. }else{
  121. TRACE2("error-code = %d\n", GetLastError());
  122. }
  123. }
  125. /* If we are holding a PENDING lock that ought to be released, then
  126. ** release it now.
  127. */
  128. /* This is the case where the transaction took the PENDING lock earlier (step 2) and is
  129. ** requesting SHARED_LOCK (step 3; compare with the lock-state diagram). Because the PENDING
  130. ** lock was already obtained, it has to be released here; the lock goes PENDING->SHARED.
  131. */
  132. //////////////////////////(6)/////////////////////////////////////
  133. if( gotPendingLock && locktype==SHARED_LOCK ){
  134. UnlockFile(pFile->h, PENDING_BYTE, , , );
  135. }
  137. /* Update the state of the lock has held in the file descriptor then
  138. ** return the appropriate result code.
  139. */
  140. if( res ){
  141. rc = SQLITE_OK;
  142. }else{
  143. TRACE4("LOCK FAILED %d trying for %d but got %d\n", pFile->h,
  144. locktype, newLocktype);
  145. rc = SQLITE_BUSY;
  146. }
  147. // Record the resulting lock state on the file handle
  148. pFile->locktype = newLocktype;
  149. return rc;
  150. }





  这是一个很有意思的问题,对于任何采取加锁作为并发控制机制的DBMS都得考虑这个问题。有两种方式处理死锁问题:(1)死锁预防(deadlock prevention);(2)死锁检测(deadlock detection)与死锁恢复(deadlock recovery)。SQLite采取了第一种方式,如果一个事务不能获取锁,它会重试有限次(这个重试次数可以由应用程序预先设置,默认为1次)——这实际上是基于锁超时的机制。如果还是不能获取锁,SQLite返回SQLITE_BUSY错误给应用程序,应用程序此时应该暂停,稍后再重试;或者中止当前事务。虽然基于锁超时的机制简单,容易实现,但是它的缺点也是明显的——资源浪费。

5、事务类型(Transaction Types)




  一个deferred事务在真正需要锁之前不获取任何锁,而且BEGIN语句本身也不会做什么事情——事务开始于UNLOCKED状态;这是默认情况。如果仅仅用BEGIN开始一个事务,那么事务就是DEFERRED的,此时它不会获取任何锁;当对数据库进行第一次读操作时,它会获取SHARED LOCK;同样,当进行第一次写操作时,它会获取RESERVED LOCK。
  由BEGIN IMMEDIATE开始的immediate事务会试着获取RESERVED LOCK。如果成功,BEGIN IMMEDIATE保证没有别的连接可以写数据库。别的连接仍然可以对数据库进行读操作,但是RESERVED LOCK会阻止其它连接执行BEGIN IMMEDIATE或者BEGIN EXCLUSIVE命令,此时SQLite会向它们返回SQLITE_BUSY错误。这时你就可以对数据库进行修改操作,但是不一定能马上提交:如果COMMIT时返回SQLITE_BUSY错误,这意味着还有其它的读事务没有完成,得等它们执行完后才能提交事务。
  上面那个例子的问题在于两个连接最终都想写数据库,但是他们都没有放弃各自原来的锁,最终,shared锁导致了问题的出现。如果两个连接都以BEGIN IMMEDIATE开始事务,那么死锁就不会发生。在这种情况下,在同一时刻只能有一个连接进入BEGIN IMMEDIATE,其它的连接就得等待。BEGIN IMMEDIATE和BEGIN EXCLUSIVE通常被写事务使用。就像同步机制一样,它防止了死锁的产生。
  基本的准则是:如果你使用的数据库没有其它的连接,用BEGIN就足够了。但是,如果还有其它连接也要对该数据库进行写操作,就得使用BEGIN IMMEDIATE或BEGIN EXCLUSIVE开始你的事务。


