LCOV - code coverage report
Current view: top level - lib/tdb/common - mutex.c (source / functions) Hit Total Coverage
Test: coverage report for master 70ed9daf Lines: 364 517 70.4 %
Date: 2024-01-11 09:59:51 Functions: 19 20 95.0 %

          Line data    Source code
       1             : /*
       2             :    Unix SMB/CIFS implementation.
       3             : 
       4             :    trivial database library
       5             : 
       6             :    Copyright (C) Volker Lendecke 2012,2013
       7             :    Copyright (C) Stefan Metzmacher 2013,2014
       8             :    Copyright (C) Michael Adam 2014
       9             : 
      10             :      ** NOTE! The following LGPL license applies to the tdb
      11             :      ** library. This does NOT imply that all of Samba is released
      12             :      ** under the LGPL
      13             : 
      14             :    This library is free software; you can redistribute it and/or
      15             :    modify it under the terms of the GNU Lesser General Public
      16             :    License as published by the Free Software Foundation; either
      17             :    version 3 of the License, or (at your option) any later version.
      18             : 
      19             :    This library is distributed in the hope that it will be useful,
      20             :    but WITHOUT ANY WARRANTY; without even the implied warranty of
      21             :    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      22             :    Lesser General Public License for more details.
      23             : 
      24             :    You should have received a copy of the GNU Lesser General Public
      25             :    License along with this library; if not, see <http://www.gnu.org/licenses/>.
      26             : */
      27             : #include "tdb_private.h"
      28             : #include "system/threads.h"
      29             : 
      30             : #ifdef USE_TDB_MUTEX_LOCKING
      31             : 
      32             : /*
      33             :  * If we run with mutexes, we store the "struct tdb_mutexes" at the
      34             :  * beginning of the file. We store an additional tdb_header right
      35             :  * beyond the mutex area, page aligned. All the offsets within the tdb
      36             :  * are relative to the area behind the mutex area. tdb->map_ptr points
      37             :  * behind the mmap area as well, so the read and write path in the
      38             :  * mutex case can remain unchanged.
      39             :  *
      40             :  * Early in the mutex development the mutexes were placed between the hash
      41             :  * chain pointers and the real tdb data. This had two drawbacks: First, it
      42             :  * made pointer calculations more complex. Second, we had to mmap the mutex
      43             :  * area twice. One was the normal map_ptr in the tdb. This frequently changed
      44             :  * from within tdb_oob. At least the Linux glibc robust mutex code assumes
      45             :  * constant pointers in memory, so a constantly changing mmap area destroys
      46             :  * the mutex list. So we had to mmap the first bytes of the file with a second
      47             :  * mmap call. With that scheme, very weird errors happened that could be
      48             :  * easily fixed by doing the mutex mmap in a second file. It seemed that
      49             :  * mapping the same memory area twice does not end up in accessing the same
      50             :  * physical page, looking at the mutexes in gdb it seemed that old data showed
      51             :  * up after some re-mapping. To avoid a separate mutex file, the code now puts
      52             :  * the real content of the tdb file after the mutex area. This way we do not
      53             :  * have overlapping mmap areas, the mutex area is mmapped once and not
      54             :  * changed, the tdb data area's mmap is constantly changed but does not
      55             :  * overlap.
      56             :  */
      57             : 
      58             : struct tdb_mutexes {
      59             :         struct tdb_header hdr;
      60             : 
      61             :         /* protect allrecord_lock */
      62             :         pthread_mutex_t allrecord_mutex;
      63             : 
      64             :         /*
      65             :          * F_UNLCK: free,
      66             :          * F_RDLCK: shared,
      67             :          * F_WRLCK: exclusive
      68             :          */
      69             :         short int allrecord_lock;
      70             : 
      71             :         /*
      72             :          * Index 0 is the freelist mutex, followed by
      73             :          * one mutex per hashchain.
      74             :          */
      75             :         pthread_mutex_t hashchains[1];
      76             : };
      77             : 
      78   982334941 : bool tdb_have_mutexes(struct tdb_context *tdb)
      79             : {
      80   982334941 :         return ((tdb->feature_flags & TDB_FEATURE_FLAG_MUTEX) != 0);
      81             : }
      82             : 
      83     8206421 : size_t tdb_mutex_size(struct tdb_context *tdb)
      84             : {
      85      203452 :         size_t mutex_size;
      86             : 
      87     8020722 :         if (!tdb_have_mutexes(tdb)) {
      88     6627098 :                 return 0;
      89             :         }
      90             : 
      91     1413934 :         mutex_size = sizeof(struct tdb_mutexes);
      92     1413934 :         mutex_size += tdb->hash_size * sizeof(pthread_mutex_t);
      93             : 
      94     1393624 :         return TDB_ALIGN(mutex_size, tdb->page_size);
      95             : }
      96             : 
      97             : /*
      98             :  * Get the index for a chain mutex
      99             :  */
     100   752493766 : static bool tdb_mutex_index(struct tdb_context *tdb, off_t off, off_t len,
     101             :                             unsigned *idx)
     102             : {
     103             :         /*
     104             :          * Weird but true: We fcntl lock 1 byte at an offset 4 bytes before
     105             :          * the 4 bytes of the freelist start and the hash chain that is about
     106             :          * to be locked. See lock_offset() where the freelist is -1 vs the
     107             :          * "+1" in TDB_HASH_TOP(). Because the mutex array is represented in
     108             :          * the tdb file itself as data, we need to adjust the offset here.
     109             :          */
     110   752493766 :         const off_t freelist_lock_ofs = FREELIST_TOP - sizeof(tdb_off_t);
     111             : 
     112   752493766 :         if (!tdb_have_mutexes(tdb)) {
     113   521446906 :                 return false;
     114             :         }
     115   215161494 :         if (len != 1) {
     116             :                 /* Possibly the allrecord lock */
     117      305027 :                 return false;
     118             :         }
     119   214848029 :         if (off < freelist_lock_ofs) {
     120             :                 /* One of the special locks */
     121    35085247 :                 return false;
     122             :         }
     123   178451809 :         if (tdb->hash_size == 0) {
     124             :                 /* tdb not initialized yet, called from tdb_open_ex() */
     125           0 :                 return false;
     126             :         }
     127   178451809 :         if (off >= TDB_DATA_START(tdb->hash_size)) {
     128             :                 /* Single record lock from traverses */
     129      312896 :                 return false;
     130             :         }
     131             : 
     132             :         /*
     133             :          * Now we know it's a freelist or hash chain lock. Those are always 4
     134             :          * byte aligned. Paranoia check.
     135             :          */
     136   178136017 :         if ((off % sizeof(tdb_off_t)) != 0) {
     137           0 :                 abort();
     138             :         }
     139             : 
     140             :         /*
     141             :          * Re-index the fcntl offset into an offset into the mutex array
     142             :          */
     143   178136017 :         off -= freelist_lock_ofs; /* rebase to index 0 */
     144   178136017 :         off /= sizeof(tdb_off_t); /* 0 for freelist 1-n for hashchain */
     145             : 
     146   178136017 :         *idx = off;
     147   178136017 :         return true;
     148             : }
     149             : 
     150    55658661 : static bool tdb_have_mutex_chainlocks(struct tdb_context *tdb)
     151             : {
     152     1306443 :         int i;
     153             : 
     154    90959795 :         for (i=0; i < tdb->num_lockrecs; i++) {
     155     1281374 :                 bool ret;
     156     1281374 :                 unsigned idx;
     157             : 
     158    36582508 :                 ret = tdb_mutex_index(tdb,
     159    35301134 :                                       tdb->lockrecs[i].off,
     160    35301134 :                                       tdb->lockrecs[i].count,
     161             :                                       &idx);
     162    35301134 :                 if (!ret) {
     163    35301134 :                         continue;
     164             :                 }
     165             : 
     166           0 :                 if (idx == 0) {
     167             :                         /* this is the freelist mutex */
     168           0 :                         continue;
     169             :                 }
     170             : 
     171           0 :                 return true;
     172             :         }
     173             : 
     174    54352218 :         return false;
     175             : }
     176             : 
     177    90069639 : static int chain_mutex_lock(pthread_mutex_t *m, bool waitflag)
     178             : {
     179     1581167 :         int ret;
     180             : 
     181    90069639 :         if (waitflag) {
     182    89689439 :                 ret = pthread_mutex_lock(m);
     183             :         } else {
     184      380200 :                 ret = pthread_mutex_trylock(m);
     185             :         }
     186    90069639 :         if (ret != EOWNERDEAD) {
     187    88488471 :                 return ret;
     188             :         }
     189             : 
     190             :         /*
     191             :          * For chainlocks, we don't do any cleanup (yet?)
     192             :          */
     193           2 :         return pthread_mutex_consistent(m);
     194             : }
     195             : 
     196          18 : static int allrecord_mutex_lock(struct tdb_mutexes *m, bool waitflag)
     197             : {
     198           0 :         int ret;
     199             : 
     200          18 :         if (waitflag) {
     201          13 :                 ret = pthread_mutex_lock(&m->allrecord_mutex);
     202             :         } else {
     203           5 :                 ret = pthread_mutex_trylock(&m->allrecord_mutex);
     204             :         }
     205          18 :         if (ret != EOWNERDEAD) {
     206          17 :                 return ret;
     207             :         }
     208             : 
     209             :         /*
     210             :          * The allrecord lock holder died. We need to reset the allrecord_lock
     211             :          * to F_UNLCK. This should also be the indication for
     212             :          * tdb_needs_recovery.
     213             :          */
     214           1 :         m->allrecord_lock = F_UNLCK;
     215             : 
     216           1 :         return pthread_mutex_consistent(&m->allrecord_mutex);
     217             : }
     218             : 
     219   414588651 : bool tdb_mutex_lock(struct tdb_context *tdb, int rw, off_t off, off_t len,
     220             :                     bool waitflag, int *pret)
     221             : {
     222   414588651 :         struct tdb_mutexes *m = tdb->mutexes;
     223    10815163 :         pthread_mutex_t *chain;
     224    10815163 :         int ret;
     225    10815163 :         unsigned idx;
     226    10815163 :         bool allrecord_ok;
     227             : 
     228   414588651 :         if (!tdb_mutex_index(tdb, off, len, &idx)) {
     229   316285309 :                 return false;
     230             :         }
     231    89069346 :         chain = &m->hashchains[idx];
     232             : 
     233    89069349 : again:
     234    89069349 :         ret = chain_mutex_lock(chain, waitflag);
     235    89069349 :         if (ret == EBUSY) {
     236        2670 :                 ret = EAGAIN;
     237             :         }
     238    89069349 :         if (ret != 0) {
     239        2670 :                 errno = ret;
     240        2670 :                 goto fail;
     241             :         }
     242             : 
     243    89066679 :         if (idx == 0) {
     244             :                 /*
     245             :                  * This is a freelist lock, which is independent of
     246             :                  * the allrecord lock. So we're done once we got the
     247             :                  * freelist mutex.
     248             :                  */
     249    33408018 :                 *pret = 0;
     250    33408018 :                 return true;
     251             :         }
     252             : 
     253    55658661 :         if (tdb_have_mutex_chainlocks(tdb)) {
     254             :                 /*
     255             :                  * We can only check the allrecord lock once. If we do it with
     256             :                  * one chain mutex locked, we will deadlock with the allrecord
     257             :                  * locker process in the following way: We lock the first hash
     258             :                  * chain, we check for the allrecord lock. We keep the hash
     259             :                  * chain locked. Then the allrecord locker locks the
     260             :                  * allrecord_mutex. It walks the list of chain mutexes,
     261             :                  * locking them all in sequence. Meanwhile, we have the chain
     262             :                  * mutex locked, so the allrecord locker blocks trying to lock
     263             :                  * our chain mutex. Then we come in and try to lock the second
     264             :                  * chain lock, which in most cases will be the freelist. We
     265             :                  * see that the allrecord lock is locked and put ourselves on
     266             :                  * the allrecord_mutex. This will never be signalled though
     267             :                  * because the allrecord locker waits for us to give up the
     268             :                  * chain lock.
     269             :                  */
     270             : 
     271           0 :                 *pret = 0;
     272           0 :                 return true;
     273             :         }
     274             : 
     275             :         /*
     276             :          * Check if someone has the allrecord lock: queue if so.
     277             :          */
     278             : 
     279    55658661 :         allrecord_ok = false;
     280             : 
     281    55658661 :         if (m->allrecord_lock == F_UNLCK) {
     282             :                 /*
     283             :                  * allrecord lock not taken
     284             :                  */
     285    55658649 :                 allrecord_ok = true;
     286             :         }
     287             : 
     288    55658661 :         if ((m->allrecord_lock == F_RDLCK) && (rw == F_RDLCK)) {
     289             :                 /*
     290             :                  * allrecord shared lock taken, but we only want to read
     291             :                  */
     292           6 :                 allrecord_ok = true;
     293             :         }
     294             : 
     295    55658661 :         if (allrecord_ok) {
     296    55658655 :                 *pret = 0;
     297    55658655 :                 return true;
     298             :         }
     299             : 
     300           6 :         ret = pthread_mutex_unlock(chain);
     301           6 :         if (ret != 0) {
     302           0 :                 TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock"
     303             :                          "(chain_mutex) failed: %s\n", strerror(ret)));
     304           0 :                 errno = ret;
     305           0 :                 goto fail;
     306             :         }
     307           6 :         ret = allrecord_mutex_lock(m, waitflag);
     308           6 :         if (ret == EBUSY) {
     309           3 :                 ret = EAGAIN;
     310             :         }
     311           6 :         if (ret != 0) {
     312           3 :                 if (waitflag || (ret != EAGAIN)) {
     313           0 :                         TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_%slock"
     314             :                                  "(allrecord_mutex) failed: %s\n",
     315             :                                  waitflag ? "" : "try_",  strerror(ret)));
     316             :                 }
     317           3 :                 errno = ret;
     318           3 :                 goto fail;
     319             :         }
     320           3 :         ret = pthread_mutex_unlock(&m->allrecord_mutex);
     321           3 :         if (ret != 0) {
     322           0 :                 TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock"
     323             :                          "(allrecord_mutex) failed: %s\n", strerror(ret)));
     324           0 :                 errno = ret;
     325           0 :                 goto fail;
     326             :         }
     327           3 :         goto again;
     328             : 
     329        2673 : fail:
     330        2673 :         *pret = -1;
     331        2673 :         return true;
     332             : }
     333             : 
     334   302603981 : bool tdb_mutex_unlock(struct tdb_context *tdb, int rw, off_t off, off_t len,
     335             :                       int *pret)
     336             : {
     337   302603981 :         struct tdb_mutexes *m = tdb->mutexes;
     338     8273469 :         pthread_mutex_t *chain;
     339     8273469 :         int ret;
     340     8273469 :         unsigned idx;
     341             : 
     342   302603981 :         if (!tdb_mutex_index(tdb, off, len, &idx)) {
     343   206845007 :                 return false;
     344             :         }
     345    89066671 :         chain = &m->hashchains[idx];
     346             : 
     347    89066671 :         ret = pthread_mutex_unlock(chain);
     348    89066671 :         if (ret == 0) {
     349    89066671 :                 *pret = 0;
     350    89066671 :                 return true;
     351             :         }
     352           0 :         errno = ret;
     353           0 :         *pret = -1;
     354           0 :         return true;
     355             : }
     356             : 
     357          13 : int tdb_mutex_allrecord_lock(struct tdb_context *tdb, int ltype,
     358             :                              enum tdb_lock_flags flags)
     359             : {
     360          13 :         struct tdb_mutexes *m = tdb->mutexes;
     361           0 :         int ret;
     362           0 :         uint32_t i;
     363          13 :         bool waitflag = (flags & TDB_LOCK_WAIT);
     364           0 :         int saved_errno;
     365             : 
     366          13 :         if (tdb->flags & TDB_NOLOCK) {
     367           0 :                 return 0;
     368             :         }
     369             : 
     370          13 :         if (flags & TDB_LOCK_MARK_ONLY) {
     371           1 :                 return 0;
     372             :         }
     373             : 
     374          12 :         ret = allrecord_mutex_lock(m, waitflag);
     375          12 :         if (!waitflag && (ret == EBUSY)) {
     376           1 :                 errno = EAGAIN;
     377           1 :                 tdb->ecode = TDB_ERR_LOCK;
     378           1 :                 return -1;
     379             :         }
     380          11 :         if (ret != 0) {
     381           0 :                 if (!(flags & TDB_LOCK_PROBE)) {
     382           0 :                         TDB_LOG((tdb, TDB_DEBUG_TRACE,
     383             :                                  "allrecord_mutex_lock() failed: %s\n",
     384             :                                  strerror(ret)));
     385             :                 }
     386           0 :                 tdb->ecode = TDB_ERR_LOCK;
     387           0 :                 return -1;
     388             :         }
     389             : 
     390          11 :         if (m->allrecord_lock != F_UNLCK) {
     391           0 :                 TDB_LOG((tdb, TDB_DEBUG_FATAL, "allrecord_lock == %d\n",
     392             :                          (int)m->allrecord_lock));
     393           0 :                 goto fail_unlock_allrecord_mutex;
     394             :         }
     395          11 :         m->allrecord_lock = (ltype == F_RDLCK) ? F_RDLCK : F_WRLCK;
     396             : 
     397     1000294 :         for (i=0; i<tdb->hash_size; i++) {
     398             : 
     399             :                 /* ignore hashchains[0], the freelist */
     400     1000284 :                 pthread_mutex_t *chain = &m->hashchains[i+1];
     401             : 
     402     1000284 :                 ret = chain_mutex_lock(chain, waitflag);
     403     1000284 :                 if (!waitflag && (ret == EBUSY)) {
     404           1 :                         errno = EAGAIN;
     405           1 :                         goto fail_unroll_allrecord_lock;
     406             :                 }
     407     1000283 :                 if (ret != 0) {
     408           0 :                         if (!(flags & TDB_LOCK_PROBE)) {
     409           0 :                                 TDB_LOG((tdb, TDB_DEBUG_TRACE,
     410             :                                          "chain_mutex_lock() failed: %s\n",
     411             :                                          strerror(ret)));
     412             :                         }
     413           0 :                         errno = ret;
     414           0 :                         goto fail_unroll_allrecord_lock;
     415             :                 }
     416             : 
     417     1000283 :                 ret = pthread_mutex_unlock(chain);
     418     1000283 :                 if (ret != 0) {
     419           0 :                         TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock"
     420             :                                  "(chainlock) failed: %s\n", strerror(ret)));
     421           0 :                         errno = ret;
     422           0 :                         goto fail_unroll_allrecord_lock;
     423             :                 }
     424             :         }
     425             :         /*
     426             :          * We leave this routine with m->allrecord_mutex locked
     427             :          */
     428          10 :         return 0;
     429             : 
     430           1 : fail_unroll_allrecord_lock:
     431           1 :         m->allrecord_lock = F_UNLCK;
     432             : 
     433           1 : fail_unlock_allrecord_mutex:
     434           1 :         saved_errno = errno;
     435           1 :         ret = pthread_mutex_unlock(&m->allrecord_mutex);
     436           1 :         if (ret != 0) {
     437           0 :                 TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock"
     438             :                          "(allrecord_mutex) failed: %s\n", strerror(ret)));
     439             :         }
     440           1 :         errno = saved_errno;
     441           1 :         tdb->ecode = TDB_ERR_LOCK;
     442           1 :         return -1;
     443             : }
     444             : 
     445           2 : int tdb_mutex_allrecord_upgrade(struct tdb_context *tdb)
     446             : {
     447           2 :         struct tdb_mutexes *m = tdb->mutexes;
     448           0 :         int ret;
     449           0 :         uint32_t i;
     450             : 
     451           2 :         if (tdb->flags & TDB_NOLOCK) {
     452           0 :                 return 0;
     453             :         }
     454             : 
     455             :         /*
     456             :          * Our only caller tdb_allrecord_upgrade()
     457             :          * guarantees that we already own the allrecord lock.
     458             :          *
     459             :          * Which means m->allrecord_mutex is still locked by us.
     460             :          */
     461             : 
     462           2 :         if (m->allrecord_lock != F_RDLCK) {
     463           0 :                 tdb->ecode = TDB_ERR_LOCK;
     464           0 :                 TDB_LOG((tdb, TDB_DEBUG_FATAL, "allrecord_lock == %d\n",
     465             :                          (int)m->allrecord_lock));
     466           0 :                 return -1;
     467             :         }
     468             : 
     469           2 :         m->allrecord_lock = F_WRLCK;
     470             : 
     471           8 :         for (i=0; i<tdb->hash_size; i++) {
     472             : 
     473             :                 /* ignore hashchains[0], the freelist */
     474           6 :                 pthread_mutex_t *chain = &m->hashchains[i+1];
     475             : 
     476           6 :                 ret = chain_mutex_lock(chain, true);
     477           6 :                 if (ret != 0) {
     478           0 :                         TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_lock"
     479             :                                  "(chainlock) failed: %s\n", strerror(ret)));
     480           0 :                         goto fail_unroll_allrecord_lock;
     481             :                 }
     482             : 
     483           6 :                 ret = pthread_mutex_unlock(chain);
     484           6 :                 if (ret != 0) {
     485           0 :                         TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock"
     486             :                                  "(chainlock) failed: %s\n", strerror(ret)));
     487           0 :                         goto fail_unroll_allrecord_lock;
     488             :                 }
     489             :         }
     490             : 
     491           2 :         return 0;
     492             : 
     493           0 : fail_unroll_allrecord_lock:
     494           0 :         m->allrecord_lock = F_RDLCK;
     495           0 :         tdb->ecode = TDB_ERR_LOCK;
     496           0 :         return -1;
     497             : }
     498             : 
     499           0 : void tdb_mutex_allrecord_downgrade(struct tdb_context *tdb)
     500             : {
     501           0 :         struct tdb_mutexes *m = tdb->mutexes;
     502             : 
     503             :         /*
     504             :          * Our only caller tdb_allrecord_upgrade() (in the error case)
     505             :          * guarantees that we already own the allrecord lock.
     506             :          *
     507             :          * Which means m->allrecord_mutex is still locked by us.
     508             :          */
     509             : 
     510           0 :         if (m->allrecord_lock != F_WRLCK) {
     511           0 :                 TDB_LOG((tdb, TDB_DEBUG_FATAL, "allrecord_lock == %d\n",
     512             :                          (int)m->allrecord_lock));
     513           0 :                 return;
     514             :         }
     515             : 
     516           0 :         m->allrecord_lock = F_RDLCK;
     517           0 :         return;
     518             : }
     519             : 
     520             : 
     521           7 : int tdb_mutex_allrecord_unlock(struct tdb_context *tdb)
     522             : {
     523           7 :         struct tdb_mutexes *m = tdb->mutexes;
     524           0 :         short old;
     525           0 :         int ret;
     526             : 
     527           7 :         if (tdb->flags & TDB_NOLOCK) {
     528           0 :                 return 0;
     529             :         }
     530             : 
     531             :         /*
     532             :          * Our only callers tdb_allrecord_unlock() and
     533             :          * tdb_allrecord_lock() (in the error path)
     534             :          * guarantee that we already own the allrecord lock.
     535             :          *
     536             :          * Which means m->allrecord_mutex is still locked by us.
     537             :          */
     538             : 
     539           7 :         if ((m->allrecord_lock != F_RDLCK) && (m->allrecord_lock != F_WRLCK)) {
     540           0 :                 TDB_LOG((tdb, TDB_DEBUG_FATAL, "allrecord_lock == %d\n",
     541             :                          (int)m->allrecord_lock));
     542           0 :                 return -1;
     543             :         }
     544             : 
     545           7 :         old = m->allrecord_lock;
     546           7 :         m->allrecord_lock = F_UNLCK;
     547             : 
     548           7 :         ret = pthread_mutex_unlock(&m->allrecord_mutex);
     549           7 :         if (ret != 0) {
     550           0 :                 m->allrecord_lock = old;
     551           0 :                 TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock"
     552             :                          "(allrecord_mutex) failed: %s\n", strerror(ret)));
     553           0 :                 return -1;
     554             :         }
     555           7 :         return 0;
     556             : }
     557             : 
     558      337710 : int tdb_mutex_init(struct tdb_context *tdb)
     559             : {
     560        9304 :         struct tdb_mutexes *m;
     561        9304 :         pthread_mutexattr_t ma;
     562        9304 :         uint32_t i;
     563        9304 :         int ret;
     564             : 
     565      337710 :         ret = tdb_mutex_mmap(tdb);
     566      337710 :         if (ret == -1) {
     567           0 :                 return -1;
     568             :         }
     569      337710 :         m = tdb->mutexes;
     570             : 
     571      337710 :         ret = pthread_mutexattr_init(&ma);
     572      337710 :         if (ret != 0) {
     573           0 :                 goto fail_munmap;
     574             :         }
     575      337710 :         ret = pthread_mutexattr_settype(&ma, PTHREAD_MUTEX_ERRORCHECK);
     576      337710 :         if (ret != 0) {
     577           0 :                 goto fail;
     578             :         }
     579      337710 :         ret = pthread_mutexattr_setpshared(&ma, PTHREAD_PROCESS_SHARED);
     580      337710 :         if (ret != 0) {
     581           0 :                 goto fail;
     582             :         }
     583      337710 :         ret = pthread_mutexattr_setrobust(&ma, PTHREAD_MUTEX_ROBUST);
     584      337710 :         if (ret != 0) {
     585           0 :                 goto fail;
     586             :         }
     587             : 
     588  2152786230 :         for (i=0; i<tdb->hash_size+1; i++) {
     589  2152448520 :                 pthread_mutex_t *chain = &m->hashchains[i];
     590             : 
     591  2152448520 :                 ret = pthread_mutex_init(chain, &ma);
     592  2152448520 :                 if (ret != 0) {
     593           0 :                         goto fail;
     594             :                 }
     595             :         }
     596             : 
     597      337710 :         m->allrecord_lock = F_UNLCK;
     598             : 
     599      337710 :         ret = pthread_mutex_init(&m->allrecord_mutex, &ma);
     600      337710 :         if (ret != 0) {
     601           0 :                 goto fail;
     602             :         }
     603      328406 :         ret = 0;
     604      337710 : fail:
     605      337710 :         pthread_mutexattr_destroy(&ma);
     606      337710 : fail_munmap:
     607             : 
     608      337710 :         if (ret == 0) {
     609      328406 :                 return 0;
     610             :         }
     611             : 
     612           0 :         tdb_mutex_munmap(tdb);
     613             : 
     614           0 :         errno = ret;
     615           0 :         return -1;
     616             : }
     617             : 
     618      688952 : int tdb_mutex_mmap(struct tdb_context *tdb)
     619             : {
     620       18608 :         size_t len;
     621       18608 :         void *ptr;
     622             : 
     623      688952 :         len = tdb_mutex_size(tdb);
     624      688952 :         if (len == 0) {
     625           0 :                 return 0;
     626             :         }
     627             : 
     628      688952 :         if (tdb->mutexes != NULL) {
     629      328406 :                 return 0;
     630             :         }
     631             : 
     632      351242 :         ptr = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_FILE,
     633             :                    tdb->fd, 0);
     634      351242 :         if (ptr == MAP_FAILED) {
     635           0 :                 return -1;
     636             :         }
     637      351242 :         tdb->mutexes = (struct tdb_mutexes *)ptr;
     638             : 
     639      351242 :         return 0;
     640             : }
     641             : 
     642     6854953 : int tdb_mutex_munmap(struct tdb_context *tdb)
     643             : {
     644      167091 :         size_t len;
     645      167091 :         int ret;
     646             : 
     647     6854953 :         len = tdb_mutex_size(tdb);
     648     6689564 :         if (len == 0) {
     649     6627098 :                 return 0;
     650             :         }
     651             : 
     652       62466 :         ret = munmap(tdb->mutexes, len);
     653       62466 :         if (ret == -1) {
     654           0 :                 return -1;
     655             :         }
     656       62466 :         tdb->mutexes = NULL;
     657             : 
     658       62466 :         return 0;
     659             : }
     660             : 
     661             : static bool tdb_mutex_locking_cached;
     662             : 
     663       40075 : static bool tdb_mutex_locking_supported(void)
     664             : {
     665         880 :         pthread_mutexattr_t ma;
     666         880 :         pthread_mutex_t m;
     667         880 :         int ret;
     668         880 :         static bool initialized;
     669             : 
     670       40075 :         if (initialized) {
     671           0 :                 return tdb_mutex_locking_cached;
     672             :         }
     673             : 
     674       40075 :         initialized = true;
     675             : 
     676       40075 :         ret = pthread_mutexattr_init(&ma);
     677       40075 :         if (ret != 0) {
     678           0 :                 return false;
     679             :         }
     680       40075 :         ret = pthread_mutexattr_settype(&ma, PTHREAD_MUTEX_ERRORCHECK);
     681       40075 :         if (ret != 0) {
     682           0 :                 goto cleanup_ma;
     683             :         }
     684       40075 :         ret = pthread_mutexattr_setpshared(&ma, PTHREAD_PROCESS_SHARED);
     685       40075 :         if (ret != 0) {
     686           0 :                 goto cleanup_ma;
     687             :         }
     688       40075 :         ret = pthread_mutexattr_setrobust(&ma, PTHREAD_MUTEX_ROBUST);
     689       40075 :         if (ret != 0) {
     690           0 :                 goto cleanup_ma;
     691             :         }
     692       40075 :         ret = pthread_mutex_init(&m, &ma);
     693       40075 :         if (ret != 0) {
     694           0 :                 goto cleanup_ma;
     695             :         }
     696       40075 :         ret = pthread_mutex_lock(&m);
     697       40075 :         if (ret != 0) {
     698           0 :                 goto cleanup_m;
     699             :         }
     700             :         /*
     701             :          * This makes sure we have real mutexes
     702             :          * from a threading library instead of just
     703             :          * stubs from libc.
     704             :          */
     705       40075 :         ret = pthread_mutex_lock(&m);
     706       40075 :         if (ret != EDEADLK) {
     707           0 :                 goto cleanup_lock;
     708             :         }
     709       40075 :         ret = pthread_mutex_unlock(&m);
     710       40075 :         if (ret != 0) {
     711           0 :                 goto cleanup_m;
     712             :         }
     713             : 
     714       40075 :         tdb_mutex_locking_cached = true;
     715       40075 :         goto cleanup_m;
     716             : 
     717           0 : cleanup_lock:
     718           0 :         pthread_mutex_unlock(&m);
     719       40075 : cleanup_m:
     720       40075 :         pthread_mutex_destroy(&m);
     721       40075 : cleanup_ma:
     722       40075 :         pthread_mutexattr_destroy(&ma);
     723       40075 :         return tdb_mutex_locking_cached;
     724             : }
     725             : 
     726             : static void (*tdb_robust_mutext_old_handler)(int) = SIG_ERR;
     727             : static pid_t tdb_robust_mutex_pid = -1;
     728             : 
     729       40075 : static bool tdb_robust_mutex_setup_sigchild(void (*handler)(int),
     730             :                         void (**p_old_handler)(int))
     731             : {
     732             : #ifdef HAVE_SIGACTION
     733         880 :         struct sigaction act;
     734         880 :         struct sigaction oldact;
     735             : 
     736       40075 :         memset(&act, '\0', sizeof(act));
     737             : 
     738       40075 :         act.sa_handler = handler;
     739             : #ifdef SA_RESTART
     740       40075 :         act.sa_flags = SA_RESTART;
     741             : #endif
     742       40075 :         sigemptyset(&act.sa_mask);
     743       40075 :         sigaddset(&act.sa_mask, SIGCHLD);
     744       40075 :         sigaction(SIGCHLD, &act, &oldact);
     745       40075 :         if (p_old_handler) {
     746       40075 :                 *p_old_handler = oldact.sa_handler;
     747             :         }
     748       40075 :         return true;
     749             : #else /* !HAVE_SIGACTION */
     750             :         return false;
     751             : #endif
     752             : }
     753             : 
     754       41776 : static void tdb_robust_mutex_handler(int sig)
     755             : {
     756       41776 :         pid_t child_pid = tdb_robust_mutex_pid;
     757             : 
     758       41776 :         if (child_pid != -1) {
     759         878 :                 pid_t pid;
     760             : 
     761       40073 :                 pid = waitpid(child_pid, NULL, WNOHANG);
     762       40073 :                 if (pid == -1) {
     763       35089 :                         switch (errno) {
     764       35089 :                         case ECHILD:
     765       35089 :                                 tdb_robust_mutex_pid = -1;
     766       35089 :                                 return;
     767             : 
     768           0 :                         default:
     769           0 :                                 return;
     770             :                         }
     771             :                 }
     772        4984 :                 if (pid == child_pid) {
     773        4984 :                         tdb_robust_mutex_pid = -1;
     774        4984 :                         return;
     775             :                 }
     776             :         }
     777             : 
     778        1703 :         if (tdb_robust_mutext_old_handler == SIG_DFL) {
     779        1673 :                 return;
     780             :         }
     781          12 :         if (tdb_robust_mutext_old_handler == SIG_IGN) {
     782           0 :                 return;
     783             :         }
     784          12 :         if (tdb_robust_mutext_old_handler == SIG_ERR) {
     785           0 :                 return;
     786             :         }
     787             : 
     788          12 :         tdb_robust_mutext_old_handler(sig);
     789             : }
     790             : 
     791       80150 : static void tdb_robust_mutex_wait_for_child(pid_t *child_pid)
     792             : {
     793       80150 :         int options = WNOHANG;
     794             : 
     795       80150 :         if (*child_pid == -1) {
     796       39195 :                 return;
     797             :         }
     798             : 
     799       75165 :         while (tdb_robust_mutex_pid > 0) {
     800        1477 :                 pid_t pid;
     801             : 
     802             :                 /*
     803             :                  * First we try with WNOHANG, as the process might not exist
     804             :                  * anymore. Once we've sent SIGKILL we block waiting for the
     805             :                  * exit.
     806             :                  */
     807       70181 :                 pid = waitpid(*child_pid, NULL, options);
     808       70181 :                 if (pid == -1) {
     809           0 :                         if (errno == EINTR) {
     810           0 :                                 continue;
     811           0 :                         } else if (errno == ECHILD) {
     812           0 :                                 break;
     813             :                         } else {
     814           0 :                                 abort();
     815             :                         }
     816             :                 }
     817       70181 :                 if (pid == *child_pid) {
     818       34352 :                         break;
     819             :                 }
     820             : 
     821       35090 :                 kill(*child_pid, SIGKILL);
     822       35090 :                 options = 0;
     823             :         }
     824             : 
     825       40075 :         tdb_robust_mutex_pid = -1;
     826       40075 :         *child_pid = -1;
     827             : }
     828             : 
     829      951845 : _PUBLIC_ bool tdb_runtime_check_for_robust_mutexes(void)
     830             : {
     831      951845 :         void *ptr = NULL;
     832      951845 :         pthread_mutex_t *m = NULL;
     833       25378 :         pthread_mutexattr_t ma;
     834      951845 :         int ret = 1;
     835      951845 :         int pipe_down[2] = { -1, -1 };
     836      951845 :         int pipe_up[2] = { -1, -1 };
     837       25378 :         ssize_t nread;
     838      951845 :         char c = 0;
     839       25378 :         bool ok;
     840       25378 :         static bool initialized;
     841      951845 :         pid_t saved_child_pid = -1;
     842      951845 :         bool cleanup_ma = false;
     843             : 
     844      951845 :         if (initialized) {
     845      911770 :                 return tdb_mutex_locking_cached;
     846             :         }
     847             : 
     848       40075 :         initialized = true;
     849             : 
     850       40075 :         ok = tdb_mutex_locking_supported();
     851       40075 :         if (!ok) {
     852           0 :                 return false;
     853             :         }
     854             : 
     855       40075 :         tdb_mutex_locking_cached = false;
     856             : 
     857       40075 :         ptr = mmap(NULL, sizeof(pthread_mutex_t), PROT_READ|PROT_WRITE,
     858             :                    MAP_SHARED|MAP_ANON, -1 /* fd */, 0);
     859       40075 :         if (ptr == MAP_FAILED) {
     860           0 :                 return false;
     861             :         }
     862             : 
     863       40075 :         ret = pipe(pipe_down);
     864       40075 :         if (ret != 0) {
     865           0 :                 goto cleanup;
     866             :         }
     867       40075 :         ret = pipe(pipe_up);
     868       40075 :         if (ret != 0) {
     869           0 :                 goto cleanup;
     870             :         }
     871             : 
     872       40075 :         ret = pthread_mutexattr_init(&ma);
     873       40075 :         if (ret != 0) {
     874           0 :                 goto cleanup;
     875             :         }
     876       40075 :         cleanup_ma = true;
     877       40075 :         ret = pthread_mutexattr_settype(&ma, PTHREAD_MUTEX_ERRORCHECK);
     878       40075 :         if (ret != 0) {
     879           0 :                 goto cleanup;
     880             :         }
     881       40075 :         ret = pthread_mutexattr_setpshared(&ma, PTHREAD_PROCESS_SHARED);
     882       40075 :         if (ret != 0) {
     883           0 :                 goto cleanup;
     884             :         }
     885       40075 :         ret = pthread_mutexattr_setrobust(&ma, PTHREAD_MUTEX_ROBUST);
     886       40075 :         if (ret != 0) {
     887           0 :                 goto cleanup;
     888             :         }
     889       40075 :         ret = pthread_mutex_init(ptr, &ma);
     890       40075 :         if (ret != 0) {
     891           0 :                 goto cleanup;
     892             :         }
     893       40075 :         m = (pthread_mutex_t *)ptr;
     894             : 
     895       40075 :         if (tdb_robust_mutex_setup_sigchild(tdb_robust_mutex_handler,
     896       39195 :                         &tdb_robust_mutext_old_handler) == false) {
     897           0 :                 goto cleanup;
     898             :         }
     899             : 
     900       40075 :         tdb_robust_mutex_pid = fork();
     901       40075 :         saved_child_pid = tdb_robust_mutex_pid;
     902       40075 :         if (tdb_robust_mutex_pid == 0) {
     903           0 :                 size_t nwritten;
     904           0 :                 close(pipe_down[1]);
     905           0 :                 close(pipe_up[0]);
     906           0 :                 ret = pthread_mutex_lock(m);
     907           0 :                 nwritten = write(pipe_up[1], &ret, sizeof(ret));
     908           0 :                 if (nwritten != sizeof(ret)) {
     909           0 :                         if (pipe_down[0] != -1) {
     910           0 :                                 close(pipe_down[0]);
     911             :                         }
     912           0 :                         if (pipe_up[1] != -1) {
     913           0 :                                 close(pipe_up[1]);
     914             :                         }
     915           0 :                         _exit(1);
     916             :                 }
     917           0 :                 if (ret != 0) {
     918           0 :                         if (pipe_down[0] != -1) {
     919           0 :                                 close(pipe_down[0]);
     920             :                         }
     921           0 :                         if (pipe_up[1] != -1) {
     922           0 :                                 close(pipe_up[1]);
     923             :                         }
     924           0 :                         _exit(1);
     925             :                 }
     926           0 :                 nread = read(pipe_down[0], &c, 1);
     927           0 :                 if (nread != 1) {
     928           0 :                         if (pipe_down[0] != -1) {
     929           0 :                                 close(pipe_down[0]);
     930             :                         }
     931           0 :                         if (pipe_up[1] != -1) {
     932           0 :                                 close(pipe_up[1]);
     933             :                         }
     934           0 :                         _exit(1);
     935             :                 }
     936           0 :                 if (pipe_down[0] != -1) {
     937           0 :                         close(pipe_down[0]);
     938             :                 }
     939           0 :                 if (pipe_up[1] != -1) {
     940           0 :                         close(pipe_up[1]);
     941             :                 }
     942             :                 /* leave locked */
     943           0 :                 _exit(0);
     944             :         }
     945       40075 :         if (tdb_robust_mutex_pid == -1) {
     946           0 :                 goto cleanup;
     947             :         }
     948       40075 :         close(pipe_down[0]);
     949       40075 :         pipe_down[0] = -1;
     950       40075 :         close(pipe_up[1]);
     951       40075 :         pipe_up[1] = -1;
     952             : 
     953       40075 :         nread = read(pipe_up[0], &ret, sizeof(ret));
     954       40075 :         if (nread != sizeof(ret)) {
     955           0 :                 goto cleanup;
     956             :         }
     957             : 
     958       40075 :         ret = pthread_mutex_trylock(m);
     959       40075 :         if (ret != EBUSY) {
     960           0 :                 if (ret == 0) {
     961           0 :                         pthread_mutex_unlock(m);
     962             :                 }
     963           0 :                 goto cleanup;
     964             :         }
     965             : 
     966       40075 :         if (write(pipe_down[1], &c, 1) != 1) {
     967           0 :                 goto cleanup;
     968             :         }
     969             : 
     970       40075 :         nread = read(pipe_up[0], &c, 1);
     971       40075 :         if (nread != 0) {
     972           0 :                 goto cleanup;
     973             :         }
     974             : 
     975       40075 :         tdb_robust_mutex_wait_for_child(&saved_child_pid);
     976             : 
     977       40075 :         ret = pthread_mutex_trylock(m);
     978       40075 :         if (ret != EOWNERDEAD) {
     979           0 :                 if (ret == 0) {
     980           0 :                         pthread_mutex_unlock(m);
     981             :                 }
     982           0 :                 goto cleanup;
     983             :         }
     984             : 
     985       40075 :         ret = pthread_mutex_consistent(m);
     986       40075 :         if (ret != 0) {
     987           0 :                 goto cleanup;
     988             :         }
     989             : 
     990       40075 :         ret = pthread_mutex_trylock(m);
     991       40075 :         if (ret != EDEADLK && ret != EBUSY) {
     992           0 :                 pthread_mutex_unlock(m);
     993           0 :                 goto cleanup;
     994             :         }
     995             : 
     996       40075 :         ret = pthread_mutex_unlock(m);
     997       40075 :         if (ret != 0) {
     998           0 :                 goto cleanup;
     999             :         }
    1000             : 
    1001       40075 :         tdb_mutex_locking_cached = true;
    1002             : 
    1003       40075 : cleanup:
    1004             :         /*
    1005             :          * Note that we don't reset the signal handler we just reset
    1006             :          * tdb_robust_mutex_pid to -1. This is ok as this code path is only
    1007             :          * called once per process.
    1008             :          *
    1009             :          * Leaving our signal handler avoids races with other threads potentially
    1010             :          * setting up their SIGCHLD handlers.
    1011             :          *
    1012             :          * The worst thing that can happen is that the other newer signal
    1013             :          * handler will get the SIGCHLD signal for our child and/or reap the
    1014             :          * child with a wait() function. tdb_robust_mutex_wait_for_child()
    1015             :          * handles the case where waitpid returns ECHILD.
    1016             :          */
    1017       40075 :         tdb_robust_mutex_wait_for_child(&saved_child_pid);
    1018             : 
    1019       40075 :         if (m != NULL) {
    1020       40075 :                 pthread_mutex_destroy(m);
    1021             :         }
    1022       40075 :         if (cleanup_ma) {
    1023       40075 :                 pthread_mutexattr_destroy(&ma);
    1024             :         }
    1025       40075 :         if (pipe_down[0] != -1) {
    1026           0 :                 close(pipe_down[0]);
    1027             :         }
    1028       40075 :         if (pipe_down[1] != -1) {
    1029       40075 :                 close(pipe_down[1]);
    1030             :         }
    1031       40075 :         if (pipe_up[0] != -1) {
    1032       40075 :                 close(pipe_up[0]);
    1033             :         }
    1034       40075 :         if (pipe_up[1] != -1) {
    1035           0 :                 close(pipe_up[1]);
    1036             :         }
    1037       40075 :         if (ptr != NULL) {
    1038       40075 :                 munmap(ptr, sizeof(pthread_mutex_t));
    1039             :         }
    1040             : 
    1041       40075 :         return tdb_mutex_locking_cached;
    1042             : }
    1043             : 
    1044             : #else
    1045             : 
    1046             : size_t tdb_mutex_size(struct tdb_context *tdb)
    1047             : {
    1048             :         return 0;
    1049             : }
    1050             : 
    1051             : bool tdb_have_mutexes(struct tdb_context *tdb)
    1052             : {
    1053             :         return false;
    1054             : }
    1055             : 
    1056             : int tdb_mutex_allrecord_lock(struct tdb_context *tdb, int ltype,
    1057             :                              enum tdb_lock_flags flags)
    1058             : {
    1059             :         tdb->ecode = TDB_ERR_LOCK;
    1060             :         return -1;
    1061             : }
    1062             : 
    1063             : int tdb_mutex_allrecord_unlock(struct tdb_context *tdb)
    1064             : {
    1065             :         return -1;
    1066             : }
    1067             : 
    1068             : int tdb_mutex_allrecord_upgrade(struct tdb_context *tdb)
    1069             : {
    1070             :         tdb->ecode = TDB_ERR_LOCK;
    1071             :         return -1;
    1072             : }
    1073             : 
    1074             : void tdb_mutex_allrecord_downgrade(struct tdb_context *tdb)
    1075             : {
    1076             :         return;
    1077             : }
    1078             : 
    1079             : int tdb_mutex_mmap(struct tdb_context *tdb)
    1080             : {
    1081             :         errno = ENOSYS;
    1082             :         return -1;
    1083             : }
    1084             : 
    1085             : int tdb_mutex_munmap(struct tdb_context *tdb)
    1086             : {
    1087             :         errno = ENOSYS;
    1088             :         return -1;
    1089             : }
    1090             : 
    1091             : int tdb_mutex_init(struct tdb_context *tdb)
    1092             : {
    1093             :         errno = ENOSYS;
    1094             :         return -1;
    1095             : }
    1096             : 
    1097             : _PUBLIC_ bool tdb_runtime_check_for_robust_mutexes(void)
    1098             : {
    1099             :         return false;
    1100             : }
    1101             : 
    1102             : #endif

Generated by: LCOV version 1.14