LCOV - code coverage report
Current view: top level - lib/tdb/common - check.c (source / functions) Hit Total Coverage
Test: coverage report for master 70ed9daf Lines: 215 242 88.8 %
Date: 2024-01-11 09:59:51 Functions: 11 11 100.0 %

          Line data    Source code
       1             :  /*
       2             :    Unix SMB/CIFS implementation.
       3             : 
       4             :    trivial database library
       5             : 
       6             :    Copyright (C) Rusty Russell             2009
       7             : 
       8             :      ** NOTE! The following LGPL license applies to the tdb
       9             :      ** library. This does NOT imply that all of Samba is released
      10             :      ** under the LGPL
      11             : 
      12             :    This library is free software; you can redistribute it and/or
      13             :    modify it under the terms of the GNU Lesser General Public
      14             :    License as published by the Free Software Foundation; either
      15             :    version 3 of the License, or (at your option) any later version.
      16             : 
      17             :    This library is distributed in the hope that it will be useful,
      18             :    but WITHOUT ANY WARRANTY; without even the implied warranty of
      19             :    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      20             :    Lesser General Public License for more details.
      21             : 
      22             :    You should have received a copy of the GNU Lesser General Public
      23             :    License along with this library; if not, see <http://www.gnu.org/licenses/>.
      24             : */
      25             : #include "tdb_private.h"
      26             : 
      27             : /* Since we opened it, these shouldn't fail unless it's recent corruption. */
      28       65658 : static bool tdb_check_header(struct tdb_context *tdb, tdb_off_t *recovery)
      29             : {
      30           1 :         struct tdb_header hdr;
      31           1 :         uint32_t h1, h2;
      32             : 
      33       65658 :         if (tdb->methods->tdb_read(tdb, 0, &hdr, sizeof(hdr), 0) == -1)
      34           0 :                 return false;
      35       65658 :         if (strcmp(hdr.magic_food, TDB_MAGIC_FOOD) != 0)
      36         160 :                 goto corrupt;
      37             : 
      38       65498 :         CONVERT(hdr);
      39       65498 :         if (hdr.version != TDB_VERSION)
      40          64 :                 goto corrupt;
      41             : 
      42       65434 :         if (hdr.rwlocks != 0 &&
      43         148 :             hdr.rwlocks != TDB_FEATURE_FLAG_MAGIC &&
      44         148 :             hdr.rwlocks != TDB_HASH_RWLOCK_MAGIC)
      45          64 :                 goto corrupt;
      46             : 
      47       65370 :         tdb_header_hash(tdb, &h1, &h2);
      48       65370 :         if (hdr.magic1_hash && hdr.magic2_hash &&
      49       65363 :             (hdr.magic1_hash != h1 || hdr.magic2_hash != h2))
      50         128 :                 goto corrupt;
      51             : 
      52       65242 :         if (hdr.hash_size == 0)
      53           2 :                 goto corrupt;
      54             : 
      55       65240 :         if (hdr.hash_size != tdb->hash_size)
      56          62 :                 goto corrupt;
      57             : 
      58       65178 :         if (hdr.recovery_start != 0 &&
      59          51 :             hdr.recovery_start < TDB_DATA_START(tdb->hash_size))
      60          16 :                 goto corrupt;
      61             : 
      62       65162 :         *recovery = hdr.recovery_start;
      63       65162 :         return true;
      64             : 
      65         496 : corrupt:
      66         496 :         tdb->ecode = TDB_ERR_CORRUPT;
      67         496 :         TDB_LOG((tdb, TDB_DEBUG_ERROR, "Header is corrupt\n"));
      68         496 :         return false;
      69             : }
      70             : 
      71             : /* Generic record header check. */
      72      385203 : static bool tdb_check_record(struct tdb_context *tdb,
      73             :                              tdb_off_t off,
      74             :                              const struct tdb_record *rec)
      75             : {
      76           2 :         tdb_off_t tailer;
      77             : 
      78             :         /* Check rec->next: 0 or points to record offset, aligned. */
      79      385203 :         if (rec->next > 0 && rec->next < TDB_DATA_START(tdb->hash_size)){
      80          48 :                 TDB_LOG((tdb, TDB_DEBUG_ERROR,
      81             :                          "Record offset %u too small next %u\n",
      82             :                          off, rec->next));
      83          48 :                 goto corrupt;
      84             :         }
      85           2 :         if (rec->next + sizeof(*rec) < rec->next) {
      86             :                 TDB_LOG((tdb, TDB_DEBUG_ERROR,
      87             :                          "Record offset %u too large next %u\n",
      88             :                          off, rec->next));
      89             :                 goto corrupt;
      90             :         }
      91      385155 :         if ((rec->next % TDB_ALIGNMENT) != 0) {
      92          12 :                 TDB_LOG((tdb, TDB_DEBUG_ERROR,
      93             :                          "Record offset %u misaligned next %u\n",
      94             :                          off, rec->next));
      95          12 :                 goto corrupt;
      96             :         }
      97      385143 :         if (tdb_oob(tdb, rec->next, sizeof(*rec), 0))
      98         244 :                 goto corrupt;
      99             : 
     100             :         /* Check rec_len: similar to rec->next, implies next record. */
     101      384899 :         if ((rec->rec_len % TDB_ALIGNMENT) != 0) {
     102          24 :                 TDB_LOG((tdb, TDB_DEBUG_ERROR,
     103             :                          "Record offset %u misaligned length %u\n",
     104             :                          off, rec->rec_len));
     105          24 :                 goto corrupt;
     106             :         }
     107             :         /* Must fit tailer. */
     108      384875 :         if (rec->rec_len < sizeof(tailer)) {
     109           6 :                 TDB_LOG((tdb, TDB_DEBUG_ERROR,
     110             :                          "Record offset %u too short length %u\n",
     111             :                          off, rec->rec_len));
     112           6 :                 goto corrupt;
     113             :         }
     114             :         /* OOB allows "right at the end" access, so this works for last rec. */
     115      384869 :         if (tdb_oob(tdb, off, sizeof(*rec)+rec->rec_len, 0))
     116         298 :                 goto corrupt;
     117             : 
     118             :         /* Check tailer. */
     119      384571 :         if (tdb_ofs_read(tdb, off+sizeof(*rec)+rec->rec_len-sizeof(tailer),
     120             :                          &tailer) == -1)
     121           0 :                 goto corrupt;
     122      384571 :         if (tailer != sizeof(*rec) + rec->rec_len) {
     123         440 :                 TDB_LOG((tdb, TDB_DEBUG_ERROR,
     124             :                          "Record offset %u invalid tailer\n", off));
     125         440 :                 goto corrupt;
     126             :         }
     127             : 
     128      384129 :         return true;
     129             : 
     130        1072 : corrupt:
     131        1072 :         tdb->ecode = TDB_ERR_CORRUPT;
     132        1072 :         return false;
     133             : }
     134             : 
     135             : /* Grab some bytes: may copy if can't use mmap.
     136             :    Caller has already done bounds check. */
     137      634268 : static TDB_DATA get_bytes(struct tdb_context *tdb,
     138             :                           tdb_off_t off, tdb_len_t len)
     139             : {
     140           1 :         TDB_DATA d;
     141             : 
     142      634268 :         d.dsize = len;
     143             : 
     144      634268 :         if (tdb->transaction == NULL && tdb->map_ptr != NULL)
     145      318315 :                 d.dptr = (unsigned char *)tdb->map_ptr + off;
     146             :         else
     147      315953 :                 d.dptr = tdb_alloc_read(tdb, off, d.dsize);
     148      634268 :         return d;
     149             : }
     150             : 
     151             : /* Frees data if we're not able to simply use mmap. */
     152      634268 : static void put_bytes(struct tdb_context *tdb, TDB_DATA d)
     153             : {
     154      634268 :         if (tdb->transaction == NULL && tdb->map_ptr != NULL)
     155      318314 :                 return;
     156      315953 :         free(d.dptr);
     157             : }
     158             : 
     159             : /* We use the excellent Jenkins lookup3 hash; this is based on hash_word2.
     160             :  * See: http://burtleburtle.net/bob/c/lookup3.c
     161             :  */
     162             : #define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
     163     3086384 : static void hash(uint32_t key, uint32_t *pc, uint32_t *pb)
     164             : {
     165          16 :         uint32_t a,b,c;
     166             : 
     167             :         /* Set up the internal state */
     168     3086384 :         a = b = c = 0xdeadbeef + *pc;
     169     3086384 :         c += *pb;
     170     3086384 :         a += key;
     171     3086384 :         c ^= b; c -= rot(b,14);
     172     3086384 :         a ^= c; a -= rot(c,11);
     173     3086384 :         b ^= a; b -= rot(a,25);
     174     3086384 :         c ^= b; c -= rot(b,16);
     175     3086384 :         a ^= c; a -= rot(c,4);
     176     3086384 :         b ^= a; b -= rot(a,14);
     177     3086384 :         c ^= b; c -= rot(b,24);
     178     3086384 :         *pc=c; *pb=b;
     179     3086384 : }
     180             : 
     181             : /*
     182             :   We want to check that all free records are in the free list
     183             :   (only once), and all free list entries are free records.  Similarly
     184             :   for each hash chain of used records.
     185             : 
     186             :   Doing that naively (without walking hash chains, since we want to be
     187             :   linear) means keeping a list of records which have been seen in each
     188             :   hash chain, and another of records pointed to (ie. next pointers
     189             :   from records and the initial hash chain heads).  These two lists
     190             :   should be equal.  This will take 8 bytes per record, and require
     191             :   sorting at the end.
     192             : 
     193             :   So instead, we record each offset in a bitmap such a way that
     194             :   recording it twice will cancel out.  Since each offset should appear
     195             :   exactly twice, the bitmap should be zero at the end.
     196             : 
     197             :   The approach was inspired by Bloom Filters (see Wikipedia).  For
     198             :   each value, we flip K bits in a bitmap of size N.  The number of
     199             :   distinct arrangements is:
     200             : 
     201             :         N! / (K! * (N-K)!)
     202             : 
     203             :   Of course, not all arrangements are actually distinct, but testing
     204             :   shows this formula to be close enough.
     205             : 
     206             :   So, if K == 8 and N == 256, the probability of two things flipping the same
     207             :   bits is 1 in 409,663,695,276,000.
     208             : 
     209             :   Given that ldb uses a hash size of 10000, using 32 bytes per hash chain
     210             :   (320k) seems reasonable.
     211             : */
     212             : #define NUM_HASHES 8
     213             : #define BITMAP_BITS 256
     214             : 
     215     6172752 : static void bit_flip(unsigned char bits[], unsigned int idx)
     216             : {
     217     6172752 :         bits[idx / CHAR_BIT] ^= (1 << (idx % CHAR_BIT));
     218     6172736 : }
     219             : 
     220             : /* We record offsets in a bitmap for the particular chain it should be in.  */
     221      771596 : static void record_offset(unsigned char bits[], tdb_off_t off)
     222             : {
     223      771596 :         uint32_t h1 = off, h2 = 0;
     224           4 :         unsigned int i;
     225             : 
     226             :         /* We get two good hash values out of jhash2, so we use both.  Then
     227             :          * we keep going to produce further hash values. */
     228     3857980 :         for (i = 0; i < NUM_HASHES / 2; i++) {
     229     3086384 :                 hash(off, &h1, &h2);
     230     3086384 :                 bit_flip(bits, h1 % BITMAP_BITS);
     231     3086384 :                 bit_flip(bits, h2 % BITMAP_BITS);
     232     3086384 :                 h2++;
     233             :         }
     234      771596 : }
     235             : 
     236             : /* Check that an in-use record is valid. */
     237      320104 : static bool tdb_check_used_record(struct tdb_context *tdb,
     238             :                                   tdb_off_t off,
     239             :                                   const struct tdb_record *rec,
     240             :                                   unsigned char **hashes,
     241             :                                   int (*check)(TDB_DATA, TDB_DATA, void *),
     242             :                                   void *private_data)
     243             : {
     244           1 :         TDB_DATA key, data;
     245           1 :         tdb_len_t len;
     246             : 
     247      320104 :         if (!tdb_check_record(tdb, off, rec))
     248         888 :                 return false;
     249             : 
     250             :         /* key + data + tailer must fit in record */
     251      319216 :         len = rec->key_len;
     252      319216 :         len += rec->data_len;
     253      319216 :         if (len < rec->data_len) {
     254             :                 /* overflow */
     255           0 :                 TDB_LOG((tdb, TDB_DEBUG_ERROR, "Record lengths overflow\n"));
     256           0 :                 return false;
     257             :         }
     258      319216 :         len += sizeof(tdb_off_t);
     259      319216 :         if (len < sizeof(tdb_off_t)) {
     260             :                 /* overflow */
     261           0 :                 TDB_LOG((tdb, TDB_DEBUG_ERROR, "Record lengths overflow\n"));
     262           0 :                 return false;
     263             :         }
     264             : 
     265      319216 :         if (len > rec->rec_len) {
     266         586 :                 TDB_LOG((tdb, TDB_DEBUG_ERROR,
     267             :                          "Record offset %u too short for contents\n", off));
     268         586 :                 return false;
     269             :         }
     270             : 
     271      318630 :         key = get_bytes(tdb, off + sizeof(*rec), rec->key_len);
     272      318630 :         if (!key.dptr)
     273           0 :                 return false;
     274             : 
     275      318630 :         if (tdb->hash_fn(&key) != rec->full_hash) {
     276         586 :                 TDB_LOG((tdb, TDB_DEBUG_ERROR,
     277             :                          "Record offset %u has incorrect hash\n", off));
     278         586 :                 goto fail_put_key;
     279             :         }
     280             : 
     281             :         /* Mark this offset as a known value for this hash bucket. */
     282      318044 :         record_offset(hashes[BUCKET(rec->full_hash)+1], off);
     283             :         /* And similarly if the next pointer is valid. */
     284      318044 :         if (rec->next)
     285      192915 :                 record_offset(hashes[BUCKET(rec->full_hash)+1], rec->next);
     286             : 
     287             :         /* If they supply a check function and this record isn't dead,
     288             :            get data and feed it. */
     289      318044 :         if (check && rec->magic != TDB_DEAD_MAGIC) {
     290      315638 :                 data = get_bytes(tdb, off + sizeof(*rec) + rec->key_len,
     291      315638 :                                  rec->data_len);
     292      315638 :                 if (!data.dptr)
     293           0 :                         goto fail_put_key;
     294             : 
     295      315638 :                 if (check(key, data, private_data) == -1)
     296         428 :                         goto fail_put_data;
     297      315210 :                 put_bytes(tdb, data);
     298             :         }
     299             : 
     300      317616 :         put_bytes(tdb, key);
     301      317616 :         return true;
     302             : 
     303         428 : fail_put_data:
     304         428 :         put_bytes(tdb, data);
     305        1014 : fail_put_key:
     306        1014 :         put_bytes(tdb, key);
     307        1014 :         return false;
     308             : }
     309             : 
     310             : /* Check that an unused record is valid. */
     311       65099 : static bool tdb_check_free_record(struct tdb_context *tdb,
     312             :                                   tdb_off_t off,
     313             :                                   const struct tdb_record *rec,
     314             :                                   unsigned char **hashes)
     315             : {
     316       65099 :         if (!tdb_check_record(tdb, off, rec))
     317         184 :                 return false;
     318             : 
     319             :         /* Mark this offset as a known value for the free list. */
     320       64915 :         record_offset(hashes[0], off);
     321             :         /* And similarly if the next pointer is valid. */
     322       64915 :         if (rec->next)
     323          68 :                 record_offset(hashes[0], rec->next);
     324       64914 :         return true;
     325             : }
     326             : 
     327             : /* Slow, but should be very rare. */
     328          33 : size_t tdb_dead_space(struct tdb_context *tdb, tdb_off_t off)
     329             : {
     330           0 :         size_t len;
     331             : 
     332    40685457 :         for (len = 0; off + len < tdb->map_size; len++) {
     333           0 :                 char c;
     334    40685424 :                 if (tdb->methods->tdb_read(tdb, off, &c, 1, 0))
     335           0 :                         return 0;
     336    40685424 :                 if (c != 0 && c != 0x42)
     337           0 :                         break;
     338             :         }
     339          33 :         return len;
     340             : }
     341             : 
     342       65698 : _PUBLIC_ int tdb_check(struct tdb_context *tdb,
     343             :               int (*check)(TDB_DATA key, TDB_DATA data, void *private_data),
     344             :               void *private_data)
     345             : {
     346           1 :         unsigned int h;
     347           1 :         unsigned char **hashes;
     348           1 :         tdb_off_t off, recovery_start;
     349           1 :         struct tdb_record rec;
     350       65698 :         bool found_recovery = false;
     351           1 :         tdb_len_t dead;
     352           1 :         bool locked;
     353             : 
     354             :         /* Read-only databases use no locking at all: it's best-effort.
     355             :          * We may have a write lock already, so skip that case too. */
     356       65698 :         if (tdb->read_only || tdb->allrecord_lock.count != 0) {
     357          26 :                 locked = false;
     358             :         } else {
     359       65672 :                 if (tdb_lockall_read(tdb) == -1)
     360          40 :                         return -1;
     361       65631 :                 locked = true;
     362             :         }
     363             : 
     364             :         /* Make sure we know true size of the underlying file. */
     365       65658 :         tdb_oob(tdb, tdb->map_size, 1, 1);
     366             : 
     367             :         /* Header must be OK: also gets us the recovery ptr, if any. */
     368       65658 :         if (!tdb_check_header(tdb, &recovery_start))
     369         496 :                 goto unlock;
     370             : 
     371             :         /* We should have the whole header, too. */
     372       65162 :         if (tdb->map_size < TDB_DATA_START(tdb->hash_size)) {
     373           0 :                 tdb->ecode = TDB_ERR_CORRUPT;
     374           0 :                 TDB_LOG((tdb, TDB_DEBUG_ERROR, "File too short for hashes\n"));
     375           0 :                 goto unlock;
     376             :         }
     377             : 
     378             :         /* One big malloc: pointers then bit arrays. */
     379       65162 :         hashes = (unsigned char **)calloc(
     380       65162 :                         1, sizeof(hashes[0]) * (1+tdb->hash_size)
     381       65162 :                         + BITMAP_BITS / CHAR_BIT * (1+tdb->hash_size));
     382       65162 :         if (!hashes) {
     383           0 :                 tdb->ecode = TDB_ERR_OOM;
     384           0 :                 goto unlock;
     385             :         }
     386             : 
     387             :         /* Initialize pointers */
     388       65162 :         hashes[0] = (unsigned char *)(&hashes[1+tdb->hash_size]);
     389      321999 :         for (h = 1; h < 1+tdb->hash_size; h++)
     390      256837 :                 hashes[h] = hashes[h-1] + BITMAP_BITS / CHAR_BIT;
     391             : 
     392             :         /* Freelist and hash headers are all in a row: read them. */
     393      387161 :         for (h = 0; h < 1+tdb->hash_size; h++) {
     394      321999 :                 if (tdb_ofs_read(tdb, FREELIST_TOP + h*sizeof(tdb_off_t),
     395             :                                  &off) == -1)
     396           0 :                         goto free;
     397      321999 :                 if (off)
     398      195654 :                         record_offset(hashes[h], off);
     399             :         }
     400             : 
     401             :         /* For each record, read it in and check it's ok. */
     402       65162 :         for (off = TDB_DATA_START(tdb->hash_size);
     403      447753 :              off < tdb->map_size;
     404      382591 :              off += sizeof(rec) + rec.rec_len) {
     405      385647 :                 if (tdb->methods->tdb_read(tdb, off, &rec, sizeof(rec),
     406      385647 :                                            DOCONV()) == -1)
     407           0 :                         goto free;
     408      385647 :                 switch (rec.magic) {
     409      320104 :                 case TDB_MAGIC:
     410             :                 case TDB_DEAD_MAGIC:
     411      320104 :                         if (!tdb_check_used_record(tdb, off, &rec, hashes,
     412             :                                                    check, private_data))
     413        2488 :                                 goto free;
     414      317615 :                         break;
     415       65099 :                 case TDB_FREE_MAGIC:
     416       65099 :                         if (!tdb_check_free_record(tdb, off, &rec, hashes))
     417         184 :                                 goto free;
     418       64914 :                         break;
     419             :                 /* If we crash after ftruncate, we can get zeroes or fill. */
     420          60 :                 case TDB_RECOVERY_INVALID_MAGIC:
     421             :                 case 0x42424242:
     422          60 :                         if (recovery_start == off) {
     423          27 :                                 found_recovery = true;
     424          27 :                                 break;
     425             :                         }
     426          33 :                         dead = tdb_dead_space(tdb, off);
     427          33 :                         if (dead < sizeof(rec))
     428           0 :                                 goto corrupt;
     429             : 
     430          33 :                         TDB_LOG((tdb, TDB_DEBUG_ERROR,
     431             :                                  "Dead space at %u-%u (of %u)\n",
     432             :                                  off, off + dead, tdb->map_size));
     433          33 :                         rec.rec_len = dead - sizeof(rec);
     434          33 :                         break;
     435           0 :                 case TDB_RECOVERY_MAGIC:
     436           0 :                         if (recovery_start != off) {
     437           0 :                                 TDB_LOG((tdb, TDB_DEBUG_ERROR,
     438             :                                          "Unexpected recovery record at offset %u\n",
     439             :                                          off));
     440           0 :                                 goto free;
     441             :                         }
     442           0 :                         found_recovery = true;
     443           0 :                         break;
     444           0 :                 default: ;
     445         384 :                 corrupt:
     446         384 :                         tdb->ecode = TDB_ERR_CORRUPT;
     447         384 :                         TDB_LOG((tdb, TDB_DEBUG_ERROR,
     448             :                                  "Bad magic 0x%x at offset %u\n",
     449             :                                  rec.magic, off));
     450         384 :                         goto free;
     451             :                 }
     452             :         }
     453             : 
     454             :         /* Now, hashes should all be empty: each record exists and is referred
     455             :          * to by one other. */
     456      374334 :         for (h = 0; h < 1+tdb->hash_size; h++) {
     457             :                 unsigned int i;
     458    10304341 :                 for (i = 0; i < BITMAP_BITS / CHAR_BIT; i++) {
     459     9992113 :                         if (hashes[h][i] != 0) {
     460         273 :                                 tdb->ecode = TDB_ERR_CORRUPT;
     461         273 :                                 TDB_LOG((tdb, TDB_DEBUG_ERROR,
     462             :                                          "Hashes do not match records\n"));
     463         273 :                                 goto free;
     464             :                         }
     465             :                 }
     466             :         }
     467             : 
     468             :         /* We must have found recovery area if there was one. */
     469       61833 :         if (recovery_start != 0 && !found_recovery) {
     470           8 :                 TDB_LOG((tdb, TDB_DEBUG_ERROR,
     471             :                          "Expected a recovery area at %u\n",
     472             :                          recovery_start));
     473           8 :                 goto free;
     474             :         }
     475             : 
     476       61825 :         free(hashes);
     477       61825 :         if (locked) {
     478       61799 :                 tdb_unlockall_read(tdb);
     479             :         }
     480       61824 :         return 0;
     481             : 
     482        3337 : free:
     483        3337 :         free(hashes);
     484        3833 : unlock:
     485        3833 :         if (locked) {
     486        3833 :                 tdb_unlockall_read(tdb);
     487             :         }
     488        3833 :         return -1;
     489             : }

Generated by: LCOV version 1.14