Line data Source code
1 : /*
2 : Unix SMB/CIFS implementation.
3 : Samba utility functions
4 : Copyright (C) Andrew Tridgell 1992-2001
5 : Copyright (C) Simo Sorce 2001
6 :
7 : This program is free software; you can redistribute it and/or modify
8 : it under the terms of the GNU General Public License as published by
9 : the Free Software Foundation; either version 3 of the License, or
10 : (at your option) any later version.
11 :
12 : This program is distributed in the hope that it will be useful,
13 : but WITHOUT ANY WARRANTY; without even the implied warranty of
14 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 : GNU General Public License for more details.
16 :
17 : You should have received a copy of the GNU General Public License
18 : along with this program. If not, see <http://www.gnu.org/licenses/>.
19 : */
20 :
21 : #include "replace.h"
22 : #include "system/locale.h"
23 : #include "charset.h"
24 : #include "lib/util/byteorder.h"
25 : #include "lib/util/fault.h"
26 :
27 : /**
28 : String replace.
29 : NOTE: oldc and newc must be 7 bit characters
30 : **/
31 5 : _PUBLIC_ void string_replace_m(char *s, char oldc, char newc)
32 : {
33 5 : struct smb_iconv_handle *ic = get_iconv_handle();
34 19 : while (s && *s) {
35 14 : size_t size;
36 14 : codepoint_t c = next_codepoint_handle(ic, s, &size);
37 14 : if (c == oldc) {
38 5 : *s = newc;
39 : }
40 14 : s += size;
41 : }
42 5 : }
43 :
44 : /**
45 : Convert a string to lower case, allocated with talloc
46 : **/
47 5799865 : _PUBLIC_ char *strlower_talloc_handle(struct smb_iconv_handle *iconv_handle,
48 : TALLOC_CTX *ctx, const char *src)
49 : {
50 5799865 : size_t size=0;
51 18037 : char *dest;
52 :
53 5799865 : if(src == NULL) {
54 0 : return NULL;
55 : }
56 :
57 : /* this takes advantage of the fact that upper/lower can't
58 : change the length of a character by more than 1 byte */
59 5799865 : dest = talloc_array(ctx, char, 2*(strlen(src))+1);
60 5799865 : if (dest == NULL) {
61 0 : return NULL;
62 : }
63 :
64 128963483 : while (*src) {
65 414760 : size_t c_size;
66 123163618 : codepoint_t c = next_codepoint_handle(iconv_handle, src, &c_size);
67 123163618 : src += c_size;
68 :
69 123163618 : c = tolower_m(c);
70 :
71 123163618 : c_size = push_codepoint_handle(iconv_handle, dest+size, c);
72 123163618 : if (c_size == -1) {
73 0 : talloc_free(dest);
74 0 : return NULL;
75 : }
76 123163618 : size += c_size;
77 : }
78 :
79 5799865 : dest[size] = 0;
80 :
81 : /* trim it so talloc_append_string() works */
82 5799865 : dest = talloc_realloc(ctx, dest, char, size+1);
83 :
84 5799865 : talloc_set_name_const(dest, dest);
85 :
86 5799865 : return dest;
87 : }
88 :
89 5799859 : _PUBLIC_ char *strlower_talloc(TALLOC_CTX *ctx, const char *src)
90 : {
91 5799859 : struct smb_iconv_handle *iconv_handle = get_iconv_handle();
92 5799859 : return strlower_talloc_handle(iconv_handle, ctx, src);
93 : }
94 :
95 : /**
96 : Convert a string to UPPER case, allocated with talloc
97 : source length limited to n bytes, iconv handle supplied
98 : **/
99 698380815 : _PUBLIC_ char *strupper_talloc_n_handle(struct smb_iconv_handle *iconv_handle,
100 : TALLOC_CTX *ctx, const char *src, size_t n)
101 : {
102 698380815 : size_t size=0;
103 12955647 : char *dest;
104 :
105 698380815 : if (!src) {
106 193488 : return NULL;
107 : }
108 :
109 : /* this takes advantage of the fact that upper/lower can't
110 : change the length of a character by more than 1 byte */
111 698149650 : dest = talloc_array(ctx, char, 2*(n+1));
112 698149650 : if (dest == NULL) {
113 0 : return NULL;
114 : }
115 :
116 10687205324 : while (n && *src) {
117 118753225 : size_t c_size;
118 9989055676 : codepoint_t c = next_codepoint_handle_ext(iconv_handle, src, n,
119 : CH_UNIX, &c_size);
120 9989055676 : src += c_size;
121 9989055676 : n -= c_size;
122 :
123 9989055676 : c = toupper_m(c);
124 :
125 9989055676 : c_size = push_codepoint_handle(iconv_handle, dest+size, c);
126 9989055676 : if (c_size == -1) {
127 2 : talloc_free(dest);
128 2 : return NULL;
129 : }
130 9989055674 : size += c_size;
131 : }
132 :
133 698149648 : dest[size] = 0;
134 :
135 : /* trim it so talloc_append_string() works */
136 698149648 : dest = talloc_realloc(ctx, dest, char, size+1);
137 :
138 698149648 : talloc_set_name_const(dest, dest);
139 :
140 698149648 : return dest;
141 : }
142 :
143 : /**
144 : Convert a string to UPPER case, allocated with talloc
145 : source length limited to n bytes
146 : **/
147 698380809 : _PUBLIC_ char *strupper_talloc_n(TALLOC_CTX *ctx, const char *src, size_t n)
148 : {
149 698380809 : struct smb_iconv_handle *iconv_handle = get_iconv_handle();
150 698380809 : return strupper_talloc_n_handle(iconv_handle, ctx, src, n);
151 : }
152 : /**
153 : Convert a string to UPPER case, allocated with talloc
154 : **/
155 5972649 : _PUBLIC_ char *strupper_talloc(TALLOC_CTX *ctx, const char *src)
156 : {
157 5972649 : return strupper_talloc_n(ctx, src, src?strlen(src):0);
158 : }
159 :
160 : /**
161 : talloc_strdup() a unix string to upper case.
162 : **/
163 3061905 : _PUBLIC_ char *talloc_strdup_upper(TALLOC_CTX *ctx, const char *src)
164 : {
165 3061905 : return strupper_talloc(ctx, src);
166 : }
167 :
168 : /**
169 : Find the number of 'c' chars in a string
170 : **/
171 4 : _PUBLIC_ size_t count_chars_m(const char *s, char c)
172 : {
173 4 : struct smb_iconv_handle *ic = get_iconv_handle();
174 4 : size_t count = 0;
175 :
176 13 : while (*s) {
177 9 : size_t size;
178 9 : codepoint_t c2 = next_codepoint_handle(ic, s, &size);
179 9 : if (c2 == c) count++;
180 9 : s += size;
181 : }
182 :
183 4 : return count;
184 : }
185 :
186 3039705 : size_t ucs2_align(const void *base_ptr, const void *p, int flags)
187 : {
188 3039705 : if (flags & (STR_NOALIGN|STR_ASCII)) {
189 128545 : return 0;
190 : }
191 2911066 : return PTR_DIFF(p, base_ptr) & 1;
192 : }
193 :
/**
 * Return the number of bytes occupied by a buffer in CH_UTF16 format.
 *
 * The buffer is scanned two bytes at a time until a zero 16-bit unit is
 * found; that terminator is not included in the result.
 **/
size_t utf16_len(const void *buf)
{
	const uint8_t *bytes = (const uint8_t *)buf;
	size_t len = 0;

	/* a 16-bit unit is zero exactly when both of its bytes are zero */
	while ((bytes[len] | bytes[len + 1]) != 0) {
		len += 2;
	}

	return len;
}
205 :
/**
 * Return the number of bytes occupied by a buffer in CH_UTF16 format.
 * The result includes the two-byte null termination.
 **/
size_t utf16_null_terminated_len(const void *buf)
{
	const size_t terminator_bytes = 2;
	size_t payload_bytes = utf16_len(buf);

	return payload_bytes + terminator_bytes;
}
214 :
/**
 * Return the number of bytes occupied by a buffer in CH_UTF16 format,
 * limited by 'n' bytes.
 *
 * Scanning stops at the first zero 16-bit unit or when fewer than two
 * bytes of the limit remain; a terminator is not counted.
 **/
size_t utf16_len_n(const void *src, size_t n)
{
	const uint8_t *bytes = (const uint8_t *)src;
	size_t len = 0;

	while (len + 2 <= n) {
		/* a 16-bit unit is zero exactly when both bytes are zero */
		if ((bytes[len] | bytes[len + 1]) == 0) {
			break;
		}
		len += 2;
	}

	return len;
}
227 :
/**
 * Return the number of bytes occupied by a buffer in CH_UTF16 format,
 * limited by 'n' bytes.
 *
 * The two-byte null termination is included in the result only when it
 * still fits inside the 'n' byte limit.
 **/
size_t utf16_null_terminated_len_n(const void *src, size_t n)
{
	const size_t payload_bytes = utf16_len_n(src, n);

	return (payload_bytes + 2 <= n) ? payload_bytes + 2 : payload_bytes;
}
245 :
246 5 : unsigned char *talloc_utf16_strlendup(TALLOC_CTX *mem_ctx, const char *str, size_t len)
247 : {
248 5 : unsigned char *new_str = NULL;
249 :
250 : /* Check for overflow. */
251 5 : if (len > SIZE_MAX - 2) {
252 0 : return NULL;
253 : }
254 :
255 : /*
256 : * Allocate the new string, including space for the
257 : * UTF‐16 null terminator.
258 : */
259 5 : new_str = talloc_size(mem_ctx, len + 2);
260 5 : if (new_str == NULL) {
261 0 : return NULL;
262 : }
263 :
264 5 : memcpy(new_str, str, len);
265 :
266 : /*
267 : * Ensure that the UTF‐16 string is
268 : * null‐terminated.
269 : */
270 5 : new_str[len] = '\0';
271 5 : new_str[len + 1] = '\0';
272 :
273 5 : return new_str;
274 : }
275 :
276 0 : unsigned char *talloc_utf16_strdup(TALLOC_CTX *mem_ctx, const char *str)
277 : {
278 0 : if (str == NULL) {
279 0 : return NULL;
280 : }
281 0 : return talloc_utf16_strlendup(mem_ctx, str, utf16_len(str));
282 : }
283 :
284 0 : unsigned char *talloc_utf16_strndup(TALLOC_CTX *mem_ctx, const char *str, size_t n)
285 : {
286 0 : if (str == NULL) {
287 0 : return NULL;
288 : }
289 0 : return talloc_utf16_strlendup(mem_ctx, str, utf16_len_n(str, n));
290 : }
291 :
/**
 * Determine the length and validity of a utf-8 string.
 *
 * Scanning stops at the first '\0' byte, after maxlen bytes, or at the
 * first invalid sequence, whichever comes first.  The three output
 * values are written in every case and always describe the valid
 * section that precedes the stopping point.
 *
 * @param input      the string pointer
 * @param maxlen     maximum size of the string
 * @param byte_len   receives the length in bytes of the valid section
 * @param char_len   receives the number of unicode characters in the
 *                   valid section
 * @param utf16_len  receives the number of 16-bit units the valid
 *                   section would need in UTF16 encoding (one per
 *                   character, two for characters encoded in four
 *                   utf-8 bytes)
 *
 * @return true if the input is valid up to maxlen, or a '\0' byte,
 *         otherwise false.
 */
bool utf8_check(const char *input, size_t maxlen,
		size_t *byte_len,
		size_t *char_len,
		size_t *utf16_len)
{
	const uint8_t *s = (const uint8_t *)input;
	size_t pos = 0;
	size_t n_chars = 0;
	size_t n_pairs = 0;
	bool valid = true;

	while (pos < maxlen && s[pos] != 0) {
		const uint8_t lead = s[pos];
		size_t seq_len;
		size_t k;
		uint32_t smallest;
		uint32_t cp;

		/*
		 * Classify the lead byte: sequence length, payload bits
		 * it carries, and the smallest codepoint that may
		 * legitimately use this length (anything below is an
		 * overlong encoding).
		 */
		if (lead < 0x80) {
			/* 0xxxxxxx */
			seq_len = 1;
			smallest = 0;
			cp = lead;
		} else if ((lead & 0xe0) == 0xc0) {
			/* 110xxxxx 10xxxxxx */
			seq_len = 2;
			smallest = 0x80;
			cp = lead & 31;
		} else if ((lead & 0xf0) == 0xe0) {
			/* 1110xxxx 10xxxxxx 10xxxxxx */
			seq_len = 3;
			smallest = 0x800;
			cp = lead & 15;
		} else if ((lead & 0xf8) == 0xf0) {
			/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
			seq_len = 4;
			smallest = 0x10000;
			cp = lead & 7;
		} else {
			/* If it wasn't handled yet, it's wrong. */
			valid = false;
			break;
		}

		/* the whole sequence has to fit inside maxlen */
		if (maxlen - pos < seq_len) {
			valid = false;
			break;
		}

		for (k = 1; k < seq_len; k++) {
			const uint8_t cont = s[pos + k];

			if ((cont & 0xc0) != 0x80) {
				valid = false;
				break;
			}
			cp = (cp << 6) | (cont & 63);
		}
		if (!valid) {
			break;
		}

		if (cp < smallest || cp > 0x10ffff) {
			valid = false;
			break;
		}
		if (cp >= 0xd800 && cp <= 0xdfff) {
			/*
			 * This is an invalid codepoint, per RFC3629, as
			 * it encodes part of a UTF-16 surrogate pair for
			 * a character over U+10000, which ought to have
			 * been encoded as a four byte utf-8 sequence.
			 */
			valid = false;
			break;
		}

		if (seq_len == 4) {
			/* this one will need two UTF16 characters */
			n_pairs++;
		}
		pos += seq_len;
		n_chars++;
	}

	*byte_len = pos;
	*char_len = n_chars;
	*utf16_len = n_chars + n_pairs;

	return valid;
}
410 :
411 :
412 : /**
413 : * Copy a string from a char* unix src to a dos codepage string destination.
414 : *
415 : * @converted_size the number of bytes occupied by the string in the destination.
416 : * @return bool true if success.
417 : *
418 : * @param flags can include
419 : * <dl>
420 : * <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
421 : * <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
422 : * </dl>
423 : *
424 : * @param dest_len the maximum length in bytes allowed in the
425 : * destination. If @p dest_len is -1 then no maximum is used.
426 : **/
427 4123 : static bool push_ascii_string(void *dest, const char *src, size_t dest_len, int flags, size_t *converted_size)
428 : {
429 144 : size_t src_len;
430 144 : bool ret;
431 :
432 4123 : if (flags & STR_UPPER) {
433 4 : char *tmpbuf = strupper_talloc(NULL, src);
434 4 : if (tmpbuf == NULL) {
435 0 : return false;
436 : }
437 4 : ret = push_ascii_string(dest, tmpbuf, dest_len, flags & ~STR_UPPER, converted_size);
438 4 : talloc_free(tmpbuf);
439 4 : return ret;
440 : }
441 :
442 4119 : src_len = strlen(src);
443 :
444 4119 : if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII))
445 4075 : src_len++;
446 :
447 4119 : return convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len, converted_size);
448 : }
449 :
450 : /**
451 : * Copy a string from a dos codepage source to a unix char* destination.
452 : *
453 : * The resulting string in "dest" is always null terminated.
454 : *
455 : * @param flags can have:
456 : * <dl>
457 : * <dt>STR_TERMINATE</dt>
458 : * <dd>STR_TERMINATE means the string in @p src
459 : * is null terminated, and src_len is ignored.</dd>
460 : * </dl>
461 : *
462 : * @param src_len is the length of the source area in bytes.
463 : * @returns the number of bytes occupied by the string in @p src.
464 : **/
465 174 : static ssize_t pull_ascii_string(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
466 : {
467 174 : size_t size = 0;
468 :
469 174 : if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII)) {
470 0 : if (src_len == (size_t)-1) {
471 0 : src_len = strlen((const char *)src) + 1;
472 : } else {
473 0 : size_t len = strnlen((const char *)src, src_len);
474 0 : if (len < src_len)
475 0 : len++;
476 0 : src_len = len;
477 : }
478 : }
479 :
480 : /* We're ignoring the return here.. */
481 174 : (void)convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len, &size);
482 :
483 174 : if (dest_len)
484 174 : dest[MIN(size, dest_len-1)] = 0;
485 :
486 174 : return src_len;
487 : }
488 :
489 : /**
490 : * Copy a string from a char* src to a unicode destination.
491 : *
492 : * @returns the number of bytes occupied by the string in the destination.
493 : *
494 : * @param flags can have:
495 : *
496 : * <dl>
497 : * <dt>STR_TERMINATE <dd>means include the null termination.
498 : * <dt>STR_UPPER <dd>means uppercase in the destination.
499 : * <dt>STR_NOALIGN <dd>means don't do alignment.
500 : * </dl>
501 : *
502 : * @param dest_len is the maximum length allowed in the
503 : * destination. If dest_len is -1 then no maximum is used.
504 : **/
505 464137 : static ssize_t push_ucs2(void *dest, const char *src, size_t dest_len, int flags)
506 : {
507 464137 : size_t len=0;
508 464137 : size_t src_len = strlen(src);
509 464137 : size_t size = 0;
510 11140 : bool ret;
511 :
512 464137 : if (flags & STR_UPPER) {
513 2952 : char *tmpbuf = strupper_talloc(NULL, src);
514 144 : ssize_t retval;
515 2952 : if (tmpbuf == NULL) {
516 0 : return -1;
517 : }
518 2952 : retval = push_ucs2(dest, tmpbuf, dest_len, flags & ~STR_UPPER);
519 2952 : talloc_free(tmpbuf);
520 2952 : return retval;
521 : }
522 :
523 461185 : if (flags & STR_TERMINATE)
524 321645 : src_len++;
525 :
526 461185 : if (ucs2_align(NULL, dest, flags)) {
527 151082 : *(char *)dest = 0;
528 151082 : dest = (void *)((char *)dest + 1);
529 151082 : if (dest_len) dest_len--;
530 146012 : len++;
531 : }
532 :
533 : /* ucs2 is always a multiple of 2 bytes */
534 461185 : dest_len &= ~1;
535 :
536 461185 : ret = convert_string(CH_UNIX, CH_UTF16, src, src_len, dest, dest_len, &size);
537 461185 : if (ret == false) {
538 0 : return 0;
539 : }
540 :
541 461185 : len += size;
542 :
543 461185 : return (ssize_t)len;
544 : }
545 :
546 :
547 : /**
548 : Copy a string from a ucs2 source to a unix char* destination.
549 : Flags can have:
550 : STR_TERMINATE means the string in src is null terminated.
551 : STR_NOALIGN means don't try to align.
552 : if STR_TERMINATE is set then src_len is ignored if it is -1.
553 : src_len is the length of the source area in bytes
554 : Return the number of bytes occupied by the string in src.
555 : The resulting string in "dest" is always null terminated.
556 : **/
557 :
558 0 : static size_t pull_ucs2(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
559 : {
560 0 : size_t size = 0;
561 :
562 0 : if (ucs2_align(NULL, src, flags)) {
563 0 : src = (const void *)((const char *)src + 1);
564 0 : if (src_len > 0)
565 0 : src_len--;
566 : }
567 :
568 0 : if (flags & STR_TERMINATE) {
569 0 : if (src_len == (size_t)-1) {
570 0 : src_len = utf16_null_terminated_len(src);
571 : } else {
572 0 : src_len = utf16_null_terminated_len_n(src, src_len);
573 : }
574 : }
575 :
576 : /* ucs2 is always a multiple of 2 bytes */
577 0 : if (src_len != (size_t)-1)
578 0 : src_len &= ~1;
579 :
580 : /* We're ignoring the return here.. */
581 0 : (void)convert_string(CH_UTF16, CH_UNIX, src, src_len, dest, dest_len, &size);
582 0 : if (dest_len)
583 0 : dest[MIN(size, dest_len-1)] = 0;
584 :
585 0 : return src_len;
586 : }
587 :
588 : /**
589 : Copy a string from a char* src to a unicode or ascii
590 : dos codepage destination choosing unicode or ascii based on the
591 : flags in the SMB buffer starting at base_ptr.
592 : Return the number of bytes occupied by the string in the destination.
593 : flags can have:
594 : STR_TERMINATE means include the null termination.
595 : STR_UPPER means uppercase in the destination.
596 : STR_ASCII use ascii even with unicode packet.
597 : STR_NOALIGN means don't do alignment.
598 : dest_len is the maximum length allowed in the destination. If dest_len
599 : is -1 then no maximum is used.
600 : **/
601 :
602 465304 : _PUBLIC_ ssize_t push_string(void *dest, const char *src, size_t dest_len, int flags)
603 : {
604 465304 : if (flags & STR_ASCII) {
605 4119 : size_t size = 0;
606 4119 : if (push_ascii_string(dest, src, dest_len, flags, &size)) {
607 4119 : return (ssize_t)size;
608 : } else {
609 0 : return (ssize_t)-1;
610 : }
611 461185 : } else if (flags & STR_UNICODE) {
612 461185 : return push_ucs2(dest, src, dest_len, flags);
613 : } else {
614 0 : smb_panic("push_string requires either STR_ASCII or STR_UNICODE flag to be set");
615 : return -1;
616 : }
617 : }
618 :
619 :
620 : /**
621 : Copy a string from a unicode or ascii source (depending on
622 : the packet flags) to a char* destination.
623 : Flags can have:
624 : STR_TERMINATE means the string in src is null terminated.
625 : STR_UNICODE means to force as unicode.
626 : STR_ASCII use ascii even with unicode packet.
627 : STR_NOALIGN means don't do alignment.
628 : if STR_TERMINATE is set then src_len is ignored is it is -1
629 : src_len is the length of the source area in bytes.
630 : Return the number of bytes occupied by the string in src.
631 : The resulting string in "dest" is always null terminated.
632 : **/
633 :
634 174 : _PUBLIC_ ssize_t pull_string(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
635 : {
636 174 : if (flags & STR_ASCII) {
637 174 : return pull_ascii_string(dest, src, dest_len, src_len, flags);
638 0 : } else if (flags & STR_UNICODE) {
639 0 : return pull_ucs2(dest, src, dest_len, src_len, flags);
640 : } else {
641 0 : smb_panic("pull_string requires either STR_ASCII or STR_UNICODE flag to be set");
642 : return -1;
643 : }
644 : }
|