Ticket #2201: coreutils-6.12-i18n-1.patch
File coreutils-6.12-i18n-1.patch, 99.6 KB (added by , 16 years ago) |
---|
-
new file coreutils-6.8+/tests/misc/sort-mb-tests
// This i18n patch for coreutils comes from the Fedora CVS repository. // It fixes the i18n bugs. // The patch's original location is: http://cvs.fedoraproject.org/viewcvs/*checkout*/rpms/coreutils/devel/coreutils-i18n.patch. // Modified by willimm.
- + 1 #! /bin/sh 2 case $# in 3 0) xx='../src/sort';; 4 *) xx="$1";; 5 esac 6 test "$VERBOSE" && echo=echo || echo=: 7 $echo testing program: $xx 8 errors=0 9 test "$srcdir" || srcdir=. 10 test "$VERBOSE" && $xx --version 2> /dev/null 11 12 export LC_ALL=en_US.UTF-8 13 locale -k LC_CTYPE 2>&1 | grep -q charmap.*UTF-8 || exit 77 14 errors=0 15 16 $xx -t @ -k2 -n misc/mb1.I > misc/mb1.O 17 code=$? 18 if test $code != 0; then 19 $echo "Test mb1 failed: $xx return code $code differs from expected value 0" 1>&2 20 errors=`expr $errors + 1` 21 else 22 cmp misc/mb1.O $srcdir/misc/mb1.X > /dev/null 2>&1 23 case $? in 24 0) if test "$VERBOSE"; then $echo "passed mb1"; fi;; 25 1) $echo "Test mb1 failed: files misc/mb1.O and $srcdir/misc/mb1.X differ" 1>&2 26 (diff -c misc/mb1.O $srcdir/misc/mb1.X) 2> /dev/null 27 errors=`expr $errors + 1`;; 28 2) $echo "Test mb1 may have failed." 1>&2 29 $echo The command "cmp misc/mb1.O $srcdir/misc/mb1.X" failed. 1>&2 30 errors=`expr $errors + 1`;; 31 esac 32 fi 33 34 $xx -t @ -k4 -n misc/mb2.I > misc/mb2.O 35 code=$? 36 if test $code != 0; then 37 $echo "Test mb2 failed: $xx return code $code differs from expected value 0" 1>&2 38 errors=`expr $errors + 1` 39 else 40 cmp misc/mb2.O $srcdir/misc/mb2.X > /dev/null 2>&1 41 case $? in 42 0) if test "$VERBOSE"; then $echo "passed mb2"; fi;; 43 1) $echo "Test mb2 failed: files misc/mb2.O and $srcdir/misc/mb2.X differ" 1>&2 44 (diff -c misc/mb2.O $srcdir/misc/mb2.X) 2> /dev/null 45 errors=`expr $errors + 1`;; 46 2) $echo "Test mb2 may have failed." 1>&2 47 $echo The command "cmp misc/mb2.O $srcdir/misc/mb2.X" failed. 1>&2 48 errors=`expr $errors + 1`;; 49 esac 50 fi 51 52 if test $errors = 0; then 53 $echo Passed all 113 tests. 1>&2 54 else 55 $echo Failed $errors tests. 1>&2 56 fi 57 test $errors = 0 || errors=1 58 exit $errors -
new file coreutils-6.8+/tests/misc/mb2.I
- + 1 Apple@AA10@@20 2 Banana@AA5@@30 3 Citrus@AA20@@5 4 Cherry@AA30@@10 -
new file coreutils-6.8+/tests/misc/mb2.X
- + 1 Citrus@AA20@@5 2 Cherry@AA30@@10 3 Apple@AA10@@20 4 Banana@AA5@@30 -
new file coreutils-6.8+/tests/misc/mb1.I
- + 1 Apple@10 2 Banana@5 3 Citrus@20 4 Cherry@30 -
new file coreutils-6.8+/tests/misc/mb1.X
- + 1 Banana@5 2 Apple@10 3 Citrus@20 4 Cherry@30 -
tests/Makefile.am
diff -urN coreutils-6.12-orig/tests/Makefile.am coreutils-6.12/tests/Makefile.am
old new 191 191 misc/shuf \ 192 192 misc/sort \ 193 193 misc/sort-compress \ 194 misc/sort-mb-tests \ 194 195 misc/sort-merge \ 195 196 misc/sort-rand \ 196 197 misc/split-a \ … … 391 392 $(root_tests) 392 393 393 394 pr_data = \ 395 misc/mb1.X \ 396 misc/mb1.I \ 397 misc/mb2.X \ 398 misc/mb2.I \ 394 399 pr/0F \ 395 400 pr/0FF \ 396 401 pr/0FFnt \ -
coreutils-6.8+/lib/linebuffer.h
old new 22 22 23 23 # include <stdio.h> 24 24 25 /* Get mbstate_t. */ 26 # if HAVE_WCHAR_H 27 # include <wchar.h> 28 # endif 29 25 30 /* A `struct linebuffer' holds a line of text. */ 26 31 27 32 struct linebuffer … … 29 34 size_t size; /* Allocated. */ 30 35 size_t length; /* Used. */ 31 36 char *buffer; 37 # if HAVE_WCHAR_H 38 mbstate_t state; 39 # endif 32 40 }; 33 41 34 42 /* Initialize linebuffer LINEBUFFER for use. */ -
coreutils-6.8+/src/expand.c
old new 38 38 #include <stdio.h> 39 39 #include <getopt.h> 40 40 #include <sys/types.h> 41 42 /* Get mbstate_t, mbrtowc(), wcwidth(). */ 43 #if HAVE_WCHAR_H 44 # include <wchar.h> 45 #endif 46 41 47 #include "system.h" 42 48 #include "error.h" 43 49 #include "quote.h" 44 50 #include "xstrndup.h" 45 51 52 /* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC 53 installation; work around this configuration error. */ 54 #if !defined MB_LEN_MAX || MB_LEN_MAX < 2 55 # define MB_LEN_MAX 16 56 #endif 57 58 /* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */ 59 #if HAVE_MBRTOWC && defined mbstate_t 60 # define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0) 61 #endif 62 46 63 /* The official name of this program (e.g., no `g' prefix). */ 47 64 #define PROGRAM_NAME "expand" 48 65 … … 183 200 stops = num_start + len - 1; 184 201 } 185 202 } 203 186 204 else 187 205 { 188 206 error (0, 0, _("tab size contains invalid character(s): %s"), … … 365 383 } 366 384 } 367 385 386 #if HAVE_MBRTOWC 387 static void 388 expand_multibyte (void) 389 { 390 FILE *fp; /* Input strem. */ 391 mbstate_t i_state; /* Current shift state of the input stream. */ 392 mbstate_t i_state_bak; /* Back up the I_STATE. */ 393 mbstate_t o_state; /* Current shift state of the output stream. */ 394 char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */ 395 char *bufpos; /* Next read position of BUF. */ 396 size_t buflen = 0; /* The length of the byte sequence in buf. */ 397 wchar_t wc; /* A gotten wide character. */ 398 size_t mblength; /* The byte size of a multibyte character 399 which shows as same character as WC. */ 400 int tab_index = 0; /* Index in `tab_list' of next tabstop. */ 401 int column = 0; /* Column on screen of the next char. */ 402 int next_tab_column; /* Column the next tab stop is on. */ 403 int convert = 1; /* If nonzero, perform translations. 
*/ 404 405 fp = next_file ((FILE *) NULL); 406 if (fp == NULL) 407 return; 408 409 memset (&o_state, '\0', sizeof(mbstate_t)); 410 memset (&i_state, '\0', sizeof(mbstate_t)); 411 412 for (;;) 413 { 414 /* Refill the buffer BUF. */ 415 if (buflen < MB_LEN_MAX && !feof(fp) && !ferror(fp)) 416 { 417 memmove (buf, bufpos, buflen); 418 buflen += fread (buf + buflen, sizeof(char), BUFSIZ, fp); 419 bufpos = buf; 420 } 421 422 /* No character is left in BUF. */ 423 if (buflen < 1) 424 { 425 fp = next_file (fp); 426 427 if (fp == NULL) 428 break; /* No more files. */ 429 else 430 { 431 memset (&i_state, '\0', sizeof(mbstate_t)); 432 continue; 433 } 434 } 435 436 /* Get a wide character. */ 437 i_state_bak = i_state; 438 mblength = mbrtowc (&wc, bufpos, buflen, &i_state); 439 440 switch (mblength) 441 { 442 case (size_t)-1: /* illegal byte sequence. */ 443 case (size_t)-2: 444 mblength = 1; 445 i_state = i_state_bak; 446 if (convert) 447 { 448 ++column; 449 if (convert_entire_line == 0) 450 convert = 0; 451 } 452 putchar (*bufpos); 453 break; 454 455 case 0: /* null. */ 456 mblength = 1; 457 if (convert && convert_entire_line == 0) 458 convert = 0; 459 putchar ('\0'); 460 break; 461 462 default: 463 if (wc == L'\n') /* LF. */ 464 { 465 tab_index = 0; 466 column = 0; 467 convert = 1; 468 putchar ('\n'); 469 } 470 else if (wc == L'\t' && convert) /* Tab. */ 471 { 472 if (tab_size == 0) 473 { 474 /* Do not let tab_index == first_free_tab; 475 stop when it is 1 less. */ 476 while (tab_index < first_free_tab - 1 477 && column >= tab_list[tab_index]) 478 tab_index++; 479 next_tab_column = tab_list[tab_index]; 480 if (tab_index < first_free_tab - 1) 481 tab_index++; 482 if (column >= next_tab_column) 483 next_tab_column = column + 1; 484 } 485 else 486 next_tab_column = column + tab_size - column % tab_size; 487 488 while (column < next_tab_column) 489 { 490 putchar (' '); 491 ++column; 492 } 493 } 494 else /* Others. 
*/ 495 { 496 if (convert) 497 { 498 if (wc == L'\b') 499 { 500 if (column > 0) 501 --column; 502 } 503 else 504 { 505 int width; /* The width of WC. */ 506 507 width = wcwidth (wc); 508 column += (width > 0) ? width : 0; 509 if (convert_entire_line == 0) 510 convert = 0; 511 } 512 } 513 fwrite (bufpos, sizeof(char), mblength, stdout); 514 } 515 } 516 buflen -= mblength; 517 bufpos += mblength; 518 } 519 } 520 #endif 521 368 522 int 369 523 main (int argc, char **argv) 370 524 { … … 429 583 430 584 file_list = (optind < argc ? &argv[optind] : stdin_argv); 431 585 432 expand (); 586 #if HAVE_MBRTOWC 587 if (MB_CUR_MAX > 1) 588 expand_multibyte (); 589 else 590 #endif 591 expand (); 433 592 434 593 if (have_read_stdin && fclose (stdin) != 0) 435 594 error (EXIT_FAILURE, errno, "-"); -
coreutils-6.8+/src/join.c
old new 23 23 #include <sys/types.h> 24 24 #include <getopt.h> 25 25 26 /* Get mbstate_t, mbrtowc(), mbrtowc(), wcwidth(). */ 27 #if HAVE_WCHAR_H 28 # include <wchar.h> 29 #endif 30 31 /* Get iswblank(), towupper. */ 32 #if HAVE_WCTYPE_H 33 # include <wctype.h> 34 #endif 35 26 36 #include "system.h" 27 37 #include "error.h" 28 38 #include "hard-locale.h" 29 39 #include "linebuffer.h" 30 #include "memcasecmp.h"31 40 #include "quote.h" 32 41 #include "stdio--.h" 33 42 #include "xmemcoll.h" 34 43 #include "xstrtol.h" 35 44 #include "argmatch.h" 36 45 46 /* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */ 47 #if HAVE_MBRTOWC && defined mbstate_t 48 # define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0) 49 #endif 50 37 51 /* The official name of this program (e.g., no `g' prefix). */ 38 52 #define PROGRAM_NAME "join" 39 53 … … 104 118 /* Last element in `outlist', where a new element can be added. */ 105 119 static struct outlist *outlist_end = &outlist_head; 106 120 107 /* Tab character separating fields. If negative, fields are separated 108 by any nonempty string of blanks, otherwise by exactly one 109 tab character whose value (when cast to unsigned char) equals TAB. */ 110 static int tab = -1; 121 /* Tab character separating fields. If NULL, fields are separated 122 by any nonempty string of blanks. */ 123 static char *tab = NULL; 124 125 /* The number of bytes used for tab. */ 126 static size_t tablen = 0; 111 127 112 128 /* If nonzero, check that the input is correctly ordered. 
*/ 113 129 static enum … … 199 217 if (ptr == lim) 200 218 return; 201 219 202 if ( 0 <= tab)220 if (tab != NULL) 203 221 { 222 unsigned char t = tab[0]; 204 223 char *sep; 205 for (; (sep = memchr (ptr, t ab, lim - ptr)) != NULL; ptr = sep + 1)224 for (; (sep = memchr (ptr, t, lim - ptr)) != NULL; ptr = sep + 1) 206 225 extract_field (line, ptr, sep - ptr); 207 226 } 208 227 else … … 229 248 extract_field (line, ptr, lim - ptr); 230 249 } 231 250 251 #if HAVE_MBRTOWC 252 static void 253 xfields_multibyte (struct line *line) 254 { 255 char *ptr = line->buf.buffer; 256 char const *lim = ptr + line->buf.length - 1; 257 wchar_t wc = 0; 258 size_t mblength = 1; 259 mbstate_t state, state_bak; 260 261 memset (&state, 0, sizeof (mbstate_t)); 262 263 if (ptr == lim) 264 return; 265 266 if (tab != NULL) 267 { 268 unsigned char t = tab[0]; 269 char *sep = ptr; 270 for (; ptr < lim; ptr = sep + mblength) 271 { 272 sep = ptr; 273 while (sep < lim) 274 { 275 state_bak = state; 276 mblength = mbrtowc (&wc, sep, lim - sep + 1, &state); 277 278 if (mblength == (size_t)-1 || mblength == (size_t)-2) 279 { 280 mblength = 1; 281 state = state_bak; 282 } 283 mblength = (mblength < 1) ? 1 : mblength; 284 285 if (mblength == tablen && !memcmp (sep, tab, mblength)) 286 break; 287 else 288 { 289 sep += mblength; 290 continue; 291 } 292 } 293 294 if (sep == lim) 295 break; 296 297 extract_field (line, ptr, sep - ptr); 298 } 299 } 300 else 301 { 302 /* Skip leading blanks before the first field. */ 303 while(ptr < lim) 304 { 305 state_bak = state; 306 mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state); 307 308 if (mblength == (size_t)-1 || mblength == (size_t)-2) 309 { 310 mblength = 1; 311 state = state_bak; 312 break; 313 } 314 mblength = (mblength < 1) ? 
1 : mblength; 315 316 if (!iswblank(wc)) 317 break; 318 ptr += mblength; 319 } 320 321 do 322 { 323 char *sep; 324 state_bak = state; 325 mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state); 326 if (mblength == (size_t)-1 || mblength == (size_t)-2) 327 { 328 mblength = 1; 329 state = state_bak; 330 break; 331 } 332 mblength = (mblength < 1) ? 1 : mblength; 333 334 sep = ptr + mblength; 335 while (sep != lim) 336 { 337 state_bak = state; 338 mblength = mbrtowc (&wc, sep, lim - sep + 1, &state); 339 if (mblength == (size_t)-1 || mblength == (size_t)-2) 340 { 341 mblength = 1; 342 state = state_bak; 343 break; 344 } 345 mblength = (mblength < 1) ? 1 : mblength; 346 347 if (iswblank (wc)) 348 break; 349 350 sep += mblength; 351 } 352 353 extract_field (line, ptr, sep - ptr); 354 if (sep == lim) 355 return; 356 357 state_bak = state; 358 mblength = mbrtowc (&wc, sep, lim - sep + 1, &state); 359 if (mblength == (size_t)-1 || mblength == (size_t)-2) 360 { 361 mblength = 1; 362 state = state_bak; 363 break; 364 } 365 mblength = (mblength < 1) ? 1 : mblength; 366 367 ptr = sep + mblength; 368 while (ptr != lim) 369 { 370 state_bak = state; 371 mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state); 372 if (mblength == (size_t)-1 || mblength == (size_t)-2) 373 { 374 mblength = 1; 375 state = state_bak; 376 break; 377 } 378 mblength = (mblength < 1) ? 1 : mblength; 379 380 if (!iswblank (wc)) 381 break; 382 383 ptr += mblength; 384 } 385 } 386 while (ptr != lim); 387 } 388 389 extract_field (line, ptr, lim - ptr); 390 } 391 #endif 392 232 393 static struct line * 233 394 dup_line (const struct line *old) 234 395 { … … 377 601 378 602 /* Print the join of LINE1 and LINE2. */ 379 603 604 #define PUT_TAB_CHAR \ 605 do \ 606 { \ 607 (tab != NULL) ? 
\ 608 fwrite(tab, sizeof(char), tablen, stdout) : putchar (' '); \ 609 } \ 610 while (0) 611 380 612 static void 381 613 prjoin (struct line const *line1, struct line const *line2) 382 614 { 383 615 const struct outlist *outlist; 384 char output_separator = tab < 0 ? ' ' : tab;385 616 386 617 outlist = outlist_head.next; 387 618 if (outlist) … … 397 628 if (o->file == 0) 398 629 { 399 630 if (line1 == &uni_blank) 400 631 { 401 632 line = line2; 402 633 field = join_field_2; 403 634 } 404 635 else 405 636 { 406 637 line = line1; 407 638 field = join_field_1; 408 639 } … … 416 647 o = o->next; 417 648 if (o == NULL) 418 649 break; 419 putchar (output_separator);650 PUT_TAB_CHAR; 420 651 } 421 652 putchar ('\n'); 422 653 } … … 434 665 prfield (join_field_1, line1); 435 666 for (i = 0; i < join_field_1 && i < line1->nfields; ++i) 436 667 { 437 putchar (output_separator);668 PUT_TAB_CHAR; 438 669 prfield (i, line1); 439 670 } 440 671 for (i = join_field_1 + 1; i < line1->nfields; ++i) 441 672 { 442 putchar (output_separator);673 PUT_TAB_CHAR; 443 674 prfield (i, line1); 444 675 } 445 676 446 677 for (i = 0; i < join_field_2 && i < line2->nfields; ++i) 447 678 { 448 putchar (output_separator);679 PUT_TAB_CHAR; 449 680 prfield (i, line2); 450 681 } 451 682 for (i = join_field_2 + 1; i < line2->nfields; ++i) 452 683 { 453 putchar (output_separator);684 PUT_TAB_CHAR; 454 685 prfield (i, line2); 455 686 } 456 687 putchar ('\n'); … … 859 1090 860 1091 case 't': 861 1092 { 862 unsigned char newtab = optarg[0]; 863 if (! newtab) 1093 char *newtab; 1094 size_t newtablen; 1095 if (! 
optarg[0]) 864 1096 error (EXIT_FAILURE, 0, _("empty tab")); 865 if (optarg[1]) 1097 newtab = xstrdup (optarg); 1098 #if HAVE_MBRTOWC 1099 if (MB_CUR_MAX > 1) 1100 { 1101 mbstate_t state; 1102 1103 memset (&state, 0, sizeof (mbstate_t)); 1104 newtablen = mbrtowc (NULL, newtab, 1105 strnlen (newtab, MB_LEN_MAX), 1106 &state); 1107 if (newtablen == (size_t) 0 1108 || newtablen == (size_t) -1 1109 || newtablen == (size_t) -2) 1110 newtablen = 1; 1111 } 1112 else 1113 #endif 1114 newtablen = 1; 1115 1116 if (newtablen == 1 && newtab[1]) 1117 { 1118 if (STREQ (newtab, "\\0")) 1119 newtab[0] = '\0'; 1120 } 1121 if (tab != NULL && strcmp (tab, newtab)) 866 1122 { 867 if (STREQ (optarg, "\\0")) 868 newtab = '\0'; 869 else 870 error (EXIT_FAILURE, 0, _("multi-character tab %s"), 871 quote (optarg)); 1123 free (newtab); 1124 error (EXIT_FAILURE, 0, _("incompatible tabs")); 872 1125 } 873 if (0 <= tab && tab != newtab)874 error (EXIT_FAILURE, 0, _("incompatible tabs"));875 1126 tab = newtab; 1127 tablen = newtablen; 876 1128 } 877 1129 break; 878 1130 -
coreutils-6.
diff -urNp coreutils-6.11-orig/src/join.c coreutils-6.11/src/join.c
old new keycmp (struct line const *line1, struct 324 324 size_t jf_1, size_t jf_2) 325 325 { 326 326 /* Start of field to compare in each file. */ 327 char *beg1; 328 char *beg2; 329 330 size_t len1; 331 size_t len2; /* Length of fields to compare. */ 327 char *beg[2]; 328 char *copy[2]; 329 size_t len[2]; /* Length of fields to compare. */ 332 330 int diff; 331 int i, j; 333 332 334 333 if (jf_1 < line1->nfields) 335 334 { 336 beg 1= line1->fields[jf_1].beg;337 len 1= line1->fields[jf_1].len;335 beg[0] = line1->fields[jf_1].beg; 336 len[0] = line1->fields[jf_1].len; 338 337 } 339 338 else 340 339 { 341 beg 1= NULL;342 len 1= 0;340 beg[0] = NULL; 341 len[0] = 0; 343 342 } 344 343 345 344 if (jf_2 < line2->nfields) 346 345 { 347 beg 2= line2->fields[jf_2].beg;348 len 2= line2->fields[jf_2].len;346 beg[1] = line2->fields[jf_2].beg; 347 len[1] = line2->fields[jf_2].len; 349 348 } 350 349 else 351 350 { 352 beg 2= NULL;353 len 2= 0;351 beg[1] = NULL; 352 len[1] = 0; 354 353 } 355 354 356 if (len 1== 0)357 return len 2== 0 ? 0 : -1;358 if (len 2== 0)355 if (len[0] == 0) 356 return len[1] == 0 ? 0 : -1; 357 if (len[1] == 0) 359 358 return 1; 360 359 361 360 if (ignore_case) 362 361 { 363 /* FIXME: ignore_case does not work with NLS (in particular, 364 with multibyte chars). 
*/ 365 diff = memcasecmp (beg1, beg2, MIN (len1, len2)); 362 #ifdef HAVE_MBRTOWC 363 if (MB_CUR_MAX > 1) 364 { 365 size_t mblength; 366 wchar_t wc, uwc; 367 mbstate_t state, state_bak; 368 369 memset (&state, '\0', sizeof (mbstate_t)); 370 371 for (i = 0; i < 2; i++) 372 { 373 copy[i] = alloca (len[i] + 1); 374 375 for (j = 0; j < MIN (len[0], len[1]);) 376 { 377 state_bak = state; 378 mblength = mbrtowc (&wc, beg[i] + j, len[i] - j, &state); 379 380 switch (mblength) 381 { 382 case (size_t) -1: 383 case (size_t) -2: 384 state = state_bak; 385 /* Fall through */ 386 case 0: 387 mblength = 1; 388 break; 389 390 default: 391 uwc = towupper (wc); 392 393 if (uwc != wc) 394 { 395 mbstate_t state_wc; 396 397 memset (&state_wc, '\0', sizeof (mbstate_t)); 398 wcrtomb (copy[i] + j, uwc, &state_wc); 399 } 400 else 401 memcpy (copy[i] + j, beg[i] + j, mblength); 402 } 403 j += mblength; 404 } 405 copy[i][j] = '\0'; 406 } 407 } 408 else 409 #endif 410 { 411 for (i = 0; i < 2; i++) 412 { 413 copy[i] = alloca (len[i] + 1); 414 415 for (j = 0; j < MIN (len[0], len[1]); j++) 416 copy[i][j] = toupper (beg[i][j]); 417 418 copy[i][j] = '\0'; 419 } 420 } 366 421 } 367 422 else 368 423 { 369 if (hard_LC_COLLATE) 370 return xmemcoll (beg1, len1, beg2, len2); 371 diff = memcmp (beg1, beg2, MIN (len1, len2)); 424 copy[0] = (unsigned char *) beg[0]; 425 copy[1] = (unsigned char *) beg[1]; 372 426 } 373 427 428 if (hard_LC_COLLATE) 429 return xmemcoll ((char *) copy[0], len[0], (char *) copy[1], len[1]); 430 diff = memcmp (copy[0], copy[1], MIN (len[0], len[1])); 431 432 374 433 if (diff) 375 434 return diff; 376 return len 1 < len2 ? -1 : len1 != len2;435 return len[0] - len[1]; 377 436 } 378 437 379 438 /* Check that successive input lines PREV and CURRENT from input file -
coreutils-6.8+/src/uniq.c
old new 23 23 #include <getopt.h> 24 24 #include <sys/types.h> 25 25 26 /* Get mbstate_t, mbrtowc(). */ 27 #if HAVE_WCHAR_H 28 # include <wchar.h> 29 #endif 30 31 /* Get isw* functions. */ 32 #if HAVE_WCTYPE_H 33 # include <wctype.h> 34 #endif 35 26 36 #include "system.h" 27 37 #include "argmatch.h" 28 38 #include "linebuffer.h" … … 32 42 #include "quote.h" 33 43 #include "xmemcoll.h" 34 44 #include "xstrtol.h" 35 #include "memcasecmp.h" 45 #include "xmemcoll.h" 46 47 /* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC 48 installation; work around this configuration error. */ 49 #if !defined MB_LEN_MAX || MB_LEN_MAX < 2 50 # define MB_LEN_MAX 16 51 #endif 52 53 /* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */ 54 #if HAVE_MBRTOWC && defined mbstate_t 55 # define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0) 56 #endif 57 36 58 37 59 /* The official name of this program (e.g., no `g' prefix). */ 38 60 #define PROGRAM_NAME "uniq" … … 109 131 /* Select whether/how to delimit groups of duplicate lines. */ 110 132 static enum delimit_method delimit_groups; 111 133 134 /* Function pointers. */ 135 static char * 136 (*find_field) (struct linebuffer *line); 137 112 138 static struct option const longopts[] = 113 139 { 114 140 {"count", no_argument, NULL, 'c'}, … … 198 224 return a pointer to the beginning of the line's field to be compared. 
*/ 199 225 200 226 static char * 201 find_field (conststruct linebuffer *line)227 find_field_uni (struct linebuffer *line) 202 228 { 203 229 size_t count; 204 230 char *lp = line->buffer; … … 219 245 return lp + i; 220 246 } 221 247 248 #if HAVE_MBRTOWC 249 250 # define MBCHAR_TO_WCHAR(WC, MBLENGTH, LP, POS, SIZE, STATEP, CONVFAIL) \ 251 do \ 252 { \ 253 mbstate_t state_bak; \ 254 \ 255 CONVFAIL = 0; \ 256 state_bak = *STATEP; \ 257 \ 258 MBLENGTH = mbrtowc (&WC, LP + POS, SIZE - POS, STATEP); \ 259 \ 260 switch (MBLENGTH) \ 261 { \ 262 case (size_t)-2: \ 263 case (size_t)-1: \ 264 *STATEP = state_bak; \ 265 CONVFAIL++; \ 266 /* Fall through */ \ 267 case 0: \ 268 MBLENGTH = 1; \ 269 } \ 270 } \ 271 while (0) 272 273 static char * 274 find_field_multi (struct linebuffer *line) 275 { 276 size_t count; 277 char *lp = line->buffer; 278 size_t size = line->length - 1; 279 size_t pos; 280 size_t mblength; 281 wchar_t wc; 282 mbstate_t *statep; 283 int convfail; 284 285 pos = 0; 286 statep = &(line->state); 287 288 /* skip fields. */ 289 for (count = 0; count < skip_fields && pos < size; count++) 290 { 291 while (pos < size) 292 { 293 MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail); 294 295 if (convfail || !iswblank (wc)) 296 { 297 pos += mblength; 298 break; 299 } 300 pos += mblength; 301 } 302 303 while (pos < size) 304 { 305 MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail); 306 307 if (!convfail && iswblank (wc)) 308 break; 309 310 pos += mblength; 311 } 312 } 313 314 /* skip fields. */ 315 for (count = 0; count < skip_chars && pos < size; count++) 316 { 317 MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail); 318 pos += mblength; 319 } 320 321 return lp + pos; 322 } 323 #endif 324 222 325 /* Return false if two strings OLD and NEW match, true if not. 223 326 OLD and NEW point not to the beginnings of the lines 224 327 but rather to the beginnings of the fields to compare. 
… … 227 330 static bool 228 331 different (char *old, char *new, size_t oldlen, size_t newlen) 229 332 { 333 char *copy_old, *copy_new; 334 230 335 if (check_chars < oldlen) 231 336 oldlen = check_chars; 232 337 if (check_chars < newlen) … … 234 339 235 340 if (ignore_case) 236 341 { 237 /* FIXME: This should invoke strcoll somehow. */ 238 return oldlen != newlen || memcasecmp (old, new, oldlen); 342 size_t i; 343 344 copy_old = alloca (oldlen + 1); 345 copy_new = alloca (oldlen + 1); 346 347 for (i = 0; i < oldlen; i++) 348 { 349 copy_old[i] = toupper (old[i]); 350 copy_new[i] = toupper (new[i]); 351 } 239 352 } 240 else if (hard_LC_COLLATE)241 return xmemcoll (old, oldlen, new, newlen) != 0;242 353 else 243 return oldlen != newlen || memcmp (old, new, oldlen); 354 { 355 copy_old = (char *)old; 356 copy_new = (char *)new; 357 } 358 359 return xmemcoll (copy_old, oldlen, copy_new, newlen); 360 } 361 362 #if HAVE_MBRTOWC 363 static int 364 different_multi (const char *old, const char *new, size_t oldlen, size_t newlen, mbstate_t oldstate, mbstate_t newstate) 365 { 366 size_t i, j, chars; 367 const char *str[2]; 368 char *copy[2]; 369 size_t len[2]; 370 mbstate_t state[2]; 371 size_t mblength; 372 wchar_t wc, uwc; 373 mbstate_t state_bak; 374 375 str[0] = old; 376 str[1] = new; 377 len[0] = oldlen; 378 len[1] = newlen; 379 state[0] = oldstate; 380 state[1] = newstate; 381 382 for (i = 0; i < 2; i++) 383 { 384 copy[i] = alloca (len[i] + 1); 385 386 for (j = 0, chars = 0; j < len[i] && chars < check_chars; chars++) 387 { 388 state_bak = state[i]; 389 mblength = mbrtowc (&wc, str[i] + j, len[i] - j, &(state[i])); 390 391 switch (mblength) 392 { 393 case (size_t)-1: 394 case (size_t)-2: 395 state[i] = state_bak; 396 /* Fall through */ 397 case 0: 398 mblength = 1; 399 break; 400 401 default: 402 if (ignore_case) 403 { 404 uwc = towupper (wc); 405 406 if (uwc != wc) 407 { 408 mbstate_t state_wc; 409 410 memset (&state_wc, '\0', sizeof(mbstate_t)); 411 wcrtomb (copy[i] + 
j, uwc, &state_wc); 412 } 413 else 414 memcpy (copy[i] + j, str[i] + j, mblength); 415 } 416 else 417 memcpy (copy[i] + j, str[i] + j, mblength); 418 } 419 j += mblength; 420 } 421 copy[i][j] = '\0'; 422 len[i] = j; 423 } 424 425 return xmemcoll (copy[0], len[0], copy[1], len[1]); 244 426 } 427 #endif 245 428 246 429 /* Output the line in linebuffer LINE to standard output 247 430 provided that the switches say it should be output. … … 295 478 { 296 479 char *prevfield IF_LINT (= NULL); 297 480 size_t prevlen IF_LINT (= 0); 481 #if HAVE_MBRTOWC 482 mbstate_t prevstate; 483 484 memset (&prevstate, '\0', sizeof (mbstate_t)); 485 #endif 298 486 299 487 while (!feof (stdin)) 300 488 { 301 489 char *thisfield; 302 490 size_t thislen; 491 #if HAVE_MBRTOWC 492 mbstate_t thisstate; 493 #endif 494 303 495 if (readlinebuffer_delim (thisline, stdin, delimiter) == 0) 304 496 break; 305 497 thisfield = find_field (thisline); 306 498 thislen = thisline->length - 1 - (thisfield - thisline->buffer); 499 #if HAVE_MBRTOWC 500 if (MB_CUR_MAX > 1) 501 { 502 thisstate = thisline->state; 503 504 if (prevline->length == 0 || different_multi 505 (thisfield, prevfield, thislen, prevlen, thisstate, prevstate)) 506 { 507 fwrite (thisline->buffer, sizeof (char), 508 thisline->length, stdout); 509 510 SWAP_LINES (prevline, thisline); 511 prevfield = thisfield; 512 prevlen = thislen; 513 prevstate = thisstate; 514 } 515 } 516 else 517 #endif 307 518 if (prevline->length == 0 308 519 || different (thisfield, prevfield, thislen, prevlen)) 309 520 { … … 322 533 size_t prevlen; 323 534 uintmax_t match_count = 0; 324 535 bool first_delimiter = true; 536 #if HAVE_MBRTOWC 537 mbstate_t prevstate; 538 #endif 325 539 326 540 if (readlinebuffer_delim (prevline, stdin, delimiter) == 0) 327 541 goto closefiles; 328 542 prevfield = find_field (prevline); 329 543 prevlen = prevline->length - 1 - (prevfield - prevline->buffer); 544 #if HAVE_MBRTOWC 545 prevstate = prevline->state; 546 #endif 330 547 331 548 
while (!feof (stdin)) 332 549 { 333 550 bool match; 334 551 char *thisfield; 335 552 size_t thislen; 553 #if HAVE_MBRTOWC 554 mbstate_t thisstate; 555 #endif 336 556 if (readlinebuffer_delim (thisline, stdin, delimiter) == 0) 337 557 { 338 558 if (ferror (stdin)) … … 341 561 } 342 562 thisfield = find_field (thisline); 343 563 thislen = thisline->length - 1 - (thisfield - thisline->buffer); 564 #if HAVE_MBRTOWC 565 if (MB_CUR_MAX > 1) 566 { 567 thisstate = thisline->state; 568 match = !different_multi (thisfield, prevfield, 569 thislen, prevlen, thisstate, prevstate); 570 } 571 else 572 #endif 344 573 match = !different (thisfield, prevfield, thislen, prevlen); 345 574 match_count += match; 346 575 … … 373 602 SWAP_LINES (prevline, thisline); 374 603 prevfield = thisfield; 375 604 prevlen = thislen; 605 #if HAVE_MBRTOWC 606 prevstate = thisstate; 607 #endif 376 608 if (!match) 377 609 match_count = 0; 378 610 } … … 417 649 418 650 atexit (close_stdout); 419 651 652 #if HAVE_MBRTOWC 653 if (MB_CUR_MAX > 1) 654 { 655 find_field = find_field_multi; 656 } 657 else 658 #endif 659 { 660 find_field = find_field_uni; 661 } 662 663 664 420 665 skip_chars = 0; 421 666 skip_fields = 0; 422 667 check_chars = SIZE_MAX; -
coreutils-6.8+/src/fold.c
old new 23 23 #include <getopt.h> 24 24 #include <sys/types.h> 25 25 26 /* Get mbstate_t, mbrtowc(), wcwidth(). */ 27 #if HAVE_WCHAR_H 28 # include <wchar.h> 29 #endif 30 31 /* Get iswprint(), iswblank(), wcwidth(). */ 32 #if HAVE_WCTYPE_H 33 # include <wctype.h> 34 #endif 35 26 36 #include "system.h" 27 37 #include "error.h" 28 38 #include "quote.h" 29 39 #include "xstrtol.h" 30 40 41 /* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC 42 installation; work around this configuration error. */ 43 #if !defined MB_LEN_MAX || MB_LEN_MAX < 2 44 # undef MB_LEN_MAX 45 # define MB_LEN_MAX 16 46 #endif 47 48 /* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */ 49 #if HAVE_MBRTOWC && defined mbstate_t 50 # define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0) 51 #endif 52 31 53 #define TAB_WIDTH 8 32 54 33 55 /* The official name of this program (e.g., no `g' prefix). */ … … 35 57 36 58 #define AUTHORS proper_name ("David MacKenzie") 37 59 60 #define FATAL_ERROR(Message) \ 61 do \ 62 { \ 63 error (0, 0, (Message)); \ 64 usage (2); \ 65 } \ 66 while (0) 67 68 enum operating_mode 69 { 70 /* Fold texts by columns that are at the given positions. */ 71 column_mode, 72 73 /* Fold texts by bytes that are at the given positions. */ 74 byte_mode, 75 76 /* Fold texts by characters that are at the given positions. */ 77 character_mode, 78 }; 79 38 80 /* The name this program was run with. */ 39 81 char *program_name; 40 82 83 /* The argument shows current mode. (Default: column_mode) */ 84 static enum operating_mode operating_mode; 85 41 86 /* If nonzero, try to break on whitespace. */ 42 87 static bool break_spaces; 43 88 44 /* If nonzero, count bytes, not column positions. */45 static bool count_bytes;46 47 89 /* If nonzero, at least one of the files we read was standard input. 
*/ 48 90 static bool have_read_stdin; 49 91 50 static char const shortopts[] = "b sw:0::1::2::3::4::5::6::7::8::9::";92 static char const shortopts[] = "bcsw:0::1::2::3::4::5::6::7::8::9::"; 51 93 52 94 static struct option const longopts[] = 53 95 { 54 96 {"bytes", no_argument, NULL, 'b'}, 97 {"characters", no_argument, NULL, 'c'}, 55 98 {"spaces", no_argument, NULL, 's'}, 56 99 {"width", required_argument, NULL, 'w'}, 57 100 {GETOPT_HELP_OPTION_DECL}, … … 81 124 "), stdout); 82 125 fputs (_("\ 83 126 -b, --bytes count bytes rather than columns\n\ 127 -c, --characters count characters rather than columns\n\ 84 128 -s, --spaces break at spaces\n\ 85 129 -w, --width=WIDTH use WIDTH columns instead of 80\n\ 86 130 "), stdout); … … 98 142 static size_t 99 143 adjust_column (size_t column, char c) 100 144 { 101 if ( !count_bytes)145 if (operating_mode != byte_mode) 102 146 { 103 147 if (c == '\b') 104 148 { … … 121 165 to stdout, with maximum line length WIDTH. 122 166 Return true if successful. */ 123 167 124 static bool125 fold_ file (char const *filename, size_t width)168 static void 169 fold_text (FILE *istream, size_t width, int *saved_errno) 126 170 { 127 FILE *istream;128 171 int c; 129 172 size_t column = 0; /* Screen column where next char will go. */ 130 173 size_t offset_out = 0; /* Index in `line_out' for next char. */ 131 174 static char *line_out = NULL; 132 175 static size_t allocated_out = 0; 133 int saved_errno;134 135 if (STREQ (filename, "-"))136 {137 istream = stdin;138 have_read_stdin = true;139 }140 else141 istream = fopen (filename, "r");142 143 if (istream == NULL)144 {145 error (0, errno, "%s", filename);146 return false;147 }148 176 149 177 while ((c = getc (istream)) != EOF) 150 178 { … … 172 200 bool found_blank = false; 173 201 size_t logical_end = offset_out; 174 202 203 /* If LINE_OUT has no wide character, 204 put a new wide character in LINE_OUT 205 if column is bigger than width. 
*/ 206 if (offset_out == 0) 207 { 208 line_out[offset_out++] = c; 209 continue; 210 } 211 175 212 /* Look for the last blank. */ 176 213 while (logical_end) 177 214 { … … 218 255 line_out[offset_out++] = c; 219 256 } 220 257 221 saved_errno = errno; 258 *saved_errno = errno; 259 260 if (offset_out) 261 fwrite (line_out, sizeof (char), (size_t) offset_out, stdout); 262 263 free(line_out); 264 } 265 266 #if HAVE_MBRTOWC 267 static void 268 fold_multibyte_text (FILE *istream, size_t width, int *saved_errno) 269 { 270 char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */ 271 size_t buflen = 0; /* The length of the byte sequence in buf. */ 272 char *bufpos; /* Next read position of BUF. */ 273 wint_t wc; /* A gotten wide character. */ 274 size_t mblength; /* The byte size of a multibyte character which shows 275 as same character as WC. */ 276 mbstate_t state, state_bak; /* State of the stream. */ 277 int convfail; /* 1, when conversion is failed. Otherwise 0. */ 278 279 char *line_out = NULL; 280 size_t offset_out = 0; /* Index in `line_out' for next char. 
*/ 281 size_t allocated_out = 0; 282 283 int increment; 284 size_t column = 0; 285 286 size_t last_blank_pos; 287 size_t last_blank_column; 288 int is_blank_seen; 289 int last_blank_increment; 290 int is_bs_following_last_blank; 291 size_t bs_following_last_blank_num; 292 int is_cr_after_last_blank; 293 294 #define CLEAR_FLAGS \ 295 do \ 296 { \ 297 last_blank_pos = 0; \ 298 last_blank_column = 0; \ 299 is_blank_seen = 0; \ 300 is_bs_following_last_blank = 0; \ 301 bs_following_last_blank_num = 0; \ 302 is_cr_after_last_blank = 0; \ 303 } \ 304 while (0) 305 306 #define START_NEW_LINE \ 307 do \ 308 { \ 309 putchar ('\n'); \ 310 column = 0; \ 311 offset_out = 0; \ 312 CLEAR_FLAGS; \ 313 } \ 314 while (0) 315 316 CLEAR_FLAGS; 317 memset (&state, '\0', sizeof(mbstate_t)); 318 319 for (;; bufpos += mblength, buflen -= mblength) 320 { 321 if (buflen < MB_LEN_MAX && !feof (istream) && !ferror (istream)) 322 { 323 memmove (buf, bufpos, buflen); 324 buflen += fread (buf + buflen, sizeof(char), BUFSIZ, istream); 325 bufpos = buf; 326 } 327 328 if (buflen < 1) 329 break; 330 331 /* Get a wide character. */ 332 convfail = 0; 333 state_bak = state; 334 mblength = mbrtowc ((wchar_t *)&wc, bufpos, buflen, &state); 335 336 switch (mblength) 337 { 338 case (size_t)-1: 339 case (size_t)-2: 340 convfail++; 341 state = state_bak; 342 /* Fall through. */ 343 344 case 0: 345 mblength = 1; 346 break; 347 } 348 349 rescan: 350 if (operating_mode == byte_mode) /* byte mode */ 351 increment = mblength; 352 else if (operating_mode == character_mode) /* character mode */ 353 increment = 1; 354 else /* column mode */ 355 { 356 if (convfail) 357 increment = 1; 358 else 359 { 360 switch (wc) 361 { 362 case L'\n': 363 fwrite (line_out, sizeof(char), offset_out, stdout); 364 START_NEW_LINE; 365 continue; 366 367 case L'\b': 368 increment = (column > 0) ? 
-1 : 0; 369 break; 370 371 case L'\r': 372 increment = -1 * column; 373 break; 374 375 case L'\t': 376 increment = 8 - column % 8; 377 break; 378 379 default: 380 increment = wcwidth (wc); 381 increment = (increment < 0) ? 0 : increment; 382 } 383 } 384 } 385 386 if (column + increment > width && break_spaces && last_blank_pos) 387 { 388 fwrite (line_out, sizeof(char), last_blank_pos, stdout); 389 putchar ('\n'); 390 391 offset_out = offset_out - last_blank_pos; 392 column = column - last_blank_column + ((is_cr_after_last_blank) 393 ? last_blank_increment : bs_following_last_blank_num); 394 memmove (line_out, line_out + last_blank_pos, offset_out); 395 CLEAR_FLAGS; 396 goto rescan; 397 } 398 399 if (column + increment > width && column != 0) 400 { 401 fwrite (line_out, sizeof(char), offset_out, stdout); 402 START_NEW_LINE; 403 goto rescan; 404 } 405 406 if (allocated_out < offset_out + mblength) 407 { 408 allocated_out += 1024; 409 line_out = xrealloc (line_out, allocated_out); 410 } 411 412 memcpy (line_out + offset_out, bufpos, mblength); 413 offset_out += mblength; 414 column += increment; 415 416 if (is_blank_seen && !convfail && wc == L'\r') 417 is_cr_after_last_blank = 1; 418 419 if (is_bs_following_last_blank && !convfail && wc == L'\b') 420 ++bs_following_last_blank_num; 421 else 422 is_bs_following_last_blank = 0; 423 424 if (break_spaces && !convfail && iswblank (wc)) 425 { 426 last_blank_pos = offset_out; 427 last_blank_column = column; 428 is_blank_seen = 1; 429 last_blank_increment = increment; 430 is_bs_following_last_blank = 1; 431 bs_following_last_blank_num = 0; 432 is_cr_after_last_blank = 0; 433 } 434 } 435 436 *saved_errno = errno; 222 437 223 438 if (offset_out) 224 439 fwrite (line_out, sizeof (char), (size_t) offset_out, stdout); 225 440 441 free(line_out); 442 } 443 #endif 444 445 /* Fold file FILENAME, or standard input if FILENAME is "-", 446 to stdout, with maximum line length WIDTH. 447 Return 0 if successful, 1 if an error occurs. 
*/ 448 449 static bool 450 fold_file (char *filename, size_t width) 451 { 452 FILE *istream; 453 int saved_errno; 454 455 if (STREQ (filename, "-")) 456 { 457 istream = stdin; 458 have_read_stdin = 1; 459 } 460 else 461 istream = fopen (filename, "r"); 462 463 if (istream == NULL) 464 { 465 error (0, errno, "%s", filename); 466 return 1; 467 } 468 469 /* Define how ISTREAM is being folded. */ 470 #if HAVE_MBRTOWC 471 if (MB_CUR_MAX > 1) 472 fold_multibyte_text (istream, width, &saved_errno); 473 else 474 #endif 475 fold_text (istream, width, &saved_errno); 476 226 477 if (ferror (istream)) 227 478 { 228 479 error (0, saved_errno, "%s", filename); … … 255 506 256 507 atexit (close_stdout); 257 508 258 break_spaces = count_bytes = have_read_stdin = false; 509 operating_mode = column_mode; 510 break_spaces = have_read_stdin = false; 259 511 260 512 while ((optc = getopt_long (argc, argv, shortopts, longopts, NULL)) != -1) 261 513 { … … 264 516 switch (optc) 265 517 { 266 518 case 'b': /* Count bytes rather than columns. */ 267 count_bytes = true; 519 if (operating_mode != column_mode) 520 FATAL_ERROR (_("only one way of folding may be specified")); 521 operating_mode = byte_mode; 522 break; 523 524 case 'c': 525 if (operating_mode != column_mode) 526 FATAL_ERROR (_("only one way of folding may be specified")); 527 operating_mode = character_mode; 268 528 break; 269 529 270 530 case 's': /* Break at word boundaries. */ -
coreutils-6.8+/src/sort.c
old new 23 23 24 24 #include <config.h> 25 25 26 #include <assert.h> 26 27 #include <getopt.h> 27 28 #include <sys/types.h> 28 29 #include <sys/wait.h> 29 30 #include <signal.h> 31 #if HAVE_WCHAR_H 32 # include <wchar.h> 33 #endif 34 /* Get isw* functions. */ 35 #if HAVE_WCTYPE_H 36 # include <wctype.h> 37 #endif 38 30 39 #include "system.h" 31 40 #include "argmatch.h" 32 41 #include "error.h" … … 116 125 /* Thousands separator; if -1, then there isn't one. */ 117 126 static int thousands_sep; 118 127 128 static int force_general_numcompare = 0; 129 119 130 /* Nonzero if the corresponding locales are hard. */ 120 131 static bool hard_LC_COLLATE; 121 #if HAVE_ NL_LANGINFO132 #if HAVE_LANGINFO_CODESET 122 133 static bool hard_LC_TIME; 123 134 #endif 124 135 125 136 #define NONZERO(x) ((x) != 0) 126 137 138 /* get a multibyte character's byte length. */ 139 #define GET_BYTELEN_OF_CHAR(LIM, PTR, MBLENGTH, STATE) \ 140 do \ 141 { \ 142 wchar_t wc; \ 143 mbstate_t state_bak; \ 144 \ 145 state_bak = STATE; \ 146 mblength = mbrtowc (&wc, PTR, LIM - PTR, &STATE); \ 147 \ 148 switch (MBLENGTH) \ 149 { \ 150 case (size_t)-1: \ 151 case (size_t)-2: \ 152 STATE = state_bak; \ 153 /* Fall through. */ \ 154 case 0: \ 155 MBLENGTH = 1; \ 156 } \ 157 } \ 158 while (0) 159 127 160 /* The kind of blanks for '-b' to skip in various options. */ 128 161 enum blanktype { bl_start, bl_end, bl_both }; 129 162 … … 261 294 they were read if all keys compare equal. */ 262 295 static bool stable; 263 296 264 /* If TAB has this value, blanks separate fields. */ 265 enum { TAB_DEFAULT = CHAR_MAX + 1 }; 266 267 /* Tab character separating fields. If TAB_DEFAULT, then fields are 297 /* Tab character separating fields. If tab_length is 0, then fields are 268 298 separated by the empty string between a non-blank character and a blank 269 299 character. 
*/ 270 static int tab = TAB_DEFAULT; 300 static char tab[MB_LEN_MAX + 1]; 301 static size_t tab_length = 0; 271 302 272 303 /* Flag to remove consecutive duplicate lines from the output. 273 304 Only the last of a sequence of equal lines will be output. */ … … 639 670 update_proc (pid); 640 671 } 641 672 673 /* Function pointers. */ 674 static void 675 (*inittables) (void); 676 static char * 677 (*begfield) (const struct line*, const struct keyfield *); 678 static char * 679 (*limfield) (const struct line*, const struct keyfield *); 680 static int 681 (*getmonth) (char const *, size_t); 682 static int 683 (*keycompare) (const struct line *, const struct line *); 684 static int 685 (*numcompare) (const char *, const char *); 686 687 /* Test for white space multibyte character. 688 Set LENGTH the byte length of investigated multibyte character. */ 689 #if HAVE_MBRTOWC 690 static int 691 ismbblank (const char *str, size_t len, size_t *length) 692 { 693 size_t mblength; 694 wchar_t wc; 695 mbstate_t state; 696 697 memset (&state, '\0', sizeof(mbstate_t)); 698 mblength = mbrtowc (&wc, str, len, &state); 699 700 if (mblength == (size_t)-1 || mblength == (size_t)-2) 701 { 702 *length = 1; 703 return 0; 704 } 705 706 *length = (mblength < 1) ? 1 : mblength; 707 return iswblank (wc); 708 } 709 #endif 710 642 711 /* Clean up any remaining temporary files. */ 643 712 644 713 static void … … 978 1047 free (node); 979 1048 } 980 1049 981 #if HAVE_ NL_LANGINFO1050 #if HAVE_LANGINFO_CODESET 982 1051 983 1052 static int 984 1053 struct_month_cmp (const void *m1, const void *m2) … … 993 1062 /* Initialize the character class tables. */ 994 1063 995 1064 static void 996 inittables (void)1065 inittables_uni (void) 997 1066 { 998 1067 size_t i; 999 1068 … … 1005 1074 fold_toupper[i] = toupper (i); 1006 1075 } 1007 1076 1008 #if HAVE_ NL_LANGINFO1077 #if HAVE_LANGINFO_CODESET 1009 1078 /* If we're not in the "C" locale, read different names for months. 
*/ 1010 1079 if (hard_LC_TIME) 1011 1080 { … … 1031 1100 #endif 1032 1101 } 1033 1102 1103 #if HAVE_MBRTOWC 1104 static void 1105 inittables_mb (void) 1106 { 1107 int i, j, k, l; 1108 char *name, *s; 1109 size_t s_len, mblength; 1110 char mbc[MB_LEN_MAX]; 1111 wchar_t wc, pwc; 1112 mbstate_t state_mb, state_wc; 1113 1114 for (i = 0; i < MONTHS_PER_YEAR; i++) 1115 { 1116 s = (char *) nl_langinfo (ABMON_1 + i); 1117 s_len = strlen (s); 1118 monthtab[i].name = name = (char *) xmalloc (s_len + 1); 1119 monthtab[i].val = i + 1; 1120 1121 memset (&state_mb, '\0', sizeof (mbstate_t)); 1122 memset (&state_wc, '\0', sizeof (mbstate_t)); 1123 1124 for (j = 0; j < s_len;) 1125 { 1126 if (!ismbblank (s + j, s_len - j, &mblength)) 1127 break; 1128 j += mblength; 1129 } 1130 1131 for (k = 0; j < s_len;) 1132 { 1133 mblength = mbrtowc (&wc, (s + j), (s_len - j), &state_mb); 1134 assert (mblength != (size_t)-1 && mblength != (size_t)-2); 1135 if (mblength == 0) 1136 break; 1137 1138 pwc = towupper (wc); 1139 if (pwc == wc) 1140 { 1141 memcpy (mbc, s + j, mblength); 1142 j += mblength; 1143 } 1144 else 1145 { 1146 j += mblength; 1147 mblength = wcrtomb (mbc, pwc, &state_wc); 1148 assert (mblength != (size_t)0 && mblength != (size_t)-1); 1149 } 1150 1151 for (l = 0; l < mblength; l++) 1152 name[k++] = mbc[l]; 1153 } 1154 name[k] = '\0'; 1155 } 1156 qsort ((void *) monthtab, MONTHS_PER_YEAR, 1157 sizeof (struct month), struct_month_cmp); 1158 } 1159 #endif 1160 1034 1161 /* Specify the amount of main memory to use when sorting. */ 1035 1162 static void 1036 1163 specify_sort_size (int oi, char c, char const *s) … … 1241 1368 by KEY in LINE. 
*/ 1242 1369 1243 1370 static char * 1244 begfield (const struct line *line, const struct keyfield *key)1371 begfield_uni (const struct line *line, const struct keyfield *key) 1245 1372 { 1246 1373 char *ptr = line->text, *lim = ptr + line->length - 1; 1247 1374 size_t sword = key->sword; … … 1251 1378 /* The leading field separator itself is included in a field when -t 1252 1379 is absent. */ 1253 1380 1254 if (tab != TAB_DEFAULT)1381 if (tab_length) 1255 1382 while (ptr < lim && sword--) 1256 1383 { 1257 while (ptr < lim && *ptr != tab )1384 while (ptr < lim && *ptr != tab[0]) 1258 1385 ++ptr; 1259 1386 if (ptr < lim) 1260 1387 ++ptr; … … 1282 1409 return ptr; 1283 1410 } 1284 1411 1412 #if HAVE_MBRTOWC 1413 static char * 1414 begfield_mb (const struct line *line, const struct keyfield *key) 1415 { 1416 int i; 1417 char *ptr = line->text, *lim = ptr + line->length - 1; 1418 size_t sword = key->sword; 1419 size_t schar = key->schar; 1420 size_t mblength; 1421 mbstate_t state; 1422 1423 memset (&state, '\0', sizeof(mbstate_t)); 1424 1425 if (tab_length) 1426 while (ptr < lim && sword--) 1427 { 1428 while (ptr < lim && memcmp (ptr, tab, tab_length) != 0) 1429 { 1430 GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); 1431 ptr += mblength; 1432 } 1433 if (ptr < lim) 1434 { 1435 GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); 1436 ptr += mblength; 1437 } 1438 } 1439 else 1440 while (ptr < lim && sword--) 1441 { 1442 while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength)) 1443 ptr += mblength; 1444 if (ptr < lim) 1445 { 1446 GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); 1447 ptr += mblength; 1448 } 1449 while (ptr < lim && !ismbblank (ptr, lim - ptr, &mblength)) 1450 ptr += mblength; 1451 } 1452 1453 if (key->skipsblanks) 1454 while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength)) 1455 ptr += mblength; 1456 1457 for (i = 0; i < schar; i++) 1458 { 1459 GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); 1460 1461 if (ptr + mblength > lim) 1462 break; 1463 else 1464 
ptr += mblength; 1465 } 1466 1467 return ptr; 1468 } 1469 #endif 1470 1285 1471 /* Return the limit of (a pointer to the first character after) the field 1286 1472 in LINE specified by KEY. */ 1287 1473 1288 1474 static char * 1289 limfield (const struct line *line, const struct keyfield *key)1475 limfield_uni (const struct line *line, const struct keyfield *key) 1290 1476 { 1291 1477 char *ptr = line->text, *lim = ptr + line->length - 1; 1292 1478 size_t eword = key->eword, echar = key->echar; … … 1299 1485 `beginning' is the first character following the delimiting TAB. 1300 1486 Otherwise, leave PTR pointing at the first `blank' character after 1301 1487 the preceding field. */ 1302 if (tab != TAB_DEFAULT)1488 if (tab_length) 1303 1489 while (ptr < lim && eword--) 1304 1490 { 1305 while (ptr < lim && *ptr != tab )1491 while (ptr < lim && *ptr != tab[0]) 1306 1492 ++ptr; 1307 1493 if (ptr < lim && (eword | echar)) 1308 1494 ++ptr; … … 1348 1534 */ 1349 1535 1350 1536 /* Make LIM point to the end of (one byte past) the current field. 
*/ 1351 if (tab != TAB_DEFAULT)1537 if (tab_length) 1352 1538 { 1353 1539 char *newlim; 1354 newlim = memchr (ptr, tab , lim - ptr);1540 newlim = memchr (ptr, tab[0], lim - ptr); 1355 1541 if (newlim) 1356 1542 lim = newlim; 1357 1543 } … … 1384 1570 return ptr; 1385 1571 } 1386 1572 1573 #if HAVE_MBRTOWC 1574 static char * 1575 limfield_mb (const struct line *line, const struct keyfield *key) 1576 { 1577 char *ptr = line->text, *lim = ptr + line->length - 1; 1578 size_t eword = key->eword, echar = key->echar; 1579 int i; 1580 size_t mblength; 1581 mbstate_t state; 1582 1583 memset (&state, '\0', sizeof(mbstate_t)); 1584 1585 if (tab_length) 1586 while (ptr < lim && eword--) 1587 { 1588 while (ptr < lim && memcmp (ptr, tab, tab_length) != 0) 1589 { 1590 GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); 1591 ptr += mblength; 1592 } 1593 if (ptr < lim && (eword | echar)) 1594 { 1595 GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); 1596 ptr += mblength; 1597 } 1598 } 1599 else 1600 while (ptr < lim && eword--) 1601 { 1602 while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength)) 1603 ptr += mblength; 1604 if (ptr < lim) 1605 { 1606 GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); 1607 ptr += mblength; 1608 } 1609 while (ptr < lim && !ismbblank (ptr, lim - ptr, &mblength)) 1610 ptr += mblength; 1611 } 1612 1613 1614 # ifdef POSIX_UNSPECIFIED 1615 /* Make LIM point to the end of (one byte past) the current field. 
*/ 1616 if (tab_length) 1617 { 1618 char *newlim, *p; 1619 1620 newlim = NULL; 1621 for (p = ptr; p < lim;) 1622 { 1623 if (memcmp (p, tab, tab_length) == 0) 1624 { 1625 newlim = p; 1626 break; 1627 } 1628 1629 GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); 1630 p += mblength; 1631 } 1632 } 1633 else 1634 { 1635 char *newlim; 1636 newlim = ptr; 1637 1638 while (newlim < lim && ismbblank (newlim, lim - newlim, &mblength)) 1639 newlim += mblength; 1640 if (ptr < lim) 1641 { 1642 GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); 1643 ptr += mblength; 1644 } 1645 while (newlim < lim && !ismbblank (newlim, lim - newlim, &mblength)) 1646 newlim += mblength; 1647 lim = newlim; 1648 } 1649 # endif 1650 1651 /* If we're skipping leading blanks, don't start counting characters 1652 * until after skipping past any leading blanks. */ 1653 if (key->skipsblanks) 1654 while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength)) 1655 ptr += mblength; 1656 1657 memset (&state, '\0', sizeof(mbstate_t)); 1658 1659 /* Advance PTR by ECHAR (if possible), but no further than LIM. */ 1660 for (i = 0; i < echar; i++) 1661 { 1662 GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); 1663 1664 if (ptr + mblength > lim) 1665 break; 1666 else 1667 ptr += mblength; 1668 } 1669 1670 return ptr; 1671 } 1672 #endif 1673 1387 1674 /* Fill BUF reading from FP, moving buf->left bytes from the end 1388 1675 of buf->buf to the beginning first. If EOF is reached and the 1389 1676 file wasn't terminated by a newline, supply one. 
Set up BUF's line … … 1466 1753 else 1467 1754 { 1468 1755 if (key->skipsblanks) 1469 while (blanks[to_uchar (*line_start)]) 1470 line_start++; 1756 { 1757 #if HAVE_MBRTOWC 1758 if (MB_CUR_MAX > 1) 1759 { 1760 size_t mblength; 1761 mbstate_t state; 1762 memset (&state, '\0', sizeof(mbstate_t)); 1763 while (line_start < line->keylim && 1764 ismbblank (line_start, 1765 line->keylim - line_start, 1766 &mblength)) 1767 line_start += mblength; 1768 } 1769 else 1770 #endif 1771 while (blanks[to_uchar (*line_start)]) 1772 line_start++; 1773 } 1471 1774 line->keybeg = line_start; 1472 1775 } 1473 1776 } … … 1500 1803 hideously fast. */ 1501 1804 1502 1805 static int 1503 numcompare (const char *a, const char *b)1806 numcompare_uni (const char *a, const char *b) 1504 1807 { 1505 1808 while (blanks[to_uchar (*a)]) 1506 1809 a++; … … 1510 1813 return strnumcmp (a, b, decimal_point, thousands_sep); 1511 1814 } 1512 1815 1816 #if HAVE_MBRTOWC 1817 static int 1818 numcompare_mb (const char *a, const char *b) 1819 { 1820 size_t mblength, len; 1821 len = strlen (a); /* okay for UTF-8 */ 1822 while (*a && ismbblank (a, len > MB_CUR_MAX ? MB_CUR_MAX : len, &mblength)) 1823 { 1824 a += mblength; 1825 len -= mblength; 1826 } 1827 len = strlen (b); /* okay for UTF-8 */ 1828 while (*b && ismbblank (b, len > MB_CUR_MAX ? MB_CUR_MAX : len, &mblength)) 1829 b += mblength; 1830 1831 return strnumcmp (a, b, decimal_point, thousands_sep); 1832 } 1833 #endif /* HAVE_MBRTOWC */ 1834 1513 1835 static int 1514 1836 general_numcompare (const char *sa, const char *sb) 1515 1837 { … … 1543 1865 Return 0 if the name in S is not recognized. 
*/ 1544 1866 1545 1867 static int 1546 getmonth (char const *month, size_t len)1868 getmonth_uni (char const *month, size_t len) 1547 1869 { 1548 1870 size_t lo = 0; 1549 1871 size_t hi = MONTHS_PER_YEAR; … … 1698 2020 return diff; 1699 2021 } 1700 2022 2023 #if HAVE_MBRTOWC 2024 static int 2025 getmonth_mb (const char *s, size_t len) 2026 { 2027 char *month; 2028 register size_t i; 2029 register int lo = 0, hi = MONTHS_PER_YEAR, result; 2030 char *tmp; 2031 size_t wclength, mblength; 2032 const char **pp; 2033 const wchar_t **wpp; 2034 wchar_t *month_wcs; 2035 mbstate_t state; 2036 2037 while (len > 0 && ismbblank (s, len, &mblength)) 2038 { 2039 s += mblength; 2040 len -= mblength; 2041 } 2042 2043 if (len == 0) 2044 return 0; 2045 2046 month = (char *) alloca (len + 1); 2047 2048 tmp = (char *) alloca (len + 1); 2049 memcpy (tmp, s, len); 2050 tmp[len] = '\0'; 2051 pp = (const char **)&tmp; 2052 month_wcs = (wchar_t *) alloca ((len + 1) * sizeof (wchar_t)); 2053 memset (&state, '\0', sizeof(mbstate_t)); 2054 2055 wclength = mbsrtowcs (month_wcs, pp, len + 1, &state); 2056 assert (wclength != (size_t)-1 && *pp == NULL); 2057 2058 for (i = 0; i < wclength; i++) 2059 { 2060 month_wcs[i] = towupper(month_wcs[i]); 2061 if (iswblank (month_wcs[i])) 2062 { 2063 month_wcs[i] = L'\0'; 2064 break; 2065 } 2066 } 2067 2068 wpp = (const wchar_t **)&month_wcs; 2069 2070 mblength = wcsrtombs (month, wpp, len + 1, &state); 2071 assert (mblength != (-1) && *wpp == NULL); 2072 2073 do 2074 { 2075 int ix = (lo + hi) / 2; 2076 2077 if (strncmp (month, monthtab[ix].name, strlen (monthtab[ix].name)) < 0) 2078 hi = ix; 2079 else 2080 lo = ix; 2081 } 2082 while (hi - lo > 1); 2083 2084 result = (!strncmp (month, monthtab[lo].name, strlen (monthtab[lo].name)) 2085 ? monthtab[lo].val : 0); 2086 2087 return result; 2088 } 2089 #endif 2090 1701 2091 /* Compare two lines A and B trying every key in sequence until there 1702 2092 are no more keys or a difference is found. 
*/ 1703 2093 1704 2094 static int 1705 keycompare (const struct line *a, const struct line *b)2095 keycompare_uni (const struct line *a, const struct line *b) 1706 2096 { 1707 2097 struct keyfield const *key = keylist; 1708 2098 … … 1875 2265 return key->reverse ? -diff : diff; 1876 2266 } 1877 2267 2268 #if HAVE_MBRTOWC 2269 static int 2270 keycompare_mb (const struct line *a, const struct line *b) 2271 { 2272 struct keyfield *key = keylist; 2273 2274 /* For the first iteration only, the key positions have been 2275 precomputed for us. */ 2276 char *texta = a->keybeg; 2277 char *textb = b->keybeg; 2278 char *lima = a->keylim; 2279 char *limb = b->keylim; 2280 2281 size_t mblength_a, mblength_b; 2282 wchar_t wc_a, wc_b; 2283 mbstate_t state_a, state_b; 2284 2285 int diff; 2286 2287 memset (&state_a, '\0', sizeof(mbstate_t)); 2288 memset (&state_b, '\0', sizeof(mbstate_t)); 2289 2290 for (;;) 2291 { 2292 unsigned char *translate = (unsigned char *) key->translate; 2293 bool const *ignore = key->ignore; 2294 2295 /* Find the lengths. */ 2296 size_t lena = lima <= texta ? 0 : lima - texta; 2297 size_t lenb = limb <= textb ? 0 : limb - textb; 2298 2299 /* Actually compare the fields. */ 2300 if (key->random) 2301 diff = compare_random (texta, lena, textb, lenb); 2302 else if (key->numeric | key->general_numeric) 2303 { 2304 char savea = *lima, saveb = *limb; 2305 2306 *lima = *limb = '\0'; 2307 if (force_general_numcompare) 2308 diff = general_numcompare (texta, textb); 2309 else 2310 diff = ((key->numeric ? numcompare : general_numcompare) 2311 (texta, textb)); 2312 *lima = savea, *limb = saveb; 2313 } 2314 else if (key->month) 2315 diff = getmonth (texta, lena) - getmonth (textb, lenb); 2316 else 2317 { 2318 if (ignore || translate) 2319 { 2320 char *copy_a = (char *) alloca (lena + 1 + lenb + 1); 2321 char *copy_b = copy_a + lena + 1; 2322 size_t new_len_a, new_len_b; 2323 size_t i, j; 2324 2325 /* Ignore and/or translate chars before comparing. 
*/ 2326 # define IGNORE_CHARS(NEW_LEN, LEN, TEXT, COPY, WC, MBLENGTH, STATE) \ 2327 do \ 2328 { \ 2329 wchar_t uwc; \ 2330 char mbc[MB_LEN_MAX]; \ 2331 mbstate_t state_wc; \ 2332 \ 2333 for (NEW_LEN = i = 0; i < LEN;) \ 2334 { \ 2335 mbstate_t state_bak; \ 2336 \ 2337 state_bak = STATE; \ 2338 MBLENGTH = mbrtowc (&WC, TEXT + i, LEN - i, &STATE); \ 2339 \ 2340 if (MBLENGTH == (size_t)-2 || MBLENGTH == (size_t)-1 \ 2341 || MBLENGTH == 0) \ 2342 { \ 2343 if (MBLENGTH == (size_t)-2 || MBLENGTH == (size_t)-1) \ 2344 STATE = state_bak; \ 2345 if (!ignore) \ 2346 COPY[NEW_LEN++] = TEXT[i++]; \ 2347 continue; \ 2348 } \ 2349 \ 2350 if (ignore) \ 2351 { \ 2352 if ((ignore == nonprinting && !iswprint (WC)) \ 2353 || (ignore == nondictionary \ 2354 && !iswalnum (WC) && !iswblank (WC))) \ 2355 { \ 2356 i += MBLENGTH; \ 2357 continue; \ 2358 } \ 2359 } \ 2360 \ 2361 if (translate) \ 2362 { \ 2363 \ 2364 uwc = towupper(WC); \ 2365 if (WC == uwc) \ 2366 { \ 2367 memcpy (mbc, TEXT + i, MBLENGTH); \ 2368 i += MBLENGTH; \ 2369 } \ 2370 else \ 2371 { \ 2372 i += MBLENGTH; \ 2373 WC = uwc; \ 2374 memset (&state_wc, '\0', sizeof (mbstate_t)); \ 2375 \ 2376 MBLENGTH = wcrtomb (mbc, WC, &state_wc); \ 2377 assert (MBLENGTH != (size_t)-1 && MBLENGTH != 0); \ 2378 } \ 2379 \ 2380 for (j = 0; j < MBLENGTH; j++) \ 2381 COPY[NEW_LEN++] = mbc[j]; \ 2382 } \ 2383 else \ 2384 for (j = 0; j < MBLENGTH; j++) \ 2385 COPY[NEW_LEN++] = TEXT[i++]; \ 2386 } \ 2387 COPY[NEW_LEN] = '\0'; \ 2388 } \ 2389 while (0) 2390 IGNORE_CHARS (new_len_a, lena, texta, copy_a, 2391 wc_a, mblength_a, state_a); 2392 IGNORE_CHARS (new_len_b, lenb, textb, copy_b, 2393 wc_b, mblength_b, state_b); 2394 diff = xmemcoll (copy_a, new_len_a, copy_b, new_len_b); 2395 } 2396 else if (lena == 0) 2397 diff = - NONZERO (lenb); 2398 else if (lenb == 0) 2399 goto greater; 2400 else 2401 diff = xmemcoll (texta, lena, textb, lenb); 2402 } 2403 2404 if (diff) 2405 goto not_equal; 2406 2407 key = key->next; 2408 if (! 
key) 2409 break; 2410 2411 /* Find the beginning and limit of the next field. */ 2412 if (key->eword != -1) 2413 lima = limfield (a, key), limb = limfield (b, key); 2414 else 2415 lima = a->text + a->length - 1, limb = b->text + b->length - 1; 2416 2417 if (key->sword != -1) 2418 texta = begfield (a, key), textb = begfield (b, key); 2419 else 2420 { 2421 texta = a->text, textb = b->text; 2422 if (key->skipsblanks) 2423 { 2424 while (texta < lima && ismbblank (texta, lima - texta, &mblength_a)) 2425 texta += mblength_a; 2426 while (textb < limb && ismbblank (textb, limb - textb, &mblength_b)) 2427 textb += mblength_b; 2428 } 2429 } 2430 } 2431 2432 return 0; 2433 2434 greater: 2435 diff = 1; 2436 not_equal: 2437 return key->reverse ? -diff : diff; 2438 } 2439 #endif 2440 1878 2441 /* Compare two lines A and B, returning negative, zero, or positive 1879 2442 depending on whether A compares less than, equal to, or greater than B. */ 1880 2443 … … 2744 3305 initialize_exit_failure (SORT_FAILURE); 2745 3306 2746 3307 hard_LC_COLLATE = hard_locale (LC_COLLATE); 2747 #if HAVE_ NL_LANGINFO3308 #if HAVE_LANGINFO_CODESET 2748 3309 hard_LC_TIME = hard_locale (LC_TIME); 2749 3310 #endif 2750 3311 … … 2765 3326 thousands_sep = -1; 2766 3327 } 2767 3328 3329 #if HAVE_MBRTOWC 3330 if (MB_CUR_MAX > 1) 3331 { 3332 inittables = inittables_mb; 3333 begfield = begfield_mb; 3334 limfield = limfield_mb; 3335 getmonth = getmonth_mb; 3336 keycompare = keycompare_mb; 3337 numcompare = numcompare_mb; 3338 } 3339 else 3340 #endif 3341 { 3342 inittables = inittables_uni; 3343 begfield = begfield_uni; 3344 limfield = limfield_uni; 3345 getmonth = getmonth_uni; 3346 keycompare = keycompare_uni; 3347 numcompare = numcompare_uni; 3348 } 3349 2768 3350 have_read_stdin = false; 2769 3351 inittables (); 2770 3352 … … 3015 3597 3016 3598 case 't': 3017 3599 { 3018 char newtab = optarg[0]; 3019 if (! 
newtab) 3600 char newtab[MB_LEN_MAX + 1]; 3601 size_t newtab_length = 1; 3602 strncpy (newtab, optarg, MB_LEN_MAX); 3603 if (! newtab[0]) 3020 3604 error (SORT_FAILURE, 0, _("empty tab")); 3021 if (optarg[1]) 3605 #if HAVE_MBRTOWC 3606 if (MB_CUR_MAX > 1) 3607 { 3608 wchar_t wc; 3609 mbstate_t state; 3610 size_t i; 3611 3612 memset (&state, '\0', sizeof (mbstate_t)); 3613 newtab_length = mbrtowc (&wc, newtab, strnlen (newtab, 3614 MB_LEN_MAX), 3615 &state); 3616 switch (newtab_length) 3617 { 3618 case (size_t) -1: 3619 case (size_t) -2: 3620 case 0: 3621 newtab_length = 1; 3622 } 3623 } 3624 #endif 3625 if (newtab_length == 1 && optarg[1]) 3022 3626 { 3023 3627 if (STREQ (optarg, "\\0")) 3024 newtab = '\0';3628 newtab[0] = '\0'; 3025 3629 else 3026 3630 { 3027 3631 /* Provoke with `sort -txx'. Complain about … … 3032 3636 quote (optarg)); 3033 3637 } 3034 3638 } 3035 if (tab != TAB_DEFAULT && tab != newtab) 3639 if (tab_length 3640 && (tab_length != newtab_length 3641 || memcmp (tab, newtab, tab_length) != 0)) 3036 3642 error (SORT_FAILURE, 0, _("incompatible tabs")); 3037 tab = newtab; 3643 memcpy (tab, newtab, newtab_length); 3644 tab_length = newtab_length; 3038 3645 } 3039 3646 break; 3040 3647 -
coreutils-6.8+/src/unexpand.c
old new 39 39 #include <stdio.h> 40 40 #include <getopt.h> 41 41 #include <sys/types.h> 42 43 /* Get mbstate_t, mbrtowc(), wcwidth(). */ 44 #if HAVE_WCHAR_H 45 # include <wchar.h> 46 #endif 47 42 48 #include "system.h" 43 49 #include "error.h" 44 50 #include "quote.h" 45 51 #include "xstrndup.h" 46 52 53 /* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC 54 installation; work around this configuration error. */ 55 #if !defined MB_LEN_MAX || MB_LEN_MAX < 2 56 # define MB_LEN_MAX 16 57 #endif 58 59 /* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */ 60 #if HAVE_MBRTOWC && defined mbstate_t 61 # define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0) 62 #endif 63 47 64 /* The official name of this program (e.g., no `g' prefix). */ 48 65 #define PROGRAM_NAME "unexpand" 49 66 … … 110 127 {NULL, 0, NULL, 0} 111 128 }; 112 129 130 static FILE *next_file (FILE *fp); 131 132 #if HAVE_MBRTOWC 133 static void 134 unexpand_multibyte (void) 135 { 136 FILE *fp; /* Input stream. */ 137 mbstate_t i_state; /* Current shift state of the input stream. */ 138 mbstate_t i_state_bak; /* Back up the I_STATE. */ 139 mbstate_t o_state; /* Current shift state of the output stream. */ 140 char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */ 141 char *bufpos; /* Next read position of BUF. */ 142 size_t buflen = 0; /* The length of the byte sequence in buf. */ 143 wint_t wc; /* A gotten wide character. */ 144 size_t mblength; /* The byte size of a multibyte character 145 which shows as same character as WC. */ 146 147 /* Index in `tab_list' of next tabstop: */ 148 int tab_index = 0; /* For calculating width of pending tabs. */ 149 int print_tab_index = 0; /* For printing as many tabs as possible. */ 150 unsigned int column = 0; /* Column on screen of next char. */ 151 int next_tab_column; /* Column the next tab stop is on. */ 152 int convert = 1; /* If nonzero, perform translations. 
*/ 153 unsigned int pending = 0; /* Pending columns of blanks. */ 154 155 fp = next_file ((FILE *) NULL); 156 if (fp == NULL) 157 return; 158 159 memset (&o_state, '\0', sizeof(mbstate_t)); 160 memset (&i_state, '\0', sizeof(mbstate_t)); 161 162 for (;;) 163 { 164 if (buflen < MB_LEN_MAX && !feof(fp) && !ferror(fp)) 165 { 166 memmove (buf, bufpos, buflen); 167 buflen += fread (buf + buflen, sizeof(char), BUFSIZ, fp); 168 bufpos = buf; 169 } 170 171 /* Get a wide character. */ 172 if (buflen < 1) 173 { 174 mblength = 1; 175 wc = WEOF; 176 } 177 else 178 { 179 i_state_bak = i_state; 180 mblength = mbrtowc ((wchar_t *)&wc, bufpos, buflen, &i_state); 181 } 182 183 if (mblength == (size_t)-1 || mblength == (size_t)-2) 184 { 185 i_state = i_state_bak; 186 wc = L'\0'; 187 } 188 189 if (wc == L' ' && convert && column < INT_MAX) 190 { 191 ++pending; 192 ++column; 193 } 194 else if (wc == L'\t' && convert) 195 { 196 if (tab_size == 0) 197 { 198 /* Do not let tab_index == first_free_tab; 199 stop when it is 1 less. */ 200 while (tab_index < first_free_tab - 1 201 && column >= tab_list[tab_index]) 202 tab_index++; 203 next_tab_column = tab_list[tab_index]; 204 if (tab_index < first_free_tab - 1) 205 tab_index++; 206 if (column >= next_tab_column) 207 { 208 convert = 0; /* Ran out of tab stops. */ 209 goto flush_pend_mb; 210 } 211 } 212 else 213 { 214 next_tab_column = column + tab_size - column % tab_size; 215 } 216 pending += next_tab_column - column; 217 column = next_tab_column; 218 } 219 else 220 { 221 flush_pend_mb: 222 /* Flush pending spaces. Print as many tabs as possible, 223 then print the rest as spaces. */ 224 if (pending == 1) 225 { 226 putchar (' '); 227 pending = 0; 228 } 229 column -= pending; 230 while (pending > 0) 231 { 232 if (tab_size == 0) 233 { 234 /* Do not let print_tab_index == first_free_tab; 235 stop when it is 1 less. 
*/ 236 while (print_tab_index < first_free_tab - 1 237 && column >= tab_list[print_tab_index]) 238 print_tab_index++; 239 next_tab_column = tab_list[print_tab_index]; 240 if (print_tab_index < first_free_tab - 1) 241 print_tab_index++; 242 } 243 else 244 { 245 next_tab_column = 246 column + tab_size - column % tab_size; 247 } 248 if (next_tab_column - column <= pending) 249 { 250 putchar ('\t'); 251 pending -= next_tab_column - column; 252 column = next_tab_column; 253 } 254 else 255 { 256 --print_tab_index; 257 column += pending; 258 while (pending != 0) 259 { 260 putchar (' '); 261 pending--; 262 } 263 } 264 } 265 266 if (wc == WEOF) 267 { 268 fp = next_file (fp); 269 if (fp == NULL) 270 break; /* No more files. */ 271 else 272 { 273 memset (&i_state, '\0', sizeof(mbstate_t)); 274 continue; 275 } 276 } 277 278 if (mblength == (size_t)-1 || mblength == (size_t)-2) 279 { 280 if (convert) 281 { 282 ++column; 283 if (convert_entire_line == 0) 284 convert = 0; 285 } 286 mblength = 1; 287 putchar (buf[0]); 288 } 289 else if (mblength == 0) 290 { 291 if (convert && convert_entire_line == 0) 292 convert = 0; 293 mblength = 1; 294 putchar ('\0'); 295 } 296 else 297 { 298 if (convert) 299 { 300 if (wc == L'\b') 301 { 302 if (column > 0) 303 --column; 304 } 305 else 306 { 307 int width; /* The width of WC. */ 308 309 width = wcwidth (wc); 310 column += (width > 0) ? width : 0; 311 if (convert_entire_line == 0) 312 convert = 0; 313 } 314 } 315 316 if (wc == L'\n') 317 { 318 tab_index = print_tab_index = 0; 319 column = pending = 0; 320 convert = 1; 321 } 322 fwrite (bufpos, sizeof(char), mblength, stdout); 323 } 324 } 325 buflen -= mblength; 326 bufpos += mblength; 327 } 328 } 329 #endif 330 331 113 332 void 114 333 usage (int status) 115 334 { … … 531 750 532 751 file_list = (optind < argc ? 
&argv[optind] : stdin_argv); 533 752 534 unexpand (); 753 #if HAVE_MBRTOWC 754 if (MB_CUR_MAX > 1) 755 unexpand_multibyte (); 756 else 757 #endif 758 unexpand (); 535 759 536 760 if (have_read_stdin && fclose (stdin) != 0) 537 761 error (EXIT_FAILURE, errno, "-"); -
coreutils-6.8+/src/pr.c
old new 313 313 314 314 #include <getopt.h> 315 315 #include <sys/types.h> 316 317 /* Get MB_LEN_MAX. */ 318 #include <limits.h> 319 /* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC 320 installation; work around this configuration error. */ 321 #if !defined MB_LEN_MAX || MB_LEN_MAX == 1 322 # define MB_LEN_MAX 16 323 #endif 324 325 /* Get MB_CUR_MAX. */ 326 #include <stdlib.h> 327 328 /* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */ 329 /* Get mbstate_t, mbrtowc(), wcwidth(). */ 330 #if HAVE_WCHAR_H 331 # include <wchar.h> 332 #endif 333 334 /* Get iswprint(). -- for wcwidth(). */ 335 #if HAVE_WCTYPE_H 336 # include <wctype.h> 337 #endif 338 #if !defined iswprint && !HAVE_ISWPRINT 339 # define iswprint(wc) 1 340 #endif 341 316 342 #include "system.h" 317 343 #include "error.h" 318 344 #include "hard-locale.h" … … 324 350 #include "strftime.h" 325 351 #include "xstrtol.h" 326 352 353 /* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */ 354 #if HAVE_MBRTOWC && defined mbstate_t 355 # define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0) 356 #endif 357 358 #ifndef HAVE_DECL_WCWIDTH 359 "this configure-time declaration test was not run" 360 #endif 361 #if !HAVE_DECL_WCWIDTH 362 extern int wcwidth (); 363 #endif 364 327 365 /* The official name of this program (e.g., no `g' prefix). */ 328 366 #define PROGRAM_NAME "pr" 329 367 … … 416 454 417 455 #define NULLCOL (COLUMN *)0 418 456 419 static int char_to_clump (char c); 457 /* Function pointers to switch functions for single byte locale or for 458 multibyte locale. If multibyte functions do not exist in your system, 459 these pointers always point to the functions for single byte locale. */ 460 static void (*print_char) (char c); 461 static int (*char_to_clump) (char c); 462 463 /* Functions for single byte locale. */ 464 static void print_char_single (char c); 465 static int char_to_clump_single (char c); 466 467 /* Functions for multibyte locale. 
*/ 468 static void print_char_multi (char c); 469 static int char_to_clump_multi (char c); 470 420 471 static bool read_line (COLUMN *p); 421 472 static bool print_page (void); 422 473 static bool print_stored (COLUMN *p); … … 426 477 static void pad_across_to (int position); 427 478 static void add_line_number (COLUMN *p); 428 479 static void getoptarg (char *arg, char switch_char, char *character, 480 int *character_length, int *character_width, 429 481 int *number); 430 482 void usage (int status); 431 483 static void print_files (int number_of_files, char **av); … … 440 492 static void pad_down (int lines); 441 493 static void read_rest_of_line (COLUMN *p); 442 494 static void skip_read (COLUMN *p, int column_number); 443 static void print_char (char c);444 495 static void cleanup (void); 445 496 static void print_sep_string (void); 446 497 static void separator_string (const char *optarg_S); … … 455 506 we store the leftmost columns contiguously in buff. 456 507 To print a line from buff, get the index of the first character 457 508 from line_vector[i], and print up to line_vector[i + 1]. */ 458 static char *buff;509 static unsigned char *buff; 459 510 460 511 /* Index of the position in buff where the next character 461 512 will be stored. */ … … 559 610 static bool untabify_input = false; 560 611 561 612 /* (-e) The input tab character. */ 562 static char input_tab_char = '\t';613 static char input_tab_char[MB_LEN_MAX] = "\t"; 563 614 564 615 /* (-e) Tabstops are at chars_per_tab, 2*chars_per_tab, 3*chars_per_tab, ... 565 616 where the leftmost column is 1. */ … … 569 620 static bool tabify_output = false; 570 621 571 622 /* (-i) The output tab character. */ 572 static char output_tab_char = '\t'; 623 static char output_tab_char[MB_LEN_MAX] = "\t"; 624 625 /* (-i) The byte length of output tab character. */ 626 static int output_tab_char_length = 1; 573 627 574 628 /* (-i) The width of the output tab. 
*/ 575 629 static int chars_per_output_tab = 8; … … 643 697 static bool numbered_lines = false; 644 698 645 699 /* (-n) Character which follows each line number. */ 646 static char number_separator = '\t'; 700 static char number_separator[MB_LEN_MAX] = "\t"; 701 702 /* (-n) The byte length of the character which follows each line number. */ 703 static int number_separator_length = 1; 704 705 /* (-n) The character width of the character which follows each line number. */ 706 static int number_separator_width = 0; 647 707 648 708 /* (-n) line counting starts with 1st line of input file (not with 1st 649 709 line of 1st page printed). */ … … 696 756 -a|COLUMN|-m is a `space' and with the -J option a `tab'. */ 697 757 static char *col_sep_string = ""; 698 758 static int col_sep_length = 0; 759 static int col_sep_width = 0; 699 760 static char *column_separator = " "; 700 761 static char *line_separator = "\t"; 701 762 … … 852 913 col_sep_length = (int) strlen (optarg_S); 853 914 col_sep_string = xmalloc (col_sep_length + 1); 854 915 strcpy (col_sep_string, optarg_S); 916 917 #if HAVE_MBRTOWC 918 if (MB_CUR_MAX > 1) 919 col_sep_width = mbswidth (col_sep_string, 0); 920 else 921 #endif 922 col_sep_width = col_sep_length; 855 923 } 856 924 857 925 int … … 877 945 878 946 atexit (close_stdout); 879 947 948 /* Define which functions are used, the ones for single byte locale or the ones 949 for multibyte locale. */ 950 #if HAVE_MBRTOWC 951 if (MB_CUR_MAX > 1) 952 { 953 print_char = print_char_multi; 954 char_to_clump = char_to_clump_multi; 955 } 956 else 957 #endif 958 { 959 print_char = print_char_single; 960 char_to_clump = char_to_clump_single; 961 } 962 880 963 n_files = 0; 881 964 file_names = (argc > 1 882 965 ? 
xmalloc ((argc - 1) * sizeof (char *)) … … 949 1032 break; 950 1033 case 'e': 951 1034 if (optarg) 952 getoptarg (optarg, 'e', &input_tab_char, 953 &chars_per_input_tab); 1035 { 1036 int dummy_length, dummy_width; 1037 1038 getoptarg (optarg, 'e', input_tab_char, &dummy_length, 1039 &dummy_width, &chars_per_input_tab); 1040 } 954 1041 /* Could check tab width > 0. */ 955 1042 untabify_input = true; 956 1043 break; … … 963 1050 break; 964 1051 case 'i': 965 1052 if (optarg) 966 getoptarg (optarg, 'i', &output_tab_char, 967 &chars_per_output_tab); 1053 { 1054 int dummy_width; 1055 1056 getoptarg (optarg, 'i', output_tab_char, &output_tab_char_length, 1057 &dummy_width, &chars_per_output_tab); 1058 } 968 1059 /* Could check tab width > 0. */ 969 1060 tabify_output = true; 970 1061 break; … … 991 1082 case 'n': 992 1083 numbered_lines = true; 993 1084 if (optarg) 994 getoptarg (optarg, 'n', &number_separator,995 & chars_per_number);1085 getoptarg (optarg, 'n', number_separator, &number_separator_length, 1086 &number_separator_width, &chars_per_number); 996 1087 break; 997 1088 case 'N': 998 1089 skip_count = false; … … 1031 1122 old_s = false; 1032 1123 /* Reset an additional input of -s, -S dominates -s */ 1033 1124 col_sep_string = ""; 1034 col_sep_length = 0;1125 col_sep_length = col_sep_width = 0; 1035 1126 use_col_separator = true; 1036 1127 if (optarg) 1037 1128 separator_string (optarg); … … 1188 1279 a number. */ 1189 1280 1190 1281 static void 1191 getoptarg (char *arg, char switch_char, char *character, int *number) 1282 getoptarg (char *arg, char switch_char, char *character, int *character_length, 1283 int *character_width, int *number) 1192 1284 { 1193 1285 if (!ISDIGIT (*arg)) 1194 *character = *arg++; 1286 { 1287 #ifdef HAVE_MBRTOWC 1288 if (MB_CUR_MAX > 1) /* for multibyte locale. 
*/ 1289 { 1290 wchar_t wc; 1291 size_t mblength; 1292 int width; 1293 mbstate_t state = {'\0'}; 1294 1295 mblength = mbrtowc (&wc, arg, strnlen(arg, MB_LEN_MAX), &state); 1296 1297 if (mblength == (size_t)-1 || mblength == (size_t)-2) 1298 { 1299 *character_length = 1; 1300 *character_width = 1; 1301 } 1302 else 1303 { 1304 *character_length = (mblength < 1) ? 1 : mblength; 1305 width = wcwidth (wc); 1306 *character_width = (width < 0) ? 0 : width; 1307 } 1308 1309 strncpy (character, arg, *character_length); 1310 arg += *character_length; 1311 } 1312 else /* for single byte locale. */ 1313 #endif 1314 { 1315 *character = *arg++; 1316 *character_length = 1; 1317 *character_width = 1; 1318 } 1319 } 1320 1195 1321 if (*arg) 1196 1322 { 1197 1323 long int tmp_long; … … 1256 1382 else 1257 1383 col_sep_string = column_separator; 1258 1384 1259 col_sep_length = 1;1385 col_sep_length = col_sep_width = 1; 1260 1386 use_col_separator = true; 1261 1387 } 1262 1388 /* It's rather pointless to define a TAB separator with column … … 1288 1414 TAB_WIDTH (chars_per_input_tab, chars_per_number); */ 1289 1415 1290 1416 /* Estimate chars_per_text without any margin and keep it constant. */ 1291 if (number_separator == '\t')1417 if (number_separator[0] == '\t') 1292 1418 number_width = chars_per_number + 1293 1419 TAB_WIDTH (chars_per_default_tab, chars_per_number); 1294 1420 else 1295 number_width = chars_per_number + 1;1421 number_width = chars_per_number + number_separator_width; 1296 1422 1297 1423 /* The number is part of the column width unless we are 1298 1424 printing files in parallel. 
*/ … … 1307 1433 } 1308 1434 1309 1435 chars_per_column = (chars_per_line - chars_used_by_number - 1310 (columns - 1) * col_sep_ length) / columns;1436 (columns - 1) * col_sep_width) / columns; 1311 1437 1312 1438 if (chars_per_column < 1) 1313 1439 error (EXIT_FAILURE, 0, _("page width too narrow")); … … 1432 1558 1433 1559 /* Enlarge p->start_position of first column to use the same form of 1434 1560 padding_not_printed with all columns. */ 1435 h = h + col_sep_ length;1561 h = h + col_sep_width; 1436 1562 1437 1563 /* This loop takes care of all but the rightmost column. */ 1438 1564 … … 1466 1592 } 1467 1593 else 1468 1594 { 1469 h = h_next + col_sep_ length;1595 h = h_next + col_sep_width; 1470 1596 h_next = h + chars_per_column; 1471 1597 } 1472 1598 } … … 1756 1882 align_column (COLUMN *p) 1757 1883 { 1758 1884 padding_not_printed = p->start_position; 1759 if (padding_not_printed - col_sep_ length > 0)1885 if (padding_not_printed - col_sep_width > 0) 1760 1886 { 1761 pad_across_to (padding_not_printed - col_sep_ length);1887 pad_across_to (padding_not_printed - col_sep_width); 1762 1888 padding_not_printed = ANYWHERE; 1763 1889 } 1764 1890 … … 2029 2155 /* May be too generous. */ 2030 2156 buff = X2REALLOC (buff, &buff_allocated); 2031 2157 } 2032 buff[buff_current++] = c;2158 buff[buff_current++] = (unsigned char) c; 2033 2159 } 2034 2160 2035 2161 static void 2036 2162 add_line_number (COLUMN *p) 2037 2163 { 2038 int i ;2164 int i, j; 2039 2165 char *s; 2040 2166 int left_cut; 2041 2167 … … 2058 2184 /* Tabification is assumed for multiple columns, also for n-separators, 2059 2185 but `default n-separator = TAB' hasn't been given priority over 2060 2186 equal column_width also specified by POSIX. 
*/ 2061 if (number_separator == '\t')2187 if (number_separator[0] == '\t') 2062 2188 { 2063 2189 i = number_width - chars_per_number; 2064 2190 while (i-- > 0) 2065 2191 (p->char_func) (' '); 2066 2192 } 2067 2193 else 2068 (p->char_func) (number_separator); 2194 for (j = 0; j < number_separator_length; j++) 2195 (p->char_func) (number_separator[j]); 2069 2196 } 2070 2197 else 2071 2198 /* To comply with POSIX, we avoid any expansion of default TAB 2072 2199 separator with a single column output. No column_width requirement 2073 2200 has to be considered. */ 2074 2201 { 2075 (p->char_func) (number_separator); 2076 if (number_separator == '\t') 2202 for (j = 0; j < number_separator_length; j++) 2203 (p->char_func) (number_separator[j]); 2204 if (number_separator[0] == '\t') 2077 2205 output_position = POS_AFTER_TAB (chars_per_output_tab, 2078 2206 output_position); 2079 2207 } … … 2234 2362 while (goal - h_old > 1 2235 2363 && (h_new = POS_AFTER_TAB (chars_per_output_tab, h_old)) <= goal) 2236 2364 { 2237 putchar (output_tab_char);2365 fwrite (output_tab_char, sizeof(char), output_tab_char_length, stdout); 2238 2366 h_old = h_new; 2239 2367 } 2240 2368 while (++h_old <= goal) … … 2254 2382 { 2255 2383 char *s; 2256 2384 int l = col_sep_length; 2385 int not_space_flag; 2257 2386 2258 2387 s = col_sep_string; 2259 2388 … … 2267 2396 { 2268 2397 for (; separators_not_printed > 0; --separators_not_printed) 2269 2398 { 2399 not_space_flag = 0; 2270 2400 while (l-- > 0) 2271 2401 { 2272 2402 /* 3 types of sep_strings: spaces only, spaces and chars, … … 2280 2410 } 2281 2411 else 2282 2412 { 2413 not_space_flag = 1; 2283 2414 if (spaces_not_printed > 0) 2284 2415 print_white_space (); 2285 2416 putchar (*s++); 2286 ++output_position;2287 2417 } 2288 2418 } 2419 if (not_space_flag) 2420 output_position += col_sep_width; 2421 2289 2422 /* sep_string ends with some spaces */ 2290 2423 if (spaces_not_printed > 0) 2291 2424 print_white_space (); … … 2313 2446 required number of 
tabs and spaces. */ 2314 2447 2315 2448 static void 2316 print_char (char c)2449 print_char_single (char c) 2317 2450 { 2318 2451 if (tabify_output) 2319 2452 { … … 2337 2470 putchar (c); 2338 2471 } 2339 2472 2473 #ifdef HAVE_MBRTOWC 2474 static void 2475 print_char_multi (char c) 2476 { 2477 static size_t mbc_pos = 0; 2478 static char mbc[MB_LEN_MAX] = {'\0'}; 2479 static mbstate_t state = {'\0'}; 2480 mbstate_t state_bak; 2481 wchar_t wc; 2482 size_t mblength; 2483 int width; 2484 2485 if (tabify_output) 2486 { 2487 state_bak = state; 2488 mbc[mbc_pos++] = c; 2489 mblength = mbrtowc (&wc, mbc, mbc_pos, &state); 2490 2491 while (mbc_pos > 0) 2492 { 2493 switch (mblength) 2494 { 2495 case (size_t)-2: 2496 state = state_bak; 2497 return; 2498 2499 case (size_t)-1: 2500 state = state_bak; 2501 ++output_position; 2502 putchar (mbc[0]); 2503 memmove (mbc, mbc + 1, MB_CUR_MAX - 1); 2504 --mbc_pos; 2505 break; 2506 2507 case 0: 2508 mblength = 1; 2509 2510 default: 2511 if (wc == L' ') 2512 { 2513 memmove (mbc, mbc + mblength, MB_CUR_MAX - mblength); 2514 --mbc_pos; 2515 ++spaces_not_printed; 2516 return; 2517 } 2518 else if (spaces_not_printed > 0) 2519 print_white_space (); 2520 2521 /* Nonprintables are assumed to have width 0, except L'\b'. */ 2522 if ((width = wcwidth (wc)) < 1) 2523 { 2524 if (wc == L'\b') 2525 --output_position; 2526 } 2527 else 2528 output_position += width; 2529 2530 fwrite (mbc, sizeof(char), mblength, stdout); 2531 memmove (mbc, mbc + mblength, MB_CUR_MAX - mblength); 2532 mbc_pos -= mblength; 2533 } 2534 } 2535 return; 2536 } 2537 putchar (c); 2538 } 2539 #endif 2540 2340 2541 /* Skip to page PAGE before printing. 2341 2542 PAGE may be larger than total number of pages. 
*/ 2342 2543 … … 2517 2718 align_empty_cols = false; 2518 2719 } 2519 2720 2520 if (padding_not_printed - col_sep_ length > 0)2721 if (padding_not_printed - col_sep_width > 0) 2521 2722 { 2522 pad_across_to (padding_not_printed - col_sep_ length);2723 pad_across_to (padding_not_printed - col_sep_width); 2523 2724 padding_not_printed = ANYWHERE; 2524 2725 } 2525 2726 … … 2620 2821 } 2621 2822 } 2622 2823 2623 if (padding_not_printed - col_sep_ length > 0)2824 if (padding_not_printed - col_sep_width > 0) 2624 2825 { 2625 pad_across_to (padding_not_printed - col_sep_ length);2826 pad_across_to (padding_not_printed - col_sep_width); 2626 2827 padding_not_printed = ANYWHERE; 2627 2828 } 2628 2829 … … 2635 2836 if (spaces_not_printed == 0) 2636 2837 { 2637 2838 output_position = p->start_position + end_vector[line]; 2638 if (p->start_position - col_sep_ length == chars_per_margin)2639 output_position -= col_sep_ length;2839 if (p->start_position - col_sep_width == chars_per_margin) 2840 output_position -= col_sep_width; 2640 2841 } 2641 2842 2642 2843 return true; … … 2655 2856 number of characters is 1.) 
*/ 2656 2857 2657 2858 static int 2658 char_to_clump (char c)2859 char_to_clump_single (char c) 2659 2860 { 2660 2861 unsigned char uc = c; 2661 2862 char *s = clump_buff; … … 2665 2866 int chars; 2666 2867 int chars_per_c = 8; 2667 2868 2668 if (c == input_tab_char )2869 if (c == input_tab_char[0]) 2669 2870 chars_per_c = chars_per_input_tab; 2670 2871 2671 if (c == input_tab_char || c == '\t')2872 if (c == input_tab_char[0] || c == '\t') 2672 2873 { 2673 2874 width = TAB_WIDTH (chars_per_c, input_position); 2674 2875 … … 2739 2940 return chars; 2740 2941 } 2741 2942 2943 #ifdef HAVE_MBRTOWC 2944 static int 2945 char_to_clump_multi (char c) 2946 { 2947 static size_t mbc_pos = 0; 2948 static char mbc[MB_LEN_MAX] = {'\0'}; 2949 static mbstate_t state = {'\0'}; 2950 mbstate_t state_bak; 2951 wchar_t wc; 2952 size_t mblength; 2953 int wc_width; 2954 register char *s = clump_buff; 2955 register int i, j; 2956 char esc_buff[4]; 2957 int width; 2958 int chars; 2959 int chars_per_c = 8; 2960 2961 state_bak = state; 2962 mbc[mbc_pos++] = c; 2963 mblength = mbrtowc (&wc, mbc, mbc_pos, &state); 2964 2965 width = 0; 2966 chars = 0; 2967 while (mbc_pos > 0) 2968 { 2969 switch (mblength) 2970 { 2971 case (size_t)-2: 2972 state = state_bak; 2973 return 0; 2974 2975 case (size_t)-1: 2976 state = state_bak; 2977 mblength = 1; 2978 2979 if (use_esc_sequence || use_cntrl_prefix) 2980 { 2981 width = +4; 2982 chars = +4; 2983 *s++ = '\\'; 2984 sprintf (esc_buff, "%03o", mbc[0]); 2985 for (i = 0; i <= 2; ++i) 2986 *s++ = (int) esc_buff[i]; 2987 } 2988 else 2989 { 2990 width += 1; 2991 chars += 1; 2992 *s++ = mbc[0]; 2993 } 2994 break; 2995 2996 case 0: 2997 mblength = 1; 2998 /* Fall through */ 2999 3000 default: 3001 if (memcmp (mbc, input_tab_char, mblength) == 0) 3002 chars_per_c = chars_per_input_tab; 3003 3004 if (memcmp (mbc, input_tab_char, mblength) == 0 || c == '\t') 3005 { 3006 int width_inc; 3007 3008 width_inc = TAB_WIDTH (chars_per_c, input_position); 3009 width += 
width_inc; 3010 3011 if (untabify_input) 3012 { 3013 for (i = width_inc; i; --i) 3014 *s++ = ' '; 3015 chars += width_inc; 3016 } 3017 else 3018 { 3019 for (i = 0; i < mblength; i++) 3020 *s++ = mbc[i]; 3021 chars += mblength; 3022 } 3023 } 3024 else if ((wc_width = wcwidth (wc)) < 1) 3025 { 3026 if (use_esc_sequence) 3027 { 3028 for (i = 0; i < mblength; i++) 3029 { 3030 width += 4; 3031 chars += 4; 3032 *s++ = '\\'; 3033 sprintf (esc_buff, "%03o", c); 3034 for (j = 0; j <= 2; ++j) 3035 *s++ = (int) esc_buff[j]; 3036 } 3037 } 3038 else if (use_cntrl_prefix) 3039 { 3040 if (wc < 0200) 3041 { 3042 width += 2; 3043 chars += 2; 3044 *s++ = '^'; 3045 *s++ = wc ^ 0100; 3046 } 3047 else 3048 { 3049 for (i = 0; i < mblength; i++) 3050 { 3051 width += 4; 3052 chars += 4; 3053 *s++ = '\\'; 3054 sprintf (esc_buff, "%03o", c); 3055 for (j = 0; j <= 2; ++j) 3056 *s++ = (int) esc_buff[j]; 3057 } 3058 } 3059 } 3060 else if (wc == L'\b') 3061 { 3062 width += -1; 3063 chars += 1; 3064 *s++ = c; 3065 } 3066 else 3067 { 3068 width += 0; 3069 chars += mblength; 3070 for (i = 0; i < mblength; i++) 3071 *s++ = mbc[i]; 3072 } 3073 } 3074 else 3075 { 3076 width += wc_width; 3077 chars += mblength; 3078 for (i = 0; i < mblength; i++) 3079 *s++ = mbc[i]; 3080 } 3081 } 3082 memmove (mbc, mbc + mblength, MB_CUR_MAX - mblength); 3083 mbc_pos -= mblength; 3084 } 3085 3086 input_position += width; 3087 return chars; 3088 } 3089 #endif 3090 2742 3091 /* We've just printed some files and need to clean up things before 2743 3092 looking for more options and printing the next batch of files. 2744 3093 -
coreutils-6.8+/src/cut.c
old new 29 29 #include <assert.h> 30 30 #include <getopt.h> 31 31 #include <sys/types.h> 32 33 /* Get mbstate_t, mbrtowc(). */ 34 #if HAVE_WCHAR_H 35 # include <wchar.h> 36 #endif 32 37 #include "system.h" 33 38 34 39 #include "error.h" … … 37 42 #include "quote.h" 38 43 #include "xstrndup.h" 39 44 45 /* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC 46 installation; work around this configuration error. */ 47 #if !defined MB_LEN_MAX || MB_LEN_MAX < 2 48 # undef MB_LEN_MAX 49 # define MB_LEN_MAX 16 50 #endif 51 52 /* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */ 53 #if HAVE_MBRTOWC && defined mbstate_t 54 # define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0) 55 #endif 56 40 57 /* The official name of this program (e.g., no `g' prefix). */ 41 58 #define PROGRAM_NAME "cut" 42 59 … … 67 84 } \ 68 85 while (0) 69 86 87 /* Refill the buffer BUF to get a multibyte character. */ 88 #define REFILL_BUFFER(BUF, BUFPOS, BUFLEN, STREAM) \ 89 do \ 90 { \ 91 if (BUFLEN < MB_LEN_MAX && !feof (STREAM) && !ferror (STREAM)) \ 92 { \ 93 memmove (BUF, BUFPOS, BUFLEN); \ 94 BUFLEN += fread (BUF + BUFLEN, sizeof(char), BUFSIZ, STREAM); \ 95 BUFPOS = BUF; \ 96 } \ 97 } \ 98 while (0) 99 100 /* Get the wide character at BUFPOS. BUFPOS is not advanced by this macro. 101 If the byte sequence is not a valid character, CONVFAIL is 1; otherwise 0. */ 102 #define GET_NEXT_WC_FROM_BUFFER(WC, BUFPOS, BUFLEN, MBLENGTH, STATE, CONVFAIL) \ 103 do \ 104 { \ 105 mbstate_t state_bak; \ 106 \ 107 if (BUFLEN < 1) \ 108 { \ 109 WC = WEOF; \ 110 break; \ 111 } \ 112 \ 113 /* Get a wide character. */ \ 114 CONVFAIL = 0; \ 115 state_bak = STATE; \ 116 MBLENGTH = mbrtowc ((wchar_t *)&WC, BUFPOS, BUFLEN, &STATE); \ 117 \ 118 switch (MBLENGTH) \ 119 { \ 120 case (size_t)-1: \ 121 case (size_t)-2: \ 122 CONVFAIL++; \ 123 STATE = state_bak; \ 124 /* Fall through. 
*/ \ 125 \ 126 case 0: \ 127 MBLENGTH = 1; \ 128 break; \ 129 } \ 130 } \ 131 while (0) 132 70 133 struct range_pair 71 134 { 72 135 size_t lo; … … 85 148 /* The number of bytes allocated for FIELD_1_BUFFER. */ 86 149 static size_t field_1_bufsize; 87 150 88 /* The largest field or byteindex used as an endpoint of a closed151 /* The largest byte, character or field index used as an endpoint of a closed 89 152 or degenerate range specification; this doesn't include the starting 90 153 index of right-open-ended ranges. For example, with either range spec 91 154 `2-5,9-', `2-3,5,9-' this variable would be set to 5. */ … … 97 160 98 161 /* This is a bit vector. 99 162 In byte mode, which bytes to output. 163 In character mode, which characters to output. 100 164 In field mode, which DELIM-separated fields to output. 101 B oth bytes and fields are numbered starting with 1,165 Bytes, characters and fields are numbered starting with 1, 102 166 so the zeroth bit of this array is unused. 103 A field or byteK has been selected if167 A byte, character or field K has been selected if 104 168 (K <= MAX_RANGE_ENDPOINT and is_printable_field(K)) 105 169 || (EOL_RANGE_START > 0 && K >= EOL_RANGE_START). */ 106 170 static unsigned char *printable_field; … … 109 173 { 110 174 undefined_mode, 111 175 112 /* Output characters that are in the given bytes. */176 /* Output bytes that are at the given positions. */ 113 177 byte_mode, 114 178 179 /* Output characters that are at the given positions. */ 180 character_mode, 181 115 182 /* Output the given delimeter-separated fields. */ 116 183 field_mode 117 184 }; … … 121 188 122 189 static enum operating_mode operating_mode; 123 190 191 /* If nonzero, when in byte mode, don't split multibyte characters. */ 192 static int byte_mode_character_aware; 193 194 /* If nonzero, the functions for single byte locale are used 195 even if this program runs in a multibyte locale. 
*/ 196 static int force_singlebyte_mode; 197 124 198 /* If true do not output lines containing no delimeter characters. 125 199 Otherwise, all such lines are printed. This option is valid only 126 200 with field mode. */ … … 132 206 133 207 /* The delimeter character for field mode. */ 134 208 static unsigned char delim; 209 #if HAVE_WCHAR_H 210 static wchar_t wcdelim; 211 #endif 135 212 136 213 /* True if the --output-delimiter=STRING option was specified. */ 137 214 static bool output_delimiter_specified; … … 205 282 -f, --fields=LIST select only these fields; also print any line\n\ 206 283 that contains no delimiter character, unless\n\ 207 284 the -s option is specified\n\ 208 -n (ignored)\n\285 -n with -b: don't split multibyte characters\n\ 209 286 "), stdout); 210 287 fputs (_("\ 211 288 --complement complement the set of selected bytes, characters\n\ … … 362 439 in_digits = false; 363 440 /* Starting a range. */ 364 441 if (dash_found) 365 FATAL_ERROR (_("invalid byte or field list"));442 FATAL_ERROR (_("invalid byte, character or field list")); 366 443 dash_found = true; 367 444 fieldstr++; 368 445 … … 387 464 if (!rhs_specified) 388 465 { 389 466 /* `n-'. From `initial' to end of line. */ 390 eol_range_start = initial; 467 if (eol_range_start == 0 || 468 (eol_range_start != 0 && eol_range_start > initial)) 469 eol_range_start = initial; 391 470 field_found = true; 392 471 } 393 472 else 394 473 { 395 474 /* `m-n' or `-n' (1-n). */ 396 475 if (value < initial) 397 FATAL_ERROR (_("invalid decreasing range"));476 FATAL_ERROR (_("invalid byte, character or field list")); 398 477 399 478 /* Is there already a range going to end of line? 
*/ 400 479 if (eol_range_start != 0) … … 467 546 if (operating_mode == byte_mode) 468 547 error (0, 0, 469 548 _("byte offset %s is too large"), quote (bad_num)); 549 else if (operating_mode == character_mode) 550 error (0, 0, 551 _("character offset %s is too large"), quote (bad_num)); 470 552 else 471 553 error (0, 0, 472 554 _("field number %s is too large"), quote (bad_num)); … … 477 559 fieldstr++; 478 560 } 479 561 else 480 FATAL_ERROR (_("invalid byte or field list"));562 FATAL_ERROR (_("invalid byte, character or field list")); 481 563 } 482 564 483 565 max_range_endpoint = 0; … … 570 652 } 571 653 } 572 654 655 #if HAVE_MBRTOWC 656 /* This function is in use for the following case. 657 658 1. Read from the stream STREAM, printing to standard output any selected 659 characters. 660 661 2. Read from stream STREAM, printing to standard output any selected bytes, 662 without splitting multibyte characters. */ 663 664 static void 665 cut_characters_or_cut_bytes_no_split (FILE *stream) 666 { 667 int idx; /* number of bytes or characters in the line so far. */ 668 char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */ 669 char *bufpos; /* Next read position of BUF. */ 670 size_t buflen; /* The length of the byte sequence in buf. */ 671 wint_t wc; /* A gotten wide character. */ 672 size_t mblength; /* The byte size of a multibyte character which shows 673 as same character as WC. */ 674 mbstate_t state; /* State of the stream. */ 675 int convfail; /* 1, when conversion is failed. Otherwise 0. 
*/ 676 677 idx = 0; 678 buflen = 0; 679 bufpos = buf; 680 memset (&state, '\0', sizeof(mbstate_t)); 681 682 while (1) 683 { 684 REFILL_BUFFER (buf, bufpos, buflen, stream); 685 686 GET_NEXT_WC_FROM_BUFFER (wc, bufpos, buflen, mblength, state, convfail); 687 688 if (wc == WEOF) 689 { 690 if (idx > 0) 691 putchar ('\n'); 692 break; 693 } 694 else if (wc == L'\n') 695 { 696 putchar ('\n'); 697 idx = 0; 698 } 699 else 700 { 701 idx += (operating_mode == byte_mode) ? mblength : 1; 702 if (print_kth (idx, NULL)) 703 fwrite (bufpos, mblength, sizeof(char), stdout); 704 } 705 706 buflen -= mblength; 707 bufpos += mblength; 708 } 709 } 710 #endif 711 573 712 /* Read from stream STREAM, printing to standard output any selected fields. */ 574 713 575 714 static void … … 692 831 } 693 832 } 694 833 834 #if HAVE_MBRTOWC 835 static void 836 cut_fields_mb (FILE *stream) 837 { 838 int c; 839 unsigned int field_idx; 840 int found_any_selected_field; 841 int buffer_first_field; 842 int empty_input; 843 char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */ 844 char *bufpos; /* Next read position of BUF. */ 845 size_t buflen; /* The length of the byte sequence in buf. */ 846 wint_t wc = 0; /* A gotten wide character. */ 847 size_t mblength; /* The byte size of a multibyte character which shows 848 as same character as WC. */ 849 mbstate_t state; /* State of the stream. */ 850 int convfail; /* 1, when conversion is failed. Otherwise 0. */ 851 852 found_any_selected_field = 0; 853 field_idx = 1; 854 bufpos = buf; 855 buflen = 0; 856 memset (&state, '\0', sizeof(mbstate_t)); 857 858 c = getc (stream); 859 empty_input = (c == EOF); 860 if (c != EOF) 861 ungetc (c, stream); 862 else 863 wc = WEOF; 864 865 /* To support the semantics of the -s flag, we may have to buffer 866 all of the first field to determine whether it is `delimited.' 
867 But that is unnecessary if all non-delimited lines must be printed 868 and the first field has been selected, or if non-delimited lines 869 must be suppressed and the first field has *not* been selected. 870 That is because a non-delimited line has exactly one field. */ 871 buffer_first_field = (suppress_non_delimited ^ !print_kth (1, NULL)); 872 873 while (1) 874 { 875 if (field_idx == 1 && buffer_first_field) 876 { 877 int len = 0; 878 879 while (1) 880 { 881 REFILL_BUFFER (buf, bufpos, buflen, stream); 882 883 GET_NEXT_WC_FROM_BUFFER 884 (wc, bufpos, buflen, mblength, state, convfail); 885 886 if (wc == WEOF) 887 break; 888 889 field_1_buffer = xrealloc (field_1_buffer, len + mblength); 890 memcpy (field_1_buffer + len, bufpos, mblength); 891 len += mblength; 892 buflen -= mblength; 893 bufpos += mblength; 894 895 if (!convfail && (wc == L'\n' || wc == wcdelim)) 896 break; 897 } 898 899 if (wc == WEOF) 900 break; 901 902 /* If the first field extends to the end of line (it is not 903 delimited) and we are printing all non-delimited lines, 904 print this one. */ 905 if (convfail || (!convfail && wc != wcdelim)) 906 { 907 if (suppress_non_delimited) 908 { 909 /* Empty. */ 910 } 911 else 912 { 913 fwrite (field_1_buffer, sizeof (char), len, stdout); 914 /* Make sure the output line is newline terminated. */ 915 if (convfail || (!convfail && wc != L'\n')) 916 putchar ('\n'); 917 } 918 continue; 919 } 920 921 if (print_kth (1, NULL)) 922 { 923 /* Print the field, but not the trailing delimiter. 
*/ 924 fwrite (field_1_buffer, sizeof (char), len - 1, stdout); 925 found_any_selected_field = 1; 926 } 927 ++field_idx; 928 } 929 930 if (wc != WEOF) 931 { 932 if (print_kth (field_idx, NULL)) 933 { 934 if (found_any_selected_field) 935 { 936 fwrite (output_delimiter_string, sizeof (char), 937 output_delimiter_length, stdout); 938 } 939 found_any_selected_field = 1; 940 } 941 942 while (1) 943 { 944 REFILL_BUFFER (buf, bufpos, buflen, stream); 945 946 GET_NEXT_WC_FROM_BUFFER 947 (wc, bufpos, buflen, mblength, state, convfail); 948 949 if (wc == WEOF) 950 break; 951 else if (!convfail && (wc == wcdelim || wc == L'\n')) 952 { 953 buflen -= mblength; 954 bufpos += mblength; 955 break; 956 } 957 958 if (print_kth (field_idx, NULL)) 959 fwrite (bufpos, mblength, sizeof(char), stdout); 960 961 buflen -= mblength; 962 bufpos += mblength; 963 } 964 } 965 966 if ((!convfail || wc == L'\n') && buflen < 1) 967 wc = WEOF; 968 969 if (!convfail && wc == wcdelim) 970 ++field_idx; 971 else if (wc == WEOF || (!convfail && wc == L'\n')) 972 { 973 if (found_any_selected_field 974 || (!empty_input && !(suppress_non_delimited && field_idx == 1))) 975 putchar ('\n'); 976 if (wc == WEOF) 977 break; 978 field_idx = 1; 979 found_any_selected_field = 0; 980 } 981 } 982 } 983 #endif 984 695 985 static void 696 986 cut_stream (FILE *stream) 697 987 { 698 if (operating_mode == byte_mode) 699 cut_bytes (stream); 988 #if HAVE_MBRTOWC 989 if (MB_CUR_MAX > 1 && !force_singlebyte_mode) 990 { 991 switch (operating_mode) 992 { 993 case byte_mode: 994 if (byte_mode_character_aware) 995 cut_characters_or_cut_bytes_no_split (stream); 996 else 997 cut_bytes (stream); 998 break; 999 1000 case character_mode: 1001 cut_characters_or_cut_bytes_no_split (stream); 1002 break; 1003 1004 case field_mode: 1005 cut_fields_mb (stream); 1006 break; 1007 1008 default: 1009 abort (); 1010 } 1011 } 700 1012 else 701 cut_fields (stream); 1013 #endif 1014 { 1015 if (operating_mode == field_mode) 1016 cut_fields 
(stream); 1017 else 1018 cut_bytes (stream); 1019 } 702 1020 } 703 1021 704 1022 /* Process file FILE to standard output. … … 748 1066 bool ok; 749 1067 bool delim_specified = false; 750 1068 char *spec_list_string IF_LINT(= NULL); 1069 char mbdelim[MB_LEN_MAX + 1]; 1070 size_t delimlen = 0; 751 1071 752 1072 initialize_main (&argc, &argv); 753 1073 program_name = argv[0]; … … 770 1090 switch (optc) 771 1091 { 772 1092 case 'b': 773 case 'c':774 1093 /* Build the byte list. */ 775 1094 if (operating_mode != undefined_mode) 776 1095 FATAL_ERROR (_("only one type of list may be specified")); … … 778 1097 spec_list_string = optarg; 779 1098 break; 780 1099 1100 case 'c': 1101 /* Build the character list. */ 1102 if (operating_mode != undefined_mode) 1103 FATAL_ERROR (_("only one type of list may be specified")); 1104 operating_mode = character_mode; 1105 spec_list_string = optarg; 1106 break; 1107 781 1108 case 'f': 782 1109 /* Build the field list. */ 783 1110 if (operating_mode != undefined_mode) … … 789 1116 case 'd': 790 1117 /* New delimiter. */ 791 1118 /* Interpret -d '' to mean `use the NUL byte as the delimiter.' */ 792 if (optarg[0] != '\0' && optarg[1] != '\0') 793 FATAL_ERROR (_("the delimiter must be a single character")); 794 delim = optarg[0]; 795 delim_specified = true; 1119 { 1120 #if HAVE_MBRTOWC 1121 if(MB_CUR_MAX > 1) 1122 { 1123 mbstate_t state; 1124 1125 memset (&state, '\0', sizeof(mbstate_t)); 1126 delimlen = mbrtowc (&wcdelim, optarg, strnlen(optarg, MB_LEN_MAX), &state); 1127 1128 if (delimlen == (size_t)-1 || delimlen == (size_t)-2) 1129 ++force_singlebyte_mode; 1130 else 1131 { 1132 delimlen = (delimlen < 1) ? 
1 : delimlen; 1133 if (wcdelim != L'\0' && *(optarg + delimlen) != '\0') 1134 FATAL_ERROR (_("the delimiter must be a single character")); 1135 memcpy (mbdelim, optarg, delimlen); 1136 } 1137 } 1138 1139 if (MB_CUR_MAX <= 1 || force_singlebyte_mode) 1140 #endif 1141 { 1142 if (optarg[0] != '\0' && optarg[1] != '\0') 1143 FATAL_ERROR (_("the delimiter must be a single character")); 1144 delim = (unsigned char) optarg[0]; 1145 } 1146 delim_specified = true; 1147 } 796 1148 break; 797 1149 798 1150 case OUTPUT_DELIMITER_OPTION: … … 805 1157 break; 806 1158 807 1159 case 'n': 1160 byte_mode_character_aware = 1; 808 1161 break; 809 1162 810 1163 case 's': … … 827 1180 if (operating_mode == undefined_mode) 828 1181 FATAL_ERROR (_("you must specify a list of bytes, characters, or fields")); 829 1182 830 if (delim != '\0'&& operating_mode != field_mode)1183 if (delim_specified && operating_mode != field_mode) 831 1184 FATAL_ERROR (_("an input delimiter may be specified only\ 832 1185 when operating on fields")); 833 1186 … … 854 1207 } 855 1208 856 1209 if (!delim_specified) 857 delim = '\t'; 1210 { 1211 delim = '\t'; 1212 #ifdef HAVE_MBRTOWC 1213 wcdelim = L'\t'; 1214 mbdelim[0] = '\t'; 1215 mbdelim[1] = '\0'; 1216 delimlen = 1; 1217 #endif 1218 } 858 1219 859 1220 if (output_delimiter_string == NULL) 860 1221 { 861 static char dummy[2]; 862 dummy[0] = delim; 863 dummy[1] = '\0'; 864 output_delimiter_string = dummy; 865 output_delimiter_length = 1; 1222 #ifdef HAVE_MBRTOWC 1223 if (MB_CUR_MAX > 1 && !force_singlebyte_mode) 1224 { 1225 output_delimiter_string = xstrdup(mbdelim); 1226 output_delimiter_length = delimlen; 1227 } 1228 1229 if (MB_CUR_MAX <= 1 || force_singlebyte_mode) 1230 #endif 1231 { 1232 static char dummy[2]; 1233 dummy[0] = delim; 1234 dummy[1] = '\0'; 1235 output_delimiter_string = dummy; 1236 output_delimiter_length = 1; 1237 } 866 1238 } 867 1239 868 1240 if (optind == argc) -
coreutils-6.
diff -urNp coreutils-6.12/src/join.c coreutils-6.12-orig/src/join.c
old new get_line (FILE *fp, struct line *line, i 489 489 line->nfields_allocated = 0; 490 490 line->nfields = 0; 491 491 line->fields = NULL; 492 #if HAVE_MBRTOWC 493 if (MB_CUR_MAX > 1) 494 xfields_multibyte (line); 495 else 496 #endif 492 497 xfields (line); 493 498 494 499 if (prevline[which - 1])