Ticket #1982: coreutils-6.9-i18n-1.patch
File coreutils-6.9-i18n-1.patch, 101.2 KB (added 18 years ago)
- lib/linebuffer.h
Submitted by: Matt Burgess (matthew at linuxfromscratch.org)
Date: 2007-04-07
Initial Package Version: 6.9
Upstream Status: Rejected
Origin: Based on coreutils-5.93-i18n-2.patch by Alexander Patrakov
Description: This patch fixes various problems with multibyte character support.
LSB >= 2.0 tests for features added by this patch, but only Coreutils-5.2.1 plus
http://www.linuxfromscratch.org/~alexander/patches/coreutils-5.2.1-i18n_fixes-1.patch
actually passes the Li18nux2000-level1 testsuite.

diff -Naur coreutils-6.9.orig/lib/linebuffer.h coreutils-6.9/lib/linebuffer.h
old  new
 22   22
 23   23  # include <stdio.h>
 24   24
      25  /* Get mbstate_t. */
      26  # if HAVE_WCHAR_H
      27  # include <wchar.h>
      28  # endif
      29
 25   30  /* A `struct linebuffer' holds a line of text. */
 26   31
 27   32  struct linebuffer
  …    …
 29   34    size_t size; /* Allocated. */
 30   35    size_t length; /* Used. */
 31   36    char *buffer;
      37  # if HAVE_WCHAR_H
      38    mbstate_t state;
      39  # endif
 32   40  };
 33   41
 34   42  /* Initialize linebuffer LINEBUFFER for use. */
-
coreutils-6.9
diff -Naur coreutils-6.9.orig/src/cut.c coreutils-6.9/src/cut.c
old new 29 29 #include <assert.h> 30 30 #include <getopt.h> 31 31 #include <sys/types.h> 32 33 /* Get mbstate_t, mbrtowc(). */ 34 #if HAVE_WCHAR_H 35 # include <wchar.h> 36 #endif 32 37 #include "system.h" 33 38 34 39 #include "error.h" … … 37 42 #include "quote.h" 38 43 #include "xstrndup.h" 39 44 45 /* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC 46 installation; work around this configuration error. */ 47 #if !defined MB_LEN_MAX || MB_LEN_MAX < 2 48 # undef MB_LEN_MAX 49 # define MB_LEN_MAX 16 50 #endif 51 52 /* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */ 53 #if HAVE_MBRTOWC && defined mbstate_t 54 # define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0) 55 #endif 56 40 57 /* The official name of this program (e.g., no `g' prefix). */ 41 58 #define PROGRAM_NAME "cut" 42 59 … … 67 84 } \ 68 85 while (0) 69 86 87 /* Refill the buffer BUF to get a multibyte character. */ 88 #define REFILL_BUFFER(BUF, BUFPOS, BUFLEN, STREAM) \ 89 do \ 90 { \ 91 if (BUFLEN < MB_LEN_MAX && !feof (STREAM) && !ferror (STREAM)) \ 92 { \ 93 memmove (BUF, BUFPOS, BUFLEN); \ 94 BUFLEN += fread (BUF + BUFLEN, sizeof(char), BUFSIZ, STREAM); \ 95 BUFPOS = BUF; \ 96 } \ 97 } \ 98 while (0) 99 100 /* Get wide character on BUFPOS. BUFPOS is not included after that. 101 If byte sequence is not valid as a character, CONVFAIL is 1. Otherwise 0. */ 102 #define GET_NEXT_WC_FROM_BUFFER(WC, BUFPOS, BUFLEN, MBLENGTH, STATE, CONVFAIL) \ 103 do \ 104 { \ 105 mbstate_t state_bak; \ 106 \ 107 if (BUFLEN < 1) \ 108 { \ 109 WC = WEOF; \ 110 break; \ 111 } \ 112 \ 113 /* Get a wide character. */ \ 114 CONVFAIL = 0; \ 115 state_bak = STATE; \ 116 MBLENGTH = mbrtowc ((wchar_t *)&WC, BUFPOS, BUFLEN, &STATE); \ 117 \ 118 switch (MBLENGTH) \ 119 { \ 120 case (size_t)-1: \ 121 case (size_t)-2: \ 122 CONVFAIL++; \ 123 STATE = state_bak; \ 124 /* Fall througn. 
*/ \ 125 \ 126 case 0: \ 127 MBLENGTH = 1; \ 128 break; \ 129 } \ 130 } \ 131 while (0) 132 70 133 struct range_pair 71 134 { 72 135 size_t lo; … … 85 148 /* The number of bytes allocated for FIELD_1_BUFFER. */ 86 149 static size_t field_1_bufsize; 87 150 88 /* The largest field or byteindex used as an endpoint of a closed151 /* The largest byte, character or field index used as an endpoint of a closed 89 152 or degenerate range specification; this doesn't include the starting 90 153 index of right-open-ended ranges. For example, with either range spec 91 154 `2-5,9-', `2-3,5,9-' this variable would be set to 5. */ … … 97 160 98 161 /* This is a bit vector. 99 162 In byte mode, which bytes to output. 163 In character mode, which characters to output. 100 164 In field mode, which DELIM-separated fields to output. 101 B oth bytes and fields are numbered starting with 1,165 Bytes, characters and fields are numbered starting with 1, 102 166 so the zeroth bit of this array is unused. 103 A field or byteK has been selected if167 A byte, character or field K has been selected if 104 168 (K <= MAX_RANGE_ENDPOINT and is_printable_field(K)) 105 169 || (EOL_RANGE_START > 0 && K >= EOL_RANGE_START). */ 106 170 static unsigned char *printable_field; … … 109 173 { 110 174 undefined_mode, 111 175 112 /* Output characters that are in the given bytes. */176 /* Output bytes that are at the given positions. */ 113 177 byte_mode, 114 178 179 /* Output characters that are at the given positions. */ 180 character_mode, 181 115 182 /* Output the given delimeter-separated fields. */ 116 183 field_mode 117 184 }; … … 121 188 122 189 static enum operating_mode operating_mode; 123 190 191 /* If nonzero, when in byte mode, don't split multibyte characters. */ 192 static int byte_mode_character_aware; 193 194 /* If nonzero, the function for single byte locale is work 195 if this program runs on multibyte locale. 
*/ 196 static int force_singlebyte_mode; 197 124 198 /* If true do not output lines containing no delimeter characters. 125 199 Otherwise, all such lines are printed. This option is valid only 126 200 with field mode. */ … … 132 206 133 207 /* The delimeter character for field mode. */ 134 208 static unsigned char delim; 209 #if HAVE_WCHAR_H 210 static wchar_t wcdelim; 211 #endif 135 212 136 213 /* True if the --output-delimiter=STRING option was specified. */ 137 214 static bool output_delimiter_specified; … … 205 282 -f, --fields=LIST select only these fields; also print any line\n\ 206 283 that contains no delimiter character, unless\n\ 207 284 the -s option is specified\n\ 208 -n (ignored)\n\285 -n with -b: don't split multibyte characters\n\ 209 286 "), stdout); 210 287 fputs (_("\ 211 288 --complement complement the set of selected bytes, characters\n\ … … 362 439 in_digits = false; 363 440 /* Starting a range. */ 364 441 if (dash_found) 365 FATAL_ERROR (_("invalid byte or field list"));442 FATAL_ERROR (_("invalid byte, character or field list")); 366 443 dash_found = true; 367 444 fieldstr++; 368 445 … … 387 464 if (value == 0) 388 465 { 389 466 /* `n-'. From `initial' to end of line. */ 390 eol_range_start = initial; 467 if (eol_range_start == 0 || 468 (eol_range_start != 0 && eol_range_start > initial)) 469 eol_range_start = initial; 391 470 field_found = true; 392 471 } 393 472 else 394 473 { 395 474 /* `m-n' or `-n' (1-n). */ 396 475 if (value < initial) 397 FATAL_ERROR (_("invalid byte or field list"));476 FATAL_ERROR (_("invalid byte, character or field list")); 398 477 399 478 /* Is there already a range going to end of line? 
*/ 400 479 if (eol_range_start != 0) … … 467 546 if (operating_mode == byte_mode) 468 547 error (0, 0, 469 548 _("byte offset %s is too large"), quote (bad_num)); 549 else if (operating_mode == character_mode) 550 error (0, 0, 551 _("character offset %s is too large"), quote (bad_num)); 470 552 else 471 553 error (0, 0, 472 554 _("field number %s is too large"), quote (bad_num)); … … 477 559 fieldstr++; 478 560 } 479 561 else 480 FATAL_ERROR (_("invalid byte or field list"));562 FATAL_ERROR (_("invalid byte, character or field list")); 481 563 } 482 564 483 565 max_range_endpoint = 0; … … 570 652 } 571 653 } 572 654 655 #if HAVE_MBRTOWC 656 /* This function is in use for the following case. 657 658 1. Read from the stream STREAM, printing to standard output any selected 659 characters. 660 661 2. Read from stream STREAM, printing to standard output any selected bytes, 662 without splitting multibyte characters. */ 663 664 static void 665 cut_characters_or_cut_bytes_no_split (FILE *stream) 666 { 667 int idx; /* number of bytes or characters in the line so far. */ 668 char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */ 669 char *bufpos; /* Next read position of BUF. */ 670 size_t buflen; /* The length of the byte sequence in buf. */ 671 wint_t wc; /* A gotten wide character. */ 672 size_t mblength; /* The byte size of a multibyte character which shows 673 as same character as WC. */ 674 mbstate_t state; /* State of the stream. */ 675 int convfail; /* 1, when conversion is failed. Otherwise 0. 
*/ 676 677 idx = 0; 678 buflen = 0; 679 bufpos = buf; 680 memset (&state, '\0', sizeof(mbstate_t)); 681 682 while (1) 683 { 684 REFILL_BUFFER (buf, bufpos, buflen, stream); 685 686 GET_NEXT_WC_FROM_BUFFER (wc, bufpos, buflen, mblength, state, convfail); 687 688 if (wc == WEOF) 689 { 690 if (idx > 0) 691 putchar ('\n'); 692 break; 693 } 694 else if (wc == L'\n') 695 { 696 putchar ('\n'); 697 idx = 0; 698 } 699 else 700 { 701 idx += (operating_mode == byte_mode) ? mblength : 1; 702 if (print_kth (idx, NULL)) 703 fwrite (bufpos, mblength, sizeof(char), stdout); 704 } 705 706 buflen -= mblength; 707 bufpos += mblength; 708 } 709 } 710 #endif 711 573 712 /* Read from stream STREAM, printing to standard output any selected fields. */ 574 713 575 714 static void … … 692 831 } 693 832 } 694 833 834 #if HAVE_MBRTOWC 835 static void 836 cut_fields_mb (FILE *stream) 837 { 838 int c; 839 unsigned int field_idx; 840 int found_any_selected_field; 841 int buffer_first_field; 842 int empty_input; 843 char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */ 844 char *bufpos; /* Next read position of BUF. */ 845 size_t buflen; /* The length of the byte sequence in buf. */ 846 wint_t wc = 0; /* A gotten wide character. */ 847 size_t mblength; /* The byte size of a multibyte character which shows 848 as same character as WC. */ 849 mbstate_t state; /* State of the stream. */ 850 int convfail; /* 1, when conversion is failed. Otherwise 0. */ 851 852 found_any_selected_field = 0; 853 field_idx = 1; 854 bufpos = buf; 855 buflen = 0; 856 memset (&state, '\0', sizeof(mbstate_t)); 857 858 c = getc (stream); 859 empty_input = (c == EOF); 860 if (c != EOF) 861 ungetc (c, stream); 862 else 863 wc = WEOF; 864 865 /* To support the semantics of the -s flag, we may have to buffer 866 all of the first field to determine whether it is `delimited.' 
867 But that is unnecessary if all non-delimited lines must be printed 868 and the first field has been selected, or if non-delimited lines 869 must be suppressed and the first field has *not* been selected. 870 That is because a non-delimited line has exactly one field. */ 871 buffer_first_field = (suppress_non_delimited ^ !print_kth (1, NULL)); 872 873 while (1) 874 { 875 if (field_idx == 1 && buffer_first_field) 876 { 877 int len = 0; 878 879 while (1) 880 { 881 REFILL_BUFFER (buf, bufpos, buflen, stream); 882 883 GET_NEXT_WC_FROM_BUFFER 884 (wc, bufpos, buflen, mblength, state, convfail); 885 886 if (wc == WEOF) 887 break; 888 889 field_1_buffer = xrealloc (field_1_buffer, len + mblength); 890 memcpy (field_1_buffer + len, bufpos, mblength); 891 len += mblength; 892 buflen -= mblength; 893 bufpos += mblength; 894 895 if (!convfail && (wc == L'\n' || wc == wcdelim)) 896 break; 897 } 898 899 if (wc == WEOF) 900 break; 901 902 /* If the first field extends to the end of line (it is not 903 delimited) and we are printing all non-delimited lines, 904 print this one. */ 905 if (convfail || (!convfail && wc != wcdelim)) 906 { 907 if (suppress_non_delimited) 908 { 909 /* Empty. */ 910 } 911 else 912 { 913 fwrite (field_1_buffer, sizeof (char), len, stdout); 914 /* Make sure the output line is newline terminated. */ 915 if (convfail || (!convfail && wc != L'\n')) 916 putchar ('\n'); 917 } 918 continue; 919 } 920 921 if (print_kth (1, NULL)) 922 { 923 /* Print the field, but not the trailing delimiter. 
*/ 924 fwrite (field_1_buffer, sizeof (char), len - 1, stdout); 925 found_any_selected_field = 1; 926 } 927 ++field_idx; 928 } 929 930 if (wc != WEOF) 931 { 932 if (print_kth (field_idx, NULL)) 933 { 934 if (found_any_selected_field) 935 { 936 fwrite (output_delimiter_string, sizeof (char), 937 output_delimiter_length, stdout); 938 } 939 found_any_selected_field = 1; 940 } 941 942 while (1) 943 { 944 REFILL_BUFFER (buf, bufpos, buflen, stream); 945 946 GET_NEXT_WC_FROM_BUFFER 947 (wc, bufpos, buflen, mblength, state, convfail); 948 949 if (wc == WEOF) 950 break; 951 else if (!convfail && (wc == wcdelim || wc == L'\n')) 952 { 953 buflen -= mblength; 954 bufpos += mblength; 955 break; 956 } 957 958 if (print_kth (field_idx, NULL)) 959 fwrite (bufpos, mblength, sizeof(char), stdout); 960 961 buflen -= mblength; 962 bufpos += mblength; 963 } 964 } 965 966 if ((!convfail || wc == L'\n') && buflen < 1) 967 wc = WEOF; 968 969 if (!convfail && wc == wcdelim) 970 ++field_idx; 971 else if (wc == WEOF || (!convfail && wc == L'\n')) 972 { 973 if (found_any_selected_field 974 || (!empty_input && !(suppress_non_delimited && field_idx == 1))) 975 putchar ('\n'); 976 if (wc == WEOF) 977 break; 978 field_idx = 1; 979 found_any_selected_field = 0; 980 } 981 } 982 } 983 #endif 984 695 985 static void 696 986 cut_stream (FILE *stream) 697 987 { 698 if (operating_mode == byte_mode) 699 cut_bytes (stream); 988 #if HAVE_MBRTOWC 989 if (MB_CUR_MAX > 1 && !force_singlebyte_mode) 990 { 991 switch (operating_mode) 992 { 993 case byte_mode: 994 if (byte_mode_character_aware) 995 cut_characters_or_cut_bytes_no_split (stream); 996 else 997 cut_bytes (stream); 998 break; 999 1000 case character_mode: 1001 cut_characters_or_cut_bytes_no_split (stream); 1002 break; 1003 1004 case field_mode: 1005 cut_fields_mb (stream); 1006 break; 1007 1008 default: 1009 abort (); 1010 } 1011 } 700 1012 else 701 cut_fields (stream); 1013 #endif 1014 { 1015 if (operating_mode == field_mode) 1016 cut_fields 
(stream); 1017 else 1018 cut_bytes (stream); 1019 } 702 1020 } 703 1021 704 1022 /* Process file FILE to standard output. … … 748 1066 bool ok; 749 1067 bool delim_specified = false; 750 1068 char *spec_list_string IF_LINT(= NULL); 1069 char mbdelim[MB_LEN_MAX + 1]; 1070 size_t delimlen = 0; 751 1071 752 1072 initialize_main (&argc, &argv); 753 1073 program_name = argv[0]; … … 770 1090 switch (optc) 771 1091 { 772 1092 case 'b': 773 case 'c':774 1093 /* Build the byte list. */ 775 1094 if (operating_mode != undefined_mode) 776 1095 FATAL_ERROR (_("only one type of list may be specified")); … … 778 1097 spec_list_string = optarg; 779 1098 break; 780 1099 1100 case 'c': 1101 /* Build the character list. */ 1102 if (operating_mode != undefined_mode) 1103 FATAL_ERROR (_("only one type of list may be specified")); 1104 operating_mode = character_mode; 1105 spec_list_string = optarg; 1106 break; 1107 781 1108 case 'f': 782 1109 /* Build the field list. */ 783 1110 if (operating_mode != undefined_mode) … … 789 1116 case 'd': 790 1117 /* New delimiter. */ 791 1118 /* Interpret -d '' to mean `use the NUL byte as the delimiter.' */ 792 if (optarg[0] != '\0' && optarg[1] != '\0') 793 FATAL_ERROR (_("the delimiter must be a single character")); 794 delim = optarg[0]; 795 delim_specified = true; 1119 #if HAVE_MBRTOWC 1120 { 1121 if(MB_CUR_MAX > 1) 1122 { 1123 mbstate_t state; 1124 1125 memset (&state, '\0', sizeof(mbstate_t)); 1126 delimlen = mbrtowc (&wcdelim, optarg, strnlen(optarg, MB_LEN_MAX), &state); 1127 1128 if (delimlen == (size_t)-1 || delimlen == (size_t)-2) 1129 ++force_singlebyte_mode; 1130 else 1131 { 1132 delimlen = (delimlen < 1) ? 
1 : delimlen; 1133 if (wcdelim != L'\0' && *(optarg + delimlen) != '\0') 1134 FATAL_ERROR (_("the delimiter must be a single character")); 1135 memcpy (mbdelim, optarg, delimlen); 1136 } 1137 } 1138 1139 if (MB_CUR_MAX <= 1 || force_singlebyte_mode) 1140 #endif 1141 { 1142 if (optarg[0] != '\0' && optarg[1] != '\0') 1143 FATAL_ERROR (_("the delimiter must be a single character")); 1144 delim = (unsigned char) optarg[0]; 1145 } 1146 delim_specified = true; 1147 } 796 1148 break; 797 1149 798 1150 case OUTPUT_DELIMITER_OPTION: … … 805 1157 break; 806 1158 807 1159 case 'n': 1160 byte_mode_character_aware = 1; 808 1161 break; 809 1162 810 1163 case 's': … … 827 1180 if (operating_mode == undefined_mode) 828 1181 FATAL_ERROR (_("you must specify a list of bytes, characters, or fields")); 829 1182 830 if (delim != '\0'&& operating_mode != field_mode)1183 if (delim_specified && operating_mode != field_mode) 831 1184 FATAL_ERROR (_("an input delimiter may be specified only\ 832 1185 when operating on fields")); 833 1186 … … 854 1207 } 855 1208 856 1209 if (!delim_specified) 857 delim = '\t'; 1210 { 1211 delim = '\t'; 1212 #ifdef HAVE_MBRTOWC 1213 wcdelim = L'\t'; 1214 mbdelim[0] = '\t'; 1215 mbdelim[1] = '\0'; 1216 delimlen = 1; 1217 #endif 1218 } 858 1219 859 1220 if (output_delimiter_string == NULL) 860 1221 { 861 static char dummy[2]; 862 dummy[0] = delim; 863 dummy[1] = '\0'; 864 output_delimiter_string = dummy; 865 output_delimiter_length = 1; 1222 #ifdef HAVE_MBRTOWC 1223 if (MB_CUR_MAX > 1 && !force_singlebyte_mode) 1224 { 1225 output_delimiter_string = xstrdup(mbdelim); 1226 output_delimiter_length = delimlen; 1227 } 1228 1229 if (MB_CUR_MAX <= 1 || force_singlebyte_mode) 1230 #endif 1231 { 1232 static char dummy[2]; 1233 dummy[0] = delim; 1234 dummy[1] = '\0'; 1235 output_delimiter_string = dummy; 1236 output_delimiter_length = 1; 1237 } 866 1238 } 867 1239 868 1240 if (optind == argc) -
src/expand.c
diff -Naur coreutils-6.9.orig/src/expand.c coreutils-6.9/src/expand.c
old new 38 38 #include <stdio.h> 39 39 #include <getopt.h> 40 40 #include <sys/types.h> 41 42 /* Get mbstate_t, mbrtowc(), wcwidth(). */ 43 #if HAVE_WCHAR_H 44 # include <wchar.h> 45 #endif 46 41 47 #include "system.h" 42 48 #include "error.h" 43 49 #include "quote.h" 44 50 #include "xstrndup.h" 45 51 52 /* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC 53 installation; work around this configuration error. */ 54 #if !defined MB_LEN_MAX || MB_LEN_MAX < 2 55 # define MB_LEN_MAX 16 56 #endif 57 58 /* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */ 59 #if HAVE_MBRTOWC && defined mbstate_t 60 # define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0) 61 #endif 62 46 63 /* The official name of this program (e.g., no `g' prefix). */ 47 64 #define PROGRAM_NAME "expand" 48 65 … … 183 200 stops = num_start + len - 1; 184 201 } 185 202 } 203 186 204 else 187 205 { 188 206 error (0, 0, _("tab size contains invalid character(s): %s"), … … 365 383 } 366 384 } 367 385 386 #if HAVE_MBRTOWC 387 static void 388 expand_multibyte (void) 389 { 390 FILE *fp; /* Input strem. */ 391 mbstate_t i_state; /* Current shift state of the input stream. */ 392 mbstate_t i_state_bak; /* Back up the I_STATE. */ 393 mbstate_t o_state; /* Current shift state of the output stream. */ 394 char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */ 395 char *bufpos; /* Next read position of BUF. */ 396 size_t buflen = 0; /* The length of the byte sequence in buf. */ 397 wchar_t wc; /* A gotten wide character. */ 398 size_t mblength; /* The byte size of a multibyte character 399 which shows as same character as WC. */ 400 int tab_index = 0; /* Index in `tab_list' of next tabstop. */ 401 int column = 0; /* Column on screen of the next char. */ 402 int next_tab_column; /* Column the next tab stop is on. */ 403 int convert = 1; /* If nonzero, perform translations. 
*/ 404 405 fp = next_file ((FILE *) NULL); 406 if (fp == NULL) 407 return; 408 409 memset (&o_state, '\0', sizeof(mbstate_t)); 410 memset (&i_state, '\0', sizeof(mbstate_t)); 411 412 for (;;) 413 { 414 /* Refill the buffer BUF. */ 415 if (buflen < MB_LEN_MAX && !feof(fp) && !ferror(fp)) 416 { 417 memmove (buf, bufpos, buflen); 418 buflen += fread (buf + buflen, sizeof(char), BUFSIZ, fp); 419 bufpos = buf; 420 } 421 422 /* No character is left in BUF. */ 423 if (buflen < 1) 424 { 425 fp = next_file (fp); 426 427 if (fp == NULL) 428 break; /* No more files. */ 429 else 430 { 431 memset (&i_state, '\0', sizeof(mbstate_t)); 432 continue; 433 } 434 } 435 436 /* Get a wide character. */ 437 i_state_bak = i_state; 438 mblength = mbrtowc (&wc, bufpos, buflen, &i_state); 439 440 switch (mblength) 441 { 442 case (size_t)-1: /* illegal byte sequence. */ 443 case (size_t)-2: 444 mblength = 1; 445 i_state = i_state_bak; 446 if (convert) 447 { 448 ++column; 449 if (convert_entire_line == 0) 450 convert = 0; 451 } 452 putchar (*bufpos); 453 break; 454 455 case 0: /* null. */ 456 mblength = 1; 457 if (convert && convert_entire_line == 0) 458 convert = 0; 459 putchar ('\0'); 460 break; 461 462 default: 463 if (wc == L'\n') /* LF. */ 464 { 465 tab_index = 0; 466 column = 0; 467 convert = 1; 468 putchar ('\n'); 469 } 470 else if (wc == L'\t' && convert) /* Tab. */ 471 { 472 if (tab_size == 0) 473 { 474 /* Do not let tab_index == first_free_tab; 475 stop when it is 1 less. */ 476 while (tab_index < first_free_tab - 1 477 && column >= tab_list[tab_index]) 478 tab_index++; 479 next_tab_column = tab_list[tab_index]; 480 if (tab_index < first_free_tab - 1) 481 tab_index++; 482 if (column >= next_tab_column) 483 next_tab_column = column + 1; 484 } 485 else 486 next_tab_column = column + tab_size - column % tab_size; 487 488 while (column < next_tab_column) 489 { 490 putchar (' '); 491 ++column; 492 } 493 } 494 else /* Others. 
*/ 495 { 496 if (convert) 497 { 498 if (wc == L'\b') 499 { 500 if (column > 0) 501 --column; 502 } 503 else 504 { 505 int width; /* The width of WC. */ 506 507 width = wcwidth (wc); 508 column += (width > 0) ? width : 0; 509 if (convert_entire_line == 0) 510 convert = 0; 511 } 512 } 513 fwrite (bufpos, sizeof(char), mblength, stdout); 514 } 515 } 516 buflen -= mblength; 517 bufpos += mblength; 518 } 519 } 520 #endif 521 368 522 int 369 523 main (int argc, char **argv) 370 524 { … … 429 583 430 584 file_list = (optind < argc ? &argv[optind] : stdin_argv); 431 585 432 expand (); 586 #if HAVE_MBRTOWC 587 if (MB_CUR_MAX > 1) 588 expand_multibyte (); 589 else 590 #endif 591 expand (); 433 592 434 593 if (have_read_stdin && fclose (stdin) != 0) 435 594 error (EXIT_FAILURE, errno, "-"); -
coreutils-6.9
diff -Naur coreutils-6.9.orig/src/fold.c coreutils-6.9/src/fold.c
old new 23 23 #include <getopt.h> 24 24 #include <sys/types.h> 25 25 26 /* Get mbstate_t, mbrtowc(), wcwidth(). */ 27 #if HAVE_WCHAR_H 28 # include <wchar.h> 29 #endif 30 31 /* Get iswprint(), iswblank(), wcwidth(). */ 32 #if HAVE_WCTYPE_H 33 # include <wctype.h> 34 #endif 35 26 36 #include "system.h" 27 37 #include "error.h" 28 38 #include "quote.h" 29 39 #include "xstrtol.h" 30 40 41 /* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC 42 installation; work around this configuration error. */ 43 #if !defined MB_LEN_MAX || MB_LEN_MAX < 2 44 # undef MB_LEN_MAX 45 # define MB_LEN_MAX 16 46 #endif 47 48 /* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */ 49 #if HAVE_MBRTOWC && defined mbstate_t 50 # define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0) 51 #endif 52 31 53 #define TAB_WIDTH 8 32 54 33 55 /* The official name of this program (e.g., no `g' prefix). */ … … 35 57 36 58 #define AUTHORS "David MacKenzie" 37 59 60 #define FATAL_ERROR(Message) \ 61 do \ 62 { \ 63 error (0, 0, (Message)); \ 64 usage (2); \ 65 } \ 66 while (0) 67 68 enum operating_mode 69 { 70 /* Fold texts by columns that are at the given positions. */ 71 column_mode, 72 73 /* Fold texts by bytes that are at the given positions. */ 74 byte_mode, 75 76 /* Fold texts by characters that are at the given positions. */ 77 character_mode, 78 }; 79 38 80 /* The name this program was run with. */ 39 81 char *program_name; 40 82 83 /* The argument shows current mode. (Default: column_mode) */ 84 static enum operating_mode operating_mode; 85 41 86 /* If nonzero, try to break on whitespace. */ 42 87 static bool break_spaces; 43 88 44 /* If nonzero, count bytes, not column positions. */45 static bool count_bytes;46 47 89 /* If nonzero, at least one of the files we read was standard input. 
*/ 48 90 static bool have_read_stdin; 49 91 50 static char const shortopts[] = "b sw:0::1::2::3::4::5::6::7::8::9::";92 static char const shortopts[] = "bcsw:0::1::2::3::4::5::6::7::8::9::"; 51 93 52 94 static struct option const longopts[] = 53 95 { 54 96 {"bytes", no_argument, NULL, 'b'}, 97 {"characters", no_argument, NULL, 'c'}, 55 98 {"spaces", no_argument, NULL, 's'}, 56 99 {"width", required_argument, NULL, 'w'}, 57 100 {GETOPT_HELP_OPTION_DECL}, … … 81 124 "), stdout); 82 125 fputs (_("\ 83 126 -b, --bytes count bytes rather than columns\n\ 127 -c, --characters count characters rather than columns\n\ 84 128 -s, --spaces break at spaces\n\ 85 129 -w, --width=WIDTH use WIDTH columns instead of 80\n\ 86 130 "), stdout); … … 98 142 static size_t 99 143 adjust_column (size_t column, char c) 100 144 { 101 if ( !count_bytes)145 if (operating_mode != byte_mode) 102 146 { 103 147 if (c == '\b') 104 148 { … … 117 161 return column; 118 162 } 119 163 120 /* Fold file FILENAME, or standard input if FILENAME is "-", 121 to stdout, with maximum line length WIDTH. 122 Return true if successful. */ 123 124 static bool 125 fold_file (char const *filename, size_t width) 164 static void 165 fold_text (FILE *istream, size_t width, int *saved_errno) 126 166 { 127 FILE *istream;128 167 int c; 129 168 size_t column = 0; /* Screen column where next char will go. */ 130 169 size_t offset_out = 0; /* Index in `line_out' for next char. 
*/ 131 170 static char *line_out = NULL; 132 171 static size_t allocated_out = 0; 133 int saved_errno;134 135 if (STREQ (filename, "-"))136 {137 istream = stdin;138 have_read_stdin = true;139 }140 else141 istream = fopen (filename, "r");142 143 if (istream == NULL)144 {145 error (0, errno, "%s", filename);146 return false;147 }148 172 149 173 while ((c = getc (istream)) != EOF) 150 174 { … … 172 196 bool found_blank = false; 173 197 size_t logical_end = offset_out; 174 198 199 /* If LINE_OUT has no wide character, 200 put a new wide character in LINE_OUT 201 if column is bigger than width. */ 202 if (offset_out == 0) 203 { 204 line_out[offset_out++] = c; 205 continue; 206 } 207 175 208 /* Look for the last blank. */ 176 209 while (logical_end) 177 210 { … … 218 251 line_out[offset_out++] = c; 219 252 } 220 253 221 saved_errno = errno;254 *saved_errno = errno; 222 255 223 256 if (offset_out) 224 257 fwrite (line_out, sizeof (char), (size_t) offset_out, stdout); 225 258 259 free(line_out); 260 } 261 262 #if HAVE_MBRTOWC 263 static void 264 fold_multibyte_text (FILE *istream, int width, int *saved_errno) 265 { 266 char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */ 267 size_t buflen = 0; /* The length of the byte sequence in buf. */ 268 char *bufpos; /* Next read position of BUF. */ 269 wint_t wc; /* A gotten wide character. */ 270 size_t mblength; /* The byte size of a multibyte character which shows 271 as same character as WC. */ 272 mbstate_t state, state_bak; /* State of the stream. */ 273 int convfail; /* 1, when conversion is failed. Otherwise 0. */ 274 275 char *line_out = NULL; 276 size_t offset_out = 0; /* Index in `line_out' for next char. 
*/ 277 size_t allocated_out = 0; 278 279 int increment; 280 size_t column = 0; 281 282 size_t last_blank_pos; 283 size_t last_blank_column; 284 int is_blank_seen; 285 int last_blank_increment; 286 int is_bs_following_last_blank; 287 size_t bs_following_last_blank_num; 288 int is_cr_after_last_blank; 289 290 #define CLEAR_FLAGS \ 291 do \ 292 { \ 293 last_blank_pos = 0; \ 294 last_blank_column = 0; \ 295 is_blank_seen = 0; \ 296 is_bs_following_last_blank = 0; \ 297 bs_following_last_blank_num = 0; \ 298 is_cr_after_last_blank = 0; \ 299 } \ 300 while (0) 301 302 #define START_NEW_LINE \ 303 do \ 304 { \ 305 putchar ('\n'); \ 306 column = 0; \ 307 offset_out = 0; \ 308 CLEAR_FLAGS; \ 309 } \ 310 while (0) 311 312 CLEAR_FLAGS; 313 memset (&state, '\0', sizeof(mbstate_t)); 314 315 for (;; bufpos += mblength, buflen -= mblength) 316 { 317 if (buflen < MB_LEN_MAX && !feof (istream) && !ferror (istream)) 318 { 319 memmove (buf, bufpos, buflen); 320 buflen += fread (buf + buflen, sizeof(char), BUFSIZ, istream); 321 bufpos = buf; 322 } 323 324 if (buflen < 1) 325 break; 326 327 /* Get a wide character. */ 328 convfail = 0; 329 state_bak = state; 330 mblength = mbrtowc ((wchar_t *)&wc, bufpos, buflen, &state); 331 332 switch (mblength) 333 { 334 case (size_t)-1: 335 case (size_t)-2: 336 convfail++; 337 state = state_bak; 338 /* Fall through. */ 339 340 case 0: 341 mblength = 1; 342 break; 343 } 344 345 rescan: 346 if (operating_mode == byte_mode) /* byte mode */ 347 increment = mblength; 348 else if (operating_mode == character_mode) /* character mode */ 349 increment = 1; 350 else /* column mode */ 351 { 352 if (convfail) 353 increment = 1; 354 else 355 { 356 switch (wc) 357 { 358 case L'\n': 359 fwrite (line_out, sizeof(char), offset_out, stdout); 360 START_NEW_LINE; 361 continue; 362 363 case L'\b': 364 increment = (column > 0) ? 
-1 : 0; 365 break; 366 367 case L'\r': 368 increment = -1 * column; 369 break; 370 371 case L'\t': 372 increment = 8 - column % 8; 373 break; 374 375 default: 376 increment = wcwidth (wc); 377 increment = (increment < 0) ? 0 : increment; 378 } 379 } 380 } 381 382 if (column + increment > width && break_spaces && last_blank_pos) 383 { 384 fwrite (line_out, sizeof(char), last_blank_pos, stdout); 385 putchar ('\n'); 386 387 offset_out = offset_out - last_blank_pos; 388 column = column - last_blank_column + ((is_cr_after_last_blank) 389 ? last_blank_increment : bs_following_last_blank_num); 390 memmove (line_out, line_out + last_blank_pos, offset_out); 391 CLEAR_FLAGS; 392 goto rescan; 393 } 394 395 if (column + increment > width && column != 0) 396 { 397 fwrite (line_out, sizeof(char), offset_out, stdout); 398 START_NEW_LINE; 399 goto rescan; 400 } 401 402 if (allocated_out < offset_out + mblength) 403 { 404 allocated_out += 1024; 405 line_out = xrealloc (line_out, allocated_out); 406 } 407 408 memcpy (line_out + offset_out, bufpos, mblength); 409 offset_out += mblength; 410 column += increment; 411 412 if (is_blank_seen && !convfail && wc == L'\r') 413 is_cr_after_last_blank = 1; 414 415 if (is_bs_following_last_blank && !convfail && wc == L'\b') 416 ++bs_following_last_blank_num; 417 else 418 is_bs_following_last_blank = 0; 419 420 if (break_spaces && !convfail && iswblank (wc)) 421 { 422 last_blank_pos = offset_out; 423 last_blank_column = column; 424 is_blank_seen = 1; 425 last_blank_increment = increment; 426 is_bs_following_last_blank = 1; 427 bs_following_last_blank_num = 0; 428 is_cr_after_last_blank = 0; 429 } 430 } 431 432 *saved_errno = errno; 433 434 if (offset_out) 435 fwrite (line_out, sizeof (char), (size_t) offset_out, stdout); 436 437 free(line_out); 438 } 439 #endif 440 441 /* Fold file FILENAME, or standard input if FILENAME is "-", 442 to stdout, with maximum line length WIDTH. 443 Return 0 if successful, 1 if an error occurs. 
*/ 444 445 static int 446 fold_file (char *filename, int width) 447 { 448 FILE *istream; 449 int saved_errno; 450 451 if (STREQ (filename, "-")) 452 { 453 istream = stdin; 454 have_read_stdin = 1; 455 } 456 else 457 istream = fopen (filename, "r"); 458 459 if (istream == NULL) 460 { 461 error (0, errno, "%s", filename); 462 return 1; 463 } 464 465 /* Define how ISTREAM is being folded. */ 466 #if HAVE_MBRTOWC 467 if (MB_CUR_MAX > 1) 468 fold_multibyte_text (istream, width, &saved_errno); 469 else 470 #endif 471 fold_text (istream, width, &saved_errno); 472 226 473 if (ferror (istream)) 227 474 { 228 475 error (0, saved_errno, "%s", filename); … … 255 502 256 503 atexit (close_stdout); 257 504 258 break_spaces = count_bytes = have_read_stdin = false; 505 operating_mode = column_mode; 506 break_spaces = have_read_stdin = false; 259 507 260 508 while ((optc = getopt_long (argc, argv, shortopts, longopts, NULL)) != -1) 261 509 { … … 264 512 switch (optc) 265 513 { 266 514 case 'b': /* Count bytes rather than columns. */ 267 count_bytes = true; 515 if (operating_mode != column_mode) 516 FATAL_ERROR (_("only one way of folding may be specified")); 517 operating_mode = byte_mode; 518 break; 519 520 case 'c': 521 if (operating_mode != column_mode) 522 FATAL_ERROR (_("only one way of folding may be specified")); 523 operating_mode = character_mode; 268 524 break; 269 525 270 526 case 's': /* Break at word boundaries. */ -
coreutils-6.9
diff -Naur coreutils-6.9.orig/src/join.c coreutils-6.9/src/join.c
old new 23 23 #include <sys/types.h> 24 24 #include <getopt.h> 25 25 26 /* Get mbstate_t, mbrtowc(), wcrtomb(), wcwidth(). */ 27 #if HAVE_WCHAR_H 28 # include <wchar.h> 29 #endif 30 31 /* Get iswblank(), towupper(). */ 32 #if HAVE_WCTYPE_H 33 # include <wctype.h> 34 #endif 35 26 36 #include "system.h" 27 37 #include "error.h" 28 38 #include "hard-locale.h" 29 39 #include "linebuffer.h" 30 #include "memcasecmp.h"31 40 #include "quote.h" 32 41 #include "stdio--.h" 33 42 #include "xmemcoll.h" 34 43 #include "xstrtol.h" 35 44 45 /* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */ 46 #if HAVE_MBRTOWC && defined mbstate_t 47 # define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0) 48 #endif 49 36 50 /* The official name of this program (e.g., no `g' prefix). */ 37 51 #define PROGRAM_NAME "join" 38 52 … … 104 118 /* Last element in `outlist', where a new element can be added. */ 105 119 static struct outlist *outlist_end = &outlist_head; 106 120 107 /* Tab character separating fields. If negative, fields are separated 108 by any nonempty string of blanks, otherwise by exactly one 109 tab character whose value (when cast to unsigned char) equals TAB. */ 110 static int tab = -1; 121 /* Tab character separating fields. If NULL, fields are separated 122 by any nonempty string of blanks. */ 123 static char *tab = NULL; 124 125 /* The number of bytes used for tab. */ 126 static size_t tablen = 0; 111 127 112 128 static struct option const longopts[] = 113 129 { … … 190 206 191 207 /* Fill in the `fields' structure in LINE. */ 192 208 209 /* Fill in the `fields' structure in LINE. 
*/ 210 193 211 static void 194 212 xfields (struct line *line) 195 213 { … … 199 217 if (ptr == lim) 200 218 return; 201 219 202 if ( 0 <= tab)220 if (tab != NULL) 203 221 { 222 unsigned char t = tab[0]; 204 223 char *sep; 205 for (; (sep = memchr (ptr, t ab, lim - ptr)) != NULL; ptr = sep + 1)224 for (; (sep = memchr (ptr, t, lim - ptr)) != NULL; ptr = sep + 1) 206 225 extract_field (line, ptr, sep - ptr); 207 226 } 208 227 else … … 229 248 extract_field (line, ptr, lim - ptr); 230 249 } 231 250 251 #if HAVE_MBRTOWC 252 static void 253 xfields_multibyte (struct line *line) 254 { 255 char *ptr = line->buf.buffer; 256 char const *lim = ptr + line->buf.length - 1; 257 wchar_t wc = 0; 258 size_t mblength = 1; 259 mbstate_t state, state_bak; 260 261 memset (&state, 0, sizeof (mbstate_t)); 262 263 if (ptr == lim) 264 return; 265 266 if (tab != NULL) 267 { 268 unsigned char t = tab[0]; 269 char *sep = ptr; 270 for (; ptr < lim; ptr = sep + mblength) 271 { 272 sep = ptr; 273 while (sep < lim) 274 { 275 state_bak = state; 276 mblength = mbrtowc (&wc, sep, lim - sep + 1, &state); 277 278 if (mblength == (size_t)-1 || mblength == (size_t)-2) 279 { 280 mblength = 1; 281 state = state_bak; 282 } 283 mblength = (mblength < 1) ? 1 : mblength; 284 285 if (mblength == tablen && !memcmp (sep, tab, mblength)) 286 break; 287 else 288 { 289 sep += mblength; 290 continue; 291 } 292 } 293 294 if (sep == lim) 295 break; 296 297 extract_field (line, ptr, sep - ptr); 298 } 299 } 300 else 301 { 302 /* Skip leading blanks before the first field. */ 303 while(ptr < lim) 304 { 305 state_bak = state; 306 mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state); 307 308 if (mblength == (size_t)-1 || mblength == (size_t)-2) 309 { 310 mblength = 1; 311 state = state_bak; 312 break; 313 } 314 mblength = (mblength < 1) ? 
1 : mblength; 315 316 if (!iswblank(wc)) 317 break; 318 ptr += mblength; 319 } 320 321 do 322 { 323 char *sep; 324 state_bak = state; 325 mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state); 326 if (mblength == (size_t)-1 || mblength == (size_t)-2) 327 { 328 mblength = 1; 329 state = state_bak; 330 break; 331 } 332 mblength = (mblength < 1) ? 1 : mblength; 333 334 sep = ptr + mblength; 335 while (sep != lim) 336 { 337 state_bak = state; 338 mblength = mbrtowc (&wc, sep, lim - sep + 1, &state); 339 if (mblength == (size_t)-1 || mblength == (size_t)-2) 340 { 341 mblength = 1; 342 state = state_bak; 343 break; 344 } 345 mblength = (mblength < 1) ? 1 : mblength; 346 347 if (iswblank (wc)) 348 break; 349 350 sep += mblength; 351 } 352 353 extract_field (line, ptr, sep - ptr); 354 if (sep == lim) 355 return; 356 357 state_bak = state; 358 mblength = mbrtowc (&wc, sep, lim - sep + 1, &state); 359 if (mblength == (size_t)-1 || mblength == (size_t)-2) 360 { 361 mblength = 1; 362 state = state_bak; 363 break; 364 } 365 mblength = (mblength < 1) ? 1 : mblength; 366 367 ptr = sep + mblength; 368 while (ptr != lim) 369 { 370 state_bak = state; 371 mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state); 372 if (mblength == (size_t)-1 || mblength == (size_t)-2) 373 { 374 mblength = 1; 375 state = state_bak; 376 break; 377 } 378 mblength = (mblength < 1) ? 1 : mblength; 379 380 if (!iswblank (wc)) 381 break; 382 383 ptr += mblength; 384 } 385 } 386 while (ptr != lim); 387 } 388 389 extract_field (line, ptr, lim - ptr); 390 } 391 #endif 392 232 393 /* Read a line from FP into LINE and split it into fields. 233 394 Return true if successful. 
*/ 234 395 … … 249 410 line->nfields_allocated = 0; 250 411 line->nfields = 0; 251 412 line->fields = NULL; 413 #if HAVE_MBRTOWC 414 if (MB_CUR_MAX > 1) 415 xfields_multibyte (line); 416 else 417 #endif 252 418 xfields (line); 253 419 return true; 254 420 } … … 303 469 keycmp (struct line const *line1, struct line const *line2) 304 470 { 305 471 /* Start of field to compare in each file. */ 306 char *beg1; 307 char *beg2; 308 309 size_t len1; 310 size_t len2; /* Length of fields to compare. */ 472 char *beg[2]; 473 char *copy[2]; 474 size_t len[2]; /* Length of fields to compare. */ 311 475 int diff; 476 int i, j; 312 477 313 478 if (join_field_1 < line1->nfields) 314 479 { 315 beg 1= line1->fields[join_field_1].beg;316 len 1= line1->fields[join_field_1].len;480 beg[0] = line1->fields[join_field_1].beg; 481 len[0] = line1->fields[join_field_1].len; 317 482 } 318 483 else 319 484 { 320 beg 1= NULL;321 len 1= 0;485 beg[0] = NULL; 486 len[0] = 0; 322 487 } 323 488 324 489 if (join_field_2 < line2->nfields) 325 490 { 326 beg 2= line2->fields[join_field_2].beg;327 len 2= line2->fields[join_field_2].len;491 beg[1] = line2->fields[join_field_2].beg; 492 len[1] = line2->fields[join_field_2].len; 328 493 } 329 494 else 330 495 { 331 beg 2= NULL;332 len 2= 0;496 beg[1] = NULL; 497 len[1] = 0; 333 498 } 334 499 335 if (len 1== 0)336 return len 2== 0 ? 0 : -1;337 if (len 2== 0)500 if (len[0] == 0) 501 return len[1] == 0 ? 0 : -1; 502 if (len[1] == 0) 338 503 return 1; 339 504 340 505 if (ignore_case) 341 506 { 342 /* FIXME: ignore_case does not work with NLS (in particular, 343 with multibyte chars). 
*/ 344 diff = memcasecmp (beg1, beg2, MIN (len1, len2)); 507 #ifdef HAVE_MBRTOWC 508 if (MB_CUR_MAX > 1) 509 { 510 size_t mblength; 511 wchar_t wc, uwc; 512 mbstate_t state, state_bak; 513 514 memset (&state, '\0', sizeof (mbstate_t)); 515 516 for (i = 0; i < 2; i++) 517 { 518 copy[i] = alloca (len[i] + 1); 519 520 for (j = 0; j < MIN (len[0], len[1]);) 521 { 522 state_bak = state; 523 mblength = mbrtowc (&wc, beg[i] + j, len[i] - j, &state); 524 525 switch (mblength) 526 { 527 case (size_t) -1: 528 case (size_t) -2: 529 state = state_bak; 530 /* Fall through */ 531 case 0: 532 mblength = 1; 533 break; 534 535 default: 536 uwc = towupper (wc); 537 538 if (uwc != wc) 539 { 540 mbstate_t state_wc; 541 542 memset (&state_wc, '\0', sizeof (mbstate_t)); 543 wcrtomb (copy[i] + j, uwc, &state_wc); 544 } 545 else 546 memcpy (copy[i] + j, beg[i] + j, mblength); 547 } 548 j += mblength; 549 } 550 copy[i][j] = '\0'; 551 } 552 } 553 else 554 #endif 555 { 556 for (i = 0; i < 2; i++) 557 { 558 copy[i] = alloca (len[i] + 1); 559 560 for (j = 0; j < MIN (len[0], len[1]); j++) 561 copy[i][j] = toupper (beg[i][j]); 562 563 copy[i][j] = '\0'; 564 } 565 } 345 566 } 346 567 else 347 568 { 348 if (hard_LC_COLLATE) 349 return xmemcoll (beg1, len1, beg2, len2); 350 diff = memcmp (beg1, beg2, MIN (len1, len2)); 569 copy[0] = (unsigned char *) beg[0]; 570 copy[1] = (unsigned char *) beg[1]; 351 571 } 352 572 573 if (hard_LC_COLLATE) 574 return xmemcoll ((char *) copy[0], len[0], (char *) copy[1], len[1]); 575 diff = memcmp (copy[0], copy[1], MIN (len[0], len[1])); 576 353 577 if (diff) 354 578 return diff; 355 return len 1 < len2 ? -1 : len1 != len2;579 return len[0] - len[1]; 356 580 } 357 581 358 582 /* Print field N of LINE if it exists and is nonempty, otherwise … … 377 601 378 602 /* Print the join of LINE1 and LINE2. */ 379 603 604 #define PUT_TAB_CHAR \ 605 do \ 606 { \ 607 (tab != NULL) ? 
\ 608 fwrite(tab, sizeof(char), tablen, stdout) : putchar (' '); \ 609 } \ 610 while (0) 611 380 612 static void 381 613 prjoin (struct line const *line1, struct line const *line2) 382 614 { 383 615 const struct outlist *outlist; 384 char output_separator = tab < 0 ? ' ' : tab;385 616 386 617 outlist = outlist_head.next; 387 618 if (outlist) … … 397 628 if (o->file == 0) 398 629 { 399 630 if (line1 == &uni_blank) 400 631 { 401 632 line = line2; 402 633 field = join_field_2; 403 634 } 404 635 else 405 636 { 406 637 line = line1; 407 638 field = join_field_1; 408 639 } … … 416 647 o = o->next; 417 648 if (o == NULL) 418 649 break; 419 putchar (output_separator);650 PUT_TAB_CHAR; 420 651 } 421 652 putchar ('\n'); 422 653 } … … 434 665 prfield (join_field_1, line1); 435 666 for (i = 0; i < join_field_1 && i < line1->nfields; ++i) 436 667 { 437 putchar (output_separator);668 PUT_TAB_CHAR; 438 669 prfield (i, line1); 439 670 } 440 671 for (i = join_field_1 + 1; i < line1->nfields; ++i) 441 672 { 442 putchar (output_separator);673 PUT_TAB_CHAR; 443 674 prfield (i, line1); 444 675 } 445 676 446 677 for (i = 0; i < join_field_2 && i < line2->nfields; ++i) 447 678 { 448 putchar (output_separator);679 PUT_TAB_CHAR; 449 680 prfield (i, line2); 450 681 } 451 682 for (i = join_field_2 + 1; i < line2->nfields; ++i) 452 683 { 453 putchar (output_separator);684 PUT_TAB_CHAR; 454 685 prfield (i, line2); 455 686 } 456 687 putchar ('\n'); … … 859 1090 860 1091 case 't': 861 1092 { 862 unsigned char newtab = optarg[0]; 863 if (! newtab) 1093 char *newtab; 1094 size_t newtablen; 1095 if (! 
optarg[0]) 864 1096 error (EXIT_FAILURE, 0, _("empty tab")); 865 if (optarg[1]) 1097 newtab = xstrdup (optarg); 1098 #if HAVE_MBRTOWC 1099 if (MB_CUR_MAX > 1) 1100 { 1101 mbstate_t state; 1102 1103 memset (&state, 0, sizeof (mbstate_t)); 1104 newtablen = mbrtowc (NULL, newtab, 1105 strnlen (newtab, MB_LEN_MAX), 1106 &state); 1107 if (newtablen == (size_t) 0 1108 || newtablen == (size_t) -1 1109 || newtablen == (size_t) -2) 1110 newtablen = 1; 1111 } 1112 else 1113 #endif 1114 newtablen = 1; 1115 1116 if (newtablen == 1 && newtab[1]) 1117 { 1118 if (STREQ (newtab, "\\0")) 1119 newtab[0] = '\0'; 1120 } 1121 if (tab != NULL && strcmp (tab, newtab)) 866 1122 { 867 if (STREQ (optarg, "\\0")) 868 newtab = '\0'; 869 else 870 error (EXIT_FAILURE, 0, _("multi-character tab %s"), 871 quote (optarg)); 1123 free (newtab); 1124 error (EXIT_FAILURE, 0, _("incompatible tabs")); 872 1125 } 873 if (0 <= tab && tab != newtab)874 error (EXIT_FAILURE, 0, _("incompatible tabs"));875 1126 tab = newtab; 1127 tablen = newtablen; 876 1128 } 877 1129 break; 878 1130 -
coreutils-6.9
diff -Naur coreutils-6.9.orig/src/pr.c coreutils-6.9/src/pr.c
old new 313 313 314 314 #include <getopt.h> 315 315 #include <sys/types.h> 316 317 /* Get MB_LEN_MAX. */ 318 #include <limits.h> 319 /* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC 320 installation; work around this configuration error. */ 321 #if !defined MB_LEN_MAX || MB_LEN_MAX == 1 322 # define MB_LEN_MAX 16 323 #endif 324 325 /* Get MB_CUR_MAX. */ 326 #include <stdlib.h> 327 328 /* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */ 329 /* Get mbstate_t, mbrtowc(), wcwidth(). */ 330 #if HAVE_WCHAR_H 331 # include <wchar.h> 332 #endif 333 334 /* Get iswprint(). -- for wcwidth(). */ 335 #if HAVE_WCTYPE_H 336 # include <wctype.h> 337 #endif 338 #if !defined iswprint && !HAVE_ISWPRINT 339 # define iswprint(wc) 1 340 #endif 341 316 342 #include "system.h" 317 343 #include "error.h" 318 344 #include "hard-locale.h" … … 324 350 #include "strftime.h" 325 351 #include "xstrtol.h" 326 352 353 /* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */ 354 #if HAVE_MBRTOWC && defined mbstate_t 355 # define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0) 356 #endif 357 358 #ifndef HAVE_DECL_WCWIDTH 359 "this configure-time declaration test was not run" 360 #endif 361 #if !HAVE_DECL_WCWIDTH 362 extern int wcwidth (); 363 #endif 364 327 365 /* The official name of this program (e.g., no `g' prefix). */ 328 366 #define PROGRAM_NAME "pr" 329 367 … … 416 454 417 455 #define NULLCOL (COLUMN *)0 418 456 419 static int char_to_clump (char c); 457 /* Function pointers to switch functions for single byte locale or for 458 multibyte locale. If multibyte functions do not exist in your system, 459 these pointers always point to the functions for single byte locale. */ 460 static void (*print_char) (char c); 461 static int (*char_to_clump) (char c); 462 463 /* Functions for single byte locale. */ 464 static void print_char_single (char c); 465 static int char_to_clump_single (char c); 466 467 /* Functions for multibyte locale. 
*/ 468 static void print_char_multi (char c); 469 static int char_to_clump_multi (char c); 470 420 471 static bool read_line (COLUMN *p); 421 472 static bool print_page (void); 422 473 static bool print_stored (COLUMN *p); … … 426 477 static void pad_across_to (int position); 427 478 static void add_line_number (COLUMN *p); 428 479 static void getoptarg (char *arg, char switch_char, char *character, 480 int *character_length, int *character_width, 429 481 int *number); 430 482 void usage (int status); 431 483 static void print_files (int number_of_files, char **av); … … 440 492 static void pad_down (int lines); 441 493 static void read_rest_of_line (COLUMN *p); 442 494 static void skip_read (COLUMN *p, int column_number); 443 static void print_char (char c);444 495 static void cleanup (void); 445 496 static void print_sep_string (void); 446 497 static void separator_string (const char *optarg_S); … … 455 506 we store the leftmost columns contiguously in buff. 456 507 To print a line from buff, get the index of the first character 457 508 from line_vector[i], and print up to line_vector[i + 1]. */ 458 static char *buff;509 static unsigned char *buff; 459 510 460 511 /* Index of the position in buff where the next character 461 512 will be stored. */ … … 559 610 static bool untabify_input = false; 560 611 561 612 /* (-e) The input tab character. */ 562 static char input_tab_char = '\t';613 static char input_tab_char[MB_LEN_MAX] = "\t"; 563 614 564 615 /* (-e) Tabstops are at chars_per_tab, 2*chars_per_tab, 3*chars_per_tab, ... 565 616 where the leftmost column is 1. */ … … 569 620 static bool tabify_output = false; 570 621 571 622 /* (-i) The output tab character. */ 572 static char output_tab_char = '\t'; 623 static char output_tab_char[MB_LEN_MAX] = "\t"; 624 625 /* (-i) The byte length of output tab character. */ 626 static int output_tab_char_length = 1; 573 627 574 628 /* (-i) The width of the output tab. 
*/ 575 629 static int chars_per_output_tab = 8; … … 643 697 static bool numbered_lines = false; 644 698 645 699 /* (-n) Character which follows each line number. */ 646 static char number_separator = '\t'; 700 static char number_separator[MB_LEN_MAX] = "\t"; 701 702 /* (-n) The byte length of the character which follows each line number. */ 703 static int number_separator_length = 1; 704 705 /* (-n) The character width of the character which follows each line number. */ 706 static int number_separator_width = 0; 647 707 648 708 /* (-n) line counting starts with 1st line of input file (not with 1st 649 709 line of 1st page printed). */ … … 696 756 -a|COLUMN|-m is a `space' and with the -J option a `tab'. */ 697 757 static char *col_sep_string = ""; 698 758 static int col_sep_length = 0; 759 static int col_sep_width = 0; 699 760 static char *column_separator = " "; 700 761 static char *line_separator = "\t"; 701 762 … … 852 913 col_sep_length = (int) strlen (optarg_S); 853 914 col_sep_string = xmalloc (col_sep_length + 1); 854 915 strcpy (col_sep_string, optarg_S); 916 917 #if HAVE_MBRTOWC 918 if (MB_CUR_MAX > 1) 919 col_sep_width = mbswidth (col_sep_string, 0); 920 else 921 #endif 922 col_sep_width = col_sep_length; 855 923 } 856 924 857 925 int … … 877 945 878 946 atexit (close_stdout); 879 947 948 /* Define which functions are used, the ones for single byte locale or the ones 949 for multibyte locale. */ 950 #if HAVE_MBRTOWC 951 if (MB_CUR_MAX > 1) 952 { 953 print_char = print_char_multi; 954 char_to_clump = char_to_clump_multi; 955 } 956 else 957 #endif 958 { 959 print_char = print_char_single; 960 char_to_clump = char_to_clump_single; 961 } 962 880 963 n_files = 0; 881 964 file_names = (argc > 1 882 965 ? 
xmalloc ((argc - 1) * sizeof (char *)) … … 949 1032 break; 950 1033 case 'e': 951 1034 if (optarg) 952 getoptarg (optarg, 'e', &input_tab_char, 953 &chars_per_input_tab); 1035 { 1036 int dummy_length, dummy_width; 1037 1038 getoptarg (optarg, 'e', input_tab_char, &dummy_length, 1039 &dummy_width, &chars_per_input_tab); 1040 } 954 1041 /* Could check tab width > 0. */ 955 1042 untabify_input = true; 956 1043 break; … … 963 1050 break; 964 1051 case 'i': 965 1052 if (optarg) 966 getoptarg (optarg, 'i', &output_tab_char, 967 &chars_per_output_tab); 1053 { 1054 int dummy_width; 1055 1056 getoptarg (optarg, 'i', output_tab_char, &output_tab_char_length, 1057 &dummy_width, &chars_per_output_tab); 1058 } 968 1059 /* Could check tab width > 0. */ 969 1060 tabify_output = true; 970 1061 break; … … 991 1082 case 'n': 992 1083 numbered_lines = true; 993 1084 if (optarg) 994 getoptarg (optarg, 'n', &number_separator,995 & chars_per_number);1085 getoptarg (optarg, 'n', number_separator, &number_separator_length, 1086 &number_separator_width, &chars_per_number); 996 1087 break; 997 1088 case 'N': 998 1089 skip_count = false; … … 1031 1122 old_s = false; 1032 1123 /* Reset an additional input of -s, -S dominates -s */ 1033 1124 col_sep_string = ""; 1034 col_sep_length = 0;1125 col_sep_length = col_sep_width = 0; 1035 1126 use_col_separator = true; 1036 1127 if (optarg) 1037 1128 separator_string (optarg); … … 1188 1279 a number. */ 1189 1280 1190 1281 static void 1191 getoptarg (char *arg, char switch_char, char *character, int *number) 1282 getoptarg (char *arg, char switch_char, char *character, int *character_length, 1283 int *character_width, int *number) 1192 1284 { 1193 1285 if (!ISDIGIT (*arg)) 1194 *character = *arg++; 1286 { 1287 #ifdef HAVE_MBRTOWC 1288 if (MB_CUR_MAX > 1) /* for multibyte locale. 
*/ 1289 { 1290 wchar_t wc; 1291 size_t mblength; 1292 int width; 1293 mbstate_t state = {'\0'}; 1294 1295 mblength = mbrtowc (&wc, arg, strnlen(arg, MB_LEN_MAX), &state); 1296 1297 if (mblength == (size_t)-1 || mblength == (size_t)-2) 1298 { 1299 *character_length = 1; 1300 *character_width = 1; 1301 } 1302 else 1303 { 1304 *character_length = (mblength < 1) ? 1 : mblength; 1305 width = wcwidth (wc); 1306 *character_width = (width < 0) ? 0 : width; 1307 } 1308 1309 strncpy (character, arg, *character_length); 1310 arg += *character_length; 1311 } 1312 else /* for single byte locale. */ 1313 #endif 1314 { 1315 *character = *arg++; 1316 *character_length = 1; 1317 *character_width = 1; 1318 } 1319 } 1320 1195 1321 if (*arg) 1196 1322 { 1197 1323 long int tmp_long; … … 1256 1382 else 1257 1383 col_sep_string = column_separator; 1258 1384 1259 col_sep_length = 1;1385 col_sep_length = col_sep_width = 1; 1260 1386 use_col_separator = true; 1261 1387 } 1262 1388 /* It's rather pointless to define a TAB separator with column … … 1287 1413 TAB_WIDTH (chars_per_input_tab, chars_per_number); */ 1288 1414 1289 1415 /* Estimate chars_per_text without any margin and keep it constant. */ 1290 if (number_separator == '\t')1416 if (number_separator[0] == '\t') 1291 1417 number_width = chars_per_number + 1292 1418 TAB_WIDTH (chars_per_default_tab, chars_per_number); 1293 1419 else 1294 number_width = chars_per_number + 1;1420 number_width = chars_per_number + number_separator_width; 1295 1421 1296 1422 /* The number is part of the column width unless we are 1297 1423 printing files in parallel. 
*/ … … 1306 1432 } 1307 1433 1308 1434 chars_per_column = (chars_per_line - chars_used_by_number - 1309 (columns - 1) * col_sep_ length) / columns;1435 (columns - 1) * col_sep_width) / columns; 1310 1436 1311 1437 if (chars_per_column < 1) 1312 1438 error (EXIT_FAILURE, 0, _("page width too narrow")); … … 1431 1557 1432 1558 /* Enlarge p->start_position of first column to use the same form of 1433 1559 padding_not_printed with all columns. */ 1434 h = h + col_sep_ length;1560 h = h + col_sep_width; 1435 1561 1436 1562 /* This loop takes care of all but the rightmost column. */ 1437 1563 … … 1465 1591 } 1466 1592 else 1467 1593 { 1468 h = h_next + col_sep_ length;1594 h = h_next + col_sep_width; 1469 1595 h_next = h + chars_per_column; 1470 1596 } 1471 1597 } … … 1755 1881 align_column (COLUMN *p) 1756 1882 { 1757 1883 padding_not_printed = p->start_position; 1758 if (padding_not_printed - col_sep_ length > 0)1884 if (padding_not_printed - col_sep_width > 0) 1759 1885 { 1760 pad_across_to (padding_not_printed - col_sep_ length);1886 pad_across_to (padding_not_printed - col_sep_width); 1761 1887 padding_not_printed = ANYWHERE; 1762 1888 } 1763 1889 … … 2028 2154 /* May be too generous. */ 2029 2155 buff = X2REALLOC (buff, &buff_allocated); 2030 2156 } 2031 buff[buff_current++] = c;2157 buff[buff_current++] = (unsigned char) c; 2032 2158 } 2033 2159 2034 2160 static void 2035 2161 add_line_number (COLUMN *p) 2036 2162 { 2037 int i ;2163 int i, j; 2038 2164 char *s; 2039 2165 int left_cut; 2040 2166 … … 2057 2183 /* Tabification is assumed for multiple columns, also for n-separators, 2058 2184 but `default n-separator = TAB' hasn't been given priority over 2059 2185 equal column_width also specified by POSIX. 
*/ 2060 if (number_separator == '\t')2186 if (number_separator[0] == '\t') 2061 2187 { 2062 2188 i = number_width - chars_per_number; 2063 2189 while (i-- > 0) 2064 2190 (p->char_func) (' '); 2065 2191 } 2066 2192 else 2067 (p->char_func) (number_separator); 2193 for (j = 0; j < number_separator_length; j++) 2194 (p->char_func) (number_separator[j]); 2068 2195 } 2069 2196 else 2070 2197 /* To comply with POSIX, we avoid any expansion of default TAB 2071 2198 separator with a single column output. No column_width requirement 2072 2199 has to be considered. */ 2073 2200 { 2074 (p->char_func) (number_separator); 2075 if (number_separator == '\t') 2201 for (j = 0; j < number_separator_length; j++) 2202 (p->char_func) (number_separator[j]); 2203 if (number_separator[0] == '\t') 2076 2204 output_position = POS_AFTER_TAB (chars_per_output_tab, 2077 2205 output_position); 2078 2206 } … … 2233 2361 while (goal - h_old > 1 2234 2362 && (h_new = POS_AFTER_TAB (chars_per_output_tab, h_old)) <= goal) 2235 2363 { 2236 putchar (output_tab_char);2364 fwrite (output_tab_char, sizeof(char), output_tab_char_length, stdout); 2237 2365 h_old = h_new; 2238 2366 } 2239 2367 while (++h_old <= goal) … … 2253 2381 { 2254 2382 char *s; 2255 2383 int l = col_sep_length; 2384 int not_space_flag; 2256 2385 2257 2386 s = col_sep_string; 2258 2387 … … 2266 2395 { 2267 2396 for (; separators_not_printed > 0; --separators_not_printed) 2268 2397 { 2398 not_space_flag = 0; 2269 2399 while (l-- > 0) 2270 2400 { 2271 2401 /* 3 types of sep_strings: spaces only, spaces and chars, … … 2279 2409 } 2280 2410 else 2281 2411 { 2412 not_space_flag = 1; 2282 2413 if (spaces_not_printed > 0) 2283 2414 print_white_space (); 2284 2415 putchar (*s++); 2285 ++output_position;2286 2416 } 2287 2417 } 2418 if (not_space_flag) 2419 output_position += col_sep_width; 2420 2288 2421 /* sep_string ends with some spaces */ 2289 2422 if (spaces_not_printed > 0) 2290 2423 print_white_space (); … … 2312 2445 required number of 
tabs and spaces. */ 2313 2446 2314 2447 static void 2315 print_char (char c)2448 print_char_single (char c) 2316 2449 { 2317 2450 if (tabify_output) 2318 2451 { … … 2336 2469 putchar (c); 2337 2470 } 2338 2471 2472 #ifdef HAVE_MBRTOWC 2473 static void 2474 print_char_multi (char c) 2475 { 2476 static size_t mbc_pos = 0; 2477 static unsigned char mbc[MB_LEN_MAX] = {'\0'}; 2478 static mbstate_t state = {'\0'}; 2479 mbstate_t state_bak; 2480 wchar_t wc; 2481 size_t mblength; 2482 int width; 2483 2484 if (tabify_output) 2485 { 2486 state_bak = state; 2487 mbc[mbc_pos++] = (unsigned char)c; 2488 mblength = mbrtowc (&wc, mbc, mbc_pos, &state); 2489 2490 while (mbc_pos > 0) 2491 { 2492 switch (mblength) 2493 { 2494 case (size_t)-2: 2495 state = state_bak; 2496 return; 2497 2498 case (size_t)-1: 2499 state = state_bak; 2500 ++output_position; 2501 putchar (mbc[0]); 2502 memmove (mbc, mbc + 1, MB_CUR_MAX - 1); 2503 --mbc_pos; 2504 break; 2505 2506 case 0: 2507 mblength = 1; 2508 2509 default: 2510 if (wc == L' ') 2511 { 2512 memmove (mbc, mbc + mblength, MB_CUR_MAX - mblength); 2513 --mbc_pos; 2514 ++spaces_not_printed; 2515 return; 2516 } 2517 else if (spaces_not_printed > 0) 2518 print_white_space (); 2519 2520 /* Nonprintables are assumed to have width 0, except L'\b'. */ 2521 if ((width = wcwidth (wc)) < 1) 2522 { 2523 if (wc == L'\b') 2524 --output_position; 2525 } 2526 else 2527 output_position += width; 2528 2529 fwrite (mbc, sizeof(char), mblength, stdout); 2530 memmove (mbc, mbc + mblength, MB_CUR_MAX - mblength); 2531 mbc_pos -= mblength; 2532 } 2533 } 2534 return; 2535 } 2536 putchar (c); 2537 } 2538 #endif 2539 2339 2540 /* Skip to page PAGE before printing. 2340 2541 PAGE may be larger than total number of pages. 
*/ 2341 2542 … … 2516 2717 align_empty_cols = false; 2517 2718 } 2518 2719 2519 if (padding_not_printed - col_sep_ length > 0)2720 if (padding_not_printed - col_sep_width > 0) 2520 2721 { 2521 pad_across_to (padding_not_printed - col_sep_ length);2722 pad_across_to (padding_not_printed - col_sep_width); 2522 2723 padding_not_printed = ANYWHERE; 2523 2724 } 2524 2725 … … 2619 2820 } 2620 2821 } 2621 2822 2622 if (padding_not_printed - col_sep_ length > 0)2823 if (padding_not_printed - col_sep_width > 0) 2623 2824 { 2624 pad_across_to (padding_not_printed - col_sep_ length);2825 pad_across_to (padding_not_printed - col_sep_width); 2625 2826 padding_not_printed = ANYWHERE; 2626 2827 } 2627 2828 … … 2634 2835 if (spaces_not_printed == 0) 2635 2836 { 2636 2837 output_position = p->start_position + end_vector[line]; 2637 if (p->start_position - col_sep_ length == chars_per_margin)2638 output_position -= col_sep_ length;2838 if (p->start_position - col_sep_width == chars_per_margin) 2839 output_position -= col_sep_width; 2639 2840 } 2640 2841 2641 2842 return true; … … 2654 2855 number of characters is 1.) 
*/ 2655 2856 2656 2857 static int 2657 char_to_clump (char c)2858 char_to_clump_single (char c) 2658 2859 { 2659 2860 unsigned char uc = c; 2660 2861 char *s = clump_buff; … … 2664 2865 int chars; 2665 2866 int chars_per_c = 8; 2666 2867 2667 if (c == input_tab_char )2868 if (c == input_tab_char[0]) 2668 2869 chars_per_c = chars_per_input_tab; 2669 2870 2670 if (c == input_tab_char || c == '\t')2871 if (c == input_tab_char[0] || c == '\t') 2671 2872 { 2672 2873 width = TAB_WIDTH (chars_per_c, input_position); 2673 2874 … … 2738 2939 return chars; 2739 2940 } 2740 2941 2942 #ifdef HAVE_MBRTOWC 2943 static int 2944 char_to_clump_multi (char c) 2945 { 2946 static size_t mbc_pos = 0; 2947 static char mbc[MB_LEN_MAX] = {'\0'}; 2948 static mbstate_t state = {'\0'}; 2949 mbstate_t state_bak; 2950 wchar_t wc; 2951 size_t mblength; 2952 int wc_width; 2953 register int *s = clump_buff; 2954 register int i, j; 2955 char esc_buff[4]; 2956 int width; 2957 int chars; 2958 int chars_per_c = 8; 2959 2960 state_bak = state; 2961 mbc[mbc_pos++] = c; 2962 mblength = mbrtowc (&wc, mbc, mbc_pos, &state); 2963 2964 width = 0; 2965 chars = 0; 2966 while (mbc_pos > 0) 2967 { 2968 switch (mblength) 2969 { 2970 case (size_t)-2: 2971 state = state_bak; 2972 return 0; 2973 2974 case (size_t)-1: 2975 state = state_bak; 2976 mblength = 1; 2977 2978 if (use_esc_sequence || use_cntrl_prefix) 2979 { 2980 width = +4; 2981 chars = +4; 2982 *s++ = '\\'; 2983 sprintf (esc_buff, "%03o", mbc[0]); 2984 for (i = 0; i <= 2; ++i) 2985 *s++ = (int) esc_buff[i]; 2986 } 2987 else 2988 { 2989 width += 1; 2990 chars += 1; 2991 *s++ = mbc[0]; 2992 } 2993 break; 2994 2995 case 0: 2996 mblength = 1; 2997 /* Fall through */ 2998 2999 default: 3000 if (memcmp (mbc, input_tab_char, mblength) == 0) 3001 chars_per_c = chars_per_input_tab; 3002 3003 if (memcmp (mbc, input_tab_char, mblength) == 0 || c == '\t') 3004 { 3005 int width_inc; 3006 3007 width_inc = TAB_WIDTH (chars_per_c, input_position); 3008 width += 
width_inc; 3009 3010 if (untabify_input) 3011 { 3012 for (i = width_inc; i; --i) 3013 *s++ = ' '; 3014 chars += width_inc; 3015 } 3016 else 3017 { 3018 for (i = 0; i < mblength; i++) 3019 *s++ = mbc[i]; 3020 chars += mblength; 3021 } 3022 } 3023 else if ((wc_width = wcwidth (wc)) < 1) 3024 { 3025 if (use_esc_sequence) 3026 { 3027 for (i = 0; i < mblength; i++) 3028 { 3029 width += 4; 3030 chars += 4; 3031 *s++ = '\\'; 3032 sprintf (esc_buff, "%03o", c); 3033 for (j = 0; j <= 2; ++j) 3034 *s++ = (int) esc_buff[j]; 3035 } 3036 } 3037 else if (use_cntrl_prefix) 3038 { 3039 if (wc < 0200) 3040 { 3041 width += 2; 3042 chars += 2; 3043 *s++ = '^'; 3044 *s++ = wc ^ 0100; 3045 } 3046 else 3047 { 3048 for (i = 0; i < mblength; i++) 3049 { 3050 width += 4; 3051 chars += 4; 3052 *s++ = '\\'; 3053 sprintf (esc_buff, "%03o", c); 3054 for (j = 0; j <= 2; ++j) 3055 *s++ = (int) esc_buff[j]; 3056 } 3057 } 3058 } 3059 else if (wc == L'\b') 3060 { 3061 width += -1; 3062 chars += 1; 3063 *s++ = c; 3064 } 3065 else 3066 { 3067 width += 0; 3068 chars += mblength; 3069 for (i = 0; i < mblength; i++) 3070 *s++ = mbc[i]; 3071 } 3072 } 3073 else 3074 { 3075 width += wc_width; 3076 chars += mblength; 3077 for (i = 0; i < mblength; i++) 3078 *s++ = mbc[i]; 3079 } 3080 } 3081 memmove (mbc, mbc + mblength, MB_CUR_MAX - mblength); 3082 mbc_pos -= mblength; 3083 } 3084 3085 input_position += width; 3086 return chars; 3087 } 3088 #endif 3089 2741 3090 /* We've just printed some files and need to clean up things before 2742 3091 looking for more options and printing the next batch of files. 2743 3092 -
coreutils-6.9
diff -Naur coreutils-6.9.orig/src/sort.c coreutils-6.9/src/sort.c
old new 23 23 24 24 #include <config.h> 25 25 26 #include <assert.h> 26 27 #include <getopt.h> 27 28 #include <sys/types.h> 28 29 #include <sys/wait.h> 29 30 #include <signal.h> 31 #if HAVE_WCHAR_H 32 # include <wchar.h> 33 #endif 34 /* Get isw* functions. */ 35 #if HAVE_WCTYPE_H 36 # include <wctype.h> 37 #endif 30 38 #include "system.h" 31 39 #include "argmatch.h" 32 40 #include "error.h" … … 116 124 /* Thousands separator; if -1, then there isn't one. */ 117 125 static int thousands_sep; 118 126 127 static int force_general_numcompare = 0; 128 119 129 /* Nonzero if the corresponding locales are hard. */ 120 130 static bool hard_LC_COLLATE; 121 #if HAVE_ NL_LANGINFO131 #if HAVE_LANGINFO_CODESET 122 132 static bool hard_LC_TIME; 123 133 #endif 124 134 125 135 #define NONZERO(x) ((x) != 0) 126 136 137 /* get a multibyte character's byte length. */ 138 #define GET_BYTELEN_OF_CHAR(LIM, PTR, MBLENGTH, STATE) \ 139 do \ 140 { \ 141 wchar_t wc; \ 142 mbstate_t state_bak; \ 143 \ 144 state_bak = STATE; \ 145 mblength = mbrtowc (&wc, PTR, LIM - PTR, &STATE); \ 146 \ 147 switch (MBLENGTH) \ 148 { \ 149 case (size_t)-1: \ 150 case (size_t)-2: \ 151 STATE = state_bak; \ 152 /* Fall through. */ \ 153 case 0: \ 154 MBLENGTH = 1; \ 155 } \ 156 } \ 157 while (0) 158 127 159 /* The kind of blanks for '-b' to skip in various options. */ 128 160 enum blanktype { bl_start, bl_end, bl_both }; 129 161 … … 261 293 they were read if all keys compare equal. */ 262 294 static bool stable; 263 295 264 /* If TAB has this value, blanks separate fields. */ 265 enum { TAB_DEFAULT = CHAR_MAX + 1 }; 266 267 /* Tab character separating fields. If TAB_DEFAULT, then fields are 296 /* Tab character separating fields. If tab_length is 0, then fields are 268 297 separated by the empty string between a non-blank character and a blank 269 298 character. 
*/ 270 static int tab = TAB_DEFAULT; 299 static char tab[MB_LEN_MAX + 1]; 300 static size_t tab_length = 0; 271 301 272 302 /* Flag to remove consecutive duplicate lines from the output. 273 303 Only the last of a sequence of equal lines will be output. */ … … 639 669 update_proc (pid); 640 670 } 641 671 672 /* Function pointers. */ 673 static void 674 (*inittables) (void); 675 static char * 676 (*begfield) (const struct line*, const struct keyfield *); 677 static char * 678 (*limfield) (const struct line*, const struct keyfield *); 679 static int 680 (*getmonth) (char const *, size_t); 681 static int 682 (*keycompare) (const struct line *, const struct line *); 683 static int 684 (*numcompare) (const char *, const char *); 685 686 /* Test for white space multibyte character. 687 Set LENGTH the byte length of investigated multibyte character. */ 688 #if HAVE_MBRTOWC 689 static int 690 ismbblank (const char *str, size_t len, size_t *length) 691 { 692 size_t mblength; 693 wchar_t wc; 694 mbstate_t state; 695 696 memset (&state, '\0', sizeof(mbstate_t)); 697 mblength = mbrtowc (&wc, str, len, &state); 698 699 if (mblength == (size_t)-1 || mblength == (size_t)-2) 700 { 701 *length = 1; 702 return 0; 703 } 704 705 *length = (mblength < 1) ? 1 : mblength; 706 return iswblank (wc); 707 } 708 #endif 709 642 710 /* Clean up any remaining temporary files. */ 643 711 644 712 static void … … 978 1046 free (node); 979 1047 } 980 1048 981 #if HAVE_ NL_LANGINFO1049 #if HAVE_LANGINFO_CODESET 982 1050 983 1051 static int 984 1052 struct_month_cmp (const void *m1, const void *m2) … … 993 1061 /* Initialize the character class tables. */ 994 1062 995 1063 static void 996 inittables (void)1064 inittables_uni (void) 997 1065 { 998 1066 size_t i; 999 1067 … … 1005 1073 fold_toupper[i] = toupper (i); 1006 1074 } 1007 1075 1008 #if HAVE_ NL_LANGINFO1076 #if HAVE_LANGINFO_CODESET 1009 1077 /* If we're not in the "C" locale, read different names for months. 
*/ 1010 1078 if (hard_LC_TIME) 1011 1079 { … … 1031 1099 #endif 1032 1100 } 1033 1101 1102 #if HAVE_MBRTOWC 1103 static void 1104 inittables_mb (void) 1105 { 1106 int i, j, k, l; 1107 char *name, *s; 1108 size_t s_len, mblength; 1109 char mbc[MB_LEN_MAX]; 1110 wchar_t wc, pwc; 1111 mbstate_t state_mb, state_wc; 1112 1113 for (i = 0; i < MONTHS_PER_YEAR; i++) 1114 { 1115 s = (char *) nl_langinfo (ABMON_1 + i); 1116 s_len = strlen (s); 1117 monthtab[i].name = name = (char *) xmalloc (s_len + 1); 1118 monthtab[i].val = i + 1; 1119 1120 memset (&state_mb, '\0', sizeof (mbstate_t)); 1121 memset (&state_wc, '\0', sizeof (mbstate_t)); 1122 1123 for (j = 0; j < s_len;) 1124 { 1125 if (!ismbblank (s + j, s_len - j, &mblength)) 1126 break; 1127 j += mblength; 1128 } 1129 1130 for (k = 0; j < s_len;) 1131 { 1132 mblength = mbrtowc (&wc, (s + j), (s_len - j), &state_mb); 1133 assert (mblength != (size_t)-1 && mblength != (size_t)-2); 1134 if (mblength == 0) 1135 break; 1136 1137 pwc = towupper (wc); 1138 if (pwc == wc) 1139 { 1140 memcpy (mbc, s + j, mblength); 1141 j += mblength; 1142 } 1143 else 1144 { 1145 j += mblength; 1146 mblength = wcrtomb (mbc, pwc, &state_wc); 1147 assert (mblength != (size_t)0 && mblength != (size_t)-1); 1148 } 1149 1150 for (l = 0; l < mblength; l++) 1151 name[k++] = mbc[l]; 1152 } 1153 name[k] = '\0'; 1154 } 1155 qsort ((void *) monthtab, MONTHS_PER_YEAR, 1156 sizeof (struct month), struct_month_cmp); 1157 } 1158 #endif 1159 1034 1160 /* Specify the amount of main memory to use when sorting. */ 1035 1161 static void 1036 1162 specify_sort_size (char const *s) … … 1241 1367 by KEY in LINE. 
*/ 1242 1368 1243 1369 static char * 1244 begfield (const struct line *line, const struct keyfield *key)1370 begfield_uni (const struct line *line, const struct keyfield *key) 1245 1371 { 1246 1372 char *ptr = line->text, *lim = ptr + line->length - 1; 1247 1373 size_t sword = key->sword; … … 1251 1377 /* The leading field separator itself is included in a field when -t 1252 1378 is absent. */ 1253 1379 1254 if (tab != TAB_DEFAULT)1380 if (tab_length) 1255 1381 while (ptr < lim && sword--) 1256 1382 { 1257 while (ptr < lim && *ptr != tab )1383 while (ptr < lim && *ptr != tab[0]) 1258 1384 ++ptr; 1259 1385 if (ptr < lim) 1260 1386 ++ptr; … … 1282 1408 return ptr; 1283 1409 } 1284 1410 1411 #if HAVE_MBRTOWC 1412 static char * 1413 begfield_mb (const struct line *line, const struct keyfield *key) 1414 { 1415 int i; 1416 char *ptr = line->text, *lim = ptr + line->length - 1; 1417 size_t sword = key->sword; 1418 size_t schar = key->schar; 1419 size_t mblength; 1420 mbstate_t state; 1421 1422 memset (&state, '\0', sizeof(mbstate_t)); 1423 1424 if (tab_length) 1425 while (ptr < lim && sword--) 1426 { 1427 while (ptr < lim && memcmp (ptr, tab, tab_length) != 0) 1428 { 1429 GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); 1430 ptr += mblength; 1431 } 1432 if (ptr < lim) 1433 { 1434 GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); 1435 ptr += mblength; 1436 } 1437 } 1438 else 1439 while (ptr < lim && sword--) 1440 { 1441 while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength)) 1442 ptr += mblength; 1443 if (ptr < lim) 1444 { 1445 GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); 1446 ptr += mblength; 1447 } 1448 while (ptr < lim && !ismbblank (ptr, lim - ptr, &mblength)) 1449 ptr += mblength; 1450 } 1451 1452 if (key->skipsblanks) 1453 while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength)) 1454 ptr += mblength; 1455 1456 for (i = 0; i < schar; i++) 1457 { 1458 GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); 1459 1460 if (ptr + mblength > lim) 1461 break; 1462 else 1463 
ptr += mblength; 1464 } 1465 1466 return ptr; 1467 } 1468 #endif 1469 1285 1470 /* Return the limit of (a pointer to the first character after) the field 1286 1471 in LINE specified by KEY. */ 1287 1472 1288 1473 static char * 1289 limfield (const struct line *line, const struct keyfield *key)1474 limfield_uni (const struct line *line, const struct keyfield *key) 1290 1475 { 1291 1476 char *ptr = line->text, *lim = ptr + line->length - 1; 1292 1477 size_t eword = key->eword, echar = key->echar; … … 1299 1484 `beginning' is the first character following the delimiting TAB. 1300 1485 Otherwise, leave PTR pointing at the first `blank' character after 1301 1486 the preceding field. */ 1302 if (tab != TAB_DEFAULT)1487 if (tab_length) 1303 1488 while (ptr < lim && eword--) 1304 1489 { 1305 while (ptr < lim && *ptr != tab )1490 while (ptr < lim && *ptr != tab[0]) 1306 1491 ++ptr; 1307 1492 if (ptr < lim && (eword | echar)) 1308 1493 ++ptr; … … 1348 1533 */ 1349 1534 1350 1535 /* Make LIM point to the end of (one byte past) the current field. 
*/ 1351 if (tab != TAB_DEFAULT)1536 if (tab_length) 1352 1537 { 1353 1538 char *newlim; 1354 newlim = memchr (ptr, tab , lim - ptr);1539 newlim = memchr (ptr, tab[0], lim - ptr); 1355 1540 if (newlim) 1356 1541 lim = newlim; 1357 1542 } … … 1384 1569 return ptr; 1385 1570 } 1386 1571 1572 #if HAVE_MBRTOWC 1573 static char * 1574 limfield_mb (const struct line *line, const struct keyfield *key) 1575 { 1576 char *ptr = line->text, *lim = ptr + line->length - 1; 1577 size_t eword = key->eword, echar = key->echar; 1578 int i; 1579 size_t mblength; 1580 mbstate_t state; 1581 1582 memset (&state, '\0', sizeof(mbstate_t)); 1583 1584 if (tab_length) 1585 while (ptr < lim && eword--) 1586 { 1587 while (ptr < lim && memcmp (ptr, tab, tab_length) != 0) 1588 { 1589 GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); 1590 ptr += mblength; 1591 } 1592 if (ptr < lim && (eword | echar)) 1593 { 1594 GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); 1595 ptr += mblength; 1596 } 1597 } 1598 else 1599 while (ptr < lim && eword--) 1600 { 1601 while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength)) 1602 ptr += mblength; 1603 if (ptr < lim) 1604 { 1605 GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); 1606 ptr += mblength; 1607 } 1608 while (ptr < lim && !ismbblank (ptr, lim - ptr, &mblength)) 1609 ptr += mblength; 1610 } 1611 1612 1613 # ifdef POSIX_UNSPECIFIED 1614 /* Make LIM point to the end of (one byte past) the current field. 
*/ 1615 if (tab_length) 1616 { 1617 char *newlim, *p; 1618 1619 newlim = NULL; 1620 for (p = ptr; p < lim;) 1621 { 1622 if (memcmp (p, tab, tab_length) == 0) 1623 { 1624 newlim = p; 1625 break; 1626 } 1627 1628 GET_BYTELEN_OF_CHAR (lim, p, mblength, state); /* measure the character at P, the scan position, not at PTR */ 1629 p += mblength; 1630 } if (newlim) lim = newlim; /* clip LIM at the separator that was found, as limfield_uni does */ 1631 } 1632 else 1633 { 1634 char *newlim; 1635 newlim = ptr; 1636 1637 while (newlim < lim && ismbblank (newlim, lim - newlim, &mblength)) 1638 newlim += mblength; 1639 if (newlim < lim) 1640 { 1641 GET_BYTELEN_OF_CHAR (lim, newlim, mblength, state); /* advance the scan pointer; PTR must stay at the field start */ 1642 newlim += mblength; 1643 } 1644 while (newlim < lim && !ismbblank (newlim, lim - newlim, &mblength)) 1645 newlim += mblength; 1646 lim = newlim; 1647 } 1648 # endif 1649 1650 /* If we're skipping leading blanks, don't start counting characters 1651 * until after skipping past any leading blanks. */ 1652 if (key->skipsblanks) 1653 while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength)) 1654 ptr += mblength; 1655 1656 memset (&state, '\0', sizeof(mbstate_t)); 1657 1658 /* Advance PTR by ECHAR (if possible), but no further than LIM. */ 1659 for (i = 0; i < echar; i++) 1660 { 1661 GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); 1662 1663 if (ptr + mblength > lim) 1664 break; 1665 else 1666 ptr += mblength; 1667 } 1668 1669 return ptr; 1670 } 1671 #endif 1672 1387 1673 /* Fill BUF reading from FP, moving buf->left bytes from the end 1388 1674 of buf->buf to the beginning first. If EOF is reached and the 1389 1675 file wasn't terminated by a newline, supply one. Set up BUF's line … … 1500 1786 hideously fast. 
*/ 1501 1787 1502 1788 static int 1503 numcompare (const char *a, const char *b)1789 numcompare_uni (const char *a, const char *b) 1504 1790 { 1505 1791 while (blanks[to_uchar (*a)]) 1506 1792 a++; … … 1510 1796 return strnumcmp (a, b, decimal_point, thousands_sep); 1511 1797 } 1512 1798 1799 #if HAVE_MBRTOWC 1800 /* Multibyte variant of numcompare_uni: skip the leading blank characters of A and B using ismbblank, then compare the rest with strnumcmp. */ static int 1801 numcompare_mb (const char *a, const char *b) 1802 { 1803 size_t mblength, len; 1804 len = strlen (a); /* okay for UTF-8 */ 1805 while (*a && ismbblank (a, len > MB_CUR_MAX ? MB_CUR_MAX : len, &mblength)) 1806 { 1807 a += mblength; 1808 len -= mblength; 1809 } 1810 len = strlen (b); /* okay for UTF-8 */ 1811 while (*b && ismbblank (b, len > MB_CUR_MAX ? MB_CUR_MAX : len, &mblength)) 1812 { b += mblength; len -= mblength; } /* keep LEN in step with B, exactly as the loop over A does */ 1813 1814 return strnumcmp (a, b, decimal_point, thousands_sep); 1815 } 1816 #endif /* HAVE_MBRTOWC */ 1817 1513 1818 static int 1514 1819 general_numcompare (const char *sa, const char *sb) 1515 1820 { … … 1543 1848 Return 0 if the name in S is not recognized. */ 1544 1849 1545 1850 static int 1546 getmonth (char const *month, size_t len)1851 getmonth_uni (char const *month, size_t len) 1547 1852 { 1548 1853 size_t lo = 0; 1549 1854 size_t hi = MONTHS_PER_YEAR; … … 1698 2003 return diff; 1699 2004 } 1700 2005 2006 #if HAVE_MBRTOWC 2007 static int 2008 getmonth_mb (const char *s, size_t len) 2009 { 2010 char *month; 2011 register size_t i; 2012 register int lo = 0, hi = MONTHS_PER_YEAR, result; 2013 char *tmp; 2014 size_t wclength, mblength; 2015 const char **pp; 2016 const wchar_t **wpp; 2017 wchar_t *month_wcs; 2018 mbstate_t state; 2019 2020 while (len > 0 && ismbblank (s, len, &mblength)) 2021 { 2022 s += mblength; 2023 len -= mblength; 2024 } 2025 2026 if (len == 0) 2027 return 0; 2028 2029 month = (char *) alloca (len + 1); 2030 2031 tmp = (char *) alloca (len + 1); 2032 memcpy (tmp, s, len); 2033 tmp[len] = '\0'; 2034 pp = (const char **)&tmp; 2035 month_wcs = (wchar_t *) alloca ((len + 1) * sizeof (wchar_t)); 2036 memset (&state, 
'\0', sizeof(mbstate_t)); 2037 2038 wclength = mbsrtowcs (month_wcs, pp, len + 1, &state); 2039 if (wclength == (size_t)-1 || *pp != NULL) return 0; /* undecodable text names no month; input data must not trigger an abort */ 2040 2041 for (i = 0; i < wclength; i++) 2042 { 2043 month_wcs[i] = towupper(month_wcs[i]); 2044 if (iswblank (month_wcs[i])) 2045 { 2046 month_wcs[i] = L'\0'; 2047 break; 2048 } 2049 } 2050 2051 wpp = (const wchar_t **)&month_wcs; 2052 2053 mblength = wcsrtombs (month, wpp, len + 1, &state); 2054 if (mblength == (size_t)-1 || *wpp != NULL) return 0; /* upper-cased form unencodable or longer than MONTH's LEN + 1 bytes: not a month, and MONTH may be unterminated */ 2055 2056 do 2057 { 2058 int ix = (lo + hi) / 2; 2059 2060 if (strncmp (month, monthtab[ix].name, strlen (monthtab[ix].name)) < 0) 2061 hi = ix; 2062 else 2063 lo = ix; 2064 } 2065 while (hi - lo > 1); 2066 2067 result = (!strncmp (month, monthtab[lo].name, strlen (monthtab[lo].name)) 2068 ? monthtab[lo].val : 0); 2069 2070 return result; 2071 } 2072 #endif 2073 1701 2074 /* Compare two lines A and B trying every key in sequence until there 1702 2075 are no more keys or a difference is found. */ 1703 2076 1704 2077 static int 1705 keycompare (const struct line *a, const struct line *b)2078 keycompare_uni (const struct line *a, const struct line *b) 1706 2079 { 1707 2080 struct keyfield const *key = keylist; 1708 2081 … … 1875 2248 return key->reverse ? -diff : diff; 1876 2249 } 1877 2250 2251 #if HAVE_MBRTOWC 2252 static int 2253 keycompare_mb (const struct line *a, const struct line *b) 2254 { 2255 struct keyfield *key = keylist; 2256 2257 /* For the first iteration only, the key positions have been 2258 precomputed for us. 
*/ 2259 char *texta = a->keybeg; 2260 char *textb = b->keybeg; 2261 char *lima = a->keylim; 2262 char *limb = b->keylim; 2263 2264 size_t mblength_a, mblength_b; 2265 wchar_t wc_a, wc_b; 2266 mbstate_t state_a, state_b; 2267 2268 int diff; 2269 2270 memset (&state_a, '\0', sizeof(mbstate_t)); 2271 memset (&state_b, '\0', sizeof(mbstate_t)); 2272 2273 for (;;) 2274 { 2275 unsigned char *translate = (unsigned char *) key->translate; 2276 bool const *ignore = key->ignore; 2277 2278 /* Find the lengths. */ 2279 size_t lena = lima <= texta ? 0 : lima - texta; 2280 size_t lenb = limb <= textb ? 0 : limb - textb; 2281 2282 /* Actually compare the fields. */ 2283 if (key->numeric | key->general_numeric) 2284 { 2285 char savea = *lima, saveb = *limb; 2286 2287 *lima = *limb = '\0'; 2288 if (force_general_numcompare) 2289 diff = general_numcompare (texta, textb); 2290 else 2291 diff = ((key->numeric ? numcompare : general_numcompare) 2292 (texta, textb)); 2293 *lima = savea, *limb = saveb; 2294 } 2295 else if (key->month) 2296 diff = getmonth (texta, lena) - getmonth (textb, lenb); 2297 else 2298 { 2299 if (ignore || translate) 2300 { 2301 char *copy_a = (char *) alloca (lena + 1 + lenb + 1); 2302 char *copy_b = copy_a + lena + 1; 2303 size_t new_len_a, new_len_b; 2304 size_t i, j; 2305 2306 /* Ignore and/or translate chars before comparing. 
*/ 2307 # define IGNORE_CHARS(NEW_LEN, LEN, TEXT, COPY, WC, MBLENGTH, STATE) \ 2308 do \ 2309 { \ 2310 wchar_t uwc; \ 2311 char mbc[MB_LEN_MAX]; \ 2312 mbstate_t state_wc; \ 2313 \ 2314 for (NEW_LEN = i = 0; i < LEN;) \ 2315 { \ 2316 mbstate_t state_bak; \ 2317 \ 2318 state_bak = STATE; \ 2319 MBLENGTH = mbrtowc (&WC, TEXT + i, LEN - i, &STATE); \ 2320 \ 2321 if (MBLENGTH == (size_t)-2 || MBLENGTH == (size_t)-1 \ 2322 || MBLENGTH == 0) \ 2323 { \ 2324 if (MBLENGTH == (size_t)-2 || MBLENGTH == (size_t)-1) \ 2325 STATE = state_bak; \ 2326 if (!ignore) \ 2327 COPY[NEW_LEN++] = TEXT[i]; \ 2328 i++; /* consume the byte even when IGNORE is set; otherwise I never advances and the loop cannot end */ continue; \ 2329 } \ 2330 \ 2331 if (ignore) \ 2332 { \ 2333 if ((ignore == nonprinting && !iswprint (WC)) \ 2334 || (ignore == nondictionary \ 2335 && !iswalnum (WC) && !iswblank (WC))) \ 2336 { \ 2337 i += MBLENGTH; \ 2338 continue; \ 2339 } \ 2340 } \ 2341 \ 2342 if (translate) \ 2343 { \ 2344 \ 2345 uwc = towupper(WC); \ 2346 if (WC == uwc) \ 2347 { \ 2348 memcpy (mbc, TEXT + i, MBLENGTH); \ 2349 i += MBLENGTH; \ 2350 } \ 2351 else \ 2352 { \ 2353 i += MBLENGTH; \ 2354 WC = uwc; \ 2355 memset (&state_wc, '\0', sizeof (mbstate_t)); \ 2356 \ 2357 MBLENGTH = wcrtomb (mbc, WC, &state_wc); \ 2358 assert (MBLENGTH != (size_t)-1 && MBLENGTH != 0); \ 2359 } \ 2360 \ 2361 for (j = 0; j < MBLENGTH; j++) \ 2362 COPY[NEW_LEN++] = mbc[j]; \ 2363 } \ 2364 else \ 2365 for (j = 0; j < MBLENGTH; j++) \ 2366 COPY[NEW_LEN++] = TEXT[i++]; \ 2367 } \ 2368 COPY[NEW_LEN] = '\0'; \ 2369 } \ 2370 while (0) 2371 IGNORE_CHARS (new_len_a, lena, texta, copy_a, 2372 wc_a, mblength_a, state_a); 2373 IGNORE_CHARS (new_len_b, lenb, textb, copy_b, 2374 wc_b, mblength_b, state_b); 2375 diff = xmemcoll (copy_a, new_len_a, copy_b, new_len_b); 2376 } 2377 else if (lena == 0) 2378 diff = - NONZERO (lenb); 2379 else if (lenb == 0) 2380 goto greater; 2381 else 2382 diff = xmemcoll (texta, lena, textb, lenb); 2383 } 2384 2385 if (diff) 2386 goto not_equal; 2387 2388 key = key->next; 2389 if (! 
key) 2390 break; 2391 2392 /* Find the beginning and limit of the next field. */ 2393 if (key->eword != -1) 2394 lima = limfield (a, key), limb = limfield (b, key); 2395 else 2396 lima = a->text + a->length - 1, limb = b->text + b->length - 1; 2397 2398 if (key->sword != -1) 2399 texta = begfield (a, key), textb = begfield (b, key); 2400 else 2401 { 2402 texta = a->text, textb = b->text; 2403 if (key->skipsblanks) 2404 { 2405 while (texta < lima && ismbblank (texta, lima - texta, &mblength_a)) 2406 texta += mblength_a; 2407 while (textb < limb && ismbblank (textb, limb - textb, &mblength_b)) 2408 textb += mblength_b; 2409 } 2410 } 2411 } 2412 2413 return 0; 2414 2415 greater: 2416 diff = 1; 2417 not_equal: 2418 return key->reverse ? -diff : diff; 2419 } 2420 #endif 2421 1878 2422 /* Compare two lines A and B, returning negative, zero, or positive 1879 2423 depending on whether A compares less than, equal to, or greater than B. */ 1880 2424 … … 2744 3288 initialize_exit_failure (SORT_FAILURE); 2745 3289 2746 3290 hard_LC_COLLATE = hard_locale (LC_COLLATE); 2747 #if HAVE_ NL_LANGINFO3291 #if HAVE_LANGINFO_CODESET 2748 3292 hard_LC_TIME = hard_locale (LC_TIME); 2749 3293 #endif 2750 3294 … … 2765 3309 thousands_sep = -1; 2766 3310 } 2767 3311 3312 #if HAVE_MBRTOWC 3313 if (MB_CUR_MAX > 1) 3314 { 3315 inittables = inittables_mb; 3316 begfield = begfield_mb; 3317 limfield = limfield_mb; 3318 getmonth = getmonth_mb; 3319 keycompare = keycompare_mb; 3320 numcompare = numcompare_mb; 3321 } 3322 else 3323 #endif 3324 { 3325 inittables = inittables_uni; 3326 begfield = begfield_uni; 3327 limfield = limfield_uni; 3328 getmonth = getmonth_uni; 3329 keycompare = keycompare_uni; 3330 numcompare = numcompare_uni; 3331 } 3332 2768 3333 have_read_stdin = false; 2769 3334 inittables (); 2770 3335 … … 3015 3580 3016 3581 case 't': 3017 3582 { 3018 char newtab = optarg[0]; 3019 if (! 
newtab) 3583 char newtab[MB_LEN_MAX + 1]; 3584 size_t newtab_length = 1; 3585 strncpy (newtab, optarg, MB_LEN_MAX); 3586 if (! newtab[0]) 3020 3587 error (SORT_FAILURE, 0, _("empty tab")); 3021 if (optarg[1]) 3588 #if HAVE_MBRTOWC 3589 if (MB_CUR_MAX > 1) 3590 { 3591 wchar_t wc; 3592 mbstate_t state; 3593 size_t i; 3594 3595 memset (&state, '\0', sizeof (mbstate_t)); 3596 newtab_length = mbrtowc (&wc, newtab, strnlen (newtab, 3597 MB_LEN_MAX), 3598 &state); 3599 switch (newtab_length) 3600 { 3601 case (size_t) -1: 3602 case (size_t) -2: 3603 case 0: 3604 newtab_length = 1; 3605 } 3606 } 3607 #endif 3608 if (newtab_length == 1 && optarg[1]) 3022 3609 { 3023 3610 if (STREQ (optarg, "\\0")) 3024 newtab = '\0';3611 newtab[0] = '\0'; 3025 3612 else 3026 3613 { 3027 3614 /* Provoke with `sort -txx'. Complain about … … 3032 3619 quote (optarg)); 3033 3620 } 3034 3621 } 3035 if (tab != TAB_DEFAULT && tab != newtab) 3622 if (tab_length 3623 && (tab_length != newtab_length 3624 || memcmp (tab, newtab, tab_length) != 0)) 3036 3625 error (SORT_FAILURE, 0, _("incompatible tabs")); 3037 tab = newtab; 3626 memcpy (tab, newtab, newtab_length); 3627 tab_length = newtab_length; 3038 3628 } 3039 3629 break; 3040 3630 -
src/unexpand.c
diff -Naur coreutils-6.9.orig/src/unexpand.c coreutils-6.9/src/unexpand.c
old new 39 39 #include <stdio.h> 40 40 #include <getopt.h> 41 41 #include <sys/types.h> 42 43 /* Get mbstate_t, mbrtowc(), wcwidth(). */ 44 #if HAVE_WCHAR_H 45 # include <wchar.h> 46 #endif 47 42 48 #include "system.h" 43 49 #include "error.h" 44 50 #include "quote.h" 45 51 #include "xstrndup.h" 46 52 53 /* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC 54 installation; work around this configuration error. */ 55 #if !defined MB_LEN_MAX || MB_LEN_MAX < 2 56 # define MB_LEN_MAX 16 57 #endif 58 59 /* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */ 60 #if HAVE_MBRTOWC && defined mbstate_t 61 # define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0) 62 #endif 63 47 64 /* The official name of this program (e.g., no `g' prefix). */ 48 65 #define PROGRAM_NAME "unexpand" 49 66 … … 110 127 {NULL, 0, NULL, 0} 111 128 }; 112 129 130 static FILE *next_file (FILE *fp); 131 132 #if HAVE_MBRTOWC 133 static void 134 unexpand_multibyte (void) 135 { 136 FILE *fp; /* Input stream. */ 137 mbstate_t i_state; /* Current shift state of the input stream. */ 138 mbstate_t i_state_bak; /* Back up the I_STATE. */ 139 mbstate_t o_state; /* Current shift state of the output stream. */ 140 char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */ 141 char *bufpos; /* Next read position of BUF. */ 142 size_t buflen = 0; /* The length of the byte sequence in buf. */ 143 wint_t wc; /* A gotten wide character. */ 144 size_t mblength; /* The byte size of a multibyte character 145 which shows as same character as WC. */ 146 147 /* Index in `tab_list' of next tabstop: */ 148 int tab_index = 0; /* For calculating width of pending tabs. */ 149 int print_tab_index = 0; /* For printing as many tabs as possible. */ 150 unsigned int column = 0; /* Column on screen of next char. */ 151 int next_tab_column; /* Column the next tab stop is on. */ 152 int convert = 1; /* If nonzero, perform translations. 
*/ 153 unsigned int pending = 0; /* Pending columns of blanks. */ 154 155 fp = next_file ((FILE *) NULL); 156 if (fp == NULL) 157 return; 158 159 memset (&o_state, '\0', sizeof(mbstate_t)); 160 memset (&i_state, '\0', sizeof(mbstate_t)); 161 162 for (;;) 163 { 164 if (buflen < MB_LEN_MAX && !feof(fp) && !ferror(fp)) 165 { 166 memmove (buf, bufpos, buflen); 167 buflen += fread (buf + buflen, sizeof(char), BUFSIZ, fp); 168 bufpos = buf; 169 } 170 171 /* Get a wide character. */ 172 if (buflen < 1) 173 { 174 mblength = 1; 175 wc = WEOF; 176 } 177 else 178 { 179 i_state_bak = i_state; 180 mblength = mbrtowc ((wchar_t *)&wc, bufpos, buflen, &i_state); 181 } 182 183 if (mblength == (size_t)-1 || mblength == (size_t)-2) 184 { 185 i_state = i_state_bak; 186 wc = L'\0'; 187 } 188 189 if (wc == L' ' && convert && column < INT_MAX) 190 { 191 ++pending; 192 ++column; 193 } 194 else if (wc == L'\t' && convert) 195 { 196 if (tab_size == 0) 197 { 198 /* Do not let tab_index == first_free_tab; 199 stop when it is 1 less. */ 200 while (tab_index < first_free_tab - 1 201 && column >= tab_list[tab_index]) 202 tab_index++; 203 next_tab_column = tab_list[tab_index]; 204 if (tab_index < first_free_tab - 1) 205 tab_index++; 206 if (column >= next_tab_column) 207 { 208 convert = 0; /* Ran out of tab stops. */ 209 goto flush_pend_mb; 210 } 211 } 212 else 213 { 214 next_tab_column = column + tab_size - column % tab_size; 215 } 216 pending += next_tab_column - column; 217 column = next_tab_column; 218 } 219 else 220 { 221 flush_pend_mb: 222 /* Flush pending spaces. Print as many tabs as possible, 223 then print the rest as spaces. */ 224 if (pending == 1) 225 { 226 putchar (' '); 227 pending = 0; 228 } 229 column -= pending; 230 while (pending > 0) 231 { 232 if (tab_size == 0) 233 { 234 /* Do not let print_tab_index == first_free_tab; 235 stop when it is 1 less. 
*/ 236 while (print_tab_index < first_free_tab - 1 237 && column >= tab_list[print_tab_index]) 238 print_tab_index++; 239 next_tab_column = tab_list[print_tab_index]; 240 if (print_tab_index < first_free_tab - 1) 241 print_tab_index++; 242 } 243 else 244 { 245 next_tab_column = 246 column + tab_size - column % tab_size; 247 } 248 if (next_tab_column - column <= pending) 249 { 250 putchar ('\t'); 251 pending -= next_tab_column - column; 252 column = next_tab_column; 253 } 254 else 255 { 256 --print_tab_index; 257 column += pending; 258 while (pending != 0) 259 { 260 putchar (' '); 261 pending--; 262 } 263 } 264 } 265 266 if (wc == WEOF) 267 { 268 fp = next_file (fp); 269 if (fp == NULL) 270 break; /* No more files. */ 271 else 272 { 273 memset (&i_state, '\0', sizeof(mbstate_t)); 274 continue; 275 } 276 } 277 278 if (mblength == (size_t)-1 || mblength == (size_t)-2) 279 { 280 if (convert) 281 { 282 ++column; 283 if (convert_entire_line == 0) 284 convert = 0; 285 } 286 mblength = 1; 287 putchar (*bufpos); /* echo the undecodable byte at the read position, not the first byte of BUF */ 288 } 289 else if (mblength == 0) 290 { 291 if (convert && convert_entire_line == 0) 292 convert = 0; 293 mblength = 1; 294 putchar ('\0'); 295 } 296 else 297 { 298 if (convert) 299 { 300 if (wc == L'\b') 301 { 302 if (column > 0) 303 --column; 304 } 305 else 306 { 307 int width; /* The width of WC. */ 308 309 width = wcwidth (wc); 310 column += (width > 0) ? width : 0; 311 if (convert_entire_line == 0) 312 convert = 0; 313 } 314 } 315 316 if (wc == L'\n') 317 { 318 tab_index = print_tab_index = 0; 319 column = pending = 0; 320 convert = 1; 321 } 322 fwrite (bufpos, sizeof(char), mblength, stdout); 323 } 324 } 325 buflen -= mblength; 326 bufpos += mblength; 327 } 328 } 329 #endif 330 331 113 332 void 114 333 usage (int status) 115 334 { … … 531 750 532 751 file_list = (optind < argc ? 
&argv[optind] : stdin_argv); 533 752 534 unexpand (); 753 #if HAVE_MBRTOWC 754 if (MB_CUR_MAX > 1) 755 unexpand_multibyte (); 756 else 757 #endif 758 unexpand (); 535 759 536 760 if (have_read_stdin && fclose (stdin) != 0) 537 761 error (EXIT_FAILURE, errno, "-"); -
coreutils-6.9
diff -Naur coreutils-6.9.orig/src/uniq.c coreutils-6.9/src/uniq.c
old new 23 23 #include <getopt.h> 24 24 #include <sys/types.h> 25 25 26 /* Get mbstate_t, mbrtowc(). */ 27 #if HAVE_WCHAR_H 28 # include <wchar.h> 29 #endif 30 31 /* Get isw* functions. */ 32 #if HAVE_WCTYPE_H 33 # include <wctype.h> 34 #endif 35 26 36 #include "system.h" 27 37 #include "argmatch.h" 28 38 #include "linebuffer.h" … … 32 42 #include "quote.h" 33 43 #include "xmemcoll.h" 34 44 #include "xstrtol.h" 35 #include "memcasecmp.h" 45 #include "xmemcoll.h" 46 47 /* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC 48 installation; work around this configuration error. */ 49 #if !defined MB_LEN_MAX || MB_LEN_MAX < 2 50 # define MB_LEN_MAX 16 51 #endif 52 53 /* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */ 54 #if HAVE_MBRTOWC && defined mbstate_t 55 # define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0) 56 #endif 57 36 58 37 59 /* The official name of this program (e.g., no `g' prefix). */ 38 60 #define PROGRAM_NAME "uniq" … … 109 131 /* Select whether/how to delimit groups of duplicate lines. */ 110 132 static enum delimit_method delimit_groups; 111 133 134 /* Function pointers. */ 135 static char * 136 (*find_field) (struct linebuffer *line); 137 112 138 static struct option const longopts[] = 113 139 { 114 140 {"count", no_argument, NULL, 'c'}, … … 198 224 return a pointer to the beginning of the line's field to be compared. 
*/ 199 225 200 226 static char * 201 find_field (conststruct linebuffer *line)227 find_field_uni (struct linebuffer *line) 202 228 { 203 229 size_t count; 204 230 char *lp = line->buffer; … … 219 245 return lp + i; 220 246 } 221 247 248 #if HAVE_MBRTOWC 249 /* Decode the multibyte character at LP + POS into WC and set MBLENGTH to its byte length. On an invalid or incomplete sequence restore *STATEP, set CONVFAIL and count one byte; a NUL character also counts as one byte. */ 250 # define MBCHAR_TO_WCHAR(WC, MBLENGTH, LP, POS, SIZE, STATEP, CONVFAIL) \ 251 do \ 252 { \ 253 mbstate_t state_bak; \ 254 \ 255 CONVFAIL = 0; \ 256 state_bak = *STATEP; \ 257 \ 258 MBLENGTH = mbrtowc (&WC, LP + POS, SIZE - POS, STATEP); \ 259 \ 260 switch (MBLENGTH) \ 261 { \ 262 case (size_t)-2: \ 263 case (size_t)-1: \ 264 *STATEP = state_bak; \ 265 CONVFAIL++; \ 266 /* Fall through */ \ 267 case 0: \ 268 MBLENGTH = 1; \ 269 } \ 270 } \ 271 while (0) 272 /* Multibyte counterpart of find_field_uni: skip up to skip_fields blank-separated fields of LINE, then up to skip_chars characters, and return a pointer to what follows. */ 273 static char * 274 find_field_multi (struct linebuffer *line) 275 { 276 size_t count; 277 char *lp = line->buffer; 278 size_t size = line->length - 1; 279 size_t pos; 280 size_t mblength; 281 wchar_t wc; 282 mbstate_t *statep; 283 int convfail; 284 285 pos = 0; 286 statep = &(line->state); 287 288 /* Skip fields: for each one, first the leading blanks, then the non-blank run. */ 289 for (count = 0; count < skip_fields && pos < size; count++) 290 { 291 while (pos < size) 292 { 293 MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail); 294 295 if (convfail || !iswblank (wc)) 296 { 297 pos += mblength; 298 break; 299 } 300 pos += mblength; 301 } 302 303 while (pos < size) 304 { 305 MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail); 306 307 if (!convfail && iswblank (wc)) 308 break; 309 310 pos += mblength; 311 } 312 } 313 314 /* Skip characters (not fields): one multibyte character per iteration. */ 315 for (count = 0; count < skip_chars && pos < size; count++) 316 { 317 MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail); 318 pos += mblength; 319 } 320 321 return lp + pos; 322 } 323 #endif 324 222 325 /* Return false if two strings OLD and NEW match, true if not. 223 326 OLD and NEW point not to the beginnings of the lines 224 327 but rather to the beginnings of the fields to compare. 
… … 227 330 static bool 228 331 different (char *old, char *new, size_t oldlen, size_t newlen) 229 332 { 333 char *copy_old, *copy_new; 334 230 335 if (check_chars < oldlen) 231 336 oldlen = check_chars; 232 337 if (check_chars < newlen) … … 234 339 235 340 if (ignore_case) 236 341 { 237 /* FIXME: This should invoke strcoll somehow. */ 238 return oldlen != newlen || memcasecmp (old, new, oldlen); 342 size_t i; 343 344 copy_old = alloca (oldlen + 1); 345 copy_new = alloca (newlen + 1); /* sized for NEW, whose length may differ from OLD's */ 346 347 for (i = 0; i < oldlen; i++) 348 copy_old[i] = toupper ((unsigned char) old[i]); /* plain char may be negative, which toupper does not accept */ 349 for (i = 0; i < newlen; i++) 350 copy_new[i] = toupper ((unsigned char) new[i]); /* fold exactly NEWLEN bytes: no over-read of NEW, no unset bytes in the copy */ 351 239 352 } 240 else if (hard_LC_COLLATE)241 return xmemcoll (old, oldlen, new, newlen) != 0;242 353 else 243 return oldlen != newlen || memcmp (old, new, oldlen); 354 { 355 copy_old = (char *)old; 356 copy_new = (char *)new; 357 } 358 359 return xmemcoll (copy_old, oldlen, copy_new, newlen); 360 } 361 362 #if HAVE_MBRTOWC 363 /* Multibyte counterpart of `different': copy at most check_chars characters of OLD and NEW, upper-cased when ignore_case is set, and collate the copies with xmemcoll. */ static int 364 different_multi (const char *old, const char *new, size_t oldlen, size_t newlen, mbstate_t oldstate, mbstate_t newstate) 365 { 366 size_t i, j, chars; 367 const char *str[2]; 368 char *copy[2]; 369 size_t len[2]; 370 mbstate_t state[2]; 371 size_t mblength; 372 wchar_t wc, uwc; 373 mbstate_t state_bak; 374 375 str[0] = old; 376 str[1] = new; 377 len[0] = oldlen; 378 len[1] = newlen; 379 state[0] = oldstate; 380 state[1] = newstate; 381 382 for (i = 0; i < 2; i++) 383 { 384 copy[i] = alloca (len[i] + 1); 385 386 for (j = 0, chars = 0; j < len[i] && chars < check_chars; chars++) 387 { 388 state_bak = state[i]; 389 mblength = mbrtowc (&wc, str[i] + j, len[i] - j, &(state[i])); 390 391 switch (mblength) 392 { 393 case (size_t)-1: 394 case (size_t)-2: 395 state[i] = state_bak; 396 /* Fall through */ 397 case 0: 398 mblength = 1; copy[i][j] = str[i][j]; /* keep the raw byte; it was previously left uninitialized in the copy */ 399 break; 400 401 default: 402 if (ignore_case) 403 { 404 uwc = towupper (wc); 405 406 if (uwc != wc) 407 { 408 mbstate_t state_wc; 409 410 memset (&state_wc, '\0', sizeof(mbstate_t)); 411 wcrtomb (copy[i] + 
j, uwc, &state_wc); 412 } 413 else 414 memcpy (copy[i] + j, str[i] + j, mblength); 415 } 416 else 417 memcpy (copy[i] + j, str[i] + j, mblength); 418 } 419 j += mblength; 420 } 421 copy[i][j] = '\0'; 422 len[i] = j; 423 } 424 425 return xmemcoll (copy[0], len[0], copy[1], len[1]); 244 426 } 427 #endif 245 428 246 429 /* Output the line in linebuffer LINE to standard output 247 430 provided that the switches say it should be output. … … 295 478 { 296 479 char *prevfield IF_LINT (= NULL); 297 480 size_t prevlen IF_LINT (= 0); 481 #if HAVE_MBRTOWC 482 mbstate_t prevstate; 483 484 memset (&prevstate, '\0', sizeof (mbstate_t)); 485 #endif 298 486 299 487 while (!feof (stdin)) 300 488 { 301 489 char *thisfield; 302 490 size_t thislen; 491 #if HAVE_MBRTOWC 492 mbstate_t thisstate; 493 #endif 494 303 495 if (readlinebuffer (thisline, stdin) == 0) 304 496 break; 305 497 thisfield = find_field (thisline); 306 498 thislen = thisline->length - 1 - (thisfield - thisline->buffer); 499 #if HAVE_MBRTOWC 500 if (MB_CUR_MAX > 1) 501 { 502 thisstate = thisline->state; 503 504 if (prevline->length == 0 || different_multi 505 (thisfield, prevfield, thislen, prevlen, thisstate, prevstate)) 506 { 507 fwrite (thisline->buffer, sizeof (char), 508 thisline->length, stdout); 509 510 SWAP_LINES (prevline, thisline); 511 prevfield = thisfield; 512 prevlen = thislen; 513 prevstate = thisstate; 514 } 515 } 516 else 517 #endif 307 518 if (prevline->length == 0 308 519 || different (thisfield, prevfield, thislen, prevlen)) 309 520 { … … 322 533 size_t prevlen; 323 534 uintmax_t match_count = 0; 324 535 bool first_delimiter = true; 536 #if HAVE_MBRTOWC 537 mbstate_t prevstate; 538 #endif 325 539 326 540 if (readlinebuffer (prevline, stdin) == 0) 327 541 goto closefiles; 328 542 prevfield = find_field (prevline); 329 543 prevlen = prevline->length - 1 - (prevfield - prevline->buffer); 544 #if HAVE_MBRTOWC 545 prevstate = prevline->state; 546 #endif 330 547 331 548 while (!feof (stdin)) 332 549 { 333 
550 bool match; 334 551 char *thisfield; 335 552 size_t thislen; 553 #if HAVE_MBRTOWC 554 mbstate_t thisstate; 555 #endif 336 556 if (readlinebuffer (thisline, stdin) == 0) 337 557 { 338 558 if (ferror (stdin)) … … 341 561 } 342 562 thisfield = find_field (thisline); 343 563 thislen = thisline->length - 1 - (thisfield - thisline->buffer); 564 #if HAVE_MBRTOWC 565 if (MB_CUR_MAX > 1) 566 { 567 thisstate = thisline->state; 568 match = !different_multi (thisfield, prevfield, 569 thislen, prevlen, thisstate, prevstate); 570 } 571 else 572 #endif 344 573 match = !different (thisfield, prevfield, thislen, prevlen); 345 574 match_count += match; 346 575 … … 373 602 SWAP_LINES (prevline, thisline); 374 603 prevfield = thisfield; 375 604 prevlen = thislen; 605 #if HAVE_MBRTOWC 606 prevstate = thisstate; 607 #endif 376 608 if (!match) 377 609 match_count = 0; 378 610 } … … 417 649 418 650 atexit (close_stdout); 419 651 652 #if HAVE_MBRTOWC 653 if (MB_CUR_MAX > 1) 654 { 655 find_field = find_field_multi; 656 } 657 else 658 #endif 659 { 660 find_field = find_field_uni; 661 } 662 663 664 420 665 skip_chars = 0; 421 666 skip_fields = 0; 422 667 check_chars = SIZE_MAX; -
tests/sort/Makefile.am
diff -Naur coreutils-6.9.orig/tests/sort/Makefile.am coreutils-6.9/tests/sort/Makefile.am
old new 66 66 bigfield.O bigfield.E 67 67 ##test-files-end 68 68 69 EXTRA_DIST = Test.pm $x-tests $(explicit) $(maint_gen) 70 noinst_SCRIPTS = $x-tests 69 run_gen += mb1.0 mb2.0 70 71 EXTRA_DIST = Test.pm $x-tests $(explicit) $(maint_gen) mb1.I mb1.X mb2.I mb2.X 72 noinst_SCRIPTS = $x-tests # $x-mb-tests 71 73 TESTS_ENVIRONMENT = \ 72 74 CU_TEST_NAME=`basename $(abs_srcdir)`,$$tst \ 73 75 PATH="$(VG_PATH_PREFIX)`pwd`/../../src$(PATH_SEPARATOR)$$PATH" 74 76 75 77 editpl = sed -e 's,@''PERL''@,$(PERL),g' -e 's,@''srcdir''@,$(srcdir),g' 76 78 77 TESTS = $x-tests 79 TESTS = $x-tests $x-mb-tests 78 80 79 81 mk_script = $(srcdir)/../mk-script 80 82 $(srcdir)/$x-tests: $(mk_script) Test.pm Makefile.am -
tests/sort/Makefile.in
diff -Naur coreutils-6.9.orig/tests/sort/Makefile.in coreutils-6.9/tests/sort/Makefile.in
old new 540 540 incompat5.O incompat5.E incompat6.O incompat6.E nul-tab.O nul-tab.E \ 541 541 bigfield.O bigfield.E 542 542 543 EXTRA_DIST = Test.pm $x-tests $(explicit) $(maint_gen) 544 noinst_SCRIPTS = $x-tests 543 run_gen += mb1.0 mb2.0 544 545 EXTRA_DIST = Test.pm $x-tests $(explicit) $(maint_gen) mb1.I mb1.X mb2.I mb2.X 546 noinst_SCRIPTS = $x-tests # $x-mb-tests 545 547 TESTS_ENVIRONMENT = \ 546 548 CU_TEST_NAME=`basename $(abs_srcdir)`,$$tst \ 547 549 PATH="$(VG_PATH_PREFIX)`pwd`/../../src$(PATH_SEPARATOR)$$PATH" 548 550 549 551 editpl = sed -e 's,@''PERL''@,$(PERL),g' -e 's,@''srcdir''@,$(srcdir),g' 550 TESTS = $x-tests 552 TESTS = $x-tests $x-mb-tests 551 553 mk_script = $(srcdir)/../mk-script 552 554 MAINTAINERCLEANFILES = $x-tests $(maint_gen) 553 555 CLEANFILES = $(run_gen) -
tests/sort/mb1.I
diff -Naur coreutils-6.9.orig/tests/sort/mb1.I coreutils-6.9/tests/sort/mb1.I
old new 1 Apple@10 2 Banana@5 3 Citrus@20 4 Cherry@30 -
tests/sort/mb1.X
diff -Naur coreutils-6.9.orig/tests/sort/mb1.X coreutils-6.9/tests/sort/mb1.X
old new 1 Banana@5 2 Apple@10 3 Citrus@20 4 Cherry@30 -
tests/sort/mb2.I
diff -Naur coreutils-6.9.orig/tests/sort/mb2.I coreutils-6.9/tests/sort/mb2.I
old new 1 Apple@AA10@@20 2 Banana@AA5@@30 3 Citrus@AA20@@5 4 Cherry@AA30@@10 -
tests/sort/mb2.X
diff -Naur coreutils-6.9.orig/tests/sort/mb2.X coreutils-6.9/tests/sort/mb2.X
old new 1 Citrus@AA20@@5 2 Cherry@AA30@@10 3 Apple@AA10@@20 4 Banana@AA5@@30 -
tests/sort/sort-mb-tests
diff -Naur coreutils-6.9.orig/tests/sort/sort-mb-tests coreutils-6.9/tests/sort/sort-mb-tests
#! /bin/sh
# sort-mb-tests: check `sort -t SEP -kN -n' in a UTF-8 (multibyte) locale.
#
# Usage: sort-mb-tests [PROGRAM]
#   PROGRAM  sort binary to test (default: ../../src/sort).
#
# Environment:
#   VERBOSE  if non-empty, print progress and diagnostics.
#   srcdir   directory holding the mbN.I inputs and mbN.X expected outputs
#            (default: the current directory).
#
# Exit status: 0 if every test passed, 1 if any test failed, 77 if no
# usable UTF-8 locale is available (the test is then skipped).

case $# in
  0) xx='../../src/sort';;
  *) xx="$1";;
esac
test "$VERBOSE" && echo=echo || echo=:
$echo testing program: $xx
test "$srcdir" || srcdir=.
test "$VERBOSE" && $xx --version 2> /dev/null

# Assign and export in two steps: `export VAR=value' is not accepted by
# every Bourne shell.
LC_ALL=en_US.UTF-8
export LC_ALL

# Skip unless the locale really provides a UTF-8 charmap.  The pattern is
# quoted so the shell cannot glob-expand it, and output is discarded by
# redirection because `grep -q' is not universally available.
locale -k LC_CTYPE 2>&1 | grep 'charmap.*UTF-8' > /dev/null || exit 77

errors=0
tests=0

# run_test NAME KEY: sort $srcdir/NAME.I numerically on "@"-separated field
# KEY into ./NAME.O and compare it with $srcdir/NAME.X.  Increments $tests,
# and increments $errors on any failure.
run_test ()
{
  name=$1
  key=$2
  tests=`expr $tests + 1`

  # Read the input from $srcdir (not the build directory) so that VPATH
  # builds find it, exactly as the expected output is looked up below.
  $xx -t @ "-k$key" -n "$srcdir/$name.I" > "$name.O"
  code=$?
  if test $code != 0; then
    $echo "Test $name failed: $xx return code $code differs from expected value 0" 1>&2
    errors=`expr $errors + 1`
  else
    cmp "$name.O" "$srcdir/$name.X" > /dev/null 2>&1
    case $? in
      0) if test "$VERBOSE"; then $echo "passed $name"; fi;;
      1) $echo "Test $name failed: files $name.O and $srcdir/$name.X differ" 1>&2
         (diff -c "$name.O" "$srcdir/$name.X") 2> /dev/null
         errors=`expr $errors + 1`;;
      # Any other status means cmp itself could not do the comparison;
      # never let that count as a pass.
      *) $echo "Test $name may have failed." 1>&2
         $echo The command "cmp $name.O $srcdir/$name.X" failed. 1>&2
         errors=`expr $errors + 1`;;
    esac
  fi
}

run_test mb1 2
run_test mb2 4

if test $errors = 0; then
  $echo Passed all $tests tests. 1>&2
else
  $echo Failed $errors tests. 1>&2
fi
test $errors = 0 || errors=1
exit $errors