Ticket #1982: coreutils-6.9-i18n-1.patch
| File coreutils-6.9-i18n-1.patch, 101.2 kB (added by matthew@linuxfromscratch.org, 1 year ago) |
|---|
-
coreutils-6.9/lib/linebuffer.h
old new 22 22 23 23 # include <stdio.h> 24 24 25 /* Get mbstate_t. */ 26 # if HAVE_WCHAR_H 27 # include <wchar.h> 28 # endif 29 25 30 /* A `struct linebuffer' holds a line of text. */ 26 31 27 32 struct linebuffer … … 29 34 size_t size; /* Allocated. */ 30 35 size_t length; /* Used. */ 31 36 char *buffer; 37 # if HAVE_WCHAR_H 38 mbstate_t state; 39 # endif 32 40 }; 33 41 34 42 /* Initialize linebuffer LINEBUFFER for use. */ -
coreutils-6.9/src/cut.c
old new 29 29 #include <assert.h> 30 30 #include <getopt.h> 31 31 #include <sys/types.h> 32 33 /* Get mbstate_t, mbrtowc(). */ 34 #if HAVE_WCHAR_H 35 # include <wchar.h> 36 #endif 32 37 #include "system.h" 33 38 34 39 #include "error.h" … … 37 42 #include "quote.h" 38 43 #include "xstrndup.h" 39 44 45 /* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC 46 installation; work around this configuration error. */ 47 #if !defined MB_LEN_MAX || MB_LEN_MAX < 2 48 # undef MB_LEN_MAX 49 # define MB_LEN_MAX 16 50 #endif 51 52 /* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */ 53 #if HAVE_MBRTOWC && defined mbstate_t 54 # define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0) 55 #endif 56 40 57 /* The official name of this program (e.g., no `g' prefix). */ 41 58 #define PROGRAM_NAME "cut" 42 59 … … 67 84 } \ 68 85 while (0) 69 86 87 /* Refill the buffer BUF to get a multibyte character. */ 88 #define REFILL_BUFFER(BUF, BUFPOS, BUFLEN, STREAM) \ 89 do \ 90 { \ 91 if (BUFLEN < MB_LEN_MAX && !feof (STREAM) && !ferror (STREAM)) \ 92 { \ 93 memmove (BUF, BUFPOS, BUFLEN); \ 94 BUFLEN += fread (BUF + BUFLEN, sizeof(char), BUFSIZ, STREAM); \ 95 BUFPOS = BUF; \ 96 } \ 97 } \ 98 while (0) 99 100 /* Get wide character on BUFPOS. BUFPOS is not included after that. 101 If byte sequence is not valid as a character, CONVFAIL is 1. Otherwise 0. */ 102 #define GET_NEXT_WC_FROM_BUFFER(WC, BUFPOS, BUFLEN, MBLENGTH, STATE, CONVFAIL) \ 103 do \ 104 { \ 105 mbstate_t state_bak; \ 106 \ 107 if (BUFLEN < 1) \ 108 { \ 109 WC = WEOF; \ 110 break; \ 111 } \ 112 \ 113 /* Get a wide character. */ \ 114 CONVFAIL = 0; \ 115 state_bak = STATE; \ 116 MBLENGTH = mbrtowc ((wchar_t *)&WC, BUFPOS, BUFLEN, &STATE); \ 117 \ 118 switch (MBLENGTH) \ 119 { \ 120 case (size_t)-1: \ 121 case (size_t)-2: \ 122 CONVFAIL++; \ 123 STATE = state_bak; \ 124 /* Fall througn. */ \ 125 \ 126 case 0: \ 127 MBLENGTH = 1; \ 128 break; \ 129 } \ 130 } \ 131 while (0) 132 70 133 struct range_pair 71 134 { 72 135 size_t lo; … … 85 148 /* The number of bytes allocated for FIELD_1_BUFFER. */ 86 149 static size_t field_1_bufsize; 87 150 88 /* The largest field or byteindex used as an endpoint of a closed151 /* The largest byte, character or field index used as an endpoint of a closed 89 152 or degenerate range specification; this doesn't include the starting 90 153 index of right-open-ended ranges. For example, with either range spec 91 154 `2-5,9-', `2-3,5,9-' this variable would be set to 5. */ … … 97 160 98 161 /* This is a bit vector. 99 162 In byte mode, which bytes to output. 163 In character mode, which characters to output. 100 164 In field mode, which DELIM-separated fields to output. 101 B oth bytes and fields are numbered starting with 1,165 Bytes, characters and fields are numbered starting with 1, 102 166 so the zeroth bit of this array is unused. 103 A field or byteK has been selected if167 A byte, character or field K has been selected if 104 168 (K <= MAX_RANGE_ENDPOINT and is_printable_field(K)) 105 169 || (EOL_RANGE_START > 0 && K >= EOL_RANGE_START). */ 106 170 static unsigned char *printable_field; … … 109 173 { 110 174 undefined_mode, 111 175 112 /* Output characters that are in the given bytes. */176 /* Output bytes that are at the given positions. */ 113 177 byte_mode, 114 178 179 /* Output characters that are at the given positions. */ 180 character_mode, 181 115 182 /* Output the given delimeter-separated fields. */ 116 183 field_mode 117 184 }; … … 121 188 122 189 static enum operating_mode operating_mode; 123 190 191 /* If nonzero, when in byte mode, don't split multibyte characters. */ 192 static int byte_mode_character_aware; 193 194 /* If nonzero, the function for single byte locale is work 195 if this program runs on multibyte locale. */ 196 static int force_singlebyte_mode; 197 124 198 /* If true do not output lines containing no delimeter characters. 125 199 Otherwise, all such lines are printed. This option is valid only 126 200 with field mode. */ … … 132 206 133 207 /* The delimeter character for field mode. */ 134 208 static unsigned char delim; 209 #if HAVE_WCHAR_H 210 static wchar_t wcdelim; 211 #endif 135 212 136 213 /* True if the --output-delimiter=STRING option was specified. */ 137 214 static bool output_delimiter_specified; … … 205 282 -f, --fields=LIST select only these fields; also print any line\n\ 206 283 that contains no delimiter character, unless\n\ 207 284 the -s option is specified\n\ 208 -n (ignored)\n\285 -n with -b: don't split multibyte characters\n\ 209 286 "), stdout); 210 287 fputs (_("\ 211 288 --complement complement the set of selected bytes, characters\n\ … … 362 439 in_digits = false; 363 440 /* Starting a range. */ 364 441 if (dash_found) 365 FATAL_ERROR (_("invalid byte or field list"));442 FATAL_ERROR (_("invalid byte, character or field list")); 366 443 dash_found = true; 367 444 fieldstr++; 368 445 … … 387 464 if (value == 0) 388 465 { 389 466 /* `n-'. From `initial' to end of line. */ 390 eol_range_start = initial; 467 if (eol_range_start == 0 || 468 (eol_range_start != 0 && eol_range_start > initial)) 469 eol_range_start = initial; 391 470 field_found = true; 392 471 } 393 472 else 394 473 { 395 474 /* `m-n' or `-n' (1-n). */ 396 475 if (value < initial) 397 FATAL_ERROR (_("invalid byte or field list"));476 FATAL_ERROR (_("invalid byte, character or field list")); 398 477 399 478 /* Is there already a range going to end of line? */ 400 479 if (eol_range_start != 0) … … 467 546 if (operating_mode == byte_mode) 468 547 error (0, 0, 469 548 _("byte offset %s is too large"), quote (bad_num)); 549 else if (operating_mode == character_mode) 550 error (0, 0, 551 _("character offset %s is too large"), quote (bad_num)); 470 552 else 471 553 error (0, 0, 472 554 _("field number %s is too large"), quote (bad_num)); … … 477 559 fieldstr++; 478 560 } 479 561 else 480 FATAL_ERROR (_("invalid byte or field list"));562 FATAL_ERROR (_("invalid byte, character or field list")); 481 563 } 482 564 483 565 max_range_endpoint = 0; … … 570 652 } 571 653 } 572 654 655 #if HAVE_MBRTOWC 656 /* This function is in use for the following case. 657 658 1. Read from the stream STREAM, printing to standard output any selected 659 characters. 660 661 2. Read from stream STREAM, printing to standard output any selected bytes, 662 without splitting multibyte characters. */ 663 664 static void 665 cut_characters_or_cut_bytes_no_split (FILE *stream) 666 { 667 int idx; /* number of bytes or characters in the line so far. */ 668 char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */ 669 char *bufpos; /* Next read position of BUF. */ 670 size_t buflen; /* The length of the byte sequence in buf. */ 671 wint_t wc; /* A gotten wide character. */ 672 size_t mblength; /* The byte size of a multibyte character which shows 673 as same character as WC. */ 674 mbstate_t state; /* State of the stream. */ 675 int convfail; /* 1, when conversion is failed. Otherwise 0. */ 676 677 idx = 0; 678 buflen = 0; 679 bufpos = buf; 680 memset (&state, '\0', sizeof(mbstate_t)); 681 682 while (1) 683 { 684 REFILL_BUFFER (buf, bufpos, buflen, stream); 685 686 GET_NEXT_WC_FROM_BUFFER (wc, bufpos, buflen, mblength, state, convfail); 687 688 if (wc == WEOF) 689 { 690 if (idx > 0) 691 putchar ('\n'); 692 break; 693 } 694 else if (wc == L'\n') 695 { 696 putchar ('\n'); 697 idx = 0; 698 } 699 else 700 { 701 idx += (operating_mode == byte_mode) ? mblength : 1; 702 if (print_kth (idx, NULL)) 703 fwrite (bufpos, mblength, sizeof(char), stdout); 704 } 705 706 buflen -= mblength; 707 bufpos += mblength; 708 } 709 } 710 #endif 711 573 712 /* Read from stream STREAM, printing to standard output any selected fields. */ 574 713 575 714 static void … … 692 831 } 693 832 } 694 833 834 #if HAVE_MBRTOWC 835 static void 836 cut_fields_mb (FILE *stream) 837 { 838 int c; 839 unsigned int field_idx; 840 int found_any_selected_field; 841 int buffer_first_field; 842 int empty_input; 843 char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */ 844 char *bufpos; /* Next read position of BUF. */ 845 size_t buflen; /* The length of the byte sequence in buf. */ 846 wint_t wc = 0; /* A gotten wide character. */ 847 size_t mblength; /* The byte size of a multibyte character which shows 848 as same character as WC. */ 849 mbstate_t state; /* State of the stream. */ 850 int convfail; /* 1, when conversion is failed. Otherwise 0. */ 851 852 found_any_selected_field = 0; 853 field_idx = 1; 854 bufpos = buf; 855 buflen = 0; 856 memset (&state, '\0', sizeof(mbstate_t)); 857 858 c = getc (stream); 859 empty_input = (c == EOF); 860 if (c != EOF) 861 ungetc (c, stream); 862 else 863 wc = WEOF; 864 865 /* To support the semantics of the -s flag, we may have to buffer 866 all of the first field to determine whether it is `delimited.' 867 But that is unnecessary if all non-delimited lines must be printed 868 and the first field has been selected, or if non-delimited lines 869 must be suppressed and the first field has *not* been selected. 870 That is because a non-delimited line has exactly one field. */ 871 buffer_first_field = (suppress_non_delimited ^ !print_kth (1, NULL)); 872 873 while (1) 874 { 875 if (field_idx == 1 && buffer_first_field) 876 { 877 int len = 0; 878 879 while (1) 880 { 881 REFILL_BUFFER (buf, bufpos, buflen, stream); 882 883 GET_NEXT_WC_FROM_BUFFER 884 (wc, bufpos, buflen, mblength, state, convfail); 885 886 if (wc == WEOF) 887 break; 888 889 field_1_buffer = xrealloc (field_1_buffer, len + mblength); 890 memcpy (field_1_buffer + len, bufpos, mblength); 891 len += mblength; 892 buflen -= mblength; 893 bufpos += mblength; 894 895 if (!convfail && (wc == L'\n' || wc == wcdelim)) 896 break; 897 } 898 899 if (wc == WEOF) 900 break; 901 902 /* If the first field extends to the end of line (it is not 903 delimited) and we are printing all non-delimited lines, 904 print this one. */ 905 if (convfail || (!convfail && wc != wcdelim)) 906 { 907 if (suppress_non_delimited) 908 { 909 /* Empty. */ 910 } 911 else 912 { 913 fwrite (field_1_buffer, sizeof (char), len, stdout); 914 /* Make sure the output line is newline terminated. */ 915 if (convfail || (!convfail && wc != L'\n')) 916 putchar ('\n'); 917 } 918 continue; 919 } 920 921 if (print_kth (1, NULL)) 922 { 923 /* Print the field, but not the trailing delimiter. */ 924 fwrite (field_1_buffer, sizeof (char), len - 1, stdout); 925 found_any_selected_field = 1; 926 } 927 ++field_idx; 928 } 929 930 if (wc != WEOF) 931 { 932 if (print_kth (field_idx, NULL)) 933 { 934 if (found_any_selected_field) 935 { 936 fwrite (output_delimiter_string, sizeof (char), 937 output_delimiter_length, stdout); 938 } 939 found_any_selected_field = 1; 940 } 941 942 while (1) 943 { 944 REFILL_BUFFER (buf, bufpos, buflen, stream); 945 946 GET_NEXT_WC_FROM_BUFFER 947 (wc, bufpos, buflen, mblength, state, convfail); 948 949 if (wc == WEOF) 950 break; 951 else if (!convfail && (wc == wcdelim || wc == L'\n')) 952 { 953 buflen -= mblength; 954 bufpos += mblength; 955 break; 956 } 957 958 if (print_kth (field_idx, NULL)) 959 fwrite (bufpos, mblength, sizeof(char), stdout); 960 961 buflen -= mblength; 962 bufpos += mblength; 963 } 964 } 965 966 if ((!convfail || wc == L'\n') && buflen < 1) 967 wc = WEOF; 968 969 if (!convfail && wc == wcdelim) 970 ++field_idx; 971 else if (wc == WEOF || (!convfail && wc == L'\n')) 972 { 973 if (found_any_selected_field 974 || (!empty_input && !(suppress_non_delimited && field_idx == 1))) 975 putchar ('\n'); 976 if (wc == WEOF) 977 break; 978 field_idx = 1; 979 found_any_selected_field = 0; 980 } 981 } 982 } 983 #endif 984 695 985 static void 696 986 cut_stream (FILE *stream) 697 987 { 698 if (operating_mode == byte_mode) 699 cut_bytes (stream); 988 #if HAVE_MBRTOWC 989 if (MB_CUR_MAX > 1 && !force_singlebyte_mode) 990 { 991 switch (operating_mode) 992 { 993 case byte_mode: 994 if (byte_mode_character_aware) 995 cut_characters_or_cut_bytes_no_split (stream); 996 else 997 cut_bytes (stream); 998 break; 999 1000 case character_mode: 1001 cut_characters_or_cut_bytes_no_split (stream); 1002 break; 1003 1004 case field_mode: 1005 cut_fields_mb (stream); 1006 break; 1007 1008 default: 1009 abort (); 1010 } 1011 } 700 1012 else 701 cut_fields (stream); 1013 #endif 1014 { 1015 if (operating_mode == field_mode) 1016 cut_fields (stream); 1017 else 1018 cut_bytes (stream); 1019 } 702 1020 } 703 1021 704 1022 /* Process file FILE to standard output. … … 748 1066 bool ok; 749 1067 bool delim_specified = false; 750 1068 char *spec_list_string IF_LINT(= NULL); 1069 char mbdelim[MB_LEN_MAX + 1]; 1070 size_t delimlen = 0; 751 1071 752 1072 initialize_main (&argc, &argv); 753 1073 program_name = argv[0]; … … 770 1090 switch (optc) 771 1091 { 772 1092 case 'b': 773 case 'c':774 1093 /* Build the byte list. */ 775 1094 if (operating_mode != undefined_mode) 776 1095 FATAL_ERROR (_("only one type of list may be specified")); … … 778 1097 spec_list_string = optarg; 779 1098 break; 780 1099 1100 case 'c': 1101 /* Build the character list. */ 1102 if (operating_mode != undefined_mode) 1103 FATAL_ERROR (_("only one type of list may be specified")); 1104 operating_mode = character_mode; 1105 spec_list_string = optarg; 1106 break; 1107 781 1108 case 'f': 782 1109 /* Build the field list. */ 783 1110 if (operating_mode != undefined_mode) … … 789 1116 case 'd': 790 1117 /* New delimiter. */ 791 1118 /* Interpret -d '' to mean `use the NUL byte as the delimiter.' */ 792 if (optarg[0] != '\0' && optarg[1] != '\0') 793 FATAL_ERROR (_("the delimiter must be a single character")); 794 delim = optarg[0]; 795 delim_specified = true; 1119 #if HAVE_MBRTOWC 1120 { 1121 if(MB_CUR_MAX > 1) 1122 { 1123 mbstate_t state; 1124 1125 memset (&state, '\0', sizeof(mbstate_t)); 1126 delimlen = mbrtowc (&wcdelim, optarg, strnlen(optarg, MB_LEN_MAX), &state); 1127 1128 if (delimlen == (size_t)-1 || delimlen == (size_t)-2) 1129 ++force_singlebyte_mode; 1130 else 1131 { 1132 delimlen = (delimlen < 1) ? 1 : delimlen; 1133 if (wcdelim != L'\0' && *(optarg + delimlen) != '\0') 1134 FATAL_ERROR (_("the delimiter must be a single character")); 1135 memcpy (mbdelim, optarg, delimlen); 1136 } 1137 } 1138 1139 if (MB_CUR_MAX <= 1 || force_singlebyte_mode) 1140 #endif 1141 { 1142 if (optarg[0] != '\0' && optarg[1] != '\0') 1143 FATAL_ERROR (_("the delimiter must be a single character")); 1144 delim = (unsigned char) optarg[0]; 1145 } 1146 delim_specified = true; 1147 } 796 1148 break; 797 1149 798 1150 case OUTPUT_DELIMITER_OPTION: … … 805 1157 break; 806 1158 807 1159 case 'n': 1160 byte_mode_character_aware = 1; 808 1161 break; 809 1162 810 1163 case 's': … … 827 1180 if (operating_mode == undefined_mode) 828 1181 FATAL_ERROR (_("you must specify a list of bytes, characters, or fields")); 829 1182 830 if (delim != '\0'&& operating_mode != field_mode)1183 if (delim_specified && operating_mode != field_mode) 831 1184 FATAL_ERROR (_("an input delimiter may be specified only\ 832 1185 when operating on fields")); 833 1186 … … 854 1207 } 855 1208 856 1209 if (!delim_specified) 857 delim = '\t'; 1210 { 1211 delim = '\t'; 1212 #ifdef HAVE_MBRTOWC 1213 wcdelim = L'\t'; 1214 mbdelim[0] = '\t'; 1215 mbdelim[1] = '\0'; 1216 delimlen = 1; 1217 #endif 1218 } 858 1219 859 1220 if (output_delimiter_string == NULL) 860 1221 { 861 static char dummy[2]; 862 dummy[0] = delim; 863 dummy[1] = '\0'; 864 output_delimiter_string = dummy; 865 output_delimiter_length = 1; 1222 #ifdef HAVE_MBRTOWC 1223 if (MB_CUR_MAX > 1 && !force_singlebyte_mode) 1224 { 1225 output_delimiter_string = xstrdup(mbdelim); 1226 output_delimiter_length = delimlen; 1227 } 1228 1229 if (MB_CUR_MAX <= 1 || force_singlebyte_mode) 1230 #endif 1231 { 1232 static char dummy[2]; 1233 dummy[0] = delim; 1234 dummy[1] = '\0'; 1235 output_delimiter_string = dummy; 1236 output_delimiter_length = 1; 1237 } 866 1238 } 867 1239 868 1240 if (optind == argc) -
coreutils-6.9/src/expand.c
old new 38 38 #include <stdio.h> 39 39 #include <getopt.h> 40 40 #include <sys/types.h> 41 42 /* Get mbstate_t, mbrtowc(), wcwidth(). */ 43 #if HAVE_WCHAR_H 44 # include <wchar.h> 45 #endif 46 41 47 #include "system.h" 42 48 #include "error.h" 43 49 #include "quote.h" 44 50 #include "xstrndup.h" 45 51 52 /* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC 53 installation; work around this configuration error. */ 54 #if !defined MB_LEN_MAX || MB_LEN_MAX < 2 55 # define MB_LEN_MAX 16 56 #endif 57 58 /* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */ 59 #if HAVE_MBRTOWC && defined mbstate_t 60 # define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0) 61 #endif 62 46 63 /* The official name of this program (e.g., no `g' prefix). */ 47 64 #define PROGRAM_NAME "expand" 48 65 … … 183 200 stops = num_start + len - 1; 184 201 } 185 202 } 203 186 204 else 187 205 { 188 206 error (0, 0, _("tab size contains invalid character(s): %s"), … … 365 383 } 366 384 } 367 385 386 #if HAVE_MBRTOWC 387 static void 388 expand_multibyte (void) 389 { 390 FILE *fp; /* Input strem. */ 391 mbstate_t i_state; /* Current shift state of the input stream. */ 392 mbstate_t i_state_bak; /* Back up the I_STATE. */ 393 mbstate_t o_state; /* Current shift state of the output stream. */ 394 char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */ 395 char *bufpos; /* Next read position of BUF. */ 396 size_t buflen = 0; /* The length of the byte sequence in buf. */ 397 wchar_t wc; /* A gotten wide character. */ 398 size_t mblength; /* The byte size of a multibyte character 399 which shows as same character as WC. */ 400 int tab_index = 0; /* Index in `tab_list' of next tabstop. */ 401 int column = 0; /* Column on screen of the next char. */ 402 int next_tab_column; /* Column the next tab stop is on. */ 403 int convert = 1; /* If nonzero, perform translations. */ 404 405 fp = next_file ((FILE *) NULL); 406 if (fp == NULL) 407 return; 408 409 memset (&o_state, '\0', sizeof(mbstate_t)); 410 memset (&i_state, '\0', sizeof(mbstate_t)); 411 412 for (;;) 413 { 414 /* Refill the buffer BUF. */ 415 if (buflen < MB_LEN_MAX && !feof(fp) && !ferror(fp)) 416 { 417 memmove (buf, bufpos, buflen); 418 buflen += fread (buf + buflen, sizeof(char), BUFSIZ, fp); 419 bufpos = buf; 420 } 421 422 /* No character is left in BUF. */ 423 if (buflen < 1) 424 { 425 fp = next_file (fp); 426 427 if (fp == NULL) 428 break; /* No more files. */ 429 else 430 { 431 memset (&i_state, '\0', sizeof(mbstate_t)); 432 continue; 433 } 434 } 435 436 /* Get a wide character. */ 437 i_state_bak = i_state; 438 mblength = mbrtowc (&wc, bufpos, buflen, &i_state); 439 440 switch (mblength) 441 { 442 case (size_t)-1: /* illegal byte sequence. */ 443 case (size_t)-2: 444 mblength = 1; 445 i_state = i_state_bak; 446 if (convert) 447 { 448 ++column; 449 if (convert_entire_line == 0) 450 convert = 0; 451 } 452 putchar (*bufpos); 453 break; 454 455 case 0: /* null. */ 456 mblength = 1; 457 if (convert && convert_entire_line == 0) 458 convert = 0; 459 putchar ('\0'); 460 break; 461 462 default: 463 if (wc == L'\n') /* LF. */ 464 { 465 tab_index = 0; 466 column = 0; 467 convert = 1; 468 putchar ('\n'); 469 } 470 else if (wc == L'\t' && convert) /* Tab. */ 471 { 472 if (tab_size == 0) 473 { 474 /* Do not let tab_index == first_free_tab; 475 stop when it is 1 less. */ 476 while (tab_index < first_free_tab - 1 477 && column >= tab_list[tab_index]) 478 tab_index++; 479 next_tab_column = tab_list[tab_index]; 480 if (tab_index < first_free_tab - 1) 481 tab_index++; 482 if (column >= next_tab_column) 483 next_tab_column = column + 1; 484 } 485 else 486 next_tab_column = column + tab_size - column % tab_size; 487 488 while (column < next_tab_column) 489 { 490 putchar (' '); 491 ++column; 492 } 493 } 494 else /* Others. */ 495 { 496 if (convert) 497 { 498 if (wc == L'\b') 499 { 500 if (column > 0) 501 --column; 502 } 503 else 504 { 505 int width; /* The width of WC. */ 506 507 width = wcwidth (wc); 508 column += (width > 0) ? width : 0; 509 if (convert_entire_line == 0) 510 convert = 0; 511 } 512 } 513 fwrite (bufpos, sizeof(char), mblength, stdout); 514 } 515 } 516 buflen -= mblength; 517 bufpos += mblength; 518 } 519 } 520 #endif 521 368 522 int 369 523 main (int argc, char **argv) 370 524 { … … 429 583 430 584 file_list = (optind < argc ? &argv[optind] : stdin_argv); 431 585 432 expand (); 586 #if HAVE_MBRTOWC 587 if (MB_CUR_MAX > 1) 588 expand_multibyte (); 589 else 590 #endif 591 expand (); 433 592 434 593 if (have_read_stdin && fclose (stdin) != 0) 435 594 error (EXIT_FAILURE, errno, "-"); -
coreutils-6.9/src/fold.c
old new 23 23 #include <getopt.h> 24 24 #include <sys/types.h> 25 25 26 /* Get mbstate_t, mbrtowc(), wcwidth(). */ 27 #if HAVE_WCHAR_H 28 # include <wchar.h> 29 #endif 30 31 /* Get iswprint(), iswblank(), wcwidth(). */ 32 #if HAVE_WCTYPE_H 33 # include <wctype.h> 34 #endif 35 26 36 #include "system.h" 27 37 #include "error.h" 28 38 #include "quote.h" 29 39 #include "xstrtol.h" 30 40 41 /* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC 42 installation; work around this configuration error. */ 43 #if !defined MB_LEN_MAX || MB_LEN_MAX < 2 44 # undef MB_LEN_MAX 45 # define MB_LEN_MAX 16 46 #endif 47 48 /* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */ 49 #if HAVE_MBRTOWC && defined mbstate_t 50 # define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0) 51 #endif 52 31 53 #define TAB_WIDTH 8 32 54 33 55 /* The official name of this program (e.g., no `g' prefix). */ … … 35 57 36 58 #define AUTHORS "David MacKenzie" 37 59 60 #define FATAL_ERROR(Message) \ 61 do \ 62 { \ 63 error (0, 0, (Message)); \ 64 usage (2); \ 65 } \ 66 while (0) 67 68 enum operating_mode 69 { 70 /* Fold texts by columns that are at the given positions. */ 71 column_mode, 72 73 /* Fold texts by bytes that are at the given positions. */ 74 byte_mode, 75 76 /* Fold texts by characters that are at the given positions. */ 77 character_mode, 78 }; 79 38 80 /* The name this program was run with. */ 39 81 char *program_name; 40 82 83 /* The argument shows current mode. (Default: column_mode) */ 84 static enum operating_mode operating_mode; 85 41 86 /* If nonzero, try to break on whitespace. */ 42 87 static bool break_spaces; 43 88 44 /* If nonzero, count bytes, not column positions. */45 static bool count_bytes;46 47 89 /* If nonzero, at least one of the files we read was standard input. */ 48 90 static bool have_read_stdin; 49 91 50 static char const shortopts[] = "b sw:0::1::2::3::4::5::6::7::8::9::";92 static char const shortopts[] = "bcsw:0::1::2::3::4::5::6::7::8::9::"; 51 93 52 94 static struct option const longopts[] = 53 95 { 54 96 {"bytes", no_argument, NULL, 'b'}, 97 {"characters", no_argument, NULL, 'c'}, 55 98 {"spaces", no_argument, NULL, 's'}, 56 99 {"width", required_argument, NULL, 'w'}, 57 100 {GETOPT_HELP_OPTION_DECL}, … … 81 124 "), stdout); 82 125 fputs (_("\ 83 126 -b, --bytes count bytes rather than columns\n\ 127 -c, --characters count characters rather than columns\n\ 84 128 -s, --spaces break at spaces\n\ 85 129 -w, --width=WIDTH use WIDTH columns instead of 80\n\ 86 130 "), stdout); … … 98 142 static size_t 99 143 adjust_column (size_t column, char c) 100 144 { 101 if ( !count_bytes)145 if (operating_mode != byte_mode) 102 146 { 103 147 if (c == '\b') 104 148 { … … 117 161 return column; 118 162 } 119 163 120 /* Fold file FILENAME, or standard input if FILENAME is "-", 121 to stdout, with maximum line length WIDTH. 122 Return true if successful. */ 123 124 static bool 125 fold_file (char const *filename, size_t width) 164 static void 165 fold_text (FILE *istream, size_t width, int *saved_errno) 126 166 { 127 FILE *istream;128 167 int c; 129 168 size_t column = 0; /* Screen column where next char will go. */ 130 169 size_t offset_out = 0; /* Index in `line_out' for next char. */ 131 170 static char *line_out = NULL; 132 171 static size_t allocated_out = 0; 133 int saved_errno;134 135 if (STREQ (filename, "-"))136 {137 istream = stdin;138 have_read_stdin = true;139 }140 else141 istream = fopen (filename, "r");142 143 if (istream == NULL)144 {145 error (0, errno, "%s", filename);146 return false;147 }148 172 149 173 while ((c = getc (istream)) != EOF) 150 174 { … … 172 196 bool found_blank = false; 173 197 size_t logical_end = offset_out; 174 198 199 /* If LINE_OUT has no wide character, 200 put a new wide character in LINE_OUT 201 if column is bigger than width. */ 202 if (offset_out == 0) 203 { 204 line_out[offset_out++] = c; 205 continue; 206 } 207 175 208 /* Look for the last blank. */ 176 209 while (logical_end) 177 210 { … … 218 251 line_out[offset_out++] = c; 219 252 } 220 253 221 saved_errno = errno;254 *saved_errno = errno; 222 255 223 256 if (offset_out) 224 257 fwrite (line_out, sizeof (char), (size_t) offset_out, stdout); 225 258 259 free(line_out); 260 } 261 262 #if HAVE_MBRTOWC 263 static void 264 fold_multibyte_text (FILE *istream, int width, int *saved_errno) 265 { 266 char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */ 267 size_t buflen = 0; /* The length of the byte sequence in buf. */ 268 char *bufpos; /* Next read position of BUF. */ 269 wint_t wc; /* A gotten wide character. */ 270 size_t mblength; /* The byte size of a multibyte character which shows 271 as same character as WC. */ 272 mbstate_t state, state_bak; /* State of the stream. */ 273 int convfail; /* 1, when conversion is failed. Otherwise 0. */ 274 275 char *line_out = NULL; 276 size_t offset_out = 0; /* Index in `line_out' for next char. */ 277 size_t allocated_out = 0; 278 279 int increment; 280 size_t column =
