Efficiently replace all accented characters in a string?
为了在客户端以简易方式实现接近排序规则(collation)正确的排序,我需要一个 JavaScript 函数,能够高效地替换字符串中的单个字符。
这是我的意思(请注意,这适用于德语文本,其他语言则不同):
native sorting gets it wrong: a b c o u z ä ö ü
collation-correct would be:   a ä b c o ö u ü z
基本上,我需要将给定字符串中出现的所有 "ä" 都替换为 "a"(依此类推)。这样,本机排序的结果将非常接近用户的期望(或数据库将返回的结果)。
其他语言仅具有以下功能:Python提供
这是我现在所拥有的。
// s would be a rather short string (something like
// 200 characters at max, most of the time much less)
/**
 * Builds a sort key by replacing German umlauts with their base letters.
 *
 * @param {string} s - text to convert
 * @returns {string} s with ä/ö/ü/Ä/Ö/Ü replaced by a/o/u/A/O/U
 */
function makeSortString(s) {
  var translate = {
    "ä": "a", "ö": "o", "ü": "u",
    "Ä": "A", "Ö": "O", "Ü": "U"   // probably more to come
  };
  // This literal is evaluated again on every call.
  var translate_re = /[öäüÖÄÜ]/g;
  return ( s.replace(translate_re, function(match) {
    return translate[match];
  }) );
}
首先,我不喜欢每次调用该函数都要重新构建正则表达式这一点。我猜闭包可以在这方面有所帮助,但不知为何我似乎还没有掌握它的用法。
有人能想到更有效的方法吗?
以下答案分为两类:
这是基于Unicode标准的更完整版本,摘自此处:http://semplicewebsites.com/removing-accents-javascript
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 
528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 | var Latinise={};Latinise.latin_map={"??":"A", "??":"A", "áo?":"A", "áo?":"A", "áo°":"A", "áo2":"A", "áo′":"A", "??":"A", "??":"A", "áo¤":"A", "áo?":"A", "áo|":"A", "áo¨":"A", "áoa":"A", "??":"A", "??":"A", "è|":"A", "?":"A", "áo":"A", "è€":"A", "?€":"A", "áo¢":"A", "è?":"A", "?€":"A", "??":"A", "?…":"A", "?o":"A", "á?€":"A", "èo":"A", "??":"A", "ê?2":"AA", "??":"AE", "??":"AE", "?¢":"AE", "ê?′":"AO", "ê??":"AU", "ê??":"AV", "ê?o":"AV", "ê??":"AY", "á??":"B", "á??":"B", "??":"B", "á??":"B", "é?":"B", "??":"B", "??":"C", "??":"C", "??":"C", "á??":"C", "??":"C", "??":"C", "??":"C", "è?":"C", "??":"D", "á??":"D", "á?’":"D", "á??":"D", "á??":"D", "??":"D", "á??":"D", "?2":"D", "?…":"D", "??":"D", "??":"D", "?±":"DZ", "??":"DZ", "?‰":"E", "?"":"E", "??":"E", "è¨":"E", "á??":"E", "??":"E", 
"áo?":"E", "á??":"E", "á?€":"E", "á??":"E", "á??":"E", "á??":"E", "??":"E", "?–":"E", "áo?":"E", "è?":"E", "??":"E", "áoo":"E", "è?":"E", "?’":"E", "á?–":"E", "á?"":"E", "??":"E", "é?":"E", "áo?":"E", "á??":"E", "ê?a":"ET", "á??":"F", "?‘":"F", "?′":"G", "??":"G", "?|":"G", "?¢":"G", "??":"G", "?":"G", "?"":"G", "á?":"G", "?¤":"G", "á?a":"H", "è?":"H", "á?¨":"H", "?¤":"H", "a±§":"H", "á?|":"H", "á?¢":"H", "á?¤":"H", "?|":"H", "??":"I", "??":"I", "??":"I", "??":"I", "??":"I", "á??":"I", "?°":"I", "á??":"I", "è?":"I", "??":"I", "á??":"I", "è?":"I", "?a":"I", "??":"I", "?—":"I", "?¨":"I", "á??":"I", "ê?1":"D", "ê??":"F", "ê??":"G", "ê??":"R", "ê??":"S", "ê??":"T", "ê??":"IS", "?′":"J", "é?":"J", "á?°":"K", "?¨":"K", "??":"K", "a±?":"K", "ê??":"K", "á?2":"K", "??":"K", "á?′":"K", "ê?€":"K", "ê??":"K", "?1":"L", "è?":"L", "??":"L", "??":"L", "á??":"L", "á??":"L", "á??":"L", "a±":"L", "ê??":"L", "á?o":"L", "??":"L", "a±¢":"L", "??":"L", "??":"L", "??":"LJ", "á??":"M", "á1€":"M", "á1?":"M", "a±?":"M", "??":"N", "??":"N", "?…":"N", "á1?":"N", "á1?":"N", "á1?":"N", "??":"N", "??":"N", "á1?":"N", "è":"N", "??":"N", "?‘":"N", "??":"NJ", "?"":"O", "??":"O", "?‘":"O", "?"":"O", "á??":"O", "á??":"O", "á?’":"O", "á?"":"O", "á?–":"O", "?–":"O", "èa":"O", "è?":"O", "è°":"O", "á??":"O", "??":"O", "è?":"O", "?’":"O", "á??":"O", "?":"O", "á??":"O", "á?¢":"O", "á??":"O", "á??":"O", "á?":"O", "è?":"O", "ê??":"O", "ê??":"O", "??":"O", "á1’":"O", "á1?":"O", "??":"O", "?a":"O", "??":"O", "??":"O", "??":"O", "??":"O", "á1?":"O", "á1?":"O", "è?":"O", "?¢":"OI", "ê??":"OO", "??":"E", "??":"O", "è¢":"OU", "á1"":"P", "á1–":"P", "ê?’":"P", "?¤":"P", "ê?"":"P", "a±£":"P", "ê??":"P", "ê??":"Q", "ê?–":"Q", "?"":"R", "??":"R", "?–":"R", "á1?":"R", "á1?":"R", "á1?":"R", "è?":"R", "è’":"R", "á1?":"R", "é?":"R", "a±¤":"R", "ê??":"C", "??":"E", "??":"S", "á1¤":"S", "?":"S", "á1|":"S", "??":"S", "??":"S", "è?":"S", "á1":"S", "á1¢":"S", "á1¨":"S", "?¤":"T", "?¢":"T", "á1°":"T", "è?":"T", "è?":"T", 
"á1a":"T", "á1?":"T", "??":"T", "á1?":"T", "??":"T", "?|":"T", "a±ˉ":"A", "ê?€":"L", "??":"M", "é…":"V", "ê?¨":"TZ", "??":"U", "??":"U", "?"":"U", "??":"U", "á1?":"U", "??":"U", "?—":"U", "??":"U", "??":"U", "??":"U", "á12":"U", "á?¤":"U", "?°":"U", "è"":"U", "??":"U", "á?|":"U", "?ˉ":"U", "á?¨":"U", "á?°":"U", "á?a":"U", "á??":"U", "á??":"U", "è–":"U", "?a":"U", "á1o":"U", "?2":"U", "??":"U", "?¨":"U", "á1?":"U", "á1′":"U", "ê??":"V", "á1?":"V", "?2":"V", "á1?":"V", "ê?":"VY", "áo?":"W", "?′":"W", "áo?":"W", "áo?":"W", "áo?":"W", "áo€":"W", "a±2":"W", "áo?":"X", "áo?":"X", "??":"Y", "??":"Y", "??":"Y", "áo?":"Y", "á?′":"Y", "á?2":"Y", "?3":"Y", "á??":"Y", "á??":"Y", "è2":"Y", "é?":"Y", "á??":"Y", "?1":"Z", "??":"Z", "áo?":"Z", "a±?":"Z", "??":"Z", "áo’":"Z", "è¤":"Z", "áo"":"Z", "?μ":"Z", "?2":"IJ", "?’":"OE", "á′€":"A", "á′?":"AE", "ê?":"B", "á′?":"B", "á′?":"C", "á′…":"D", "á′?":"E", "ê?°":"F", "é¢":"G", "ê?":"G", "ê?":"H", "éa":"I", "ê?":"R", "á′?":"J", "á′?":"K", "ê?":"L", "á′?":"L", "á′?":"M", "é′":"N", "á′?":"O", "é?":"OE", "á′?":"O", "á′?":"OU", "á′?":"P", "ê€":"R", "á′?":"N", "á′?":"R", "ê?±":"S", "á′?":"T", "a±?":"E", "á′?":"R", "á′?":"U", "á′":"V", "á′?":"W", "ê?":"Y", "á′¢":"Z", "??":"a", "??":"a", "áoˉ":"a", "áo·":"a", "áo±":"a", "áo3":"a", "áoμ":"a", "??":"a", "?¢":"a", "áo¥":"a", "áo-":"a", "áo§":"a", "áo?":"a", "áo?":"a", "?¤":"a", "??":"a", "è§":"a", "??":"a", "áo?":"a", "è?":"a", "?":"a", "áo£":"a", "è?":"a", "??":"a", "?…":"a", "á??":"a", "áo?":"a", "?¥":"a", "??":"a", "á??":"a", "a±¥":"a", "?£":"a", "ê?3":"aa", "?|":"ae", "??":"ae", "?£":"ae", "ê?μ":"ao", "ê?·":"au", "ê?1":"av", "ê??":"av", "ê??":"ay", "á??":"b", "á?…":"b", "é"":"b", "á??":"b", "áμ?":"b", "á?€":"b", "?€":"b", "??":"b", "éμ":"o", "??":"c", "??":"c", "?§":"c", "á?‰":"c", "?‰":"c", "é?":"c", "??":"c", "??":"c", "è?":"c", "??":"d", "á?‘":"d", "á?"":"d", "è?":"d", "á??":"d", "á??":"d", "é—":"d", "á?‘":"d", "á??":"d", "áμ-":"d", "á??":"d", "?‘":"d", "é–":"d", "??":"d", "?±":"i", 
"è·":"j", "é?":"j", "ê?":"j", "?3":"dz", "??":"dz", "??":"e", "??":"e", "??":"e", "è?":"e", "á??":"e", "?a":"e", "áo?":"e", "á??":"e", "á??":"e", "á??":"e", "á?…":"e", "á??":"e", "??":"e", "?—":"e", "áo1":"e", "è…":"e", "?¨":"e", "áo?":"e", "è?":"e", "?"":"e", "á?—":"e", "á??":"e", "a±?":"e", "??":"e", "á?’":"e", "é?":"e", "áo?":"e", "á??":"e", "ê??":"et", "á??":"f", "?’":"f", "áμ?":"f", "á??":"f", "?μ":"g", "??":"g", "?§":"g", "?£":"g", "??":"g", "??":"g", "é":"g", "á??":"g", "á??":"g", "?¥":"g", "á??":"h", "è?":"h", "á??":"h", "?¥":"h", "a±¨":"h", "á?§":"h", "á?£":"h", "á?¥":"h", "é|":"h", "áo–":"h", "?§":"h", "??":"hv", "?-":"i", "?-":"i", "??":"i", "??":"i", "?ˉ":"i", "á?ˉ":"i", "á??":"i", "è‰":"i", "??":"i", "á?‰":"i", "è?":"i", "??":"i", "?ˉ":"i", "á?–":"i", "é¨":"i", "??":"i", "á?-":"i", "ê?o":"d", "ê??":"f", "áμ1":"g", "ê??":"r", "ê?…":"s", "ê??":"t", "ê?-":"is", "?°":"j", "?μ":"j", "ê?":"j", "é‰":"j", "á?±":"k", "??":"k", "?·":"k", "a±a":"k", "ê??":"k", "á?3":"k", "??":"k", "á?μ":"k", "á??":"k", "ê??":"k", "ê?…":"k", "?o":"l", "??":"l", "é?":"l", "??":"l", "??":"l", "á??":"l", "è′":"l", "á?·":"l", "á?1":"l", "a±?":"l", "ê?‰":"l", "á??":"l", "?€":"l", "é?":"l", "á?…":"l", "é-":"l", "??":"l", "?‰":"lj", "??":"s", "áo?":"s", "áo?":"s", "áo?":"s", "á??":"m", "á1?":"m", "á1?":"m", "é±":"m", "áμˉ":"m", "á??":"m", "??":"n", "??":"n", "??":"n", "á1?":"n", "èμ":"n", "á1…":"n", "á1?":"n", "?1":"n", "é2":"n", "á1‰":"n", "??":"n", "áμ°":"n", "á??":"n", "é3":"n", "?±":"n", "??":"nj", "?3":"o", "??":"o", "?’":"o", "?′":"o", "á?‘":"o", "á??":"o", "á?"":"o", "á??":"o", "á?—":"o", "??":"o", "è?":"o", "èˉ":"o", "è±":"o", "á??":"o", "?‘":"o", "è?":"o", "?2":"o", "á??":"o", "??":"o", "á??":"o", "á?£":"o", "á??":"o", "á??":"o", "á??":"o", "è?":"o", "ê??":"o", "ê??":"o", "a±o":"o", "??":"o", "á1"":"o", "á1‘":"o", "??":"o", "?-":"o", "??":"o", "??":"o", "?μ":"o", "á1?":"o", "á1?":"o", "è-":"o", "?£":"oi", "ê??":"oo", "é?":"e", "á?"":"e", "é"":"o", "á?—":"o", "è£":"ou", 
"á1?":"p", "á1—":"p", "ê?"":"p", "?¥":"p", "áμ±":"p", "á??":"p", "ê??":"p", "áμ?":"p", "ê?‘":"p", "ê??":"q", "ê":"q", "é?":"q", "ê?—":"q", "??":"r", "??":"r", "?—":"r", "á1?":"r", "á1?":"r", "á1?":"r", "è‘":"r", "é?":"r", "áμ3":"r", "è"":"r", "á1?":"r", "é?":"r", "áμ2":"r", "á?‰":"r", "é?":"r", "é?":"r", "a??":"c", "ê??":"c", "é?":"e", "é?":"r", "??":"s", "á1¥":"s", "??":"s", "á1§":"s", "??":"s", "??":"s", "è?":"s", "á1?":"s", "á1£":"s", "á1?":"s", "ê?":"s", "áμ′":"s", "á??":"s", "è?":"s", "é?":"g", "á′‘":"o", "á′"":"o", "á′?":"u", "?¥":"t", "?£":"t", "á1±":"t", "è?":"t", "è?":"t", "áo—":"t", "a±|":"t", "á1?":"t", "á1-":"t", "?-":"t", "á1ˉ":"t", "áμμ":"t", "??":"t", "ê?":"t", "?§":"t", "áμo":"th", "é?":"a", "á′?":"ae", "??":"e", "áμ·":"g", "é¥":"h", "ê?":"h", "êˉ":"h", "á′‰":"i", "ê?":"k", "ê??":"l", "éˉ":"m", "é°":"m", "á′"":"oe", "é1":"r", "é?":"r", "éo":"r", "a±1":"r", "ê?":"t", "ê?":"v", "ê?":"w", "ê?":"y", "ê??":"tz", "?o":"u", "?-":"u", "?"":"u", "??":"u", "á1·":"u", "??":"u", "??":"u", "??":"u", "??":"u", "?–":"u", "á13":"u", "á?¥":"u", "?±":"u", "è?":"u", "?1":"u", "á?§":"u", "?°":"u", "á??":"u", "á?±":"u", "á??":"u", "á?-":"u", "á?ˉ":"u", "è—":"u", "??":"u", "á1?":"u", "?3":"u", "á??":"u", "?ˉ":"u", "??":"u", "á11":"u", "á1μ":"u", "áμ?":"ue", "ê??":"um", "a±′":"v", "ê??":"v", "á1?":"v", "ê?":"v", "á??":"v", "a±±":"v", "á1?":"v", "ê??":"vy", "áo?":"w", "?μ":"w", "áo…":"w", "áo?":"w", "áo‰":"w", "áo?":"w", "a±3":"w", "áo?":"w", "áo?":"x", "áo?":"x", "á??":"x", "??":"y", "?·":"y", "??":"y", "áo?":"y", "á?μ":"y", "á?3":"y", "?′":"y", "á?·":"y", "á??":"y", "è3":"y", "áo?":"y", "é?":"y", "á?1":"y", "?o":"z", "??":"z", "áo‘":"z", "ê‘":"z", "a±?":"z", "??":"z", "áo"":"z", "è¥":"z", "áo?":"z", "áμ?":"z", "á??":"z", "ê?":"z", "??":"z", "é€":"z", "??€":"ff", "???":"ffi", "???":"ffl", "???":"fi", "???":"fl", "?3":"ij", "?"":"oe", "???":"st", "a??":"a", "a?‘":"e", "áμ¢":"i", "a±?":"j", "a?’":"o", "áμ£":"r", "áμ¤":"u", "áμ¥":"v", "a?"":"x"}; 
/**
 * Replaces every character outside A-Z, a-z, 0-9, "[", "]" and space with its
 * entry in Latinise.latin_map; characters without an entry are kept as-is.
 * Requires Latinise.latin_map to exist by the time the method is called.
 *
 * @returns {string} the latinised copy of the string
 */
String.prototype.latinise = function () {
  // Single backslashes: "\[" and "\]" are literal brackets inside the class.
  return this.replace(/[^A-Za-z0-9\[\] ]/g, function (a) {
    return Latinise.latin_map[a] || a;
  });
};

// American-spelling alias.
String.prototype.latinize = String.prototype.latinise;

/**
 * @returns {boolean} true when latinising the string would not change it
 */
String.prototype.isLatin = function () {
  // String(this): in sloppy mode `this` is a String wrapper object, so compare
  // primitives explicitly.
  return String(this) === this.latinise();
};
一些例子:
> "Piqué".latinize();
"Pique"
> "Piqué".isLatin();
false
> "Pique".isLatin();
true
> "Piqué".latinise().isLatin();
true
我无法说出您正在尝试对函数本身进行什么操作,但是,如果您不喜欢每次都构建正则表达式,则这里有两个解决方案和一些注意事项。
这里是执行此操作的一种方法:
/**
 * Replaces German umlauts with their base letters. The regex is created once
 * and cached as a property of the function itself.
 *
 * @param {string} s - text to convert
 * @returns {string} s with ä/ö/ü/Ä/Ö/Ü replaced by a/o/u/A/O/U
 */
function makeSortString(s) {
  // Lazily create the regex on the first call only.
  if(!makeSortString.translate_re) makeSortString.translate_re = /[öäüÖÄÜ]/g;
  var translate = {
    "ä": "a", "ö": "o", "ü": "u",
    "Ä": "A", "Ö": "O", "Ü": "U"   // probably more to come
  };
  return ( s.replace(makeSortString.translate_re, function(match) {
    return translate[match];
  }) );
}
这显然会使正则表达式成为函数本身的属性。您对此可能不喜欢的唯一事情(或者您可能会猜到这取决于情况)是,现在可以在函数主体外部修改正则表达式。因此,有人可以这样做来修改相互使用的正则表达式:
// Anyone can overwrite the cached regex from outside the function:
makeSortString.translate_re = /[a-z]/g;
因此,有一个选项。
一种获取闭包(从而防止某人修改正则表达式)的方法是将其定义为匿名函数分配,如下所示:
/**
 * Replaces German umlauts with their base letters. The regex lives in a
 * closure, so it is created once and cannot be modified from outside.
 *
 * @param {string} s - text to convert
 * @returns {string} s with ä/ö/ü/Ä/Ö/Ü replaced by a/o/u/A/O/U
 */
var makeSortString = (function() {
  var translate_re = /[öäüÖÄÜ]/g;
  return function(s) {
    var translate = {
      "ä": "a", "ö": "o", "ü": "u",
      "Ä": "A", "Ö": "O", "Ü": "U"   // probably more to come
    };
    return ( s.replace(translate_re, function(match) {
      return translate[match];
    }) );
  }
})();
希望这对您有用。
更新:时间还早,我不知道为什么之前没有看到这个显而易见的做法,不过把 translate 对象也一并放进闭包里可能同样有用:
/**
 * Replaces German umlauts with their base letters. Both the regex and the
 * lookup table live in a closure, so neither is rebuilt per call.
 *
 * @param {string} s - text to convert
 * @returns {string} s with ä/ö/ü/Ä/Ö/Ü replaced by a/o/u/A/O/U
 */
var makeSortString = (function() {
  var translate_re = /[öäüÖÄÜ]/g;
  var translate = {
    "ä": "a", "ö": "o", "ü": "u",
    "Ä": "A", "Ö": "O", "Ü": "U"   // probably more to come
  };
  return function(s) {
    return ( s.replace(translate_re, function(match) {
      return translate[match];
    }) );
  }
})();
这类口音的正确术语是变音符号。谷歌搜索该术语后,我发现此函数是
完整功能以备将来参考:
/**
 * Replaces accented, circled, full-width and ligature Latin characters with
 * their plain ASCII base letter(s), using a fixed table of character classes.
 *
 * @param {string} str - text to convert
 * @returns {string} str with every listed character replaced by its base
 */
function removeDiacritics (str) {

  // Each entry maps a class of code points to its replacement. The escapes use
  // a single backslash: a doubled one would put "\", "u" and hex digits into
  // the class and rewrite ordinary ASCII text.
  var defaultDiacriticsRemovalMap = [
    {'base':'A', 'letters':/[\u0041\u24B6\uFF21\u00C0\u00C1\u00C2\u1EA6\u1EA4\u1EAA\u1EA8\u00C3\u0100\u0102\u1EB0\u1EAE\u1EB4\u1EB2\u0226\u01E0\u00C4\u01DE\u1EA2\u00C5\u01FA\u01CD\u0200\u0202\u1EA0\u1EAC\u1EB6\u1E00\u0104\u023A\u2C6F]/g},
    {'base':'AA','letters':/[\uA732]/g},
    {'base':'AE','letters':/[\u00C6\u01FC\u01E2]/g},
    {'base':'AO','letters':/[\uA734]/g},
    {'base':'AU','letters':/[\uA736]/g},
    {'base':'AV','letters':/[\uA738\uA73A]/g},
    {'base':'AY','letters':/[\uA73C]/g},
    {'base':'B', 'letters':/[\u0042\u24B7\uFF22\u1E02\u1E04\u1E06\u0243\u0182\u0181]/g},
    {'base':'C', 'letters':/[\u0043\u24B8\uFF23\u0106\u0108\u010A\u010C\u00C7\u1E08\u0187\u023B\uA73E]/g},
    {'base':'D', 'letters':/[\u0044\u24B9\uFF24\u1E0A\u010E\u1E0C\u1E10\u1E12\u1E0E\u0110\u018B\u018A\u0189\uA779]/g},
    {'base':'DZ','letters':/[\u01F1\u01C4]/g},
    {'base':'Dz','letters':/[\u01F2\u01C5]/g},
    {'base':'E', 'letters':/[\u0045\u24BA\uFF25\u00C8\u00C9\u00CA\u1EC0\u1EBE\u1EC4\u1EC2\u1EBC\u0112\u1E14\u1E16\u0114\u0116\u00CB\u1EBA\u011A\u0204\u0206\u1EB8\u1EC6\u0228\u1E1C\u0118\u1E18\u1E1A\u0190\u018E]/g},
    {'base':'F', 'letters':/[\u0046\u24BB\uFF26\u1E1E\u0191\uA77B]/g},
    {'base':'G', 'letters':/[\u0047\u24BC\uFF27\u01F4\u011C\u1E20\u011E\u0120\u01E6\u0122\u01E4\u0193\uA7A0\uA77D\uA77E]/g},
    {'base':'H', 'letters':/[\u0048\u24BD\uFF28\u0124\u1E22\u1E26\u021E\u1E24\u1E28\u1E2A\u0126\u2C67\u2C75\uA78D]/g},
    {'base':'I', 'letters':/[\u0049\u24BE\uFF29\u00CC\u00CD\u00CE\u0128\u012A\u012C\u0130\u00CF\u1E2E\u1EC8\u01CF\u0208\u020A\u1ECA\u012E\u1E2C\u0197]/g},
    {'base':'J', 'letters':/[\u004A\u24BF\uFF2A\u0134\u0248]/g},
    {'base':'K', 'letters':/[\u004B\u24C0\uFF2B\u1E30\u01E8\u1E32\u0136\u1E34\u0198\u2C69\uA740\uA742\uA744\uA7A2]/g},
    {'base':'L', 'letters':/[\u004C\u24C1\uFF2C\u013F\u0139\u013D\u1E36\u1E38\u013B\u1E3C\u1E3A\u0141\u023D\u2C62\u2C60\uA748\uA746\uA780]/g},
    {'base':'LJ','letters':/[\u01C7]/g},
    {'base':'Lj','letters':/[\u01C8]/g},
    {'base':'M', 'letters':/[\u004D\u24C2\uFF2D\u1E3E\u1E40\u1E42\u2C6E\u019C]/g},
    {'base':'N', 'letters':/[\u004E\u24C3\uFF2E\u01F8\u0143\u00D1\u1E44\u0147\u1E46\u0145\u1E4A\u1E48\u0220\u019D\uA790\uA7A4]/g},
    {'base':'NJ','letters':/[\u01CA]/g},
    {'base':'Nj','letters':/[\u01CB]/g},
    {'base':'O', 'letters':/[\u004F\u24C4\uFF2F\u00D2\u00D3\u00D4\u1ED2\u1ED0\u1ED6\u1ED4\u00D5\u1E4C\u022C\u1E4E\u014C\u1E50\u1E52\u014E\u022E\u0230\u00D6\u022A\u1ECE\u0150\u01D1\u020C\u020E\u01A0\u1EDC\u1EDA\u1EE0\u1EDE\u1EE2\u1ECC\u1ED8\u01EA\u01EC\u00D8\u01FE\u0186\u019F\uA74A\uA74C]/g},
    {'base':'OI','letters':/[\u01A2]/g},
    {'base':'OO','letters':/[\uA74E]/g},
    {'base':'OU','letters':/[\u0222]/g},
    {'base':'P', 'letters':/[\u0050\u24C5\uFF30\u1E54\u1E56\u01A4\u2C63\uA750\uA752\uA754]/g},
    {'base':'Q', 'letters':/[\u0051\u24C6\uFF31\uA756\uA758\u024A]/g},
    {'base':'R', 'letters':/[\u0052\u24C7\uFF32\u0154\u1E58\u0158\u0210\u0212\u1E5A\u1E5C\u0156\u1E5E\u024C\u2C64\uA75A\uA7A6\uA782]/g},
    {'base':'S', 'letters':/[\u0053\u24C8\uFF33\u1E9E\u015A\u1E64\u015C\u1E60\u0160\u1E66\u1E62\u1E68\u0218\u015E\u2C7E\uA7A8\uA784]/g},
    {'base':'T', 'letters':/[\u0054\u24C9\uFF34\u1E6A\u0164\u1E6C\u021A\u0162\u1E70\u1E6E\u0166\u01AC\u01AE\u023E\uA786]/g},
    {'base':'TZ','letters':/[\uA728]/g},
    {'base':'U', 'letters':/[\u0055\u24CA\uFF35\u00D9\u00DA\u00DB\u0168\u1E78\u016A\u1E7A\u016C\u00DC\u01DB\u01D7\u01D5\u01D9\u1EE6\u016E\u0170\u01D3\u0214\u0216\u01AF\u1EEA\u1EE8\u1EEE\u1EEC\u1EF0\u1EE4\u1E72\u0172\u1E76\u1E74\u0244]/g},
    {'base':'V', 'letters':/[\u0056\u24CB\uFF36\u1E7C\u1E7E\u01B2\uA75E\u0245]/g},
    {'base':'VY','letters':/[\uA760]/g},
    {'base':'W', 'letters':/[\u0057\u24CC\uFF37\u1E80\u1E82\u0174\u1E86\u1E84\u1E88\u2C72]/g},
    {'base':'X', 'letters':/[\u0058\u24CD\uFF38\u1E8A\u1E8C]/g},
    {'base':'Y', 'letters':/[\u0059\u24CE\uFF39\u1EF2\u00DD\u0176\u1EF8\u0232\u1E8E\u0178\u1EF6\u1EF4\u01B3\u024E\u1EFE]/g},
    {'base':'Z', 'letters':/[\u005A\u24CF\uFF3A\u0179\u1E90\u017B\u017D\u1E92\u1E94\u01B5\u0224\u2C7F\u2C6B\uA762]/g},
    {'base':'a', 'letters':/[\u0061\u24D0\uFF41\u1E9A\u00E0\u00E1\u00E2\u1EA7\u1EA5\u1EAB\u1EA9\u00E3\u0101\u0103\u1EB1\u1EAF\u1EB5\u1EB3\u0227\u01E1\u00E4\u01DF\u1EA3\u00E5\u01FB\u01CE\u0201\u0203\u1EA1\u1EAD\u1EB7\u1E01\u0105\u2C65\u0250]/g},
    {'base':'aa','letters':/[\uA733]/g},
    {'base':'ae','letters':/[\u00E6\u01FD\u01E3]/g},
    {'base':'ao','letters':/[\uA735]/g},
    {'base':'au','letters':/[\uA737]/g},
    {'base':'av','letters':/[\uA739\uA73B]/g},
    {'base':'ay','letters':/[\uA73D]/g},
    {'base':'b', 'letters':/[\u0062\u24D1\uFF42\u1E03\u1E05\u1E07\u0180\u0183\u0253]/g},
    {'base':'c', 'letters':/[\u0063\u24D2\uFF43\u0107\u0109\u010B\u010D\u00E7\u1E09\u0188\u023C\uA73F\u2184]/g},
    {'base':'d', 'letters':/[\u0064\u24D3\uFF44\u1E0B\u010F\u1E0D\u1E11\u1E13\u1E0F\u0111\u018C\u0256\u0257\uA77A]/g},
    {'base':'dz','letters':/[\u01F3\u01C6]/g},
    {'base':'e', 'letters':/[\u0065\u24D4\uFF45\u00E8\u00E9\u00EA\u1EC1\u1EBF\u1EC5\u1EC3\u1EBD\u0113\u1E15\u1E17\u0115\u0117\u00EB\u1EBB\u011B\u0205\u0207\u1EB9\u1EC7\u0229\u1E1D\u0119\u1E19\u1E1B\u0247\u025B\u01DD]/g},
    {'base':'f', 'letters':/[\u0066\u24D5\uFF46\u1E1F\u0192\uA77C]/g},
    {'base':'g', 'letters':/[\u0067\u24D6\uFF47\u01F5\u011D\u1E21\u011F\u0121\u01E7\u0123\u01E5\u0260\uA7A1\u1D79\uA77F]/g},
    {'base':'h', 'letters':/[\u0068\u24D7\uFF48\u0125\u1E23\u1E27\u021F\u1E25\u1E29\u1E2B\u1E96\u0127\u2C68\u2C76\u0265]/g},
    {'base':'hv','letters':/[\u0195]/g},
    {'base':'i', 'letters':/[\u0069\u24D8\uFF49\u00EC\u00ED\u00EE\u0129\u012B\u012D\u00EF\u1E2F\u1EC9\u01D0\u0209\u020B\u1ECB\u012F\u1E2D\u0268\u0131]/g},
    {'base':'j', 'letters':/[\u006A\u24D9\uFF4A\u0135\u01F0\u0249]/g},
    {'base':'k', 'letters':/[\u006B\u24DA\uFF4B\u1E31\u01E9\u1E33\u0137\u1E35\u0199\u2C6A\uA741\uA743\uA745\uA7A3]/g},
    {'base':'l', 'letters':/[\u006C\u24DB\uFF4C\u0140\u013A\u013E\u1E37\u1E39\u013C\u1E3D\u1E3B\u017F\u0142\u019A\u026B\u2C61\uA749\uA781\uA747]/g},
    {'base':'lj','letters':/[\u01C9]/g},
    {'base':'m', 'letters':/[\u006D\u24DC\uFF4D\u1E3F\u1E41\u1E43\u0271\u026F]/g},
    {'base':'n', 'letters':/[\u006E\u24DD\uFF4E\u01F9\u0144\u00F1\u1E45\u0148\u1E47\u0146\u1E4B\u1E49\u019E\u0272\u0149\uA791\uA7A5]/g},
    {'base':'nj','letters':/[\u01CC]/g},
    {'base':'o', 'letters':/[\u006F\u24DE\uFF4F\u00F2\u00F3\u00F4\u1ED3\u1ED1\u1ED7\u1ED5\u00F5\u1E4D\u022D\u1E4F\u014D\u1E51\u1E53\u014F\u022F\u0231\u00F6\u022B\u1ECF\u0151\u01D2\u020D\u020F\u01A1\u1EDD\u1EDB\u1EE1\u1EDF\u1EE3\u1ECD\u1ED9\u01EB\u01ED\u00F8\u01FF\u0254\uA74B\uA74D\u0275]/g},
    {'base':'oi','letters':/[\u01A3]/g},
    {'base':'ou','letters':/[\u0223]/g},
    {'base':'oo','letters':/[\uA74F]/g},
    {'base':'p','letters':/[\u0070\u24DF\uFF50\u1E55\u1E57\u01A5\u1D7D\uA751\uA753\uA755]/g},
    {'base':'q','letters':/[\u0071\u24E0\uFF51\u024B\uA757\uA759]/g},
    {'base':'r','letters':/[\u0072\u24E1\uFF52\u0155\u1E59\u0159\u0211\u0213\u1E5B\u1E5D\u0157\u1E5F\u024D\u027D\uA75B\uA7A7\uA783]/g},
    {'base':'s','letters':/[\u0073\u24E2\uFF53\u00DF\u015B\u1E65\u015D\u1E61\u0161\u1E67\u1E63\u1E69\u0219\u015F\u023F\uA7A9\uA785\u1E9B]/g},
    {'base':'t','letters':/[\u0074\u24E3\uFF54\u1E6B\u1E97\u0165\u1E6D\u021B\u0163\u1E71\u1E6F\u0167\u01AD\u0288\u2C66\uA787]/g},
    {'base':'tz','letters':/[\uA729]/g},
    {'base':'u','letters':/[\u0075\u24E4\uFF55\u00F9\u00FA\u00FB\u0169\u1E79\u016B\u1E7B\u016D\u00FC\u01DC\u01D8\u01D6\u01DA\u1EE7\u016F\u0171\u01D4\u0215\u0217\u01B0\u1EEB\u1EE9\u1EEF\u1EED\u1EF1\u1EE5\u1E73\u0173\u1E77\u1E75\u0289]/g},
    {'base':'v','letters':/[\u0076\u24E5\uFF56\u1E7D\u1E7F\u028B\uA75F\u028C]/g},
    {'base':'vy','letters':/[\uA761]/g},
    {'base':'w','letters':/[\u0077\u24E6\uFF57\u1E81\u1E83\u0175\u1E87\u1E85\u1E98\u1E89\u2C73]/g},
    {'base':'x','letters':/[\u0078\u24E7\uFF58\u1E8B\u1E8D]/g},
    {'base':'y','letters':/[\u0079\u24E8\uFF59\u1EF3\u00FD\u0177\u1EF9\u0233\u1E8F\u00FF\u1EF7\u1E99\u1EF5\u01B4\u024F\u1EFF]/g},
    {'base':'z','letters':/[\u007A\u24E9\uFF5A\u017A\u1E91\u017C\u017E\u1E93\u1E95\u01B6\u0225\u0240\u2C6C\uA763]/g}
  ];

  // Apply every class in turn; each pass rewrites one base letter's variants.
  for(var i=0; i<defaultDiacriticsRemovalMap.length; i++) {
    str = str.replace(defaultDiacriticsRemovalMap[i].letters, defaultDiacriticsRemovalMap[i].base);
  }

  return str;

}
https://stackoverflow.com/a/37511463
With ES2015/ES6 String.prototype.normalize(),
const str = "Crème Brulée";
// Decompose to NFD, then drop the combining marks (U+0300–U+036F).
str.normalize('NFD').replace(/[\u0300-\u036f]/g, "");
> 'Creme Brulee'

Two things are happening here:

1. normalize()-ing to the NFD Unicode normal form decomposes combined graphemes into combinations of simple ones. The è of Crème ends up expressed as e + U+0300 (combining grave accent).
2. Using a regex character class to match the U+0300 → U+036F range, it is now trivial to globally get rid of the diacritics, which the Unicode standard conveniently groups as the Combining Diacritical Marks Unicode block.

See comment for performance testing.
Alternatively, if you just want sorting
Intl.Collator has sufficient support ~85% right now, a polyfill is also available here but I haven't tested it.
const c = new Intl.Collator();

// Locale-aware comparison keeps accented letters next to their base letters.
const collated = ['creme brulee', 'crème brulée', 'crame brulai', 'crome brouillé',
  'creme brulay', 'creme brulfé', 'creme bruléa'].sort(c.compare);
// [ 'crame brulai', 'creme brulay', 'creme bruléa', 'creme brulee',
//   'crème brulée', 'creme brulfé', 'crome brouillé' ]

// Plain code-unit comparison for contrast: "è" compares greater than any ASCII letter.
// A sort comparator must return a negative, zero or positive number; returning
// the boolean `a > b` does not define a valid ordering.
const naive = ['creme brulee', 'crème brulée', 'crame brulai', 'crome brouillé']
  .sort((a, b) => (a < b ? -1 : a > b ? 1 : 0));
// [ 'crame brulai', 'creme brulee', 'crome brouillé', 'crème brulée' ]
只需先对字符串进行规范化(normalize),然后执行替换即可:
var str = "Letras Á É Í Ó Ú Ñ - á é í ó ú ñ...";
// NFKD splits each accented letter into base letter + combining mark; the
// regex then removes the marks (U+0300–U+036F).
console.log (str.normalize ("NFKD").replace (/[\u0300-\u036F]/g, ""));
// Letras A E I O U N - a e i o u n...
请参见规范化
然后您可以使用此功能:
/**
 * Removes combining diacritical marks (U+0300–U+036F) from a string.
 * When String.prototype.normalize is available the string is first decomposed
 * to NFKD so precomposed letters lose their accents as well.
 *
 * @param {string} s - text to convert
 * @returns {string} s without combining marks
 */
function noTilde (s) {
  // Feature check: only call normalize() where the engine provides it.
  if (typeof s.normalize === "function") {
    s = s.normalize ("NFKD");
  }
  // Single backslashes: the class is exactly the range U+0300–U+036F.
  return s.replace (/[\u0300-\u036F]/g, "");
}
我认为这可能更干净/更好(尽管我尚未测试其性能):
/**
 * Replaces common accented Latin-1 letters with their unaccented base letter.
 *
 * @returns {string} copy of the string without the listed accents
 */
String.prototype.stripAccents = function() {
  var translate_re = /[àáâãäçèéêëìíîïñòóôõöùúûüýÿÀÁÂÃÄÇÈÉÊËÌÍÎÏÑÒÓÔÕÖÙÚÛÜÝ]/g;
  var translate = 'aaaaaceeeeiiiinooooouuuuyyAAAAACEEEEIIIINOOOOOUUUUY';
  return (this.replace(translate_re, function(match){
    // translate_re.source starts with "[", so the position inside the class is
    // one less than the index in the source; that position indexes `translate`.
    return translate.charAt(translate_re.source.indexOf(match) - 1);
  })
  );
};
或者,如果您仍然过于担心性能,让我们两全其美:
/**
 * Replaces common accented Latin-1 letters with their unaccented base letter,
 * using a lookup table built from two parallel strings.
 *
 * @returns {string} copy of the string without the listed accents
 */
String.prototype.stripAccents = function() {
  var in_chrs  = 'àáâãäçèéêëìíîïñòóôõöùúûüýÿÀÁÂÃÄÇÈÉÊËÌÍÎÏÑÒÓÔÕÖÙÚÛÜÝ',
      out_chrs = 'aaaaaceeeeiiiinooooouuuuyyAAAAACEEEEIIIINOOOOOUUUUY',
      transl = {};
  // Built with the RegExp constructor rather than eval(): a `var` declared
  // inside eval() is not visible here in strict-mode or module code.
  var chars_rgx = new RegExp('[' + in_chrs + ']', 'g');
  // in_chrs[i] maps to out_chrs[i].
  for(var i = 0; i < in_chrs.length; i++){ transl[in_chrs.charAt(i)] = out_chrs.charAt(i); }
  return this.replace(chars_rgx, function(match){
    return transl[match]; });
};
编辑(@Tomalak提供)
我很欣赏这个主意。但是,实现存在一些问题,如下面的注释中所述。
这是我要实现的方式。
/**
 * Replaces common accented Latin-1 letters with their unaccented base letter.
 * The regex and the lookup table are built once, inside the closure.
 *
 * @param {string} s - text to convert
 * @returns {string} s without the listed accents
 */
var stripAccents = (function () {
  var in_chrs   = 'àáâãäçèéêëìíîïñòóôõöùúûüýÿÀÁÂÃÄÇÈÉÊËÌÍÎÏÑÒÓÔÕÖÙÚÛÜÝ',
      out_chrs  = 'aaaaaceeeeiiiinooooouuuuyyAAAAACEEEEIIIINOOOOOUUUUY',
      chars_rgx = new RegExp('[' + in_chrs + ']', 'g'),
      transl    = {}, i,
      // Falls back to the matched character when no mapping exists.
      lookup    = function (m) { return transl[m] || m; };

  // in_chrs[i] maps to out_chrs[i].
  for (i=0; i<in_chrs.length; i++) {
    transl[ in_chrs[i] ] = out_chrs[i];
  }

  return function (s) { return s.replace(chars_rgx, lookup); }
})();
基于Jason Bunting的解决方案,这是我现在使用的方法。
整个问题是针对 jQuery tablesorter 插件的:要使用 tablesorter 插件对非英语表格进行(几乎正确的)排序,必须使用自定义的 textExtraction 函数。
这个:
- 将最常见的带重音字母转换为无重音字母(支持的字母列表可以轻松扩展)
- 将德语格式('dd.mm.yyyy')的日期更改为公认的格式('yyyy-mm-dd')
请小心将JavaScript文件保存为UTF-8编码,否则将无法正常工作。
// file encoding must be UTF-8!
/**
 * Creates a text-extraction callback for the jQuery tablesorter plugin.
 * The callback trims a node's text, rewrites a trailing 'dd.mm.yyyy' date as
 * 'yyyy-mm-dd', and otherwise replaces accented letters with base letters.
 * Requires jQuery (`$`) when the returned callback runs.
 *
 * @returns {function(Node): string} callback taking a DOM node
 */
function getTextExtractor()
{
  return (function() {
    var patternLetters = /[öäüÖÄÜáàâéèêúùûóòôÁÀÂÉÈÊÚÙÛÓÒÔß]/g;
    // Optional non-digit prefix, then day.month.year at the end of the text.
    var patternDateDmy = /^(?:\D+)?(\d{1,2})\.(\d{1,2})\.(\d{2,4})$/;
    var lookupLetters = {
      "ä": "a", "ö": "o", "ü": "u",
      "Ä": "A", "Ö": "O", "Ü": "U",
      "á": "a", "à": "a", "â": "a",
      "é": "e", "è": "e", "ê": "e",
      "ú": "u", "ù": "u", "û": "u",
      "ó": "o", "ò": "o", "ô": "o",
      "Á": "A", "À": "A", "Â": "A",
      "É": "E", "È": "E", "Ê": "E",
      "Ú": "U", "Ù": "U", "Û": "U",
      "Ó": "O", "Ò": "O", "Ô": "O",
      "ß": "s"
    };
    var letterTranslator = function(match) {
      return lookupLetters[match] || match;
    }

    return function(node) {
      var text = $.trim($(node).text());
      var date = text.match(patternDateDmy);
      if (date)
        // Reorder day/month/year captures into year-month-day.
        return [date[3], date[2], date[1]].join("-");
      else
        return text.replace(patternLetters, letterTranslator);
    }
  })();
}
您可以像这样使用它:
// Plug the custom extractor into tablesorter.
$("table.sortable").tablesorter({
  textExtraction: getTextExtractor()
});
您的请求的完整解决方案是:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 | function convert_accented_characters(str){ var conversions = new Object(); conversions['ae'] = '?¤|?||??'; conversions['oe'] = '??|?"'; conversions['ue'] = '??'; conversions['Ae'] = '??'; conversions['Ue'] = '??'; conversions['Oe'] = '?–'; conversions['A'] = '?€|??|??|??|??|?…|?o|?€|??|??|??'; conversions['a'] = '?|??|?¢|?£|?¥|??|??|??|?…|??|?a'; conversions['C'] = '??|??|??|??|??'; conversions['c'] = '?§|??|?‰|??|??'; conversions['D'] = '??|??|??'; conversions['d'] = '?°|??|?‘'; conversions['E'] = '??|?‰|??|??|?’|?"|?–|??|??'; conversions['e'] = '?¨|??|?a|??|?"|??|?—|??|??'; conversions['G'] = '??|??|?|?¢'; conversions['g'] = '??|??|??|?£'; conversions['H'] = '?¤|?|'; conversions['h'] = '?¥|?§'; conversions['I'] = '??|??|??|??|?¨|?a|??|??|??|?°'; conversions['i'] = '??|?-|??|?ˉ|??|??|?-|??|?ˉ|?±'; conversions['J'] = '?′'; conversions['j'] = '?μ'; conversions['K'] = '??'; conversions['k'] = '?·'; conversions['L'] = '?1|??|??|??|??'; conversions['l'] = '?o|??|??|?€|??'; conversions['N'] = '?‘|??|?…|??'; conversions['n'] = '?±|??|??|??|?‰'; conversions['O'] = '?’|?"|?"|??|??|??|?‘|??|?|??|??'; conversions['o'] = '?2|?3|?′|?μ|??|??|?’|?‘|??|??|??|?o'; conversions['R'] = '?"|?–|??'; conversions['r'] = '??|?—|??'; conversions['S'] = '??|??|??|?'; conversions['s'] = '??|??|??|??|??'; conversions['T'] = '?¢|?¤|?|'; conversions['t'] = '?£|?¥|?§'; conversions['U'] = '??|??|??|?¨|?a|??|??|?°|?2|?ˉ|?"|??|?—|??|??'; conversions['u'] = '?1|?o|??|??|??|?-|?ˉ|?±|?3|?°|?"|?–|??|??|??'; conversions['Y'] = '??|??|??'; conversions['y'] = '??|??|?·'; conversions['W'] = '?′'; conversions['w'] = '?μ'; conversions['Z'] = '?1|??|??'; conversions['z'] = '?o|??|??'; conversions['AE'] = '??|??'; conversions['ss'] = '??'; conversions['IJ'] = '?2'; conversions['ij'] = '?3'; conversions['OE'] = '?’'; conversions['f'] = '?’'; 
for(var i in conversions){ var re = new RegExp(conversions[i],"g"); str = str.replace(re,i); } return str; } |
如果您正在寻找一种将重音字符转换为非重音字符的方法,而不是一种对重音字符进行排序的方法,并且需要一点点改动,则可以使用String.localeCompare函数来查找基本的拉丁字符与扩展的匹配。例如,您可能想从页面标题中生成对人类友好的URL标记。如果是这样,您可以执行以下操作:
// Candidate base letters: 'a'..'z' (char codes 97..122).
var baseChars = [];
for (var i = 97; i < 97 + 26; i++) {
  baseChars.push(String.fromCharCode(i));
}

//if needed, handle fancy compound characters
baseChars = baseChars.concat('ss,aa,ae,ao,au,av,ay,dz,hv,lj,nj,oi,ou,oo,tz,vy'.split(','));

/**
 * @param {string} c - character to test
 * @returns {boolean} true when c differs from its locale lower-case form
 */
function isUpperCase(c) { return c !== c.toLocaleLowerCase() }

/**
 * Finds the entry of baseChars that compares equal to c at 'base' sensitivity.
 *
 * @param {string} c - character to convert
 * @param {Object} [opts]
 * @param {string} [opts.locale='en'] - locale passed to localeCompare
 * @param {string} [opts.nonAlphaChar] - returned instead of c when c does not
 *   sort after '9'
 * @param {string} [opts.noMatchChar] - returned instead of c when no base
 *   character matches
 * @returns {string} matching base character(s), upper-cased when c is upper case
 */
function toBaseChar(c, opts) {
  opts = opts || {};
  //if (!('nonAlphaChar' in opts)) opts.nonAlphaChar = '';
  //if (!('noMatchChar' in opts)) opts.noMatchChar = '';
  // Read the locale into a local so the caller's opts object is not modified.
  var locale = ('locale' in opts) ? opts.locale : 'en';

  var cOpts = {sensitivity: 'base'};

  //exit early for any non-alphabetical character
  if (c.localeCompare('9', locale, cOpts) <= 0) return opts.nonAlphaChar === undefined ? c : opts.nonAlphaChar;

  for (var i = 0; i < baseChars.length; i++) {
    var baseChar = baseChars[i];

    var comp = c.localeCompare(baseChar, locale, cOpts);
    if (comp === 0) return (isUpperCase(c)) ? baseChar.toUpperCase() : baseChar;
  }

  return opts.noMatchChar === undefined ? c : opts.noMatchChar;
}

/**
 * Replaces every character that is not a word character, whitespace or digit
 * with its base character as found by toBaseChar.
 *
 * @param {string} str - text to convert
 * @param {Object} [opts] - forwarded to toBaseChar
 * @returns {string} converted text
 */
function latinify(str, opts) {
  // Single backslashes: \w, \s and \d are character-class escapes.
  return str.replace(/[^\w\s\d]/g, function(c) {
    return toBaseChar(c, opts);
  })
}

// Example:
console.log(latinify('Čeština Tsėhesenėstsestotse Tshivenḓa Emigliàn–Rumagnòl Slovenščina Português Tiếng Việt Straße'))
//"Cestina Tsehesenestsestotse Tshivenda Emiglian–Rumagnol Slovenscina Portugues Tieng Viet Strasse"
这应该表现得很好,但是如果需要进一步优化,可以将二进制搜索与
我想不出一种比使用这种惊人的解决方案更简单的方法来有效地从字符串中删除所有变音符号。
查看实际情况:
var string = "öäüÖÄÜ";

// Decompose to NFD, then drop the combining marks (U+0300–U+036F).
var string_norm = string.normalize('NFD').replace(/[\u0300-\u036f]/g, '');
console.log(string_norm);
我为此制作了一个原型版:
/**
 * Replace German umlauts and sharp s by ASCII letters and spaces by
 * underscores.
 *
 * NOTE: this extends the native String prototype, as the original did;
 * prefer a standalone function in shared code.
 *
 * @returns {string} A new string; the receiver is not modified.
 */
String.prototype.strip = function () {
  var translate_re = /[öäüÖÄÜß ]/g;
  var translate = {
    "ä": "a", "ö": "o", "ü": "u",
    "Ä": "A", "Ö": "O", "Ü": "U",
    // The key must be the space the regex matches; with an empty key a
    // space would be replaced by the text "undefined".
    " ": "_", "ß": "ss" // probably more to come
  };
  return this.replace(translate_re, function (match) {
    return translate[match];
  });
};
使用方式:
var teststring = 'ä ö ü Ä Ö Ü ß';
teststring.strip(); // returns "a_o_u_A_O_U_ss"; teststring itself is not modified
这会将字符串更改为a_o_u_A_O_U_ss
基于现有答案和一些建议,我创建了这个答案:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 | String.prototype.removeAccents = function() { var removalMap = { 'A' : /[Aa’?????€????áo|áo¤áoaáo¨???€??áo°áo?áo′áo2è|?????áo¢?…?o??è€è?áoáo?áo?á?€??]/g, 'AA' : /[ê?2]/g, 'AE' : /[?????¢]/g, 'AO' : /[ê?′]/g, 'AU' : /[ê??]/g, 'AV' : /[ê??ê?o]/g, 'AY' : /[ê??]/g, 'B' : /[Ba’·??¢á??á??á??é?????]/g, 'C' : /[Ca’???£??????????á????è?ê??]/g, 'D' : /[Da’1??¤á????á??á??á?’á?????????‰ê?1]/g, 'DZ' : /[?±??]/g, 'Dz' : /[?2?…]/g, 'E' : /[Ea’o??¥???‰??á?€áo?á??á??áo??’á?"á?–?"?–??áoo??è?è?áo?á??è¨á????á??á??????]/g, 'F' : /[Fa’???|á???‘ê??]/g, 'G' : /[Ga’???§?′??á?????|?¢?¤?"ê?ê??ê??]/g, 'H' : /[Ha’???¨?¤á?¢á?|è?á?¤á?¨á?a?|a±§a±μê??]/g, 'I' : /[Ia’???????????¨?a???°??á??á????è?è?á????á???—]/g, 'J' : /[Ja’???a?′é?]/g, 'K' : /[Ka"€???á?°?¨á?2??á?′??a±?ê?€ê??ê??ê?¢]/g, 'L' : /[La"???????1??á??á????á??á?o??è?a±¢a±ê??ê??ê?€]/g, 'LJ' : /[??]/g, 'Lj' : /[??]/g, 'M' : /[Ma"???-á??á1€á1?a±???]/g, 'N' : /[Na"?????????‘á1???á1??…á1?á1?è??ê??ê?¤]/g, 'NJ' : /[??]/g, 'Nj' : /[??]/g, 'O' : /[Oa"???ˉ?’?"?"á?’á??á?–á?"??á1?è?á1???á1?á1’??è?è°?–èaá?????‘è?è??á??á??á?á??á?¢á??á???a??????????ê??ê??]/g, 'OI' : /[?¢]/g, 'OO' : /[ê??]/g, 'OU' : /[è¢]/g, 'P' : /[Pa"…??°á1"á1–?¤a±£ê??ê?’ê?"]/g, 'Q' : /[Qa"???±ê?–ê??é?]/g, 'R' : /[Ra"???2?"á1???è?è’á1?á1??–á1?é?a±¤ê??ê?|ê??]/g, 'S' : /[Sa"???3áo???á1¤??á1?á1|á1¢á1¨è???a±?ê?¨ê??]/g, 'T' : /[Ta"‰??′á1a?¤á1?è??¢á1°á1??|????è?ê??]/g, 'TZ' : /[ê?¨]/g, 'U' : /[Ua"???μ???????¨á1??aá1o???????—????á?|???°?"è"è–?ˉá?aá?¨á??á??á?°á?¤á12?2á1?á1′é?]/g, 'V' : /[Va"????á1?á1??2ê??é…]/g, 'VY' : /[ê?]/g, 'W' : /[Wa"???·áo€áo??′áo?áo?áo?a±2]/g, 'X' : /[Xa"????áo?áo?]/g, 'Y' : /[Ya"???1á?2????á??è2áo???á??á?′?3é?á??]/g, 'Z' : /[Za"???o?1áo?????áo’áo"?μè¤a±?a±?ê?¢]/g, 'a' : 
/[aa"????áo?????¢áo§áo¥áo?áo??£????áo±áoˉáoμáo3è§???¤??áo£?¥????è?è?áo?áo-áo·á???…a±¥é?]/g, 'aa' : /[ê?3]/g, 'ae' : /[?|???£]/g, 'ao' : /[ê?μ]/g, 'au' : /[ê?·]/g, 'av' : /[ê?1ê??]/g, 'ay' : /[ê??]/g, 'b' : /[ba"‘???á??á?…á???€??é"]/g, 'c' : /[ca"’??????‰?????§á?‰??è?ê??a??]/g, 'd' : /[da""???á????á??á?‘á?"á???‘??é–é—ê?o]/g, 'dz' : /[?3??]/g, 'e' : /[ea""??…?¨???aá??áo?á?…á??áo??"á??á?—???—??áo???è…è?áo1á??è?á????á??á??é?é???]/g, 'f' : /[fa"????á???’ê??]/g, 'g' : /[ga"–????μ??á???????§?£?¥éê??áμ1ê??]/g, 'h' : /[ha"—????¥á?£á?§è?á?¥á??á??áo–?§a±¨a±?é¥]/g, 'hv' : /[??]/g, 'i' : /[ia"???‰???-???????-?ˉá?ˉá?‰??è‰è?á???ˉá?-é¨?±]/g, 'j' : /[ja"?????μ?°é‰]/g, 'k' : /[ka"????á?±??á?3?·á?μ??a±aê??ê??ê?…ê?£]/g, 'l' : /[la"?????€?o??á?·á?1??á??á????????é?a±?ê?‰ê??ê??]/g, 'lj' : /[?‰]/g, 'm' : /[ma"????á??á1?á1?é±éˉ]/g, 'n' : /[na"?????1???±á1…??á1???á1?á1‰??é2?‰ê?‘ê?¥]/g, 'nj' : /[??]/g, 'o' : /[oa"?????2?3?′á?"á?‘á?—á???μá1?è-á1???á1‘á1"??èˉè±??è?á???‘?’è?è???á??á??á??á??á?£á??á?????-????é"ê??ê??éμ]/g, 'oi' : /[?£]/g, 'ou' : /[è£]/g, 'oo' : /[ê??]/g, 'p' : /[pa"????á1?á1—?¥áμ?ê?‘ê?"ê??]/g, 'q' : /[qa"??‘é?ê?—ê??]/g, 'r' : /[ra"???’??á1???è‘è"á1?á1??—á1?é?é?ê??ê?§ê??]/g, 's' : /[sa"¢??"????á1¥??á1???á1§á1£á1?è???è?ê??ê?…áo?]/g, 't' : /[ta"£??"á1?áo—?¥á1-è??£á1±á1ˉ?§?-ê?a±|ê??]/g, 'tz' : /[ê??]/g, 'u' : /[ua"¤????1?o????á11??á1??-???????–??á?§?ˉ?±?"è?è—?°á??á??á?ˉá?-á?±á?¥á13?3á1·á1μê‰]/g, 'v' : /[va"¥??–á1?á1?ê?ê??ê?]/g, 'vy' : /[ê??]/g, 'w' : /[wa"|??—áo?áo??μáo?áo…áo?áo‰a±3]/g, 'x' : /[xa"§???áo?áo?]/g, 'y' : /[ya"¨???á?3???·á?1è3áo???á?·áo?á?μ?′é?á??]/g, 'z' : /[za"?????oáo‘????áo"áo???è¥é€a±?ê?£]/g, }; var str = this; for(var latin in removalMap) { var nonLatin = removalMap[latin]; str = str.replace(nonLatin , latin); } return str; } |
它使用真实字符而不是unicode列表,并且运行良好。
您可以像下面这样使用它:
"ąąą".removeAccents(); // returns "aaa"
您可以轻松地将此函数转换为非字符串原型。但是,由于我很喜欢在这种情况下使用字符串原型,因此您必须自己进行操作。
Kierons解决方案的javascript的直接端口:https://github.com/rwarasaurus/nano/blob/master/system/helpers.php#L61-73:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 | /** * Normalise a string replacing foreign characters * * @param {String} str * @return {String} str */ var normalize = (function () { var a = ['?€', '??', '??', '??', '??', '?…', '??', '??', '??', '?‰', '??', '??', '??', '??', '??', '??', '??', '?‘', '?’', '?"', '?"', '??', '?–', '??', '??', '??', '??', '??', '??', '??', '?', '??', '?¢', '?£', '?¤', '?¥', '?|', '?§', '?¨', '??', '?a', '??', '??', '?-', '??', '?ˉ', '?±', '?2', '?3', '?′', '?μ', '??', '??', '?1', '?o', '??', '??', '??', '??', '?€', '??', '??', '??', '??', '?…', '??', '??', '??', '?‰', '??', '??', '??', '??', '??', '??', '??', '?‘', '?’', '?"', '?"', '??', '?–', '?—', '??', '??', '??', '??', '??', '??', '??', '??', '?', '??', '?¢', '?£', '?¤', '?¥', '?|', '?§', '?¨', '??', '?a', '??', '??', '?-', '??', '?ˉ', '?°', '?±', '?2', '?3', '?′', '?μ', '??', '?·', '?1', '?o', '??', '??', '??', '??', '??', '?€', '??', '??', '??', '??', '?…', '??', '??', '??', '?‰', '??', '??', '??', '??', '??', '?‘', '?’', '?"', '?"', '??', '?–', '?—', '??', '??', '??', '??', '??', '??', '??', '??', '?', '??', '?¢', '?£', '?¤', '?¥', '?|', '?§', '?¨', '??', '?a', '??', '??', '?-', '??', '?ˉ', '?°', '?±', '?2', '?3', '?′', '?μ', '??', '?·', '??', '?1', '?o', '??', '??', '??', '??', '??', '?’', '?', '??', '?ˉ', '?°', '??', '??', '??', '??', '?‘', '?’', '?"', '?"', '??', '?–', '?—', '??', '??', '??', '??', '??', '?o', '??', '??', '??', '??', '??']; var b = ['A', 'A', 'A', 'A', 'A', 'A', 'AE', 'C', 'E', 'E', 'E', 'E', 'I', 'I', 'I', 'I', 'D', 'N', 'O', 'O', 'O', 'O', 'O', 'O', 'U', 'U', 'U', 'U', 'Y', 's', 'a', 'a', 'a', 'a', 'a', 'a', 'ae', 'c', 'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i', 'n', 'o', 'o', 'o', 'o', 'o', 'o', 'u', 'u', 'u', 'u', 'y', 'y', 'A', 'a', 'A', 'a', 'A', 'a', 'C', 'c', 'C', 'c', 'C', 'c', 'C', 'c', 'D', 'd', 'D', 'd', 'E', 'e', 'E', 'e', 'E', 'e', 'E', 'e', 'E', 'e', 'G', 'g', 'G', 'g', 'G', 'g', 'G', 'g', 'H', 'h', 'H', 'h', 'I', 'i', 'I', 'i', 'I', 'i', 'I', 'i', 
'I', 'i', 'IJ', 'ij', 'J', 'j', 'K', 'k', 'L', 'l', 'L', 'l', 'L', 'l', 'L', 'l', 'l', 'l', 'N', 'n', 'N', 'n', 'N', 'n', 'n', 'O', 'o', 'O', 'o', 'O', 'o', 'OE', 'oe', 'R', 'r', 'R', 'r', 'R', 'r', 'S', 's', 'S', 's', 'S', 's', 'S', 's', 'T', 't', 'T', 't', 'T', 't', 'U', 'u', 'U', 'u', 'U', 'u', 'U', 'u', 'U', 'u', 'U', 'u', 'W', 'w', 'Y', 'y', 'Y', 'Z', 'z', 'Z', 'z', 'Z', 'z', 's', 'f', 'O', 'o', 'U', 'u', 'A', 'a', 'I', 'i', 'O', 'o', 'U', 'u', 'U', 'u', 'U', 'u', 'U', 'u', 'U', 'u', 'A', 'a', 'AE', 'ae', 'O', 'o']; return function (str) { var i = a.length; while (i--) str = str.replace(a[i], b[i]); return str; }; }()); |
以及一个经过稍微修改的版本,使用字符映射而不是两个数组:
为了比较这两种方法,我做了一个简单的基准测试:http://jsperf.com/replace-foreign-characters
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 | /** * Normalise a string replacing foreign characters * * @param {String} str * @return {String} */ var normalize = (function () { var map = { "?€":"A", "??":"A", "??":"A", "??":"A", "??":"A", "?…":"A", "??":"AE", "??":"C", "??":"E", "?‰":"E", "??":"E", "??":"E", "??":"I", "??":"I", "??":"I", "??":"I", "??":"D", "?‘":"N", "?’":"O", "?"":"O", "?"":"O", "??":"O", "?–":"O", "??":"O", "??":"U", "??":"U", "??":"U", "??":"U", "??":"Y", "??":"s", "?":"a", "??":"a", "?¢":"a", "?£":"a", "?¤":"a", "?¥":"a", "?|":"ae", "?§":"c", "?¨":"e", "??":"e", "?a":"e", "??":"e", "??":"i", "?-":"i", "??":"i", "?ˉ":"i", "?±":"n", "?2":"o", "?3":"o", "?′":"o", "?μ":"o", "??":"o", "??":"o", "?1":"u", "?o":"u", "??":"u", "??":"u", "??":"y", "??":"y", "?€":"A", "??":"a", "??":"A", "??":"a", "??":"A", "?…":"a", "??":"C", "??":"c", "??":"C", "?‰":"c", "??":"C", "??":"c", "??":"C", "??":"c", "??":"D", "??":"d", "??":"D", "?‘":"d", "?’":"E", "?"":"e", "?"":"E", "??":"e", "?–":"E", "?—":"e", "??":"E", "??":"e", "??":"E", "??":"e", "??":"G", "??":"g", "??":"G", "??":"g", "?":"G", "??":"g", "?¢":"G", "?£":"g", "?¤":"H", "?¥":"h", "?|":"H", "?§":"h", "?¨":"I", "??":"i", "?a":"I", "??":"i", "??":"I", 
"?-":"i", "??":"I", "?ˉ":"i", "?°":"I", "?±":"i", "?2":"IJ", "?3":"ij", "?′":"J", "?μ":"j", "??":"K", "?·":"k", "?1":"L", "?o":"l", "??":"L", "??":"l", "??":"L", "??":"l", "??":"L", "?€":"l", "??":"l", "??":"l", "??":"N", "??":"n", "?…":"N", "??":"n", "??":"N", "??":"n", "?‰":"n", "??":"O", "??":"o", "??":"O", "??":"o", "??":"O", "?‘":"o", "?’":"OE", "?"":"oe", "?"":"R", "??":"r", "?–":"R", "?—":"r", "??":"R", "??":"r", "??":"S", "??":"s", "??":"S", "??":"s", "??":"S", "??":"s", "?":"S", "??":"s", "?¢":"T", "?£":"t", "?¤":"T", "?¥":"t", "?|":"T", "?§":"t", "?¨":"U", "??":"u", "?a":"U", "??":"u", "??":"U", "?-":"u", "??":"U", "?ˉ":"u", "?°":"U", "?±":"u", "?2":"U", "?3":"u", "?′":"W", "?μ":"w", "??":"Y", "?·":"y", "??":"Y", "?1":"Z", "?o":"z", "??":"Z", "??":"z", "??":"Z", "??":"z", "??":"s", "?’":"f", "?":"O", "??":"o", "?ˉ":"U", "?°":"u", "??":"A", "??":"a", "??":"I", "??":"i", "?‘":"O", "?’":"o", "?"":"U", "?"":"u", "??":"U", "?–":"u", "?—":"U", "??":"u", "??":"U", "??":"u", "??":"U", "??":"u", "?o":"A", "??":"a", "??":"AE", "??":"ae", "??":"O", "??":"o" }, nonWord = /\\W/g, mapping = function (c) { return map[c] || c; }; return function (str) { return str.replace(nonWord, mapping); }; }()); |
没有一个答案提到 String.localeCompare,它恰好可以完成您最初想要做的事情(尽管不是您所问的):
var list = ['a', 'b', 'c', 'o', 'u', 'z', 'ä', 'ö', 'ü'];

// Pass the locale explicitly: without it the order depends on the host's
// default locale (Swedish, for example, sorts "ä" and "ö" after "z").
list.sort((a, b) => a.localeCompare(b, 'de'));

console.log(list);

// Outputs ['a', 'ä', 'b', 'c', 'o', 'ö', 'u', 'ü', 'z']
虽然第二个和第三个参数不受较旧的浏览器支持。尽管如此,这是一个值得考虑的选择。
我只想发布一个使用 String#localeCompare 的解决方案:
// Characters allowed in the output. Every input character is mapped to the
// first entry it equals under an accent- and case-insensitive comparison.
const base_chars = [
  '1', '2', '3', '4', '5', '6', '7', '8', '9', '0',
  'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j',
  'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't',
  'u', 'v', 'w', 'x', 'y', 'z',
  '-', '_', ' '
];

// One shared collator instead of a localeCompare call (with locale and
// options) for every single character comparison.
const base_collator = new Intl.Collator('en', { sensitivity: 'base' });

/**
 * Reduce a string to the characters of `base_chars`.
 *
 * The input is NFKD-normalized and split into UTF-16 units; each unit is
 * replaced by the first base character it compares equal to. Units with no
 * match (for example the combining marks produced by the normalization)
 * yield `undefined`, which `join('')` renders as an empty string, so they
 * are dropped. Letters come out lower case because the table only holds
 * lower-case letters.
 *
 * @param {string} str - Text to convert.
 * @returns {string} The converted text.
 */
const fix = str => str.normalize('NFKD').split('')
  .map(c => base_chars.find(bc => base_collator.compare(bc, c) === 0))
  .join('');

const str = 'OÒ óëå-123';
console.log(`fix(${str}) = ${fix(str)}`);
很久以前,我在Java中进行了此操作,找到了基于单个字符串的其他人的解决方案,该字符串捕获了对转换很重要的Unicode表的一部分-其余部分转换为?或任何其他替换字符。因此,我尝试将其转换为JavaScript。请注意,我不是JS专家。 :-)
// ASCII stand-ins for the code points U+00C0..U+017F, one character per
// code point (12 rows of 16). Declared with `var`: assigning to an
// undeclared name throws in strict mode and in ES modules.
var TAB_00C0 = "AAAAAAACEEEEIIII" +
    "DNOOOOO*OUUUUYIs" +
    "aaaaaaaceeeeiiii" +
    "?nooooo/ouuuuy?y" +
    "AaAaAaCcCcCcCcDd" +
    "DdEeEeEeEeEeGgGg" +
    "GgGgHhHhIiIiIiIi" +
    "IiJjJjKkkLlLlLlL" +
    "lLlNnNnNnnNnOoOo" +
    "OoOoRrRrRrSsSsSs" +
    "SsTtTtTtUuUuUuUu" +
    "UuUuWwYyYZzZzZzF";

/**
 * Convert a string to ASCII by table lookup.
 *
 * UTF-16 units in U+00C0..U+017F are replaced by their entry in TAB_00C0;
 * every other unit above 127 becomes '?'; ASCII is kept as is. Each unit
 * maps to exactly one character, so the result has the same length as the
 * input.
 *
 * @param {string} source - Text to convert.
 * @returns {string} The ASCII-only text.
 */
function stripDiacritics(source) {
    var result = source.split('');
    for (var i = 0; i < result.length; i++) {
        var c = source.charCodeAt(i);
        if (c >= 0x00c0 && c <= 0x017f) {
            result[i] = TAB_00C0.charAt(c - 0x00c0);
        } else if (c > 127) {
            result[i] = '?';
        }
    }
    return result.join('');
}

stripDiacritics("Šupa, čo? ľšťčžýæøåℌ"); // "Supa, co? lstczyaoa?"
这将转换大多数 Latin-1 和 Latin-2 Unicode 字符。它无法将单个字符转换为多个字符。我不知道它在JS上的性能,在Java中,这是迄今为止最快的常见解决方案(快6-50倍),没有映射,没有正则表达式,什么也没有。它产生严格的ASCII输出,可能会丢失信息,但是输出的大小与输入匹配。
我使用http://www.webtoolkitonline.com/javascript-tester.html测试了代码段,它按预期生成了结果。
如果要实现将"ä"排在"a"之后、而不是将两者视为相同的排序,则可以使用像我这样的函数。
您始终可以更改字母表以实现不同甚至奇怪的排序。但是,如果您希望某些字母等效,那么您就必须先对字符串进行处理(例如 a = a.replace(/ä/, 'a') 之类的操作),正如上面许多人已经回答的那样。
/**
 * Comparator for Array.prototype.sort that orders strings by a custom
 * alphabet in which each accented letter directly follows its base letter.
 *
 * Both strings are lower-cased first, so the comparison is case-insensitive
 * (the upper-case entries of the alphabet only document the intended order).
 * Characters that are not in the alphabet get index -1: they sort before
 * '0', and two different unknown characters compare equal at that position.
 *
 * @param {string} a
 * @param {string} b
 * @returns {number} Negative when `a` sorts first, positive when `b` sorts
 *   first, 0 when they are equivalent.
 */
function sortbyalphabet(a, b) {
    // Plain E, I, U and Y are part of the alphabet; without them those
    // letters have index -1 and sort before the digits.
    var alphabet = "0123456789AaÀàÁáÂâÃãÄäBbCcÇçDdEeÈèÉéÊêËëFfGgHhIiÌìÍíÎîÏïJjKkLlMmNnÑñOoÒòÓóÔôÕõÖöPpQqRrSsTtUuÙùÚúÛûÜüVvWwXxYyÝýŸÿZz";

    a = a.toLowerCase();
    b = b.toLowerCase();

    // Only the positions both strings have can be compared.
    var common = Math.min(a.length, b.length);
    for (var i = 0; i < common; i++) {
        var diff = alphabet.indexOf(a.charAt(i)) - alphabet.indexOf(b.charAt(i));
        if (diff !== 0) {
            return diff;
        }
    }
    // sort the shorter first
    return a.length - b.length;
}

var n = ["ast", "Äste", "apfel", "äpfel", "à"];
console.log(n.sort(sortbyalphabet));
// should return ["apfel", "ast", "à", "äpfel", "Äste"]
一种简单的方法:
/**
 * Replace accented vowels and c-cedilla by their unaccented letter.
 *
 * The function is named `removeAccents` because `remove-accents` is not a
 * valid JavaScript identifier. Characters are looked up with `indexOf`
 * (a literal search) rather than `String.search`, which interprets its
 * argument as a regular expression: with `search` a "." in the input is
 * rewritten and a "(" throws.
 *
 * @param {string} p - Text to convert.
 * @returns {string} The text with every listed accented letter replaced;
 *   all other characters are kept.
 */
function removeAccents(p) {
  // Position k of `accented` corresponds to position k of `plain`.
  var accented = 'áàãâäéèêëíìîïóòõôöúùûüçÁÀÃÂÄÉÈÊËÍÌÎÏÓÒÕÖÔÚÙÛÜÇ';
  var plain = 'aaaaaeeeeiiiiooooouuuucAAAAAEEEEIIIIOOOOOUUUUC';
  var out = '';
  for (var i = 0; i < p.length; i++) {
    var ch = p.charAt(i);
    var pos = accented.indexOf(ch);
    out += pos >= 0 ? plain.charAt(pos) : ch;
  }
  return out;
}
这样做:
// `remove-accents` is not a valid identifier (it parses as a subtraction),
// so the function is called through the valid name `removeAccents`.
removeAccents("Thís ís ân accêntéd phráse");
输出:
1 | "This is an accented phrase" |
Crisalin 的答案几乎是完美的。我只是改进了性能,避免在每次运行时都创建新的RegExp。
/**
 * Precompiled transliteration rules, applied in order.
 *
 * Each entry is `{ regex, clean }`: `regex` is a global expression matching
 * any one of the accented characters of the rule, `clean` is the ASCII text
 * that replaces it. The expressions are compiled once, when the script is
 * loaded. Order matters: the German-style expansions (ä -> ae, ...) come
 * first, so the later single-letter rules never see those characters.
 */
var normalizeConversions = [
  ['ae', 'äæǽ'], ['oe', 'öœ'], ['ue', 'ü'],
  ['Ae', 'Ä'], ['Ue', 'Ü'], ['Oe', 'Ö'],
  ['A', 'ÀÁÂÃÄÅǺĀĂĄǍ'], ['a', 'àáâãåǻāăąǎª'],
  ['C', 'ÇĆĈĊČ'], ['c', 'çćĉċč'],
  ['D', 'ÐĎĐ'], ['d', 'ðďđ'],
  ['E', 'ÈÉÊËĒĔĖĘĚ'], ['e', 'èéêëēĕėęě'],
  ['G', 'ĜĞĠĢ'], ['g', 'ĝğġģ'],
  ['H', 'ĤĦ'], ['h', 'ĥħ'],
  ['I', 'ÌÍÎÏĨĪĬǏĮİ'], ['i', 'ìíîïĩīĭǐįı'],
  ['J', 'Ĵ'], ['j', 'ĵ'],
  ['K', 'Ķ'], ['k', 'ķ'],
  ['L', 'ĹĻĽĿŁ'], ['l', 'ĺļľŀł'],
  ['N', 'ÑŃŅŇ'], ['n', 'ñńņňŉ'],
  ['O', 'ÒÓÔÕŌŎǑŐƠØǾ'], ['o', 'òóôõōŏǒőơøǿº'],
  ['R', 'ŔŖŘ'], ['r', 'ŕŗř'],
  ['S', 'ŚŜŞŠ'], ['s', 'śŝşšſ'],
  ['T', 'ŢŤŦ'], ['t', 'ţťŧ'],
  ['U', 'ÙÚÛŨŪŬŮŰŲƯǓǕǗǙǛ'], ['u', 'ùúûũūŭůűųưǔǖǘǚǜ'],
  ['Y', 'ÝŸŶ'], ['y', 'ýÿŷ'],
  ['W', 'Ŵ'], ['w', 'ŵ'],
  ['Z', 'ŹŻŽ'], ['z', 'źżž'],
  ['AE', 'ÆǼ'], ['ss', 'ß'],
  ['IJ', 'Ĳ'], ['ij', 'ĳ'],
  ['OE', 'Œ'], ['f', 'ƒ']
].map(function (rule) {
  // A character class matches the same single characters as the
  // alternation 'x|y|z' would.
  return { regex: new RegExp('[' + rule[1] + ']', 'g'), clean: rule[0] };
});
用法:
/**
 * Apply every rule of `normalizeConversions` to a string, in order.
 *
 * The function has a name because an anonymous function declaration used as
 * a statement is a syntax error and could not be called anyway.
 *
 * @param {string} str - Text to convert.
 * @returns {string} The text after all replacements.
 */
function normalizeAccents(str) {
  return normalizeConversions.reduce(function (result, normalizeEntry) {
    return result.replace(normalizeEntry.regex, normalizeEntry.clean);
  }, str);
}
对于使用TypeScript的小伙子以及那些不想处理字符串原型的小伙子们,这里是Ed的答案的typescript版本:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 
528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 | // Usage example: "Some string".replace(/[^a-zA-Z0-9-_]/g, char => ToLatinMap.get(char) || '') // Map: export let ToLatinMap: Map<string, string> = new Map<string, string>([ ["??","A"], ["??","A"], ["áo?","A"], ["áo?","A"], ["áo°","A"], ["áo2","A"], ["áo′","A"], ["??","A"], ["??","A"], ["áo¤","A"], ["áo?","A"], ["áo|","A"], ["áo¨","A"], ["áoa","A"], ["??","A"], ["??","A"], ["è|","A"], ["?","A"], ["áo","A"], ["è€","A"], ["?€","A"], ["áo¢","A"], ["è?","A"], ["?€","A"], ["??","A"], ["?…","A"], ["?o","A"], ["á?€","A"], ["èo","A"], ["??","A"], ["ê?2","AA"], ["??","AE"], ["??","AE"], ["?¢","AE"], ["ê?′","AO"], ["ê??","AU"], ["ê??","AV"], ["ê?o","AV"], ["ê??","AY"], ["á??","B"], ["á??","B"], ["??","B"], ["á??","B"], ["é?","B"], ["??","B"], ["??","C"], ["??","C"], ["??","C"], 
["á??","C"], ["??","C"], ["??","C"], ["??","C"], ["è?","C"], ["??","D"], ["á??","D"], ["á?’","D"], ["á??","D"], ["á??","D"], ["??","D"], ["á??","D"], ["?2","D"], ["?…","D"], ["??","D"], ["??","D"], ["?±","DZ"], ["??","DZ"], ["?‰","E"], ["?"","E"], ["??","E"], ["è¨","E"], ["á??","E"], ["??","E"], ["áo?","E"], ["á??","E"], ["á?€","E"], ["á??","E"], ["á??","E"], ["á??","E"], ["??","E"], ["?–","E"], ["áo?","E"], ["è?","E"], ["??","E"], ["áoo","E"], ["è?","E"], ["?’","E"], ["á?–","E"], ["á?"","E"], ["??","E"], ["é?","E"], ["áo?","E"], ["á??","E"], ["ê?a","ET"], ["á??","F"], ["?‘","F"], ["?′","G"], ["??","G"], ["?|","G"], ["?¢","G"], ["??","G"], ["?","G"], ["?"","G"], ["á?","G"], ["?¤","G"], ["á?a","H"], ["è?","H"], ["á?¨","H"], ["?¤","H"], ["a±§","H"], ["á?|","H"], ["á?¢","H"], ["á?¤","H"], ["?|","H"], ["??","I"], ["??","I"], ["??","I"], ["??","I"], ["??","I"], ["á??","I"], ["?°","I"], ["á??","I"], ["è?","I"], ["??","I"], ["á??","I"], ["è?","I"], ["?a","I"], ["??","I"], ["?—","I"], ["?¨","I"], ["á??","I"], ["ê?1","D"], ["ê??","F"], ["ê??","G"], ["ê??","R"], ["ê??","S"], ["ê??","T"], ["ê??","IS"], ["?′","J"], ["é?","J"], ["á?°","K"], ["?¨","K"], ["??","K"], ["a±?","K"], ["ê??","K"], ["á?2","K"], ["??","K"], ["á?′","K"], ["ê?€","K"], ["ê??","K"], ["?1","L"], ["è?","L"], ["??","L"], ["??","L"], ["á??","L"], ["á??","L"], ["á??","L"], ["a±","L"], ["ê??","L"], ["á?o","L"], ["??","L"], ["a±¢","L"], ["??","L"], ["??","L"], ["??","LJ"], ["á??","M"], ["á1€","M"], ["á1?","M"], ["a±?","M"], ["??","N"], ["??","N"], ["?…","N"], ["á1?","N"], ["á1?","N"], ["á1?","N"], ["??","N"], ["??","N"], ["á1?","N"], ["è","N"], ["??","N"], ["?‘","N"], ["??","NJ"], ["?"","O"], ["??","O"], ["?‘","O"], ["?"","O"], ["á??","O"], ["á??","O"], ["á?’","O"], ["á?"","O"], ["á?–","O"], ["?–","O"], ["èa","O"], ["è?","O"], ["è°","O"], ["á??","O"], ["??","O"], ["è?","O"], ["?’","O"], ["á??","O"], ["?","O"], ["á??","O"], ["á?¢","O"], ["á??","O"], ["á??","O"], ["á?","O"], ["è?","O"], ["ê??","O"], ["ê??","O"], 
["??","O"], ["á1’","O"], ["á1?","O"], ["??","O"], ["?a","O"], ["??","O"], ["??","O"], ["??","O"], ["??","O"], ["á1?","O"], ["á1?","O"], ["è?","O"], ["?¢","OI"], ["ê??","OO"], ["??","E"], ["??","O"], ["è¢","OU"], ["á1"","P"], ["á1–","P"], ["ê?’","P"], ["?¤","P"], ["ê?"","P"], ["a±£","P"], ["ê??","P"], ["ê??","Q"], ["ê?–","Q"], ["?"","R"], ["??","R"], ["?–","R"], ["á1?","R"], ["á1?","R"], ["á1?","R"], ["è?","R"], ["è’","R"], ["á1?","R"], ["é?","R"], ["a±¤","R"], ["ê??","C"], ["??","E"], ["??","S"], ["á1¤","S"], ["?","S"], ["á1|","S"], ["??","S"], ["??","S"], ["è?","S"], ["á1","S"], ["á1¢","S"], ["á1¨","S"], ["?¤","T"], ["?¢","T"], ["á1°","T"], ["è?","T"], ["è?","T"], ["á1a","T"], ["á1?","T"], ["??","T"], ["á1?","T"], ["??","T"], ["?|","T"], ["a±ˉ","A"], ["ê?€","L"], ["??","M"], ["é…","V"], ["ê?¨","TZ"], ["??","U"], ["??","U"], ["?"","U"], ["??","U"], ["á1?","U"], ["??","U"], ["?—","U"], ["??","U"], ["??","U"], ["??","U"], ["á12","U"], ["á?¤","U"], ["?°","U"], ["è"","U"], ["??","U"], ["á?|","U"], ["?ˉ","U"], ["á?¨","U"], ["á?°","U"], ["á?a","U"], ["á??","U"], ["á??","U"], ["è–","U"], ["?a","U"], ["á1o","U"], ["?2","U"], ["??","U"], ["?¨","U"], ["á1?","U"], ["á1′","U"], ["ê??","V"], ["á1?","V"], ["?2","V"], ["á1?","V"], ["ê?","VY"], ["áo?","W"], ["?′","W"], ["áo?","W"], ["áo?","W"], ["áo?","W"], ["áo€","W"], ["a±2","W"], ["áo?","X"], ["áo?","X"], ["??","Y"], ["??","Y"], ["??","Y"], ["áo?","Y"], ["á?′","Y"], ["á?2","Y"], ["?3","Y"], ["á??","Y"], ["á??","Y"], ["è2","Y"], ["é?","Y"], ["á??","Y"], ["?1","Z"], ["??","Z"], ["áo?","Z"], ["a±?","Z"], ["??","Z"], ["áo’","Z"], ["è¤","Z"], ["áo"","Z"], ["?μ","Z"], ["?2","IJ"], ["?’","OE"], ["á′€","A"], ["á′?","AE"], ["ê?","B"], ["á′?","B"], ["á′?","C"], ["á′…","D"], ["á′?","E"], ["ê?°","F"], ["é¢","G"], ["ê?","G"], ["ê?","H"], ["éa","I"], ["ê?","R"], ["á′?","J"], ["á′?","K"], ["ê?","L"], ["á′?","L"], ["á′?","M"], ["é′","N"], ["á′?","O"], ["é?","OE"], ["á′?","O"], ["á′?","OU"], ["á′?","P"], ["ê€","R"], ["á′?","N"], ["á′?","R"], 
["ê?±","S"], ["á′?","T"], ["a±?","E"], ["á′?","R"], ["á′?","U"], ["á′","V"], ["á′?","W"], ["ê?","Y"], ["á′¢","Z"], ["??","a"], ["??","a"], ["áoˉ","a"], ["áo·","a"], ["áo±","a"], ["áo3","a"], ["áoμ","a"], ["??","a"], ["?¢","a"], ["áo¥","a"], ["áo-","a"], ["áo§","a"], ["áo?","a"], ["áo?","a"], ["?¤","a"], ["??","a"], ["è§","a"], ["??","a"], ["áo?","a"], ["è?","a"], ["?","a"], ["áo£","a"], ["è?","a"], ["??","a"], ["?…","a"], ["á??","a"], ["áo?","a"], ["?¥","a"], ["??","a"], ["á??","a"], ["a±¥","a"], ["?£","a"], ["ê?3","aa"], ["?|","ae"], ["??","ae"], ["?£","ae"], ["ê?μ","ao"], ["ê?·","au"], ["ê?1","av"], ["ê??","av"], ["ê??","ay"], ["á??","b"], ["á?…","b"], ["é"","b"], ["á??","b"], ["áμ?","b"], ["á?€","b"], ["?€","b"], ["??","b"], ["éμ","o"], ["??","c"], ["??","c"], ["?§","c"], ["á?‰","c"], ["?‰","c"], ["é?","c"], ["??","c"], ["??","c"], ["è?","c"], ["??","d"], ["á?‘","d"], ["á?"","d"], ["è?","d"], ["á??","d"], ["á??","d"], ["é—","d"], ["á?‘","d"], ["á??","d"], ["áμ-","d"], ["á??","d"], ["?‘","d"], ["é–","d"], ["??","d"], ["?±","i"], ["è·","j"], ["é?","j"], ["ê?","j"], ["?3","dz"], ["??","dz"], ["??","e"], ["??","e"], ["??","e"], ["è?","e"], ["á??","e"], ["?a","e"], ["áo?","e"], ["á??","e"], ["á??","e"], ["á??","e"], ["á?…","e"], ["á??","e"], ["??","e"], ["?—","e"], ["áo1","e"], ["è…","e"], ["?¨","e"], ["áo?","e"], ["è?","e"], ["?"","e"], ["á?—","e"], ["á??","e"], ["a±?","e"], ["??","e"], ["á?’","e"], ["é?","e"], ["áo?","e"], ["á??","e"], ["ê??","et"], ["á??","f"], ["?’","f"], ["áμ?","f"], ["á??","f"], ["?μ","g"], ["??","g"], ["?§","g"], ["?£","g"], ["??","g"], ["??","g"], ["é","g"], ["á??","g"], ["á??","g"], ["?¥","g"], ["á??","h"], ["è?","h"], ["á??","h"], ["?¥","h"], ["a±¨","h"], ["á?§","h"], ["á?£","h"], ["á?¥","h"], ["é|","h"], ["áo–","h"], ["?§","h"], ["??","hv"], ["?-","i"], ["?-","i"], ["??","i"], ["??","i"], ["?ˉ","i"], ["á?ˉ","i"], ["á??","i"], ["è‰","i"], ["??","i"], ["á?‰","i"], ["è?","i"], ["??","i"], ["?ˉ","i"], ["á?–","i"], ["é¨","i"], ["??","i"], 
["á?-","i"], ["ê?o","d"], ["ê??","f"], ["áμ1","g"], ["ê??","r"], ["ê?…","s"], ["ê??","t"], ["ê?-","is"], ["?°","j"], ["?μ","j"], ["ê?","j"], ["é‰","j"], ["á?±","k"], ["??","k"], ["?·","k"], ["a±a","k"], ["ê??","k"], ["á?3","k"], ["??","k"], ["á?μ","k"], ["á??","k"], ["ê??","k"], ["ê?…","k"], ["?o","l"], ["??","l"], ["é?","l"], ["??","l"], ["??","l"], ["á??","l"], ["è′","l"], ["á?·","l"], ["á?1","l"], ["a±?","l"], ["ê?‰","l"], ["á??","l"], ["?€","l"], ["é?","l"], ["á?…","l"], ["é-","l"], ["??","l"], ["?‰","lj"], ["??","s"], ["áo?","s"], ["áo?","s"], ["áo?","s"], ["á??","m"], ["á1?","m"], ["á1?","m"], ["é±","m"], ["áμˉ","m"], ["á??","m"], ["??","n"], ["??","n"], ["??","n"], ["á1?","n"], ["èμ","n"], ["á1…","n"], ["á1?","n"], ["?1","n"], ["é2","n"], ["á1‰","n"], ["??","n"], ["áμ°","n"], ["á??","n"], ["é3","n"], ["?±","n"], ["??","nj"], ["?3","o"], ["??","o"], ["?’","o"], ["?′","o"], ["á?‘","o"], ["á??","o"], ["á?"","o"], ["á??","o"], ["á?—","o"], ["??","o"], ["è?","o"], ["èˉ","o"], ["è±","o"], ["á??","o"], ["?‘","o"], ["è?","o"], ["?2","o"], ["á??","o"], ["??","o"], ["á??","o"], ["á?£","o"], ["á??","o"], ["á??","o"], ["á??","o"], ["è?","o"], ["ê??","o"], ["ê??","o"], ["a±o","o"], ["??","o"], ["á1"","o"], ["á1‘","o"], ["??","o"], ["?-","o"], ["??","o"], ["??","o"], ["?μ","o"], ["á1?","o"], ["á1?","o"], ["è-","o"], ["?£","oi"], ["ê??","oo"], ["é?","e"], ["á?"","e"], ["é"","o"], ["á?—","o"], ["è£","ou"], ["á1?","p"], ["á1—","p"], ["ê?"","p"], ["?¥","p"], ["áμ±","p"], ["á??","p"], ["ê??","p"], ["áμ?","p"], ["ê?‘","p"], ["ê??","q"], ["ê","q"], ["é?","q"], ["ê?—","q"], ["??","r"], ["??","r"], ["?—","r"], ["á1?","r"], ["á1?","r"], ["á1?","r"], ["è‘","r"], ["é?","r"], ["áμ3","r"], ["è"","r"], ["á1?","r"], ["é?","r"], ["áμ2","r"], ["á?‰","r"], ["é?","r"], ["é?","r"], ["a??","c"], ["ê??","c"], ["é?","e"], ["é?","r"], ["??","s"], ["á1¥","s"], ["??","s"], ["á1§","s"], ["??","s"], ["??","s"], ["è?","s"], ["á1?","s"], ["á1£","s"], ["á1?","s"], ["ê?","s"], ["áμ′","s"], ["á??","s"], 
["è?","s"], ["é?","g"], ["á′‘","o"], ["á′"","o"], ["á′?","u"], ["?¥","t"], ["?£","t"], ["á1±","t"], ["è?","t"], ["è?","t"], ["áo—","t"], ["a±|","t"], ["á1?","t"], ["á1-","t"], ["?-","t"], ["á1ˉ","t"], ["áμμ","t"], ["??","t"], ["ê?","t"], ["?§","t"], ["áμo","th"], ["é?","a"], ["á′?","ae"], ["??","e"], ["áμ·","g"], ["é¥","h"], ["ê?","h"], ["êˉ","h"], ["á′‰","i"], ["ê?","k"], ["ê??","l"], ["éˉ","m"], ["é°","m"], ["á′"","oe"], ["é1","r"], ["é?","r"], ["éo","r"], ["a±1","r"], ["ê?","t"], ["ê?","v"], ["ê?","w"], ["ê?","y"], ["ê??","tz"], ["?o","u"], ["?-","u"], ["?"","u"], ["??","u"], ["á1·","u"], ["??","u"], ["??","u"], ["??","u"], ["??","u"], ["?–","u"], ["á13","u"], ["á?¥","u"], ["?±","u"], ["è?","u"], ["?1","u"], ["á?§","u"], ["?°","u"], ["á??","u"], ["á?±","u"], ["á??","u"], ["á?-","u"], ["á?ˉ","u"], ["è—","u"], ["??","u"], ["á1?","u"], ["?3","u"], ["á??","u"], ["?ˉ","u"], ["??","u"], ["á11","u"], ["á1μ","u"], ["áμ?","ue"], ["ê??","um"], ["a±′","v"], ["ê??","v"], ["á1?","v"], ["ê?","v"], ["á??","v"], ["a±±","v"], ["á1?","v"], ["ê??","vy"], ["áo?","w"], ["?μ","w"], ["áo…","w"], ["áo?","w"], ["áo‰","w"], ["áo?","w"], ["a±3","w"], ["áo?","w"], ["áo?","x"], ["áo?","x"], ["á??","x"], ["??","y"], ["?·","y"], ["??","y"], ["áo?","y"], ["á?μ","y"], ["á?3","y"], ["?′","y"], ["á?·","y"], ["á??","y"], ["è3","y"], ["áo?","y"], ["é?","y"], ["á?1","y"], ["?o","z"], ["??","z"], ["áo‘","z"], ["ê‘","z"], ["a±?","z"], ["??","z"], ["áo"","z"], ["è¥","z"], ["áo?","z"], ["áμ?","z"], ["á??","z"], ["ê?","z"], ["??","z"], ["é€","z"], ["??€","ff"], ["???","ffi"], ["???","ffl"], ["???","fi"], ["???","fl"], ["?3","ij"], ["?"","oe"], ["???","st"], ["a??","a"], ["a?‘","e"], ["áμ¢","i"], ["a±?","j"], ["a?’","o"], ["áμ£","r"], ["áμ¤","u"], ["áμ¥","v"], ["a?"","x"], ]); |
如果您愿意,我已经用另一种方式解决了它。
在这里我使用了两个数组:searchChars 包含将被替换的字符,replaceChars 包含对应的目标字符。
var text = "your input string";
var searchChars = ['Å', 'Ä', 'å', 'Ö', 'ö']; // add more characters.
var replaceChars = ['A', 'A', 'a', 'O', 'o']; // exact same index as in searchChars.

/**
 * Replace each character of `input` found in `from` by the entry of `to`
 * at the same index.
 *
 * Uses a single `indexOf` lookup per character (no jQuery needed) and
 * builds the result once instead of re-slicing the string on every hit.
 *
 * @param {string} input - Text to convert.
 * @param {string[]} from - Characters to search for.
 * @param {string[]} to - Replacements, index-aligned with `from`.
 * @returns {string} The converted text.
 */
function replaceMappedChars(input, from, to) {
  var out = '';
  for (var i = 0; i < input.length; i++) {
    var index = from.indexOf(input[i]);
    out += index > -1 ? to[index] : input[i];
  }
  return out;
}

text = replaceMappedChars(text, searchChars, replaceChars);