Sort Array Elements (string with numbers), natural sort
我有一个如下所示的数组:
["IL0 Foo","PI0 Bar","IL10 Baz","IL3 Bob says hello"]
我需要对其进行排序,使其变成:
["IL0 Foo","IL3 Bob says hello","IL10 Baz","PI0 Bar"]
我尝试了如下排序函数:
/**
 * Plain comparator for Array.prototype.sort using the built-in `<` / `>`
 * operators. For strings this is a character-by-character comparison, so
 * digit runs are NOT compared by numeric value ("IL10" sorts before "IL3").
 *
 * @param {*} a - First value.
 * @param {*} b - Second value.
 * @returns {number} -1 if a < b, 1 if a > b, otherwise 0.
 */
function compare(a, b) {
    if (a < b) return -1;
    if (a > b) return 1;
    return 0;
}
但这样得到的顺序是:
["IL0 Foo","IL10 Baz","IL3 Bob says hello","PI0 Bar"]
我想过用正则表达式来处理,但没能想出解决办法。
如果有帮助,格式将始终为2个字母,x个数字,然后是任意数量的字符。
这称为"自然排序",可以像这样在JS中实现:
/**
 * Splits a string into consecutive digit / non-digit chunks.
 *
 * Each chunk is a pair [number, text]:
 *   - a digit run becomes   [digits, ""]
 *   - any other run becomes [Infinity, text]
 * Using Infinity for text chunks makes a number sort before text when the
 * two are compared at the same position.
 *
 * @param {*} s - Value to split; it is converted to a string first.
 * @returns {Array<Array>} List of [number, text] pairs.
 */
function naturalChunks(s) {
    var chunks = [];
    // replace() is used only as an iterator over the matches.
    String(s).replace(/(\d+)|(\D+)/g, function (_, $1, $2) {
        chunks.push([$1 || Infinity, $2 || ""]);
    });
    return chunks;
}

/**
 * Natural-order comparator for Array.prototype.sort: digit runs are compared
 * by numeric value, everything else with String.prototype.localeCompare.
 *
 * @param {string} a - First string.
 * @param {string} b - Second string.
 * @returns {number} Negative if a sorts first, positive if b sorts first, 0 if equal.
 */
function naturalCompare(a, b) {
    var ax = naturalChunks(a);
    var bx = naturalChunks(b);
    var shared = Math.min(ax.length, bx.length);

    for (var i = 0; i < shared; i++) {
        // Infinity - Infinity is NaN (falsy), so two text chunks fall
        // through to localeCompare; equal numbers give 0 and do the same.
        var nn = (ax[i][0] - bx[i][0]) || ax[i][1].localeCompare(bx[i][1]);
        if (nn) return nn;
    }

    // All shared chunks are equal: the string with fewer chunks sorts first.
    return ax.length - bx.length;
}

/////////////////////////

var test = [
    "img12.png",
    "img10.png",
    "img2.png",
    "img1.png",
    "img101.png",
    "img101a.png",
    "abc10.jpg",
    "abc10",
    "abc2.jpg",
    "20.jpg",
    "20",
    "abc",
    "abc2",
    ""
];

test.sort(naturalCompare);
console.log(JSON.stringify(test, null, 3));
要以相反的顺序排序,只需交换参数即可:
// Descending natural order: swap the arguments passed to the comparator.
test.sort(function (a, b) {
    return naturalCompare(b, a);
});
或简单地
// Sort ascending, then reverse. Both sort() and reverse() work in place and
// return the same array, so no reassignment of `test` is needed.
test.sort(naturalCompare).reverse();
您可以使用带选项参数的 String#localeCompare,相关选项说明如下:
sensitivity
Which differences in the strings should lead to non-zero result values. Possible values are:
"base": Only strings that differ in base letters compare as unequal. Examples: a ≠ b, a = á, a = A.
"accent": Only strings that differ in base letters or accents and other diacritic marks compare as unequal. Examples: a ≠ b, a ≠ á, a = A.
"case": Only strings that differ in base letters or case compare as unequal. Examples: a ≠ b, a = á, a ≠ A.
"variant": Strings that differ in base letters, accents and other diacritic marks, or case compare as unequal. Other differences may also be taken into consideration. Examples: a ≠ b, a ≠ á, a ≠ A.
The default is "variant" for usage "sort"; it's locale dependent for usage "search".
numeric
Whether numeric collation should be used, such that "1" < "2" < "10". Possible values are true and false; the default is false. This option can be set through an options property or through a Unicode extension key; if both are provided, the options property takes precedence. Implementations are not required to support this property.
var array = ["IL0 Foo", "PI0 Bar", "IL10 Baz", "IL3 Bob says hello"];

// numeric: true     -> digit runs are compared by numeric value ("3" < "10").
// sensitivity: base -> case and accent differences compare as equal.
// Passing undefined as the locale uses the runtime's default locale.
array.sort(function (a, b) {
    return a.localeCompare(b, undefined, {
        numeric: true,
        sensitivity: 'base'
    });
});

console.log(array);
// Captures: (1) leading letters, (2) the number, (3) the rest of the string.
var re = /([a-z]+)(\d+)(.+)/i;
var arr = ["IL0 Foo", "PI0 Bar", "IL10 Baz", "IL3 Bob says hello"];

/**
 * Three-way comparison using the built-in `<` / `>` operators.
 * @returns {number} 1, -1 or 0.
 */
function threeWay(x, y) {
    return x > y ? 1 : x < y ? -1 : 0;
}

/**
 * Comparator for strings shaped like "<letters><digits><rest>".
 * Orders by the letters, then by the number (numerically), then by the rest.
 * Strings that do not match `re` sort after all matching strings and are
 * ordered among themselves by plain string comparison.
 *
 * @param {string} a - First string.
 * @param {string} b - Second string.
 * @returns {number} -1, 0 or 1 (always a number, never a boolean).
 */
function compareByRegex(a, b) {
    var ma = a.match(re);
    var mb = b.match(re);

    // match() returns null for strings that do not fit the pattern.
    if (!ma || !mb) {
        if (!ma && !mb) return threeWay(a, b);
        return ma ? -1 : 1;
    }

    return threeWay(ma[1], mb[1]) ||
        threeWay(parseInt(ma[2], 10), parseInt(mb[2], 10)) ||
        threeWay(ma[3], mb[3]);
}

// sort() works in place, so `order` and `arr` refer to the same array.
var order = arr.sort(compareByRegex);
先用前导零填充字符串中的数字,然后按普通方式排序。
/**
 * Rewrites every digit run in a string so that ordinary string comparison
 * orders the runs by numeric value: leading zeros are stripped and the run is
 * prefixed with its own length, zero-padded to 10 characters.
 * A longer number therefore always sorts after a shorter one, and numbers of
 * equal length compare digit by digit. Unlike fixed-width padding with
 * slice(), no digits are ever cut off.
 *
 * @param {*} s - Value to encode; it is converted to a string first.
 * @returns {string} The encoded string.
 */
function encodeNumbers(s) {
    return ('' + s).replace(/\d+/g, function (n) {
        // Keep one digit so that "0" / "000" stay "0".
        var digits = n.replace(/^0+(?=\d)/, '');
        return ('000000000' + digits.length).slice(-10) + digits;
    });
}

/**
 * Natural-order comparator that works without the `numeric` collation option.
 *
 * @param {*} a - First value.
 * @param {*} b - Second value.
 * @returns {number} Negative, zero or positive, as returned by localeCompare.
 */
var naturalSort = function (a, b) {
    return encodeNumbers(a).localeCompare(encodeNumbers(b));
};

/**
 * Natural-order comparator that relies on the `numeric` collation option of
 * String.prototype.localeCompare.
 *
 * @param {*} a - First value.
 * @param {*} b - Second value.
 * @returns {number} Negative, zero or positive, as returned by localeCompare.
 */
var naturalSortModern = function (a, b) {
    return ('' + a).localeCompare(('' + b), 'en', { numeric: true });
};

console.dir((["IL0 Foo", "PI0 Bar", "IL10 Baz", "IL3 Bob says hello"].sort(naturalSort)));

console.dir((["IL0 Foo", "PI0 Bar", "IL10 Baz", "IL3 Bob says hello"].sort(naturalSortModern)));
我非常喜欢georg的解决方案,但是我需要在数字前使用下划线(_)进行排序。这是我修改他的代码的方法:
// Chunk kinds: (1) underscore runs, (2) digit runs, (3) everything else.
var chunkRgx = /(_+)|([0-9]+)|([^0-9_]+)/g;

/**
 * Splits a string into chunks according to `chunkRgx`.
 *
 * Each chunk is a triple [underscores, number, text]; the slots that do not
 * apply to the chunk are filled with "0", Infinity and "" respectively.
 *
 * @param {*} s - Value to split; it is converted to a string first.
 * @returns {Array<Array>} List of [underscores, number, text] triples.
 */
function splitChunks(s) {
    var chunks = [];
    // replace() is used only as an iterator over the matches.
    String(s).replace(chunkRgx, function (_, $1, $2, $3) {
        chunks.push([$1 || "0", $2 || Infinity, $3 || ""]);
    });
    return chunks;
}

/**
 * Natural-order comparator in which underscore runs are compared first (via
 * localeCompare against the "0" placeholder), then digit runs by numeric
 * value, then the remaining text via localeCompare.
 *
 * @param {string} a - First string.
 * @param {string} b - Second string.
 * @returns {number} Negative if a sorts first, positive if b sorts first, 0 if equal.
 */
function naturalCompare(a, b) {
    var ax = splitChunks(a);
    var bx = splitChunks(b);
    var shared = Math.min(ax.length, bx.length);

    for (var i = 0; i < shared; i++) {
        var an = ax[i];
        var bn = bx[i];
        // Infinity - Infinity is NaN (falsy), so two non-numeric chunks fall
        // through to the text comparison.
        var nn = an[0].localeCompare(bn[0]) ||
            (an[1] - bn[1]) ||
            an[2].localeCompare(bn[2]);
        if (nn) return nn;
    }

    // All shared chunks are equal: the string with fewer chunks sorts first.
    return ax.length - bx.length;
}

/////////////////////////

var test = [
    "img12.png",
    "img10.png",
    "img2.png",
    "img1.png",
    "img101.png",
    "img101a.png",
    "abc10.jpg",
    "abc10",
    "abc2.jpg",
    "20.jpg",
    "20",
    "abc",
    "abc2",
    "_abc",
    "_ab_c",
    "_ab__c",
    "_abc_d",
    "ab_",
    "abc_",
    "_ab_cd",
    ""
];

test.sort(naturalCompare);
console.log(JSON.stringify(test, null, 3));
您可以执行如下正则表达式来获取字符串的非数字和数字部分:
var s = "foo124bar23";
// Splits into alternating non-digit and digit runs:
// ["foo", "124", "bar", "23"]
s.match(/[^\d]+|\d+/g);
返回:["foo","124","bar","23"]
然后,在您的compare函数中,您可以迭代比较两个字符串的各个部分。第一个不匹配部分确定总体比较的结果。对于每个部分,在进行比较之前,请检查该部分是否以数字开头以及是否将其解析为数字。
再添加一个替代方法(为什么不这样做):
var ary = ["IL0 Foo", "PI0 Bar", "IL10 Hello", "IL10 Baz", "IL3 Bob says hello"];

/**
 * Breaks a string out into its three components.
 * "IL10 Bar" => ['IL', 10, ' Bar']  (the third part keeps its leading whitespace)
 *
 * @param {*} i - Value to parse; null/undefined are treated as "".
 * @returns {Array} [letters, number, rest], or [] when the value does not
 *     have the form <letters><digits><whitespace><anything>.
 */
function getParts(i) {
    var text = i == null ? '' : String(i);
    var parts = text.match(/^([a-z]+)([0-9]+)(\s.*)$/i);
    if (parts) {
        return [
            parts[1],
            parseInt(parts[2], 10),
            parts[3]
        ];
    }
    return []; // erroneous
}

/**
 * Comparator ordering by letters, then number, then the remaining text.
 * Elements that cannot be parsed are moved to the end of the array; two
 * unparsable elements compare as equal.
 *
 * @param {string} a - First element.
 * @param {string} b - Second element.
 * @returns {number} -1, 0 or 1.
 */
function compareByParts(a, b) {
    // grab the parts
    var partsA = getParts(a);
    var partsB = getParts(b);

    // trouble parsing (both fail = no shift, otherwise
    // move the troubled element to the end of the array)
    if (partsA.length === 0 && partsB.length === 0) return 0;
    if (partsA.length === 0) return 1;  // a is unparsable: a goes after b
    if (partsB.length === 0) return -1; // b is unparsable: a goes before b

    // Compare the letter portion, then the number, then the remaining
    // string; the first component that differs decides the order.
    for (var k = 0; k < 3; k++) {
        if (partsA[k] < partsB[k]) return -1;
        if (partsA[k] > partsB[k]) return 1;
    }

    // exact match
    return 0;
}

ary.sort(compareByParts);
jsfiddle示例
不算优雅,但可以先比较前两个字符的字符编码;如果它们都相等,再解析并比较数字:
var arr = ["IL0 Foo", "IL10 Baz", "IL3 Bob says hello", "PI0 Bar"];

/**
 * Returns the first run of digits in a string as an integer.
 *
 * @param {string} s - String to inspect.
 * @returns {number} The parsed number, or Infinity when the string contains
 *     no digits (so such strings sort after numbered ones with the same prefix).
 */
function firstNumber(s) {
    var m = s.match(/\d+/);
    return m ? parseInt(m[0], 10) : Infinity;
}

/**
 * Comparator that orders by the char codes of the first two characters, then
 * by the first number found in the string, and finally by plain string
 * comparison so that otherwise equal entries still get a defined order.
 *
 * @param {string} a1 - First string.
 * @param {string} b1 - Second string.
 * @returns {number} -1, 0 or 1.
 */
function compareByPrefix(a1, b1) {
    // Compare the first two character codes, one position at a time.
    for (var i = 0; i < 2; i++) {
        var codeA = a1.charCodeAt(i);
        var codeB = b1.charCodeAt(i);
        if (codeA > codeB) return 1;
        if (codeB > codeA) return -1;
    }

    // Prefixes are equal: compare the numbers.
    var a = firstNumber(a1);
    var b = firstNumber(b1);
    if (a < b) return -1;
    if (a > b) return 1;

    // Same prefix and number: fall back to the whole string.
    if (a1 < b1) return -1;
    if (a1 > b1) return 1;
    return 0;
}

arr.sort(compareByPrefix);
例