arrays - Javascript number data grouping and outlier removal -
I have an array as follows:
var myarray = [3, 6, 8, 9, 16, 17, 19, 37]
I need to remove the outliers and then group the remaining data into whatever distinct groups appear. In this case, 37
would be removed as an outlier, [3, 6, 8, 9]
would be returned as the first group, and [16, 17, 19]
would be returned as the second.
Here is a second example:
var mysecondarray = [80, 90, 100, 200, 280, 281, 287, 500, 510, 520, 800]
200
and 800
would be removed as outliers, [80, 90, 100]
would be the first group, [280, 281, 287]
the second, and [500, 510, 520]
the third.
I have already written code that removes outliers on the outside of the data, which is simple enough using the first and third quartiles. In other words, it has no problem removing 800
from mysecondarray
as an outlier, but it would not remove 200
as an outlier.
I suppose an outlier could be defined as a group with fewer than n
members, but the real issue is: what is an efficient method to divide the data into the appropriate number of groups?
Any help is appreciated!
This is a simple implementation; it may not be the perfect solution for this set of problems, but it should suffice for your example - and it may work beyond that as well.
By looking at the average distance between neighbouring numbers, and comparing that distance to the distance on either side of each number, it should be possible to remove outliers. Following that, the same metric can be used for grouping.
/**
 * Adds up the numeric entries of an array, ignoring anything that is not a
 * real number (e.g. the `undefined` placeholders used for missing distances).
 *
 * @param {Array<number|undefined>} arr - values to add
 * @returns {number} the sum of the numeric entries (0 when there are none)
 */
function sum(arr) {
  return arr
    .filter((value) => typeof value === 'number' && !Number.isNaN(value))
    .reduce((total, value) => total + value, 0);
}

/**
 * Arithmetic mean of the numeric entries of an array.
 *
 * Non-numeric entries are excluded from both the total and the count, so a
 * placeholder `undefined` does not drag the mean down.
 *
 * @param {Array<number|undefined>} arr - values to average
 * @returns {number} the mean, or NaN when the array has no numeric entries
 */
function avg(arr) {
  const count = arr.filter(
    (value) => typeof value === 'number' && !Number.isNaN(value),
  ).length;
  return count === 0 ? NaN : sum(arr) / count;
}

/**
 * Splits an array into consecutive runs, starting a new run whenever the gap
 * to the next element is greater than `dist`.
 *
 * Gaps are computed as `next - current`, so the input is assumed to be in
 * ascending order.
 *
 * @param {number[]} arr - values to split
 * @param {number} dist - largest gap still allowed inside one group
 * @returns {number[][]} the groups, in input order (empty array for empty input)
 */
function groupby(arr, dist) {
  if (arr.length === 0) {
    return [];
  }

  const groups = [];
  let group = [];
  for (let i = 0; i < arr.length; i++) {
    group.push(arr[i]);
    const isLast = i === arr.length - 1;
    if (!isLast && arr[i + 1] - arr[i] > dist) {
      groups.push(group);
      group = [];
    }
  }
  // The final run is never closed inside the loop.
  groups.push(group);
  return groups;
}

/**
 * Removes outliers from an array and groups what is left.
 *
 * An element counts as an outlier when the gap to each neighbour it has is
 * greater than the average gap between adjacent elements. The same average
 * gap is then used as the split threshold for grouping.
 *
 * Gaps are computed as `next - current`, so the input is assumed to be in
 * ascending order.
 *
 * @param {number[]} arr - values to clean and group
 * @returns {number[][]} the groups of remaining values
 */
function groupoutlier(arr) {
  // Gap to the previous / next neighbour; undefined at the respective edge.
  const distBefore = arr.map((value, i, all) =>
    i === 0 ? undefined : value - all[i - 1],
  );
  const distAfter = arr.map((value, i, all) =>
    i === all.length - 1 ? undefined : all[i + 1] - value,
  );
  const avgDist = avg(distAfter);

  const isOutlier = (i) => {
    const before = distBefore[i];
    const after = distAfter[i];
    // Edge elements have only one neighbour to be judged against.
    if (before === undefined) {
      return after > avgDist;
    }
    if (after === undefined) {
      return before > avgDist;
    }
    return before > avgDist && after > avgDist;
  };

  const kept = arr.filter((_, i) => !isOutlier(i));
  return groupby(kept, avgDist);
}

const myarray = [3, 6, 8, 9, 16, 17, 19, 37];
console.log(groupoutlier(myarray));

const mysecondarray = [80, 90, 100, 200, 280, 281, 287, 500, 510, 520, 800];
console.log(groupoutlier(mysecondarray));
Comments
Post a Comment