JavaScript - How to avoid an item in an array being repeated in another cluster?
I have an array of objects containing approximately 58,000 unique strings (partname).
parts = [ { _id: 59a942a8c0b7467bf08711df, partname: '0' }, { _id: 59a94299c0b7467bf084a917, partname: '9129' }, { _id: 59a94299c0b7467bf084a918, partname: '9130' }, .., .. ]
Here is my code. I'm trying to create clusters of similar strings.
With this code, a partname that is already connected to one cluster can appear again in another cluster. I want to avoid this: one unique part name should be connected to only one cluster.
Here is an example of what I get:
{ "9129": [ "9132", "9190", "9279" ] }, { "9130": [ "9132", "9180", "9190", "9430" ] }
As you can see, the strings 9132 and 9190 are repeated in the following cluster. So my question is: how do I remove the strings 9132, 9190, and 9279 from later clusters after they have been connected to cluster 9129?
/**
 * Builds clusters of similar part names (the version from the question).
 *
 * Each of the first five parts is used as a cluster seed. Every part whose
 * `fuzzball.token_sort_ratio` score against the seed is above 70 is added to
 * that seed's cluster, except names equal to the seed ignoring case.
 *
 * Known limitation (the subject of the question): nothing is remembered
 * between seeds, so the same part name can end up in several clusters.
 *
 * @param {Array<{partname: string}>} arrayofparts - Parts to cluster.
 * @returns {Array<Object<string, string[]>>} One `{ seedName: members }`
 *   object per seed, with members sorted by the default string order.
 */
function createcluster(arrayofparts) {
  const clusterArray = [];
  // Never read past the end of the input when fewer than five parts exist.
  const seedCount = Math.min(5, arrayofparts.length);

  for (let i = 0; i < seedCount; i++) {
    const cluster = [];
    const seedName = arrayofparts[i].partname;
    console.log(arrayofparts[i]);

    // NOTE(review): the scan starts at index 1, so the first part can never
    // become a member of another cluster — confirm this is intended.
    for (let j = 1; j < arrayofparts.length; j++) {
      const candidate = arrayofparts[j].partname;
      if (fuzzball.token_sort_ratio(seedName, candidate) > 70) {
        if (
          seedName.toLowerCase() !== candidate.toLowerCase() &&
          !cluster.includes(candidate)
        ) {
          cluster.push(candidate);
        }
      }
    }

    const obj = {};
    obj[seedName] = cluster.sort();
    clusterArray.push(obj);
  }

  console.log("clusterarray", JSON.stringify(clusterArray, null, 2));
  console.log("clusterarray.length", clusterArray.length);
  return clusterArray;
}
You could define a bank of used strings outside the scope of the loop, add each string to it as it is used, and check whether a string has already been added there before adding it again.
Alternatively, if you have a finite (and potentially iterable) number of part names, you could hold them as keys with a true/false value that represents a 'usable' switch, and flip it when a name is used.
Solution (using a "bank of used strings" outside the scope of the loop):
/**
 * Builds clusters of similar part names so that every part name belongs to
 * at most one cluster.
 *
 * The leading `maxSeeds` parts are tried as cluster seeds, in order. A seed
 * that has already been placed in an earlier cluster is skipped. Every
 * still-unused part whose `fuzzball.token_sort_ratio` score against the seed
 * is above `threshold` joins the seed's cluster and is recorded in a shared
 * "bank" of used names, so later seeds cannot claim it again.
 *
 * @param {Array<{partname: string}>} arrayofparts - Parts to cluster.
 * @param {Object} [options]
 * @param {number} [options.maxSeeds=5] - How many leading parts may act as
 *   seeds. Pass `Infinity` to consider every part.
 * @param {number} [options.threshold=70] - A candidate joins a cluster only
 *   when its score is strictly greater than this value.
 * @returns {Array<Object<string, string[]>>} One `{ seedName: members }`
 *   object per seed, with members sorted by the default string order.
 */
function createcluster(arrayofparts, { maxSeeds = 5, threshold = 70 } = {}) {
  // A Set gives constant-time membership checks; the input may hold tens of
  // thousands of names, and the check runs inside a nested loop.
  const usedNames = new Set();
  const clusterArray = [];
  // Never read past the end of the input when fewer than maxSeeds parts exist.
  const seedCount = Math.min(maxSeeds, arrayofparts.length);

  for (let i = 0; i < seedCount; i++) {
    const seedName = arrayofparts[i].partname;
    if (usedNames.has(seedName)) {
      continue;
    }
    // Mark the seed itself so it can never become a member of a later cluster.
    usedNames.add(seedName);
    console.log(arrayofparts[i]);

    const seedKey = seedName.toLowerCase();
    const cluster = [];
    for (const { partname } of arrayofparts) {
      // Skip names that are already taken, and case-variants of the seed.
      if (usedNames.has(partname) || partname.toLowerCase() === seedKey) {
        continue;
      }
      if (fuzzball.token_sort_ratio(seedName, partname) > threshold) {
        cluster.push(partname);
        usedNames.add(partname);
      }
    }

    clusterArray.push({ [seedName]: cluster.sort() });
  }

  console.log("clusterarray", JSON.stringify(clusterArray, null, 2));
  console.log("clusterarray.length", clusterArray.length);
  return clusterArray;
}
Comments
Post a Comment