1 from sklearn
.cluster
import AffinityPropagation
2 from sklearn
.feature_extraction
import DictVectorizer
3 from collections
import defaultdict
def __init__(self, records):
    """Load decklists from *records*, vectorize them and cluster them.

    For every record with a truthy ``'Decklist'`` entry a
    ``mlpccg.DeckList`` is built from the record's ``'Name'`` and
    ``'Decklist'`` values, its features are extracted, and its
    ``'Placement'`` is stored as an int.  The feature dicts are vectorized
    with ``DictVectorizer`` and clustered with ``AffinityPropagation``.

    Args:
        records: object whose ``all()`` method yields dict-like records
            with ``'Name'``, ``'Decklist'`` and ``'Placement'`` keys.
    """
    self.records = records

    # Every container that is appended to below must exist first;
    # otherwise the first ``append``/item assignment raises AttributeError.
    self.features = []
    self.features_alt = []
    self.decks = []
    self.placements = {}
    self.clusters = defaultdict(list)

    # Snapshot of the known card ids (a list on both Python 2 and 3).
    self.all_ids = list(mlpccg.CardDb.CARDDB._by_id.keys())

    for record in self.records.all():
        if not record['Decklist']:
            # Records without a decklist cannot be clustered.
            continue
        decklist = mlpccg.DeckList(name=record['Name'], url=record['Decklist'])
        self.features.append(self.extract_features(decklist))
        self.features_alt.append(self.extract_features_alt(decklist))
        self.decks.append(decklist)
        # NOTE(review): keyed by the DeckList object itself, so DeckList
        # must be hashable -- confirm against mlpccg.
        self.placements[decklist] = int(record['Placement'])

    self.vectorizer = DictVectorizer()
    X = self.vectorizer.fit_transform(self.features).toarray()

    self.af = AffinityPropagation().fit(X)
    self.labels = self.af.labels_

    # Count distinct labels, not counting the label -1 when it occurs
    # (presumably the "no cluster" marker -- confirm for this estimator).
    self.n_clusters = len(set(self.labels)) - (1 if -1 in self.labels else 0)

    # labels[i] is the cluster of decks[i]; group the decks by label.
    for label, deck in zip(self.labels, self.decks):
        self.clusters[label].append(deck)
def extract_features_alt(self, deck):
    """Return a per-card count vector for *deck* as a dict.

    The result has one key per id in ``self.all_ids`` (initialised to 0);
    each card in ``deck.cards`` increments the entry for its lower-cased
    ``'id'``.

    Args:
        deck: object with a ``cards`` iterable of dict-like cards that
            each have an ``'id'`` string.

    Returns:
        dict mapping card id -> number of copies in the deck.

    Raises:
        KeyError: if a card id is not present in ``self.all_ids``.
    """
    features = dict.fromkeys(self.all_ids, 0)
    for card in deck.cards:
        # Ids are matched case-insensitively by lower-casing the deck's ids.
        features[card['id'].lower()] += 1
    # Without this return the caller would receive None instead of features.
    return features
def extract_features(self, deck):
    """Return the feature dict used for clustering *deck*.

    Currently delegates to ``extract_features_alt`` (per-card counts).
    The colour/type/power summary that used to sit, unreachable, after
    the ``return`` is kept in ``_extract_summary_features``.
    """
    return self.extract_features_alt(deck)


def _extract_summary_features(self, deck):
    """Return colour/type counts plus power and control aggregates.

    This is the alternative feature set that was previously dead code in
    ``extract_features``.  It is not called by the clustering; it is kept
    so the feature set can be switched deliberately.

    Args:
        deck: object with a ``cards`` sequence of dict-like cards having
            ``'type'`` and, for Friend/Mane cards, ``'color'``; ``'power'``
            and ``'control'`` are optional.

    Returns:
        dict with a count per colour and per card type, ``'Power'`` as the
        mean power over all cards in the deck, and ``'Control'`` as the
        summed control.
    """
    features = {
        'Blue': 0, 'Yellow': 0, 'Purple': 0, 'White': 0, 'Orange': 0, 'Pink': 0,
        'Friend': 0, 'Event': 0, 'Resource': 0, 'Troublemaker': 0, 'Mane': 0, 'Problem': 0,
        'Power': 0.0, 'Control': 0,
    }

    for card in deck.cards:
        # Only Friend and Mane cards contribute to the colour counts.
        if card['type'] in ('Friend', 'Mane'):
            features[card['color']] += 1

        features[card['type']] += 1

        # ``in`` replaces dict.has_key, which no longer exists on Python 3.
        if 'power' in card:
            features['Power'] += card['power']

        if 'control' in card:
            features['Control'] += card['control']

    # Average power per card; skip the division for an empty deck.
    if deck.cards:
        features['Power'] = features['Power'] / len(deck.cards)

    return features
def predict(self, decklist):
    """Return the cluster label predicted for *decklist*.

    The deck's features are encoded with the vectorizer that was fitted
    in ``__init__`` and passed to the fitted clustering model.

    Args:
        decklist: deck object accepted by ``extract_features``.

    Returns:
        The first (only) element of ``self.af.predict`` for this deck.
    """
    features = self.extract_features(decklist)
    # Use transform, not fit_transform: re-fitting on a single sample
    # would replace the vocabulary learned from the training decks.
    X = self.vectorizer.transform([features]).toarray()
    return self.af.predict(X)[0]
def ranking(self):
    """Return clusters ordered by the average placement of their decks.

    Each deck in ``self.decks`` contributes its ``self.placements`` value
    to the cluster given by the matching entry of ``self.labels``.

    Returns:
        list of ``(average_placement, cluster_label)`` tuples sorted by
        ascending average placement.
    """
    cluster_placements = {}
    for label, deck in zip(self.labels, self.decks):
        # One stats record per cluster, created on first sight so that
        # 'sum' and 'num' always exist before they are accumulated.
        stats = cluster_placements.setdefault(
            label, {'label': label, 'sum': 0, 'num': 0})
        stats['sum'] += self.placements[deck]
        stats['num'] += 1

    for stats in cluster_placements.values():
        # float() keeps true division on Python 2 as well.
        stats['avg'] = stats['sum'] / float(stats['num'])

    ordered = sorted(cluster_placements.values(), key=lambda s: s['avg'])
    return [(stats['avg'], stats['label']) for stats in ordered]
if __name__ == '__main__':
    # Cluster all recorded tournament decks and print one line per cluster:
    # the cluster's average placement followed by its distinct deck names.
    tournament_records = mlpccg.TournamentRecords()
    clustering = Clustering(tournament_records)

    for placement_avg, cluster_label in clustering.ranking():
        # Deduplicate names; sorting makes the output order deterministic.
        deck_names = sorted(set(deck.name for deck in clustering.clusters[cluster_label]))
        # Parenthesised single-argument print works on Python 2 and 3 alike.
        print('%.1f - %s' % (placement_avg, '; '.join(deck_names)))