1 from sklearn
.cluster
import AffinityPropagation
2 from sklearn
.feature_extraction
import DictVectorizer
3 from collections
import defaultdict
6 from mlpccg
.CardDb
import CARDDB
7 from mlpccg
.DeckList
import DeckList
def __init__(self, records):
    """Cluster the deck lists found in ``records``.

    Every record is indexed with ``'decklist'`` and ``'placement'``.  Each
    deck list is turned into a per-card count vector, the vectors are
    clustered with affinity propagation, and the decks are grouped by the
    label they were assigned.

    Args:
        records: iterable of mappings with a ``'decklist'`` entry (used as
            a dictionary key, so it must be hashable) and a
            ``'placement'`` entry that ``int()`` accepts.
    """
    self.clusters = defaultdict(list)
    self.all_ids = CARDDB._by_id.keys()
    self.records = records
    self.vectorizer = DictVectorizer()

    # Always start from fresh per-instance accumulators so the loop below
    # never appends to missing or shared state.
    self.features = []
    self.decks = []
    self.placements = {}

    for record in self.records:
        deck = record['decklist']
        self.features.append(self.extract_features_alt(deck))
        self.decks.append(deck)
        self.placements[deck] = int(record['placement'])

    X = self.vectorizer.fit_transform(self.features).toarray()
    self.af = AffinityPropagation().fit(X)
    self.labels = self.af.labels_

    # The label -1 is not counted as a cluster of its own.
    # NOTE(review): scikit-learn appears to use -1 when the fit does not
    # converge -- confirm against the installed version.
    self.n_clusters = len(set(self.labels)) - (1 if -1 in self.labels else 0)

    # labels[i] belongs to decks[i]; group the decks by their label.
    for label, deck in zip(self.labels, self.decks):
        self.clusters[label].append(deck)
def extract_features_alt(self, deck):
    """Return a card-count feature dictionary for ``deck``.

    The result has one entry per id in ``self.all_ids``, initialised to 0,
    and every card in ``deck.cards`` adds 1 to the entry for its
    lower-cased ``'id'``.

    Args:
        deck: object whose ``cards`` attribute iterates over mappings
            with an ``'id'`` string.

    Returns:
        dict mapping card id to the number of copies in the deck.

    Raises:
        KeyError: if a card's lower-cased id is not in ``self.all_ids``.
    """
    # dict.fromkeys avoids building a throwaway list of zeros.
    features = dict.fromkeys(self.all_ids, 0)
    for card in deck.cards:
        features[card['id'].lower()] += 1
    return features
def extract_features(self, deck):
    """Return aggregate colour/type/power/control features for ``deck``.

    Counts cards per type, counts colours for ``Friend`` and ``Mane``
    cards only, sums ``'control'``, and averages ``'power'`` over all
    cards in the deck (cards without a ``'power'`` entry still count
    towards the divisor).

    Args:
        deck: object whose ``cards`` attribute is a sized iterable of
            mappings with at least a ``'type'`` entry.

    Returns:
        dict with the colour counts, type counts, ``'Power'`` (mean) and
        ``'Control'`` (sum).

    Raises:
        KeyError: if a card's type (or, for Friend/Mane cards, colour)
            is not one of the known feature names.
    """
    features = {
        'Blue': 0, 'Yellow': 0, 'Purple': 0, 'White': 0, 'Orange': 0, 'Pink': 0,
        'Friend': 0, 'Event': 0, 'Resource': 0, 'Troublemaker': 0, 'Mane': 0, 'Problem': 0,
        'Power': 0.0, 'Control': 0}

    for card in deck.cards:
        if card['type'] in ('Friend', 'Mane'):
            features[card['color']] += 1

        features[card['type']] += 1

        # `in` replaces dict.has_key(), which no longer exists in Python 3.
        if 'power' in card:
            features['Power'] += card['power']

        if 'control' in card:
            features['Control'] += card['control']

    # An empty deck keeps Power at 0.0 instead of dividing by zero.
    card_count = len(deck.cards)
    if card_count:
        features['Power'] = features['Power'] / card_count

    return features
def predict(self, decklist):
    """Return the cluster label predicted for a single deck list.

    Args:
        decklist: deck accepted by ``extract_features_alt``.

    Returns:
        The first (only) element of ``self.af.predict`` for this deck.
    """
    # Use transform(), not fit_transform(): re-fitting here would replace
    # the vocabulary learned from the training decks with one derived from
    # this single sample, for this call and every later one.
    X = self.vectorizer.transform(
        [self.extract_features_alt(decklist)]).toarray()
    return self.af.predict(X)[0]
69 cluster_placements
= defaultdict(dict)
70 for i
, deck
in enumerate(self
.decks
):
71 stats
= cluster_placements
[self
.labels
[i
]]
73 stats
['sum'] += self
.placements
[deck
]
76 stats
['label'] = self
.labels
[i
]
77 stats
['sum'] = self
.placements
[deck
]
80 stats
['avg'] = stats
['sum'] / float(stats
['num'])
82 for stats
in sorted(cluster_placements
.values(), key
=lambda x
: x
['avg']):
83 ranking
+= [(stats
['avg'], stats
['label'])]