1 from sklearn
.cluster
import AffinityPropagation
2 from sklearn
.feature_extraction
import DictVectorizer
3 from collections
import defaultdict
5 from mlpccg
.CardDb
import CARDDB
6 from mlpccg
.DeckList
import DeckList
def __init__(self, records):
    """Cluster deck lists by their per-card counts.

    Each record contributes one feature dict (built by
    ``extract_features_alt``). The dicts are vectorized with a
    ``DictVectorizer``, clustered with ``AffinityPropagation``, and every
    deck is then filed under the label it was assigned.

    Args:
        records: Iterable of mappings providing a ``'decklist'`` entry
            (hashable, exposing a ``cards`` attribute) and a
            ``'placement'`` entry that ``int()`` accepts.
    """
    # These accumulators must exist before the loop below fills them.
    self.features = []
    self.decks = []
    self.placements = {}
    self.clusters = defaultdict(list)
    self.all_ids = CARDDB._by_id.keys()

    self.records = records
    self.vectorizer = DictVectorizer()

    for record in self.records:
        decklist = record['decklist']
        self.features.append(self.extract_features_alt(decklist))
        self.decks.append(decklist)
        self.placements[decklist] = int(record['placement'])

    X = self.vectorizer.fit_transform(self.features).toarray()
    self.af = AffinityPropagation().fit(X)
    self.labels = self.af.labels_

    # A label of -1 is not counted as a cluster.
    self.n_clusters = len(set(self.labels)) - (1 if -1 in self.labels else 0)

    # labels[i] belongs to decks[i], so the two can be walked in lockstep.
    for label, deck in zip(self.labels, self.decks):
        self.clusters[label].append(deck)
def extract_features_alt(self, deck):
    """Count how many copies of each known card id appear in a deck.

    Args:
        deck: Object whose ``cards`` attribute iterates over cards, each
            supporting ``card['id']`` and yielding a string.

    Returns:
        dict: One entry per id in ``self.all_ids`` (0 when the card is
        absent), so every deck produces the same set of keys.

    Raises:
        KeyError: If a card's lower-cased id is not in ``self.all_ids``.
    """
    features = dict.fromkeys(self.all_ids, 0)
    for card in deck.cards:
        # Ids are lower-cased before lookup.
        features[card['id'].lower()] += 1
    return features
def extract_features(self, deck):
    """Summarise a deck as colour counts, card-type counts and stat totals.

    Args:
        deck: Object whose ``cards`` attribute is a sized collection of
            dict-like cards. Every card needs a ``'type'`` entry;
            ``'Friend'`` and ``'Mane'`` cards also need ``'color'``;
            ``'power'`` and ``'control'`` are optional.

    Returns:
        dict: One counter per colour and per card type, plus ``'Power'``
        (total power divided by the number of cards) and ``'Control'``
        (total control, not averaged).

    Raises:
        KeyError: If a card's type, or a Friend/Mane card's colour, is not
            one of the keys initialised below.
    """
    features = {
        'Blue': 0, 'Yellow': 0, 'Purple': 0, 'White': 0, 'Orange': 0, 'Pink': 0,
        'Friend': 0, 'Event': 0, 'Resource': 0, 'Troublemaker': 0, 'Mane': 0, 'Problem': 0,
        'Power': 0.0, 'Control': 0}

    for card in deck.cards:
        card_type = card['type']

        # Colour is only tallied for Friend and Mane cards.
        if card_type in ('Friend', 'Mane'):
            features[card['color']] += 1

        features[card_type] += 1

        # `in` replaces dict.has_key(), which no longer exists in Python 3.
        if 'power' in card:
            features['Power'] += card['power']

        if 'control' in card:
            features['Control'] += card['control']

    # An empty deck keeps Power at 0.0 instead of dividing by zero.
    card_count = len(deck.cards)
    if card_count:
        features['Power'] = features['Power'] / card_count

    return features
def predict(self, decklist):
    """Return the cluster label the fitted model assigns to a deck list.

    Args:
        decklist: Deck accepted by ``extract_features_alt``.

    Returns:
        The first (and only) element of ``self.af.predict`` for this deck.
    """
    deck_features = self.extract_features_alt(decklist)
    # transform(), not fit_transform(): prediction must reuse the feature
    # columns the vectorizer already learned, not refit on this one sample.
    X = self.vectorizer.transform([deck_features]).toarray()
    return self.af.predict(X)[0]
68 cluster_placements
= defaultdict(dict)
69 for i
, deck
in enumerate(self
.decks
):
70 stats
= cluster_placements
[self
.labels
[i
]]
72 stats
['sum'] += self
.placements
[deck
]
75 stats
['label'] = self
.labels
[i
]
76 stats
['sum'] = self
.placements
[deck
]
79 stats
['avg'] = stats
['sum'] / float(stats
['num'])
81 for stats
in sorted(cluster_placements
.values(), key
=lambda x
: x
['avg']):
82 ranking
+= [(stats
['avg'], stats
['label'])]