fi_target.py 2.88 KB
Newer Older
aarongitrepos's avatar
All  
aarongitrepos committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96
'''
Created on 29.09.2017

@author: lemmerfn
'''
from collections import namedtuple
from functools import total_ordering
import pysubgroup_mod as ps



@total_ordering
class FITarget:
    """Target used for frequent-itemset mining.

    The target carries no attributes of its own; the only statistics it
    produces are the size of a subgroup and the size of the dataset.
    """

    # Keys that a complete statistics dict must contain.
    statistic_types = ('size_sg', 'size_dataset')

    def __repr__(self):
        return "T: Frequent Itemsets"

    def __eq__(self, other):
        """Compare two targets by their instance attributes.

        Returns ``NotImplemented`` for operands that are not ``FITarget``
        instances, so comparing with e.g. an ``int`` or ``None`` yields
        ``False`` instead of raising ``AttributeError`` on a missing
        ``__dict__``.
        """
        if not isinstance(other, FITarget):
            return NotImplemented
        return self.__dict__ == other.__dict__

    def __lt__(self, other):
        """Order targets by their string representation."""
        return str(self) < str(other)

    def get_attributes(self):
        """Return the attributes used by this target (always an empty list)."""
        return []

    def get_base_statistics(self, subgroup, data):
        """Return the size of ``subgroup`` as reported by
        ``ps.get_cover_array_and_size``."""
        _, size = ps.get_cover_array_and_size(subgroup, len(data), data)
        return size

    def calculate_statistics(self, subgroup_description, data, cached_statistics=None):
        """Return a dict with the keys ``'size_sg'`` and ``'size_dataset'``.

        * If ``cached_statistics`` is a dict that already holds every key in
          ``statistic_types``, it is returned unchanged and nothing is
          recomputed.
        * If it is a dict that lacks some of those keys, it is updated in
          place and returned.
        * Otherwise (``None`` or not a dict) a new dict is created.
        """
        if cached_statistics is None or not isinstance(cached_statistics, dict):
            statistics = {}
        elif all(k in cached_statistics for k in FITarget.statistic_types):
            return cached_statistics
        else:
            statistics = cached_statistics

        _, size = ps.get_cover_array_and_size(subgroup_description, len(data), data)

        statistics['size_sg'] = size
        statistics['size_dataset'] = len(data)
        return statistics


class SimpleCountQF(ps.AbstractInterestingnessMeasure):
    """Base quality function whose only statistic is the subgroup size."""

    # Record type returned by calculate_statistics and gp_get_params.
    tpl = namedtuple('CountQF_parameters', ('subgroup_size',))

    def __init__(self):
        # Dataset length; stays None until calculate_constant_statistics runs.
        self.size_dataset = None
        self.has_constant_statistics = True
        self.required_stat_attrs = ('subgroup_size',)

    def calculate_constant_statistics(self, data, target):
        """Remember the number of rows in ``data``; ``target`` is unused."""
        self.size_dataset = len(data)

    def calculate_statistics(self, subgroup_description, target, data, statistics=None):
        """Return a ``tpl`` holding the size of ``subgroup_description``.

        ``target`` and ``statistics`` are accepted but not used here.
        """
        _cover, sg_size = ps.get_cover_array_and_size(
            subgroup_description, self.size_dataset, data
        )
        return SimpleCountQF.tpl(sg_size)

    def gp_get_stats(self, _):
        """Return the statistics dict for one item: a subgroup size of 1."""
        return dict(subgroup_size=1)

    def gp_get_null_vector(self):
        """Return the neutral statistics dict: a subgroup size of 0."""
        return dict(subgroup_size=0)

    def gp_merge(self, l, r):
        """Add the subgroup size of ``r`` onto ``l`` in place; returns None."""
        l["subgroup_size"] += r["subgroup_size"]

    def gp_get_params(self, _cover_arr, v):
        """Wrap the subgroup size stored in ``v`` into a ``tpl`` record."""
        return SimpleCountQF.tpl(subgroup_size=v['subgroup_size'])

    def gp_to_str(self, stats):
        """Return the subgroup size in ``stats`` as a string."""
        sg_size = stats['subgroup_size']
        return str(sg_size)

    @property
    def gp_requires_cover_arr(self):
        """Always False."""
        return False


class CountQF(SimpleCountQF, ps.BoundedInterestingnessMeasure):
    """Quality function that scores a subgroup by its size."""

    def evaluate(self, subgroup, target, data, statistics=None):
        """Return the subgroup size taken from the ensured statistics."""
        stats = self.ensure_statistics(subgroup, target, data, statistics)
        return stats.subgroup_size

    def optimistic_estimate(self, subgroup, target, data, statistics=None):
        """Return the same value as ``evaluate``: the subgroup size."""
        stats = self.ensure_statistics(subgroup, target, data, statistics)
        return stats.subgroup_size



class AreaQF(SimpleCountQF):
    """Quality function that scores a subgroup by size times depth."""

    def evaluate(self, subgroup, target, data, statistics=None):
        """Return ``subgroup_size * subgroup.depth`` for ``subgroup``."""
        stats = self.ensure_statistics(subgroup, target, data, statistics)
        sg_size = stats.subgroup_size
        return sg_size * subgroup.depth