#!/usr/bin/env python # -*- coding:utf-8 -*- ''' Step 3 ''' from __future__ import print_function from base import Lo, create_csv, TARGET_DIR import os import csv import sys from s2 import RANK_ORDER los = Lo() xmodels = { 'IMSf': {}, 'IMSD': {}, 'IMHf': {}, 'IMHD': {}, 'IFSf': {}, 'IFSD': {}, 'IFHf': {}, 'IFHD': {}, 'CMSf': {}, 'CMSD': {}, 'CMHf': {}, 'CMHD': {}, 'CFSf': {}, 'CFSD': {}, 'CFHf': {}, 'CFHD': {}, 'PMSf': {}, 'PMSD': {}, 'PMHf': {}, 'PMHD': {}, 'PFSf': {}, 'PFSD': {}, 'PFHf': {}, 'PFHD': {}, } fs = ('Case1_LS.csv', 'Case1_Gender.csv', 'Case1_Level.csv', 'Case1_SciF.csv') def process_s2_data(): ''' docstring huhu? ''' xm_data = {} for ii in xmodels.keys(): fname = '%s-2.csv' % ii fpath = os.path.join(TARGET_DIR, 'part2', fname) with open(fpath, 'rb') as f: rows = csv.reader(f) rank = 0 for r in rows: if r[1] == "rank": rank = r[2] continue if ii not in xm_data: xm_data[ii] = {'all': set()} if rank not in xm_data[ii]: xm_data[ii][rank] = {} if r[0] not in xm_data[ii][rank]: xm_data[ii][rank][r[0]] = [] xm_data[ii][rank][r[0]] = r[1].split(',') xm_data[ii]['all'] = xm_data[ii]['all'].union(set(r[1].split(','))) return xm_data def produce_match_all(xdata): rows = [] for ii in xmodels.keys(): _los = set() for _rank in xdata[ii]: for i in ii: if i in xdata[ii][_rank]: _los = _los.union(set(xdata[ii][_rank][i])) rows.append([ii, ','.join(sorted(list(_los)))]) _f = 'step3_match_all.csv' create_csv(_f, rows, directory='part3') def produce_match_all_mra(xdata): for ii in xmodels.keys(): lo_count = {} for j in xdata[ii]['all']: if j not in lo_count: lo_count[j] = 0 for i in ii: for rank in RANK_ORDER: if rank in xdata[ii] and i in xdata[ii][rank] \ and j in xdata[ii][rank][i]: lo_count[j] += 1 rows = [] max_count = len(RANK_ORDER) for n in xrange(max_count, 0, -1): rows.append([ '%s/%s' % (n, max_count), ','.join([k for (k, v) in lo_count.items() if v == n]) ]) _f = '%s-all.csv' % (ii,) create_csv(_f, rows, directory='part3') def produce_match_rank_mra(xdata, rank): for ii in xmodels.keys(): lo_count = {} for j in xdata[ii]['all']: if j not in lo_count: lo_count[j] = 0 for i in ii: if rank in xdata[ii] and i in xdata[ii][rank] and \ j in xdata[ii][rank][i]: lo_count[j] += 1 rows = [] max_count = len(RANK_ORDER) for n in xrange(max_count, 0, -1): rows.append([ '%s/%s' % (n, max_count), ','.join([k for (k, v) in lo_count.items() if v == n]) ]) _f = '%s-rank-%s.csv' % (ii, rank) create_csv(_f, rows, directory='part3') def main(*argv): xmm = process_s2_data() produce_match_all(xmm) produce_match_all_mra(xmm) for i in RANK_ORDER: produce_match_rank_mra(xmm, i) if __name__ == '__main__': if len(sys.argv) > 1: main(sys.argv[1:]) else: main()