You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
118 lines
3.5 KiB
118 lines
3.5 KiB
#!/usr/bin/env python |
|
# -*- coding:utf-8 -*- |
|
''' |
|
Step 3 |
|
''' |
|
from __future__ import print_function |
|
from base import Lo, create_csv, TARGET_DIR |
|
import os |
|
import csv |
|
import sys |
|
from s2 import RANK_ORDER |
|
|
|
# Module-level Lo instance (Lo comes from base), created at import time.
# NOTE(review): nothing in the visible part of this module reads `los`;
# confirm it is still needed before relying on or removing it.
los = Lo()
|
|
|
# One empty dict per model key.  A key is four characters long: one taken
# from each of 'ICP', 'MF', 'SH' and 'fD', in that position order
# ('IMSf', 'IMSD', 'IMHf', ... 'PFHD' -- 24 keys in total).
xmodels = {
    first + second + third + fourth: {}
    for first in 'ICP'
    for second in 'MF'
    for third in 'SH'
    for fourth in 'fD'
}

# CSV file names; not referenced by the functions visible in this module.
fs = (
    'Case1_LS.csv',
    'Case1_Gender.csv',
    'Case1_Level.csv',
    'Case1_SciF.csv',
)
|
|
|
|
|
def process_s2_data():
    '''
    Load the step-2 CSV file of every model listed in ``xmodels``.

    For a model key ``<model>`` the file ``TARGET_DIR/part2/<model>-2.csv``
    is read row by row:

    * a row whose second cell is ``"rank"`` starts a new section; its third
      cell becomes the current rank for the rows that follow;
    * any other row maps its first cell to the comma-separated values found
      in its second cell, stored under the current rank.

    Rows that appear before the first rank row are stored under rank ``0``.
    Rows with fewer than two cells (e.g. blank lines) are skipped.

    Returns a dict shaped like::

        {model: {'all': set_of_every_value_seen,
                 rank: {first_cell: [value, ...]}}}

    Every model gets an entry (with at least an empty ``'all'`` set) even
    when its file contains no data rows, so callers can index by model
    without a KeyError.  A missing file still raises from ``open``.
    '''
    xm_data = {}
    for ii in xmodels:
        # Create the record before reading so that a file without data rows
        # still produces a usable (empty) entry.
        model_data = xm_data[ii] = {'all': set()}
        fpath = os.path.join(TARGET_DIR, 'part2', '%s-2.csv' % ii)
        # Binary mode is what the Python 2 csv module expects.
        with open(fpath, 'rb') as f:
            rank = 0
            for r in csv.reader(f):
                if len(r) < 2:
                    # Blank or truncated line: nothing to record.
                    continue
                if r[1] == "rank":
                    rank = r[2]
                    continue
                values = r[1].split(',')
                # A later row with the same first cell replaces the earlier
                # list for that rank; 'all' keeps accumulating regardless.
                model_data.setdefault(rank, {})[r[0]] = values
                model_data['all'].update(values)
    return xm_data
|
|
|
|
|
def produce_match_all(xdata):
    '''
    Build one ``[model, values]`` row per model in ``xmodels`` and pass the
    rows to ``create_csv`` as ``step3_match_all.csv`` (directory ``part3``).

    ``values`` is the sorted, comma-joined union of every list stored, under
    any rank, for a key that is one of the characters of the model name.

    xdata -- mapping indexed as ``xdata[model][rank][key] -> list``; each
             ``xdata[model]`` also carries an ``'all'`` entry holding a flat
             set, which is not a rank and is therefore skipped here.
    '''
    rows = []
    for ii in xmodels:
        _los = set()
        for _rank, by_key in xdata[ii].items():
            if _rank == 'all':
                # 'all' is a set of values, not a key -> list mapping.
                # Treating it as a rank would raise TypeError as soon as a
                # value happened to equal one of the model characters.
                continue
            for i in ii:
                if i in by_key:
                    _los.update(by_key[i])
        rows.append([ii, ','.join(sorted(_los))])
    _f = 'step3_match_all.csv'
    create_csv(_f, rows, directory='part3')
|
|
|
|
|
def produce_match_all_mra(xdata):
    '''
    For every model in ``xmodels``, tally the values in its ``'all'`` set
    and pass the result to ``create_csv`` as ``<model>-all.csv``
    (directory ``part3``).

    The tally of a value is the number of (model character, rank) pairs,
    with rank taken from RANK_ORDER, whose stored list contains the value.
    The rows run from n = len(RANK_ORDER) down to 1; each row is the label
    ``n/len(RANK_ORDER)`` followed by the comma-joined values whose tally is
    exactly n.

    NOTE(review): a tally can exceed len(RANK_ORDER) (one hit is possible
    per character per rank); values with such a tally appear in no row.
    Confirm that this is intended.
    '''
    for model in xmodels:
        record = xdata[model]

        # value -> number of (character, rank) lists that contain it
        tally = {}
        for value in record['all']:
            tally[value] = sum(
                1
                for ch in model
                for rank in RANK_ORDER
                if rank in record
                and ch in record[rank]
                and value in record[rank][ch]
            )

        # Group values by tally, keeping the dict's own iteration order.
        grouped = {}
        for value, hits in tally.items():
            grouped.setdefault(hits, []).append(value)

        ceiling = len(RANK_ORDER)
        rows = [
            ['%s/%s' % (n, ceiling), ','.join(grouped.get(n, []))]
            for n in xrange(ceiling, 0, -1)
        ]
        create_csv('%s-all.csv' % (model,), rows, directory='part3')
|
|
|
|
|
def produce_match_rank_mra(xdata, rank):
    '''
    For every model in ``xmodels``, tally the values in its ``'all'`` set
    within a single rank and pass the result to ``create_csv`` as
    ``<model>-rank-<rank>.csv`` (directory ``part3``).

    The tally of a value is the number of model-name characters whose list
    under ``rank`` contains the value (zero when the model has no data for
    that rank).  The rows run from n = len(RANK_ORDER) down to 1; each row
    is the label ``n/len(RANK_ORDER)`` followed by the comma-joined values
    whose tally is exactly n.

    xdata -- mapping indexed as ``xdata[model][rank][key] -> list`` with an
             ``'all'`` set per model.
    rank  -- the rank section to inspect.

    NOTE(review): the tally is bounded by the length of the model name,
    while the labels use len(RANK_ORDER) as the denominator -- confirm the
    two are meant to coincide.
    '''
    for model in xmodels:
        record = xdata[model]

        # value -> number of model characters listing it under `rank`
        tally = {}
        for value in record['all']:
            tally[value] = sum(
                1
                for ch in model
                if rank in record
                and ch in record[rank]
                and value in record[rank][ch]
            )

        # Group values by tally, keeping the dict's own iteration order.
        grouped = {}
        for value, hits in tally.items():
            grouped.setdefault(hits, []).append(value)

        ceiling = len(RANK_ORDER)
        rows = [
            ['%s/%s' % (n, ceiling), ','.join(grouped.get(n, []))]
            for n in xrange(ceiling, 0, -1)
        ]
        create_csv('%s-rank-%s.csv' % (model, rank), rows, directory='part3')
|
|
|
|
|
def main(*argv):
    '''
    Run step 3 end to end.

    Loads the step-2 data once, then calls, in order: produce_match_all,
    produce_match_all_mra, and produce_match_rank_mra for each rank in
    RANK_ORDER.

    argv -- accepted so the script entry point can forward command-line
            arguments; the function does not consult it.
    '''
    step2_data = process_s2_data()
    produce_match_all(step2_data)
    produce_match_all_mra(step2_data)
    for rank in RANK_ORDER:
        produce_match_rank_mra(step2_data, rank)
|
|
|
|
|
# Script entry point: forward the command-line arguments (as one list) when
# any were given, otherwise call main() with no arguments.
if __name__ == '__main__':
    cli_args = sys.argv[1:]
    if cli_args:
        main(cli_args)
    else:
        main()
|
|
|