You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

119 lines
3.5 KiB

11 years ago
#!/usr/bin/env python
# -*- coding:utf-8 -*-
'''
Step 3: read the per-model step-2 CSV files and write the part-3 match CSVs.
'''
11 years ago
from __future__ import print_function
from base import Lo, create_csv, TARGET_DIR
11 years ago
import os
import csv
import sys
from s2 import RANK_ORDER
11 years ago
# Created at import time; not referenced again in the code shown here.
# NOTE(review): confirm whether Lo() is needed for a side effect before
# removing it.
los = Lo()
# Model keys processed by every step-3 function.  Each key is four letters:
# one of I/C/P, one of M/F, one of S/H and one of f/D.  The values are empty
# placeholder dicts; the code shown here only ever uses the keys.
# NOTE(review): the four letter positions presumably correspond to the four
# Case1_* files listed in `fs` below -- confirm.
xmodels = {
'IMSf': {}, 'IMSD': {}, 'IMHf': {}, 'IMHD': {}, 'IFSf': {}, 'IFSD': {},
'IFHf': {}, 'IFHD': {}, 'CMSf': {}, 'CMSD': {}, 'CMHf': {}, 'CMHD': {},
'CFSf': {}, 'CFSD': {}, 'CFHf': {}, 'CFHD': {}, 'PMSf': {}, 'PMSD': {},
'PMHf': {}, 'PMHD': {}, 'PFSf': {}, 'PFSD': {}, 'PFHf': {}, 'PFHD': {},
}
# Not referenced again in the code shown here.
# NOTE(review): presumably the input files of an earlier step -- confirm.
fs = ('Case1_LS.csv', 'Case1_Gender.csv', 'Case1_Level.csv', 'Case1_SciF.csv')
def process_s2_data():
    '''
    Load the step-2 CSV file of every model listed in ``xmodels``.

    For each model key the file ``<TARGET_DIR>/part2/<key>-2.csv`` is read.
    A row whose second column is the literal ``rank`` switches the current
    rank to the value of its third column.  Every other row is read as
    ``<name>,<comma separated items>`` and stored under the current rank.
    Rows that appear before the first ``rank`` row are stored under rank
    ``0``.  Rows with fewer than two columns (e.g. blank lines) are skipped.

    Returns a dict shaped like::

        {model: {'all': set_of_items, rank: {name: [item, ...]}}}

    Every model gets an entry, even when its file holds no data rows.
    Errors from opening a file (e.g. a missing file) are not caught.
    '''
    xm_data = {}
    for ii in xmodels:
        # Register the model up front: the produce_* functions index the
        # result by every key of xmodels, so a file without data rows must
        # still yield an (empty) entry instead of a later KeyError.
        model = xm_data[ii] = {'all': set()}
        fpath = os.path.join(TARGET_DIR, 'part2', '%s-2.csv' % ii)
        with _open_step2_csv(fpath) as f:
            rank = 0
            for r in csv.reader(f):
                if len(r) < 2:
                    # csv.reader yields [] for a blank line; r[1] would
                    # raise IndexError.
                    continue
                if r[1] == "rank":
                    rank = r[2]
                    continue
                items = r[1].split(',')
                model.setdefault(rank, {})[r[0]] = items
                model['all'].update(items)
    return xm_data


def _open_step2_csv(path):
    '''Open *path* in the mode the csv module expects on this interpreter.'''
    if sys.version_info[0] >= 3:
        # Python 3: csv needs a text-mode file opened with newline=''.
        return open(path, 'r', newline='')
    # Python 2: csv needs a binary-mode file.
    return open(path, 'rb')
def produce_match_all(xdata):
    '''
    Write ``step3_match_all.csv`` into the ``part3`` directory.

    One row ``[model, 'item,item,...']`` is produced per key of ``xmodels``.
    The second column is the sorted union of every item that is listed,
    under any rank, for any letter of the model key.

    xdata -- mapping shaped like the return value of process_s2_data().

    Raises KeyError if a key of ``xmodels`` is missing from *xdata*.
    '''
    rows = []
    for ii in xmodels:
        _los = set()
        for _rank, by_letter in xdata[ii].items():
            # 'all' holds the flat set of items, not a per-letter dict.
            # Treating it as a rank would raise TypeError (a set cannot be
            # indexed) whenever an item equals one of the model letters.
            if _rank == 'all':
                continue
            for i in ii:
                if i in by_letter:
                    _los.update(by_letter[i])
        rows.append([ii, ','.join(sorted(_los))])
    create_csv('step3_match_all.csv', rows, directory='part3')
11 years ago
def produce_match_all_mra(xdata):
    '''
    Write ``<model>-all.csv`` into ``part3`` for every key of ``xmodels``.

    For each item in the model's ``'all'`` set, count the (letter, rank)
    pairs -- letters of the model key, ranks from ``RANK_ORDER`` -- whose
    item list contains it, then write the items grouped by that count.

    xdata -- mapping shaped like the return value of process_s2_data().
    '''
    for ii in xmodels:
        model = xdata[ii]
        lo_count = {}
        for j in model['all']:
            lo_count[j] = sum(
                1
                for i in ii
                for rank in RANK_ORDER
                if rank in model and i in model[rank] and j in model[rank][i]
            )
        create_csv('%s-all.csv' % (ii,), _mra_rows(lo_count),
                   directory='part3')


def produce_match_rank_mra(xdata, rank):
    '''
    Write ``<model>-rank-<rank>.csv`` into ``part3`` for every model.

    Same as produce_match_all_mra(), but only the given *rank* is counted,
    so an item's count is the number of letters of the model key that list
    it under that rank.

    xdata -- mapping shaped like the return value of process_s2_data().
    rank  -- the rank key to look up in each model's data.
    '''
    for ii in xmodels:
        model = xdata[ii]
        lo_count = {}
        for j in model['all']:
            lo_count[j] = sum(
                1
                for i in ii
                if rank in model and i in model[rank] and j in model[rank][i]
            )
        create_csv('%s-rank-%s.csv' % (ii, rank), _mra_rows(lo_count),
                   directory='part3')


def _mra_rows(lo_count):
    '''
    Group the items of *lo_count* by count into ``['n/max', 'a,b,...']`` rows.

    One row is returned for every n from ``len(RANK_ORDER)`` down to 1, also
    when no item has that count.  Items inside a row are sorted so the
    output does not depend on dict ordering.
    '''
    max_count = len(RANK_ORDER)
    # NOTE(review): a count can be as large as the number of letters in the
    # model key (times len(RANK_ORDER) for the "-all" files), but rows only
    # go up to len(RANK_ORDER).  Items with a larger count, like items with
    # a count of 0, are not written -- confirm that this is intended.
    by_count = {}
    for item, count in lo_count.items():
        by_count.setdefault(count, []).append(item)
    return [
        ['%s/%s' % (n, max_count), ','.join(sorted(by_count.get(n, [])))]
        for n in range(max_count, 0, -1)
    ]
11 years ago
def main(*argv):
    '''
    Run step 3: load the step-2 data, then write every part-3 CSV file.

    argv -- accepted for the command-line entry point; not used.
    '''
    step2_data = process_s2_data()
    produce_match_all(step2_data)
    produce_match_all_mra(step2_data)
    # One extra file set per rank, in RANK_ORDER order.
    for rank in RANK_ORDER:
        produce_match_rank_mra(step2_data, rank)
11 years ago
if __name__ == '__main__':
    # Hand the command-line arguments to main() as a single list, but only
    # when at least one argument was given.
    cli_args = sys.argv[1:]
    if cli_args:
        main(cli_args)
    else:
        main()