|
|
@ -1,3 +1,6 @@ |
|
|
|
|
|
|
|
''' |
|
|
|
|
|
|
|
WTH |
|
|
|
|
|
|
|
''' |
|
|
|
# -*- coding:utf-8 -*- |
|
|
|
# -*- coding:utf-8 -*- |
|
|
|
#!/usr/bin/env python |
|
|
|
#!/usr/bin/env python |
|
|
|
from __future__ import print_function |
|
|
|
from __future__ import print_function |
|
|
@ -18,6 +21,9 @@ fs = ('Case1_LS.csv', 'Case1_Gender.csv', 'Case1_Level.csv', 'Case1_SciF.csv') |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def process_s2_data(): |
|
|
|
def process_s2_data(): |
|
|
|
|
|
|
|
''' |
|
|
|
|
|
|
|
docstring huhu? |
|
|
|
|
|
|
|
''' |
|
|
|
xm_data = {} |
|
|
|
xm_data = {} |
|
|
|
for ii in xmodels.keys(): |
|
|
|
for ii in xmodels.keys(): |
|
|
|
fname = '%s-2.csv' % ii |
|
|
|
fname = '%s-2.csv' % ii |
|
|
@ -30,29 +36,77 @@ def process_s2_data(): |
|
|
|
rank = r[2] |
|
|
|
rank = r[2] |
|
|
|
continue |
|
|
|
continue |
|
|
|
if ii not in xm_data: |
|
|
|
if ii not in xm_data: |
|
|
|
xm_data[ii] = {} |
|
|
|
xm_data[ii] = {'all': set()} |
|
|
|
if rank not in xm_data[ii]: |
|
|
|
if rank not in xm_data[ii]: |
|
|
|
xm_data[ii][rank] = {} |
|
|
|
xm_data[ii][rank] = {} |
|
|
|
xm_data[ii][rank] = r[1].split(',') |
|
|
|
if r[0] not in xm_data[ii][rank]: |
|
|
|
|
|
|
|
xm_data[ii][rank][r[0]] = [] |
|
|
|
|
|
|
|
xm_data[ii][rank][r[0]] = r[1].split(',') |
|
|
|
|
|
|
|
xm_data[ii]['all'] = xm_data[ii]['all'].union(set(r[1].split(','))) |
|
|
|
return xm_data |
|
|
|
return xm_data |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def produce_match_all(xdata):
    """Write 'step3_match_all.csv': one row per model with all matched values.

    For every model key in the module-level ``xmodels``, the per-character
    value lists stored under each rank of ``xdata[model]`` are merged for the
    characters that make up the model key.  The merged values are written
    sorted and comma-joined, next to the model key.

    Args:
        xdata: mapping ``model -> {rank: {character: [values]}}``; it may
            also hold an aggregate ``'all'`` set per model, which is ignored
            here.

    Raises:
        KeyError: if a key of ``xmodels`` is missing from ``xdata``.
    """
    rows = []
    for model in xmodels:
        matched = set()
        for by_char in xdata[model].values():
            # Only per-character mappings take part.  The aggregate 'all'
            # entry is a plain set; subscripting it would raise TypeError as
            # soon as a model character happened to equal a stored value.
            if not isinstance(by_char, dict):
                continue
            for char in model:
                matched.update(by_char.get(char, ()))
        rows.append([model, ','.join(sorted(matched))])
    create_csv('step3_match_all.csv', rows, directory='part3')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def produce_match_all_mra(xdata):
    """Write '<model>-all.csv' per model, bucketing values by match count.

    For every model key in the module-level ``xmodels``, each value in
    ``xdata[model]['all']`` is scored by counting the (character, rank)
    pairs -- characters of the model key, ranks '1' to '4' -- whose list
    ``xdata[model][rank][character]`` contains the value.  Values scoring
    exactly 4, 3, 2 and 1 become the rows '4/4', '3/4', '2/4' and '1/4'.

    Args:
        xdata: mapping ``model -> {'all': set, rank: {character: [values]}}``.
            A rank that is absent for a model is treated as having no
            matches.

    Raises:
        KeyError: if a model, or its ``'all'`` entry, is missing from
            ``xdata``.
    """
    for model in xmodels:
        # Missing ranks contribute nothing instead of raising KeyError.
        per_rank = [xdata[model].get(rank, {}) for rank in '1234']
        lo_count = {}
        for value in xdata[model]['all']:
            lo_count[value] = sum(
                1
                for char in model
                for by_char in per_rank
                if value in by_char.get(char, ())
            )
        # NOTE(review): a value can score above 4 here (characters x ranks);
        # such values land in no row, exactly as before -- confirm intended.
        rows = [
            # Sorted so the file content does not depend on set/dict order.
            ['%d/4' % hits,
             ','.join(sorted(k for k, v in lo_count.items() if v == hits))]
            for hits in (4, 3, 2, 1)
        ]
        create_csv('%s-all.csv' % (model,), rows, directory='part3')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def produce_match_rank_mra(xdata, rank):
    """Write '<model>-rank-<rank>.csv' per model, bucketing values by count.

    For every model key in the module-level ``xmodels``, each value in
    ``xdata[model]['all']`` is scored by counting the characters of the
    model key whose list ``xdata[model][rank][character]`` contains the
    value.  Values scoring exactly 4, 3, 2 and 1 become the rows '4/4',
    '3/4', '2/4' and '1/4'.

    Args:
        xdata: mapping ``model -> {'all': set, rank: {character: [values]}}``.
        rank: key selecting which rank of each model to inspect.  A rank
            that is absent for a model is treated as having no matches.

    Raises:
        KeyError: if a model, or its ``'all'`` entry, is missing from
            ``xdata``.
    """
    for model in xmodels:
        # A missing rank contributes nothing instead of raising KeyError.
        by_char = xdata[model].get(rank, {})
        lo_count = {}
        for value in xdata[model]['all']:
            lo_count[value] = sum(
                1 for char in model if value in by_char.get(char, ())
            )
        rows = [
            # Sorted so the file content does not depend on set/dict order.
            ['%d/4' % hits,
             ','.join(sorted(k for k, v in lo_count.items() if v == hits))]
            for hits in (4, 3, 2, 1)
        ]
        create_csv('%s-rank-%s.csv' % (model, rank), rows, directory='part3')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def main(*argv):
    """Run the step-3 pipeline: load the step-2 data and emit every CSV.

    Loads the data via process_s2_data, prints the keys stored under rank
    '1' of the 'CMHf' model, then writes the combined match file, the
    all-ranks bucket files and one bucket file per rank '1' to '4'.

    Args:
        *argv: accepted for the command-line entry point; not used here.
    """
    s2_data = process_s2_data()
    rank_one = s2_data['CMHf']['1']
    print(rank_one.keys())

    produce_match_all(s2_data)
    produce_match_all_mra(s2_data)
    for rank in ('1', '2', '3', '4'):
        produce_match_rank_mra(s2_data, rank)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__': |
|
|
|
if __name__ == '__main__': |
|
|
|
if len(sys.argv) > 1: |
|
|
|
if len(sys.argv) > 1: |
|
|
|