You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
154 lines
3.8 KiB
154 lines
3.8 KiB
#!/usr/bin/env python |
|
# -*- coding:utf-8 -*- |
|
''' |
|
Step 6 |
|
''' |
|
from __future__ import print_function |
|
from base import create_csv, TARGET_DIR, MODEL_LIST |
|
import os |
|
import csv |
|
|
|
# Maps each one-character model code to the name that get_yynn substitutes
# into the 'Case1_<name>.csv-output.csv' source file name.
m2case = {
    'I': 'LS', 'P': 'LS', 'C': 'LS',
    'M': 'Gender', 'F': 'Gender',
    'H': 'Level', 'S': 'Level',
    'D': 'SciF', 'f': 'SciF',
}
|
|
|
# Number of single cells each source CSV column is broken into, indexed by
# column position. get_yynn only reads positions 3 through 10, which add up
# to 28 cells.
x2tiny_conv = [
    0, 0, 0,     # positions 0-2
    4, 3, 4, 3,  # positions 3-6
    5, 3, 3, 3,  # positions 7-10
    0,           # position 11
]
|
# [source, target] index pairs consumed by merge_col: the cell at `source`
# is folded into the cell at `target`. merge_col shifts both indices by its
# own offset before using them.
merging_col = [
    [19, 6], [20, 7], [21, 10],
    [22, 3], [23, 6], [24, 11],
    [25, 4], [26, 12], [27, 14],
]
|
|
|
|
|
def break_into_tiny_list(incoming, length):
    '''
    Split ``incoming`` into a list of exactly ``length`` cells.

    incoming -- an indexable sequence (a string is split per character);
                any falsy value (``''``, ``None``, ``[]``) means "no data".
    length   -- number of cells wanted; zero or less yields ``[]``.

    Returns the first ``length`` items of ``incoming``. Missing items are
    filled with ``''`` so the result always has the requested width, which
    keeps rows built from these pieces aligned even when a source value is
    shorter than expected.
    '''
    if length <= 0:
        return []
    if not incoming:
        return [''] * length
    cells = list(incoming[:length])
    # Pad instead of raising IndexError on short input.
    cells.extend([''] * (length - len(cells)))
    return cells
|
|
|
|
|
def get_yynn(four_char_model):
    '''
    Load the split cells for every one-character model in the given model.

    four_char_model -- iterable of one-character model codes; each code must
                       be a key of ``m2case``.

    For each code the file ``Case1_<m2case[code]>.csv-output.csv`` under
    TARGET_DIR is read. Only rows whose first column equals the code are
    used. Column 1 of such a row holds a comma-separated list of ids, and
    columns 3-10 are each broken into ``x2tiny_conv[column]`` cells.

    Returns a nested dict ``{code: {id: [cell, ...]}}``.

    Raises KeyError for a code missing from ``m2case`` and IndexError for a
    matching row that has fewer than 11 columns.
    '''
    g = {}
    for m in four_char_model:
        g[m] = {}
        src_name = 'Case1_%s.csv-output.csv' % m2case[m]
        src_path = os.path.join(TARGET_DIR, src_name)
        # Binary mode is what the Python 2 csv module expects.
        with open(src_path, 'rb') as f:
            for r in csv.reader(f):
                # Blank lines are returned as [] and have no r[0].
                if not r or m != r[0]:
                    continue
                # The cells depend only on the row, so build them once
                # and hand every id its own copy.
                cells = []
                for c in range(3, 11):
                    cells += break_into_tiny_list(r[c], x2tiny_conv[c])
                for l in r[1].split(','):
                    g[m][l] = list(cells)
    return g
|
|
|
|
|
def get_lo_yynn(lo_id, char_model, **kwargs):
    '''
    Safe lookup of ``src[char_model][lo_id]``.

    lo_id      -- id to look up.
    char_model -- one-character model code.
    src        -- (keyword) nested mapping ``{char_model: {lo_id: list}}``.

    Returns the stored list when it exists. When ``src``, ``lo_id`` or
    ``char_model`` is missing/empty, or either key is absent, a list of 28
    empty strings is returned instead, so callers never need a try/except.
    '''
    # 28 = 4+3+4+3+5+3+3+3, the widths in x2tiny_conv[3:11].
    blank_width = 28
    src = kwargs.get('src')
    if not src or not lo_id or not char_model:
        return [''] * blank_width
    try:
        return src[char_model][lo_id]
    except KeyError:
        return [''] * blank_width
|
|
|
|
|
def looop_each_4char_model():
    '''
    Build and write one output file per entry of MODEL_LIST.

    Each model has 6 columns of data + # (1st col) in
    ``part5/sorted_all.csv``: the n-th model (0-based) owns columns
    ``6*n + 1`` through ``6*n + 6``.

    For every data row the model's slice is taken, rows whose third cell
    (used as ``lo_id``) is missing or empty are skipped, one child row is
    built per one-character sub-model, and the children are collapsed with
    merge_row and merge_col before being passed to write_output.
    '''
    offset = 6
    # The input path does not depend on the model, so compute it once.
    fpath = os.path.join(TARGET_DIR, 'part5', 'sorted_all.csv')
    for loop_order, m in enumerate(MODEL_LIST):
        yynn = get_yynn(m)
        data = []
        st = (offset * loop_order) + 1
        # Binary mode is what the Python 2 csv module expects.
        with open(fpath, 'rb') as f:
            rows = csv.reader(f)
            # Discard the first row (presumably a header); the default
            # keeps an empty file from raising StopIteration.
            next(rows, None)
            for r in rows:
                b = r[:1] + r[st:st + offset]
                # Blank or truncated rows have no lo_id cell; skip them
                # instead of failing on b[2].
                if len(b) < 3 or not b[2]:
                    continue
                l = {'lo_id': b[2], 'children': []}
                for sm in m:
                    c = b[:]
                    c.append(sm)
                    c += get_lo_yynn(b[2], sm, src=yynn)
                    l['children'].append(c)

                l['merged_row'] = merge_row(l['children'])
                l['merged_col'] = merge_col(l['merged_row'])
                data.append(l)
        write_output(m, data)
|
|
|
|
|
def merge_row(children):
    '''
    Collapse several equally long rows into a single row.

    children -- list of rows (lists). Cells 0-6 are shared data, cell 7 is
                the one-character model name, cells 8 onward are values.

    The first row supplies cells 0-7. Every cell from index 8 on is merged
    across all rows: an empty cell takes the first non-empty value seen,
    and a cell that meets a different non-empty value becomes ``'Y/N'``.
    The model-name cell (index 7) is then dropped.

    Returns the merged row, or ``[]`` when ``children`` is empty. The input
    rows are not modified.
    '''
    if not children:
        return []
    first = children[0]
    llen = len(first)
    merged = first[0:8] + [''] * max(llen - 8, 0)
    for row in children:
        for j in range(8, llen):
            value = row[j]
            if merged[j] == '':
                merged[j] = value
            elif value != '' and merged[j] != value:
                merged[j] = 'Y/N'
    # get rid of 1-char-model name
    return merged[0:7] + merged[8:]
|
|
|
|
|
def merge_col(child):
    '''
    Fold the trailing cells of a merged row into earlier cells.

    child -- a row as produced by merge_row: 7 leading cells followed by
             the value cells.

    Rows with fewer than 27 cells are returned unchanged. Otherwise the
    first 26 cells (7 leading + 19 values) are kept, and for every
    ``[source, target]`` pair in ``merging_col`` (both shifted by 7) the
    source cell is merged into the target cell: an empty target takes the
    source value, and a target that differs from a non-empty source becomes
    ``'Y/N'``.

    A source cell that lies beyond the end of ``child`` is skipped, so a
    row with 27-34 cells no longer raises IndexError.

    Returns the shortened, merged row; ``child`` itself is not modified.
    '''
    if len(child) < 27:
        return child
    offset = 7
    merged = child[0:19 + offset]
    for x, y in merging_col:
        src_idx = x + offset
        dst_idx = y + offset
        if src_idx >= len(child):
            # Nothing to merge from a cell that does not exist.
            continue
        value = child[src_idx]
        if merged[dst_idx] == '':
            merged[dst_idx] = value
        elif value != '' and merged[dst_idx] != value:
            merged[dst_idx] = 'Y/N'
    return merged
|
|
|
|
|
def write_output(model_name, raw_data):
    '''
    Write the merged rows of one model to ``<model_name>-6.csv`` in part6.

    model_name -- used as the prefix of the output file name.
    raw_data   -- list of dicts with a ``'lo_id'`` string and a
                  ``'merged_col'`` list.

    Every output row starts with ``'R_'`` plus the ``lo_id`` minus its
    first three characters, followed by the ``merged_col`` cells.
    '''
    out_rows = [
        ['R_%s' % item['lo_id'][3:]] + item['merged_col']
        for item in raw_data
    ]
    create_csv('%s-6.csv' % model_name, out_rows, directory='part6')
|
|
|
|
|
# Script entry point: the whole step runs as soon as this module is
# executed (note that it also runs if the module is imported).
looop_each_4char_model()
|
|
|