You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

155 lines
3.8 KiB

11 years ago
#!/usr/bin/env python
# -*- coding:utf-8 -*-
11 years ago
'''
Step 6
'''
from __future__ import print_function
from base import create_csv, TARGET_DIR, MODEL_LIST
import os
import csv
# One-character model code -> case name.  get_yynn() puts the case name into
# the 'Case1_<case>.csv-output.csv' file name it reads.
m2case = dict(
    I='LS', P='LS', C='LS',
    M='Gender', F='Gender',
    H='Level', S='Level',
    D='SciF', f='SciF',
)

# Number of pieces each source-CSV column is broken into, indexed by column
# number.  get_yynn() only walks columns 3-10, which add up to 28 pieces.
x2tiny_conv = [0, 0, 0] + [4, 3, 4, 3, 5, 3, 3, 3] + [0]

# [source, destination] column pairs used by merge_col(): the value found in
# the source column is folded into the destination column.
merging_col = [
    [19, 6], [20, 7], [21, 10],
    [22, 3], [23, 6], [24, 11],
    [25, 4], [26, 12], [27, 14],
]
def break_into_tiny_list(incoming, length):
    '''
    Split ``incoming`` into a list of exactly ``length`` single items.

    The first ``length`` items of ``incoming`` are returned one per slot and
    anything beyond ``length`` is dropped.  An empty/falsy ``incoming`` gives
    ``length`` empty strings.  A value shorter than ``length`` is padded with
    empty strings (instead of raising IndexError), so the result always has
    ``length`` entries -- or none when ``length`` is not positive.
    '''
    if not incoming:
        return [''] * length
    # Index-based on purpose: a negative ``length`` yields an empty range,
    # whereas a slice such as incoming[:length] would not.
    pieces = [incoming[i] for i in range(min(length, len(incoming)))]
    return pieces + [''] * (length - len(pieces))
def get_yynn(four_char_model):
    '''
    Load the per-id piece lists for every character of ``four_char_model``.

    For each character ``m`` the file 'Case1_<case>.csv-output.csv' (case
    looked up in ``m2case``) is read from TARGET_DIR.  Only rows whose first
    column equals ``m`` are used: the second column is a comma-separated list
    of ids, and columns 3-10 are each broken into ``x2tiny_conv[column]``
    pieces and concatenated into one list.

    Returns a dict shaped ``{m: {id: [piece, ...]}}``.  When an id appears in
    several matching rows, the last row wins.
    '''
    g = {}
    for m in four_char_model:
        per_id = g[m] = {}
        src_name = 'Case1_%s.csv-output.csv' % m2case[m]
        src_path = os.path.join(TARGET_DIR, src_name)
        with open(src_path, 'rb') as f:
            for r in csv.reader(f):
                # Blank lines come back from csv.reader as [], and a row
                # without the id column cannot be used, so skip both rather
                # than crash on r[0] / r[1].
                if len(r) < 2 or r[0] != m:
                    continue
                pieces = []
                for c in range(3, 11):
                    # A row cut short is treated like one with empty cells.
                    cell = r[c] if c < len(r) else ''
                    pieces += break_into_tiny_list(cell, x2tiny_conv[c])
                for l in r[1].split(','):
                    # Every id gets its own list object.
                    per_id[l] = list(pieces)
    return g
def get_lo_yynn(lo_id, char_model, **kwargs):
    '''
    Return ``src[char_model][lo_id]`` or a blank placeholder.

    ``src`` is passed as a keyword argument and is expected to be a nested
    mapping indexed first by ``char_model`` and then by ``lo_id``.  A list of
    28 empty strings is returned when ``src``, ``lo_id`` or ``char_model`` is
    missing/empty, or when the lookup raises KeyError.  Keeping the
    try/except here lets the callers stay free of error handling.
    '''
    blank = [''] * 28
    src = kwargs.get('src')
    if not src or not lo_id or not char_model:
        return blank
    try:
        return src[char_model][lo_id]
    except KeyError:
        return blank
def looop_each_4char_model():
    '''
    Build and write the step-6 output for every model in MODEL_LIST.

    Each model has 6 columns of data + # (1st col) in part5/sorted_all.csv:
    the n-th model (counting from 0) uses column 0 plus columns 6n+1 .. 6n+6.
    For every row whose third collected value (used as the lo_id) is
    non-empty, one child row is built per character of the model name, the
    children are merged with merge_row() and merge_col(), and the result is
    handed to write_output().
    '''
    offset = 6
    fpath = os.path.join(TARGET_DIR, 'part5', 'sorted_all.csv')
    for loop_order, m in enumerate(MODEL_LIST):
        yynn = get_yynn(m)
        data = []
        st = (offset * loop_order) + 1
        with open(fpath, 'rb') as f:
            rows = csv.reader(f)
            # Skip the first row (presumably a header); the default keeps an
            # empty file from raising StopIteration.
            next(rows, None)
            for r in rows:
                if not r:
                    # csv.reader yields [] for blank lines.
                    continue
                b = [r[0]] + r[st: st + offset]
                # Rows too short to hold this model's lo_id are skipped, just
                # like rows where it is empty.
                if len(b) < 3 or not b[2]:
                    continue
                children = []
                for sm in m:
                    c = b + [sm]
                    c.extend(get_lo_yynn(b[2], sm, src=yynn))
                    children.append(c)
                merged_row = merge_row(children)
                data.append({
                    'lo_id': b[2],
                    'children': children,
                    'merged_row': merged_row,
                    'merged_col': merge_col(merged_row),
                })
        write_output(m, data)
def merge_row(children):
    '''
    Collapse several child rows into one merged row.

    Columns 0-7 are copied from the first child.  From column 8 onwards each
    column ends up as:
      * ''    when every child is empty there,
      * the value itself when all non-empty children agree,
      * 'Y/N' when the children hold different non-empty values.
    Column 7 (the 1-char-model name) is then removed, so the result is one
    item shorter than a child.  Returns [] when there are no children.
    '''
    if not children:
        return []
    first = children[0]
    llen = len(first)
    merged = first[0:8] + [''] * (llen - 8)
    for child in children:
        for j in range(8, llen):
            value = child[j]
            if value == '' or value == merged[j]:
                # Nothing new to record for this column.
                continue
            merged[j] = value if merged[j] == '' else 'Y/N'
    # get rid of 1-char-model name
    return merged[0:7] + merged[8:]
def merge_col(child, pairs=None):
    '''
    Fold the trailing columns of a merged row into earlier columns.

    ``pairs`` is a list of [source, destination] column numbers and defaults
    to the module-level ``merging_col``.  Both numbers are shifted by 7 (the
    leading columns of the row) before use.  For each pair the destination
    becomes the source value when the destination is empty, and 'Y/N' when
    both are non-empty and differ; an empty source changes nothing.

    Returns a new list holding the first 19+7 columns with the folds applied.
    A row with fewer than 27 cells, or too short to contain every source
    column, is returned unchanged.
    '''
    if pairs is None:
        pairs = merging_col
    offset = 7
    # Every source column has to exist, otherwise child[...] below would
    # raise IndexError; 27 is kept as the minimum for backward compatibility.
    last_src = max(src for src, _ in pairs) if pairs else -1
    if len(child) < max(27, offset + last_src + 1):
        return child
    merged = child[0:19 + offset]
    for src, dst in pairs:
        value = child[src + offset]
        slot = dst + offset
        if value == '' or value == merged[slot]:
            continue
        merged[slot] = value if merged[slot] == '' else 'Y/N'
    return merged
def write_output(model_name, raw_data):
    '''
    Hand one model's merged rows to create_csv().

    Every entry of ``raw_data`` contributes one row: 'R_' followed by the
    entry's lo_id without its first three characters, then the entry's
    'merged_col' list.  The rows are passed to create_csv() with the file
    name '<model_name>-6.csv' and directory='part6'.
    '''
    rows = [
        ['R_%s' % entry['lo_id'][3:]] + entry['merged_col']
        for entry in raw_data
    ]
    create_csv('%s-6.csv' % model_name, rows, directory='part6')
# Module-level entry point: the whole step runs as soon as this file is
# executed.  There is no __main__ guard, so importing the module runs it too.
looop_each_4char_model()