You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
155 lines
3.8 KiB
155 lines
3.8 KiB
10 years ago
|
'''
|
||
|
Step 6
|
||
|
'''
|
||
|
# -*- coding:utf-8 -*-
|
||
|
#!/usr/bin/env python
|
||
|
from __future__ import print_function
|
||
|
from base import create_csv, TARGET_DIR, MODEL_LIST
|
||
|
import os
|
||
|
import csv
|
||
|
|
||
|
# Maps each one-character model code to the case name that get_yynn()
# interpolates into 'Case1_<case>.csv-output.csv' to pick its source file.
m2case = {
    'I': 'LS',
    'P': 'LS',
    'C': 'LS',
    'M': 'Gender',
    'F': 'Gender',
    'H': 'Level',
    'S': 'Level',
    'D': 'SciF',
    'f': 'SciF',
}
|
||
|
|
||
|
# Number of single-item cells each CSV column (by index) is broken into by
# get_yynn(). Only indices 3..10 are read there, and those widths add up to
# 28 -- the same size as the blank row returned by get_lo_yynn().
x2tiny_conv = [
    0, 0, 0,       # columns 0-2
    4, 3, 4, 3,    # columns 3-6
    5, 3, 3, 3,    # columns 7-10
    0,             # column 11
]
|
||
|
# [source, target] column pairs consumed by merge_col(): after its fixed
# offset of 7 is added to both numbers, the value found in the source column
# is folded into the target column.
merging_col = [
    [19, 6],
    [20, 7],
    [21, 10],
    [22, 3],
    [23, 6],
    [24, 11],
    [25, 4],
    [26, 12],
    [27, 14],
]
|
||
|
|
||
|
|
||
|
def break_into_tiny_list(incoming, length):
    '''
    Split ``incoming`` into exactly ``length`` single-item cells.

    incoming -- a sliceable sequence (get_yynn() passes a CSV cell string);
                any falsy value ('' or None) means "no data".
    length   -- number of cells to return; values below 1 give [].

    Returns a list with the first ``length`` items of ``incoming``, padded on
    the right with '' when ``incoming`` is falsy or shorter than ``length``.
    '''
    width = max(length, 0)
    if not incoming:
        return [''] * width
    pieces = list(incoming[:width])
    # Pad instead of raising IndexError on a short cell, so the result is
    # always fixed-width and the columns that follow stay aligned.
    pieces.extend([''] * (width - len(pieces)))
    return pieces
|
||
|
|
||
|
|
||
|
def get_yynn(four_char_model):
    '''
    Load the per-id cell lists for every character of ``four_char_model``.

    For each one-character model code ``m`` the file
    'Case1_<m2case[m]>.csv-output.csv' under TARGET_DIR is read. A row whose
    first column equals ``m`` contributes one entry per comma-separated id in
    its second column; the entry is columns 3..10 of that row, each broken
    into ``x2tiny_conv[column]`` cells by break_into_tiny_list().

    Returns ``{model_char: {id: [cell, ...]}}``. A later row with the same id
    replaces the earlier entry.

    Raises KeyError for a model code missing from m2case; errors from open()
    propagate, as does IndexError for a matching row with too few columns.
    '''
    g = {}
    for m in four_char_model:
        g[m] = {}
        src_name = 'Case1_%s.csv-output.csv' % m2case[m]
        src_path = os.path.join(TARGET_DIR, src_name)
        # NOTE(review): 'rb' is the Python 2 csv convention; Python 3's csv
        # module needs text mode with newline='' instead.
        with open(src_path, 'rb') as f:
            for r in csv.reader(f):
                # csv.reader yields [] for a blank line; skip it rather than
                # fail on r[0].
                if not r or r[0] != m:
                    continue
                # Build the row's cells once, then give every id its own copy.
                cells = []
                for c in range(3, 11):
                    cells += break_into_tiny_list(r[c], x2tiny_conv[c])
                for lo_id in r[1].split(','):
                    g[m][lo_id] = list(cells)

    return g
|
||
|
|
||
|
|
||
|
def get_lo_yynn(lo_id, char_model, **kwargs):
    '''
    Return the cells stored for ``lo_id`` under ``char_model``.

    The lookup table is passed as the keyword argument ``src`` and is read as
    ``src[char_model][lo_id]``. This doesn't do anything special: it only
    absorbs the missing-data cases so the other functions stay clean.

    A list of 28 empty strings is returned when ``src``, ``lo_id`` or
    ``char_model`` is missing or falsy, or when either key is absent from
    ``src``. 28 is the total of the widths x2tiny_conv[3:11] that get_yynn()
    produces per id.
    '''
    blank = [''] * 28
    src = kwargs.get('src')
    if not src or not lo_id or not char_model:
        return blank
    try:
        return src[char_model][lo_id]
    except KeyError:
        return blank
|
||
|
|
||
|
|
||
|
def looop_each_4char_model():
    '''
    Build and write the merged output for every model in MODEL_LIST.

    Each model has 6 columns of data + # (1st col) in 'part5/sorted_all.csv'
    under TARGET_DIR: the n-th model (0-based) owns columns 6*n+1 .. 6*n+6.
    For every data row that carries an id in the model's second column, one
    child row is built per character of the model name (row data + that
    character + its cells from get_yynn()). The children are combined with
    merge_row(), folded with merge_col(), and the result is handed to
    write_output() once per model.
    '''
    offset = 6
    fpath = os.path.join(TARGET_DIR, 'part5', 'sorted_all.csv')
    for loop_order, m in enumerate(MODEL_LIST):
        yynn = get_yynn(m)
        data = []
        # First column of this model's 6-column slice.
        st = (offset * loop_order) + 1
        # NOTE(review): 'rb' is the Python 2 csv convention; Python 3's csv
        # module needs text mode with newline='' instead.
        with open(fpath, 'rb') as f:
            rows = csv.reader(f)
            # Discard the first row (presumably a header -- confirm). The
            # default keeps an empty file from raising StopIteration.
            next(rows, None)
            for r in rows:
                # csv.reader yields [] for a blank line.
                if not r:
                    continue
                b = [r[0]] + r[st: st + offset]
                # b[2] is used as the id; rows too short to have one, or
                # with an empty one, are skipped.
                if len(b) < 3 or not b[2]:
                    continue
                l = {'lo_id': b[2], 'children': []}
                for sm in m:
                    c = b + [sm] + get_lo_yynn(b[2], sm, src=yynn)
                    l['children'].append(c)

                l['merged_row'] = merge_row(l['children'])
                l['merged_col'] = merge_col(l['merged_row'])
                data.append(l)
        write_output(m, data)
|
||
|
|
||
|
|
||
|
def merge_row(children):
    '''
    Collapse several child rows into one row.

    children -- list of rows (lists). Columns 0..7 are taken from the first
                child; column 7 holds the one-character model name.

    For every column from 8 up to the width of the first child, the merged
    value is the single non-empty value found among the children, '' when
    all are empty, or 'Y/N' when two children disagree. A child shorter than
    the first one is treated as empty in its missing columns.

    Returns the merged row with column 7 removed, or [] for no children.
    '''
    if not children:
        return []
    first = children[0]
    width = len(first)
    merged = first[0:8] + [''] * max(width - 8, 0)
    for child in children:
        # min() keeps a ragged (shorter) child from raising IndexError.
        for j in range(8, min(width, len(child))):
            value = child[j]
            if value == '':
                continue
            if merged[j] == '':
                merged[j] = value
            elif merged[j] != value:
                merged[j] = 'Y/N'
    # get rid of 1-char-model name
    return merged[0:7] + merged[8:]
|
||
|
|
||
|
|
||
|
def merge_col(child, columns=None):
    '''
    Fold the trailing columns of ``child`` into earlier columns.

    child   -- a row (list) as produced by merge_row().
    columns -- optional list of [source, target] pairs; defaults to the
               module-level ``merging_col``.

    Both numbers of every pair are shifted by a fixed offset of 7. The result
    is the first 19+7 columns of ``child``; each target column takes the
    source value when the target is empty, and becomes 'Y/N' when both are
    non-empty and different.

    Returns the folded row, or ``child`` unchanged when it is too short to
    contain every column that would be read.
    '''
    pairs = merging_col if columns is None else columns
    offset = 7
    keep = 19 + offset
    # Highest index that must exist. The original guard (< 27) did not cover
    # the source columns, which reach 27 + 7 with the default pairs.
    highest = max([keep] + [src + offset for src, _ in pairs])
    if len(child) <= highest:
        return child
    merged = child[0:keep]
    for src, dst in pairs:
        src += offset
        dst += offset
        value = child[src]
        if merged[dst] == '':
            merged[dst] = value
        elif value != '' and merged[dst] != value:
            merged[dst] = 'Y/N'
    return merged
|
||
|
|
||
|
|
||
|
def write_output(model_name, raw_data):
    '''
    Hand the merged rows of one model to create_csv().

    model_name -- used to build the file name '<model_name>-6.csv'.
    raw_data   -- list of dicts carrying 'lo_id' and 'merged_col'.

    Each output row is 'R_' plus the id without its first three characters,
    followed by the entry's 'merged_col' values. The rows are passed to
    create_csv() with directory='part6'.
    '''
    rows = [
        ['R_%s' % entry['lo_id'][3:]] + entry['merged_col']
        for entry in raw_data
    ]
    create_csv('%s-6.csv' % model_name, rows, directory='part6')
|
||
|
|
||
|
|
||
|
# Module-level entry point: the whole step runs as soon as this file is
# executed or imported.
looop_each_4char_model()
|