#!/usr/bin/env python # -*- coding:utf-8 -*- ''' Step 6 ''' from __future__ import print_function from base import create_csv, TARGET_DIR, MODEL_LIST import os import csv m2case = { 'I': 'LS', 'P': 'LS', 'C': 'LS', 'M': 'Gender', 'F': 'Gender', 'H': 'Level', 'S': 'Level', 'D': 'SciF', 'f': 'SciF', } x2tiny_conv = [0, 0, 0, 4, 3, 4, 3, 5, 3, 3, 3, 0] merging_col = [ [19, 6], [20, 7], [21, 10], [22, 3], [23, 6], [24, 11], [25, 4], [26, 12], [27, 14], ] def break_into_tiny_list(incoming, length): if incoming: return [incoming[i] for i in xrange(0, length)] else: return ['' for i in xrange(0, length)] def get_yynn(four_char_model): g = {} for m in four_char_model: g[m] = {} src_name = 'Case1_%s.csv-output.csv' % m2case[m] src_path = os.path.join(TARGET_DIR, src_name) with open(src_path, 'rb') as f: rows = csv.reader(f) for r in rows: if m != r[0]: continue for l in r[1].split(','): # g[m][l] = r[3:11] # break into pieces g[m][l] = [] for c in xrange(3, 11): g[m][l] += break_into_tiny_list(r[c], x2tiny_conv[c]) return g def get_lo_yynn(lo_id, char_model, **kwargs): ''' This doesn't do anything special, just try-catch and leave other def clean ''' if not kwargs.get('src') or not lo_id or not char_model: return ['' for i in xrange(0, 28)] src = kwargs.get('src') try: return src[char_model][lo_id] except KeyError: return ['' for i in xrange(0, 28)] def looop_each_4char_model(): ''' Each model has 6 columns of data + # (1st col) ''' loop_order = 0 offset = 6 for m in MODEL_LIST: yynn = get_yynn(m) data = [] fpath = os.path.join(TARGET_DIR, 'part5', 'sorted_all.csv') with open(fpath, 'rb') as f: rows = csv.reader(f) rows.next() for r in rows: b = [r[0], ] st = (offset*loop_order)+1 b += r[st: st+offset] if not b[2]: continue l = {'lo_id': b[2], 'children': []} for sm in m: c = b[:] c.append(sm) c += get_lo_yynn(b[2], sm, src=yynn) l['children'].append(c) l['merged_row'] = merge_row(l['children']) l['merged_col'] = merge_col(l['merged_row']) data.append(l) loop_order += 1 write_output(m, data) def merge_row(children): if len(children) < 1: return [] llen = len(children[0]) merged = children[0][0:8] + ['' for i in xrange(8, llen)] for i in children: for j in xrange(8, llen): if merged[j] == '': merged[j] = i[j] elif merged[j] != i[j]: if i[j] != '': merged[j] = 'Y/N' # get rid of 1-char-model name merged = merged[0:7]+merged[8:] return merged def merge_col(child): if len(child) < 27: return child offset = 7 merged = child[0:19+offset] for x, y in merging_col: x += offset y += offset if merged[y] == '': merged[y] = child[x] elif merged[y] != child[x]: if child[x] != '': merged[y] = 'Y/N' return merged def write_output(model_name, raw_data): r = [] for i in raw_data: row = [ 'R_%s' % i['lo_id'][3:], ] + i['merged_col'] r.append(row) create_csv('%s-6.csv' % model_name, r, directory='part6') looop_each_4char_model()