#!/usr/bin/env python # -*- coding:utf-8 -*- ''' Step 8 ''' from __future__ import print_function from base import create_csv, TARGET_DIR, MODEL_LIST, RAW_DIR import os import csv COMPARSION_PAIR = [ (0, 0), (1, 1), (4, 2), (5, 3), (8, 4), (10, 5), (11, 6), (13, 7), (15, 8), (16, 9), (17, 10), (19, 11), (20, 12), (22, 13), (25, 14), (27, 15), (28, 16), (29, 17), (31, 18), ] def get_test_data(m): if not m: return [] result = [] lo_list = [] fpath = os.path.join(RAW_DIR, 'LO_TestData.csv') with open(fpath, 'rb') as f: rows = csv.reader(f) rows.next() for r in rows: if r[1] != m.upper(): continue result.append(r) lo_list.append(r[0]) return (lo_list, result) def get_ro_data(m, suffix): if not m or not suffix: return [] result = [] fname = '%s-%s.csv' % (m, suffix) fpath = os.path.join(TARGET_DIR, 'part7', fname) with open(fpath, 'rb') as f: rows = csv.reader(f) for r in rows: result.append(r) return result def get_similarity_count(l, r, **kwargs): C_PAIR = kwargs.get('pair', COMPARSION_PAIR) cnt = 0 for (x, y) in C_PAIR: r_value = r[y].replace('*', '') if l[x] == r_value: cnt += 1 elif r_value == 'Y/N' and l[x] in ('Y', 'N'): cnt += 1 return cnt def middleman(ls, rs): if not len(ls): return ls result = [] for lo in ls: data = lo[:] for ro in rs: sim_count = get_similarity_count(lo[:], ro) data.append(sim_count) result.append(data) return result def main(): header = ['LO_xxx', '____', ] for m in MODEL_LIST: l_list, lo_data = get_test_data(m) for i in '1234': m_header = header[:] m_header[1] = m ros = get_ro_data(m, i) m_header += [r[0] for r in ros] result = middleman(lo_data, ros) output_name = '%s-%s.csv' % (m, i) # process header -- get ind of first R_xxx number_ind = 0 cnt_ind = 0 for i in result[0]: try: float(i) number_ind = cnt_ind cnt_ind = 0 break except ValueError: cnt_ind += 1 __h = m_header[:2] + ['_' for i in xrange(0, number_ind-2)] + m_header[2:] result = [__h] + result create_csv(output_name, result, directory='part8') if __name__ == '__main__': main()