diff --git a/s8.py b/s8.py new file mode 100644 index 0000000..c87ed1e --- /dev/null +++ b/s8.py @@ -0,0 +1,83 @@ +# -*- coding:utf-8 -*- +''' +Step 7 +''' +#!/usr/bin/env python +from __future__ import print_function +from base import create_csv, TARGET_DIR, MODEL_LIST, RAW_DIR +import os +import csv + + +COMPARSION_PAIR = [ + (0, 0), (1, 1), (4, 2), (5, 3), + (8, 4), (10, 5), (11, 6), (13, 7), + (15, 8), (16, 9), (17, 10), (19, 11), + (20, 12), (22, 13), (25, 14), + (27, 15), (28, 16), (29, 17), (31, 18), +] + + +def get_test_data(m): + if not m: + return [] + result = [] + lo_list = [] + fpath = os.path.join(RAW_DIR, 'LO_TestData.csv') + with open(fpath, 'rb') as f: + rows = csv.reader(f) + rows.next() + for r in rows: + if r[1] != m.upper(): + continue + result.append(r) + lo_list.append(r[0]) + return (lo_list, result) + + +def get_ro_data(m, suffix): + if not m or not suffix: + return [] + result = [] + fname = '%s-%s.csv' % (m, suffix) + fpath = os.path.join(TARGET_DIR, 'part7', fname) + with open(fpath, 'rb') as f: + rows = csv.reader(f) + for r in rows: + result.append(r) + return result + + +def get_similarity_count(l, r): + cnt = 0 + for (x, y) in COMPARSION_PAIR: + if l[x] == r[y]: + cnt += 1 + elif r[y] == 'Y/N' and l[x] in ('Y', 'N'): + cnt += 1 + return cnt + + +def middleman(ls, rs): + if not len(ls): + return ls + result = [] + for lo in ls: + data = lo[:] + for ro in rs: + sim_count = get_similarity_count(lo[:], ro) + data.append(sim_count) + result.append(data) + return result + + +def main(): + for m in MODEL_LIST: + l_list, lo_data = get_test_data(m) + for i in '1234': + ros = get_ro_data(m, i) + result = middleman(lo_data, ros) + output_name = '%s-%s.csv' % (m, i) + create_csv(output_name, result, directory='part8') + +main()