You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
101 lines
2.5 KiB
101 lines
2.5 KiB
#!/usr/bin/env python |
|
# -*- coding:utf-8 -*- |
|
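'''
Step 8

Compare every LO test row of each model with the rows of the matching
``part7`` CSV files and write the per-row match counts as CSV files
into ``part8``.
'''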
|
from __future__ import print_function |
|
from base import create_csv, TARGET_DIR, MODEL_LIST, RAW_DIR |
|
import os |
|
import csv |
|
|
|
|
|
# Column pairs compared by get_similarity_count().  Each entry is
# (column index in the LO row, column index in the RO row).
COMPARSION_PAIR = [
    (0, 0),
    (1, 1),
    (4, 2),
    (5, 3),
    (8, 4),
    (10, 5),
    (11, 6),
    (13, 7),
    (15, 8),
    (16, 9),
    (17, 10),
    (19, 11),
    (20, 12),
    (22, 13),
    (25, 14),
    (27, 15),
    (28, 16),
    (29, 17),
    (31, 18),
]
|
|
|
|
|
def get_test_data(m):
    '''
    Load the LO test rows that belong to model ``m``.

    Reads ``LO_TestData.csv`` from ``RAW_DIR``, skips its first row
    (presumably the header) and keeps every row whose second column
    equals ``m.upper()``.

    Returns a ``(lo_list, rows)`` tuple: ``rows`` holds the matching CSV
    rows and ``lo_list`` the first column of each of them, in file order.
    A falsy ``m`` yields ``([], [])`` so callers can always unpack two
    values.
    '''
    if not m:
        # Same shape as the normal return; a bare [] broke callers that
        # unpack the result into two names.
        return ([], [])

    wanted = m.upper()  # constant for the whole scan, so compute it once
    result = []
    lo_list = []
    fpath = os.path.join(RAW_DIR, 'LO_TestData.csv')
    # Binary mode is what the Python 2 csv module expects.
    with open(fpath, 'rb') as f:
        rows = csv.reader(f)
        # Skip the first row; the default keeps an empty file from
        # raising StopIteration.
        next(rows, None)
        for r in rows:
            if r[1] != wanted:
                continue
            result.append(r)
            lo_list.append(r[0])
    return (lo_list, result)
|
|
|
|
|
def get_ro_data(m, suffix):
    '''
    Load every row of the ``part7`` CSV for model ``m`` and ``suffix``.

    The file read is ``<TARGET_DIR>/part7/<m>-<suffix>.csv``.  No row is
    skipped.  Returns the rows as a list in file order, or ``[]`` when
    either argument is falsy.
    '''
    if not (m and suffix):
        return []

    fname = '%s-%s.csv' % (m, suffix)
    fpath = os.path.join(TARGET_DIR, 'part7', fname)
    # Binary mode is what the Python 2 csv module expects.
    with open(fpath, 'rb') as f:
        # Materialise inside the ``with`` so the file is still open
        # while the reader is consumed.
        return list(csv.reader(f))
|
|
|
|
|
def get_similarity_count(l, r, pairs=None):
    '''
    Count how many compared columns of row ``l`` agree with row ``r``.

    ``pairs`` is an iterable of ``(x, y)`` index pairs: column ``x`` of
    ``l`` is compared with column ``y`` of ``r``.  When omitted, the
    module-level ``COMPARSION_PAIR`` table is used.

    Every ``*`` is removed from the ``r`` value before comparing.  A pair
    counts when the two values are equal, or when the cleaned ``r`` value
    is ``'Y/N'`` and the ``l`` value is ``'Y'`` or ``'N'``.

    Raises ``IndexError`` if either row is too short for one of the pairs.
    '''
    if pairs is None:
        # Looked up at call time so the default always tracks the table.
        pairs = COMPARSION_PAIR

    cnt = 0
    for x, y in pairs:
        r_value = r[y].replace('*', '')
        l_value = l[x]
        # 'Y/N' on the r side accepts either answer on the l side.
        if l_value == r_value or (r_value == 'Y/N' and l_value in ('Y', 'N')):
            cnt += 1
    return cnt
|
|
|
|
|
def middleman(ls, rs):
    '''
    Extend each row of ``ls`` with its match count against every row of ``rs``.

    Returns a new list with one row per entry of ``ls``: a copy of that
    row followed by ``get_similarity_count(row, ro)`` for each ``ro`` in
    ``rs``, in order.  The input rows are not modified.  An empty ``ls``
    gives an empty list.
    '''
    result = []
    for lo in ls:
        # Counts are computed from the untouched input row, so no
        # per-comparison copy is needed.
        counts = [get_similarity_count(lo, ro) for ro in rs]
        result.append(list(lo) + counts)
    return result
|
|
|
|
|
def main():
    '''
    Write one similarity CSV per model and suffix into ``part8``.

    For every model in ``MODEL_LIST`` and every suffix ``'1'`` to ``'4'``
    the LO test rows of the model are scored against the rows returned
    by ``get_ro_data`` and the result is passed to ``create_csv`` under
    the name ``<model>-<suffix>.csv``.

    Each output starts with a header row: ``'LO_xxx'``, the model name,
    ``'_'`` placeholders for the remaining LO columns, then the first
    column of every RO row, so that each RO label sits above its count
    column.  A model without LO rows produces a header-only file.
    '''
    for m in MODEL_LIST:
        _, lo_data = get_test_data(m)

        # middleman() appends the counts right after the LO columns, so
        # the first count sits at index len(LO row).  Using that width
        # directly replaces the old probe for the first float-parsable
        # cell, which misplaced the padding whenever an LO column itself
        # held a number, and raised IndexError when there were no rows.
        lo_width = len(lo_data[0]) if lo_data else 0
        # The first two header cells already cover two LO columns.
        padding = ['_'] * max(0, lo_width - 2)

        for suffix in '1234':
            ros = get_ro_data(m, suffix)
            header = ['LO_xxx', m] + padding + [r[0] for r in ros]
            rows = middleman(lo_data, ros)
            output_name = '%s-%s.csv' % (m, suffix)
            create_csv(output_name, [header] + rows, directory='part8')
|
|
|
# Called at module level with no ``__name__`` guard, so the step runs
# whenever this file is executed or imported.
main()
|
|
|