You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

92 lines
2.1 KiB

# -*- coding:utf-8 -*-
'''
Step 7
'''
#!/usr/bin/env python
from __future__ import print_function
from base import create_csv, TARGET_DIR, MODEL_LIST, RAW_DIR
import os
import csv
# Column pairs checked by get_similarity_count(): each entry is (x, y), where
# x indexes into the left-hand (LO) row and y into the right-hand (RO) row.
COMPARSION_PAIR = [
    (0, 0),
    (1, 1),
    (4, 2),
    (5, 3),
    (8, 4),
    (10, 5),
    (11, 6),
    (13, 7),
    (15, 8),
    (16, 9),
    (17, 10),
    (19, 11),
    (20, 12),
    (22, 13),
    (25, 14),
    (27, 15),
    (28, 16),
    (29, 17),
    (31, 18),
]
def get_test_data(m):
    '''Load the rows of ``LO_TestData.csv`` that belong to model ``m``.

    The file is read from ``RAW_DIR``. Its first row is skipped and a row is
    kept only when its second column equals ``m.upper()``.

    Args:
        m: model name; matched upper-cased against column 1 of each row.

    Returns:
        A ``(lo_list, result)`` tuple: ``result`` holds the kept rows and
        ``lo_list`` the first column of each kept row (presumably the LO id;
        verify against the data file). ``([], [])`` when ``m`` is falsy.
    '''
    if not m:
        # Always hand back a pair: callers unpack two values, and a bare []
        # would make that unpacking raise ValueError.
        return ([], [])
    wanted = m.upper()  # loop-invariant, compute once
    result = []
    lo_list = []
    fpath = os.path.join(RAW_DIR, 'LO_TestData.csv')
    # NOTE(review): 'rb' is the mode Python 2's csv module expects; a Python 3
    # port would need text mode with newline='' instead.
    with open(fpath, 'rb') as f:
        rows = csv.reader(f)
        # Skip the first row (presumably the column header). The default
        # argument keeps an empty file from raising StopIteration.
        next(rows, None)
        for r in rows:
            if r[1] != wanted:
                continue
            result.append(r)
            lo_list.append(r[0])
    return (lo_list, result)
def get_ro_data(m, suffix):
    '''Read every row of ``<m>-<suffix>.csv`` from the ``part7`` folder.

    The file is looked up under ``TARGET_DIR/part7``. No row is skipped or
    filtered.

    Args:
        m: model name used as the first half of the file name.
        suffix: second half of the file name.

    Returns:
        A list of CSV rows, or ``[]`` when either argument is falsy.
    '''
    if not (m and suffix):
        return []
    csv_path = os.path.join(TARGET_DIR, 'part7', '%s-%s.csv' % (m, suffix))
    with open(csv_path, 'rb') as handle:
        return [row for row in csv.reader(handle)]
def get_similarity_count(l, r):
    '''Count the COMPARSION_PAIR positions at which rows ``l`` and ``r`` agree.

    For each ``(x, y)`` pair, ``l[x]`` and ``r[y]`` agree when they are equal,
    or when ``r[y]`` is the wildcard ``'Y/N'`` and ``l[x]`` is ``'Y'`` or
    ``'N'``.

    Args:
        l: left-hand row, indexed by the first element of each pair.
        r: right-hand row, indexed by the second element of each pair.

    Returns:
        The number of agreeing pairs, as an int.
    '''
    def _agrees(left, right):
        # Exact match first; otherwise 'Y/N' on the right accepts Y or N.
        if left == right:
            return True
        return right == 'Y/N' and left in ('Y', 'N')

    return sum(1 for (x, y) in COMPARSION_PAIR if _agrees(l[x], r[y]))
def middleman(ls, rs):
    '''Extend each row of ``ls`` with its similarity count against every ``rs`` row.

    Args:
        ls: list of left-hand rows; the rows themselves are not modified.
        rs: list of right-hand rows to compare against.

    Returns:
        A new list with one entry per row of ``ls``: a copy of that row
        followed by one get_similarity_count() value per row of ``rs``, in
        ``rs`` order. When ``ls`` is empty, ``ls`` itself is returned.
    '''
    if len(ls) == 0:
        return ls
    augmented = []
    for row in ls:
        counts = [get_similarity_count(row, other) for other in rs]
        # row[:] copies the row so the caller's data stays untouched.
        augmented.append(row[:] + counts)
    return augmented
def main():
    '''Write one similarity table per (model, suffix) pair into ``part8``.

    For every model in MODEL_LIST and every suffix in ``'1234'``, the model's
    rows from get_test_data() are extended by middleman() with one similarity
    count per row returned by get_ro_data(). A header row is put in front and
    the table is passed to create_csv() as ``<model>-<suffix>.csv``.
    '''
    # 'lo_id', a slot for the model name, then 19 blank cells.
    base_header = ['lo_id', ''] + [''] * 19
    for m in MODEL_LIST:
        # Only the rows are needed here; the id list is ignored.
        _lo_ids, lo_data = get_test_data(m)
        for i in '1234':
            ros = get_ro_data(m, i)
            # Build the header fresh for each suffix. Reusing one header per
            # model let the first-column values of earlier suffixes pile up
            # in later files, where no data columns correspond to them.
            m_header = base_header[:]
            m_header[1] = m
            m_header.extend(r[0] for r in ros)
            output_name = '%s-%s.csv' % (m, i)
            result = [m_header] + middleman(lo_data, ros)
            create_csv(output_name, result, directory='part8')
# Run the step unconditionally: there is no __main__ guard, so this executes
# when the file is imported as well as when it is run as a script.
main()