|
|
|
#!/usr/bin/env python
|
|
|
|
# -*- coding:utf-8 -*-
|
|
|
|
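'''
Step 8

Compare every model's LO test rows (LO_TestData.csv) with the RO rows of
part7 and write the per-row similarity counts to part8.
'''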
|
|
|
|
from __future__ import print_function
|
|
|
|
from base import create_csv, TARGET_DIR, MODEL_LIST, RAW_DIR
|
|
|
|
import os
|
|
|
|
import csv
|
|
|
|
|
|
|
|
|
|
|
|
# Default column pairs used by get_similarity_count(): each tuple is
# (index into the left row, index into the right row).
COMPARSION_PAIR = [
    (0, 0), (1, 1), (4, 2), (5, 3), (8, 4),
    (10, 5), (11, 6), (13, 7), (15, 8), (16, 9),
    (17, 10), (19, 11), (20, 12), (22, 13), (25, 14),
    (27, 15), (28, 16), (29, 17), (31, 18),
]
|
|
|
|
|
|
|
|
|
|
|
|
def get_test_data(m):
    '''
    Load the rows of ``LO_TestData.csv`` that belong to model ``m``.

    The file is read from ``RAW_DIR``.  Its first row is skipped
    (presumably a header) and a row is kept when its second column equals
    ``m.upper()``.

    Returns a ``(lo_list, rows)`` tuple: ``rows`` holds the kept rows and
    ``lo_list`` the first column of each kept row.  A falsy ``m`` yields
    ``([], [])`` so callers can always unpack two values.
    '''
    if not m:
        # Same two-item shape as the normal return value; a bare [] would
        # break ``a, b = get_test_data(m)``.
        return ([], [])
    model = m.upper()
    result = []
    lo_list = []
    fpath = os.path.join(RAW_DIR, 'LO_TestData.csv')
    with open(fpath, 'rb') as f:
        rows = csv.reader(f)
        # Skip the first row; the default keeps an empty file from raising
        # StopIteration.
        next(rows, None)
        for r in rows:
            # Blank lines come back as [] and cannot belong to any model.
            if len(r) < 2 or r[1] != model:
                continue
            result.append(r)
            lo_list.append(r[0])
    return (lo_list, result)
|
|
|
|
|
|
|
|
|
|
|
|
def get_ro_data(m, suffix):
    '''
    Return every row of ``part7/<m>-<suffix>.csv`` under ``TARGET_DIR``.

    An empty list is returned when either argument is falsy.
    '''
    if not (m and suffix):
        return []
    fname = '%s-%s.csv' % (m, suffix)
    csv_path = os.path.join(TARGET_DIR, 'part7', fname)
    with open(csv_path, 'rb') as handle:
        # Materialise the reader while the file is still open.
        return list(csv.reader(handle))
|
|
|
|
|
|
|
|
|
|
|
|
def get_similarity_count(l, r, pair=None, **kwargs):
    '''
    Count the column pairs on which rows ``l`` and ``r`` agree.

    ``pair`` is an iterable of ``(x, y)`` index tuples; ``l[x]`` is
    compared with ``r[y]`` after every ``*`` has been removed from the
    latter.  When ``pair`` is omitted or ``None`` the module-level
    ``COMPARSION_PAIR`` is used.

    A pair counts as a match when both values are equal, or when the
    cleaned ``r`` value is ``'Y/N'`` and the ``l`` value is ``'Y'`` or
    ``'N'``.

    Extra keyword arguments are accepted and ignored, as they were when
    the function took only ``**kwargs``.
    '''
    pairs = COMPARSION_PAIR if pair is None else pair
    cnt = 0
    for x, y in pairs:
        r_value = r[y].replace('*', '')
        l_value = l[x]
        if l_value == r_value:
            cnt += 1
        elif r_value == 'Y/N' and l_value in ('Y', 'N'):
            # 'Y/N' on the right side accepts either answer on the left.
            cnt += 1
    return cnt
|
|
|
|
|
|
|
|
|
|
|
|
def middleman(ls, rs):
    '''
    Extend a copy of each row of ``ls`` with its similarity counts.

    For every row in ``ls`` the result holds a copy of that row followed
    by one ``get_similarity_count`` value per row of ``rs``, in order.
    An empty ``ls`` is returned unchanged.
    '''
    if len(ls) == 0:
        return ls
    return [
        lo[:] + [get_similarity_count(lo[:], ro) for ro in rs]
        for lo in ls
    ]
|
|
|
|
|
|
|
|
|
|
|
|
def _first_numeric_index(row):
    '''Return the index of the first item of ``row`` that float() accepts, else 0.'''
    for index, value in enumerate(row):
        try:
            float(value)
        except ValueError:
            continue
        return index
    return 0


def main():
    '''
    Write one ``<model>-<n>.csv`` file into ``part8`` per model and n in 1-4.

    For each model in ``MODEL_LIST`` the LO test rows are loaded once and
    then passed to ``middleman`` together with the rows returned by
    ``get_ro_data(model, n)``.  The result is written with ``create_csv``
    below a header row made of ``'LO_xxx'``, the model name, ``'_'``
    padding and the first column of every RO row.

    When a model has no LO rows the file contains the header row only.
    '''
    for m in MODEL_LIST:
        _lo_ids, lo_data = get_test_data(m)
        for suffix in '1234':
            ros = get_ro_data(m, suffix)
            m_header = ['LO_xxx', m] + [ro[0] for ro in ros]
            result = middleman(lo_data, ros)
            output_name = '%s-%s.csv' % (m, suffix)

            # Pad the header so the RO names start at the column of the
            # first numeric cell of the first data row.  With no data rows
            # there is nothing to align to, so no padding is added.
            # NOTE(review): a numeric-looking LO cell would be picked up
            # before the appended counts -- confirm the LO data has none.
            number_ind = _first_numeric_index(result[0]) if result else 0
            padding = ['_'] * max(number_ind - 2, 0)
            header_row = m_header[:2] + padding + m_header[2:]

            create_csv(output_name, [header_row] + result, directory='part8')
|
|
|
|
|
|
|
|
|
|
|
|
# Run the step only when this file is executed as a script.
if __name__ == "__main__":
    main()
|