lo-ranking/s8.py

#!/usr/bin/env python
# -*- coding:utf-8 -*-
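'''
Step 8: score LO test-data rows against RO rows.

For every model in MODEL_LIST and every suffix in '1234', the RO rows are
read from TARGET_DIR/part7/<model>-<suffix>.csv, each LO row of that model
(from RAW_DIR/LO_TestData.csv) gets one similarity count per RO row, and
the table is handed to create_csv as <model>-<suffix>.csv with
directory='part8'.
'''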
from __future__ import print_function
from base import create_csv, TARGET_DIR, MODEL_LIST, RAW_DIR
import os
import csv


# (x, y) column-index pairs consumed by get_similarity_count: column x of its
# first argument (an LO test-data row) is compared with column y of its second
# argument (an RO row).
# NOTE(review): "COMPARSION" is a misspelling of "COMPARISON"; the name is
# kept because other code in this file refers to it.
COMPARSION_PAIR = [
    (0, 0), (1, 1), (4, 2), (5, 3),
    (8, 4), (10, 5), (11, 6), (13, 7),
    (15, 8), (16, 9), (17, 10), (19, 11),
    (20, 12), (22, 13), (25, 14),
    (27, 15), (28, 16), (29, 17), (31, 18),
]


def get_test_data(m):
    '''
    Load the rows of RAW_DIR/LO_TestData.csv that belong to model ``m``.

    The first row of the file is treated as a header and skipped. A row is
    kept when its second column equals ``m.upper()``; rows with fewer than
    two columns (e.g. blank lines) are ignored.

    :param m: model name, matched upper-cased against column 1 of each row.
    :returns: a ``(lo_list, result)`` tuple. ``result`` is the list of
        matching rows and ``lo_list`` holds column 0 of each of those rows,
        in file order. Both lists are empty when ``m`` is falsy.
    '''
    if not m:
        # Same two-element shape as the normal path, so callers that unpack
        # the return value do not blow up on an empty model name.
        return ([], [])
    wanted = m.upper()
    result = []
    lo_list = []
    fpath = os.path.join(RAW_DIR, 'LO_TestData.csv')
    # Binary mode is what the Python 2 csv module expects.
    with open(fpath, 'rb') as f:
        rows = csv.reader(f)
        # Skip the header row; the default keeps an empty file from raising
        # StopIteration.
        next(rows, None)
        for r in rows:
            # csv.reader yields [] for blank lines, which have no column 1.
            if len(r) < 2 or r[1] != wanted:
                continue
            result.append(r)
            lo_list.append(r[0])
    return (lo_list, result)


def get_ro_data(m, suffix):
    '''
    Read every row of TARGET_DIR/part7/<m>-<suffix>.csv.

    :param m: model name used as the first part of the file name.
    :param suffix: second part of the file name.
    :returns: an empty list when either argument is falsy; otherwise all
        rows yielded by csv.reader, with no header row skipped.
    '''
    if not (m and suffix):
        return []
    source = os.path.join(TARGET_DIR, 'part7', '%s-%s.csv' % (m, suffix))
    with open(source, 'rb') as handle:
        return list(csv.reader(handle))


def get_similarity_count(l, r):
    '''
    Count the COMPARSION_PAIR positions at which rows ``l`` and ``r`` agree.

    For each (x, y) pair, ``r[y]`` has every '*' removed and then counts as
    a match when it equals ``l[x]``, or when it is 'Y/N' and ``l[x]`` is
    either 'Y' or 'N'.

    :returns: the number of matching pairs.
    '''
    matches = 0
    for lo_idx, ro_idx in COMPARSION_PAIR:
        expected = r[ro_idx].replace('*', '')
        actual = l[lo_idx]
        # 'Y/N' on the RO side accepts either answer on the LO side.
        if actual == expected or (expected == 'Y/N' and actual in ('Y', 'N')):
            matches += 1
    return matches


def middleman(ls, rs):
    '''
    Extend each row of ``ls`` with its similarity count against every row
    of ``rs``.

    :param ls: rows to score; returned unchanged (same object) when empty.
    :param rs: rows to score against, in output-column order.
    :returns: a new list holding, per row of ``ls``, a copy of that row
        followed by one get_similarity_count value per row of ``rs``.
    '''
    if len(ls) == 0:
        return ls
    scored = []
    for lo in ls:
        row = lo[:]
        # The helper is handed its own copy of the row, never the growing one.
        row.extend(get_similarity_count(lo[:], ro) for ro in rs)
        scored.append(row)
    return scored


def main():
    '''
    Build one similarity table per (model, suffix) and pass it to create_csv.

    For every model in MODEL_LIST and every suffix in '1234', the RO rows
    from get_ro_data are scored against the model's LO rows by middleman.
    The table gets a header row: 'lo_id', the model name, 19 blank cells,
    then column 0 of each RO row in that suffix's file. It is written as
    '<model>-<suffix>.csv' with directory='part8'.
    '''
    # 'lo_id', a slot for the model name, then 19 blank cells.
    base_header = ['lo_id', ''] + [''] * 19
    for m in MODEL_LIST:
        _, lo_data = get_test_data(m)
        for i in '1234':
            ros = get_ro_data(m, i)
            # Start from a fresh header for every suffix: reusing one header
            # across the loop would carry RO columns from earlier suffixes
            # into later files, where they have no matching data columns.
            m_header = base_header[:]
            m_header[1] = m
            m_header.extend(r[0] for r in ros)
            result = [m_header] + middleman(lo_data, ros)
            output_name = '%s-%s.csv' % (m, i)
            create_csv(output_name, result, directory='part8')

# Runs the whole step at module load; there is no __main__ guard, so
# importing this module also triggers it.
main()
s7 add yynn_count 11 years ago			`#!/usr/bin/env python`
[july] s8 -- :( past july 11 years ago			`# -- coding:utf-8 --`
			`'''`
s7 add yynn_count 11 years ago			`Step 8`
[july] s8 -- :( past july 11 years ago			`'''`
			`from __future__ import print_function`
			`from base import create_csv, TARGET_DIR, MODEL_LIST, RAW_DIR`
			`import os`
			`import csv`


			`COMPARSION_PAIR = [`
			`(0, 0), (1, 1), (4, 2), (5, 3),`
			`(8, 4), (10, 5), (11, 6), (13, 7),`
			`(15, 8), (16, 9), (17, 10), (19, 11),`
			`(20, 12), (22, 13), (25, 14),`
			`(27, 15), (28, 16), (29, 17), (31, 18),`
			`]`


			`def get_test_data(m):`
			`if not m:`
			`return []`
			`result = []`
			`lo_list = []`
			`fpath = os.path.join(RAW_DIR, 'LO_TestData.csv')`
			`with open(fpath, 'rb') as f:`
			`rows = csv.reader(f)`
			`rows.next()`
			`for r in rows:`
			`if r[1] != m.upper():`
			`continue`
			`result.append(r)`
			`lo_list.append(r[0])`
			`return (lo_list, result)`


			`def get_ro_data(m, suffix):`
			`if not m or not suffix:`
			`return []`
			`result = []`
			`fname = '%s-%s.csv' % (m, suffix)`
			`fpath = os.path.join(TARGET_DIR, 'part7', fname)`
			`with open(fpath, 'rb') as f:`
			`rows = csv.reader(f)`
			`for r in rows:`
			`result.append(r)`
			`return result`


			`def get_similarity_count(l, r):`
			`cnt = 0`
			`for (x, y) in COMPARSION_PAIR:`
[s8] bug fix -- get rid of asterik 11 years ago			`r_value = r[y].replace('*', '')`
			`if l[x] == r_value:`
[july] s8 -- :( past july 11 years ago			`cnt += 1`
[s8] bug fix -- get rid of asterik 11 years ago			`elif r_value == 'Y/N' and l[x] in ('Y', 'N'):`
[july] s8 -- :( past july 11 years ago			`cnt += 1`
			`return cnt`


			`def middleman(ls, rs):`
			`if not len(ls):`
			`return ls`
			`result = []`
			`for lo in ls:`
			`data = lo[:]`
			`for ro in rs:`
			`sim_count = get_similarity_count(lo[:], ro)`
			`data.append(sim_count)`
			`result.append(data)`
			`return result`


			`def main():`
[s8] bug fix - missing table header 11 years ago			`header = [`
			`'lo_id', '',`
			`] + ['' for i in xrange(0, 19)]`
[july] s8 -- :( past july 11 years ago			`for m in MODEL_LIST:`
			`l_list, lo_data = get_test_data(m)`
[s8] bug fix - missing table header 11 years ago			`m_header = header[:]`
			`m_header[1] = m`
[july] s8 -- :( past july 11 years ago			`for i in '1234':`
			`ros = get_ro_data(m, i)`
[s8] bug fix - missing table header 11 years ago			`for r in ros:`
			`m_header.append(r[0])`
			`result = middleman(lo_data, ros)`
[july] s8 -- :( past july 11 years ago			`output_name = '%s-%s.csv' % (m, i)`
[s8] bug fix - missing table header 11 years ago			`result = [m_header] + result`
[july] s8 -- :( past july 11 years ago			`create_csv(output_name, result, directory='part8')`

			`main()`