# lo-ranking/s11.py

# -*- coding: utf-8 -*-
#!/usr/bin/env python
from __future__ import absolute_import, print_function
import os
import sys
import csv
import getopt
from base import RAW_DIR, TARGET_DIR, create_csv

# Directory the keyword CSVs are read from: the 'oct' sibling of RAW_DIR.
OCT_RAW_DIR = os.path.join(RAW_DIR, os.pardir, 'oct')

# Runtime switches; main() turns them on for the -v / -w command-line flags.
verbose, writefile = False, False


def get_keyword_level(ksearch=None):
    '''
    Load keyword_level.csv and index its rows by their first column
    (the K_search key).

    ksearch -- key to look up, or None.

    Returns the full CSV row (a list, key included) when ``ksearch`` is given
    and present.  Otherwise -- ``ksearch`` is None/empty or not found --
    returns the whole dict mapping every key to its row, so callers must
    check which of the two they received.  When a key occurs more than once
    the first row wins.
    '''
    groupy = {}
    # NOTE: 'rU' (universal newlines) is the Python 2 spelling; kept as-is.
    with open(os.path.join(OCT_RAW_DIR, 'keyword_level.csv'), 'rU') as f:
        for row in csv.reader(f):
            # Skip blank lines (csv yields [] for them) and dedupe on the
            # column the dict is actually keyed by.
            if row and row[0] not in groupy:
                groupy[row[0]] = row
    if ksearch and ksearch in groupy:
        return groupy[ksearch]
    return groupy


def get_keyword_weight(ksearch=None):
    '''
    Load keyword_weight.csv and index its rows by their first column
    (the K_search key).

    ksearch -- key to look up, or None.

    Returns the full CSV row (a list, key included) when ``ksearch`` is given
    and present.  Otherwise -- ``ksearch`` is None/empty or not found --
    returns the whole dict mapping every key to its row, so callers must
    check which of the two they received.  When a key occurs more than once
    the first row wins.
    '''
    groupy = {}
    # NOTE: 'rU' (universal newlines) is the Python 2 spelling; kept as-is.
    with open(os.path.join(OCT_RAW_DIR, 'keyword_weight.csv'), 'rU') as f:
        for row in csv.reader(f):
            # Skip blank lines (csv yields [] for them) and dedupe on the
            # column the dict is actually keyed by.
            if row and row[0] not in groupy:
                groupy[row[0]] = row
    if ksearch and ksearch in groupy:
        return groupy[ksearch]
    return groupy


def get_s10_result(model, rtype):
    '''
    Load the rows that S10 wrote for the given model and r_type.

    Reads '<model>-<rtype>.csv' from the 'part10' directory under TARGET_DIR
    and returns its rows as a list of lists.  Prints a message and exits
    with status 2 when that file does not exist.
    '''
    source = os.path.join(TARGET_DIR, 'part10', '%s-%s.csv' % (model, rtype))
    if not os.path.exists(source):
        print("S10's output file not found")
        ## TODO: this should force getting live data instead of throw error
        sys.exit(2)

    with open(source, 'rb') as handle:
        return list(csv.reader(handle))


def _weigh_row(values, weights, levels):
    '''
    Multiply ``values`` by ``weights`` position by position and total the
    products per keyword level.

    Returns ``(products, totals)`` where ``totals`` is
    [level1+level2+level3, level1, level2, level3, level1+level2].
    Pairs beyond the shortest of the three inputs are ignored.
    '''
    products = []
    # Integer zeros so an untouched total stays 0 (not 0.0) in the output.
    per_level = [0, 0, 0, 0, 0]
    for value, weight, level in zip(values, weights, levels):
        product = float(value) * float(weight)
        products.append(product)
        level = float(level)
        if level in (1, 2, 3):
            per_level[int(level)] += product
    per_level[0] = sum(per_level[1:4])
    per_level[4] = sum(per_level[1:3])
    return products, per_level


def processingS11(model, rtype, ksearch):
    '''
    Weight the S10 output of one model / r_type by the keyword weights of
    ``ksearch`` and append per-level totals to every row.

    For each S10 row, the columns from index 35 onwards are multiplied by
    the matching entries of the keyword weight row, and the products are
    summed per keyword level (1, 2 or 3).  Every output row is the original
    S10 row followed by the weighted values and five totals:
    [level1+2+3, level1, level2, level3, level1+2].  Blank cells count as 0.

    When the module-level ``verbose`` flag is set each computed row is
    printed (zeros shown as '-'); when ``writefile`` is set the result is
    written through ``create_csv`` into the 'part11' directory.

    Exits the process when S10 produced no rows (status 0) or when
    ``ksearch`` has no entry in the keyword level / weight data (status 2).
    '''
    s10_data = get_s10_result(model, rtype)

    if not s10_data:
        print('[s10] zero record. Thus, no output for s11.')
        sys.exit()

    ## get keyword data
    level = get_keyword_level(ksearch)
    weight = get_keyword_weight(ksearch)
    # The getters hand back the whole dict when the key is unknown; slicing
    # that would fail with an opaque TypeError, so stop with a clear message.
    if isinstance(level, dict) or isinstance(weight, dict):
        print('[s11] k_search %s not found in keyword level/weight data.' % ksearch)
        sys.exit(2)

    ### replace '' (blank) with 0 zero too -- same for every row, so do it once
    w_only = [cell or 0 for cell in weight[1:]]
    l_only = [cell or 0 for cell in level[1:]]

    finale = []
    for row in s10_data:
        data_only = [cell or 0 for cell in row[35:]]

        ### [s11] (2) & (3): * by kw_weight and total per kw_level
        products, totals = _weigh_row(data_only, w_only, l_only)
        computed = products + totals

        if verbose:
            shown = row[:1] + computed
            print(','.join([('%s' % k if k != 0 else '-') for k in shown]))

        ## concat with s10_data
        finale.append(row + computed)

    if writefile:
        output_name = '%s-%s-%s.csv' % (model, rtype, ksearch)
        create_csv(output_name, finale, directory='part11')


def usage():
    '''Print the command-line help text for s11.py to stdout.'''
    help_text = '''Help:
    [opts]
    -m, --model     Model Name [4-char]
    -r, --rtype     R_Type [1-4]
    -k, --ksearch   K_search [a-s]
    -v              verbose
    -w              write output to file
Usage:
    $ python s11.py [opts] [value]
Example:
    $ python s11.py -mCFHD -r3 -kA
    $ python s11.py -mCFHD -r3 -kB -v               << show more information
    $ python s11.py -m CFHD -r 3 -k B -v             << same as above
    $ python s11.py -m CFHD -r 3 -k F -w     << write output file too.
    $ python s11.py -mCFHD -r3 -k C -vw      << show more information & output file.
    '''
    print(help_text)


def main(argv=None):
    '''
    Command-line entry point: parse the options, validate them and run S11.

    argv -- list of command-line options to parse; defaults to sys.argv[1:].

    Sets the module-level ``verbose`` / ``writefile`` flags for -v / -w.
    Prints the usage text and exits with status 2 on an unknown option, an
    invalid value, or when any of -m, -r, -k is missing.
    '''
    global verbose, writefile

    def _reject():
        # Shared failure path: show the help text and stop.
        usage()
        sys.exit(2)

    if argv is None:
        argv = sys.argv[1:]
    try:
        opts, _args = getopt.getopt(argv, 'm:r:k:vw', ['model=', 'rtype=', 'ksearch='])
    except getopt.GetoptError:
        _reject()

    valid_rtypes = ('1', '2', '3', '4')
    valid_ksearch = tuple('ABCDEFGHIJKLMNOPQRS')

    model_value = rtype_value = k_value = None
    for opt, arg in opts:
        if opt == '-v':
            verbose = True
        elif opt == '-w':
            writefile = True
        elif opt in ('-m', '--model'):
            ## TODO: filter model available and throw exception if invalid
            if len(arg) != 4:
                _reject()
            model_value = arg.upper()
        elif opt in ('-r', '--rtype'):
            # Exact membership, not a substring test: `arg in '1234'` also
            # let through values such as '12' or '234'.
            if arg not in valid_rtypes:
                _reject()
            rtype_value = arg
        elif opt in ('-k', '--ksearch'):
            # Same here: only a single letter A-S is a valid K_search.
            if arg.upper() not in valid_ksearch:
                _reject()
            k_value = arg.upper()

    if not (model_value and rtype_value and k_value):
        _reject()

    print('[s11] model: %s | r_type: %s | k_search: %s' % (
        model_value, rtype_value, k_value))

    processingS11(model_value, rtype_value, k_value)


# Run the command-line interface only when executed as a script, not on import.
if __name__ == '__main__':
    main()