#!/usr/bin/env python # -*- coding:utf-8 -*- ''' Step 7 ''' from __future__ import print_function from base import create_csv, TARGET_DIR, MODEL_LIST import os import csv import random ''' แก้ตรงนี้ไปก่อนละกัน No of Tree = “4Ts” และ No of R = “5” No of Tree ของคุณ ==== NoT_MATCH_ARG เป็น list หรือ tuple - ถ้าจะเอาอันไหนก็ใส่ใน list ไป เช่น เอา 4Ts, 2Ts >> ['4Ts', '2Ts'] - ถ้าเอาทุกอันจะใส่ ['4Ts', '3Ts', ... ] ให้ครบ หรือใช้ [] ก็ได้ No of R ของคุณ ======= TOTAL -- ถ้าต้องการทั้งหมดก็ = 0 แต่ว่า ผมไม่รับรองว่าจะใช้ได้ทั้งหมดเพราะว่า มันก็ขึ้นกับหลายอย่าง เช่น 1Ts มีไม่กี่อันอาจจะ error มั้ง? โดยเฉพาะเวลา random ''' NoT_MATCH_ARG = ['2Ts', '1Ts'] TOTAL = 10 SRC_PATH = os.path.join(TARGET_DIR, 'part6') def get_yynn_count(cols): n = 0 for c in cols: if c in ('Y', 'Y*', 'N', 'N*'): n += 1 elif c in ('Y/N', 'Y/N*'): n += 2 return n def get_by_order(): for m in MODEL_LIST: result = [] count = 0 with open(os.path.join(SRC_PATH, '%s-6.csv' % m), 'rb') as f: rows = csv.reader(f) for r in rows: if NoT_MATCH_ARG and r[2] not in NoT_MATCH_ARG: continue row = [ r[0], # R_id 'Ord_%s' % r[1], # no of R r[2], # No of Tree m, # model ] + r[7:] row.append(get_yynn_count(r)) # ?????????? result.append(row) count += 1 if count >= TOTAL and TOTAL > 0: break output_name = '%s-1.csv' % m create_csv(output_name, result, directory='part7') def get_by_random(): for m in MODEL_LIST: lo_list = [] result = [] with open(os.path.join(SRC_PATH, '%s-6.csv' % m), 'rb') as f: rows = csv.reader(f) for r in rows: l = [ r[0], # R_id '', # no of R r[2], # No of Tree m, # model ] + r[7:] l.append(get_yynn_count(r)) # ?????????? lo_list.append(l) picked = [] ok = False _TOTAL = len(lo_list) if TOTAL < 1 else TOTAL for i in xrange(0, _TOTAL): ok = False while not ok: pick = random.choice(lo_list) if NoT_MATCH_ARG and pick[2] not in NoT_MATCH_ARG: continue if pick[0] not in picked: picked.append(pick[0]) pick[1] = 'Ran_%s' % i ok = True result.append(pick) output_name = '%s-2.csv' % m create_csv(output_name, result, directory='part7') def get_by_group(model_name, step, **kwargs): new_prefix = kwargs.get('prefix', 'XX') output_suffix = kwargs.get('suffix', '3') result = [] count = 1 fpath = os.path.join(TARGET_DIR, 'part7', '%s-%s.csv' % (model_name, step)) with open(fpath, 'rb') as f: rows = csv.reader(f) # do nothing to 1st row first = rows.next() first[0] = '%s_%s' % (new_prefix, count) result.append(first) calibrator = first[:] for r in rows: count += 1 # มันดันเอาค่าที่รวมๆ ไปทำ YYNN ด้วยก็เลยตัด col สุดท้ายออก แค่นั้นแหละ _r = r[:-1] _r[0] = '%s_%s' % (new_prefix, count) for i in xrange(5, len(_r)): # get rid of asterik first calibrator[i] = calibrator[i].replace('*', '') # do comparison if _r[i] == 'Y/N' or _r[i] == calibrator[i] or calibrator[i] == '': pass elif (_r[i] == '' and calibrator[i] != '') or calibrator[i] == 'Y/N': _r[i] = '%s*' % calibrator[i] else: # one Y one N _r[i] = 'Y/N*' _r.append(get_yynn_count(_r)) # ?????????? result.append(_r) calibrator = _r[:] output_name = '%s-%s.csv' % (model_name, output_suffix) create_csv(output_name, result, directory='part7') get_by_order() get_by_random() for m in MODEL_LIST: for s in '12': get_by_group(m, s, prefix='Rg', suffix=('3' if s == '1' else '4'))