You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 

142 lines
4.8 KiB

#!/usr/bin/env python
# -*- coding:utf-8 -*-
'''
Step 7
'''
from __future__ import print_function
from base import create_csv, TARGET_DIR, MODEL_LIST
import os
import csv
import random
'''
Edit the values below for now, e.g. No of Tree = "4Ts" and No of R = "5".
Your No of Tree ==== NoT_MATCH_ARG, which is a list or tuple
- put whichever ones you want in the list, e.g. for 4Ts and 2Ts >> ['4Ts', '2Ts']
- to take all of them, either list every one ['4Ts', '3Ts', ... ] or just use []
Your No of R ======= TOTAL -- set it to 0 if you want everything
However, I can't guarantee every combination works, because it depends on several things,
e.g. 1Ts may have too few rows and could error out, especially in random mode
'''
# Tree-count labels to keep: rows whose column 2 is not in this list are
# skipped. An empty list disables the filter.
NoT_MATCH_ARG = ['2Ts', '1Ts']
# Number of rows to select per model; any value below 1 means "no limit".
TOTAL = 10
# Directory holding the '<model>-6.csv' input files.
SRC_PATH = os.path.join(TARGET_DIR, 'part6')
def get_yynn_count(cols):
    """Return the Y/N score of a row.

    Each cell equal to 'Y', 'Y*', 'N' or 'N*' contributes 1; each cell equal
    to 'Y/N' or 'Y/N*' contributes 2; every other cell contributes nothing.
    """
    single_marks = ('Y', 'Y*', 'N', 'N*')
    double_marks = ('Y/N', 'Y/N*')
    # The two groups are disjoint, so a cell adds at most one of the terms.
    return sum(
        (cell in single_marks) + 2 * (cell in double_marks)
        for cell in cols
    )
def get_by_order():
    """Select the first matching rows of each model, in file order.

    For every model in MODEL_LIST, reads '<model>-6.csv' from SRC_PATH,
    keeps rows whose column 2 is listed in NoT_MATCH_ARG (all rows when that
    list is empty), stops after TOTAL rows when TOTAL is positive, and
    writes the selection to 'part7/<model>-1.csv' via create_csv.
    """
    for model in MODEL_LIST:
        source = os.path.join(SRC_PATH, '%s-6.csv' % model)
        selected = []
        with open(source, 'rb') as handle:
            for record in csv.reader(handle):
                if not NoT_MATCH_ARG or record[2] in NoT_MATCH_ARG:
                    entry = [
                        record[0],             # R_id
                        'Ord_%s' % record[1],  # no of R
                        record[2],             # No of Tree
                        model,                 # model
                    ]
                    entry.extend(record[7:])
                    # The score is computed over the full source row.
                    entry.append(get_yynn_count(record))
                    selected.append(entry)
                    # A non-positive TOTAL means "take every matching row".
                    if TOTAL > 0 and len(selected) >= TOTAL:
                        break
        create_csv('%s-1.csv' % model, selected, directory='part7')
def get_by_random():
    """Select a random sample of matching rows for each model.

    For every model in MODEL_LIST, reads '<model>-6.csv' from SRC_PATH,
    keeps rows whose column 2 is listed in NoT_MATCH_ARG (all rows when that
    list is empty), and picks up to TOTAL of them at random with distinct
    R_id values. When TOTAL is below 1, every eligible row is taken (in
    random order). Picked rows are labelled 'Ran_0', 'Ran_1', ... in pick
    order and written to 'part7/<model>-2.csv' via create_csv.

    If fewer eligible rows exist than requested, all of them are returned.
    The previous retry-until-unique loop could never finish in that case,
    and random.choice raised on an empty input.
    """
    for m in MODEL_LIST:
        candidates = []
        with open(os.path.join(SRC_PATH, '%s-6.csv' % m), 'rb') as f:
            for r in csv.reader(f):
                # Filter up front so the sampling below only ever sees rows
                # that are allowed to be picked.
                if NoT_MATCH_ARG and r[2] not in NoT_MATCH_ARG:
                    continue
                row = [
                    r[0],  # R_id
                    '',    # no of R, filled in once the row is picked
                    r[2],  # No of Tree
                    m,     # model
                ] + r[7:]
                # The score is computed over the full source row.
                row.append(get_yynn_count(r))
                candidates.append(row)

        wanted = TOTAL if TOTAL >= 1 else len(candidates)

        # Walking a shuffled list and skipping already-seen R_ids yields the
        # same distribution as repeated random.choice with rejection, but is
        # guaranteed to terminate.
        random.shuffle(candidates)
        result = []
        seen_ids = set()
        for row in candidates:
            if len(result) >= wanted:
                break
            if row[0] in seen_ids:
                continue
            seen_ids.add(row[0])
            row[1] = 'Ran_%s' % len(result)
            result.append(row)

        output_name = '%s-2.csv' % m
        create_csv(output_name, result, directory='part7')
def get_by_group(model_name, step, **kwargs):
    """Carry Y/N marks forward row by row and write the cumulative result.

    Reads 'part7/<model_name>-<step>.csv' under TARGET_DIR. The first row is
    only relabelled. Every later row has its last column dropped (it is
    replaced by a freshly computed score), is relabelled, and has each cell
    from column 5 onwards merged with the same cell of the previous output
    row (asterisks stripped from the previous value first):

    * current is 'Y/N', equals the previous value, or the previous value is
      empty -> current is kept as is;
    * current is empty, or the previous value is 'Y/N' -> previous value
      followed by '*';
    * otherwise (one 'Y' and one 'N') -> 'Y/N*'.

    The merged row then becomes the reference for the next row. An empty
    input file produces an empty output file instead of raising.

    Keyword arguments:
        prefix: label prefix for column 0, written as '<prefix>_<n>' with n
            starting at 1 (default 'XX').
        suffix: output file suffix, giving 'part7/<model_name>-<suffix>.csv'
            (default '3').
    """
    new_prefix = kwargs.get('prefix', 'XX')
    output_suffix = kwargs.get('suffix', '3')
    result = []
    fpath = os.path.join(TARGET_DIR, 'part7', '%s-%s.csv' % (model_name, step))
    with open(fpath, 'rb') as f:
        calibrator = None
        for count, r in enumerate(csv.reader(f), 1):
            if calibrator is None:
                # First row: relabel only, and use it as the initial
                # reference for the rows that follow.
                r[0] = '%s_%s' % (new_prefix, count)
                result.append(r)
                calibrator = r[:]
                continue

            # The last column holds the score written by the previous step;
            # drop it so it is not counted again when the score is recomputed.
            _r = r[:-1]
            _r[0] = '%s_%s' % (new_prefix, count)
            for i in range(5, len(_r)):
                # Compare against the previous value without its asterisk.
                previous = calibrator[i].replace('*', '')
                current = _r[i]
                if current == 'Y/N' or current == previous or previous == '':
                    continue
                # 'previous' is known to be non-empty from here on.
                if current == '' or previous == 'Y/N':
                    _r[i] = '%s*' % previous
                else:  # one Y one N
                    _r[i] = 'Y/N*'
            _r.append(get_yynn_count(_r))
            result.append(_r)
            calibrator = _r[:]

    output_name = '%s-%s.csv' % (model_name, output_suffix)
    create_csv(output_name, result, directory='part7')
# Build the ordered ('-1') and random ('-2') selections first; the grouping
# pass below reads those files back from part7.
get_by_order()
get_by_random()

# Step '1' output is regrouped into '-3', step '2' output into '-4'.
for m in MODEL_LIST:
    for s in ('1', '2'):
        get_by_group(m, s, prefix='Rg', suffix={'1': '3', '2': '4'}[s])