作成日: 2021/11/04 更新日: 2023/03/25 サイトの紹介と使い方
概要
- 1行の手書きの日本語の文字列をテキストに変換します。
- 文字の重なりは対象外とします。
- 1行は任意の矩形領域でアノテーションされているものとします。
1行の文字列の認識プログラムソース
失敗:015-kana.py
- ターゲット文字列の左から、学習した文字画像の横幅より小さい領域で認識させると、途中で違う文字と認識してしまいました。
- 問題点の予想は次の通りです。
- 学習量が少な過ぎる。
- モデルの構築が適切ではない。
import os,glob,sys
import numpy as np
from PIL import Image
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import model_from_json
import c006_env as env
def main():
    """Slide a variable-width window across a single-line handwriting image
    and print every character recognized above ``env.better_eval``.

    Fixes vs. original: closes the JSON file handle, drops the unused
    ``endx`` local and the duplicate ``isx`` computation, and avoids
    shadowing the builtin ``max``.
    """
    # Rebuild the trained model: JSON architecture + HDF5 weights.
    with open(env.fs_json, 'r') as fp:
        json_string = fp.read()
    model = model_from_json(json_string)
    model.load_weights(env.fs_hdf5)

    img = Image.open(env.target_file)
    img = img.convert(env.img_mode)
    # Scale the line to a fixed 60 px height, preserving aspect ratio.
    sx = img.size[0] * 60 / img.size[1]
    isx = sx  # scaled width, used as the scan limit below
    img = img.resize((int(sx), 60))

    leftx = 1
    rightx = env.size_start
    charflag = -1  # -1: initial, 0: previous window matched nothing, 1: char found
    while rightx < isx:
        print('start')
        maxright = leftx + env.size_end
        if charflag == 0:
            # Nothing recognized in the previous window: skip ahead one step.
            leftx += env.size_start
            rightx = leftx + env.size_start
            if rightx > isx:
                break
        print(leftx, rightx, maxright, isx)
        # Grow the window until a character is recognized or the cap is hit.
        while rightx < maxright:
            if rightx > isx:
                break
            img_crop = img.crop((leftx, 1, rightx, 60))
            img_crop = img_crop.resize((60, 60))
            # NOTE(review): dtype 'int8' wraps pixel values >127 to negatives;
            # kept because the model was presumably trained with the same
            # conversion — confirm against the training pipeline.
            data = np.asarray([np.asarray(img_crop).tolist()], dtype='int8')
            data = data.astype('float32') / 255
            pre = model.predict(data)
            idx2, score = env.best_char(pre[0])
            if score > env.better_eval:
                print('result:', env.groups[idx2], idx2, score)
                # Restart the window just past the recognized character.
                leftx = rightx + 1
                rightx = leftx + env.size_start
                charflag = 1
                break
            else:
                charflag = 0
            rightx += 1
    print('success')


main()
import sys,glob
import numpy as np
from PIL import Image
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import model_from_json
# --- Shared configuration (c006_env) ---
# CLI: <family id> <model name prefix> <target image file stem>
cls = sys.argv[1]
model_name = sys.argv[2]
target_f = sys.argv[3]
root_dir = '/home/kurodon/handwritten/dendo'
file_type = '/*.jpg'   # glob suffix; file_type[2:] yields the '.jpg' extension
img_mode = 'RGB'
img_size = 60          # square input size (px) expected by the model
size_start = int(img_size * 1 )   # minimum sliding-window width
size_end = int(img_size * 2 )     # maximum sliding-window width
if( cls=="F04-01" ):
    # NOTE(review): these names are only bound for family "F04-01"; any other
    # cls value leaves them undefined and later code raises NameError.
    target_file = root_dir + '/target/single/' + target_f + file_type[2:]
    family = "num_kana"
    train_dir = root_dir+'/'+family+'/train/'
    test_dir = root_dir+'/'+family+'/test/'
    family_dir = [ family ]
    # Class labels: digits plus unvoiced katakana; one directory per character.
    f_num_kana = "0123456789"
    f_num_kana += "アイウエオカキクケコサシスセソタチツテトナニヌネノ"
    f_num_kana += "ハヒフヘホマミムメモヤユヨラリルレロワヲン"
    group_dir = family
    groups = f_num_kana
    family_member = [ f_num_kana ]
    family_name = cls
    nb_classes = len( f_num_kana )
    family_io = root_dir+'/io/'+family_name+'_'
    npy_x_train = family_io+'x_train.npy'
    npy_y_train = family_io+'y_train.npy'
    fs_json = family_io+model_name+'model.json'   # model architecture
    fs_hdf5 = family_io+model_name+'model.hdf5'   # model weights
# Accuracy / probability thresholds.
best_eval = 0.79     # per-class accuracy above this counts as "best"
better_eval = 0.59   # recognition confidence threshold used by the driver
multi_eval = 0.3     # probability floor for listing candidate classes
def best_char( pred ):
    """Return ``(index, probability)`` of the highest-scoring class in *pred*.

    Preserves the original strict-greater scan: ties keep the first
    occurrence, and if every score is <= 0.0 (or *pred* is empty) the
    result is ``(-1, 0.0)``.  Renamed locals so the builtin ``max`` is
    no longer shadowed.
    """
    best_idx = -1
    best_val = 0.0
    for ii, pp in enumerate(pred):
        if pp > best_val:   # strict '>' keeps the first occurrence on ties
            best_val = pp
            best_idx = ii
    return best_idx, best_val
def load_img3( rdir ):
    """Load every sample image under ``rdir``/<char>/ and return the pair
    ``(images, labels)`` as int8 numpy arrays, where each label is the
    character's index in ``groups``."""
    samples = []
    labels = []
    for idx, group in enumerate(groups):
        paths = glob.glob(rdir + group + file_type)
        print('char {} '.format(group), format(len(paths)))
        for path in paths:
            pic = Image.open(path)
            pic = pic.resize((img_size, img_size))
            pic = pic.convert(img_mode)
            samples.append(np.asarray(pic).tolist())
            labels.append(idx)
    ximg = np.array(samples, dtype='int8')
    yimg = np.array(labels, dtype='int8')
    return ximg, yimg
def eval_char3( eval_dir ):
    """Evaluate the saved model class-by-class over the images in
    ``<root>/<family>/<eval_dir>/<char>/`` and print per-class accuracy
    plus an overall total.

    Classes are bucketed by accuracy: "best" (> best_eval), "better"
    (> better_eval), otherwise "wrong".

    Bug fix: the original total line divided by ``nb_classes * nfiles``
    where ``nfiles`` was only the LAST class's file count; the total image
    count is now accumulated across all classes.
    """
    with open(fs_json, 'r') as fp:
        json_string = fp.read()
    model = model_from_json(json_string)
    model.load_weights(fs_hdf5)
    nncor = 0   # total correct predictions across all classes
    nntot = 0   # total evaluated images (accumulated, not last-class only)
    n1 = 0
    n2 = 0
    n3 = 0
    for jj, group in enumerate(groups):
        img_dir = root_dir + '/' + family + '/' + eval_dir + '/' + group
        files = glob.glob(img_dir + file_type)
        nfiles = len(files)
        ncor = 0
        for ff in files:
            img = Image.open(ff)
            img = img.resize((img_size, img_size))
            img = img.convert(img_mode)
            # NOTE(review): 'int8' wraps pixel values >127 to negatives; kept
            # for consistency with how the model data was built — confirm.
            data = np.asarray([np.asarray(img).tolist()], dtype='int8')
            data = data.astype('float32') / 255
            pre = model.predict(data)
            idx2, score = best_char(pre[0])
            if jj == idx2:
                ncor += 1
        nncor += ncor
        nntot += nfiles
        e1 = float(ncor) / float(nfiles)  # raises ZeroDivisionError if a class dir is empty
        if e1 > best_eval:
            n1 += 1
        elif e1 > better_eval:
            n2 += 1
        else:
            n3 += 1
        print(group, ncor, ' / ', nfiles)
    print('best char :', n1, ' / ', nb_classes, float(n1)/float(nb_classes))
    print('better char:', n2, ' / ', nb_classes, float(n2)/float(nb_classes))
    print('wrong char :', n3, ' / ', nb_classes, float(n3)/float(nb_classes))
    print('total :', nncor, ' / ', nntot, float(nncor)/float(nntot))
def alike_char( c_eval ):
    """For every test image of character *c_eval*, print the index, the
    expected character, the top prediction, and each class probability
    exceeding ``multi_eval`` (comma separated)."""
    model = model_from_json(open(fs_json, 'r').read())
    model.load_weights(fs_hdf5)
    img_dir = root_dir + '/' + family + '/test/' + c_eval
    files = glob.glob(img_dir + file_type)
    nfiles = len(files)  # kept for parity with the original (unused)
    for ii, ff in enumerate(files):
        pic = Image.open(ff)
        pic = pic.resize((img_size, img_size))
        pic = pic.convert(img_mode)
        batch = np.asarray([np.asarray(pic).tolist()], dtype='int8')
        batch = batch.astype('float32') / 255
        pre = model.predict(batch)
        idx2, top = best_char(pre[0])
        parts = [str(ii) + " " + c_eval + " ", groups[idx2], " "]
        for prob in pre[0]:
            if prob > multi_eval:
                parts.append(str(prob))
                parts.append(",")
        print("".join(parts))
失敗:015-kana-02.py
- 文字間の空白を区切りとして、最初に文字を抽出しました。
- 問題:文字が左寄せ、中央寄せによって違う文字に認識されました。
- モデルの構築の見直しが必要です。
- 学習データをトリミングして作成してみます。
- そして、すべてのデータを左上寄せにします。
- 問題の複雑さの要因を増やさないためにアスペクト比は変えないようにします。
- プログラムをいじって、最適な文字位置にしたとき、"アイウエオ"が"アイウユオ"と認識されました。
- 最適な文字位置以外のときは、0%~40%の認識率でした。
import os,glob,sys
import numpy as np
from PIL import Image
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import model_from_json
import c007_env as env
def main():
    """Entry point: delegate segmentation and recognition to the env module."""
    env.separate_char()
    print('success')


main()
import sys,glob
import numpy as np
from PIL import Image
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import model_from_json
# --- Shared configuration (c007_env) ---
# CLI: <family id> <model name prefix> <target image file stem>
cls = sys.argv[1]
model_name = sys.argv[2]
target_f = sys.argv[3]
root_dir = '/home/kurodon/handwritten/dendo'
file_type = '/*.jpg'   # glob suffix; file_type[2:] yields the '.jpg' extension
img_mode = 'RGB'
img_size = 60          # square input size (px) expected by the model
size_start = int(img_size * 1 )   # minimum sliding-window width
size_end = int(img_size * 2 )     # maximum sliding-window width
max_char = 1000        # capacity of the character-position table in separate_char
std_span = 3           # blank-column run length that separates two characters
if( cls=="F04-01" ):
    # NOTE(review): these names are only bound for family "F04-01"; any other
    # cls value leaves them undefined and later code raises NameError.
    target_file = root_dir + '/target/single/' + target_f + file_type[2:]
    family = "num_kana"
    train_dir = root_dir+'/'+family+'/train/'
    test_dir = root_dir+'/'+family+'/test/'
    family_dir = [ family ]
    # Class labels: digits plus unvoiced katakana; one directory per character.
    f_num_kana = "0123456789"
    f_num_kana += "アイウエオカキクケコサシスセソタチツテトナニヌネノ"
    f_num_kana += "ハヒフヘホマミムメモヤユヨラリルレロワヲン"
    group_dir = family
    groups = f_num_kana
    family_member = [ f_num_kana ]
    family_name = cls
    nb_classes = len( f_num_kana )
    family_io = root_dir+'/io/'+family_name+'_'
    npy_x_train = family_io+'x_train.npy'
    npy_y_train = family_io+'y_train.npy'
    fs_json = family_io+model_name+'model.json'   # model architecture
    fs_hdf5 = family_io+model_name+'model.hdf5'   # model weights
# Accuracy / probability thresholds.
best_eval = 0.79     # per-class accuracy above this counts as "best"
better_eval = 0.59   # recognition confidence threshold
multi_eval = 0.3     # probability floor for reporting a recognized character
def best_char( pred ):
    """Return ``(index, probability)`` of the highest-scoring class in *pred*.

    Preserves the original strict-greater scan: ties keep the first
    occurrence, and if every score is <= 0.0 (or *pred* is empty) the
    result is ``(-1, 0.0)``.  Renamed locals so the builtin ``max`` is
    no longer shadowed.
    """
    best_idx = -1
    best_val = 0.0
    for ii, pp in enumerate(pred):
        if pp > best_val:   # strict '>' keeps the first occurrence on ties
            best_val = pp
            best_idx = ii
    return best_idx, best_val
def load_img3( rdir ):
    """Load every sample image under ``rdir``/<char>/ and return the pair
    ``(images, labels)`` as int8 numpy arrays, where each label is the
    character's index in ``groups``."""
    samples = []
    labels = []
    for idx, group in enumerate(groups):
        paths = glob.glob(rdir + group + file_type)
        print('char {} '.format(group), format(len(paths)))
        for path in paths:
            pic = Image.open(path)
            pic = pic.resize((img_size, img_size))
            pic = pic.convert(img_mode)
            samples.append(np.asarray(pic).tolist())
            labels.append(idx)
    ximg = np.array(samples, dtype='int8')
    yimg = np.array(labels, dtype='int8')
    return ximg, yimg
def eval_char3( eval_dir ):
    """Evaluate the saved model class-by-class over the images in
    ``<root>/<family>/<eval_dir>/<char>/`` and print per-class accuracy
    plus an overall total.

    Classes are bucketed by accuracy: "best" (> best_eval), "better"
    (> better_eval), otherwise "wrong".

    Bug fix: the original total line divided by ``nb_classes * nfiles``
    where ``nfiles`` was only the LAST class's file count; the total image
    count is now accumulated across all classes.
    """
    with open(fs_json, 'r') as fp:
        json_string = fp.read()
    model = model_from_json(json_string)
    model.load_weights(fs_hdf5)
    nncor = 0   # total correct predictions across all classes
    nntot = 0   # total evaluated images (accumulated, not last-class only)
    n1 = 0
    n2 = 0
    n3 = 0
    for jj, group in enumerate(groups):
        img_dir = root_dir + '/' + family + '/' + eval_dir + '/' + group
        files = glob.glob(img_dir + file_type)
        nfiles = len(files)
        ncor = 0
        for ff in files:
            img = Image.open(ff)
            img = img.resize((img_size, img_size))
            img = img.convert(img_mode)
            # NOTE(review): 'int8' wraps pixel values >127 to negatives; kept
            # for consistency with how the model data was built — confirm.
            data = np.asarray([np.asarray(img).tolist()], dtype='int8')
            data = data.astype('float32') / 255
            pre = model.predict(data)
            idx2, score = best_char(pre[0])
            if jj == idx2:
                ncor += 1
        nncor += ncor
        nntot += nfiles
        e1 = float(ncor) / float(nfiles)  # raises ZeroDivisionError if a class dir is empty
        if e1 > best_eval:
            n1 += 1
        elif e1 > better_eval:
            n2 += 1
        else:
            n3 += 1
        print(group, ncor, ' / ', nfiles)
    print('best char :', n1, ' / ', nb_classes, float(n1)/float(nb_classes))
    print('better char:', n2, ' / ', nb_classes, float(n2)/float(nb_classes))
    print('wrong char :', n3, ' / ', nb_classes, float(n3)/float(nb_classes))
    print('total :', nncor, ' / ', nntot, float(nncor)/float(nntot))
def alike_char( c_eval ):
    """For every test image of character *c_eval*, print the index, the
    expected character, the top prediction, and each class probability
    exceeding ``multi_eval`` (comma separated)."""
    model = model_from_json(open(fs_json, 'r').read())
    model.load_weights(fs_hdf5)
    img_dir = root_dir + '/' + family + '/test/' + c_eval
    files = glob.glob(img_dir + file_type)
    nfiles = len(files)  # kept for parity with the original (unused)
    for ii, ff in enumerate(files):
        pic = Image.open(ff)
        pic = pic.resize((img_size, img_size))
        pic = pic.convert(img_mode)
        batch = np.asarray([np.asarray(pic).tolist()], dtype='int8')
        batch = batch.astype('float32') / 255
        pre = model.predict(batch)
        idx2, top = best_char(pre[0])
        parts = [str(ii) + " " + c_eval + " ", groups[idx2], " "]
        for prob in pre[0]:
            if prob > multi_eval:
                parts.append(str(prob))
                parts.append(",")
        print("".join(parts))
def separate_char():
    """Split the target line image into characters at whitespace gaps, then
    classify each crop with the saved model.

    A column is "blank" when every pixel in it has an RGB sum >= 128*3.
    Runs of more than ``std_span`` blank columns toggle between "inside a
    character" (rflag=1) and "between characters" (rflag=0), recording
    start/end columns in ``npos``.
    """
    img = Image.open( target_file )
    img = img.convert( img_mode )
    # Scale to a fixed height of img_size px, preserving aspect ratio.
    sx = img.size[0] * img_size / img.size[1]
    # isx = int( img.size[0] )
    isx = int( sx )
    img = img.resize( (isx,img_size) )
    data = np.asarray( img )
    x2 = data.tolist()
    x1 = []
    x1.append( x2 )
    data = np.asarray( x1,dtype='int' )
    print( 'shape:',data.shape )
    # npos[0][k] / npos[1][k] = start / end column of the k-th character.
    npos = np.zeros((2, max_char))
    npos = np.asarray( npos,dtype='int' )
    # print( npos.shape )
    ipos = -1     # current scan column
    ispan = 0     # length of the current blank-column run
    nchar = 0     # number of characters found so far
    iswitch = 0   # NOTE(review): assigned here, never used afterwards
    rflag = 0     # 0 = between characters, 1 = inside a character
    while ipos<(isx-1):
        nn = 0
        iflag = 0
        ipos += 1
        # iflag becomes 1 when this column contains at least one dark pixel.
        for jj in range( img_size ):
            nn = 0
            # if( ipos<10 ):
            # print( data[0][jj][ipos-1][0],data[0][jj][ipos-1][1],data[0][jj][ipos-1][2] )
            for kk in range( 3 ):
                nn += data[0][jj][ipos][kk]
            if( nn<(128*3) ):
                iflag = 1
                break
        if( iflag==0 ):
            # Blank column: extend the gap; a long enough gap ends a character.
            ispan += 1
            # print( 'ispan:',ispan,ipos )
            if( rflag==1 ):
                if( ispan>std_span ):
                    # print( 'nchar end :',nchar,'>',ipos )
                    npos[1][nchar] = ipos+1
                    rflag = 0
                    nchar += 1
                    ispan = 0
        else:
            # Ink column: after a long enough gap, this starts a new character.
            if( rflag==0 ):
                if( ispan>std_span ):
                    # print( 'nchar start:',nchar,'>',ipos )
                    npos[0][nchar] = ipos+1
                    rflag = 1
                    ispan = 0
    # NOTE(review): if the image ends while rflag==1, the last character is
    # never closed (nchar not incremented) and is silently dropped — confirm
    # whether trailing whitespace is guaranteed in the input images.
    print( 'nchar:',nchar )
    for ii in range(nchar):
        print( ii,npos[0][ii],npos[1][ii] )
    json_string = open( fs_json,'r' ).read()
    model = model_from_json( json_string )
    model.load_weights( fs_hdf5 )
    # Classify each detected character crop.
    for ii in range(nchar):
        leftx = npos[0][ii]
        rightx = npos[1][ii]
        # print( leftx,rightx )
        img_crop = img.crop( (leftx,1,rightx,img_size) )
        # img_crop = img_crop.resize( (img_size,img_size) )
        # Paste the crop onto a white square at a fixed offset.
        # NOTE(review): x offset 25 is hard-coded; per the article, character
        # placement strongly affects recognition — confirm intended alignment.
        img_crop2 = Image.new( img_mode, (img_size, img_size), (255,255,255) )
        img_crop2.paste( img_crop, (25,1) )
        # ff = root_dir + '/target/single/' + 'sep' + str(ii) + file_type[2:]
        # img_crop2.save( ff , quality=95 )
        data = np.asarray( img_crop2 )
        x2 = data.tolist()
        x1 = []
        x1.append( x2 )
        # NOTE(review): dtype 'int8' wraps pixel values >127 to negatives —
        # presumably matches the training-data conversion; verify.
        data = np.asarray( x1,dtype='int8' )
        data = data.astype( 'float32') /255
        pre = model.predict( data )
        idx2,max = best_char(pre[0])
        if( max>multi_eval ):
        # if( 1 ):
            print( 'result:',groups[idx2],idx2,max )
前の記事:
次の記事: