# Senna full-text engine hooks for MySQL 4.0.24 (MyISAM), as a unified diff.
#
# What the hunks below do:
#   - configure.in: probes for libsenna (sen_log) and libmecab (mecab_new2).
#   - include/myisam.h: adds a `sen_index *senna` handle to MI_KEYDEF.
#   - myisam/mi_open.c, myisam/mi_close.c: open (or create) and close one
#     Senna index per HA_FULLTEXT key.  The index path is the table's
#     unique_file_name with its last three characters replaced by the
#     zero-padded key number.
#   - myisam/ft_update.c, myisam/mi_check.c: mirror row add / delete / update
#     and the full-text repair-by-sort path into the Senna index.
#   - myisam/ft_nlq_search.c: serves natural-language search rows and scores
#     from the Senna result set stored in FT_INFO.sir.
#   - myisam/mi_delete_table.c, myisam/mi_rename.c: unlink / rename the
#     "<table>.NNN.SEN*" files for key numbers 0..99.
#   - sql/sql_db.cc, libmysqld/sql_db.cc: skip directory entries that end in
#     .SEN, .SEN.i, .SEN.l or .SEN.i.c.
#
# NOTE(review): this copy was rebuilt from a text whose line breaks and
# indentation had been collapsed.  Hunk line counts were used to place blank
# lines; the leading whitespace of context lines is a best guess, so apply
# with `patch -p1 -l` and verify against a pristine mysql-4.0.24 tree.
# NOTE(review): three spans were missing from that text and were restored
# from memory of upstream -- TODO confirm: `<m_ctype.h>` and `<senna.h>` in
# include/myisam.h, and the `for(i=0; i<keys; i++) {` /
# `VOID(rwlock_destroy(&share->key_root_lock[i]));` context in mi_close.c.
#
# Changes made while rebuilding:
#   - ft_sen_index_add / ft_sen_index_del return -1 when malloc() fails
#     instead of writing through a NULL buffer.
#   - ft_nlq_close_search / ft_nlq_reinit_search only touch handler->sir when
#     it is non-NULL, matching the checks already present in
#     ft_nlq_read_next and ft_nlq_get_relevance.
#   - live sen_log() calls print my_off_t through (ulong)/%lu and the key
#     segment pointer through %p instead of %d.
#
# Known open issues, deliberately left unchanged:
#   - ft_init_nlq_search calls sen_index_sel() before _mi_check_index() has
#     validated keynr, and the result is not released on the early
#     DBUG_RETURN(NULL) path.
#   - ft_nlq_read_next reads the row position through `const int *` although
#     the key is stored from a my_off_t in ft_sen_index_add.
#   - the pre-existing code after the new unconditional returns in
#     ft_nlq_find_relevance and ft_nlq_get_relevance is unreachable.
#   - in mi_close.c the Senna close loop sits inside the key_root_lock
#     destroy loop, so it shares whatever conditional compilation guards it.
#
diff -Nur mysql-4.0.24/configure.in mysql-4.0.24.senna/configure.in
--- mysql-4.0.24/configure.in	2005-03-05 09:38:15.000000000 +0900
+++ mysql-4.0.24.senna/configure.in	2005-04-05 19:23:35.000000000 +0900
@@ -776,6 +776,10 @@
     ;;
 esac
 
+# For senna
+AC_CHECK_LIB(senna, sen_log)
+AC_CHECK_LIB(mecab, mecab_new2)
+
 #--------------------------------------------------------------------
 # Check for TCP wrapper support
 #--------------------------------------------------------------------
diff -Nur mysql-4.0.24/include/myisam.h mysql-4.0.24.senna/include/myisam.h
--- mysql-4.0.24/include/myisam.h	2005-03-05 09:38:14.000000000 +0900
+++ mysql-4.0.24.senna/include/myisam.h	2005-04-05 19:23:35.000000000 +0900
@@ -29,6 +29,10 @@
 #include <m_ctype.h>
 #endif
 
+//#ifdef SENNA
+#include <senna.h>
+//#endif
+
   /* defines used by myisam-funktions */
 
 /* The following defines can be increased if necessary */
@@ -149,6 +153,11 @@
                   struct st_mi_s_param *s_temp);
   void (*store_key)(struct st_mi_keydef *keyinfo, uchar *key_pos,
                     struct st_mi_s_param *s_temp);
+
+//#ifdef SENNA
+  sen_index *senna;
+//#endif
+
 } MI_KEYDEF;
 
 
diff -Nur mysql-4.0.24/libmysqld/sql_db.cc mysql-4.0.24.senna/libmysqld/sql_db.cc
--- mysql-4.0.24/libmysqld/sql_db.cc	2005-03-05 09:38:15.000000000 +0900
+++ mysql-4.0.24.senna/libmysqld/sql_db.cc	2005-04-05 19:24:02.000000000 +0900
@@ -252,6 +252,46 @@
          (file->name[1] == '.' && !file->name[2])))
       continue;
 
+    /* senna files is skip */
+    /* ".SEN",".SEN.i",".SEN.i.c",".SEN.l" */
+    {
+      int len = strlen(file->name);
+      if (len > 4) {
+        if (file->name[len-4] == '.' &&
+            file->name[len-3] == 'S' &&
+            file->name[len-2] == 'E' &&
+            file->name[len-1] == 'N')
+          continue;
+      }
+      if (len > 6) {
+        if (file->name[len-6] == '.' &&
+            file->name[len-5] == 'S' &&
+            file->name[len-4] == 'E' &&
+            file->name[len-3] == 'N' &&
+            file->name[len-2] == '.' &&
+            file->name[len-1] == 'i')
+          continue;
+        if (file->name[len-6] == '.' &&
+            file->name[len-5] == 'S' &&
+            file->name[len-4] == 'E' &&
+            file->name[len-3] == 'N' &&
+            file->name[len-2] == '.' &&
+            file->name[len-1] == 'l')
+          continue;
+      }
+      if (len > 8) {
+        if (file->name[len-8] == '.' &&
+            file->name[len-7] == 'S' &&
+            file->name[len-6] == 'E' &&
+            file->name[len-5] == 'N' &&
+            file->name[len-4] == '.' &&
+            file->name[len-3] == 'i' &&
+            file->name[len-2] == '.' &&
+            file->name[len-1] == 'c')
+          continue;
+      }
+    }
+
     /* Check if file is a raid directory */
     if ((isdigit(file->name[0]) ||
          (file->name[0] >= 'a' && file->name[0] <= 'f')) &&
diff -Nur mysql-4.0.24/myisam/ft_nlq_search.c mysql-4.0.24.senna/myisam/ft_nlq_search.c
--- mysql-4.0.24/myisam/ft_nlq_search.c	2005-03-05 09:38:14.000000000 +0900
+++ mysql-4.0.24.senna/myisam/ft_nlq_search.c	2005-04-05 19:23:35.000000000 +0900
@@ -33,6 +33,7 @@
   MI_INFO  *info;
   int       ndocs;
   int       curdoc;
+  sen_records *sir;
   FT_DOC    doc[1];
 };
 
@@ -176,8 +177,15 @@
   FT_DOC *dptr;
   FT_INFO *dlist=NULL;
   my_off_t saved_lastpos=info->lastpos;
+  sen_records *sir;
   DBUG_ENTER("ft_init_nlq_search");
 
+  // sen_log("ft_init_nlq_search(%p,%d,%p,%d,%d)", info, keynr, query, query_len, presort);
+
+  sir = sen_index_sel(info->s->keyinfo[keynr].senna, query);
+
+  // sen_log("sen_index_search done");
+
 /* black magic ON */
   if ((int) (keynr = _mi_check_index(info,keynr)) < 0)
     DBUG_RETURN(NULL);
@@ -221,6 +229,8 @@
     dlist->info=aio.info;
     dptr=dlist->doc;
 
+    dlist->sir = sir;
+
     tree_walk(&aio.dtree, (tree_walk_action) &walk_and_copy,
               &dptr, left_root_right);
 
@@ -240,6 +250,32 @@
 int ft_nlq_read_next(FT_INFO *handler, char *record)
 {
   MI_INFO *info= (MI_INFO *) handler->info;
+  // sen_log("ft_nlq_read_next(%p,%p)", handler, record);
+
+  if (handler->sir) {
+    const int *pos;
+
+    info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);
+
+    if ((pos = sen_records_next(handler->sir))) {
+
+      info->lastpos=*pos;
+
+      if (!(*info->read_record)(info,info->lastpos,record))
+      {
+
+        // sen_log("ft_nlq_read_next' %p %x %d", info, *((uint *)record), info->lastpos);
+
+        info->update|= HA_STATE_AKTIV;          /* Record is read */
+        return 0;
+      }
+
+      return my_errno;
+
+    } else {
+      return HA_ERR_END_OF_FILE;
+    }
+  }
 
   if (++handler->curdoc >= handler->ndocs)
   {
@@ -267,9 +303,17 @@
   FT_DOC  *docs=handler->doc;
   my_off_t docid=handler->info->lastpos;
 
+  // sen_log("ft_nlq_find_relevance(docid=%d)", docid);
+
   if (docid == HA_POS_ERROR)
     return -5.0;
 
+  if (!handler->sir) { return 0.0; }
+
+  // sen_log("score = %d", sen_records_find(handler->sir, &docid));
+
+  return 1.0 * sen_records_find(handler->sir, &docid);
+
   /* Assuming docs[] is sorted by dpos... */
 
   for (a=0, b=handler->ndocs, c=(a+b)/2; b-a>1; c=(a+b)/2)
@@ -289,18 +333,30 @@
 
 void ft_nlq_close_search(FT_INFO *handler)
 {
+  sen_log("ft_nlq_close_search(%p)", handler);
+  if (handler->sir)
+    sen_records_close(handler->sir);
   my_free((gptr)handler,MYF(0));
 }
 
 
 float ft_nlq_get_relevance(FT_INFO *handler)
 {
+  // sen_log("ft_nlq_get_relevance(%p)", handler);
+
+  if (!handler->sir) { return 0.0; }
+
+  return 1.0 * sen_records_curr_score(handler->sir);
+
   return (float) handler->doc[handler->curdoc].weight;
 }
 
 
 void ft_nlq_reinit_search(FT_INFO *handler)
 {
+  sen_log("ft_nlq_reinit_search(%p)", handler);
+  if (handler->sir)
+    sen_records_rewind(handler->sir);
   handler->curdoc=-1;
 }
 
diff -Nur mysql-4.0.24/myisam/ft_update.c mysql-4.0.24.senna/myisam/ft_update.c
--- mysql-4.0.24/myisam/ft_update.c	2005-03-05 09:38:15.000000000 +0900
+++ mysql-4.0.24.senna/myisam/ft_update.c	2005-04-05 19:23:35.000000000 +0900
@@ -109,6 +109,8 @@
 {
   TREE ptree;
 
+  // sen_log("_mi_ft_parserecord(%p,%d,%p,%p)", info, keynr, keybuf, record);
+
   bzero((char*) &ptree, sizeof(ptree));
   if (_mi_ft_parse(&ptree, info, keynr, record))
     return NULL;
@@ -170,6 +172,65 @@
   return GEE_THEY_ARE_ABSOLUTELY_IDENTICAL;
 }
 
+int ft_sen_index_add(MI_INFO *info, uint keynr, const byte *record, my_off_t pos)
+{
+  FT_SEG_ITERATOR ftsi;
+  char *buf, *p;
+  int len;
+  // sen_log("info=%p,keynr=%d,rec=%p,pos=%d",info,keynr,record,pos);
+  _mi_ft_segiterator_init(info, keynr, record, &ftsi);
+  for (len = 0; _mi_ft_segiterator(&ftsi); len += ftsi.len) {
+    // sen_log("len=%d", ftsi.len);
+  }
+  // sen_log("len=%d", len);
+  if (!len) { return -1; }
+  p = buf = malloc(len + 1);
+  if (!buf) { return -1; }
+  _mi_ft_segiterator_init(info, keynr, record, &ftsi);
+  while (_mi_ft_segiterator(&ftsi)) {
+    if (ftsi.pos) {
+      memcpy(p, ftsi.pos, ftsi.len);
+      p += ftsi.len;
+    }
+  }
+  *p = '\0';
+  // sen_log("buf=%s,len=%d", buf, strlen(buf));
+  // sen_log("put > (%d, %d)", keynr, pos);
+  sen_index_upd(info->s->keyinfo[keynr].senna, &pos, NULL, buf);
+  // sen_log("put < (%d, %d)", keynr, pos);
+  free(buf);
+  return 0;
+}
+
+int ft_sen_index_del(MI_INFO *info, uint keynr, const byte *record, my_off_t pos)
+{
+  FT_SEG_ITERATOR ftsi;
+  char *buf, *p;
+  int len;
+  // sen_log("info=%p,keynr=%d,rec=%p,pos=%d",info,keynr,record,pos);
+  _mi_ft_segiterator_init(info, keynr, record, &ftsi);
+  for (len = 0; _mi_ft_segiterator(&ftsi); len += ftsi.len) {
+    // sen_log("len=%d", ftsi.len);
+  }
+  // sen_log("len=%d", len);
+  if (!len) { return -1; }
+  p = buf = malloc(len + 1);
+  if (!buf) { return -1; }
+  _mi_ft_segiterator_init(info, keynr, record, &ftsi);
+  while (_mi_ft_segiterator(&ftsi)) {
+    if (ftsi.pos) {
+      memcpy(p, ftsi.pos, ftsi.len);
+      p += ftsi.len;
+    }
+  }
+  *p = '\0';
+  // sen_log("buf=%s,len=%d", buf, strlen(buf));
+  // sen_log("put > (%d, %d)", keynr, pos);
+  sen_index_upd(info->s->keyinfo[keynr].senna, &pos, buf, NULL);
+  // sen_log("put < (%d, %d)", keynr, pos);
+  free(buf);
+  return 0;
+}
 
 /* update a document entry */
 
@@ -182,6 +243,11 @@
   uint key_length;
   int cmp, cmp2;
 
+  sen_log("_mi_ft_update(%p,%d,%p,%p,%p,%lu)", info, keynr, keybuf, oldrec, newrec, (ulong) pos);
+
+  ft_sen_index_del(info, keynr, oldrec, pos);
+  ft_sen_index_add(info, keynr, newrec, pos);
+
   if (!(old_word=oldlist=_mi_ft_parserecord(info, keynr, keybuf, oldrec)))
     goto err0;
   if (!(new_word=newlist=_mi_ft_parserecord(info, keynr, keybuf, newrec)))
@@ -222,7 +288,6 @@
   return error;
 }
 
-
 /* adds a document to the collection */
 
 int _mi_ft_add(MI_INFO *info, uint keynr, byte *keybuf, const byte *record,
@@ -231,6 +296,10 @@
   int error= -1;
   FT_WORD *wlist;
 
+  sen_log("_mi_ft_add(%p,%d,%p,%p,%lu)", info, keynr, keybuf, record, (ulong) pos);
+
+  ft_sen_index_add(info, keynr, record, pos);
+
   if ((wlist=_mi_ft_parserecord(info, keynr, keybuf, record)))
   {
     error=_mi_ft_store(info,keynr,keybuf,wlist,pos);
@@ -247,6 +316,11 @@
 {
   int error= -1;
   FT_WORD *wlist;
+
+  sen_log("_mi_ft_del(%p,%d,%p,%p,%lu)", info, keynr, keybuf, record, (ulong) pos);
+
+  ft_sen_index_del(info, keynr, record, pos);
+
   if ((wlist=_mi_ft_parserecord(info, keynr, keybuf, record)))
   {
     error=_mi_ft_erase(info,keynr,keybuf,wlist,pos);
diff -Nur mysql-4.0.24/myisam/mi_check.c mysql-4.0.24.senna/myisam/mi_check.c
--- mysql-4.0.24/myisam/mi_check.c	2005-03-05 09:38:16.000000000 +0900
+++ mysql-4.0.24.senna/myisam/mi_check.c	2005-04-05 19:23:35.000000000 +0900
@@ -1819,6 +1819,8 @@
   ulonglong key_map=share->state.key_map;
   DBUG_ENTER("mi_repair_by_sort");
 
+  sen_log("mi_repair_by_sort");
+
   start_records=info->state->records;
   got_error=1;
   new_file= -1;
@@ -2598,6 +2600,7 @@
     my_free((char*) wptr, MYF(MY_ALLOW_ZERO_PTR));
     if ((error=sort_get_next_record(sort_param)))
       DBUG_RETURN(error);
+    ft_sen_index_add(info, sort_param->key, sort_param->record, sort_param->filepos);
     if (!(wptr=_mi_ft_parserecord(info,sort_param->key,
                                   key,sort_param->record)))
       DBUG_RETURN(1);
diff -Nur mysql-4.0.24/myisam/mi_close.c mysql-4.0.24.senna/myisam/mi_close.c
--- mysql-4.0.24/myisam/mi_close.c	2005-03-05 09:38:16.000000000 +0900
+++ mysql-4.0.24.senna/myisam/mi_close.c	2005-04-05 19:23:35.000000000 +0900
@@ -31,6 +31,8 @@
   DBUG_PRINT("enter",("base: %lx  reopen: %u  locks: %u",
                       info,(uint) share->reopen, (uint) share->tot_locks));
 
+  sen_log("closing index_file_name %s", share->index_file_name);
+
   pthread_mutex_lock(&THR_LOCK_myisam);
   if (info->lock_type == F_EXTRA_LCK)
     info->lock_type=F_UNLCK;                    /* HA_EXTRA_NO_USER_CHANGE */
@@ -98,6 +100,27 @@
       keys = share->state.header.keys;
       for(i=0; i<keys; i++) {
         VOID(rwlock_destroy(&share->key_root_lock[i]));
+
+        if (share->keyinfo[i].flag & HA_FULLTEXT)
+        {
+          sen_log("share->delay_key_write=%d", share->delay_key_write);
+          sen_log("unique_file_name %s", share->unique_file_name);
+          sen_log("data_file_name   %s", share->data_file_name);
+          sen_log("index_file_name  %s", share->index_file_name);
+
+          {
+            char buf[PATH_MAX];
+            strcpy(buf, share->unique_file_name);
+            sprintf(buf + strlen(buf) - 3, "%03d", i);
+            sen_log("closing (%s)", buf);
+            if (share->keyinfo[i].senna) {
+              sen_index_close(share->keyinfo[i].senna);
+            }
+          }
+
+        }
+
+
       }
     }
 #endif
diff -Nur mysql-4.0.24/myisam/mi_delete_table.c mysql-4.0.24.senna/myisam/mi_delete_table.c
--- mysql-4.0.24/myisam/mi_delete_table.c	2005-03-05 09:38:15.000000000 +0900
+++ mysql-4.0.24.senna/myisam/mi_delete_table.c	2005-04-05 19:23:35.000000000 +0900
@@ -31,6 +31,7 @@
 #endif
   DBUG_ENTER("mi_delete_table");
 
+  sen_log("mi_delete_table(%s)", name);
 #ifdef EXTRA_DEBUG
   check_table_is_closed(name,"delete");
 #endif
@@ -49,6 +50,20 @@
 #endif
 #endif /* USE_RAID */
 
+  {
+    int i;
+    for (i = 0; i < 100; i++) {
+      snprintf(from, FN_REFLEN, "%s.%03d.SEN", name, i);
+      unlink(from);
+      snprintf(from, FN_REFLEN, "%s.%03d.SEN.i", name, i);
+      unlink(from);
+      snprintf(from, FN_REFLEN, "%s.%03d.SEN.i.c", name, i);
+      unlink(from);
+      snprintf(from, FN_REFLEN, "%s.%03d.SEN.l", name, i);
+      unlink(from);
+    }
+  }
+
   fn_format(from,name,"",MI_NAME_IEXT,4);
   if (my_delete_with_symlink(from, MYF(MY_WME)))
     DBUG_RETURN(my_errno);
diff -Nur mysql-4.0.24/myisam/mi_open.c mysql-4.0.24.senna/myisam/mi_open.c
--- mysql-4.0.24/myisam/mi_open.c	2005-03-05 09:38:14.000000000 +0900
+++ mysql-4.0.24.senna/myisam/mi_open.c	2005-04-05 19:23:35.000000000 +0900
@@ -329,6 +329,25 @@
         }
         if (share->keyinfo[i].flag & HA_FULLTEXT)
         {
+          sen_log("share->delay_key_write=%d", share->delay_key_write);
+          sen_log("unique_file_name %s", share->unique_file_name);
+          sen_log("data_file_name   %s", share->data_file_name);
+          sen_log("index_file_name  %s", share->index_file_name);
+          sen_log("share->keyinfo[%d].seg=%p", i, (void *) (pos-FT_SEGS));
+
+          {
+            char buf[PATH_MAX];
+            strcpy(buf, share->unique_file_name);
+            sprintf(buf + strlen(buf) - 3, "%03d", i);
+            sen_log("open (%s)", buf);
+            share->keyinfo[i].senna = sen_index_open(buf);
+            if (!share->keyinfo[i].senna) {
+              /* make index files */
+              sen_log("create index (%s)", buf);
+              share->keyinfo[i].senna = sen_index_create(buf, 0, SEN_INDEX_NORMALIZE, 8, sen_enc_default);
+            }
+          }
+
           share->keyinfo[i].seg=pos-FT_SEGS;
           share->fulltext_index=1;
         }
diff -Nur mysql-4.0.24/myisam/mi_rename.c mysql-4.0.24.senna/myisam/mi_rename.c
--- mysql-4.0.24/myisam/mi_rename.c	2005-03-05 09:38:14.000000000 +0900
+++ mysql-4.0.24.senna/myisam/mi_rename.c	2005-04-05 19:23:35.000000000 +0900
@@ -49,6 +49,25 @@
 #endif
 #endif /* USE_RAID */
 
+  sen_log("mi_rename(%s,%s)", old_name, new_name);
+  {
+    int i;
+    for (i = 0; i < 100; i++) {
+      snprintf(from, FN_REFLEN, "%s.%03d.SEN", old_name, i);
+      snprintf(to, FN_REFLEN, "%s.%03d.SEN", new_name, i);
+      rename(from, to);
+      snprintf(from, FN_REFLEN, "%s.%03d.SEN.i", old_name, i);
+      snprintf(to, FN_REFLEN, "%s.%03d.SEN.i", new_name, i);
+      rename(from, to);
+      snprintf(from, FN_REFLEN, "%s.%03d.SEN.i.c", old_name, i);
+      snprintf(to, FN_REFLEN, "%s.%03d.SEN.i.c", new_name, i);
+      rename(from, to);
+      snprintf(from, FN_REFLEN, "%s.%03d.SEN.l", old_name, i);
+      snprintf(to, FN_REFLEN, "%s.%03d.SEN.l", new_name, i);
+      rename(from, to);
+    }
+  }
+
   fn_format(from,old_name,"",MI_NAME_IEXT,4);
   fn_format(to,new_name,"",MI_NAME_IEXT,4);
   if (my_rename_with_symlink(from, to, MYF(MY_WME)))
diff -Nur mysql-4.0.24/sql/sql_db.cc mysql-4.0.24.senna/sql/sql_db.cc
--- mysql-4.0.24/sql/sql_db.cc	2005-04-05 19:12:11.000000000 +0900
+++ mysql-4.0.24.senna/sql/sql_db.cc	2005-04-05 19:23:54.000000000 +0900
@@ -252,6 +252,46 @@
          (file->name[1] == '.' && !file->name[2])))
       continue;
 
+    /* senna files is skip */
+    /* ".SEN",".SEN.i",".SEN.i.c",".SEN.l" */
+    {
+      int len = strlen(file->name);
+      if (len > 4) {
+        if (file->name[len-4] == '.' &&
+            file->name[len-3] == 'S' &&
+            file->name[len-2] == 'E' &&
+            file->name[len-1] == 'N')
+          continue;
+      }
+      if (len > 6) {
+        if (file->name[len-6] == '.' &&
+            file->name[len-5] == 'S' &&
+            file->name[len-4] == 'E' &&
+            file->name[len-3] == 'N' &&
+            file->name[len-2] == '.' &&
+            file->name[len-1] == 'i')
+          continue;
+        if (file->name[len-6] == '.' &&
+            file->name[len-5] == 'S' &&
+            file->name[len-4] == 'E' &&
+            file->name[len-3] == 'N' &&
+            file->name[len-2] == '.' &&
+            file->name[len-1] == 'l')
+          continue;
+      }
+      if (len > 8) {
+        if (file->name[len-8] == '.' &&
+            file->name[len-7] == 'S' &&
+            file->name[len-6] == 'E' &&
+            file->name[len-5] == 'N' &&
+            file->name[len-4] == '.' &&
+            file->name[len-3] == 'i' &&
+            file->name[len-2] == '.' &&
+            file->name[len-1] == 'c')
+          continue;
+      }
+    }
+
     /* Check if file is a raid directory */
     if ((isdigit(file->name[0]) ||
          (file->name[0] >= 'a' && file->name[0] <= 'f')) &&