diff -ur snort-cvs-head/src/sfutil/bnfa_search.c snort.new/src/sfutil/bnfa_search.c --- snort-cvs-head/src/sfutil/bnfa_search.c 2010-01-26 18:10:23.000000000 +0000 +++ snort.new/src/sfutil/bnfa_search.c 2010-10-27 10:56:10.000000000 +0100 @@ -22,7 +22,7 @@ ** The matching lists of patterns for each state are stored separately as well. ** The compacted sparse format improves caching/performance. ** -** word 1 : state ( only low 24 bits are used ) +** word 1 : state number ( only low 24 bits are used ) ** word 2 : control word = cb << 24 | fs ** cb: control byte ** cb = mb | fb | nt @@ -33,11 +33,17 @@ ** fs: 24 bits for failure state transition index. ** word 3+ : transition word = input<<24 | next-state-index ** input : 8 bit character, input to state machine from search text -** next-state-index: 24 bits for index of next state +** next-state-index: 24 bits for index of word 2 of next state ** (if we reallly need 16M states, we can add a state->index lookup array) ** ...repeat for each state ... ** -** * if a state is empty it has words 1 and 2, but no transition words. +** * Because the next-state-index values give the index of word 2 of the +** next state, it is possible to omit word 1 for states for which the +** state number will not be needed. The state number is required only +** for states with the mb bit set; eliminating it for non-match states +** cuts the sparse array storage size by about 25%. +** +** * if a state is empty it has word 2, but no transition words. ** ** Construction: ** @@ -977,7 +983,10 @@ nps = 0; for(k=0;kbnfaNumStates;k++) { - nps++; /* state word */ + if( bnfa->bnfaMatchList[k] ) + { + nps++; /* state word (match states only) */ + } nps++; /* control word */ /* count transitions */ @@ -1027,6 +1036,7 @@ return -1; } bnfa->bnfaTransList = ps; + bnfa->bnfaTransListSize = nps; /* State Index list for pi - we need an array of bnfa_state_t items of size 'NumStates' @@ -1043,11 +1053,12 @@ */ for(k=0;kbnfaNumStates;k++) { - pi[k] = ps_index; /* save index of start of state 'k' */ + if( bnfa->bnfaMatchList[k] ) + { + ps[ ps_index++ ] = k; /* save the state we're in before the control word */ + } - ps[ ps_index ] = k; /* save the state were in as the 1st word */ - - ps_index++; /* skip past state word */ + pi[k] = ps_index; /* save index of start of state 'k' */ /* conver state 'k' to full format */ _bnfa_list_conv_row_to_full(bnfa, (bnfa_state_t)k, full ); @@ -1130,7 +1141,10 @@ } //ps_index = pi[k]; /* get index of next state */ - ps_index++; /* skip state id */ + if( bnfa->bnfaMatchList[k] ) + { + ps_index++; /* skip state id */ + } /* Full Format */ if( ps[ps_index] & BNFA_SPARSE_FULL_BIT ) @@ -1223,7 +1237,8 @@ { unsigned i,cw,fs,nt,fb,mb; - ps_index++; /* skip state number */ + if( bnfa->bnfaMatchList[k] ) + ps_index++; /* skip state number */ cw = ps[ps_index]; /* control word */ fb = (cw & BNFA_SPARSE_FULL_BIT)>>BNFA_SPARSE_VALUE_SHIFT; /* full storage bit */ @@ -1415,7 +1430,7 @@ BNFA_FREE(bnfa->bnfaFailState,bnfa->bnfaNumStates*sizeof(bnfa_state_t),bnfa->failstate_memory); BNFA_FREE(bnfa->bnfaMatchList,bnfa->bnfaNumStates*sizeof(bnfa_pattern_t*),bnfa->matchlist_memory); BNFA_FREE(bnfa->bnfaNextState,bnfa->bnfaNumStates*sizeof(bnfa_state_t*),bnfa->nextstate_memory); - BNFA_FREE(bnfa->bnfaTransList,(2*bnfa->bnfaNumStates+bnfa->bnfaNumTrans)*sizeof(bnfa_state_t*),bnfa->nextstate_memory); + BNFA_FREE(bnfa->bnfaTransList,bnfa->bnfaTransListSize*sizeof(bnfa_state_t*),bnfa->nextstate_memory); free( bnfa ); /* cannot update memory tracker when deleting bnfa so just 'free' it !*/ } @@ -1880,7 +1895,7 @@ /* * Sparse format for state table using single array storage * -* word 1: state +* word 1: state (match states only) * word 2: control-word = cb<<24| fs * cb : control-byte * : mb | fb | nt @@ -1889,6 +1904,8 @@ * nt : number of transitions 0..63 (more than 63 requires full format) * fs: failure-transition-state * word 3+: byte-value(0-255) << 24 | transition-state +* +* sindex is the index of word 2 */ static INLINE @@ -1902,7 +1919,7 @@ for(;;) { - pcs = pcx + sindex + 1; /* skip state-id == 1st word */ + pcs = pcx + sindex; if( pcs[0] & BNFA_SPARSE_FULL_BIT ) { @@ -1948,7 +1965,7 @@ /* * Sparse format for state table using single array storage * -* word 1: state +* word 1: state (match states only) * word 2: control-word = cb<<24| fs * cb : control-byte * : mb | fb | nt @@ -1957,6 +1974,8 @@ * nt : number of transitions 0..63 (more than 63 requires full format) * fs: failure-transition-state * word 3+: byte-value(0-255) << 24 | transition-state +* +* sindex is the index of word 2 */ static INLINE @@ -1970,7 +1989,7 @@ for(;;) { - pcs = pcx + sindex + 1; /* skip state-id == 1st word */ + pcs = pcx + sindex; if( pcs[0] & BNFA_SPARSE_FULL_BIT ) { @@ -2139,9 +2158,9 @@ /* Log matches in this state - if any */ if( sindex ) { - if( transList[sindex+1] & BNFA_SPARSE_MATCH_BIT ) + if( transList[sindex] & BNFA_SPARSE_MATCH_BIT ) { - mlist = MatchList[ transList[sindex] ]; + mlist = MatchList[ transList[sindex-1] ]; if( mlist ) { if( _add_queue(bnfa,mlist) ) @@ -2187,13 +2206,13 @@ sindex = _bnfa_get_next_state_csparse_nfa(transList,sindex,xlatcase[*T]); /* Log matches in this state - if any */ - if(sindex && (transList[sindex+1] & BNFA_SPARSE_MATCH_BIT) ) + if(sindex && (transList[sindex] & BNFA_SPARSE_MATCH_BIT) ) { /* Test for same as last state */ if( sindex == last_sindex ) continue; - mlist = MatchList[ transList[sindex] ]; + mlist = MatchList[ transList[sindex-1] ]; if( mlist ) { if( _add_queue(bnfa,mlist) ) @@ -2251,7 +2270,7 @@ sindex = _bnfa_get_next_state_csparse_nfa(transList,sindex,Tchar); /* Log matches in this state - if any */ - if( sindex && (transList[sindex+1] & BNFA_SPARSE_MATCH_BIT) ) + if( sindex && (transList[sindex] & BNFA_SPARSE_MATCH_BIT) ) { if( sindex == last_match ) continue; @@ -2260,12 +2279,12 @@ last_match = sindex; #ifdef MATCH_LIST_CNT - if( MatchList[ transList[sindex] ] ) + if( MatchList[ transList[sindex-1] ] ) MatchTestCnt[ transList[index] ]++; #endif { - mlist = MatchList[ transList[sindex] ]; + mlist = MatchList[ transList[sindex-1] ]; patrn = (bnfa_pattern_t*)mlist->data; index = T - Tx - patrn->n + 1; nfound++; @@ -2321,7 +2340,7 @@ sindex = _bnfa_get_next_state_csparse_nfa(transList,sindex,*T); /* Log matches in this state - if any */ - if( sindex && (transList[sindex+1] & BNFA_SPARSE_MATCH_BIT) ) + if( sindex && (transList[sindex] & BNFA_SPARSE_MATCH_BIT) ) { if( sindex == last_match ) continue; @@ -2330,7 +2349,7 @@ last_match = sindex; { - mlist = MatchList[ transList[sindex] ]; + mlist = MatchList[ transList[sindex-1] ]; patrn = (bnfa_pattern_t*)mlist->data; index = T - Tx - patrn->n + 1; nfound++; @@ -2389,7 +2408,7 @@ sindex = _bnfa_get_next_state_csparse_nfa(transList,sindex,Tchar); /* Log matches in this state - if any */ - if( sindex && (transList[sindex+1] & BNFA_SPARSE_MATCH_BIT) ) + if( sindex && (transList[sindex] & BNFA_SPARSE_MATCH_BIT) ) { if( sindex == last_match ) continue; @@ -2398,7 +2417,7 @@ last_match = sindex; { - mlist = MatchList[ transList[sindex] ]; + mlist = MatchList[ transList[sindex-1] ]; patrn = (bnfa_pattern_t*)mlist->data; index = T - Tx - patrn->n + 1; nfound++; diff -ur snort-cvs-head/src/sfutil/bnfa_search.h snort.new/src/sfutil/bnfa_search.h --- snort-cvs-head/src/sfutil/bnfa_search.h 2010-01-26 18:10:23.000000000 +0000 +++ snort.new/src/sfutil/bnfa_search.h 2010-10-27 10:56:15.000000000 +0100 @@ -139,6 +139,7 @@ bnfa_state_t * bnfaFailState; bnfa_state_t * bnfaTransList; + unsigned bnfaTransListSize; int bnfaForceFullZeroState; int bnfa_memory;