Unix & Linux
vim regular-expression search string function
Updated Sun, 17 Jul 2022 14:43:44 GMT

How can I reverse-match a string (search from the end) in Vim script?


I want to find the last index of any character in the [abc] set in the abcabc string but the search should start from the end of the string:

" match() scans from the start of the string, so this returns 0 (the index
" of the first match); the goal is 5, the index of the last match.
let a=match('abcabc', '[abc]')

I skimmed through Vim's "4. Builtin Functions" (:h functions), but the only function that looked promising, reverse(), operates only on lists. I don't understand this limitation, because functions like len() were designed to work with strings and numbers as well as lists.

To solve the problem, I came up with the following function:

" Return a:str with its characters in reverse order.
"
" The string is split into characters with the zero-width pattern '\zs'
" rather than indexed byte by byte (a:str[i] yields a single byte), so
" multi-byte characters are kept intact instead of being scrambled.
" An empty string yields an empty string.
function! s:Rvrs(str)
  return join(reverse(split(a:str, '\zs')), '')
endfunction

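So I can say let a=match(s:Rvrs('abcabc'), '[abc]'). Note that the result is an index into the reversed string, so the index in the original string is len('abcabc') - 1 - a.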




Solution

I looked around, but did not find any built in function that looked like it would do what you want.

You might find the following functions useful, though. Variations are included for overlapping and non-overlapping matches, starting from either the beginning or the end of the string. All of them support multi-character patterns, with some restrictions or limitations around the use of \zs and/or \ze.

" Return a List with the byte index of every position in a:str at which
" a:pat matches, in ascending order.  Matches are allowed to overlap.
" Returns [] when a:pat does not match anywhere.
" Restriction: a:pat should not use \ze
function! s:AllOverlappableMatches(str, pat)
    " With {keepempty} set, split() returns [''] for an empty a:str; the
    " loop below would count that empty item as a match at index 0 even
    " though nothing matched.  Bail out early when there is no match at all.
    if match(a:str, a:pat) == -1
        return []
    endif
    let indices = []
    let index = 0
    " Splitting on the zero-width pattern '\ze'.a:pat produces an empty item
    " at each match position; the non-empty items are the text skipped
    " between match positions.
    let splits = split(a:str, '\ze'.a:pat, 1)
    for segment in splits
        if len(segment) == 0
            " Empty item: a match starts at the current offset.
            call add(indices, index)
        else
            " Skipped text: move the offset past it.
            let index += len(segment)
        endif
    endfor
    return indices
endfunction
" Same result as s:AllOverlappableMatches(), but listed in reverse order.
" Restriction: a:pat should not use \ze
function! s:AllOverlappableMatchesFromEnd(str, pat)
    let ascending = s:AllOverlappableMatches(a:str, a:pat)
    " reverse() works in place and returns the same List.
    return reverse(ascending)
endfunction
" Return a List with the byte index of every non-overlapping match of
" a:pat in a:str, scanning from the start of the string.
"
" If a:pat uses \zs, the returned indices will be based on that
" position.
" If a:pat uses \ze, subsequent matches may re-use characters
" after \ze that were consumed, but not 'matched' (due to \ze)
" in earlier matches.
" Zero-width matches (e.g. 'x*' or '\zea') are reported once per position.
function! s:AllNonoverlappingMatches(str, pat)
    let indices = []
    let start = 0
    while 1
        let next = match(a:str, a:pat, start)
        if next == -1
            break
        endif
        call add(indices, next)
        let stop = matchend(a:str, a:pat, start)
        if stop > next
            " Normal case: resume right after the matched text.
            let start = stop
        else
            " Zero-width match: matchend() gives no progress, so resuming
            " there would find the same match forever.  Step over one
            " character (at least one byte) instead.
            let start = next + max([1, strlen(matchstr(a:str, '\_.', next))])
        endif
    endwhile
    return indices
endfunction
" Return a List with the byte indices of non-overlapping matches of a:pat
" in a:str, found while scanning backwards from the end of the string
" (so the indices come out in descending order).
"
" If a:pat uses \zs, the returned indices will be based on that
" position.
function! s:AllNonoverlappingMatchesFromEnd(str, pat)
    let found = []
    " Text still available for matching; cut off in front of every hit.
    let remaining = a:str
    " The greedy '.*' prefers the last match in the searched part.
    let lastmatch = '.*\zs' . a:pat
    let pos = len(a:str)
    while pos > 0
        let pos -= 1
        let hit = match(remaining, lastmatch, pos)
        if hit == -1
            continue
        endif
        call add(found, hit)
        let remaining = remaining[ : hit - 1]
    endwhile
    return found
endfunction
" Usage examples.  The comment below each call shows the expected output.
" Where present, the digit ruler on the right is aligned under the string
" argument of the call above it and marks the byte index of each character
" ('A' stands for index 10).

" Overlappable matches, listed from the end of the string.
echo s:AllOverlappableMatchesFromEnd('abcabc', '[abc]')
" -> [5, 4, 3, 2, 1, 0]
echo s:AllOverlappableMatchesFromEnd('dabcabc', '[abc]')
" -> [6, 5, 4, 3, 2, 1]
echo s:AllOverlappableMatchesFromEnd('dab - cabc', '[abc]')
" -> [9, 8, 7, 6, 2, 1]
echo s:AllOverlappableMatchesFromEnd('dab - cabce', '[abc]')
" -> [9, 8, 7, 6, 2, 1]
echo s:AllOverlappableMatchesFromEnd('dab - cabc', '[abc]\{2}')
" -> [8, 7, 6, 1]
" The same two-character pattern: overlappable vs. non-overlapping matches.
echo s:AllOverlappableMatches('dab - cabc', '[abc]\{2}')
" -> [1, 6, 7, 8]              0123456789
echo s:AllNonoverlappingMatches('dab - cabc', '[abc]\{2}')
" -> [1, 6, 8]                   0123456789
" Non-overlapping matches found while scanning from the end.
echo s:AllNonoverlappingMatchesFromEnd('dab - cabca', '[abc]\{2}')
" -> [9, 7, 1]                          0123456789A
echo s:AllNonoverlappingMatchesFromEnd('ab - cabca', '[abc]\{2}')
" -> [8, 6, 0]                          0123456789
echo s:AllNonoverlappingMatchesFromEnd('abcabc', '[abc]\{2}')
" -> [4, 2, 0]                          012345
echo s:AllNonoverlappingMatchesFromEnd(' ab c abcd', '[abc]\{2}')
" -> [7, 1]                             0123456789
" (Same call as two examples above.)
echo s:AllNonoverlappingMatchesFromEnd('abcabc', '[abc]\{2}')
" -> [4, 2, 0]                          012345
" A literal multi-character pattern.
echo s:AllNonoverlappingMatches( 'abcabcabbc', 'abc')
" -> [0, 3]                       0123456789
echo s:AllNonoverlappingMatchesFromEnd( 'abcdabcabbc', 'abc')
" -> [4, 0]                              0123456789A
" A multi-character, overlappable pattern
echo s:AllOverlappableMatchesFromEnd( 'aaaabcaaac', 'aaa')
" -> [6, 1, 0]                         0123456789