1/* 2 * This example code shows how to iterate over all regex matches in a file, 3 * emit the match location and print the contents of a capturing group. 4 */ 5 6#include <fcntl.h> 7#include <stdio.h> 8#include <stdlib.h> 9#include <string.h> 10#include <sys/mman.h> 11#include <sys/stat.h> 12#include <sys/types.h> 13#include <unistd.h> 14 15#include "rure.h" 16 17int main() { 18 /* Open a file and mmap it. */ 19 int fd = open("sherlock.txt", O_RDONLY); 20 if (fd == -1) { 21 perror("failed to open sherlock.txt"); 22 exit(1); 23 } 24 struct stat status; 25 if (fstat(fd, &status) == -1) { 26 perror("failed to stat sherlock.txt"); 27 exit(1); 28 } 29 if ((uintmax_t)status.st_size > SIZE_MAX) { 30 perror("file too big"); 31 exit(1); 32 } 33 if (status.st_size == 0) { 34 perror("file empty"); 35 exit(1); 36 } 37 size_t sherlock_len = (size_t)status.st_size; 38 const uint8_t *sherlock = (const uint8_t *)mmap( 39 NULL, status.st_size, PROT_READ, MAP_PRIVATE, fd, 0); 40 close(fd); 41 if (sherlock == MAP_FAILED) { 42 perror("could not mmap file"); 43 exit(1); 44 } 45 46 /* 47 * Compile the regular expression. A more convenient routine, 48 * rure_compile_must, is also available, which will abort the process if 49 * and print an error message to stderr if the regex compilation fails. 50 * We show the full gory details here as an example. 51 */ 52 const char *pattern = "(\\w+)\\s+Holmes"; 53 size_t pattern_len = strlen(pattern); 54 rure_error *err = rure_error_new(); 55 rure *re = rure_compile((const uint8_t *)pattern, pattern_len, 56 RURE_FLAG_UNICODE | RURE_FLAG_CASEI, NULL, err); 57 if (NULL == re) { 58 /* A null regex means compilation failed and an error exists. */ 59 printf("compilation of %s failed: %s\n", 60 pattern, rure_error_message(err)); 61 rure_error_free(err); 62 munmap((char*)sherlock, sherlock_len); 63 exit(1); 64 } 65 rure_error_free(err); 66 67 /* 68 * Create an iterator to find all successive non-overlapping matches. 69 * For each match, we extract the location of the capturing group. 70 */ 71 rure_match group0 = {0}; 72 rure_match group1 = {0}; 73 rure_captures *caps = rure_captures_new(re); 74 rure_iter *it = rure_iter_new(re); 75 76 while (rure_iter_next_captures(it, sherlock, sherlock_len, caps)) { 77 /* 78 * Get the location of the full match and the capturing group. 79 * We know that both accesses are successful since the body of the 80 * loop only executes if there is a match and both capture groups 81 * must match in order for the entire regex to match. 82 * 83 * N.B. The zeroth group corresponds to the full match of the regex. 84 */ 85 rure_captures_at(caps, 0, &group0); 86 rure_captures_at(caps, 1, &group1); 87 printf("%.*s (match at: %zu, %zu)\n", 88 (int)(group1.end - group1.start), 89 sherlock + group1.start, 90 group0.start, group0.end); 91 } 92 93 /* Free all our resources. */ 94 munmap((char*)sherlock, sherlock_len); 95 rure_captures_free(caps); 96 rure_iter_free(it); 97 rure_free(re); 98 return 0; 99} 100