I was able to reproduce a similar crash and narrow down which code makes the difference between segfaulting and not.
I run a bunch of Rust initialization code that gets called from the library constructor of libwisktrack.so.
This includes opening a tracing file and tracker file and writing data including reading a config file.
All of which works fine.
This initialization seems to call mmap/mprotect and brk to set up the data segment during the library constructor phase.
fstat(3, {st_dev=makedev(0, 64), st_ino=188883803, st_mode=S_IFREG|0755, st_nlink=2, st_uid=19375, st_gid=25, st_blksize=32768, st_blocks=82672, st_size=42153864, st_atime=1609266198 /* 2020-12-29T10:23:18.259702000-0800 */, st_atime_nsec=259702000, st_mtime=1609266191 /* 2020-12-29T10:23:11.747711000-0800 */, st_mtime_nsec=747711000, st_ctime=1609266192 /* 2020-12-29T10:23:12.115697000-0800 */, st_ctime_nsec=115697000}) = 0
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f9ee49bf000
mmap(NULL, 8451856, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f9ee3f89000
mprotect(0x7f9ee4551000, 2093056, PROT_NONE) = 0
mmap(0x7f9ee4750000, 299008, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x5c7000) = 0x7f9ee4750000
close(3) = 0
access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3
..........................
getrandom(NULL, 0, GRND_NONBLOCK) = 0
getrandom("\xe7\x87\x0f\xd0\x07\x7b\x17\xd7\xf0\x16\xbb\x17\xd5\x38\xd4\x39\xd3\x92\x92\x28\x18\x62\xf0\xb0\x78\xcc\x02\x83\x2c\xc7\x32\x2c", 32, 0) = 32
brk(NULL) = 0x1b3d000
brk(0x1b5e000) = 0x1b5e000
getpid() = 28945
getpid() = 28945
write(2, "8AEjTregSeJaBuhOSncma9", 22) = 22
write(2, "-", 1) = 1
write(2, "28945", 5) = 5
write(2, ": Constructor: ", 15) = 15
write(2, "28945", 5) = 5
write(2, "\n\n", 2) = 2
The difference that causes the crash is an extra call to `lazy_static::initialize(&APP64BITONLY_PATTERNS);`
Which ends up regex compiling a large piece of data from config that is already read.
It does some string template "render" and regex-set compilation via RegexSet::new(&p),
which seems to result in a ton of brk system calls; I suspect these are resizing the data segment of the program.
write(801, "O8YImgWZKrtUiywY0ExktE: APP64BITONLY_PATTERNS Reading....\n", 58) = 58
write(801, "O8YImgWZKrtUiywY0ExktE: p: [\"^/nobackup/sarvi/xewisktest/binos/linkfarm/x86_64_cge7/sdk/sysroots/x86_64\\\\-xesdk\\\\-linux/usr/bin/x86_64\\\\-cisco\\\\-linux/(x86_64\\\\-cisco\\\\-linux\\\\-addr2line|x86_64\\\\-cisco\\\\-linux\\\\-objdump|x86_64\\\\-cisco\\\\-linux\\\\-ld|x86_64\\\\-cisco\\\\-linux\\\\-readelf|x86_64\\\\-cisco\\\\-linux\\\\-ld\\\\.bfd|x86_64\\\\-cisco\\\\-linux\\\\-gcov|x86_64\\\\-cisco\\\\-linux\\\\-size|x86_64\\\\-cisco\\\\-linux\\\\-ar|x86_64\\\\-cisco\\\\-linux\\\\-gcc\\\\-nm|x86_64\\\\-cisco\\\\-linux\\\\-gcc|x86_64\\\\-cisco\\\\-linux\\\\-as|x86_64\\\\-cisco\\\\-linux\\\\-gcc\\\\-ranlib|x86_64\\\\-cisco\\\\-linux\\\\-merge\\\\-gcda|x86_64\\\\-cisco\\\\-linux\\\\-strings|x86_64\\\\-cisco\\\\-linux\\\\-objcopy|x86_64\\\\-cisco\\\\-linux\\\\-c\\\\+\\\\+filt|x86_64\\\\-cisco\\\\-linux\\\\-nm|x86_64\\\\-cisco\\\\-linux\\\\-gprof|x86_64\\\\-cisco\\\\-linux\\\\-cpp|x86_64\\\\-cisco\\\\-linux\\\\-elfedit|x86_64\\\\-cisco\\\\-linux\\\\-g\\\\+\\\\+|x86_64\\\\-cisco\\\\-linux\\\\-strip|x86_64\\\\-cisco\\\\-linux\\\\-gcov\\\\-dump|x86_64\\\\-cisco\\\\-linux\\\\-ranlib|x86_64\\\\-cisco\\\\-linux\\\\-gcc\\\\-ar)\", \"^/nobackup/sarvi/xewisktest/binos/linkfarm/x86_64_cge7/sysr"..., 12929) = 12929
brk(0x117d000) = 0x117d000
brk(0x1179000) = 0x1179000
brk(0x119b000) = 0x119b000
brk(0x119a000) = 0x119a000
brk(0x11bb000) = 0x11bb000
brk(0x11dc000) = 0x11dc000
brk(0x11fd000) = 0x11fd000
......
brk(0x146d000) = 0x146d000
brk(0x1451000) = 0x1451000
mmap(NULL, 434176, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f6000d25000
mmap(NULL, 434176, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f6000cbb000
brk(0x144f000) = 0x144f000
brk(0x1448000) = 0x1448000
write(801, "O8YImgWZKrtUiywY0ExktE: APP64BITONLY_PATTERNS Reading....Done\n", 62) = 62
The constructor completes fine.
write(2, ": Constructor Done: ", 20) = 20
write(2, "31780", 5) = 5
write(2, ", ", 2) = 2
write(2, "[\"/nobackup/sarvi/xewisktest/vob/ss.comp1/onep/tools/cthrift/cthrift-0.3.14/bin/./cthrift.exe\",\"--prefix\",\"onep\",\"-o\",\"presentation/InBoundToNeIdl-gen\",\"include/./InBoundToNeIdl.h\"]", 181) = 181
write(2, "\n\n", 2)
But the segfault/crash happens in the main program, way further down, possibly towards the end of the program
write(4, "zero__,\n \t},\n};\n#define cthrift_recv_struct_UtdFileRepuRetroAlertMsgIDL_zero__ cthrift_recv_struct_list_zero__\nstatic struct cthrift_struct_info__ cthrift_recv_struct_UtdFileRepuRetroAlertMsgIDL_info__[1] = {{\n \t\"struct_UtdFileRepuRetroAlertMsgIDL\",\n \tCTHRIFT_FLAGS_ISSET__,\n \toffsetof(struct UtdFileRepuRetroAlertMsgIDL, isset__),\n \tsizeof(((struct UtdFileRepuRetroAlertMsgIDL *)0)->isset__),\n \tsizeof(struct UtdFileRepuRetroAlertMsgIDL),\n \t5,\n \t5,\n \tcthrift_recv_struct_UtdFileRepuRetroAlertMsgIDL_fields__,\n }};\nconst char * onep_get_name_cthrift_recv_struct_UtdFileRepuRetroAlertMsgIDL_info__ (void) {\n \treturn (cthrift_recv_struct_UtdFileRepuRetroAlertMsgIDL_info__[0].cc_name_);\n}\nstatic struct cthrift_field_info__ cthrift_recv_struct_UtdFileAnalysisUploadAlertMsgIDL_fields__[] = {\n \t{ /* timestamp */\n \t\"timestamp\",\n \tCTHRIFT_FLAGS_READ__|CTHRIFT_FLAGS_ISSET__,\n \t1,\n \tCTHRIFT_TYPE_I64__,\n \t0,\n \toffsetof(struct UtdFileAnalysisUploadAlertMsgIDL, timestamp),\n \t0,\n"..., 4096) = 4096
--- SIGSEGV {si_signo=SIGSEGV, si_code=SEGV_MAPERR, si_addr=0x4} ---
+++ killed by SIGSEGV (core dumped) +++
I then reduced the regex compilation to a tiny operation:
write(801, "FYIgeA920l8KgSbcWotJmA: p: [\"^NOMATCH/.*$\"]\n", 44) = 44
With that change I don't see the list of brk syscalls increasing the size of the data segment, and there is no crash.
So, bottom line: it looks like doing large data mallocs or compute operations in the library constructor somehow causes this crash in the main program.
Question: Are there any limitations on how much memory can be malloc'ed, or how much data can be defined/used, from within the library constructor function? Is there a better way to do this?
This is one of the reasons I tried to move some of this code into a Rust Once() segment and call it from the syscall intercepts during the main program execution, outside the library constructor. But that ran into its own set of problems: there was no consistent and reliable place to call the one-time initialization code, and there were problems with Once and vfork.