// Per-thread integer id: `thread_local` gives every thread its own copy of
// `tid`, initialized from the value returned by `i.fetch_add(1)`.
// NOTE(review): `i` is declared outside this view. Presumably it is a shared
// std::atomic counter, in which case fetch_add returns the value held *before*
// the increment and each thread ends up with a distinct id — confirm against
// the declaration of `i`.
// NOTE(review): if `i`'s value type is wider than, or differently signed from,
// `int`, this initialization converts implicitly — confirm that is intended.
thread_local int tid = i.fetch_add(1);