2 * Copyright (C) 2016, 2017, 2018 "IoT.bzh"
3 * Author José Bollo <jose.bollo@iot.bzh>
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
9 * http://www.apache.org/licenses/LICENSE-2.0
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
20 #if defined(NO_JOBS_WATCHDOG)
21 # define HAS_WATCHDOG 0
23 # define HAS_WATCHDOG 1
32 #include <sys/syscall.h>
36 #include <sys/eventfd.h>
38 #include <systemd/sd-event.h>
41 #include <systemd/sd-daemon.h>
45 #include "sig-monitor.h"
48 #if defined(REMOVE_SYSTEMD_EVENT)
49 #include "fdev-epoll.h"
/* Timeouts used when waiting on the event loop:
 * TOP is "wait forever" ((uint64_t)-1 is infinite for sd_event_wait);
 * CHILD presumably bounds waits of secondary threads — TODO confirm units (µs?). */
52 #define EVENT_TIMEOUT_TOP ((uint64_t)-1)
53 #define EVENT_TIMEOUT_CHILD ((uint64_t)10000)
57 /** Internal shortcut for callback */
58 typedef void (*job_cb_t)(int, void*);
60 /** Description of a pending job */
63 struct job *next; /**< link to the next job enqueued */
64 const void *group; /**< group of the request; jobs of the same group run sequentially */
65 job_cb_t callback; /**< processing callback; receives 0 or the interrupting signal number */
66 void *arg; /**< argument passed to 'callback' */
67 int timeout; /**< timeout in seconds for processing the request (0: none) */
68 unsigned blocked: 1; /**< is another request blocking this one ? */
69 unsigned dropped: 1; /**< is removed ? */
72 /** Description of handled event loops */
75 unsigned state; /**< encoded state: OR of the EVLOOP_STATE_* bits below */
76 int efd; /**< eventfd used to wake up a thread polling this loop */
77 struct sd_event *sdev; /**< the systemd event loop */
78 pthread_cond_t cond; /**< condition signaled when the loop leaves the waiting state */
79 struct fdev *fdev; /**< handling of events */
80 struct thread *holder; /**< thread currently holding/running the evloop (NULL if none) */
/* bit flags for 'state' above (manipulated with __atomic_* builtins) */
83 #define EVLOOP_STATE_WAIT 1U
84 #define EVLOOP_STATE_RUN 2U
85 #define EVLOOP_STATE_LOCK 4U
87 /** Description of threads */
90 struct thread *next; /**< next thread of the list */
91 struct thread *upper; /**< upper same thread (thread loops can nest via do_sync) */
92 struct job *job; /**< currently processed job */
93 pthread_t tid; /**< the thread id */
94 volatile unsigned stop: 1; /**< stop requested */
95 volatile unsigned waits: 1; /**< is waiting? */
99 * Description of synchronous callback
103 struct thread thread; /**< thread loop data; its address doubles as the jobloop handle */
105 void (*callback)(int, void*); /**< the synchronous callback */
106 void (*enter)(int signum, void *closure, struct jobloop *jobloop);
107 /**< the entering synchronous routine */
109 void *arg; /**< the argument of the callback */
113 /* synchronisation of threads */
114 static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
115 static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
/* all of the counters and lists below are protected by 'mutex' */
117 /* count allowed, started and running threads */
118 static int allowed = 0; /**< allowed count of threads */
119 static int started = 0; /**< started count of threads */
120 static int running = 0; /**< running count of threads */
121 static int remains = 0; /**< allowed count of waiting jobs */
123 /* list of threads */
124 static struct thread *threads;
125 static _Thread_local struct thread *current_thread;
126 static _Thread_local struct evloop *current_evloop;
128 /* queue of pending jobs */
129 static struct job *first_job;
130 static struct job *free_jobs; /* recycled job descriptors, see job_create/job_release */
133 static struct evloop evloop[1];
135 #if defined(REMOVE_SYSTEMD_EVENT)
136 static struct fdev_epoll *fdevepoll;
141 * Create a new job with the given parameters
142 * @param group the group of the job
143 * @param timeout the timeout of the job (0 if none)
144 * @param callback the function that achieves the job
145 * @param arg the argument of the callback
146 * @return the created job unblock or NULL when no more memory
148 static struct job *job_create(
/* NOTE(review): the parameter list and parts of the body are missing from
 * this extract; the visible flow is recycle-from-free-list, else malloc. */
156 /* try recyle existing job */
159 free_jobs = job->next;
/* no recyclable job: allocate while releasing 'mutex' so that other
 * threads are not blocked during malloc */
161 /* allocation without blocking */
162 pthread_mutex_unlock(&mutex);
163 job = malloc(sizeof *job);
164 pthread_mutex_lock(&mutex);
170 /* initialises the job */
172 job->timeout = timeout;
173 job->callback = callback;
182 * Adds 'job' at the end of the list of jobs, marking it
183 * as blocked if another job with the same group is pending.
184 * @param job the job to add
186 static void job_add(struct job *job)
189 struct job *ijob, **pjob;
/* NOTE(review): the traversal body is missing from this extract; the
 * visible logic scans the queue for the tail and for same-group jobs. */
195 /* search end and blockers */
199 if (group && ijob->group == group)
210 * Get the next job to process or NULL if none.
211 * @return the first job that isn't blocked or NULL
213 static inline struct job *job_get()
215 struct job *job = first_job;
216 while (job && job->blocked)
222 * Releases the processed 'job': removes it
223 * from the list of jobs and unblock the first
224 * pending job of the same group if any.
225 * @param job the job to release
227 static inline void job_release(struct job *job)
229 struct job *ijob, **pjob;
/* NOTE(review): several unlink/advance statements are missing from this
 * extract; the visible flow is: unqueue, unblock next of group, recycle. */
232 /* first unqueue the job */
235 while (ijob != job) {
241 /* then unblock jobs of the same group */
245 while (ijob && ijob->group != group)
/* put the descriptor back on the free list for reuse by job_create */
251 /* recycle the job */
252 job->next = free_jobs;
257 * Monitored cancel callback for a job.
258 * This function is called by the monitor
259 * to cancel the job when the safe environment
261 * @param signum 0 on normal flow or the number
262 * of the signal that interrupted the normal
264 * @param arg the job to run
266 static void job_cancel(int signum, void *arg)
268 struct job *job = arg;
269 job->callback(SIGABRT, job->arg);
272 #if defined(REMOVE_SYSTEMD_EVENT)
274 * Gets a fdev_epoll item.
275 * @return a fdev_epoll or NULL in case of error
277 static struct fdev_epoll *get_fdevepoll()
279 struct fdev_epoll *result;
283 result = fdevepoll = fdev_epoll_create();
290 * Monitored normal callback for events.
291 * This function is called by the monitor
292 * to run the event loop when the safe environment
294 * @param signum 0 on normal flow or the number
295 * of the signal that interrupted the normal
297 * @param arg the events to run
299 static void evloop_run(int signum, void *arg)
303 struct evloop *el = arg;
/* publish that this thread holds, runs and waits on the loop */
307 __atomic_store_n(&el->state, EVLOOP_STATE_LOCK|EVLOOP_STATE_RUN|EVLOOP_STATE_WAIT, __ATOMIC_RELAXED);
308 __atomic_store_n(&el->holder, current_thread, __ATOMIC_RELAXED);
/* standard sd-event three-phase processing: prepare, wait, dispatch */
310 rc = sd_event_prepare(se);
313 CRITICAL("sd_event_prepare returned an error (state: %d): %m", sd_event_get_state(se));
317 rc = sd_event_wait(se, (uint64_t)(int64_t)-1);
320 ERROR("sd_event_wait returned an error (state: %d): %m", sd_event_get_state(se));
/* leaving the waiting state before dispatching, so writers of the
 * eventfd (see unlock_evloop) stop poking the loop */
323 __atomic_and_fetch(&el->state, ~(EVLOOP_STATE_WAIT), __ATOMIC_RELAXED);
326 rc = sd_event_dispatch(se);
329 ERROR("sd_event_dispatch returned an error (state: %d): %m", sd_event_get_state(se));
334 __atomic_and_fetch(&el->state, ~(EVLOOP_STATE_WAIT|EVLOOP_STATE_RUN), __ATOMIC_RELAXED);
338 #if defined(REMOVE_SYSTEMD_EVENT)
340 * Monitored normal loop for waiting events.
341 * @param signum 0 on normal flow or the number
342 * of the signal that interrupted the normal
344 * @param arg the events to run
346 static void monitored_wait_and_dispatch(int signum, void *arg)
348 struct fdev_epoll *fdev_epoll = arg;
/* blocks (-1: no timeout) until events arrive, then dispatches them;
 * NOTE(review): a guard on 'signum' may exist in the missing line above —
 * confirm against the full source */
350 fdev_epoll_wait_and_dispatch(fdev_epoll, -1);
356 * Main processing loop of threads processing jobs.
357 * The loop must be called with the mutex locked
358 * and it returns with the mutex locked.
359 * @param me the description of the thread to use
360 * TODO: how are timeout handled when reentering?
362 static void thread_run(volatile struct thread *me)
366 #if !defined(REMOVE_SYSTEMD_EVENT)
370 /* initialize description of itself and link it in the list */
371 me->tid = pthread_self();
374 me->upper = current_thread;
/* outermost loop of this pthread: set up signal/timeout monitoring once */
375 if (!current_thread) {
377 sig_monitor_init_timeouts();
380 threads = (struct thread*)me;
381 current_thread = (struct thread*)me;
383 /* loop until stopped */
/* NOTE(review): '¤' below is an encoding artifact, presumably of
 * '&current_evloop' — confirm against the original source */
385 /* release the event loop */
386 if (current_evloop) {
387 __atomic_and_fetch(¤t_evloop->state, ~EVLOOP_STATE_LOCK, __ATOMIC_RELAXED);
388 __atomic_store_n(¤t_evloop->holder, NULL, __ATOMIC_RELAXED);
389 current_evloop = NULL;
395 /* prepare running the job */
396 remains++; /* increases count of job that can wait */
397 job->blocked = 1; /* mark job as blocked */
398 me->job = job; /* record the job (only for terminate) */
/* run the job outside the lock, under the signal monitor */
401 pthread_mutex_unlock(&mutex);
402 sig_monitor(job->timeout, job->callback, job->arg);
403 pthread_mutex_lock(&mutex);
405 /* release the run job */
407 #if !defined(REMOVE_SYSTEMD_EVENT)
409 /* no job, check events */
/* take the idle event loop (state 0: not locked/run/waited) and run it */
411 if (el->sdev && !__atomic_load_n(&el->state, __ATOMIC_RELAXED)) {
413 __atomic_store_n(&el->state, EVLOOP_STATE_LOCK|EVLOOP_STATE_RUN|EVLOOP_STATE_WAIT, __ATOMIC_RELAXED);
414 __atomic_store_n(&el->holder, me, __ATOMIC_RELAXED);
416 pthread_mutex_unlock(&mutex);
417 sig_monitor(0, evloop_run, el);
418 pthread_mutex_lock(&mutex);
420 /* no job and not events */
423 ERROR("Entering job deep sleep! Check your bindings.");
425 pthread_cond_wait(&cond, &mutex);
430 } else if (waitevt) {
431 /* no job and not events */
434 ERROR("Entering job deep sleep! Check your bindings.");
436 pthread_cond_wait(&cond, &mutex);
440 /* wait for events */
442 pthread_mutex_unlock(&mutex);
443 sig_monitor(0, monitored_wait_and_dispatch, get_fdevepoll());
444 pthread_mutex_lock(&mutex);
450 /* release the event loop */
451 if (current_evloop) {
452 __atomic_and_fetch(¤t_evloop->state, ~EVLOOP_STATE_LOCK, __ATOMIC_RELAXED);
/* NOTE(review): this line clears 'el->holder' while the sibling cleanup
 * above clears 'current_evloop->holder' — looks inconsistent; verify
 * whether 'el' is still in scope/correct here */
453 __atomic_store_n(&el->holder, NULL, __ATOMIC_RELAXED);
454 current_evloop = NULL;
457 /* unlink the current thread and cleanup */
462 current_thread = me->upper;
/* leaving the outermost loop: tear down the monitoring set up on entry */
463 if (!current_thread) {
464 sig_monitor_clean_timeouts();
470 * Entry point for created threads.
471 * @param data not used
474 static void *thread_main(void *data)
/* thread_run requires the mutex held on entry and returns with it held */
478 pthread_mutex_lock(&mutex);
482 pthread_mutex_unlock(&mutex);
487 * Starts a new thread
488 * @return 0 in case of success or -1 in case of error
490 static int start_one_thread()
495 rc = pthread_create(&tid, NULL, thread_main, NULL);
/* %m expands errno; NOTE(review): pthread_create returns the error code
 * instead of setting errno — verify errno is set before this log */
498 WARNING("not able to start thread: %m");
505 * Queues a new asynchronous job represented by 'callback' and 'arg'
506 * for the 'group' and the 'timeout'.
507 * Jobs are queued FIFO and are possibly executed in parallel
508 * concurrently except for job of the same group that are
509 * executed sequentially in FIFO order.
510 * @param group The group of the job or NULL when no group.
511 * @param timeout The maximum execution time in seconds of the job
512 * or 0 for unlimited time.
513 * @param callback The function to execute for achieving the job.
514 * Its first parameter is either 0 on normal flow
515 * or the signal number that broke the normal flow.
516 * The remaining parameter is the parameter 'arg1'
518 * @param arg The second argument for 'callback'
519 * @return 0 in case of success or -1 in case of error
524 void (*callback)(int, void*),
531 pthread_mutex_lock(&mutex);
533 /* allocates the job */
534 job = job_create(group, timeout, callback, arg);
537 info = "out of memory";
/* refuse the job when the waiting-jobs budget ('remains') is exhausted */
541 /* check availability */
544 info = "too many jobs";
548 /* start a thread if needed */
549 if (running == started && started < allowed) {
550 /* all threads are busy and a new can be started */
551 rc = start_one_thread();
/* failing to start is fatal only when no thread exists at all */
552 if (rc < 0 && started == 0) {
553 info = "can't start first thread";
/* success path: wake a sleeping worker to pick the job up */
562 /* signal an existing job */
563 pthread_cond_signal(&cond);
564 pthread_mutex_unlock(&mutex);
/* error path: recycle the unused job descriptor and report */
568 job->next = free_jobs;
571 ERROR("can't process job with threads: %s, %m", info);
572 pthread_mutex_unlock(&mutex);
577 * Internal helper function for 'jobs_enter'.
578 * @see jobs_enter, jobs_leave
580 static void enter_cb(int signum, void *closure)
582 struct sync *sync = closure;
583 sync->enter(signum, sync->arg, (void*)&sync->thread);
587 * Internal helper function for 'jobs_call'.
590 static void call_cb(int signum, void *closure)
592 struct sync *sync = closure;
593 sync->callback(signum, sync->arg);
594 jobs_leave((void*)&sync->thread);
598 * Internal helper for synchronous jobs. It enters
599 * a new thread loop for evaluating the given job
600 * as recorded by the couple 'sync_cb' and 'sync'.
601 * @see jobs_call, jobs_enter, jobs_leave
606 void (*sync_cb)(int signum, void *closure),
612 pthread_mutex_lock(&mutex);
613 /* NOTE(review): the function's opening signature line is missing from
614  * this extract */
614 /* allocates the job */
615 job = job_create(group, timeout, sync_cb, sync);
617 ERROR("out of memory");
619 pthread_mutex_unlock(&mutex);
/* the calling thread itself becomes a worker until jobs_leave stops it */
626 /* run until stopped */
627 thread_run(&sync->thread);
628 pthread_mutex_unlock(&mutex);
633 * Enter a synchronisation point: activates the job given by 'callback'
634 * and 'closure' using 'group' and 'timeout' to control sequencing and
636 * @param group the group for sequencing jobs
637 * @param timeout the time in seconds allocated to the job
638 * @param callback the callback that will handle the job.
639 * it receives 3 parameters: 'signum' that will be 0
640 * on normal flow or the catched signal number in case
641 * of interrupted flow, the context 'closure' as given and
642 * a 'jobloop' reference that must be used when the job is
643 * terminated to unlock the current execution flow.
644 * @param closure the argument to the callback
645 * @return 0 on success or -1 in case of error
650 void (*callback)(int signum, void *closure, struct jobloop *jobloop),
/* record the user's routine then delegate to the common synchronous path */
656 sync.enter = callback;
658 return do_sync(group, timeout, enter_cb, &sync);
662 * Internal callback for evloop management.
663 * The effect of this function is hidden: it exits
664 * the waiting poll if any. Then it wakes up a thread
665 * awaiting the evloop using signal.
667 static int on_evloop_efd(sd_event_source *s, int fd, uint32_t revents, void *userdata)
670 struct evloop *evloop = userdata;
671 read(evloop->efd, &x, sizeof x);
672 pthread_mutex_lock(&mutex);
673 pthread_cond_broadcast(&evloop->cond);
674 pthread_mutex_unlock(&mutex);
679 * unlock the event loop if needed by sending
681 * @param el the event loop to unlock
682 * @param wait wait the unlocked state of the event loop
684 static void unlock_evloop(struct evloop *el, int wait)
686 /* wait for a modifiable event loop */
687 while (__atomic_load_n(&el->state, __ATOMIC_RELAXED) & EVLOOP_STATE_WAIT) {
/* poke the eventfd so the poller (sd_event_wait) returns;
 * 'x' is declared in a line missing from this extract */
689 write(el->efd, &x, sizeof x);
/* when 'wait' is requested, sleep until on_evloop_efd broadcasts;
 * requires 'mutex' to be held by the caller */
692 pthread_cond_wait(&el->cond, &mutex);
697 * Unlocks the execution flow designed by 'jobloop'.
698 * @param jobloop indication of the flow to unlock
699 * @return 0 in case of success of -1 on error
701 int jobs_leave(struct jobloop *jobloop)
706 pthread_mutex_lock(&mutex);
/* the jobloop handle is really the address of a struct thread:
 * find it in the list of known threads */
708 while (t && t != (struct thread*)jobloop)
715 pthread_cond_broadcast(&cond);
/* also unlock any event loop this thread is holding */
717 i = (int)(sizeof evloop / sizeof *evloop);
719 if (evloop[--i].holder == t) {
720 unlock_evloop(&evloop[i], 0);
726 pthread_mutex_unlock(&mutex);
731 * Calls synchronously the job represented by 'callback' and 'arg1'
732 * for the 'group' and the 'timeout' and waits for its completion.
733 * @param group The group of the job or NULL when no group.
734 * @param timeout The maximum execution time in seconds of the job
735 * or 0 for unlimited time.
736 * @param callback The function to execute for achieving the job.
737 * Its first parameter is either 0 on normal flow
738 * or the signal number that broke the normal flow.
739 * The remaining parameter is the parameter 'arg1'
741 * @param arg The second argument for 'callback'
742 * @return 0 in case of success or -1 in case of error
747 void (*callback)(int, void*),
/* record the user's callback then delegate to the common synchronous path */
752 sync.callback = callback;
755 return do_sync(group, timeout, call_cb, &sync);
759 #if !defined(REMOVE_SYSTEMD_EVENT)
760 __attribute__((unused))
762 static void evloop_callback(void *arg, uint32_t event, struct fdev *fdev)
764 sig_monitor(0, evloop_run, arg);
768 * Gets a sd_event item for the current thread.
769 * @return a sd_event or NULL in case of error
771 static struct sd_event *get_sd_event_locked()
773 /* NOTE(review): error-cleanup and control-flow lines are missing from
774  * this extract; the visible flow is lazy creation then attachment */
776 /* creates the evloop on need */
779 /* start the creation */
781 /* creates the eventfd for waking up polls */
782 el->efd = eventfd(0, EFD_CLOEXEC);
784 ERROR("can't make eventfd for events");
787 /* create the systemd event loop */
788 rc = sd_event_new(&el->sdev);
790 ERROR("can't make new event loop");
793 /* put the eventfd in the event loop */
794 rc = sd_event_add_io(el->sdev, NULL, el->efd, EPOLLIN, on_evloop_efd, el);
796 ERROR("can't register eventfd");
797 #if !defined(REMOVE_SYSTEMD_EVENT)
798 sd_event_unref(el->sdev);
808 /* handle the event loop */
809 el->fdev = fdev_epoll_add(get_fdevepoll(), sd_event_get_fd(el->sdev));
811 ERROR("can't create fdev");
813 sd_event_unref(el->sdev);
/* error path: wipe the half-built evloop descriptor */
817 memset(el, 0, sizeof *el);
820 fdev_set_autoclose(el->fdev, 0);
821 fdev_set_events(el->fdev, EPOLLIN);
822 fdev_set_callback(el->fdev, evloop_callback, el);
/* NOTE(review): '¤' below is an encoding artifact, presumably of
 * '&current_evloop' — confirm against the original source */
826 /* attach the event loop to the current thread */
827 if (current_evloop != el) {
828 if (current_evloop) {
829 __atomic_and_fetch(¤t_evloop->state, ~EVLOOP_STATE_LOCK, __ATOMIC_RELAXED);
830 __atomic_store_n(¤t_evloop->holder, NULL, __ATOMIC_RELAXED);
833 __atomic_or_fetch(&el->state, EVLOOP_STATE_LOCK, __ATOMIC_RELAXED);
834 __atomic_store_n(&el->holder, current_thread, __ATOMIC_RELAXED);
837 /* wait for a modifiable event loop */
838 unlock_evloop(el, 1);
844 * Gets a sd_event item for the current thread.
845 * @return a sd_event or NULL in case of error
847 struct sd_event *jobs_get_sd_event()
849 struct sd_event *result;
851 pthread_mutex_lock(&mutex);
852 result = get_sd_event_locked();
853 pthread_mutex_unlock(&mutex);
858 #if defined(REMOVE_SYSTEMD_EVENT)
860 * Gets the fdev_epoll item.
861 * @return a fdev_epoll or NULL in case of error
863 struct fdev_epoll *jobs_get_fdev_epoll()
865 struct fdev_epoll *result;
867 pthread_mutex_lock(&mutex);
868 result = get_fdevepoll();
869 pthread_mutex_unlock(&mutex);
876 * Enter the jobs processing loop.
877 * @param allowed_count Maximum count of thread for jobs including this one
878 * @param start_count Count of thread to start now, must be lower.
879 * @param waiter_count Maximum count of jobs that can be waiting.
880 * @param start The start routine to activate (can't be NULL)
881 * @return 0 in case of success or -1 in case of error.
883 int jobs_start(int allowed_count, int start_count, int waiter_count, void (*start)(int signum, void* arg), void *arg)
/* validate the configuration before taking the lock */
889 assert(allowed_count >= 1);
890 assert(start_count >= 0);
891 assert(waiter_count > 0);
892 assert(start_count <= allowed_count);
895 pthread_mutex_lock(&mutex);
897 /* check whether already running */
898 if (current_thread || allowed) {
899 ERROR("thread already started");
904 /* records the allowed count */
905 allowed = allowed_count;
908 remains = waiter_count;
/* only when built with systemd watchdog support (HAS_WATCHDOG) */
911 /* set the watchdog */
912 if (sd_watchdog_enabled(0, NULL))
913 sd_event_set_watchdog(get_sd_event_locked(), 1);
916 /* start at least one thread */
/* +1 accounts for the calling thread which joins the pool itself */
918 while ((launched + 1) < start_count) {
919 if (start_one_thread() != 0) {
920 ERROR("Not all threads can be started");
926 /* queue the start job */
927 job = job_create(NULL, 0, start, arg);
929 ERROR("out of memory");
942 pthread_mutex_unlock(&mutex);
947 * Terminate all the threads and cancel all pending jobs.
949 void jobs_terminate()
951 struct job *job, *head, *tail;
952 pthread_t me, *others;
959 /* request all threads to stop */
960 pthread_mutex_lock(&mutex);
/* two passes over the thread list: first count, then record the ids
 * (skipping nested loops and the calling thread itself) */
963 /* count the number of threads */
967 if (!t->upper && !pthread_equal(t->tid, me))
972 /* fill the array of threads */
973 others = alloca(count * sizeof *others);
977 if (!t->upper && !pthread_equal(t->tid, me))
978 others[count++] = t->tid;
982 /* stops the threads */
/* wake every sleeping worker so it observes its 'stop' flag, then join;
 * the mutex is dropped around pthread_join to let workers exit */
989 /* wait the threads */
990 pthread_cond_broadcast(&cond);
991 pthread_mutex_unlock(&mutex);
993 pthread_join(others[--count], NULL);
994 pthread_mutex_lock(&mutex);
996 /* cancel pending jobs of other threads */
1002 /* unlink the job */
1006 /* search if job is stacked for current */
1008 while (t && t->job != job)
/* a job still referenced by a live thread frame is re-queued instead
 * of being cancelled now */
1011 /* yes, relink it at end */
1019 /* no cancel the job */
1020 pthread_mutex_unlock(&mutex);
1021 sig_monitor(0, job_cancel, job);
1023 pthread_mutex_lock(&mutex);
1026 pthread_mutex_unlock(&mutex);