2 * Copyright (C) 2016, 2017, 2018 "IoT.bzh"
3 * Author José Bollo <jose.bollo@iot.bzh>
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
9 * http://www.apache.org/licenses/LICENSE-2.0
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
20 #if defined(NO_JOBS_WATCHDOG)
21 # define HAS_WATCHDOG 0
23 # define HAS_WATCHDOG 1
32 #include <sys/syscall.h>
36 #include <sys/eventfd.h>
38 #include <systemd/sd-event.h>
41 #include <systemd/sd-daemon.h>
45 #include "sig-monitor.h"
48 #if defined(REMOVE_SYSTEMD_EVENT)
49 #include "fdev-epoll.h"
52 #define EVENT_TIMEOUT_TOP ((uint64_t)-1)
53 #define EVENT_TIMEOUT_CHILD ((uint64_t)10000)
57 /** Internal shortcut for callback */
58 typedef void (*job_cb_t)(int, void*);
60 /** Description of a pending job */
/* NOTE(review): the 'struct job {' opening line is missing from this excerpt */
63 struct job *next; /**< link to the next job enqueued */
64 const void *group; /**< group of the request */
65 job_cb_t callback; /**< processing callback */
66 void *arg; /**< argument */
67 int timeout; /**< timeout in seconds for processing the request */
68 unsigned blocked: 1; /**< is another request blocking this one? */
69 unsigned dropped: 1; /**< is removed? */
72 /** Description of handled event loops */
/* NOTE(review): the 'struct evloop {' opening line is missing from this excerpt */
75 unsigned state; /**< encoded state (mask of EVLOOP_STATE_* flags) */
76 int efd; /**< event notification */
77 struct sd_event *sdev; /**< the systemd event loop */
78 struct fdev *fdev; /**< handling of events */
79 struct thread *holder; /**< holder of the evloop */
/* state flags: WAIT set while a thread is blocked polling the loop
 * (checked by evloop_wakeup), RUN set while dispatching */
82 #define EVLOOP_STATE_WAIT 1U
83 #define EVLOOP_STATE_RUN 2U
85 /** Description of threads */
/* NOTE(review): the 'struct thread {' opening line is missing from this excerpt */
88 struct thread *next; /**< next thread of the list */
89 struct thread *upper; /**< upper same thread */
90 struct thread *nholder;/**< next holder for evloop */
91 pthread_cond_t *cwhold;/**< condition wait for holding */
92 struct job *job; /**< currently processed job */
93 pthread_t tid; /**< the thread id */
94 volatile unsigned stop: 1; /**< stop requested */
95 volatile unsigned waits: 1; /**< is waiting? */
99 * Description of synchronous callback
/* NOTE(review): the 'struct sync {' opening line is missing from this excerpt;
 * 'callback' and 'enter' may share storage (e.g. a union) — can't tell from here */
103 struct thread thread; /**< thread loop data */
105 void (*callback)(int, void*); /**< the synchronous callback */
106 void (*enter)(int signum, void *closure, struct jobloop *jobloop);
107 /**< the entering synchronous routine */
109 void *arg; /**< the argument of the callback */
113 /* synchronisation of threads */
114 static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
115 static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
117 /* count allowed, started and running threads */
118 static int allowed = 0; /**< allowed count of threads */
119 static int started = 0; /**< started count of threads */
120 static int running = 0; /**< running count of threads */
121 static int remains = 0; /**< allowed count of waiting jobs */
123 /* list of threads */
124 static struct thread *threads;
125 static _Thread_local struct thread *current_thread;
127 /* queue of pending jobs */
128 static struct job *first_job;
129 static struct job *free_jobs; /* recycling list, see job_create/job_release */
/* the single event-loop descriptor of the process */
132 static struct evloop evloop;
134 #if defined(REMOVE_SYSTEMD_EVENT)
135 static struct fdev_epoll *fdevepoll; /* lazily created, see get_fdevepoll */
140 * Create a new job with the given parameters
141 * @param group the group of the job
142 * @param timeout the timeout of the job (0 if none)
143 * @param callback the function that achieves the job
144 * @param arg the argument of the callback
145 * @return the created job unblock or NULL when no more memory
147 static struct job *job_create(
/* NOTE(review): parameter list and several body lines are missing from this excerpt */
155 /* try to recycle an existing job */
158 free_jobs = job->next;
160 /* allocation without blocking: drop the global mutex around malloc */
161 pthread_mutex_unlock(&mutex);
162 job = malloc(sizeof *job);
163 pthread_mutex_lock(&mutex);
165 ERROR("out of memory");
170 /* initialises the job */
172 job->timeout = timeout;
173 job->callback = callback;
182 * Adds 'job' at the end of the list of jobs, marking it
183 * as blocked if another job with the same group is pending.
184 * @param job the job to add
186 static void job_add(struct job *job)
189 struct job *ijob, **pjob;
195 /* search end and blockers */
/* a pending job of the same (non-NULL) group forces FIFO sequencing */
199 if (group && ijob->group == group)
211 * Get the next job to process or NULL if none.
212 * @return the first job that isn't blocked or NULL
214 static inline struct job *job_get()
/* scan the queue, skipping jobs blocked by a same-group predecessor */
216 struct job *job = first_job;
217 while (job && job->blocked)
225 * Releases the processed 'job': removes it
226 * from the list of jobs and unblock the first
227 * pending job of the same group if any.
228 * @param job the job to release
230 static inline void job_release(struct job *job)
232 struct job *ijob, **pjob;
235 /* first unqueue the job */
238 while (ijob != job) {
244 /* then unblock jobs of the same group */
248 while (ijob && ijob->group != group)
254 /* recycle the job: push it on the free list instead of freeing */
255 job->next = free_jobs;
260 * Monitored cancel callback for a job.
261 * This function is called by the monitor
262 * to cancel the job when the safe environment
264 * @param signum 0 on normal flow or the number
265 * of the signal that interrupted the normal
267 * @param arg the job to run
269 static void job_cancel(int signum, void *arg)
/* cancellation is signalled to the job by invoking it with SIGABRT */
271 struct job *job = arg;
272 job->callback(SIGABRT, job->arg);
275 #if defined(REMOVE_SYSTEMD_EVENT)
277 * Gets a fdev_epoll item.
278 * @return a fdev_epoll or NULL in case of error
280 static struct fdev_epoll *get_fdevepoll()
282 struct fdev_epoll *result;
/* lazily create the singleton on first call */
286 result = fdevepoll = fdev_epoll_create();
293 * Monitored normal callback for events.
294 * This function is called by the monitor
295 * to run the event loop when the safe environment
297 * @param signum 0 on normal flow or the number
298 * of the signal that interrupted the normal
300 * @param arg the events to run
302 static void evloop_run(int signum, void *arg)
/* one sd-event iteration: prepare / wait / dispatch */
309 rc = sd_event_prepare(se);
312 CRITICAL("sd_event_prepare returned an error (state: %d): %m", sd_event_get_state(se));
/* infinite timeout: (uint64_t)(int64_t)-1 == UINT64_MAX */
316 rc = sd_event_wait(se, (uint64_t)(int64_t)-1);
319 ERROR("sd_event_wait returned an error (state: %d): %m", sd_event_get_state(se));
/* leaving WAIT: only RUN remains set while dispatching */
322 evloop.state = EVLOOP_STATE_RUN;
324 rc = sd_event_dispatch(se);
327 ERROR("sd_event_dispatch returned an error (state: %d): %m", sd_event_get_state(se));
335 * Internal callback for evloop management.
336 * The effect of this function is hidden: it exits
337 * the waiting poll if any.
339 static void evloop_on_efd_event()
/* drain one token from the wake-up eventfd (return value ignored) */
342 read(evloop.efd, &x, sizeof x);
346 * wakeup the event loop if needed by sending
349 static void evloop_wakeup()
/* only wake when a thread is actually blocked in the poll */
353 if (evloop.state & EVLOOP_STATE_WAIT) {
355 write(evloop.efd, &x, sizeof x);
360 * Release the currently held event loop
362 static void evloop_release()
364 struct thread *nh, *ct = current_thread;
/* only the holder may release; hand over to the next queued holder if any */
366 if (evloop.holder == ct) {
370 pthread_cond_signal(nh->cwhold);
375 * get the eventloop for the current thread
377 static int evloop_get()
379 struct thread *ct = current_thread;
/* non-zero when the current thread ends up holding the evloop */
382 return evloop.holder == ct;
390 * acquire the eventloop for the current thread
392 static void evloop_acquire()
394 struct thread **pwait, *ct;
397 /* try to get the evloop */
399 /* failed, init waiting state */
/* NOTE(review): 'cond' here is presumably a local pthread_cond_t declared on an
 * omitted line (it is init'ed and destroyed here) — verify it is not the global 'cond' */
403 pthread_cond_init(&cond, NULL);
405 /* queue current thread in holder list */
406 pwait = &evloop.holder;
408 pwait = &(*pwait)->nholder;
411 /* wake up the evloop */
414 /* wait to acquire the evloop */
415 pthread_cond_wait(&cond, &mutex);
416 pthread_cond_destroy(&cond);
420 #if defined(REMOVE_SYSTEMD_EVENT)
422 * Monitored normal loop for waiting events.
423 * @param signum 0 on normal flow or the number
424 * of the signal that interrupted the normal
426 * @param arg the events to run
428 static void monitored_wait_and_dispatch(int signum, void *arg)
430 struct fdev_epoll *fdev_epoll = arg;
/* block indefinitely (-1) until an event is dispatched */
432 fdev_epoll_wait_and_dispatch(fdev_epoll, -1);
439 * @param me the description of the thread to enter
441 static void thread_enter(volatile struct thread *me)
443 /* initialize description of itself and link it in the list */
444 me->tid = pthread_self();
/* record the previously current thread so thread_leave can restore it */
447 me->upper = current_thread;
449 threads = (struct thread*)me;
450 current_thread = (struct thread*)me;
455 * @param me the description of the thread to leave
457 static void thread_leave()
459 struct thread **prv, *me;
461 /* unlink the current thread and cleanup */
/* restore the thread that was current before thread_enter */
468 current_thread = me->upper;
472 * Main processing loop of internal threads with processing jobs.
473 * The loop must be called with the mutex locked
474 * and it returns with the mutex locked.
475 * @param me the description of the thread to use
476 * TODO: how are timeout handled when reentering?
478 static void thread_run_internal(volatile struct thread *me)
485 /* loop until stopped */
487 /* release the current event loop */
493 /* prepare running the job */
494 job->blocked = 1; /* mark job as blocked */
495 me->job = job; /* record the job (only for terminate) */
/* run the job outside of the global mutex, under the signal monitor */
498 pthread_mutex_unlock(&mutex);
499 sig_monitor(job->timeout, job->callback, job->arg);
500 pthread_mutex_lock(&mutex);
502 /* release the run job */
504 #if !defined(REMOVE_SYSTEMD_EVENT)
505 /* no job, check event loop wait */
506 } else if (evloop_get()) {
507 if (evloop.state != 0) {
509 CRITICAL("Can't enter dispatch while in dispatch!");
513 evloop.state = EVLOOP_STATE_RUN|EVLOOP_STATE_WAIT;
514 pthread_mutex_unlock(&mutex);
515 sig_monitor(0, evloop_run, NULL);
516 pthread_mutex_lock(&mutex);
519 /* no job and no event loop */
522 ERROR("Entering job deep sleep! Check your bindings.");
524 pthread_cond_wait(&cond, &mutex);
528 } else if (waitevt) {
529 /* no job and no events */
532 ERROR("Entering job deep sleep! Check your bindings.");
534 pthread_cond_wait(&cond, &mutex);
538 /* wait for events */
540 pthread_mutex_unlock(&mutex);
541 sig_monitor(0, monitored_wait_and_dispatch, get_fdevepoll());
542 pthread_mutex_lock(&mutex);
553 * Main processing loop of external threads.
554 * The loop must be called with the mutex locked
555 * and it returns with the mutex locked.
556 * @param me the description of the thread to use
558 static void thread_run_external(volatile struct thread *me)
563 /* loop until stopped */
/* external threads never run jobs themselves: they only sleep on the condition */
566 pthread_cond_wait(&cond, &mutex);
572 * Root for created threads.
574 static void thread_main()
/* set up per-thread timeout machinery around the internal job loop */
580 sig_monitor_init_timeouts();
581 thread_run_internal(&me);
582 sig_monitor_clean_timeouts();
588 * Entry point for created threads.
589 * @param data not used
592 static void *thread_starter(void *data)
/* thread_main presumably runs between lock and unlock — lines omitted in excerpt */
594 pthread_mutex_lock(&mutex);
596 pthread_mutex_unlock(&mutex);
601 * Starts a new thread
602 * @return 0 in case of success or -1 in case of error
604 static int start_one_thread()
609 rc = pthread_create(&tid, NULL, thread_starter, NULL);
/* %m expands errno (GNU extension) — pthread_create returns the error instead;
 * NOTE(review): errno may not reflect the failure here, lines omitted in excerpt */
612 WARNING("not able to start thread: %m");
619 * Queues a new asynchronous job represented by 'callback' and 'arg'
620 * for the 'group' and the 'timeout'.
621 * Jobs are queued FIFO and are possibly executed in parallel
622 * concurrently except for job of the same group that are
623 * executed sequentially in FIFO order.
624 * @param group The group of the job or NULL when no group.
625 * @param timeout The maximum execution time in seconds of the job
626 * or 0 for unlimited time.
627 * @param callback The function to execute for achieving the job.
628 * Its first parameter is either 0 on normal flow
629 * or the signal number that broke the normal flow.
630 * The remaining parameter is the parameter 'arg'
632 * @param arg The second argument for 'callback'
633 * @return 0 in case of success or -1 in case of error
/* NOTE(review): the function signature line is omitted in this excerpt;
 * from the doc above this is the public job-queueing entry point */
638 void (*callback)(int, void*),
644 pthread_mutex_lock(&mutex);
646 /* allocates the job */
647 job = job_create(group, timeout, callback, arg);
651 /* check availability */
653 ERROR("can't process job with threads: too many jobs");
658 /* start a thread if needed */
659 if (running == started && started < allowed) {
660 /* all threads are busy and a new can be started */
661 rc = start_one_thread();
662 if (rc < 0 && started == 0) {
663 ERROR("can't start initial thread: %m");
671 /* signal an existing job */
672 pthread_cond_signal(&cond);
673 pthread_mutex_unlock(&mutex);
/* error path: recycle the allocated job before returning */
677 job->next = free_jobs;
680 pthread_mutex_unlock(&mutex);
685 * Internal helper function for 'jobs_enter'.
686 * @see jobs_enter, jobs_leave
688 static void enter_cb(int signum, void *closure)
690 struct sync *sync = closure;
/* the embedded thread doubles as the opaque jobloop handle */
691 sync->enter(signum, sync->arg, (void*)&sync->thread);
695 * Internal helper function for 'jobs_call'.
698 static void call_cb(int signum, void *closure)
700 struct sync *sync = closure;
701 sync->callback(signum, sync->arg);
/* synchronous call: leave the loop as soon as the callback returns */
702 jobs_leave((void*)&sync->thread);
706 * Internal helper for synchronous jobs. It enters
707 * a new thread loop for evaluating the given job
708 * as recorded by the couple 'sync_cb' and 'sync'.
709 * @see jobs_call, jobs_enter, jobs_leave
714 void (*sync_cb)(int signum, void *closure),
720 pthread_mutex_lock(&mutex);
722 /* allocates the job */
723 job = job_create(group, timeout, sync_cb, sync);
725 pthread_mutex_unlock(&mutex);
732 /* run until stopped */
/* internal (binder) threads process jobs; external callers just wait */
734 thread_run_internal(&sync->thread);
736 thread_run_external(&sync->thread);
737 pthread_mutex_unlock(&mutex);
742 * Enter a synchronisation point: activates the job given by 'callback'
743 * and 'closure' using 'group' and 'timeout' to control sequencing and
745 * @param group the group for sequencing jobs
746 * @param timeout the time in seconds allocated to the job
747 * @param callback the callback that will handle the job.
748 * it receives 3 parameters: 'signum' that will be 0
749 * on normal flow or the caught signal number in case
750 * of interrupted flow, the context 'closure' as given and
751 * a 'jobloop' reference that must be used when the job is
752 * terminated to unlock the current execution flow.
753 * @param closure the argument to the callback
754 * @return 0 on success or -1 in case of error
759 void (*callback)(int signum, void *closure, struct jobloop *jobloop),
765 sync.enter = callback;
767 return do_sync(group, timeout, enter_cb, &sync);
771 * Unlocks the execution flow designed by 'jobloop'.
772 * @param jobloop indication of the flow to unlock
773 * @return 0 in case of success or -1 on error
775 int jobs_leave(struct jobloop *jobloop)
779 pthread_mutex_lock(&mutex);
/* find the thread whose embedded struct matches the opaque jobloop handle */
781 while (t && t != (struct thread*)jobloop)
788 pthread_cond_broadcast(&cond);
792 pthread_mutex_unlock(&mutex);
797 * Calls synchronously the job represented by 'callback' and 'arg1'
798 * for the 'group' and the 'timeout' and waits for its completion.
799 * @param group The group of the job or NULL when no group.
800 * @param timeout The maximum execution time in seconds of the job
801 * or 0 for unlimited time.
802 * @param callback The function to execute for achieving the job.
803 * Its first parameter is either 0 on normal flow
804 * or the signal number that broke the normal flow.
805 * The remaining parameter is the parameter 'arg'
807 * @param arg The second argument for 'callback'
808 * @return 0 in case of success or -1 in case of error
813 void (*callback)(int, void*),
818 sync.callback = callback;
821 return do_sync(group, timeout, call_cb, &sync);
825 * Internal callback for evloop management.
826 * The effect of this function is hidden: it exits
827 * the waiting poll if any. Then it wakes up a thread
828 * awaiting the evloop using signal.
830 static int on_evloop_efd(sd_event_source *s, int fd, uint32_t revents, void *userdata)
832 evloop_on_efd_event();
837 #if !defined(REMOVE_SYSTEMD_EVENT)
/* kept for the fdev path; unused when systemd events are enabled */
838 __attribute__((unused))
840 static void evloop_callback(void *arg, uint32_t event, struct fdev *fdev)
842 sig_monitor(0, evloop_run, arg);
846 * Gets a sd_event item for the current thread.
847 * @return a sd_event or NULL in case of error
849 static struct sd_event *get_sd_event_locked()
853 /* creates the evloop on need */
855 /* start the creation */
857 /* creates the eventfd for waking up polls */
858 evloop.efd = eventfd(0, EFD_CLOEXEC|EFD_SEMAPHORE);
859 if (evloop.efd < 0) {
860 ERROR("can't make eventfd for events");
863 /* create the systemd event loop */
864 rc = sd_event_new(&evloop.sdev);
866 ERROR("can't make new event loop");
869 /* put the eventfd in the event loop */
870 rc = sd_event_add_io(evloop.sdev, NULL, evloop.efd, EPOLLIN, on_evloop_efd, NULL);
872 ERROR("can't register eventfd");
873 #if !defined(REMOVE_SYSTEMD_EVENT)
874 sd_event_unref(evloop.sdev);
884 /* handle the event loop */
885 evloop.fdev = fdev_epoll_add(get_fdevepoll(), sd_event_get_fd(evloop.sdev));
887 ERROR("can't create fdev");
889 sd_event_unref(evloop.sdev);
/* on failure, wipe the partially-initialised descriptor */
893 memset(&evloop, 0, sizeof evloop);
896 fdev_set_autoclose(evloop.fdev, 0);
897 fdev_set_events(evloop.fdev, EPOLLIN);
898 fdev_set_callback(evloop.fdev, evloop_callback, NULL);
902 /* acquire the event loop */
909 * Gets a sd_event item for the current thread.
910 * @return a sd_event or NULL in case of error
912 struct sd_event *jobs_get_sd_event()
914 struct sd_event *result;
917 /* ensure an existing thread environment */
/* fake a minimal current_thread so the locked getter can proceed */
918 if (!current_thread) {
919 memset(&lt, 0, sizeof lt);
920 current_thread = &lt;
924 pthread_mutex_lock(&mutex);
925 result = get_sd_event_locked();
926 pthread_mutex_unlock(&mutex);
928 /* release the faked thread environment if needed */
929 if (current_thread == &lt) {
931 * Releasing it is needed because there is no way to guess
932 * when it has to be released really. But here is where it is
933 * hazardous: if the caller modifies the eventloop when it
934 * is waiting, there is no way to make the change effective.
935 * A workaround to achieve that goal is for the caller to
936 * require the event loop a second time after having modified it.
938 NOTICE("Requiring sd_event loop out of binder callbacks is hazardous!");
939 if (verbose_wants(Log_Level_Info))
940 sig_monitor_dumpstack();
942 current_thread = NULL;
948 #if defined(REMOVE_SYSTEMD_EVENT)
950 * Gets the fdev_epoll item.
951 * @return a fdev_epoll or NULL in case of error
953 struct fdev_epoll *jobs_get_fdev_epoll()
955 struct fdev_epoll *result;
/* thread-safe wrapper around the lazy singleton getter */
957 pthread_mutex_lock(&mutex);
958 result = get_fdevepoll();
959 pthread_mutex_unlock(&mutex);
966 * Enter the jobs processing loop.
967 * @param allowed_count Maximum count of thread for jobs including this one
968 * @param start_count Count of thread to start now, must be lower.
969 * @param waiter_count Maximum count of jobs that can be waiting.
970 * @param start The start routine to activate (can't be NULL)
971 * @return 0 in case of success or -1 in case of error.
973 int jobs_start(int allowed_count, int start_count, int waiter_count, void (*start)(int signum, void* arg), void *arg)
978 assert(allowed_count >= 1);
979 assert(start_count >= 0);
980 assert(waiter_count > 0);
981 assert(start_count <= allowed_count);
984 pthread_mutex_lock(&mutex);
986 /* check whether already running */
987 if (current_thread || allowed) {
988 ERROR("thread already started");
993 /* records the allowed count */
994 allowed = allowed_count;
997 remains = waiter_count;
1000 /* set the watchdog */
1001 if (sd_watchdog_enabled(0, NULL))
1002 sd_event_set_watchdog(get_sd_event_locked(), 1);
1005 /* start at least one thread: the current one */
1007 while (launched < start_count) {
1008 if (start_one_thread() != 0) {
1009 ERROR("Not all threads can be started");
1015 /* queue the start job */
1016 job = job_create(NULL, 0, start, arg);
1025 pthread_mutex_unlock(&mutex);
1030 * Terminate all the threads and cancel all pending jobs.
1032 void jobs_terminate()
1034 struct job *job, *head, *tail;
1035 pthread_t me, *others;
1040 me = pthread_self();
1042 /* request all threads to stop */
1043 pthread_mutex_lock(&mutex);
1046 /* count the number of threads */
/* only count root threads (no 'upper') other than the calling thread */
1050 if (!t->upper && !pthread_equal(t->tid, me))
1055 /* fill the array of threads */
1056 others = alloca(count * sizeof *others);
1060 if (!t->upper && !pthread_equal(t->tid, me))
1061 others[count++] = t->tid;
1065 /* stops the threads */
1072 /* wait the threads */
/* join with the mutex released so the threads can finish */
1073 pthread_cond_broadcast(&cond);
1074 pthread_mutex_unlock(&mutex);
1076 pthread_join(others[--count], NULL);
1077 pthread_mutex_lock(&mutex);
1079 /* cancel pending jobs of other threads */
1085 /* unlink the job */
1089 /* search if job is stacked for current */
1091 while (t && t->job != job)
1094 /* yes, relink it at end */
1102 /* now cancel the job */
1103 pthread_mutex_unlock(&mutex);
1104 sig_monitor(0, job_cancel, job);
1106 pthread_mutex_lock(&mutex);
1109 pthread_mutex_unlock(&mutex);