Allow choosing whether or not to trap program faults
[src/app-framework-binder.git] / src / jobs.c
index e3df8f5..a6357c8 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2016, 2017 "IoT.bzh"
+ * Copyright (C) 2016, 2017, 2018 "IoT.bzh"
  * Author José Bollo <jose.bollo@iot.bzh>
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
 
 #define _GNU_SOURCE
 
+#if defined(NO_JOBS_WATCHDOG)
+#   define HAS_WATCHDOG 0
+#else
+#   define HAS_WATCHDOG 1
+#endif
+
 #include <stdlib.h>
 #include <stdint.h>
 #include <unistd.h>
 #include <signal.h>
+#include <string.h>
 #include <time.h>
 #include <sys/syscall.h>
 #include <pthread.h>
 #include <sys/eventfd.h>
 
 #include <systemd/sd-event.h>
+#include "fdev.h"
+#if HAS_WATCHDOG
+#include <systemd/sd-daemon.h>
+#endif
 
 #include "jobs.h"
 #include "sig-monitor.h"
 #include "verbose.h"
 
-#if 0
-#define _alert_ "do you really want to remove monitoring?"
-#define sig_monitor_init_timeouts()  ((void)0)
-#define sig_monitor_clean_timeouts() ((void)0)
-#define sig_monitor(to,cb,arg)       (cb(0,arg))
+#if defined(REMOVE_SYSTEMD_EVENT)
+#include "fdev-epoll.h"
 #endif
 
 #define EVENT_TIMEOUT_TOP      ((uint64_t)-1)
@@ -66,6 +74,7 @@ struct evloop
        int efd;               /**< event notification */
        struct sd_event *sdev; /**< the systemd event loop */
        pthread_cond_t  cond;  /**< condition */
+       struct fdev *fdev;     /**< handling of events */
 };
 
 #define EVLOOP_STATE_WAIT           1U
@@ -79,8 +88,8 @@ struct thread
        struct thread *upper;  /**< upper same thread */
        struct job *job;       /**< currently processed job */
        pthread_t tid;         /**< the thread id */
-       unsigned stop: 1;      /**< stop requested */
-       unsigned waits: 1;     /**< is waiting? */
+       volatile unsigned stop: 1;      /**< stop requested */
+       volatile unsigned waits: 1;     /**< is waiting? */
 };
 
 /**
@@ -120,6 +129,11 @@ static struct job *free_jobs;
 /* event loop */
 static struct evloop evloop[1];
 
+#if defined(REMOVE_SYSTEMD_EVENT)
+static struct fdev_epoll *fdevepoll;
+static int waitevt;
+#endif
+
 /**
  * Create a new job with the given parameters
  * @param group    the group of the job
@@ -141,7 +155,7 @@ static struct job *job_create(
        if (job)
                free_jobs = job->next;
        else {
-               /* allocation  without blocking */
+               /* allocation without blocking */
                pthread_mutex_unlock(&mutex);
                job = malloc(sizeof *job);
                pthread_mutex_lock(&mutex);
@@ -252,6 +266,23 @@ static void job_cancel(int signum, void *arg)
        job->callback(SIGABRT, job->arg);
 }
 
+#if defined(REMOVE_SYSTEMD_EVENT)
+/**
+ * Gets the file-scope fdev_epoll item, creating and caching it on first use. No locking is done here.
+ * @return a fdev_epoll or NULL in case of error (the result of fdev_epoll_create() is returned unchecked)
+ */
+static struct fdev_epoll *get_fdevepoll()
+{
+       struct fdev_epoll *result;
+
+       result = fdevepoll;
+       if (!result)
+               result = fdevepoll = fdev_epoll_create(); /* first use: create and remember it */
+
+       return result;
+}
+#endif
+
 /**
  * Monitored normal callback for events.
  * This function is called by the monitor
@@ -269,11 +300,14 @@ static void evloop_run(int signum, void *arg)
        struct evloop *el = arg;
 
        if (!signum) {
+               current_evloop = el;
+               __atomic_store_n(&el->state, EVLOOP_STATE_LOCK|EVLOOP_STATE_RUN|EVLOOP_STATE_WAIT, __ATOMIC_RELAXED);
                se = el->sdev;
                rc = sd_event_prepare(se);
                if (rc < 0) {
                        errno = -rc;
-                       ERROR("sd_event_prepare returned an error (state: %d): %m", sd_event_get_state(se));
+                       CRITICAL("sd_event_prepare returned an error (state: %d): %m", sd_event_get_state(se));
+                       abort();
                } else {
                        if (rc == 0) {
                                rc = sd_event_wait(se, (uint64_t)(int64_t)-1);
@@ -282,7 +316,7 @@ static void evloop_run(int signum, void *arg)
                                        ERROR("sd_event_wait returned an error (state: %d): %m", sd_event_get_state(se));
                                }
                        }
-                       el->state &= ~(EVLOOP_STATE_WAIT);
+                       __atomic_and_fetch(&el->state, ~(EVLOOP_STATE_WAIT), __ATOMIC_RELAXED);
 
                        if (rc > 0) {
                                rc = sd_event_dispatch(se);
@@ -293,10 +327,27 @@ static void evloop_run(int signum, void *arg)
                        }
                }
        }
-       el->state &= ~(EVLOOP_STATE_WAIT|EVLOOP_STATE_RUN);
+       __atomic_and_fetch(&el->state, ~(EVLOOP_STATE_WAIT|EVLOOP_STATE_RUN), __ATOMIC_RELAXED);
 }
 
 
+#if defined(REMOVE_SYSTEMD_EVENT)
+/**
+ * Monitored callback that waits for events and dispatches them.
+ * @param signum 0 on normal flow or the number
+ *               of the signal that interrupted the normal
+ *               flow (nothing is done in that case)
+ * @param arg    the struct fdev_epoll to wait on and dispatch
+ */
+static void monitored_wait_and_dispatch(int signum, void *arg)
+{
+       struct fdev_epoll *fdev_epoll = arg;
+       if (!signum) {
+               fdev_epoll_wait_and_dispatch(fdev_epoll, -1); /* -1: presumably "no timeout" - TODO confirm in fdev-epoll.h */
+       }
+}
+#endif
+
 /**
  * Main processing loop of threads processing jobs.
  * The loop must be called with the mutex locked
@@ -308,7 +359,9 @@ static void thread_run(volatile struct thread *me)
 {
        struct thread **prv;
        struct job *job;
+#if !defined(REMOVE_SYSTEMD_EVENT)
        struct evloop *el;
+#endif
 
        /* initialize description of itself and link it in the list */
        me->tid = pthread_self();
@@ -326,13 +379,13 @@ static void thread_run(volatile struct thread *me)
        /* loop until stopped */
        while (!me->stop) {
                /* release the event loop */
-               if (current_evloop && !(current_evloop->state & EVLOOP_STATE_RUN)) {
-                       current_evloop->state -= EVLOOP_STATE_LOCK;
+               if (current_evloop) {
+                       __atomic_and_fetch(&current_evloop->state, ~EVLOOP_STATE_LOCK, __ATOMIC_RELAXED);
                        current_evloop = NULL;
                }
 
                /* get a job */
-               job = job_get(first_job);
+               job = job_get();
                if (job) {
                        /* prepare running the job */
                        remains++; /* increases count of job that can wait */
@@ -346,12 +399,13 @@ static void thread_run(volatile struct thread *me)
 
                        /* release the run job */
                        job_release(job);
+#if !defined(REMOVE_SYSTEMD_EVENT)
                } else {
                        /* no job, check events */
                        el = &evloop[0];
-                       if (el->sdev && !el->state) {
+                       if (el->sdev && !__atomic_load_n(&el->state, __ATOMIC_RELAXED)) {
                                /* run the events */
-                               el->state = EVLOOP_STATE_LOCK|EVLOOP_STATE_RUN|EVLOOP_STATE_WAIT;
+                               __atomic_store_n(&el->state, EVLOOP_STATE_LOCK|EVLOOP_STATE_RUN|EVLOOP_STATE_WAIT, __ATOMIC_RELAXED);
                                current_evloop = el;
                                pthread_mutex_unlock(&mutex);
                                sig_monitor(0, evloop_run, el);
@@ -366,9 +420,33 @@ static void thread_run(volatile struct thread *me)
                                me->waits = 0;
                                running++;
                        }
+#else
+               } else if (waitevt) {
+                       /* no job, and another thread is already waiting for events (waitevt is set) */
+                       running--;
+                       if (!running)
+                               ERROR("Entering job deep sleep! Check your bindings.");
+                       me->waits = 1;
+                       pthread_cond_wait(&cond, &mutex);
+                       me->waits = 0;
+                       running++;
+               } else {
+                       /* wait for events */
+                       waitevt = 1;
+                       pthread_mutex_unlock(&mutex);
+                       sig_monitor(0, monitored_wait_and_dispatch, get_fdevepoll());
+                       pthread_mutex_lock(&mutex);
+                       waitevt = 0;
+#endif
                }
        }
 
+       /* release the event loop */
+       if (current_evloop) {
+               __atomic_and_fetch(&current_evloop->state, ~EVLOOP_STATE_LOCK, __ATOMIC_RELAXED);
+               current_evloop = NULL;
+       }
+
        /* unlink the current thread and cleanup */
        prv = &threads;
        while (*prv != me)
@@ -556,7 +634,7 @@ static int do_sync(
  *                 of interrupted flow, the context 'closure' as given and
  *                 a 'jobloop' reference that must be used when the job is
  *                 terminated to unlock the current execution flow.
- * @param arg the argument to the callback
+ * @param closure the argument to the callback
  * @return 0 on success or -1 in case of error
  */
 int jobs_enter(
@@ -637,26 +715,35 @@ static int on_evloop_efd(sd_event_source *s, int fd, uint32_t revents, void *use
        struct evloop *evloop = userdata;
        read(evloop->efd, &x, sizeof x);
        pthread_mutex_lock(&mutex);
-       pthread_cond_broadcast(&evloop->cond);  
+       pthread_cond_broadcast(&evloop->cond);
        pthread_mutex_unlock(&mutex);
        return 1;
 }
 
+/* temporary hack: fdev callback running evloop_run on 'arg' through sig_monitor ('event' and 'fdev' are ignored); only registered when REMOVE_SYSTEMD_EVENT is defined, hence the unused attribute otherwise */
+#if !defined(REMOVE_SYSTEMD_EVENT)
+__attribute__((unused))
+#endif
+static void evloop_callback(void *arg, uint32_t event, struct fdev *fdev)
+{
+       sig_monitor(0, evloop_run, arg);
+}
+
 /**
  * Gets a sd_event item for the current thread.
  * @return a sd_event or NULL in case of error
  */
-struct sd_event *jobs_get_sd_event()
+static struct sd_event *get_sd_event_locked()
 {
        struct evloop *el;
        uint64_t x;
        int rc;
 
-       pthread_mutex_lock(&mutex);
-
        /* creates the evloop on need */
        el = &evloop[0];
        if (!el->sdev) {
+               /* start the creation */
+               el->state = 0;
                /* creates the eventfd for waking up polls */
                el->efd = eventfd(0, EFD_CLOEXEC);
                if (el->efd < 0) {
@@ -673,37 +760,85 @@ struct sd_event *jobs_get_sd_event()
                rc = sd_event_add_io(el->sdev, NULL, el->efd, EPOLLIN, on_evloop_efd, el);
                if (rc < 0) {
                        ERROR("can't register eventfd");
+#if !defined(REMOVE_SYSTEMD_EVENT)
                        sd_event_unref(el->sdev);
                        el->sdev = NULL;
 error2:
                        close(el->efd);
 error1:
-                       pthread_mutex_unlock(&mutex);
                        return NULL;
                }
-               /* terminate creation */
-               el->state = 0;
+#else
+                       goto error3;
+               }
+               /* handle the event loop */
+               el->fdev = fdev_epoll_add(get_fdevepoll(), sd_event_get_fd(el->sdev));
+               if (!el->fdev) {
+                       ERROR("can't create fdev");
+error3:
+                       sd_event_unref(el->sdev);
+error2:
+                       close(el->efd);
+error1:
+                       memset(el, 0, sizeof *el);
+                       return NULL;
+               }
+               fdev_set_autoclose(el->fdev, 0);
+               fdev_set_events(el->fdev, EPOLLIN);
+               fdev_set_callback(el->fdev, evloop_callback, el);
+#endif
        }
 
        /* attach the event loop to the current thread */
        if (current_evloop != el) {
                if (current_evloop)
-                       current_evloop->state -= EVLOOP_STATE_LOCK;
+                       __atomic_and_fetch(&current_evloop->state, ~EVLOOP_STATE_LOCK, __ATOMIC_RELAXED);
                current_evloop = el;
-               el->state += EVLOOP_STATE_LOCK;
+               __atomic_or_fetch(&el->state, EVLOOP_STATE_LOCK, __ATOMIC_RELAXED);
        }
 
        /* wait for a modifiable event loop */
-       while (el->state & EVLOOP_STATE_WAIT) {
+       while (__atomic_load_n(&el->state, __ATOMIC_RELAXED) & EVLOOP_STATE_WAIT) {
                x = 1;
                write(el->efd, &x, sizeof x);
                pthread_cond_wait(&el->cond, &mutex);
        }
 
-       pthread_mutex_unlock(&mutex);
        return el->sdev;
 }
 
+/**
+ * Gets a sd_event item for the current thread: calls get_sd_event_locked() while holding the mutex.
+ * @return a sd_event or NULL in case of error
+ */
+struct sd_event *jobs_get_sd_event()
+{
+       struct sd_event *result;
+
+       pthread_mutex_lock(&mutex);
+       result = get_sd_event_locked(); /* the helper expects the mutex to be held */
+       pthread_mutex_unlock(&mutex);
+
+       return result;
+}
+
+#if defined(REMOVE_SYSTEMD_EVENT)
+/**
+ * Gets the fdev_epoll item: calls get_fdevepoll() while holding the mutex.
+ * @return a fdev_epoll or NULL in case of error
+ */
+struct fdev_epoll *jobs_get_fdev_epoll()
+{
+       struct fdev_epoll *result;
+
+       pthread_mutex_lock(&mutex);
+       result = get_fdevepoll(); /* may create the fdev_epoll on first use */
+       pthread_mutex_unlock(&mutex);
+
+       return result;
+}
+#endif
+
 /**
  * Enter the jobs processing loop.
  * @param allowed_count Maximum count of thread for jobs including this one
@@ -712,7 +847,7 @@ error1:
  * @param start         The start routine to activate (can't be NULL)
  * @return 0 in case of success or -1 in case of error.
  */
-int jobs_start(int allowed_count, int start_count, int waiter_count, void (*start)(int signum))
+int jobs_start(int allowed_count, int start_count, int waiter_count, void (*start)(int signum, void* arg), void *arg)
 {
        int rc, launched;
        struct thread me;
@@ -733,18 +868,18 @@ int jobs_start(int allowed_count, int start_count, int waiter_count, void (*star
                goto error;
        }
 
-       /* start */
-       if (sig_monitor_init() < 0) {
-               ERROR("failed to initialise signal handlers");
-               goto error;
-       }
-
        /* records the allowed count */
        allowed = allowed_count;
        started = 0;
        running = 0;
        remains = waiter_count;
 
+#if HAS_WATCHDOG
+       /* set the watchdog */
+       if (sd_watchdog_enabled(0, NULL))
+               sd_event_set_watchdog(get_sd_event_locked(), 1);
+#endif
+
        /* start at least one thread */
        launched = 0;
        while ((launched + 1) < start_count) {
@@ -756,7 +891,7 @@ int jobs_start(int allowed_count, int start_count, int waiter_count, void (*star
        }
 
        /* queue the start job */
-       job = job_create(NULL, 0, (job_cb_t)start, NULL);
+       job = job_create(NULL, 0, start, arg);
        if (!job) {
                ERROR("out of memory");
                errno = ENOMEM;