Add wait for start jobs 37/25037/4
author Scott Murray <scott.murray@konsulko.com>
Tue, 21 Jul 2020 02:04:11 +0000 (22:04 -0400)
committer Scott Murray <scott.murray@konsulko.com>
Thu, 23 Jul 2020 01:55:35 +0000 (21:55 -0400)
The systemd job to start an application unit may remain queued long
enough after a call into the systemd D-Bus API that the subsequent
state check sees the unit as still inactive.  This results in the
application start being incorrectly reported as having failed, when
it will still actually proceed once the job is dequeued and run in
systemd.

To fix this, checking of the state of the job object returned by the
start D-Bus calls has been added in the various wrapper functions in
src/utils-systemd.c.  The timeout mechanism used in the new job_wait
helper function is based on the existing state checking logic of the
wait_state_stable function in src/afm-urun.c.

Bug-AGL: SPEC-3365, SPEC-3427, SPEC-3457

Signed-off-by: Scott Murray <scott.murray@konsulko.com>
Change-Id: I01077a4213944233506518869c3dc604bd40f693

src/utils-systemd.c
src/utils-systemd.h

diff --git a/src/utils-systemd.c b/src/utils-systemd.c
index 0c22810..8ccb81b 100644
@@ -56,6 +56,8 @@ static const char sdb_destination[] = "org.freedesktop.systemd1";
 static const char sdbi_manager[] = "org.freedesktop.systemd1.Manager";
 static const char sdbi_unit[] = "org.freedesktop.systemd1.Unit";
 static const char sdbi_service[] = "org.freedesktop.systemd1.Service";
+static const char sdbi_job[] = "org.freedesktop.systemd1.Job";
+static const char sdbj_state[] = "State";
 static const char sdbm_reload[] = "Reload";
 static const char sdbm_start_unit[] = "StartUnit";
 static const char sdbm_restart_unit[] = "RestartUnit";
@@ -79,6 +81,12 @@ static const char *sds_state_names[] = {
        "failed"
 };
 
+static const char *sds_job_state_names[] = {
+       NULL,
+       "waiting",
+       "running"
+};
+
 static struct sd_bus *sysbus;
 static struct sd_bus *usrbus;
 
@@ -305,6 +313,46 @@ static enum SysD_State unit_state(struct sd_bus *bus, const char *dpath)
        return resu;
 }
 
+static int job_wait(struct sd_bus *bus, struct sd_bus_message *job)
+{
+       int rc;
+       sd_bus_error err = SD_BUS_ERROR_NULL;
+       const char *jpath = NULL;
+       char *jstate;
+       struct timespec tispec;
+       const int period_ms = 10;
+       const int trial_s = 10;
+       const int trial_count = (trial_s * 1000) / period_ms;
+       const int period_ns = period_ms * 1000000;
+       int trial;
+
+       /* Get job path */
+       rc = sd_bus_message_read_basic(job, 'o', &jpath);
+       if (rc < 0)
+               return rc;
+
+       /* Wait for job to enter "running" state */
+       rc = 0;
+       for (trial = 1 ; trial <= trial_count ; trial++) {
+               jstate = NULL;
+               if(sd_bus_get_property_string(bus, sdb_destination, jpath, sdbi_job, sdbj_state, &err, &jstate) >= 0) {
+                       if(jstate && strcmp(jstate, sds_job_state_names[SysD_Job_State_Running]) == 0) {
+                               free(jstate);
+                               break;
+                       } else {
+                               tispec.tv_sec = 0;
+                               tispec.tv_nsec = period_ns;
+                               nanosleep(&tispec, NULL);
+                       }
+                       free(jstate);
+               }
+       }
+       if(trial > trial_count)
+               rc = -1;
+
+       return rc;
+}
+
 static int unit_start(struct sd_bus *bus, const char *dpath)
 {
        int rc;
@@ -312,6 +360,9 @@ static int unit_start(struct sd_bus *bus, const char *dpath)
        sd_bus_error err = SD_BUS_ERROR_NULL;
 
        rc = sd_bus_call_method(bus, sdb_destination, dpath, sdbi_unit, sdbm_start, &err, &ret, "s", "replace");
+       if(ret) {
+               rc = job_wait(bus, ret);
+       }
        sd_bus_message_unref(ret);
        return rc;
 }
@@ -323,6 +374,9 @@ static int unit_restart(struct sd_bus *bus, const char *dpath)
        sd_bus_error err = SD_BUS_ERROR_NULL;
 
        rc = sd_bus_call_method(bus, sdb_destination, dpath, sdbi_unit, sdbm_restart, &err, &ret, "s", "replace");
+       if(ret) {
+               rc = job_wait(bus, ret);
+       }
        sd_bus_message_unref(ret);
        return rc;
 }
@@ -345,6 +399,9 @@ static int unit_start_name(struct sd_bus *bus, const char *name)
        sd_bus_error err = SD_BUS_ERROR_NULL;
 
        rc = sd_bus_call_method(bus, sdb_destination, sdb_path, sdbi_manager, sdbm_start_unit, &err, &ret, "ss", name, "replace");
+       if(ret) {
+               rc = job_wait(bus, ret);
+       }
        sd_bus_message_unref(ret);
        return rc;
 }
@@ -356,6 +413,9 @@ static int unit_restart_name(struct sd_bus *bus, const char *name)
        sd_bus_error err = SD_BUS_ERROR_NULL;
 
        rc = sd_bus_call_method(bus, sdb_destination, sdb_path, sdbi_manager, sdbm_restart_unit, &err, &ret, "ss", name, "replace");
+       if(ret) {
+               rc = job_wait(bus, ret);
+       }
        sd_bus_message_unref(ret);
        return rc;
 }
diff --git a/src/utils-systemd.h b/src/utils-systemd.h
index 3bddfd6..6d43c71 100644
@@ -28,6 +28,12 @@ enum SysD_State {
     SysD_State_Failed
 };
 
+enum SysD_Job_State {
+    SysD_Job_State_INVALID,
+    SysD_Job_State_Waiting,
+    SysD_Job_State_Running
+};
+
 struct sd_bus;
 extern int systemd_get_bus(int isuser, struct sd_bus **ret);
 extern void systemd_set_bus(int isuser, struct sd_bus *bus);