Add pre-trained model for Snips
[src/snips-model-agl.git] / model / nlu_engine.json
diff --git a/model/nlu_engine.json b/model/nlu_engine.json
new file mode 100644 (file)
index 0000000..1c8a4dd
--- /dev/null
@@ -0,0 +1,242 @@
+{
+  "builtin_entity_parser": "builtin_entity_parser",
+  "config": {
+    "intent_parsers_configs": [
+      {
+        "ignore_stop_words": true,
+        "unit_name": "lookup_intent_parser"
+      },
+      {
+        "intent_classifier_config": {
+          "data_augmentation_config": {
+            "add_builtin_entities_examples": false,
+            "max_unknown_words": null,
+            "min_utterances": 20,
+            "noise_factor": 5,
+            "unknown_word_prob": 0.0,
+            "unknown_words_replacement_string": null
+          },
+          "featurizer_config": {
+            "added_cooccurrence_feature_ratio": 0.0,
+            "cooccurrence_vectorizer_config": {
+              "filter_stop_words": true,
+              "keep_order": true,
+              "unit_name": "cooccurrence_vectorizer",
+              "unknown_words_replacement_string": null,
+              "window_size": null
+            },
+            "pvalue_threshold": 0.4,
+            "tfidf_vectorizer_config": {
+              "unit_name": "tfidf_vectorizer",
+              "use_stemming": false,
+              "word_clusters_name": null
+            },
+            "unit_name": "featurizer"
+          },
+          "noise_reweight_factor": 1,
+          "unit_name": "log_reg_intent_classifier"
+        },
+        "slot_filler_config": {
+          "crf_args": {
+            "algorithm": "lbfgs",
+            "c1": 0.1,
+            "c2": 0.1
+          },
+          "data_augmentation_config": {
+            "add_builtin_entities_examples": true,
+            "capitalization_ratio": 0.2,
+            "min_utterances": 200
+          },
+          "feature_factory_configs": [
+            {
+              "args": {
+                "common_words_gazetteer_name": "top_10000_words_stemmed",
+                "n": 1,
+                "use_stemming": true
+              },
+              "factory_name": "ngram",
+              "offsets": [
+                -2,
+                -1,
+                0,
+                1,
+                2
+              ]
+            },
+            {
+              "args": {
+                "common_words_gazetteer_name": "top_10000_words_stemmed",
+                "n": 2,
+                "use_stemming": true
+              },
+              "factory_name": "ngram",
+              "offsets": [
+                -2,
+                1
+              ]
+            },
+            {
+              "args": {},
+              "factory_name": "is_digit",
+              "offsets": [
+                -1,
+                0,
+                1
+              ]
+            },
+            {
+              "args": {},
+              "factory_name": "is_first",
+              "offsets": [
+                -2,
+                -1,
+                0
+              ]
+            },
+            {
+              "args": {},
+              "factory_name": "is_last",
+              "offsets": [
+                0,
+                1,
+                2
+              ]
+            },
+            {
+              "args": {
+                "n": 1
+              },
+              "factory_name": "shape_ngram",
+              "offsets": [
+                0
+              ]
+            },
+            {
+              "args": {
+                "n": 2
+              },
+              "factory_name": "shape_ngram",
+              "offsets": [
+                -1,
+                0
+              ]
+            },
+            {
+              "args": {
+                "n": 3
+              },
+              "factory_name": "shape_ngram",
+              "offsets": [
+                -1
+              ]
+            },
+            {
+              "args": {
+                "entity_filter": {
+                  "automatically_extensible": false
+                },
+                "tagging_scheme_code": 2,
+                "use_stemming": true
+              },
+              "factory_name": "entity_match",
+              "offsets": [
+                -2,
+                -1,
+                0
+              ]
+            },
+            {
+              "args": {
+                "entity_filter": {
+                  "automatically_extensible": true
+                },
+                "tagging_scheme_code": 2,
+                "use_stemming": true
+              },
+              "drop_out": 0.5,
+              "factory_name": "entity_match",
+              "offsets": [
+                -2,
+                -1,
+                0
+              ]
+            },
+            {
+              "args": {
+                "tagging_scheme_code": 1
+              },
+              "factory_name": "builtin_entity_match",
+              "offsets": [
+                -2,
+                -1,
+                0
+              ]
+            },
+            {
+              "args": {
+                "cluster_name": "brown_clusters",
+                "use_stemming": false
+              },
+              "factory_name": "word_cluster",
+              "offsets": [
+                -2,
+                -1,
+                0,
+                1
+              ]
+            }
+          ],
+          "tagging_scheme": 1,
+          "unit_name": "crf_slot_filler"
+        },
+        "unit_name": "probabilistic_intent_parser"
+      }
+    ],
+    "unit_name": "nlu_engine"
+  },
+  "custom_entity_parser": "custom_entity_parser",
+  "dataset_metadata": {
+    "entities": {
+      "hvac_fan_speed_action": {
+        "automatically_extensible": false
+      },
+      "hvac_temperature_action": {
+        "automatically_extensible": false
+      },
+      "numeric_value": {
+        "automatically_extensible": false
+      },
+      "to_or_by": {
+        "automatically_extensible": false
+      },
+      "volume_control_action": {
+        "automatically_extensible": false
+      }
+    },
+    "language_code": "en",
+    "slot_name_mappings": {
+      "HVACFanSpeed": {
+        "hvac_fan_speed_action": "hvac_fan_speed_action",
+        "numeric_value": "numeric_value",
+        "to_or_by": "to_or_by"
+      },
+      "HVACTemperature": {
+        "hvac_temperature_action": "hvac_temperature_action",
+        "numeric_value": "numeric_value",
+        "to_or_by": "to_or_by"
+      },
+      "VolumeControl": {
+        "numeric_value": "numeric_value",
+        "to_or_by": "to_or_by",
+        "volume_control_action": "volume_control_action"
+      }
+    }
+  },
+  "intent_parsers": [
+    "lookup_intent_parser",
+    "probabilistic_intent_parser"
+  ],
+  "model_version": "0.20.0",
+  "training_package_version": "0.20.2",
+  "unit_name": "nlu_engine"
+}
\ No newline at end of file