diff --git a/docs/modules/nifi/images/.$nifi_overview.drawio.svg.bkp b/docs/modules/nifi/images/.$nifi_overview.drawio.svg.bkp new file mode 100644 index 00000000..edbddede --- /dev/null +++ b/docs/modules/nifi/images/.$nifi_overview.drawio.svg.bkp @@ -0,0 +1,4 @@ + + + +
Pod
<name>-node-<rg1>-1
Pod...
NiFi Operator
NiFi Operator
Pod
<name>-node-<rg1>-0
Pod...
ConfigMap
<name>-node-<rg1>
ConfigMap...
NifiCluster
<name>
NifiCluster...
create
create
read
read
Legend
Legend
Operator
Operator
Resource
Resource
Custom
Resource
Custom...
role group
<rg1>
role group...
Service
<name>-node-<rg2>
Service...
Pod
<name>-node-<rg2>-0
Pod...
Service
<name>
Service...
role
node
role...
references
references
role group
<rg2>
role group...
StatefulSet
<name>-node-<rg2>
StatefulSet...
ConfigMap
<name>-node-<rg2>
ConfigMap...
StatefulSet
<name>-node-<rg1>
StatefulSet...
Service
<name>-node-<rg1>
Service...
Text is not SVG - cannot display
\ No newline at end of file diff --git a/docs/modules/nifi/images/nifi_overview.drawio.svg b/docs/modules/nifi/images/nifi_overview.drawio.svg new file mode 100644 index 00000000..13466877 --- /dev/null +++ b/docs/modules/nifi/images/nifi_overview.drawio.svg @@ -0,0 +1,4 @@ + + + +
Pod
<name>-node-<rg1>-1
Pod...
NiFi Operator
NiFi Operator
Pod
<name>-node-<rg1>-0
Pod...
ConfigMap
<name>-node-<rg1>
ConfigMap...
NifiCluster
<name>
NifiCluster...
create
create
read
read
Legend
Legend
Operator
Operator
Resource
Resource
Custom
Resource
Custom...
role group
<rg1>
role group...
Service
<name>-node-<rg2>
Service...
Pod
<name>-node-<rg2>-0
Pod...
Service
<name>
Service...
role
node
role...
references
references
role group
<rg2>
role group...
StatefulSet
<name>-node-<rg2>
StatefulSet...
ConfigMap
<name>-node-<rg2>
ConfigMap...
StatefulSet
<name>-node-<rg1>
StatefulSet...
Service
<name>-node-<rg1>
Service...
Text is not SVG - cannot display
\ No newline at end of file diff --git a/docs/modules/nifi/pages/commandline_args.adoc b/docs/modules/nifi/pages/commandline_args.adoc deleted file mode 100644 index 606e6dcc..00000000 --- a/docs/modules/nifi/pages/commandline_args.adoc +++ /dev/null @@ -1,31 +0,0 @@ -== Command Line Parameters - -This operator accepts the following command line parameters: - -=== product-config - -*Default value*: `/etc/stackable/nifi-operator/config-spec/properties.yaml` - -*Required*: false - -*Multiple values:* false - -[source] ----- -stackable-nifi-operator run --product-config /foo/bar/properties.yaml ----- - -=== watch-namespace - -*Default value*: All namespaces - -*Required*: false - -*Multiple values:* false - -The operator will **only** watch for resources in the provided namespace `test`: - -[source] ----- -stackable-nifi-operator run --watch-namespace test ----- diff --git a/docs/modules/nifi/pages/configuration.adoc b/docs/modules/nifi/pages/configuration.adoc index 47ccbed5..526d07bf 100644 --- a/docs/modules/nifi/pages/configuration.adoc +++ b/docs/modules/nifi/pages/configuration.adoc @@ -1,7 +1,92 @@ = Configuration -include::commandline_args.adoc[] +== Command Line Parameters -include::partial$env_var_args.adoc[] +This operator accepts the following command line parameters: -include::partial$config_properties.adoc[] +=== product-config + +*Default value*: `/etc/stackable/nifi-operator/config-spec/properties.yaml` + +*Required*: false + +*Multiple values:* false + +[source] +---- +stackable-nifi-operator run --product-config /foo/bar/properties.yaml +---- + +=== watch-namespace + +*Default value*: All namespaces + +*Required*: false + +*Multiple values:* false + +The operator will **only** watch for resources in the provided namespace `test`: + +[source] +---- +stackable-nifi-operator run --watch-namespace test +---- + +== Environment variables + +This operator accepts the following environment variables: + +=== PRODUCT_CONFIG + +*Default value*: 
`/etc/stackable/nifi-operator/config-spec/properties.yaml` + +*Required*: false + +*Multiple values:* false + +[source] +---- +export PRODUCT_CONFIG=/foo/bar/properties.yaml +stackable-nifi-operator run +---- + +or via docker: + +---- +docker run \ + --name nifi-operator \ + --network host \ + --env KUBECONFIG=/home/stackable/.kube/config \ + --env PRODUCT_CONFIG=/my/product/config.yaml \ + --mount type=bind,source="$HOME/.kube/config",target="/home/stackable/.kube/config" \ + docker.stackable.tech/stackable/nifi-operator:latest +---- + +=== WATCH_NAMESPACE + +*Default value*: All namespaces + +*Required*: false + +*Multiple values:* false + +The operator will **only** watch for resources in the provided namespace `test`: + +[source] +---- +export WATCH_NAMESPACE=test +stackable-nifi-operator run +---- + +or via docker: + +[source] +---- +docker run \ +--name nifi-operator \ +--network host \ +--env KUBECONFIG=/home/stackable/.kube/config \ +--env WATCH_NAMESPACE=test \ +--mount type=bind,source="$HOME/.kube/config",target="/home/stackable/.kube/config" \ +docker.stackable.tech/stackable/nifi-operator:latest +---- diff --git a/docs/modules/nifi/pages/dependencies.adoc b/docs/modules/nifi/pages/dependencies.adoc deleted file mode 100644 index f84026ec..00000000 --- a/docs/modules/nifi/pages/dependencies.adoc +++ /dev/null @@ -1,16 +0,0 @@ -= Dependencies - -In contrast to the other Stackable operators, the config properties overwrite the property files (bootstrap.conf, nifi.properties...) in the deployed Apache NiFi package structure. There is no extra config directory. - -== ZooKeeper - -The state provider is ZooKeeper. 
-Which means a reference to an existing ZooKeeper ensemble must be provided - -Tested with version: - -* 3.5.8 - -Not working with version: - -* 3.4.14 diff --git a/docs/modules/nifi/pages/index.adoc b/docs/modules/nifi/pages/index.adoc index a817aba2..1f4205ce 100644 --- a/docs/modules/nifi/pages/index.adoc +++ b/docs/modules/nifi/pages/index.adoc @@ -1,8 +1,31 @@ = Stackable Operator for Apache NiFi +:description: The Stackable Operator for Apache NiFi is a Kubernetes operator that can manage Apache NiFi clusters. Learn about its features, resources, dependencies and demos, and see the list of supported NiFi versions. +:keywords: k8s, Kubernetes, Stackable Operator, Apache NiFi, open source, operator, data science, data exploration, big data -This is an operator for Kubernetes that can manage https://nifi.apache.org/[Apache NiFi] clusters. +This Operator manages https://nifi.apache.org/[Apache NiFi] clusters on Kubernetes. +Apache NiFi is an open-source data integration tool that provides a web-based interface for designing, monitoring and managing data flows between various systems and devices, using a visual programming approach. It supports a wide range of data sources, formats and features such as data provenance, security and clustering. -WARNING: This operator only works with images from the https://repo.stackable.tech/#browse/browse:docker:v2%2Fstackable%2Fnifi[Stackable] repository +== Getting started + +Get started with Apache NiFi and the Stackable Operator by following the xref:getting_started/index.adoc[] guide. It will guide you through the xref:getting_started/installation.adoc[installation] process and show you how to xref:getting_started/first_steps.adoc[connect] to the NiFi web interface. Afterwards have a look at the xref:usage_guide/index.adoc[] to learn how to configure your NiFi instance to your needs or run some <<demos>> to learn more about using NiFi with other components. + +== Operator Model + +The Operator manages the _NifiCluster_ custom resource. 
NiFi only has a single process that it needs to run, so the NifiCluster has only a single xref:concepts:roles-and-role-groups.adoc[role]: `node`. This role can be divided into multiple role groups. + +image::nifi_overview.drawio.svg[A diagram depicting the Kubernetes resources created by the Stackable Operator for Apache NiFi] + +For every role group the Operator creates a ConfigMap and StatefulSet which can have multiple replicas (Pods). Every role group is accessible through its own Service, and there is a Service for the whole Cluster. + +== Dependencies + +Apache NiFi depends on Apache ZooKeeper which you can run in Kubernetes with the xref:zookeeper:index.adoc[]. + +== [[demos]]Demos + +NiFi is often a good choice as a first step in a data pipeline when it comes to fetching the data in various formats from various sources. The xref:stackablectl::demos/data-lakehouse-iceberg-trino-spark.adoc[] demo uses NiFi to fetch six different datasets in various formats. The data is then ingested into a Kafka topic. Apache Kafka is also xref:kafka:index.adoc[part of the Stackable platform]. + +The xref:stackablectl::demos/nifi-kafka-druid-earthquake-data.adoc[] and xref:stackablectl::demos/nifi-kafka-druid-water-level-data.adoc[] demos use NiFi in the same way; both demos showcase downloading data from web APIs and ingesting it into Kafka. 
== Supported Versions diff --git a/docs/modules/nifi/pages/usage_guide/cluster_operations.adoc b/docs/modules/nifi/pages/usage_guide/cluster-operations.adoc similarity index 100% rename from docs/modules/nifi/pages/usage_guide/cluster_operations.adoc rename to docs/modules/nifi/pages/usage_guide/cluster-operations.adoc diff --git a/docs/modules/nifi/pages/usage_guide/extra_volumes.adoc b/docs/modules/nifi/pages/usage_guide/extra-volumes.adoc similarity index 100% rename from docs/modules/nifi/pages/usage_guide/extra_volumes.adoc rename to docs/modules/nifi/pages/usage_guide/extra-volumes.adoc diff --git a/docs/modules/nifi/pages/usage_guide/index.adoc b/docs/modules/nifi/pages/usage_guide/index.adoc index c6c2a8e2..fc533392 100644 --- a/docs/modules/nifi/pages/usage_guide/index.adoc +++ b/docs/modules/nifi/pages/usage_guide/index.adoc @@ -1,3 +1,52 @@ = Usage guide -This section will help you to use various aspects of the Stackable Operator for Apache NiFi. For a general introduction into the operator follow the xref:getting_started/index.adoc[] guide. \ No newline at end of file +This section will help you to use various aspects of the Stackable Operator for Apache NiFi. For a general introduction to the operator, follow the xref:getting_started/index.adoc[] guide. Below is a general overview of some configuration aspects; have a look at the sub pages for details. + +The cluster is configured via a YAML manifest file. This custom resource specifies the number of replicas for each role group or role-specific configuration like resource requests. 
+The following listing shows an example configuration: + +[source,yaml] +---- +apiVersion: nifi.stackable.tech/v1alpha1 +kind: NifiCluster +metadata: + name: simple-nifi +spec: + image: + productVersion: 1.18.0 + stackableVersion: "23.4.0" + clusterConfig: + zookeeperConfigMapName: simple-nifi-znode # <1> + authentication: # <2> + method: + SingleUser: + adminCredentialsSecret: + name: nifi-admin-credentials-simple + namespace: default + allowAnonymousAccess: true + extraVolumes: # <3> + - name: nifi-client-certs + secret: + secretName: nifi-client-certs + sensitiveProperties: + keySecret: nifi-sensitive-property-key + autoGenerate: true + nodes: + roleGroups: + default: + config: + resources: # <4> + cpu: + min: "500m" + max: "4" + memory: + limit: '2Gi' + replicas: 3 +---- + +<1> The xref:usage_guide/zookeeper-connection.adoc[ZooKeeper instance] to use. +<2> How users should xref:usage_guide/security.adoc[authenticate] themselves. +<3> xref:usage_guide/extra-volumes.adoc[Extra volumes] with files that can be referenced in custom workflows. +<4> xref:usage_guide/resource-configuration.adoc[CPU and memory configuration] can be set per role group. + +Not shown are the common settings for xref:usage_guide/cluster-operations.adoc[starting and stopping the cluster] and xref:usage_guide/pod-placement.adoc[distributing Pods]. Additionally, you can set any NiFi setting using xref:usage_guide/configuration-environment-overrides.adoc[overrides]. You can also configure xref:usage_guide/log-aggregation.adoc[log aggregation]. diff --git a/docs/modules/nifi/pages/usage_guide/security.adoc b/docs/modules/nifi/pages/usage_guide/security.adoc index dfbe2818..faf7a5ea 100644 --- a/docs/modules/nifi/pages/usage_guide/security.adoc +++ b/docs/modules/nifi/pages/usage_guide/security.adoc @@ -4,11 +4,13 @@ Every user has to authenticate themselves before using NiFI. There are multiple options to set up the authentication of users. 
+All authentication-related parameters are configured under `spec.clusterConfig.authentication`. === Single user -The default setting is to only provision a single user with administrative privileges. -You need to specify the username and password of the user. +Currently, the only supported authentication method is "SingleUser", which allows the definition of one admin user which can then access the cluster. +Specification of this user's credentials happens via referring to a Secret in Kubernetes; this secret will need to contain at least the two keys `username` and `password`. +Extra keys may be present, but will be ignored by the operator. [source,yaml] ---- @@ -36,6 +38,11 @@ spec: Additional users can not be added. +==== Anonymous Access + +NiFi can be configured to allow anonymous access to the web UI; this is turned off by default, but can be enabled via the parameter `allowAnonymousAccess`. +This setting is independent of the configured authentication method and will override anything specified for the authentication provider. + [#authentication-ldap] === LDAP diff --git a/docs/modules/nifi/pages/usage_guide/zookeeper-connection.adoc b/docs/modules/nifi/pages/usage_guide/zookeeper-connection.adoc new file mode 100644 index 00000000..d28d9302 --- /dev/null +++ b/docs/modules/nifi/pages/usage_guide/zookeeper-connection.adoc @@ -0,0 +1,12 @@ += Connecting NiFi to ZooKeeper + +NiFi in cluster mode requires a ZooKeeper ensemble for state management and leader election purposes. This operator at the moment does not support single node deployments without ZooKeeper, hence this is a required setting. + +[source,yaml] +---- +spec: + clusterConfig: + zookeeperConfigMapName: simple-nifi-znode +---- + +Configuration happens via a ConfigMap, which needs to contain two keys called `ZOOKEEPER_HOSTS` with the value being the ZooKeeper connection string and `ZOOKEEPER_CHROOT` with the value being the ZooKeeper chroot. 
This ConfigMap typically is created by a ZookeeperZnode of the xref:zookeeper:index.adoc[ZooKeeper Operator]. diff --git a/docs/modules/nifi/partials/config_properties.adoc b/docs/modules/nifi/partials/config_properties.adoc deleted file mode 100644 index 7e6b944d..00000000 --- a/docs/modules/nifi/partials/config_properties.adoc +++ /dev/null @@ -1,106 +0,0 @@ -== Kubernetes custom resource options - -The cluster can be configured via a YAML file. This custom resource specifies the amount of replicas for each role group or role specific configuration like port definitions etc. -The following listing shows a fairly complete example that sets most available options, for more detail about the individual elements please refer to the table further down on the page. - -[source,yaml] ----- -apiVersion: nifi.stackable.tech/v1alpha1 -kind: NifiCluster -metadata: - name: simple-nifi -spec: - image: - productVersion: 1.18.0 - stackableVersion: "23.4.0-rc1" - clusterConfig: - authentication: - method: - SingleUser: - adminCredentialsSecret: - name: nifi-admin-credentials-simple - namespace: default - allowAnonymousAccess: true - sensitiveProperties: - keySecret: nifi-sensitive-property-key - autoGenerate: true - zookeeperConfigMapName: simple-nifi-znode - nodes: - roleGroups: - default: - selector: - matchLabels: - kubernetes.io/os: linux - config: - log: - rootLogLevel: INFO - replicas: 3 ----- - -=== Node Configuration - -[source,yaml] ----- -nodes: - roleGroups: - default: - selector: - matchLabels: - kubernetes.io/os: linux - config: {} - replicas: 3 ----- -The `nodes` element is used to define how many pods with which configuration should be rolled out. 
-It is possible to define multiple groups of nodes, each with its own distinct configuration, every `roleGroup` has the following elements: - - - selector: a Kubernetes `Selector` to specify criteria that can be used to target nodes - - config: The NiFi config to use for this group - - replicas: How many pods to roll out for this group - -=== Authentication - -[source,yaml] ----- -clusterConfig: - authentication: - method: - SingleUser: - adminCredentialsSecret: - name: nifi-admin-credentials-simple - namespace: default - allowAnonymousAccess: true ----- -All authentication related parameters are configured in the `authentication` element. - -==== Authentication Method - -Currently, the only supported authentication method is "SingleUser", which allows the definition of one admin user which can then access the cluster. -Specification of these users credentials happens via referring to a Secret in Kubernetes, this secret will need to contain at least the two keys `username` and `password`. -Extra keys may be present, but will be ignored by the operator. - -==== Anonymous Access - -NiFi can be configured to allow anonymous access to the web UI, this is turned off by default, but can be enabled via the parameter `allowAnonymousAccess`. -This setting is independent of the configured authentication method and will override anything specified for the authentication provider. - -=== ZooKeeper Connection - -[source,yaml] ----- -clusterConfig: - zookeeperConfigMapName: simple-nifi-znode ----- -NiFi in cluster mode requires a ZooKeeper ensemble for state management and leader election purposes, this operator at the moment does not support single node deployments without ZooKeeper, hence this is a required setting. -Configuration happens via a ConfigMap, which needs to contain two keys called `ZOOKEEPER_HOSTS` with the value being the ZooKeeper connection string and `ZOOKEEPER_CHROOT` with the value being the ZooKeeper chroot. 
This ConfigMap typically is created by a ZookeeperZnode of the https://github.com/stackabletech/zookeeper-operator[ZooKeeper Operator]. - -=== NiFi Configuration - -[source,yaml] ----- -config: - log: - rootLogLevel: INFO ----- -This is the actual NiFi configuration element. -At the moment only a very limited set of options is supported, but you can expect these to grow quickly over time. -Anything that is not mentioned here can be configured via configOverrides. This does not include properties for XML files like `state-management.xml`. diff --git a/docs/modules/nifi/partials/env_var_args.adoc b/docs/modules/nifi/partials/env_var_args.adoc deleted file mode 100644 index 7de05441..00000000 --- a/docs/modules/nifi/partials/env_var_args.adoc +++ /dev/null @@ -1,58 +0,0 @@ -== Environment variables - -This operator accepts the following environment variables: - -=== PRODUCT_CONFIG - -*Default value*: `/etc/stackable/nifi-operator/config-spec/properties.yaml` - -*Required*: false - -*Multiple values:* false - -[source] ----- -export PRODUCT_CONFIG=/foo/bar/properties.yaml -stackable-nifi-operator run ----- - -or via docker: - ----- -docker run \ - --name nifi-operator \ - --network host \ - --env KUBECONFIG=/home/stackable/.kube/config \ - --env PRODUCT_CONFIG=/my/product/config.yaml \ - --mount type=bind,source="$HOME/.kube/config",target="/home/stackable/.kube/config" \ - docker.stackable.tech/stackable/nifi-operator:latest ----- - -=== WATCH_NAMESPACE - -*Default value*: All namespaces - -*Required*: false - -*Multiple values:* false - -The operator will **only** watch for resources in the provided namespace `test`: - -[source] ----- -export WATCH_NAMESPACE=test -stackable-nifi-operator run ----- - -or via docker: - -[source] ----- -docker run \ ---name nifi-operator \ ---network host \ ---env KUBECONFIG=/home/stackable/.kube/config \ ---env WATCH_NAMESPACE=test \ ---mount type=bind,source="$HOME/.kube/config",target="/home/stackable/.kube/config" \ 
-docker.stackable.tech/stackable/nifi-operator:latest ----- diff --git a/docs/modules/nifi/partials/nav.adoc b/docs/modules/nifi/partials/nav.adoc index 29841b74..fb65e8b9 100644 --- a/docs/modules/nifi/partials/nav.adoc +++ b/docs/modules/nifi/partials/nav.adoc @@ -1,15 +1,15 @@ * xref:nifi:getting_started/index.adoc[] ** xref:nifi:getting_started/installation.adoc[] ** xref:nifi:getting_started/first_steps.adoc[] -* xref:nifi:dependencies.adoc[] -* xref:nifi:configuration.adoc[] * xref:nifi:usage_guide/index.adoc[] +** xref:nifi:usage_guide/cluster-operations.adoc[] +** xref:nifi:usage_guide/pod-placement.adoc[] +** xref:nifi:usage_guide/zookeeper-connection.adoc[] +** xref:nifi:usage_guide/extra-volumes.adoc[] ** xref:nifi:usage_guide/security.adoc[] ** xref:nifi:usage_guide/resource-configuration.adoc[] -** xref:nifi:usage_guide/extra_volumes.adoc[] -** xref:nifi:usage_guide/monitoring.adoc[] ** xref:nifi:usage_guide/log-aggregation.adoc[] -** xref:nifi:usage_guide/configuration-environment-overrides.adoc[] +** xref:nifi:usage_guide/monitoring.adoc[] ** xref:nifi:usage_guide/updating.adoc[] -** xref:nifi:usage_guide/pod-placement.adoc[] -** xref:nifi:usage_guide/cluster_operations.adoc[] +** xref:nifi:usage_guide/configuration-environment-overrides.adoc[] +* xref:nifi:configuration.adoc[] diff --git a/docs/modules/nifi/partials/supported-versions.adoc b/docs/modules/nifi/partials/supported-versions.adoc index f9fec206..75b84a74 100644 --- a/docs/modules/nifi/partials/supported-versions.adoc +++ b/docs/modules/nifi/partials/supported-versions.adoc @@ -2,12 +2,6 @@ // This is a separate file, since it is used by both the direct NiFi-Operator documentation, and the overarching // Stackable Platform documentation. -- 1.15.0 -- 1.15.1 -- 1.15.2 -- 1.15.3 -- 1.16.0 -- 1.16.1 -- 1.16.2 -- 1.16.3 -- 1.18.0 +* 1.15.0, 1.15.1, 1.15.2, 1.15.3 +* 1.16.0, 1.16.1, 1.16.2, 1.16.3 +* 1.18.0