Airflow

安装 Airflow

Based on Ubuntu 20.04

pip3 install apache-airflow==2.0.0

初始化后端 MySQL 数据库

CREATE DATABASE airflow CHARACTER SET utf8 COLLATE utf8_unicode_ci;
CREATE USER 'airflow' IDENTIFIED BY 'airflow';
GRANT ALL PRIVILEGES ON airflow.* TO 'airflow';

配置 MySQL 数据库连接

vim ~/airflow/airflow.cfg

sql_alchemy_conn = mysql://airflow:airflow@localhost:3306/airflow

安装 MySQL 驱动

apt install libmysqlclient-dev python3-dev
pip3 install 'apache-airflow[mysql]'

初始化数据库

过程可能有些慢，请耐心等待。

airflow db init

配置 Web 认证

pip3 install 'apache-airflow[password]'

airflow users create \
--username airflow \
--firstname airflow \
--lastname airflow \
--role Admin \
--email admin@example.org

配置到 Systemd

编辑 Webserver 启动文件

vim /etc/systemd/system/airflow-webserver.service

[Unit]
Description=Airflow
Documentation=https://airflow.apache.org/
After=network-online.target
Wants=network-online.target

[Service]
Environment="DAEMON_ARGS=  "
EnvironmentFile=-/etc/default/%p
ExecStart=/usr/local/bin/airflow webserver $DAEMON_ARGS
ExecReload=/bin/kill -HUP $MAINPID
Restart=on-failure

KillSignal=SIGINT

[Install]
WantedBy=multi-user.target

编辑 Scheduler 启动文件

vim /etc/systemd/system/airflow-scheduler.service

[Unit]
Description=Airflow
Documentation=https://airflow.apache.org/
After=network-online.target
Wants=network-online.target

[Service]
Environment="DAEMON_ARGS=  "
EnvironmentFile=-/etc/default/%p
ExecStart=/usr/local/bin/airflow scheduler $DAEMON_ARGS
ExecReload=/bin/kill -HUP $MAINPID
Restart=on-failure

KillSignal=SIGINT

[Install]
WantedBy=multi-user.target

使配置生效

systemctl daemon-reload

配置开机启动

systemctl enable airflow-scheduler.service 
systemctl enable airflow-webserver.service 

启动服务

systemctl start airflow-scheduler.service
systemctl start airflow-webserver.service 

查看服务状态

systemctl status airflow-scheduler.service 
systemctl status airflow-webserver.service 

资源

https://airflow.apachecn.org/#/zh/scheduler

评论栏