Table of Contents:

1. What is MLflow and Why Should You Use It?

2. Using MLflow with a Conda Environment

# Install Homebrew. The installer URL must stay on a single line: a line
# break inside the quoted $(curl ...) substitution splits the URL in two.
/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install.sh)"
# Install git with Homebrew, then fetch the tutorial repository.
brew install git
git clone https://github.com/Noodle-ai/mlflow_part1_condaEnv.git
# Option A: create the "mlflow_demos" environment from a conda.yaml file in
# the current directory, then activate it.
# NOTE(review): presumably run from inside the cloned repository - confirm.
conda env create --file conda.yaml
conda activate mlflow_demos
# Option B: build the "mlflow_demos" environment by hand with pinned versions.
# NOTE(review): this looks like an alternative to Option A rather than a
# follow-up step (both create an environment with the same name) - confirm.
conda create --name mlflow_demos python=3.8.3
conda activate mlflow_demos
conda install -c anaconda jupyter=1.0.0
conda install -c conda-forge mlflow=1.8.0
conda install scikit-learn=0.22.1
conda install -c anaconda psycopg2=2.8.5
conda install -c anaconda boto3=1.14.12
# Write the environment definition to conda.yaml, the file consumed by
# `conda env create --file conda.yaml` above.
conda env export --name mlflow_demos > conda.yaml
import mlflow

# Use the run as a context manager so it is always closed, even if one of
# the logging calls below raises; a bare start_run()/end_run() pair would
# leave the run open in that case.
with mlflow.start_run():
    # Log a parameter (key-value pair)
    mlflow.log_param("param1", 5)

    # Log a metric; metrics can be updated throughout the run
    mlflow.log_metric("foo", 1)
    mlflow.log_metric("foo", 2)
    mlflow.log_metric("foo", 3)

    # Log an artifact (output file): write the file first, then hand its
    # path to MLflow.
    with open("output.txt", "w", encoding="utf-8") as f:
        f.write("Hello world!")
    mlflow.log_artifact("output.txt")
# Shell command (not Python): start the MLflow UI.
mlflow ui

Image for post

Image for post

Image for post

# Shell usage template (not Python): replace <alpha> and <l1_ratio> with
# numeric values; they are passed to train.py as positional arguments.
python train.py <alpha> <l1_ratio>
# Wine Quality Sample
def train(in_alpha, in_l1_ratio):
    """Train an ElasticNet model on the red-wine quality data and log it.

    Downloads the wine-quality CSV, splits it into train/test sets, fits an
    ElasticNet regressor, prints RMSE/MAE/R2 on the test set, and records
    the parameters, metrics, and fitted model in a new MLflow run.

    Args:
        in_alpha: ElasticNet ``alpha``; anything ``float()`` accepts, or
            ``None`` to use the default of 0.5.
        in_l1_ratio: ElasticNet ``l1_ratio``; anything ``float()`` accepts,
            or ``None`` to use the default of 0.5.

    Returns:
        None. Results are printed and logged to MLflow.

    Raises:
        ValueError: If a non-``None`` argument cannot be converted by
            ``float()``.
    """
    # Imports are kept function-local, as in the original snippet, so the
    # function can be pasted into a notebook cell on its own.
    import pandas as pd
    import numpy as np
    from sklearn.metrics import (
        mean_squared_error,
        mean_absolute_error,
        r2_score,
    )
    from sklearn.model_selection import train_test_split
    from sklearn.linear_model import ElasticNet
    import mlflow
    import mlflow.sklearn

    def eval_metrics(actual, pred):
        """Return (rmse, mae, r2) for the given targets and predictions."""
        rmse = np.sqrt(mean_squared_error(actual, pred))
        mae = mean_absolute_error(actual, pred)
        r2 = r2_score(actual, pred)
        return rmse, mae, r2

    # Fix NumPy's global seed before the (randomized) train/test split.
    np.random.seed(40)

    # Read the wine-quality csv file from the URL
    csv_url = (
        'http://archive.ics.uci.edu/ml/machine-learning-databases/'
        'wine-quality/winequality-red.csv'
    )
    data = pd.read_csv(csv_url, sep=';')

    # Split the data into training and test sets (0.75, 0.25) split
    train, test = train_test_split(data)

    # The predicted column is "quality" which is a scalar [3, 9]
    train_x = train.drop(["quality"], axis=1)
    test_x = test.drop(["quality"], axis=1)
    train_y = train[["quality"]]
    test_y = test[["quality"]]

    # Set default values if no alpha / l1_ratio is provided. The None check
    # must happen before float(): float() never returns None, and
    # float(None) raises TypeError, so the default would be unreachable.
    alpha = 0.5 if in_alpha is None else float(in_alpha)
    l1_ratio = 0.5 if in_l1_ratio is None else float(in_l1_ratio)

    # Useful for multiple runs
    with mlflow.start_run():
        # Execute ElasticNet
        lr = ElasticNet(
            alpha=alpha,
            l1_ratio=l1_ratio,
            random_state=42,
        )
        lr.fit(train_x, train_y)

        # Evaluate Metrics
        predicted_qualities = lr.predict(test_x)
        (rmse, mae, r2) = eval_metrics(test_y, predicted_qualities)

        # Print out metrics
        print("Elasticnet model (alpha=%f, l1_ratio=%f):" % (alpha, l1_ratio))
        print("  RMSE: %s" % rmse)
        print("  MAE: %s" % mae)
        print("  R2: %s" % r2)

        # Log parameter, metrics, and model to MLflow
        mlflow.log_param("alpha", alpha)
        mlflow.log_param("l1_ratio", l1_ratio)
        mlflow.log_metric("rmse", rmse)
        mlflow.log_metric("r2", r2)
        mlflow.log_metric("mae", mae)
        mlflow.sklearn.log_model(lr, "model")

Image for post

Image for post

model_path = './mlruns/0/<run_id>/artifacts/model'
mlflow.<model_flavor>.load_model(model_path)

Image for post

# Run the project in the current directory with explicit hyperparameters.
mlflow run . -P alpha=1.0 -P l1_ratio=1.0
# Run the project directly from its Git repository. The trailing backslash
# keeps the second -P flag part of the same command instead of starting a
# new (invalid) one.
mlflow run https://github.com/Noodle-ai/mlflow_part1_condaEnv -P alpha=1.0 \
    -P l1_ratio=0.8
mlflow.sklearn.log_model(lr, "model")

Image for post

# Serve the logged model over HTTP on port 1234. Replace <run_id> with the
# ID of the run whose model should be served.
mlflow models serve -m ./mlruns/0/<run_id>/artifacts/model -p 1234
# Send one row of features to the served model on the same port. The
# backslashes join the command across lines; the line breaks inside the
# single-quoted JSON payload are plain JSON whitespace.
curl -X POST -H "Content-Type:application/json; format=pandas-split" \
    --data '{"columns":["alcohol", "chlorides", "citric acid", "density",
"fixed acidity", "free sulfur dioxide", "pH", "residual sugar", "sulphates",
"total sulfur dioxide", "volatile acidity"],"data":[[12.8, 0.029, 0.48, 0.98,
6.2, 29, 3.33, 1.2, 0.39, 75, 0.66]]}' \
    http://127.0.0.1:1234/invocations
[3.7783608837127516]

References

MLflow

Concepts: https://www.mlflow.org/docs/latest/concepts.html

Quickstart: https://www.mlflow.org/docs/latest/quickstart.html

Tutorial: https://www.mlflow.org/docs/latest/tutorials-and-examples/tutorial.html#conda-example

Homebrew:

https://brew.sh/

Git:

https://www.atlassian.com/git/tutorials/install-git

Anaconda:

https://docs.anaconda.com/anaconda/install/mac-os/