
Created: 30/08/2024


Name: Maulana Kavaldo

Dicoding ID: mkavaldo

Install Library¶

In [ ]:
%%capture
!pip install tfx

Setup Base¶

In [ ]:
from google.colab import drive
drive.mount('/content/drive')
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
In [ ]:
DATA_ROOT = '/content/drive/MyDrive/Colab Notebooks/ML-Ops/submission-1/data/'

Import Library¶

In [ ]:
import pandas as pd

Load Dataset¶

In [ ]:
water = pd.read_csv(DATA_ROOT+'water_potability.csv')
water.head()
Out[ ]:
ph Hardness Solids Chloramines Sulfate Conductivity Organic_carbon Trihalomethanes Turbidity Potability
0 8.316766 214.373394 22018.417441 8.059332 356.886136 363.266516 18.436524 100.341674 4.628771 0
1 9.092223 181.101509 17978.986339 6.546600 310.135738 398.410813 11.558279 31.997993 4.075075 0
2 5.584087 188.313324 28748.687739 7.544869 326.678363 280.467916 8.399735 54.917862 2.559708 0
3 10.223862 248.071735 28749.716544 7.513408 393.663396 283.651634 13.789695 84.603556 2.672989 0
4 8.635849 203.361523 13672.091764 4.563009 303.309771 474.607645 12.363817 62.798309 4.401425 0
In [ ]:
water.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2011 entries, 0 to 2010
Data columns (total 10 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   ph               2011 non-null   float64
 1   Hardness         2011 non-null   float64
 2   Solids           2011 non-null   float64
 3   Chloramines      2011 non-null   float64
 4   Sulfate          2011 non-null   float64
 5   Conductivity     2011 non-null   float64
 6   Organic_carbon   2011 non-null   float64
 7   Trihalomethanes  2011 non-null   float64
 8   Turbidity        2011 non-null   float64
 9   Potability       2011 non-null   int64  
dtypes: float64(9), int64(1)
memory usage: 157.2 KB

Get the feature names, which will be needed later when writing the modular files (the transform, tuner, and trainer modules).

In [ ]:
FEATURES = water.columns.tolist()
FEATURES
Out[ ]:
['ph',
 'Hardness',
 'Solids',
 'Chloramines',
 'Sulfate',
 'Conductivity',
 'Organic_carbon',
 'Trihalomethanes',
 'Turbidity',
 'Potability']

Import TFX Libraries¶

In [ ]:
import os
import sys
import tensorflow as tf
import tfx
import tensorflow_model_analysis as tfma
import tensorflow_transform as tft
from tfx.components import (
    CsvExampleGen, StatisticsGen, SchemaGen, ExampleValidator,
    Transform, Trainer, Tuner, Evaluator, Pusher
)
from tfx.proto import example_gen_pb2, trainer_pb2, pusher_pb2
from tfx.orchestration.experimental.interactive.interactive_context import InteractiveContext
from tfx.dsl.components.common.resolver import Resolver
from tfx.dsl.input_resolution.strategies.latest_blessed_model_strategy import LatestBlessedModelStrategy
from tfx.types import Channel, standard_artifacts
In [ ]:
import keras
print('Version:')
print('---------------------------')
print(f"{'TensorFlow':15} {tf.__version__}")
print(f"{'Python':15} {sys.version.split()[0]}")  # Hanya versi Python
print(f"{'TFX':15} {tfx.__version__}")
print(f"{'TFMA':15} {tfma.__version__}")
print(f"{'Keras':15} {keras.__version__}")
Version:
---------------------------
TensorFlow      2.15.1
Python          3.10.12
TFX             1.15.1
TFMA            0.46.0
Keras           2.15.0
In [ ]:
PIPELINE_NAME = 'mkavaldo-pipeline'
SCHEMA_PIPELINE_NAME = 'water-tfdv-schema'

PIPELINE_ROOT = os.path.join('pipelines', PIPELINE_NAME)
METADATA_PATH = os.path.join('metadata', PIPELINE_NAME, 'metadata.db')
SERVING_MODEL_DIR = os.path.join('serving_model', PIPELINE_NAME)
In [ ]:
interactive_context = InteractiveContext(pipeline_root = PIPELINE_ROOT)
WARNING:absl:InteractiveContext metadata_connection_config not provided: using SQLite ML Metadata database at pipelines/mkavaldo-pipeline/metadata.sqlite.
In [ ]:
output = example_gen_pb2.Output(
    split_config = example_gen_pb2.SplitConfig(splits = [
        example_gen_pb2.SplitConfig.Split(name = "train", hash_buckets = 9),
        example_gen_pb2.SplitConfig.Split(name = "eval", hash_buckets = 1)
    ])
)

example_gen = CsvExampleGen(input_base = DATA_ROOT, output_config = output)
interactive_context.run(example_gen)
WARNING:apache_beam.runners.interactive.interactive_environment:Dependencies required for Interactive Beam PCollection visualization are not available, please use: `pip install apache-beam[interactive]` to install necessary dependencies to enable all data visualization features.
WARNING:apache_beam.io.tfrecordio:Couldn't find python-snappy so the implementation of _TFRecordUtil._masked_crc32c is not as fast as it could be.
Out[ ]:
ExecutionResult at 0x7fd0291b9f00
.execution_id 1
.component
CsvExampleGen at 0x7fd028d3af20
.inputs {}
.outputs
['examples']
Channel of type 'Examples' (1 artifact) at 0x7fd028d3b490
.type_name Examples
._artifacts
[0]
Artifact of type 'Examples' (uri: pipelines/mkavaldo-pipeline/CsvExampleGen/examples/1) at 0x7fd028d3b160
.type <class 'tfx.types.standard_artifacts.Examples'>
.uri pipelines/mkavaldo-pipeline/CsvExampleGen/examples/1
.span 0
.split_names ["train", "eval"]
.version 0
.exec_properties
['input_base'] /content/drive/MyDrive/Colab Notebooks/ML-Ops/submission-1/data/
['input_config'] { "splits": [ { "name": "single_split", "pattern": "*" } ] }
['output_config'] { "split_config": { "splits": [ { "hash_buckets": 9, "name": "train" }, { "hash_buckets": 1, "name": "eval" } ] } }
['output_data_format'] 6
['output_file_format'] 5
['custom_config'] None
['range_config'] None
['span'] 0
['version'] None
['input_fingerprint'] split:single_split,num_files:1,total_bytes:333640,xor_checksum:1725110686,sum_checksum:1725110686
.component.inputs {}
.component.outputs
['examples']
Channel of type 'Examples' (1 artifact) at 0x7fd028d3b490
.type_name Examples
._artifacts
[0]
Artifact of type 'Examples' (uri: pipelines/mkavaldo-pipeline/CsvExampleGen/examples/1) at 0x7fd028d3b160
.type <class 'tfx.types.standard_artifacts.Examples'>
.uri pipelines/mkavaldo-pipeline/CsvExampleGen/examples/1
.span 0
.split_names ["train", "eval"]
.version 0
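The split configuration above hashes each example into 10 buckets, assigning 9 to the train split and 1 to the eval split, i.e. roughly a 90/10 split. As a quick sanity check, the generated TFRecords can be read back directly; a minimal sketch, assuming the Split-train directory layout and GZIP compression implied by the artifact URI above:

import glob
import tensorflow as tf

# Path taken from the CsvExampleGen artifact URI above (directory layout is an assumption).
train_files = glob.glob(
    'pipelines/mkavaldo-pipeline/CsvExampleGen/examples/1/Split-train/*.gz')

raw_dataset = tf.data.TFRecordDataset(train_files, compression_type='GZIP')
for raw_record in raw_dataset.take(1):
    example = tf.train.Example()
    example.ParseFromString(raw_record.numpy())
    print(example)  # one serialized row of the water dataset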
In [ ]:
statistic_gen = StatisticsGen(
    examples = example_gen.outputs["examples"]
)

interactive_context.run(statistic_gen)
Out[ ]:
ExecutionResult at 0x7fd026d38280
.execution_id 2
.component
StatisticsGen at 0x7fd028d3b5b0
.inputs
['examples']
Channel of type 'Examples' (1 artifact) at 0x7fd028d3b490
.type_name Examples
._artifacts
[0]
Artifact of type 'Examples' (uri: pipelines/mkavaldo-pipeline/CsvExampleGen/examples/1) at 0x7fd028d3b160
.type <class 'tfx.types.standard_artifacts.Examples'>
.uri pipelines/mkavaldo-pipeline/CsvExampleGen/examples/1
.span 0
.split_names ["train", "eval"]
.version 0
.outputs
['statistics']
Channel of type 'ExampleStatistics' (1 artifact) at 0x7fd028d3b190
.type_name ExampleStatistics
._artifacts
[0]
Artifact of type 'ExampleStatistics' (uri: pipelines/mkavaldo-pipeline/StatisticsGen/statistics/2) at 0x7fd0cc20cf10
.type <class 'tfx.types.standard_artifacts.ExampleStatistics'>
.uri pipelines/mkavaldo-pipeline/StatisticsGen/statistics/2
.span 0
.split_names ["train", "eval"]
.exec_properties
['stats_options_json'] None
['exclude_splits'] []
.component.inputs
['examples']
Channel of type 'Examples' (1 artifact) at 0x7fd028d3b490
.type_name Examples
._artifacts
[0]
Artifact of type 'Examples' (uri: pipelines/mkavaldo-pipeline/CsvExampleGen/examples/1) at 0x7fd028d3b160
.type <class 'tfx.types.standard_artifacts.Examples'>
.uri pipelines/mkavaldo-pipeline/CsvExampleGen/examples/1
.span 0
.split_names ["train", "eval"]
.version 0
.component.outputs
['statistics']
Channel of type 'ExampleStatistics' (1 artifact) at 0x7fd028d3b190
.type_name ExampleStatistics
._artifacts
[0]
Artifact of type 'ExampleStatistics' (uri: pipelines/mkavaldo-pipeline/StatisticsGen/statistics/2) at 0x7fd0cc20cf10
.type <class 'tfx.types.standard_artifacts.ExampleStatistics'>
.uri pipelines/mkavaldo-pipeline/StatisticsGen/statistics/2
.span 0
.split_names ["train", "eval"]
In [ ]:
interactive_context.show(statistic_gen.outputs['statistics'])
Artifact at pipelines/mkavaldo-pipeline/StatisticsGen/statistics/2

'train' split:

'eval' split:

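The rendered statistics are also persisted inside the StatisticsGen artifact as a binary DatasetFeatureStatisticsList proto, so they can be reloaded outside the InteractiveContext; a sketch, assuming the FeatureStats.pb file layout of recent TFX releases:

import tensorflow_data_validation as tfdv
from tensorflow_metadata.proto.v0 import statistics_pb2

# Assumed file layout inside the StatisticsGen artifact shown above.
stats_path = 'pipelines/mkavaldo-pipeline/StatisticsGen/statistics/2/Split-train/FeatureStats.pb'

stats = statistics_pb2.DatasetFeatureStatisticsList()
with open(stats_path, 'rb') as f:
    stats.ParseFromString(f.read())

tfdv.visualize_statistics(stats)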
In [ ]:
schema_gen = SchemaGen(statistics = statistic_gen.outputs["statistics"])

interactive_context.run(schema_gen)
Out[ ]:
ExecutionResult at 0x7fd026d39840
.execution_id 3
.component
SchemaGen at 0x7fd028d3b5e0
.inputs
['statistics']
Channel of type 'ExampleStatistics' (1 artifact) at 0x7fd028d3b190
.type_name ExampleStatistics
._artifacts
[0]
Artifact of type 'ExampleStatistics' (uri: pipelines/mkavaldo-pipeline/StatisticsGen/statistics/2) at 0x7fd0cc20cf10
.type <class 'tfx.types.standard_artifacts.ExampleStatistics'>
.uri pipelines/mkavaldo-pipeline/StatisticsGen/statistics/2
.span 0
.split_names ["train", "eval"]
.outputs
['schema']
Channel of type 'Schema' (1 artifact) at 0x7fd028d3bb50
.type_name Schema
._artifacts
[0]
Artifact of type 'Schema' (uri: pipelines/mkavaldo-pipeline/SchemaGen/schema/3) at 0x7fd028d3bd00
.type <class 'tfx.types.standard_artifacts.Schema'>
.uri pipelines/mkavaldo-pipeline/SchemaGen/schema/3
.exec_properties
['infer_feature_shape'] 1
['exclude_splits'] []
.component.inputs
['statistics']
Channel of type 'ExampleStatistics' (1 artifact) at 0x7fd028d3b190
.type_name ExampleStatistics
._artifacts
[0]
Artifact of type 'ExampleStatistics' (uri: pipelines/mkavaldo-pipeline/StatisticsGen/statistics/2) at 0x7fd0cc20cf10
.type <class 'tfx.types.standard_artifacts.ExampleStatistics'>
.uri pipelines/mkavaldo-pipeline/StatisticsGen/statistics/2
.span 0
.split_names ["train", "eval"]
.component.outputs
['schema']
Channel of type 'Schema' (1 artifact) at 0x7fd028d3bb50
.type_name Schema
._artifacts
[0]
Artifact of type 'Schema' (uri: pipelines/mkavaldo-pipeline/SchemaGen/schema/3) at 0x7fd028d3bd00
.type <class 'tfx.types.standard_artifacts.Schema'>
.uri pipelines/mkavaldo-pipeline/SchemaGen/schema/3
In [ ]:
interactive_context.show(schema_gen.outputs["schema"])
Artifact at pipelines/mkavaldo-pipeline/SchemaGen/schema/3

Feature name        Type   Presence  Valency  Domain
'Chloramines'       FLOAT  required           -
'Conductivity'      FLOAT  required           -
'Hardness'          FLOAT  required           -
'Organic_carbon'    FLOAT  required           -
'Potability'        INT    required           -
'Solids'            FLOAT  required           -
'Sulfate'           FLOAT  required           -
'Trihalomethanes'   FLOAT  required           -
'Turbidity'         FLOAT  required           -
'ph'                FLOAT  required           -
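SchemaGen infers every column as a required feature with no domain constraints. The inferred schema can be curated before being reused, for example constraining ph to its physically valid 0-14 range; a sketch using TFDV, assuming the schema.pbtxt file inside the SchemaGen artifact:

import tensorflow_data_validation as tfdv
from tensorflow_metadata.proto.v0 import schema_pb2

# Assumed file layout inside the SchemaGen artifact shown above.
schema = tfdv.load_schema_text(
    'pipelines/mkavaldo-pipeline/SchemaGen/schema/3/schema.pbtxt')

# Constrain ph to its valid range before reusing the schema downstream.
tfdv.set_domain(schema, 'ph', schema_pb2.FloatDomain(min=0.0, max=14.0))
tfdv.display_schema(schema)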
In [ ]:
example_validator = ExampleValidator(
    statistics = statistic_gen.outputs["statistics"],
    schema = schema_gen.outputs["schema"]
)

interactive_context.run(example_validator)
Out[ ]:
ExecutionResult at 0x7fd026c464a0
.execution_id 4
.component
ExampleValidator at 0x7fd025a7b430
.inputs
['statistics']
Channel of type 'ExampleStatistics' (1 artifact) at 0x7fd028d3b190
.type_name ExampleStatistics
._artifacts
[0]
Artifact of type 'ExampleStatistics' (uri: pipelines/mkavaldo-pipeline/StatisticsGen/statistics/2) at 0x7fd0cc20cf10
.type <class 'tfx.types.standard_artifacts.ExampleStatistics'>
.uri pipelines/mkavaldo-pipeline/StatisticsGen/statistics/2
.span 0
.split_names ["train", "eval"]
['schema']
Channel of type 'Schema' (1 artifact) at 0x7fd028d3bb50
.type_name Schema
._artifacts
[0]
Artifact of type 'Schema' (uri: pipelines/mkavaldo-pipeline/SchemaGen/schema/3) at 0x7fd028d3bd00
.type <class 'tfx.types.standard_artifacts.Schema'>
.uri pipelines/mkavaldo-pipeline/SchemaGen/schema/3
.outputs
['anomalies']
Channel of type 'ExampleAnomalies' (1 artifact) at 0x7fd025a7b640
.type_name ExampleAnomalies
._artifacts
[0]
Artifact of type 'ExampleAnomalies' (uri: pipelines/mkavaldo-pipeline/ExampleValidator/anomalies/4) at 0x7fd028d3b9a0
.type <class 'tfx.types.standard_artifacts.ExampleAnomalies'>
.uri pipelines/mkavaldo-pipeline/ExampleValidator/anomalies/4
.span 0
.split_names ["train", "eval"]
.exec_properties
['exclude_splits'] []
['custom_validation_config'] None
.component.inputs
['statistics']
Channel of type 'ExampleStatistics' (1 artifact) at 0x7fd028d3b190
.type_name ExampleStatistics
._artifacts
[0]
Artifact of type 'ExampleStatistics' (uri: pipelines/mkavaldo-pipeline/StatisticsGen/statistics/2) at 0x7fd0cc20cf10
.type <class 'tfx.types.standard_artifacts.ExampleStatistics'>
.uri pipelines/mkavaldo-pipeline/StatisticsGen/statistics/2
.span 0
.split_names ["train", "eval"]
['schema']
Channel of type 'Schema' (1 artifact) at 0x7fd028d3bb50
.type_name Schema
._artifacts
[0]
Artifact of type 'Schema' (uri: pipelines/mkavaldo-pipeline/SchemaGen/schema/3) at 0x7fd028d3bd00
.type <class 'tfx.types.standard_artifacts.Schema'>
.uri pipelines/mkavaldo-pipeline/SchemaGen/schema/3
.component.outputs
['anomalies']
Channel of type 'ExampleAnomalies' (1 artifact) at 0x7fd025a7b640
.type_name ExampleAnomalies
._artifacts
[0]
Artifact of type 'ExampleAnomalies' (uri: pipelines/mkavaldo-pipeline/ExampleValidator/anomalies/4) at 0x7fd028d3b9a0
.type <class 'tfx.types.standard_artifacts.ExampleAnomalies'>
.uri pipelines/mkavaldo-pipeline/ExampleValidator/anomalies/4
.span 0
.split_names ["train", "eval"]
In [ ]:
interactive_context.show(example_validator.outputs["anomalies"])
Artifact at pipelines/mkavaldo-pipeline/ExampleValidator/anomalies/4

'train' split:

No anomalies found.

'eval' split:

No anomalies found.

Transform¶

In [ ]:
TRANSFORM_MODULE_FILE = "water_transform.py"
In [ ]:
%%writefile {TRANSFORM_MODULE_FILE}

import tensorflow as tf
import tensorflow_transform as tft

LABEL_KEY = 'Potability'

FEATURES = [
  'ph',
  'Hardness',
  'Solids',
  'Chloramines',
  'Sulfate',
  'Conductivity',
  'Organic_carbon',
  'Trihalomethanes',
  'Turbidity'
]

def transformed_name(key):
    return key + '_xf'

def preprocessing_fn(inputs):
    outputs = {}

    for feature in FEATURES:
        # MinMaxScaler
        # outputs[transformed_name(feature)] = tft.scale_to_0_1(inputs[feature])
        # Z-Score
        outputs[transformed_name(feature)] = tft.scale_to_z_score(inputs[feature])

    # Transform the label
    outputs[transformed_name(LABEL_KEY)] = tf.cast(inputs[LABEL_KEY], tf.int64)

    return outputs
Writing water_transform.py
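tft.scale_to_z_score is a full-pass transform: it computes the mean and standard deviation of each feature over the whole training split and rescales values to zero mean and unit variance. An illustrative numpy sketch of the same computation (not part of the pipeline):

import numpy as np

x = np.array([7.0, 8.3, 5.6, 10.2, 8.6])     # e.g. a handful of ph values
z = (x - x.mean()) / x.std()                  # what tft.scale_to_z_score produces per feature
print(round(z.mean(), 6), round(z.std(), 6))  # ~0.0 and 1.0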
In [ ]:
transform = Transform(
    examples = example_gen.outputs["examples"],
    schema = schema_gen.outputs['schema'],
    module_file = os.path.abspath(TRANSFORM_MODULE_FILE)
)

interactive_context.run(transform)
Out[ ]:
ExecutionResult at 0x7fd09806f0a0
.execution_id 5
.component
Transform at 0x7fd025a79b10
.inputs
['examples']
Channel of type 'Examples' (1 artifact) at 0x7fd028d3b490
.type_name Examples
._artifacts
[0]
Artifact of type 'Examples' (uri: pipelines/mkavaldo-pipeline/CsvExampleGen/examples/1) at 0x7fd028d3b160
.type <class 'tfx.types.standard_artifacts.Examples'>
.uri pipelines/mkavaldo-pipeline/CsvExampleGen/examples/1
.span 0
.split_names ["train", "eval"]
.version 0
['schema']
Channel of type 'Schema' (1 artifact) at 0x7fd028d3bb50
.type_name Schema
._artifacts
[0]
Artifact of type 'Schema' (uri: pipelines/mkavaldo-pipeline/SchemaGen/schema/3) at 0x7fd028d3bd00
.type <class 'tfx.types.standard_artifacts.Schema'>
.uri pipelines/mkavaldo-pipeline/SchemaGen/schema/3
.outputs
['transform_graph']
Channel of type 'TransformGraph' (1 artifact) at 0x7fd025a7ba60
.type_name TransformGraph
._artifacts
[0]
Artifact of type 'TransformGraph' (uri: pipelines/mkavaldo-pipeline/Transform/transform_graph/5) at 0x7fd025a7b280
.type <class 'tfx.types.standard_artifacts.TransformGraph'>
.uri pipelines/mkavaldo-pipeline/Transform/transform_graph/5
['transformed_examples']
Channel of type 'Examples' (1 artifact) at 0x7fd025a7bac0
.type_name Examples
._artifacts
[0]
Artifact of type 'Examples' (uri: pipelines/mkavaldo-pipeline/Transform/transformed_examples/5) at 0x7fd025a7a110
.type <class 'tfx.types.standard_artifacts.Examples'>
.uri pipelines/mkavaldo-pipeline/Transform/transformed_examples/5
.span 0
.split_names ["train", "eval"]
.version 0
['updated_analyzer_cache']
Channel of type 'TransformCache' (1 artifact) at 0x7fd025a7b970
.type_name TransformCache
._artifacts
[0]
Artifact of type 'TransformCache' (uri: pipelines/mkavaldo-pipeline/Transform/updated_analyzer_cache/5) at 0x7fd025a7a800
.type <class 'tfx.types.standard_artifacts.TransformCache'>
.uri pipelines/mkavaldo-pipeline/Transform/updated_analyzer_cache/5
['pre_transform_schema']
Channel of type 'Schema' (1 artifact) at 0x7fd025a7b7c0
.type_name Schema
._artifacts
[0]
Artifact of type 'Schema' (uri: pipelines/mkavaldo-pipeline/Transform/pre_transform_schema/5) at 0x7fd025a7b7f0
.type <class 'tfx.types.standard_artifacts.Schema'>
.uri pipelines/mkavaldo-pipeline/Transform/pre_transform_schema/5
['pre_transform_stats']
Channel of type 'ExampleStatistics' (1 artifact) at 0x7fd025a7b1c0
.type_name ExampleStatistics
._artifacts
[0]
Artifact of type 'ExampleStatistics' (uri: pipelines/mkavaldo-pipeline/Transform/pre_transform_stats/5) at 0x7fd025a7b670
.type <class 'tfx.types.standard_artifacts.ExampleStatistics'>
.uri pipelines/mkavaldo-pipeline/Transform/pre_transform_stats/5
.span 0
.split_names
['post_transform_schema']
Channel of type 'Schema' (1 artifact) at 0x7fd025a7b2e0
.type_name Schema
._artifacts
[0]
Artifact of type 'Schema' (uri: pipelines/mkavaldo-pipeline/Transform/post_transform_schema/5) at 0x7fd025a7b940
.type <class 'tfx.types.standard_artifacts.Schema'>
.uri pipelines/mkavaldo-pipeline/Transform/post_transform_schema/5
['post_transform_stats']
Channel of type 'ExampleStatistics' (1 artifact) at 0x7fd025a79240
.type_name ExampleStatistics
._artifacts
[0]
Artifact of type 'ExampleStatistics' (uri: pipelines/mkavaldo-pipeline/Transform/post_transform_stats/5) at 0x7fd025a7b610
.type <class 'tfx.types.standard_artifacts.ExampleStatistics'>
.uri pipelines/mkavaldo-pipeline/Transform/post_transform_stats/5
.span 0
.split_names
['post_transform_anomalies']
Channel of type 'ExampleAnomalies' (1 artifact) at 0x7fd025a7b070
.type_name ExampleAnomalies
._artifacts
[0]
Artifact of type 'ExampleAnomalies' (uri: pipelines/mkavaldo-pipeline/Transform/post_transform_anomalies/5) at 0x7fd025a7b730
.type <class 'tfx.types.standard_artifacts.ExampleAnomalies'>
.uri pipelines/mkavaldo-pipeline/Transform/post_transform_anomalies/5
.span 0
.split_names
.exec_properties
['module_file'] None
['preprocessing_fn'] None
['stats_options_updater_fn'] None
['force_tf_compat_v1'] 0
['custom_config'] null
['splits_config'] None
['disable_statistics'] 0
['module_path'] water_transform@pipelines/mkavaldo-pipeline/_wheels/tfx_user_code_Transform-0.0+9d6f318419064eff7aa625f190750f4cdc18d0988d635bae9f9fcc7289e4288c-py3-none-any.whl
.component.inputs
['examples']
Channel of type 'Examples' (1 artifact) at 0x7fd028d3b490
.type_name Examples
._artifacts
[0]
Artifact of type 'Examples' (uri: pipelines/mkavaldo-pipeline/CsvExampleGen/examples/1) at 0x7fd028d3b160
.type <class 'tfx.types.standard_artifacts.Examples'>
.uri pipelines/mkavaldo-pipeline/CsvExampleGen/examples/1
.span 0
.split_names ["train", "eval"]
.version 0
['schema']
Channel of type 'Schema' (1 artifact) at 0x7fd028d3bb50
.type_name Schema
._artifacts
[0]
Artifact of type 'Schema' (uri: pipelines/mkavaldo-pipeline/SchemaGen/schema/3) at 0x7fd028d3bd00
.type <class 'tfx.types.standard_artifacts.Schema'>
.uri pipelines/mkavaldo-pipeline/SchemaGen/schema/3
.component.outputs
['transform_graph']
Channel of type 'TransformGraph' (1 artifact) at 0x7fd025a7ba60
.type_name TransformGraph
._artifacts
[0]
Artifact of type 'TransformGraph' (uri: pipelines/mkavaldo-pipeline/Transform/transform_graph/5) at 0x7fd025a7b280
.type <class 'tfx.types.standard_artifacts.TransformGraph'>
.uri pipelines/mkavaldo-pipeline/Transform/transform_graph/5
['transformed_examples']
Channel of type 'Examples' (1 artifact) at 0x7fd025a7bac0
.type_name Examples
._artifacts
[0]
Artifact of type 'Examples' (uri: pipelines/mkavaldo-pipeline/Transform/transformed_examples/5) at 0x7fd025a7a110
.type <class 'tfx.types.standard_artifacts.Examples'>
.uri pipelines/mkavaldo-pipeline/Transform/transformed_examples/5
.span 0
.split_names ["train", "eval"]
.version 0
['updated_analyzer_cache']
Channel of type 'TransformCache' (1 artifact) at 0x7fd025a7b970
.type_name TransformCache
._artifacts
[0]
Artifact of type 'TransformCache' (uri: pipelines/mkavaldo-pipeline/Transform/updated_analyzer_cache/5) at 0x7fd025a7a800
.type <class 'tfx.types.standard_artifacts.TransformCache'>
.uri pipelines/mkavaldo-pipeline/Transform/updated_analyzer_cache/5
['pre_transform_schema']
Channel of type 'Schema' (1 artifact) at 0x7fd025a7b7c0
.type_name Schema
._artifacts
[0]
Artifact of type 'Schema' (uri: pipelines/mkavaldo-pipeline/Transform/pre_transform_schema/5) at 0x7fd025a7b7f0
.type <class 'tfx.types.standard_artifacts.Schema'>
.uri pipelines/mkavaldo-pipeline/Transform/pre_transform_schema/5
['pre_transform_stats']
Channel of type 'ExampleStatistics' (1 artifact) at 0x7fd025a7b1c0
.type_name ExampleStatistics
._artifacts
[0]
Artifact of type 'ExampleStatistics' (uri: pipelines/mkavaldo-pipeline/Transform/pre_transform_stats/5) at 0x7fd025a7b670
.type <class 'tfx.types.standard_artifacts.ExampleStatistics'>
.uri pipelines/mkavaldo-pipeline/Transform/pre_transform_stats/5
.span 0
.split_names
['post_transform_schema']
Channel of type 'Schema' (1 artifact) at 0x7fd025a7b2e0
.type_name Schema
._artifacts
[0]
Artifact of type 'Schema' (uri: pipelines/mkavaldo-pipeline/Transform/post_transform_schema/5) at 0x7fd025a7b940
.type <class 'tfx.types.standard_artifacts.Schema'>
.uri pipelines/mkavaldo-pipeline/Transform/post_transform_schema/5
['post_transform_stats']
Channel of type 'ExampleStatistics' (1 artifact) at 0x7fd025a79240
.type_name ExampleStatistics
._artifacts
[0]
Artifact of type 'ExampleStatistics' (uri: pipelines/mkavaldo-pipeline/Transform/post_transform_stats/5) at 0x7fd025a7b610
.type <class 'tfx.types.standard_artifacts.ExampleStatistics'>
.uri pipelines/mkavaldo-pipeline/Transform/post_transform_stats/5
.span 0
.split_names
['post_transform_anomalies']
Channel of type 'ExampleAnomalies' (1 artifact) at 0x7fd025a7b070
.type_name ExampleAnomalies
._artifacts
[0]
Artifact of type 'ExampleAnomalies' (uri: pipelines/mkavaldo-pipeline/Transform/post_transform_anomalies/5) at 0x7fd025a7b730
.type <class 'tfx.types.standard_artifacts.ExampleAnomalies'>
.uri pipelines/mkavaldo-pipeline/Transform/post_transform_anomalies/5
.span 0
.split_names

Tuner¶

In [ ]:
TUNER_MODULE_FILE = "water_tuner.py"
In [ ]:
%%writefile {TUNER_MODULE_FILE}

import os
import tensorflow_transform as tft
import tensorflow as tf
import keras_tuner as kt
from tfx.v1.components import TunerFnResult
from tfx.components.trainer.fn_args_utils import FnArgs
from water_trainer import FEATURE_KEY, transformed_name, input_fn
from tensorflow.keras import layers
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

def model_builder(hyperparameters):
    input_features = []

    for key in FEATURE_KEY:
        input_features.append(
            tf.keras.Input(shape=(1,), name=transformed_name(key))
        )

    merged_input = layers.concatenate(input_features)

    # Hyperparameters for the Dense and Dropout layers
    units_1 = hyperparameters.Choice('units_1', [128, 256, 512])
    units_2 = hyperparameters.Choice('units_2', [64, 128, 256])
    units_3 = hyperparameters.Choice('units_3', [32, 64, 128])
    dropout_rate_1 = hyperparameters.Choice('dropout_rate_1', [0.2, 0.3, 0.4])
    dropout_rate_2 = hyperparameters.Choice('dropout_rate_2', [0.3, 0.4, 0.5])

    # Layer 1
    x = layers.Dense(units_1, activation='relu')(merged_input)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(dropout_rate_1)(x)

    # Layer 2
    x = layers.Dense(units_2, activation='relu')(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(dropout_rate_2)(x)

    # Layer 3
    x = layers.Dense(units_3, activation='relu')(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(dropout_rate_2)(x)

    # Output for binary classification
    outputs = layers.Dense(1, activation='sigmoid')(x)

    model = Model(inputs=input_features, outputs=outputs)

    model.compile(
      optimizer=tf.keras.optimizers.Adam(
      learning_rate=hyperparameters.Choice('learning_rate', [0.0001, 0.00005, 0.0005, 0.001])),
      loss='binary_crossentropy',
      metrics=['accuracy']
    )

    return model

def tuner_fn(fn_args: FnArgs):
    tf_transform_output = tft.TFTransformOutput(fn_args.transform_graph_path)

    train_dataset = input_fn(fn_args.train_files, tf_transform_output, batch_size=64, num_epochs=3)
    eval_dataset = input_fn(fn_args.eval_files, tf_transform_output, batch_size=64, num_epochs=1)

    tuner = kt.RandomSearch(
        model_builder,
        objective='val_accuracy',
        max_trials=10,
        directory=fn_args.working_dir,
        project_name='kt_random_search'
    )

    # callbacks
    early_stopping = EarlyStopping(
        monitor='val_loss',
        patience=5,
        restore_best_weights=True
    )

    model_checkpoint = ModelCheckpoint(
        filepath=os.path.join(fn_args.working_dir, 'best_model.keras'),
        monitor='val_loss',
        save_best_only=True,
        mode='min'
    )

    return TunerFnResult(
        tuner=tuner,
        fit_kwargs={
            "x": train_dataset,
            'validation_data': eval_dataset,
            'steps_per_epoch': fn_args.train_steps,
            'validation_steps': fn_args.eval_steps,
            "epochs": 100,
            'callbacks': [early_stopping, model_checkpoint]
        }
    )
Overwriting water_tuner.py
In [ ]:
tuner = Tuner(
    module_file=os.path.abspath(TUNER_MODULE_FILE),
    examples=transform.outputs['transformed_examples'],
    transform_graph=transform.outputs['transform_graph'],
    schema=schema_gen.outputs['schema'],
    train_args=trainer_pb2.TrainArgs(splits=['train'], num_steps=200),
    eval_args=trainer_pb2.EvalArgs(splits=['eval'], num_steps=200),
)

interactive_context.run(tuner)
Trial 10 Complete [00h 02m 13s]
val_accuracy: 0.6744186282157898

Best val_accuracy So Far: 0.6744186282157898
Total elapsed time: 00h 07m 58s
Results summary
Results in pipelines/mkavaldo-pipeline/.temp/27/kt_random_search
Showing 10 best trials
Objective(name="val_accuracy", direction="max")

Trial 04 summary
Hyperparameters:
units_1: 128
units_2: 64
units_3: 128
dropout_rate_1: 0.3
dropout_rate_2: 0.4
learning_rate: 5e-05
Score: 0.6744186282157898

Trial 09 summary
Hyperparameters:
units_1: 256
units_2: 128
units_3: 128
dropout_rate_1: 0.3
dropout_rate_2: 0.5
learning_rate: 5e-05
Score: 0.6744186282157898

Trial 00 summary
Hyperparameters:
units_1: 512
units_2: 128
units_3: 32
dropout_rate_1: 0.2
dropout_rate_2: 0.5
learning_rate: 0.0005
Score: 0.669767439365387

Trial 02 summary
Hyperparameters:
units_1: 256
units_2: 64
units_3: 64
dropout_rate_1: 0.4
dropout_rate_2: 0.4
learning_rate: 0.0005
Score: 0.669767439365387

Trial 01 summary
Hyperparameters:
units_1: 512
units_2: 128
units_3: 128
dropout_rate_1: 0.2
dropout_rate_2: 0.5
learning_rate: 0.0005
Score: 0.6651162505149841

Trial 08 summary
Hyperparameters:
units_1: 512
units_2: 64
units_3: 32
dropout_rate_1: 0.4
dropout_rate_2: 0.4
learning_rate: 0.001
Score: 0.6604651212692261

Trial 07 summary
Hyperparameters:
units_1: 512
units_2: 128
units_3: 64
dropout_rate_1: 0.3
dropout_rate_2: 0.5
learning_rate: 0.0001
Score: 0.6511628031730652

Trial 06 summary
Hyperparameters:
units_1: 512
units_2: 64
units_3: 128
dropout_rate_1: 0.4
dropout_rate_2: 0.3
learning_rate: 5e-05
Score: 0.6465116143226624

Trial 03 summary
Hyperparameters:
units_1: 256
units_2: 128
units_3: 64
dropout_rate_1: 0.4
dropout_rate_2: 0.4
learning_rate: 0.0005
Score: 0.6418604850769043

Trial 05 summary
Hyperparameters:
units_1: 256
units_2: 256
units_3: 32
dropout_rate_1: 0.3
dropout_rate_2: 0.3
learning_rate: 0.0001
Score: 0.6372092962265015
Out[ ]:
ExecutionResult at 0x7fd0110a4d00
.execution_id 27
.component
Tuner at 0x7fd003729bd0
.inputs
['examples']
Channel of type 'Examples' (1 artifact) at 0x7fd025a7bac0
.type_name Examples
._artifacts
[0]
Artifact of type 'Examples' (uri: pipelines/mkavaldo-pipeline/Transform/transformed_examples/5) at 0x7fd025a7a110
.type <class 'tfx.types.standard_artifacts.Examples'>
.uri pipelines/mkavaldo-pipeline/Transform/transformed_examples/5
.span 0
.split_names ["train", "eval"]
.version 0
['schema']
Channel of type 'Schema' (1 artifact) at 0x7fd028d3bb50
.type_name Schema
._artifacts
[0]
Artifact of type 'Schema' (uri: pipelines/mkavaldo-pipeline/SchemaGen/schema/3) at 0x7fd028d3bd00
.type <class 'tfx.types.standard_artifacts.Schema'>
.uri pipelines/mkavaldo-pipeline/SchemaGen/schema/3
['transform_graph']
Channel of type 'TransformGraph' (1 artifact) at 0x7fd025a7ba60
.type_name TransformGraph
._artifacts
[0]
Artifact of type 'TransformGraph' (uri: pipelines/mkavaldo-pipeline/Transform/transform_graph/5) at 0x7fd025a7b280
.type <class 'tfx.types.standard_artifacts.TransformGraph'>
.uri pipelines/mkavaldo-pipeline/Transform/transform_graph/5
.outputs
['best_hyperparameters']
Channel of type 'HyperParameters' (1 artifact) at 0x7fd0032fb1c0
.type_name HyperParameters
._artifacts
[0]
Artifact of type 'HyperParameters' (uri: pipelines/mkavaldo-pipeline/Tuner/best_hyperparameters/27) at 0x7fd0032fa650
.type <class 'tfx.types.standard_artifacts.HyperParameters'>
.uri pipelines/mkavaldo-pipeline/Tuner/best_hyperparameters/27
['tuner_results']
Channel of type 'TunerResults' (1 artifact) at 0x7fd0032fbe20
.type_name TunerResults
._artifacts
[0]
Artifact of type 'TunerResults' (uri: pipelines/mkavaldo-pipeline/Tuner/tuner_results/27) at 0x7fd0032f8e80
.type <class 'tfx.types.standard_artifacts.TunerResults'>
.uri pipelines/mkavaldo-pipeline/Tuner/tuner_results/27
.exec_properties
['module_file'] None
['tuner_fn'] None
['train_args'] { "num_steps": 200, "splits": [ "train" ] }
['eval_args'] { "num_steps": 200, "splits": [ "eval" ] }
['tune_args'] None
['custom_config'] null
['module_path'] water_tuner@pipelines/mkavaldo-pipeline/_wheels/tfx_user_code_Tuner-0.0+234bc19118454338759d6413d88a5a84ec488197feaf97dc9931d0b45f07e93c-py3-none-any.whl
.component.inputs
['examples']
Channel of type 'Examples' (1 artifact) at 0x7fd025a7bac0
.type_name Examples
._artifacts
[0]
Artifact of type 'Examples' (uri: pipelines/mkavaldo-pipeline/Transform/transformed_examples/5) at 0x7fd025a7a110
.type <class 'tfx.types.standard_artifacts.Examples'>
.uri pipelines/mkavaldo-pipeline/Transform/transformed_examples/5
.span 0
.split_names ["train", "eval"]
.version 0
['schema']
Channel of type 'Schema' (1 artifact) at 0x7fd028d3bb50
.type_name Schema
._artifacts
[0]
Artifact of type 'Schema' (uri: pipelines/mkavaldo-pipeline/SchemaGen/schema/3) at 0x7fd028d3bd00
.type <class 'tfx.types.standard_artifacts.Schema'>
.uri pipelines/mkavaldo-pipeline/SchemaGen/schema/3
['transform_graph']
Channel of type 'TransformGraph' (1 artifact) at 0x7fd025a7ba60
.type_name TransformGraph
._artifacts
[0]
Artifact of type 'TransformGraph' (uri: pipelines/mkavaldo-pipeline/Transform/transform_graph/5) at 0x7fd025a7b280
.type <class 'tfx.types.standard_artifacts.TransformGraph'>
.uri pipelines/mkavaldo-pipeline/Transform/transform_graph/5
.component.outputs
['best_hyperparameters']
Channel of type 'HyperParameters' (1 artifact) at 0x7fd0032fb1c0
.type_name HyperParameters
._artifacts
[0]
Artifact of type 'HyperParameters' (uri: pipelines/mkavaldo-pipeline/Tuner/best_hyperparameters/27) at 0x7fd0032fa650
.type <class 'tfx.types.standard_artifacts.HyperParameters'>
.uri pipelines/mkavaldo-pipeline/Tuner/best_hyperparameters/27
['tuner_results']
Channel of type 'TunerResults' (1 artifact) at 0x7fd0032fbe20
.type_name TunerResults
._artifacts
[0]
Artifact of type 'TunerResults' (uri: pipelines/mkavaldo-pipeline/Tuner/tuner_results/27) at 0x7fd0032f8e80
.type <class 'tfx.types.standard_artifacts.TunerResults'>
.uri pipelines/mkavaldo-pipeline/Tuner/tuner_results/27
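The best trial (learning rate 5e-05 with 128/64/128 units) is stored in the best_hyperparameters artifact as JSON and is what the Trainer below receives through its hyperparameters channel. A sketch of reading it back, assuming the best_hyperparameters.txt file name and 'values' key used by the Tuner executor:

import json

# Assumed file name and JSON layout inside the Tuner artifact shown above.
hp_path = 'pipelines/mkavaldo-pipeline/Tuner/best_hyperparameters/27/best_hyperparameters.txt'
with open(hp_path) as f:
    best_hp = json.load(f)
print(best_hp['values'])  # e.g. {'units_1': 128, 'units_2': 64, ..., 'learning_rate': 5e-05}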

Trainer¶

In [ ]:
TRAINER_MODULE_FILE = "water_trainer.py"
In [ ]:
%%writefile {TRAINER_MODULE_FILE}

import os
import tensorflow as tf
import tensorflow_transform as tft
from tensorflow.keras import layers
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tfx.components.trainer.fn_args_utils import FnArgs

LABEL_KEY = 'Potability'
FEATURE_KEY = [
    'ph',
    'Hardness',
    'Solids',
    'Chloramines',
    'Sulfate',
    'Conductivity',
    'Organic_carbon',
    'Trihalomethanes',
    'Turbidity'
]

def transformed_name(key):
    return key + "_xf"

def gzip_reader_fn(filenames):
    return tf.data.TFRecordDataset(filenames, compression_type='GZIP')

def get_serve_tf_examples_fn(model, tf_transform_output):
    model.tft_layer = tf_transform_output.transform_features_layer()

    @tf.function
    def serve_tf_examples_fn(serialized_tf_examples):
        feature_spec = tf_transform_output.raw_feature_spec()
        feature_spec.pop(LABEL_KEY)
        parsed_features = tf.io.parse_example(
            serialized_tf_examples, feature_spec
        )

        transformed_features = model.tft_layer(parsed_features)
        outputs = model(transformed_features)
        return {"outputs": outputs}

    return serve_tf_examples_fn

def input_fn(file_pattern, tf_transform_output, batch_size=64, num_epochs=None) -> tf.data.Dataset:
    transform_feature_spec = (
        tf_transform_output.transformed_feature_spec().copy()
    )

    dataset = tf.data.experimental.make_batched_features_dataset(
        file_pattern=file_pattern,
        batch_size=batch_size,
        features=transform_feature_spec,
        reader=gzip_reader_fn,
        label_key=transformed_name(LABEL_KEY),
        num_epochs=num_epochs
    )

    dataset = dataset.repeat()

    return dataset

def get_model(show_summary=True):
    input_features = []
    for key in FEATURE_KEY:
        input_features.append(
            tf.keras.Input(shape=(1,), name=transformed_name(key))
        )

    merged_input = layers.concatenate(input_features)

    # Dense layer 1
    x = layers.Dense(256, activation="relu")(merged_input)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.3)(x)

    # Dense layer 2
    x = layers.Dense(128, activation="relu")(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.3)(x)

    # Dense layer 3
    x = layers.Dense(64, activation="relu")(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.3)(x)

    # Dense layer 4
    x = layers.Dense(32, activation="relu")(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.3)(x)

    # Output layer for binary classification
    outputs = layers.Dense(1, activation="sigmoid")(x)

    model = Model(inputs=input_features, outputs=outputs)

    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
        loss='binary_crossentropy',
        metrics=['accuracy']
    )

    return model

def run_fn(fn_args: FnArgs):
    tf_transform_output = tft.TFTransformOutput(fn_args.transform_output)

    train_dataset = input_fn(fn_args.train_files, tf_transform_output, batch_size=32, num_epochs=3)
    eval_dataset = input_fn(fn_args.eval_files, tf_transform_output, batch_size=32, num_epochs=1)

    model = get_model()

    log_dir = os.path.join(os.path.dirname(fn_args.serving_model_dir), "logs")
    tensorboard_callback = tf.keras.callbacks.TensorBoard(
        log_dir=log_dir, update_freq="batch"
    )

    checkpoint_callback = ModelCheckpoint(
        filepath=os.path.join(fn_args.serving_model_dir, 'best_model.keras'),
        save_best_only=True,
        monitor='val_loss',
        mode='min'
    )

    early_stopping_callback = EarlyStopping(
        monitor='val_loss',
        patience=5,
        mode='min',
        restore_best_weights=True
    )

    model.fit(
        train_dataset,
        steps_per_epoch=fn_args.train_steps,
        validation_data=eval_dataset,
        validation_steps=fn_args.eval_steps,
        callbacks=[tensorboard_callback, checkpoint_callback, early_stopping_callback],
        epochs=300
    )

    signatures = {
        "serving_default": get_serve_tf_examples_fn(
            model, tf_transform_output
        ).get_concrete_function(
            tf.TensorSpec(shape=[None], dtype=tf.string, name="examples")
        ),
    }

    model.save(
        fn_args.serving_model_dir, save_format="tf", signatures=signatures
    )
Overwriting water_trainer.py
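Note that run_fn above builds a fixed 256/128/64/32 architecture and never reads fn_args.hyperparameters, so the Tuner's best trial does not actually influence training. If the tuned values should drive the model, run_fn could rebuild them into a KerasTuner HyperParameters object; a hedged sketch (model_builder is reused from water_tuner.py, and the rest of run_fn would continue exactly as in the module above):

import keras_tuner as kt
from water_tuner import model_builder  # reuse the tuned architecture builder

def run_fn(fn_args):
    # fn_args.hyperparameters carries the best_hyperparameters artifact as a config dict.
    if fn_args.hyperparameters:
        hparams = kt.HyperParameters.from_config(fn_args.hyperparameters)
        model = model_builder(hparams)
    else:
        model = get_model()
    # ...training, callbacks, and model.save() continue as in water_trainer.py above.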
In [ ]:
trainer = Trainer(
    module_file = os.path.abspath(TRAINER_MODULE_FILE),
    examples = transform.outputs['transformed_examples'],
    transform_graph = transform.outputs['transform_graph'],
    schema = schema_gen.outputs['schema'],
    hyperparameters=tuner.outputs['best_hyperparameters'],
    train_args=trainer_pb2.TrainArgs(splits=['train'], num_steps=1000),
    eval_args=trainer_pb2.EvalArgs(splits=['eval'], num_steps=1000),
)

interactive_context.run(trainer)
WARNING:absl:Examples artifact does not have payload_format custom property. Falling back to FORMAT_TF_EXAMPLE
WARNING:absl:Examples artifact does not have payload_format custom property. Falling back to FORMAT_TF_EXAMPLE
WARNING:absl:Examples artifact does not have payload_format custom property. Falling back to FORMAT_TF_EXAMPLE
Epoch 1/300
1000/1000 [==============================] - 13s 10ms/step - loss: 0.7781 - accuracy: 0.5511 - val_loss: 0.6857 - val_accuracy: 0.5953
Epoch 2/300
1000/1000 [==============================] - 12s 12ms/step - loss: 0.6878 - accuracy: 0.6028 - val_loss: 0.6651 - val_accuracy: 0.6372
Epoch 3/300
1000/1000 [==============================] - 11s 11ms/step - loss: 0.6458 - accuracy: 0.6398 - val_loss: 0.6479 - val_accuracy: 0.6604
Epoch 4/300
1000/1000 [==============================] - 10s 10ms/step - loss: 0.6219 - accuracy: 0.6633 - val_loss: 0.6367 - val_accuracy: 0.6511
Epoch 5/300
1000/1000 [==============================] - 11s 11ms/step - loss: 0.6047 - accuracy: 0.6755 - val_loss: 0.6410 - val_accuracy: 0.6465
Epoch 6/300
1000/1000 [==============================] - 11s 11ms/step - loss: 0.5917 - accuracy: 0.6882 - val_loss: 0.6393 - val_accuracy: 0.6512
Epoch 7/300
1000/1000 [==============================] - 12s 12ms/step - loss: 0.5812 - accuracy: 0.6972 - val_loss: 0.6341 - val_accuracy: 0.6419
Epoch 8/300
1000/1000 [==============================] - 10s 10ms/step - loss: 0.5709 - accuracy: 0.7048 - val_loss: 0.6382 - val_accuracy: 0.6510
Epoch 9/300
1000/1000 [==============================] - 10s 10ms/step - loss: 0.5647 - accuracy: 0.7108 - val_loss: 0.6375 - val_accuracy: 0.6696
Epoch 10/300
1000/1000 [==============================] - 11s 11ms/step - loss: 0.5516 - accuracy: 0.7209 - val_loss: 0.6373 - val_accuracy: 0.6465
Epoch 11/300
1000/1000 [==============================] - 12s 12ms/step - loss: 0.5451 - accuracy: 0.7258 - val_loss: 0.6383 - val_accuracy: 0.6327
Epoch 12/300
1000/1000 [==============================] - 10s 10ms/step - loss: 0.5390 - accuracy: 0.7282 - val_loss: 0.6387 - val_accuracy: 0.6373
Out[ ]:
ExecutionResult at 0x7fd0048b02e0
.execution_id 30
.component
Trainer at 0x7fd00d10b0a0
.inputs
['examples']
Channel of type 'Examples' (1 artifact) at 0x7fd025a7bac0
.type_name Examples
._artifacts
[0]
Artifact of type 'Examples' (uri: pipelines/mkavaldo-pipeline/Transform/transformed_examples/5) at 0x7fd025a7a110
.type <class 'tfx.types.standard_artifacts.Examples'>
.uri pipelines/mkavaldo-pipeline/Transform/transformed_examples/5
.span 0
.split_names ["train", "eval"]
.version 0
['transform_graph']
Channel of type 'TransformGraph' (1 artifact) at 0x7fd025a7ba60
.type_name TransformGraph
._artifacts
[0]
Artifact of type 'TransformGraph' (uri: pipelines/mkavaldo-pipeline/Transform/transform_graph/5) at 0x7fd025a7b280
.type <class 'tfx.types.standard_artifacts.TransformGraph'>
.uri pipelines/mkavaldo-pipeline/Transform/transform_graph/5
['schema']
Channel of type 'Schema' (1 artifact) at 0x7fd028d3bb50
.type_name Schema
._artifacts
[0]
Artifact of type 'Schema' (uri: pipelines/mkavaldo-pipeline/SchemaGen/schema/3) at 0x7fd028d3bd00
.type <class 'tfx.types.standard_artifacts.Schema'>
.uri pipelines/mkavaldo-pipeline/SchemaGen/schema/3
['hyperparameters']
Channel of type 'HyperParameters' (1 artifact) at 0x7fd0032fb1c0
.type_name HyperParameters
._artifacts
[0]
Artifact of type 'HyperParameters' (uri: pipelines/mkavaldo-pipeline/Tuner/best_hyperparameters/27) at 0x7fd0032fa650
.type <class 'tfx.types.standard_artifacts.HyperParameters'>
.uri pipelines/mkavaldo-pipeline/Tuner/best_hyperparameters/27
.outputs
['model']
Channel of type 'Model' (1 artifact) at 0x7fd0048b39a0
.type_name Model
._artifacts
[0]
Artifact of type 'Model' (uri: pipelines/mkavaldo-pipeline/Trainer/model/30) at 0x7fd0048b0700
.type <class 'tfx.types.standard_artifacts.Model'>
.uri pipelines/mkavaldo-pipeline/Trainer/model/30
['model_run']
Channel of type 'ModelRun' (1 artifact) at 0x7fd0048b3a00
.type_name ModelRun
._artifacts
[0]
Artifact of type 'ModelRun' (uri: pipelines/mkavaldo-pipeline/Trainer/model_run/30) at 0x7fd0048b20b0
.type <class 'tfx.types.standard_artifacts.ModelRun'>
.uri pipelines/mkavaldo-pipeline/Trainer/model_run/30
.exec_properties
['train_args'] { "num_steps": 1000, "splits": [ "train" ] }
['eval_args'] { "num_steps": 1000, "splits": [ "eval" ] }
['module_file'] None
['run_fn'] None
['trainer_fn'] None
['custom_config'] null
['module_path'] water_trainer@pipelines/mkavaldo-pipeline/_wheels/tfx_user_code_Trainer-0.0+1199d551e4ddbbe74d9afd76c4f66e07678fcf32714b198de8b60c8b4402e596-py3-none-any.whl
.component.inputs
['examples']
Channel of type 'Examples' (1 artifact) at 0x7fd025a7bac0
.type_name Examples
._artifacts
[0]
Artifact of type 'Examples' (uri: pipelines/mkavaldo-pipeline/Transform/transformed_examples/5) at 0x7fd025a7a110
.type <class 'tfx.types.standard_artifacts.Examples'>
.uri pipelines/mkavaldo-pipeline/Transform/transformed_examples/5
.span 0
.split_names ["train", "eval"]
.version 0
['transform_graph']
Channel of type 'TransformGraph' (1 artifact) at 0x7fd025a7ba60
.type_name TransformGraph
._artifacts
[0]
Artifact of type 'TransformGraph' (uri: pipelines/mkavaldo-pipeline/Transform/transform_graph/5) at 0x7fd025a7b280
.type <class 'tfx.types.standard_artifacts.TransformGraph'>
.uri pipelines/mkavaldo-pipeline/Transform/transform_graph/5
['schema']
Channel of type 'Schema' (1 artifact) at 0x7fd028d3bb50
.type_name Schema
._artifacts
[0]
Artifact of type 'Schema' (uri: pipelines/mkavaldo-pipeline/SchemaGen/schema/3) at 0x7fd028d3bd00
.type <class 'tfx.types.standard_artifacts.Schema'>
.uri pipelines/mkavaldo-pipeline/SchemaGen/schema/3
['hyperparameters']
Channel of type 'HyperParameters' (1 artifact) at 0x7fd0032fb1c0
.type_name HyperParameters
._artifacts
[0]
Artifact of type 'HyperParameters' (uri: pipelines/mkavaldo-pipeline/Tuner/best_hyperparameters/27) at 0x7fd0032fa650
.type <class 'tfx.types.standard_artifacts.HyperParameters'>
.uri pipelines/mkavaldo-pipeline/Tuner/best_hyperparameters/27
.component.outputs
['model']
Channel of type 'Model' (1 artifact) at 0x7fd0048b39a0
.type_name Model
._artifacts
[0]
Artifact of type 'Model' (uri: pipelines/mkavaldo-pipeline/Trainer/model/30) at 0x7fd0048b0700
.type <class 'tfx.types.standard_artifacts.Model'>
.uri pipelines/mkavaldo-pipeline/Trainer/model/30
['model_run']
Channel of type 'ModelRun' (1 artifact) at 0x7fd0048b3a00
.type_name ModelRun
._artifacts
[0]
Artifact of type 'ModelRun' (uri: pipelines/mkavaldo-pipeline/Trainer/model_run/30) at 0x7fd0048b20b0
.type <class 'tfx.types.standard_artifacts.ModelRun'>
.uri pipelines/mkavaldo-pipeline/Trainer/model_run/30
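Because run_fn attaches a serving_default signature that parses serialized tf.Example protos, the exported SavedModel can be checked directly; a sketch, assuming the Format-Serving sub-directory that the Trainer uses inside its model artifact:

import tensorflow as tf

# Assumed layout inside the Trainer model artifact shown above.
loaded = tf.saved_model.load('pipelines/mkavaldo-pipeline/Trainer/model/30/Format-Serving')
print(list(loaded.signatures.keys()))  # expected: ['serving_default']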

Resolver¶

In [ ]:
from tfx.types.standard_artifacts import Model, ModelBlessing
In [ ]:
model_resolver = Resolver(
    strategy_class = LatestBlessedModelStrategy,
    model = Channel(type = Model),
    model_blessing = Channel(type = ModelBlessing)
).with_id('Latest_blessed_model_resolver')

interactive_context.run(model_resolver)
Out[ ]:
ExecutionResult at 0x7fd00b3bb9a0
.execution_id 31
.component <tfx.dsl.components.common.resolver.Resolver object at 0x7fd001d38f70>
.component.inputs
['model'] ResolvedChannel(artifact_type=Model, LatestBlessedModelStrategy(Dict(model=Input(), model_blessing=Input()))["model"])
['model_blessing'] ResolvedChannel(artifact_type=ModelBlessing, LatestBlessedModelStrategy(Dict(model=Input(), model_blessing=Input()))["model_blessing"])
.component.outputs
['model']
Channel of type 'Model' (0 artifacts) at 0x7fd001d39b10
.type_name Model
._artifacts []
['model_blessing']
Channel of type 'ModelBlessing' (0 artifacts) at 0x7fd001d39990
.type_name ModelBlessing
._artifacts []

Evaluator¶

In [ ]:
metrics = [
    tfma.metrics.ExampleCount(name='example_count'),
    tf.keras.metrics.BinaryCrossentropy(name='binary_crossentropy'),
    tf.keras.metrics.BinaryAccuracy(name='accuracy'),
    tf.keras.metrics.Precision(name='precision', thresholds=[0.5]),
    tf.keras.metrics.Recall(name='recall', thresholds=[0.5]),
    tfma.metrics.ConfusionMatrixPlot(name='confusion_matrix_plot')  # Make sure this renders correctly
]

metrics_specs = tfma.metrics.specs_from_metrics(metrics)

eval_config = tfma.EvalConfig(
    model_specs=[tfma.ModelSpec(label_key='Potability')],
    slicing_specs=[tfma.SlicingSpec()],
    metrics_specs=metrics_specs
)
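None of the metrics above carries a threshold, so the Evaluator only computes metrics and has no criterion for validating the candidate model against the baseline. A hedged sketch of how a blessing threshold on accuracy could be expressed in the same eval_config:

# Hypothetical variant: bless the model only if accuracy reaches 0.6
# and does not regress against the latest blessed baseline.
eval_config_with_threshold = tfma.EvalConfig(
    model_specs=[tfma.ModelSpec(label_key='Potability')],
    slicing_specs=[tfma.SlicingSpec()],
    metrics_specs=[
        tfma.MetricsSpec(metrics=[
            tfma.MetricConfig(
                class_name='BinaryAccuracy',
                threshold=tfma.MetricThreshold(
                    value_threshold=tfma.GenericValueThreshold(lower_bound={'value': 0.6}),
                    change_threshold=tfma.GenericChangeThreshold(
                        direction=tfma.MetricDirection.HIGHER_IS_BETTER,
                        absolute={'value': -1e-10}
                    )
                )
            )
        ])
    ]
)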
In [ ]:
evaluator = Evaluator(
    examples=example_gen.outputs['examples'],
    model=trainer.outputs['model'],
    baseline_model=model_resolver.outputs['model'],
    eval_config=eval_config
)

interactive_context.run(evaluator)
/usr/local/lib/python3.10/dist-packages/tensorflow_model_analysis/metrics/binary_confusion_matrices.py:155: RuntimeWarning: invalid value encountered in divide
  false_omission_rate = fn / predicated_negatives
Out[ ]:
ExecutionResult at 0x7fd0110e2b60
.execution_id 32
.component
Evaluator at 0x7fd001d67a00
.inputs
['examples']
Channel of type 'Examples' (1 artifact) at 0x7fd028d3b490
.type_name Examples
._artifacts
[0]
Artifact of type 'Examples' (uri: pipelines/mkavaldo-pipeline/CsvExampleGen/examples/1) at 0x7fd028d3b160
.type <class 'tfx.types.standard_artifacts.Examples'>
.uri pipelines/mkavaldo-pipeline/CsvExampleGen/examples/1
.span 0
.split_names ["train", "eval"]
.version 0
['model']
Channel of type 'Model' (1 artifact) at 0x7fd0048b39a0
.type_name Model
._artifacts
[0]
Artifact of type 'Model' (uri: pipelines/mkavaldo-pipeline/Trainer/model/30) at 0x7fd0048b0700
.type <class 'tfx.types.standard_artifacts.Model'>
.uri pipelines/mkavaldo-pipeline/Trainer/model/30
['baseline_model']
Channel of type 'Model' (0 artifacts) at 0x7fd001d39b10
.type_name Model
._artifacts []
.outputs
['evaluation']
Channel of type 'ModelEvaluation' (1 artifact) at 0x7fd001d657b0
.type_name ModelEvaluation
._artifacts
[0]
Artifact of type 'ModelEvaluation' (uri: pipelines/mkavaldo-pipeline/Evaluator/evaluation/32) at 0x7fd001d67b50
.type <class 'tfx.types.standard_artifacts.ModelEvaluation'>
.uri pipelines/mkavaldo-pipeline/Evaluator/evaluation/32
['blessing']
Channel of type 'ModelBlessing' (1 artifact) at 0x7fd001d64f10
.type_name ModelBlessing
._artifacts
[0]
Artifact of type 'ModelBlessing' (uri: pipelines/mkavaldo-pipeline/Evaluator/blessing/32) at 0x7fd001d66fb0
.type <class 'tfx.types.standard_artifacts.ModelBlessing'>
.uri pipelines/mkavaldo-pipeline/Evaluator/blessing/32
.exec_properties
['eval_config'] { "metrics_specs": [ { "example_weights": { "unweighted": true }, "metrics": [ { "class_name": "ExampleCount", "config": "{\"name\": \"example_count\"}" } ] }, { "example_weights": { "weighted": true }, "metrics": [ { "class_name": "WeightedExampleCount", "config": "{\"name\": \"weighted_example_count\"}" } ] }, { "metrics": [ { "class_name": "ExampleCount", "config": "{\"name\": \"example_count\"}" }, { "class_name": "BinaryCrossentropy", "config": "{\"dtype\": \"float32\", \"from_logits\": false, \"label_smoothing\": 0, \"name\": \"binary_crossentropy\"}", "module": "keras.src.metrics.probabilistic_metrics" }, { "class_name": "BinaryAccuracy", "config": "{\"name\": \"accuracy\", \"threshold\": 0.5}" }, { "class_name": "Precision", "config": "{\"class_id\": null, \"name\": \"precision\", \"thresholds\": [0.5], \"top_k\": null}" }, { "class_name": "Recall", "config": "{\"class_id\": null, \"name\": \"recall\", \"thresholds\": [0.5], \"top_k\": null}" }, { "class_name": "ConfusionMatrixPlot", "config": "{\"name\": \"confusion_matrix_plot\", \"num_thresholds\": 1000}" } ] } ], "model_specs": [ { "label_key": "Potability" } ], "slicing_specs": [ {} ] }
['feature_slicing_spec'] None
['fairness_indicator_thresholds'] null
['example_splits'] null
['module_file'] None
['module_path'] None
.component.inputs
['examples']
Channel of type 'Examples' (1 artifact) at 0x7fd028d3b490
.type_name Examples
._artifacts
[0]
Artifact of type 'Examples' (uri: pipelines/mkavaldo-pipeline/CsvExampleGen/examples/1) at 0x7fd028d3b160
.type <class 'tfx.types.standard_artifacts.Examples'>
.uri pipelines/mkavaldo-pipeline/CsvExampleGen/examples/1
.span 0
.split_names ["train", "eval"]
.version 0
['model']
Channel of type 'Model' (1 artifact) at 0x7fd0048b39a0
.type_name Model
._artifacts
[0]
Artifact of type 'Model' (uri: pipelines/mkavaldo-pipeline/Trainer/model/30) at 0x7fd0048b0700
.type <class 'tfx.types.standard_artifacts.Model'>
.uri pipelines/mkavaldo-pipeline/Trainer/model/30
['baseline_model']
Channel of type 'Model' (0 artifacts) at 0x7fd001d39b10
.type_name Model
._artifacts []
.component.outputs
['evaluation']
Channel of type 'ModelEvaluation' (1 artifact) at 0x7fd001d657b0
.type_name ModelEvaluation
._artifacts
[0]
Artifact of type 'ModelEvaluation' (uri: pipelines/mkavaldo-pipeline/Evaluator/evaluation/32) at 0x7fd001d67b50
.type <class 'tfx.types.standard_artifacts.ModelEvaluation'>
.uri pipelines/mkavaldo-pipeline/Evaluator/evaluation/32
['blessing']
Channel of type 'ModelBlessing' (1 artifact) at 0x7fd001d64f10
.type_name ModelBlessing
._artifacts
[0]
Artifact of type 'ModelBlessing' (uri: pipelines/mkavaldo-pipeline/Evaluator/blessing/32) at 0x7fd001d66fb0
.type <class 'tfx.types.standard_artifacts.ModelBlessing'>
.uri pipelines/mkavaldo-pipeline/Evaluator/blessing/32
In [ ]:
eval_result = evaluator.outputs['evaluation'].get()[0].uri
tfma_result = tfma.load_eval_result(eval_result)
tfma.view.render_slicing_metrics(tfma_result)
tfma.addons.fairness.view.widget_view.render_fairness_indicator(
    tfma_result
)
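Besides the rendered widgets, the overall (unsliced) metric values can be pulled out of the loaded result programmatically; a small sketch:

# Print the metrics computed for the overall slice (the empty slice key).
for slice_key, metric_values in tfma_result.slicing_metrics:
    if not slice_key:
        print(metric_values)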

Pusher¶

In [ ]:
pusher = Pusher(
    model = trainer.outputs['model'],
    push_destination = pusher_pb2.PushDestination(
        filesystem = pusher_pb2.PushDestination.Filesystem(
            base_directory = "serving_model_dir/water-prediction-model"
        )
    )
)

interactive_context.run(pusher)
WARNING:absl:Pusher is going to push the model without validation. Consider using Evaluator or InfraValidator in your pipeline.
Out[ ]:
ExecutionResult at 0x7fd0077bb700
.execution_id 33
.component
Pusher at 0x7fd0058a3be0
.inputs
['model']
Channel of type 'Model' (1 artifact) at 0x7fd0048b39a0
.type_name Model
._artifacts
[0]
Artifact of type 'Model' (uri: pipelines/mkavaldo-pipeline/Trainer/model/30) at 0x7fd0048b0700
.type <class 'tfx.types.standard_artifacts.Model'>
.uri pipelines/mkavaldo-pipeline/Trainer/model/30
.outputs
['pushed_model']
Channel of type 'PushedModel' (1 artifact) at 0x7fd0084d4d00
.type_name PushedModel
._artifacts
[0]
Artifact of type 'PushedModel' (uri: pipelines/mkavaldo-pipeline/Pusher/pushed_model/33) at 0x7fd0032ebf70
.type <class 'tfx.types.standard_artifacts.PushedModel'>
.uri pipelines/mkavaldo-pipeline/Pusher/pushed_model/33
.exec_properties
['push_destination'] { "filesystem": { "base_directory": "serving_model_dir/water-prediction-model" } }
['custom_config'] null
.component.inputs
['model']
Channel of type 'Model' (1 artifact) at 0x7fd0048b39a0
.type_name Model
._artifacts
[0]
Artifact of type 'Model' (uri: pipelines/mkavaldo-pipeline/Trainer/model/30) at 0x7fd0048b0700
.type <class 'tfx.types.standard_artifacts.Model'>
.uri pipelines/mkavaldo-pipeline/Trainer/model/30
.component.outputs
['pushed_model']
Channel of type 'PushedModel' (1 artifact) at 0x7fd0084d4d00
.type_name PushedModel
._artifacts
[0]
Artifact of type 'PushedModel' (uri: pipelines/mkavaldo-pipeline/Pusher/pushed_model/33) at 0x7fd0032ebf70
.type <class 'tfx.types.standard_artifacts.PushedModel'>
.uri pipelines/mkavaldo-pipeline/Pusher/pushed_model/33
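The Pusher above is not wired to the Evaluator, which is why the "push the model without validation" warning is emitted. A sketch of the conditional variant, gating the push on the Evaluator blessing:

# Hypothetical variant: push only when the Evaluator blesses the model.
pusher = Pusher(
    model=trainer.outputs['model'],
    model_blessing=evaluator.outputs['blessing'],
    push_destination=pusher_pb2.PushDestination(
        filesystem=pusher_pb2.PushDestination.Filesystem(
            base_directory='serving_model_dir/water-prediction-model'
        )
    )
)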

Download¶

In [ ]:
import shutil

shutil.make_archive('serving_model', 'zip', '/content/serving_model_dir')
shutil.make_archive('pipeline', 'zip', '/content/pipelines')
Out[ ]:
'/content/pipeline.zip'
In [ ]:
import shutil

drive_path = '/content/drive/My Drive/pipeline.zip'
shutil.move('/content/pipeline.zip', drive_path)
Out[ ]:
'/content/drive/My Drive/pipeline.zip'
In [ ]:
!pip freeze > requirements.txt

Testing¶

Import Library¶

In [ ]:
import pandas as pd
import requests
import json
import base64
import tensorflow as tf
from pprint import PrettyPrinter

Connection Test¶

In [ ]:
pp = PrettyPrinter()
pp.pprint(requests.get("http://localhost:8080/v1/models/water-prediction-model").json())
{'model_version_status': [{'state': 'AVAILABLE',
                           'status': {'error_code': 'OK', 'error_message': ''},
                           'version': '1725114479'}]}
In [ ]:
def prepare_json(inputs: dict) -> str:
    features = {k: tf.train.Feature(float_list=tf.train.FloatList(value=[float(v)])) 
                for k, v in inputs.items()}
    example = tf.train.Example(features=tf.train.Features(feature=features)).SerializeToString()
    encoded_example = base64.b64encode(example).decode()
    return json.dumps({"signature_name": "serving_default", "instances": [{"examples": {"b64": encoded_example}}]})

def prediction(index: int, data: pd.DataFrame, endpoint: str):
    # Get the row at the given index
    inputs = data.iloc[index].to_dict()
    
    # Prepare the data as a JSON request payload
    json_data = prepare_json(inputs)
    
    # Send a POST request to the model endpoint
    response = requests.post(endpoint, data=json_data)
    
    # Parse the prediction from the response
    prediction = response.json().get("predictions")
    if prediction:
        result = "✅ Air aman diminum." if prediction[0][0] >= 0.5 else "💀 Air berbahaya untuk diminum."
    else:
        result = "Error: Prediksi tidak diketahui."
    
    print("Hasil:")
    print("------------------------------")
    print("-", result)
    print("------------------------------")
In [ ]:
data = pd.read_csv("data/water_potability.csv")
endpoint = "http://localhost:8080/v1/models/water-prediction-model:predict"

Prediction¶

In [ ]:
# Data at index 2
prediction(2, data, endpoint)
Result:
------------------------------
- 💀 The water is unsafe to drink.
------------------------------
In [ ]:
# Data at index 2006
prediction(2006, data, endpoint)
Result:
------------------------------
- ✅ The water is safe to drink.
------------------------------