
Created: 30/08/2024


Name: Maulana Kavaldo

Dicoding ID: mkavaldo

Install Library¶

In [ ]:
%%capture
!pip install tfx

Setup Base¶

In [ ]:
from google.colab import drive
drive.mount('/content/drive')
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
In [ ]:
DATA_ROOT = '/content/drive/MyDrive/Colab Notebooks/ML-Ops/submission-1/data/'

Import Library¶

In [ ]:
import pandas as pd

Load Dataset¶

In [ ]:
water = pd.read_csv(DATA_ROOT+'water_potability.csv')
water.head()
Out[ ]:
ph Hardness Solids Chloramines Sulfate Conductivity Organic_carbon Trihalomethanes Turbidity Potability
0 8.316766 214.373394 22018.417441 8.059332 356.886136 363.266516 18.436524 100.341674 4.628771 0
1 9.092223 181.101509 17978.986339 6.546600 310.135738 398.410813 11.558279 31.997993 4.075075 0
2 5.584087 188.313324 28748.687739 7.544869 326.678363 280.467916 8.399735 54.917862 2.559708 0
3 10.223862 248.071735 28749.716544 7.513408 393.663396 283.651634 13.789695 84.603556 2.672989 0
4 8.635849 203.361523 13672.091764 4.563009 303.309771 474.607645 12.363817 62.798309 4.401425 0
In [ ]:
water.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2011 entries, 0 to 2010
Data columns (total 10 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   ph               2011 non-null   float64
 1   Hardness         2011 non-null   float64
 2   Solids           2011 non-null   float64
 3   Chloramines      2011 non-null   float64
 4   Sulfate          2011 non-null   float64
 5   Conductivity     2011 non-null   float64
 6   Organic_carbon   2011 non-null   float64
 7   Trihalomethanes  2011 non-null   float64
 8   Turbidity        2011 non-null   float64
 9   Potability       2011 non-null   int64  
dtypes: float64(9), int64(1)
memory usage: 157.2 KB

Get the feature names, which will be needed later when writing the modular files (the transform, tuner, and trainer modules).

In [ ]:
FEATURES = water.columns.tolist()
FEATURES
Out[ ]:
['ph',
 'Hardness',
 'Solids',
 'Chloramines',
 'Sulfate',
 'Conductivity',
 'Organic_carbon',
 'Trihalomethanes',
 'Turbidity',
 'Potability']

Import TFX Libraries¶

In [ ]:
import os
import sys
import tensorflow as tf
import tfx
import tensorflow_model_analysis as tfma
import tensorflow_transform as tft
from tfx.components import (
    CsvExampleGen, StatisticsGen, SchemaGen, ExampleValidator,
    Transform, Trainer, Tuner, Evaluator, Pusher
)
from tfx.proto import example_gen_pb2, trainer_pb2, pusher_pb2
from tfx.orchestration.experimental.interactive.interactive_context import InteractiveContext
from tfx.dsl.components.common.resolver import Resolver
from tfx.dsl.input_resolution.strategies.latest_blessed_model_strategy import LatestBlessedModelStrategy
from tfx.types import Channel, standard_artifacts
In [ ]:
import keras
print('Version:')
print('---------------------------')
print(f"{'TensorFlow':15} {tf.__version__}")
print(f"{'Python':15} {sys.version.split()[0]}")  # Hanya versi Python
print(f"{'TFX':15} {tfx.__version__}")
print(f"{'TFMA':15} {tfma.__version__}")
print(f"{'Keras':15} {keras.__version__}")
Version:
---------------------------
TensorFlow      2.15.1
Python          3.10.12
TFX             1.15.1
TFMA            0.46.0
Keras           2.15.0
In [ ]:
PIPELINE_NAME = 'mkavaldo-pipeline'
SCHEMA_PIPELINE_NAME = 'water-tfdv-schema'

PIPELINE_ROOT = os.path.join('pipelines', PIPELINE_NAME)
METADATA_PATH = os.path.join('metadata', PIPELINE_NAME, 'metadata.db')
SERVING_MODEL_DIR = os.path.join('serving_model', PIPELINE_NAME)
In [ ]:
interactive_context = InteractiveContext(pipeline_root = PIPELINE_ROOT)
WARNING:absl:InteractiveContext metadata_connection_config not provided: using SQLite ML Metadata database at pipelines/mkavaldo-pipeline/metadata.sqlite.
In [ ]:
output = example_gen_pb2.Output(
    split_config = example_gen_pb2.SplitConfig(splits = [
        example_gen_pb2.SplitConfig.Split(name = "train", hash_buckets = 9),
        example_gen_pb2.SplitConfig.Split(name = "eval", hash_buckets = 1)
    ])
)

example_gen = CsvExampleGen(input_base = DATA_ROOT, output_config = output)
interactive_context.run(example_gen)
WARNING:apache_beam.runners.interactive.interactive_environment:Dependencies required for Interactive Beam PCollection visualization are not available, please use: `pip install apache-beam[interactive]` to install necessary dependencies to enable all data visualization features.
WARNING:apache_beam.io.tfrecordio:Couldn't find python-snappy so the implementation of _TFRecordUtil._masked_crc32c is not as fast as it could be.
Out[ ]:
ExecutionResult at 0x7fd0291b9f00
.execution_id 1
.component
CsvExampleGen at 0x7fd028d3af20
.inputs {}
.outputs
['examples']
Channel of type 'Examples' (1 artifact) at 0x7fd028d3b490
.type_name Examples
._artifacts
[0]
Artifact of type 'Examples' (uri: pipelines/mkavaldo-pipeline/CsvExampleGen/examples/1) at 0x7fd028d3b160
.type <class 'tfx.types.standard_artifacts.Examples'>
.uri pipelines/mkavaldo-pipeline/CsvExampleGen/examples/1
.span 0
.split_names ["train", "eval"]
.version 0
.exec_properties
['input_base'] /content/drive/MyDrive/Colab Notebooks/ML-Ops/submission-1/data/
['input_config'] { "splits": [ { "name": "single_split", "pattern": "*" } ] }
['output_config'] { "split_config": { "splits": [ { "hash_buckets": 9, "name": "train" }, { "hash_buckets": 1, "name": "eval" } ] } }
['output_data_format'] 6
['output_file_format'] 5
['custom_config'] None
['range_config'] None
['span'] 0
['version'] None
['input_fingerprint'] split:single_split,num_files:1,total_bytes:333640,xor_checksum:1725110686,sum_checksum:1725110686
.component.inputs {}
.component.outputs
['examples']
Channel of type 'Examples' (1 artifact) at 0x7fd028d3b490
.type_name Examples
._artifacts
[0]
Artifact of type 'Examples' (uri: pipelines/mkavaldo-pipeline/CsvExampleGen/examples/1) at 0x7fd028d3b160
.type <class 'tfx.types.standard_artifacts.Examples'>
.uri pipelines/mkavaldo-pipeline/CsvExampleGen/examples/1
.span 0
.split_names ["train", "eval"]
.version 0
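The split configuration above hashes each example into 10 buckets, assigning 9 to the train split and 1 to the eval split, i.e. roughly a 90/10 split. As a quick sanity check, the generated TFRecords can be read back directly; a minimal sketch, assuming the Split-train directory layout and GZIP compression implied by the artifact URI above:

import glob
import tensorflow as tf

# Path taken from the CsvExampleGen artifact URI above (directory layout is an assumption).
train_files = glob.glob(
    'pipelines/mkavaldo-pipeline/CsvExampleGen/examples/1/Split-train/*.gz')

raw_dataset = tf.data.TFRecordDataset(train_files, compression_type='GZIP')
for raw_record in raw_dataset.take(1):
    example = tf.train.Example()
    example.ParseFromString(raw_record.numpy())
    print(example)  # one serialized row of the water dataset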
In [ ]:
statistic_gen = StatisticsGen(
    examples = example_gen.outputs["examples"]
)

interactive_context.run(statistic_gen)
Out[ ]:
ExecutionResult at 0x7fd026d38280
.execution_id 2
.component
StatisticsGen at 0x7fd028d3b5b0
.inputs
['examples']
Channel of type 'Examples' (1 artifact) at 0x7fd028d3b490
.type_name Examples
._artifacts
[0]
Artifact of type 'Examples' (uri: pipelines/mkavaldo-pipeline/CsvExampleGen/examples/1) at 0x7fd028d3b160
.type <class 'tfx.types.standard_artifacts.Examples'>
.uri pipelines/mkavaldo-pipeline/CsvExampleGen/examples/1
.span 0
.split_names ["train", "eval"]
.version 0
.outputs
['statistics']
Channel of type 'ExampleStatistics' (1 artifact) at 0x7fd028d3b190
.type_name ExampleStatistics
._artifacts
[0]
Artifact of type 'ExampleStatistics' (uri: pipelines/mkavaldo-pipeline/StatisticsGen/statistics/2) at 0x7fd0cc20cf10
.type <class 'tfx.types.standard_artifacts.ExampleStatistics'>
.uri pipelines/mkavaldo-pipeline/StatisticsGen/statistics/2
.span 0
.split_names ["train", "eval"]
.exec_properties
['stats_options_json'] None
['exclude_splits'] []
.component.inputs
['examples']
Channel of type 'Examples' (1 artifact) at 0x7fd028d3b490
.type_name Examples
._artifacts
[0]
Artifact of type 'Examples' (uri: pipelines/mkavaldo-pipeline/CsvExampleGen/examples/1) at 0x7fd028d3b160
.type <class 'tfx.types.standard_artifacts.Examples'>
.uri pipelines/mkavaldo-pipeline/CsvExampleGen/examples/1
.span 0
.split_names ["train", "eval"]
.version 0
.component.outputs
['statistics']
Channel of type 'ExampleStatistics' (1 artifact) at 0x7fd028d3b190
.type_name ExampleStatistics
._artifacts
[0]
Artifact of type 'ExampleStatistics' (uri: pipelines/mkavaldo-pipeline/StatisticsGen/statistics/2) at 0x7fd0cc20cf10
.type <class 'tfx.types.standard_artifacts.ExampleStatistics'>
.uri pipelines/mkavaldo-pipeline/StatisticsGen/statistics/2
.span 0
.split_names ["train", "eval"]
In [ ]:
interactive_context.show(statistic_gen.outputs['statistics'])
Artifact at pipelines/mkavaldo-pipeline/StatisticsGen/statistics/2

'train' split:

'eval' split:

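The rendered statistics are also persisted inside the StatisticsGen artifact as a binary DatasetFeatureStatisticsList proto, so they can be reloaded outside the InteractiveContext; a sketch, assuming the FeatureStats.pb file layout of recent TFX releases:

import tensorflow_data_validation as tfdv
from tensorflow_metadata.proto.v0 import statistics_pb2

# Assumed file layout inside the StatisticsGen artifact shown above.
stats_path = 'pipelines/mkavaldo-pipeline/StatisticsGen/statistics/2/Split-train/FeatureStats.pb'

stats = statistics_pb2.DatasetFeatureStatisticsList()
with open(stats_path, 'rb') as f:
    stats.ParseFromString(f.read())

tfdv.visualize_statistics(stats)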
In [ ]:
schema_gen = SchemaGen(statistics = statistic_gen.outputs["statistics"])

interactive_context.run(schema_gen)
Out[ ]:
ExecutionResult at 0x7fd026d39840
.execution_id 3
.component
SchemaGen at 0x7fd028d3b5e0
.inputs
['statistics']
Channel of type 'ExampleStatistics' (1 artifact) at 0x7fd028d3b190
.type_name ExampleStatistics
._artifacts
[0]
Artifact of type 'ExampleStatistics' (uri: pipelines/mkavaldo-pipeline/StatisticsGen/statistics/2) at 0x7fd0cc20cf10
.type <class 'tfx.types.standard_artifacts.ExampleStatistics'>
.uri pipelines/mkavaldo-pipeline/StatisticsGen/statistics/2
.span 0
.split_names ["train", "eval"]
.outputs
['schema']
Channel of type 'Schema' (1 artifact) at 0x7fd028d3bb50
.type_name Schema
._artifacts
[0]
Artifact of type 'Schema' (uri: pipelines/mkavaldo-pipeline/SchemaGen/schema/3) at 0x7fd028d3bd00
.type <class 'tfx.types.standard_artifacts.Schema'>
.uri pipelines/mkavaldo-pipeline/SchemaGen/schema/3
.exec_properties
['infer_feature_shape'] 1
['exclude_splits'] []
.component.inputs
['statistics']
Channel of type 'ExampleStatistics' (1 artifact) at 0x7fd028d3b190
.type_name ExampleStatistics
._artifacts
[0]
Artifact of type 'ExampleStatistics' (uri: pipelines/mkavaldo-pipeline/StatisticsGen/statistics/2) at 0x7fd0cc20cf10
.type <class 'tfx.types.standard_artifacts.ExampleStatistics'>
.uri pipelines/mkavaldo-pipeline/StatisticsGen/statistics/2
.span 0
.split_names ["train", "eval"]
.component.outputs
['schema']
Channel of type 'Schema' (1 artifact) at 0x7fd028d3bb50
.type_name Schema
._artifacts
[0]
Artifact of type 'Schema' (uri: pipelines/mkavaldo-pipeline/SchemaGen/schema/3) at 0x7fd028d3bd00
.type <class 'tfx.types.standard_artifacts.Schema'>
.uri pipelines/mkavaldo-pipeline/SchemaGen/schema/3
In [ ]:
interactive_context.show(schema_gen.outputs["schema"])
Artifact at pipelines/mkavaldo-pipeline/SchemaGen/schema/3

Feature name        Type   Presence  Valency  Domain
'Chloramines'       FLOAT  required           -
'Conductivity'      FLOAT  required           -
'Hardness'          FLOAT  required           -
'Organic_carbon'    FLOAT  required           -
'Potability'        INT    required           -
'Solids'            FLOAT  required           -
'Sulfate'           FLOAT  required           -
'Trihalomethanes'   FLOAT  required           -
'Turbidity'         FLOAT  required           -
'ph'                FLOAT  required           -
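SchemaGen infers every column as a required feature with no domain constraints. The inferred schema can be curated before being reused, for example constraining ph to its physically valid 0-14 range; a sketch using TFDV, assuming the schema.pbtxt file inside the SchemaGen artifact:

import tensorflow_data_validation as tfdv
from tensorflow_metadata.proto.v0 import schema_pb2

# Assumed file layout inside the SchemaGen artifact shown above.
schema = tfdv.load_schema_text(
    'pipelines/mkavaldo-pipeline/SchemaGen/schema/3/schema.pbtxt')

# Constrain ph to its valid range before reusing the schema downstream.
tfdv.set_domain(schema, 'ph', schema_pb2.FloatDomain(min=0.0, max=14.0))
tfdv.display_schema(schema)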
In [ ]:
example_validator = ExampleValidator(
    statistics = statistic_gen.outputs["statistics"],
    schema = schema_gen.outputs["schema"]
)

interactive_context.run(example_validator)
Out[ ]:
ExecutionResult at 0x7fd026c464a0
.execution_id 4
.component
ExampleValidator at 0x7fd025a7b430
.inputs
['statistics']
Channel of type 'ExampleStatistics' (1 artifact) at 0x7fd028d3b190
.type_name ExampleStatistics
._artifacts
[0]
Artifact of type 'ExampleStatistics' (uri: pipelines/mkavaldo-pipeline/StatisticsGen/statistics/2) at 0x7fd0cc20cf10
.type <class 'tfx.types.standard_artifacts.ExampleStatistics'>
.uri pipelines/mkavaldo-pipeline/StatisticsGen/statistics/2
.span 0
.split_names ["train", "eval"]
['schema']
Channel of type 'Schema' (1 artifact) at 0x7fd028d3bb50
.type_name Schema
._artifacts
[0]
Artifact of type 'Schema' (uri: pipelines/mkavaldo-pipeline/SchemaGen/schema/3) at 0x7fd028d3bd00
.type <class 'tfx.types.standard_artifacts.Schema'>
.uri pipelines/mkavaldo-pipeline/SchemaGen/schema/3
.outputs
['anomalies']
Channel of type 'ExampleAnomalies' (1 artifact) at 0x7fd025a7b640
.type_name ExampleAnomalies
._artifacts
[0]
Artifact of type 'ExampleAnomalies' (uri: pipelines/mkavaldo-pipeline/ExampleValidator/anomalies/4) at 0x7fd028d3b9a0
.type <class 'tfx.types.standard_artifacts.ExampleAnomalies'>
.uri pipelines/mkavaldo-pipeline/ExampleValidator/anomalies/4
.span 0
.split_names ["train", "eval"]
.exec_properties
['exclude_splits'] []
['custom_validation_config'] None
.component.inputs
['statistics']
Channel of type 'ExampleStatistics' (1 artifact) at 0x7fd028d3b190
.type_name ExampleStatistics
._artifacts
[0]
Artifact of type 'ExampleStatistics' (uri: pipelines/mkavaldo-pipeline/StatisticsGen/statistics/2) at 0x7fd0cc20cf10
.type <class 'tfx.types.standard_artifacts.ExampleStatistics'>
.uri pipelines/mkavaldo-pipeline/StatisticsGen/statistics/2
.span 0
.split_names ["train", "eval"]
['schema']
Channel of type 'Schema' (1 artifact) at 0x7fd028d3bb50
.type_name Schema
._artifacts
[0]
Artifact of type 'Schema' (uri: pipelines/mkavaldo-pipeline/SchemaGen/schema/3) at 0x7fd028d3bd00
.type <class 'tfx.types.standard_artifacts.Schema'>
.uri pipelines/mkavaldo-pipeline/SchemaGen/schema/3
.component.outputs
['anomalies']
Channel of type 'ExampleAnomalies' (1 artifact) at 0x7fd025a7b640
.type_name ExampleAnomalies
._artifacts
[0]
Artifact of type 'ExampleAnomalies' (uri: pipelines/mkavaldo-pipeline/ExampleValidator/anomalies/4) at 0x7fd028d3b9a0
.type <class 'tfx.types.standard_artifacts.ExampleAnomalies'>
.uri pipelines/mkavaldo-pipeline/ExampleValidator/anomalies/4
.span 0
.split_names ["train", "eval"]
In [ ]:
interactive_context.show(example_validator.outputs["anomalies"])
Artifact at pipelines/mkavaldo-pipeline/ExampleValidator/anomalies/4

'train' split:

No anomalies found.

'eval' split:

No anomalies found.

Transform¶

In [ ]:
TRANSFORM_MODULE_FILE = "water_transform.py"
In [ ]:
%%writefile {TRANSFORM_MODULE_FILE}

import tensorflow as tf
import tensorflow_transform as tft

LABEL_KEY = 'Potability'

FEATURES = [
  'ph',
  'Hardness',
  'Solids',
  'Chloramines',
  'Sulfate',
  'Conductivity',
  'Organic_carbon',
  'Trihalomethanes',
  'Turbidity'
]

def transformed_name(key):
    return key + '_xf'

def preprocessing_fn(inputs):
    outputs = {}

    for feature in FEATURES:
        # MinMaxScaler
        # outputs[transformed_name(feature)] = tft.scale_to_0_1(inputs[feature])
        # Z-Score
        outputs[transformed_name(feature)] = tft.scale_to_z_score(inputs[feature])

    # Transform the label
    outputs[transformed_name(LABEL_KEY)] = tf.cast(inputs[LABEL_KEY], tf.int64)

    return outputs
Writing water_transform.py
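tft.scale_to_z_score is a full-pass transform: it computes the mean and standard deviation of each feature over the whole training split and rescales values to zero mean and unit variance. An illustrative numpy sketch of the same computation (not part of the pipeline):

import numpy as np

x = np.array([7.0, 8.3, 5.6, 10.2, 8.6])     # e.g. a handful of ph values
z = (x - x.mean()) / x.std()                  # what tft.scale_to_z_score produces per feature
print(round(z.mean(), 6), round(z.std(), 6))  # ~0.0 and 1.0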
In [ ]:
transform = Transform(
    examples = example_gen.outputs["examples"],
    schema = schema_gen.outputs['schema'],
    module_file = os.path.abspath(TRANSFORM_MODULE_FILE)
)

interactive_context.run(transform)
Out[ ]:
ExecutionResult at 0x7fd09806f0a0
.execution_id 5
.component
Transform at 0x7fd025a79b10
.inputs
['examples']
Channel of type 'Examples' (1 artifact) at 0x7fd028d3b490
.type_name Examples
._artifacts
[0]
Artifact of type 'Examples' (uri: pipelines/mkavaldo-pipeline/CsvExampleGen/examples/1) at 0x7fd028d3b160
.type <class 'tfx.types.standard_artifacts.Examples'>
.uri pipelines/mkavaldo-pipeline/CsvExampleGen/examples/1
.span 0
.split_names ["train", "eval"]
.version 0
['schema']
Channel of type 'Schema' (1 artifact) at 0x7fd028d3bb50
.type_name Schema
._artifacts
[0]
Artifact of type 'Schema' (uri: pipelines/mkavaldo-pipeline/SchemaGen/schema/3) at 0x7fd028d3bd00
.type <class 'tfx.types.standard_artifacts.Schema'>
.uri pipelines/mkavaldo-pipeline/SchemaGen/schema/3
.outputs
['transform_graph']
Channel of type 'TransformGraph' (1 artifact) at 0x7fd025a7ba60
.type_name TransformGraph
._artifacts
[0]
Artifact of type 'TransformGraph' (uri: pipelines/mkavaldo-pipeline/Transform/transform_graph/5) at 0x7fd025a7b280
.type <class 'tfx.types.standard_artifacts.TransformGraph'>
.uri pipelines/mkavaldo-pipeline/Transform/transform_graph/5
['transformed_examples']
Channel of type 'Examples' (1 artifact) at 0x7fd025a7bac0
.type_name Examples
._artifacts
[0]
Artifact of type 'Examples' (uri: pipelines/mkavaldo-pipeline/Transform/transformed_examples/5) at 0x7fd025a7a110
.type <class 'tfx.types.standard_artifacts.Examples'>
.uri pipelines/mkavaldo-pipeline/Transform/transformed_examples/5
.span 0
.split_names ["train", "eval"]
.version 0
['updated_analyzer_cache']
Channel of type 'TransformCache' (1 artifact) at 0x7fd025a7b970
.type_name TransformCache
._artifacts
[0]
Artifact of type 'TransformCache' (uri: pipelines/mkavaldo-pipeline/Transform/updated_analyzer_cache/5) at 0x7fd025a7a800
.type <class 'tfx.types.standard_artifacts.TransformCache'>
.uri pipelines/mkavaldo-pipeline/Transform/updated_analyzer_cache/5
['pre_transform_schema']
Channel of type 'Schema' (1 artifact) at 0x7fd025a7b7c0
.type_name Schema
._artifacts
[0]
Artifact of type 'Schema' (uri: pipelines/mkavaldo-pipeline/Transform/pre_transform_schema/5) at 0x7fd025a7b7f0
.type <class 'tfx.types.standard_artifacts.Schema'>
.uri pipelines/mkavaldo-pipeline/Transform/pre_transform_schema/5
['pre_transform_stats']
Channel of type 'ExampleStatistics' (1 artifact) at 0x7fd025a7b1c0
.type_name ExampleStatistics
._artifacts
[0]
Artifact of type 'ExampleStatistics' (uri: pipelines/mkavaldo-pipeline/Transform/pre_transform_stats/5) at 0x7fd025a7b670
.type <class 'tfx.types.standard_artifacts.ExampleStatistics'>
.uri pipelines/mkavaldo-pipeline/Transform/pre_transform_stats/5
.span 0
.split_names
['post_transform_schema']
Channel of type 'Schema' (1 artifact) at 0x7fd025a7b2e0
.type_name Schema
._artifacts
[0]
Artifact of type 'Schema' (uri: pipelines/mkavaldo-pipeline/Transform/post_transform_schema/5) at 0x7fd025a7b940
.type <class 'tfx.types.standard_artifacts.Schema'>
.uri pipelines/mkavaldo-pipeline/Transform/post_transform_schema/5
['post_transform_stats']
Channel of type 'ExampleStatistics' (1 artifact) at 0x7fd025a79240
.type_name ExampleStatistics
._artifacts
[0]
Artifact of type 'ExampleStatistics' (uri: pipelines/mkavaldo-pipeline/Transform/post_transform_stats/5) at 0x7fd025a7b610
.type <class 'tfx.types.standard_artifacts.ExampleStatistics'>
.uri pipelines/mkavaldo-pipeline/Transform/post_transform_stats/5
.span 0
.split_names
['post_transform_anomalies']
Channel of type 'ExampleAnomalies' (1 artifact) at 0x7fd025a7b070
.type_name ExampleAnomalies
._artifacts
[0]
Artifact of type 'ExampleAnomalies' (uri: pipelines/mkavaldo-pipeline/Transform/post_transform_anomalies/5) at 0x7fd025a7b730
.type <class 'tfx.types.standard_artifacts.ExampleAnomalies'>
.uri pipelines/mkavaldo-pipeline/Transform/post_transform_anomalies/5
.span 0
.split_names
.exec_properties
['module_file'] None
['preprocessing_fn'] None
['stats_options_updater_fn'] None
['force_tf_compat_v1'] 0
['custom_config'] null
['splits_config'] None
['disable_statistics'] 0
['module_path'] water_transform@pipelines/mkavaldo-pipeline/_wheels/tfx_user_code_Transform-0.0+9d6f318419064eff7aa625f190750f4cdc18d0988d635bae9f9fcc7289e4288c-py3-none-any.whl
.component.inputs
['examples']
Channel of type 'Examples' (1 artifact) at 0x7fd028d3b490
.type_name Examples
._artifacts
[0]
Artifact of type 'Examples' (uri: pipelines/mkavaldo-pipeline/CsvExampleGen/examples/1) at 0x7fd028d3b160
.type <class 'tfx.types.standard_artifacts.Examples'>
.uri pipelines/mkavaldo-pipeline/CsvExampleGen/examples/1
.span 0
.split_names ["train", "eval"]
.version 0
['schema']
Channel of type 'Schema' (1 artifact) at 0x7fd028d3bb50
.type_name Schema
._artifacts
[0]
Artifact of type 'Schema' (uri: pipelines/mkavaldo-pipeline/SchemaGen/schema/3) at 0x7fd028d3bd00
.type <class 'tfx.types.standard_artifacts.Schema'>
.uri pipelines/mkavaldo-pipeline/SchemaGen/schema/3
.component.outputs
['transform_graph']
Channel of type 'TransformGraph' (1 artifact) at 0x7fd025a7ba60
.type_name TransformGraph
._artifacts
[0]
Artifact of type 'TransformGraph' (uri: pipelines/mkavaldo-pipeline/Transform/transform_graph/5) at 0x7fd025a7b280
.type <class 'tfx.types.standard_artifacts.TransformGraph'>
.uri pipelines/mkavaldo-pipeline/Transform/transform_graph/5
['transformed_examples']
Channel of type 'Examples' (1 artifact) at 0x7fd025a7bac0
.type_name Examples
._artifacts
[0]
Artifact of type 'Examples' (uri: pipelines/mkavaldo-pipeline/Transform/transformed_examples/5) at 0x7fd025a7a110
.type <class 'tfx.types.standard_artifacts.Examples'>
.uri pipelines/mkavaldo-pipeline/Transform/transformed_examples/5
.span 0
.split_names ["train", "eval"]
.version 0
['updated_analyzer_cache']
Channel of type 'TransformCache' (1 artifact) at 0x7fd025a7b970
.type_name TransformCache
._artifacts
[0]
Artifact of type 'TransformCache' (uri: pipelines/mkavaldo-pipeline/Transform/updated_analyzer_cache/5) at 0x7fd025a7a800
.type <class 'tfx.types.standard_artifacts.TransformCache'>
.uri pipelines/mkavaldo-pipeline/Transform/updated_analyzer_cache/5
['pre_transform_schema']
Channel of type 'Schema' (1 artifact) at 0x7fd025a7b7c0
.type_name Schema
._artifacts
[0]
Artifact of type 'Schema' (uri: pipelines/mkavaldo-pipeline/Transform/pre_transform_schema/5) at 0x7fd025a7b7f0
.type <class 'tfx.types.standard_artifacts.Schema'>
.uri pipelines/mkavaldo-pipeline/Transform/pre_transform_schema/5
['pre_transform_stats']
Channel of type 'ExampleStatistics' (1 artifact) at 0x7fd025a7b1c0
.type_name ExampleStatistics
._artifacts
[0]
Artifact of type 'ExampleStatistics' (uri: pipelines/mkavaldo-pipeline/Transform/pre_transform_stats/5) at 0x7fd025a7b670
.type <class 'tfx.types.standard_artifacts.ExampleStatistics'>
.uri pipelines/mkavaldo-pipeline/Transform/pre_transform_stats/5
.span 0
.split_names
['post_transform_schema']
Channel of type 'Schema' (1 artifact) at 0x7fd025a7b2e0
.type_name Schema
._artifacts
[0]
Artifact of type 'Schema' (uri: pipelines/mkavaldo-pipeline/Transform/post_transform_schema/5) at 0x7fd025a7b940
.type <class 'tfx.types.standard_artifacts.Schema'>
.uri pipelines/mkavaldo-pipeline/Transform/post_transform_schema/5
['post_transform_stats']
Channel of type 'ExampleStatistics' (1 artifact) at 0x7fd025a79240
.type_name ExampleStatistics
._artifacts
[0]
Artifact of type 'ExampleStatistics' (uri: pipelines/mkavaldo-pipeline/Transform/post_transform_stats/5) at 0x7fd025a7b610
.type <class 'tfx.types.standard_artifacts.ExampleStatistics'>
.uri pipelines/mkavaldo-pipeline/Transform/post_transform_stats/5
.span 0
.split_names
['post_transform_anomalies']
Channel of type 'ExampleAnomalies' (1 artifact) at 0x7fd025a7b070
.type_name ExampleAnomalies
._artifacts
[0]
Artifact of type 'ExampleAnomalies' (uri: pipelines/mkavaldo-pipeline/Transform/post_transform_anomalies/5) at 0x7fd025a7b730
.type <class 'tfx.types.standard_artifacts.ExampleAnomalies'>
.uri pipelines/mkavaldo-pipeline/Transform/post_transform_anomalies/5
.span 0
.split_names

Tuner¶

In [ ]:
TUNER_MODULE_FILE = "water_tuner.py"
In [ ]:
%%writefile {TUNER_MODULE_FILE}

import os
import tensorflow_transform as tft
import tensorflow as tf
import keras_tuner as kt
from tfx.v1.components import TunerFnResult
from tfx.components.trainer.fn_args_utils import FnArgs
from water_trainer import FEATURE_KEY, transformed_name, input_fn
from tensorflow.keras import layers
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

def model_builder(hyperparameters):
    input_features = []

    for key in FEATURE_KEY:
        input_features.append(
            tf.keras.Input(shape=(1,), name=transformed_name(key))
        )

    merged_input = layers.concatenate(input_features)

    # Hyperparameters for the Dense and Dropout layers
    units_1 = hyperparameters.Choice('units_1', [128, 256, 512])
    units_2 = hyperparameters.Choice('units_2', [64, 128, 256])
    units_3 = hyperparameters.Choice('units_3', [32, 64, 128])
    dropout_rate_1 = hyperparameters.Choice('dropout_rate_1', [0.2, 0.3, 0.4])
    dropout_rate_2 = hyperparameters.Choice('dropout_rate_2', [0.3, 0.4, 0.5])

    # Layer 1
    x = layers.Dense(units_1, activation='relu')(merged_input)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(dropout_rate_1)(x)

    # Layer 2
    x = layers.Dense(units_2, activation='relu')(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(dropout_rate_2)(x)

    # Layer 3
    x = layers.Dense(units_3, activation='relu')(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(dropout_rate_2)(x)

    # Output for binary classification
    outputs = layers.Dense(1, activation='sigmoid')(x)

    model = Model(inputs=input_features, outputs=outputs)

    model.compile(
      optimizer=tf.keras.optimizers.Adam(
      learning_rate=hyperparameters.Choice('learning_rate', [0.0001, 0.00005, 0.0005, 0.001])),
      loss='binary_crossentropy',
      metrics=['accuracy']
    )

    return model

def tuner_fn(fn_args: FnArgs):
    tf_transform_output = tft.TFTransformOutput(fn_args.transform_graph_path)

    train_dataset = input_fn(fn_args.train_files, tf_transform_output, batch_size=64, num_epochs=3)
    eval_dataset = input_fn(fn_args.eval_files, tf_transform_output, batch_size=64, num_epochs=1)

    tuner = kt.RandomSearch(
        model_builder,
        objective='val_accuracy',
        max_trials=10,
        directory=fn_args.working_dir,
        project_name='kt_random_search'
    )

    # callbacks
    early_stopping = EarlyStopping(
        monitor='val_loss',
        patience=5,
        restore_best_weights=True
    )

    model_checkpoint = ModelCheckpoint(
        filepath=os.path.join(fn_args.working_dir, 'best_model.keras'),
        monitor='val_loss',
        save_best_only=True,
        mode='min'
    )

    return TunerFnResult(
        tuner=tuner,
        fit_kwargs={
            "x": train_dataset,
            'validation_data': eval_dataset,
            'steps_per_epoch': fn_args.train_steps,
            'validation_steps': fn_args.eval_steps,
            "epochs": 100,
            'callbacks': [early_stopping, model_checkpoint]
        }
    )
Overwriting water_tuner.py
In [ ]:
tuner = Tuner(
    module_file=os.path.abspath(TUNER_MODULE_FILE),
    examples=transform.outputs['transformed_examples'],
    transform_graph=transform.outputs['transform_graph'],
    schema=schema_gen.outputs['schema'],
    train_args=trainer_pb2.TrainArgs(splits=['train'], num_steps=200),
    eval_args=trainer_pb2.EvalArgs(splits=['eval'], num_steps=200),
)

interactive_context.run(tuner)
Trial 10 Complete [00h 02m 13s]
val_accuracy: 0.6744186282157898

Best val_accuracy So Far: 0.6744186282157898
Total elapsed time: 00h 07m 58s
Results summary
Results in pipelines/mkavaldo-pipeline/.temp/27/kt_random_search
Showing 10 best trials
Objective(name="val_accuracy", direction="max")

Trial 04 summary
Hyperparameters:
units_1: 128
units_2: 64
units_3: 128
dropout_rate_1: 0.3
dropout_rate_2: 0.4
learning_rate: 5e-05
Score: 0.6744186282157898

Trial 09 summary
Hyperparameters:
units_1: 256
units_2: 128
units_3: 128
dropout_rate_1: 0.3
dropout_rate_2: 0.5
learning_rate: 5e-05
Score: 0.6744186282157898

Trial 00 summary
Hyperparameters:
units_1: 512
units_2: 128
units_3: 32
dropout_rate_1: 0.2
dropout_rate_2: 0.5
learning_rate: 0.0005
Score: 0.669767439365387

Trial 02 summary
Hyperparameters:
units_1: 256
units_2: 64
units_3: 64
dropout_rate_1: 0.4
dropout_rate_2: 0.4
learning_rate: 0.0005
Score: 0.669767439365387

Trial 01 summary
Hyperparameters:
units_1: 512
units_2: 128
units_3: 128
dropout_rate_1: 0.2
dropout_rate_2: 0.5
learning_rate: 0.0005
Score: 0.6651162505149841

Trial 08 summary
Hyperparameters:
units_1: 512
units_2: 64
units_3: 32
dropout_rate_1: 0.4
dropout_rate_2: 0.4
learning_rate: 0.001
Score: 0.6604651212692261

Trial 07 summary
Hyperparameters:
units_1: 512
units_2: 128
units_3: 64
dropout_rate_1: 0.3
dropout_rate_2: 0.5
learning_rate: 0.0001
Score: 0.6511628031730652

Trial 06 summary
Hyperparameters:
units_1: 512
units_2: 64
units_3: 128
dropout_rate_1: 0.4
dropout_rate_2: 0.3
learning_rate: 5e-05
Score: 0.6465116143226624

Trial 03 summary
Hyperparameters:
units_1: 256
units_2: 128
units_3: 64
dropout_rate_1: 0.4
dropout_rate_2: 0.4
learning_rate: 0.0005
Score: 0.6418604850769043

Trial 05 summary
Hyperparameters:
units_1: 256
units_2: 256
units_3: 32
dropout_rate_1: 0.3
dropout_rate_2: 0.3
learning_rate: 0.0001
Score: 0.6372092962265015
Out[ ]:
ExecutionResult at 0x7fd0110a4d00
.execution_id 27
.component
Tuner at 0x7fd003729bd0
.inputs
['examples']
Channel of type 'Examples' (1 artifact) at 0x7fd025a7bac0
.type_name Examples
._artifacts
[0]
Artifact of type 'Examples' (uri: pipelines/mkavaldo-pipeline/Transform/transformed_examples/5) at 0x7fd025a7a110
.type <class 'tfx.types.standard_artifacts.Examples'>
.uri pipelines/mkavaldo-pipeline/Transform/transformed_examples/5
.span 0
.split_names ["train", "eval"]
.version 0
['schema']
Channel of type 'Schema' (1 artifact) at 0x7fd028d3bb50
.type_name Schema
._artifacts
[0]
Artifact of type 'Schema' (uri: pipelines/mkavaldo-pipeline/SchemaGen/schema/3) at 0x7fd028d3bd00
.type <class 'tfx.types.standard_artifacts.Schema'>
.uri pipelines/mkavaldo-pipeline/SchemaGen/schema/3
['transform_graph']
Channel of type 'TransformGraph' (1 artifact) at 0x7fd025a7ba60
.type_name TransformGraph
._artifacts
[0]
Artifact of type 'TransformGraph' (uri: pipelines/mkavaldo-pipeline/Transform/transform_graph/5) at 0x7fd025a7b280
.type <class 'tfx.types.standard_artifacts.TransformGraph'>
.uri pipelines/mkavaldo-pipeline/Transform/transform_graph/5
.outputs
['best_hyperparameters']
Channel of type 'HyperParameters' (1 artifact) at 0x7fd0032fb1c0
.type_name HyperParameters
._artifacts
[0]
Artifact of type 'HyperParameters' (uri: pipelines/mkavaldo-pipeline/Tuner/best_hyperparameters/27) at 0x7fd0032fa650
.type <class 'tfx.types.standard_artifacts.HyperParameters'>
.uri pipelines/mkavaldo-pipeline/Tuner/best_hyperparameters/27
['tuner_results']
Channel of type 'TunerResults' (1 artifact) at 0x7fd0032fbe20
.type_name TunerResults
._artifacts
[0]
Artifact of type 'TunerResults' (uri: pipelines/mkavaldo-pipeline/Tuner/tuner_results/27) at 0x7fd0032f8e80
.type <class 'tfx.types.standard_artifacts.TunerResults'>
.uri pipelines/mkavaldo-pipeline/Tuner/tuner_results/27
.exec_properties
['module_file'] None
['tuner_fn'] None
['train_args'] { "num_steps": 200, "splits": [ "train" ] }
['eval_args'] { "num_steps": 200, "splits": [ "eval" ] }
['tune_args'] None
['custom_config'] null
['module_path'] water_tuner@pipelines/mkavaldo-pipeline/_wheels/tfx_user_code_Tuner-0.0+234bc19118454338759d6413d88a5a84ec488197feaf97dc9931d0b45f07e93c-py3-none-any.whl
.component.inputs
['examples']
Channel of type 'Examples' (1 artifact) at 0x7fd025a7bac0
.type_name Examples
._artifacts
[0]
Artifact of type 'Examples' (uri: pipelines/mkavaldo-pipeline/Transform/transformed_examples/5) at 0x7fd025a7a110
.type <class 'tfx.types.standard_artifacts.Examples'>
.uri pipelines/mkavaldo-pipeline/Transform/transformed_examples/5
.span 0
.split_names ["train", "eval"]
.version 0
['schema']
Channel of type 'Schema' (1 artifact) at 0x7fd028d3bb50
.type_name Schema
._artifacts
[0]
Artifact of type 'Schema' (uri: pipelines/mkavaldo-pipeline/SchemaGen/schema/3) at 0x7fd028d3bd00
.type <class 'tfx.types.standard_artifacts.Schema'>
.uri pipelines/mkavaldo-pipeline/SchemaGen/schema/3
['transform_graph']
Channel of type 'TransformGraph' (1 artifact) at 0x7fd025a7ba60
.type_name TransformGraph
._artifacts
[0]
Artifact of type 'TransformGraph' (uri: pipelines/mkavaldo-pipeline/Transform/transform_graph/5) at 0x7fd025a7b280
.type <class 'tfx.types.standard_artifacts.TransformGraph'>
.uri pipelines/mkavaldo-pipeline/Transform/transform_graph/5
.component.outputs
['best_hyperparameters']
Channel of type 'HyperParameters' (1 artifact) at 0x7fd0032fb1c0
.type_name HyperParameters
._artifacts
[0]
Artifact of type 'HyperParameters' (uri: pipelines/mkavaldo-pipeline/Tuner/best_hyperparameters/27) at 0x7fd0032fa650
.type <class 'tfx.types.standard_artifacts.HyperParameters'>
.uri pipelines/mkavaldo-pipeline/Tuner/best_hyperparameters/27
['tuner_results']
Channel of type 'TunerResults' (1 artifact) at 0x7fd0032fbe20
.type_name TunerResults
._artifacts
[0]
Artifact of type 'TunerResults' (uri: pipelines/mkavaldo-pipeline/Tuner/tuner_results/27) at 0x7fd0032f8e80
.type <class 'tfx.types.standard_artifacts.TunerResults'>
.uri pipelines/mkavaldo-pipeline/Tuner/tuner_results/27
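The best trial (learning rate 5e-05 with 128/64/128 units) is stored in the best_hyperparameters artifact as JSON and is what the Trainer below receives through its hyperparameters channel. A sketch of reading it back, assuming the best_hyperparameters.txt file name and 'values' key used by the Tuner executor:

import json

# Assumed file name and JSON layout inside the Tuner artifact shown above.
hp_path = 'pipelines/mkavaldo-pipeline/Tuner/best_hyperparameters/27/best_hyperparameters.txt'
with open(hp_path) as f:
    best_hp = json.load(f)
print(best_hp['values'])  # e.g. {'units_1': 128, 'units_2': 64, ..., 'learning_rate': 5e-05}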

Trainer¶

In [ ]:
TRAINER_MODULE_FILE = "water_trainer.py"
In [ ]:
%%writefile {TRAINER_MODULE_FILE}

import os
import tensorflow as tf
import tensorflow_transform as tft
from tensorflow.keras import layers
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tfx.components.trainer.fn_args_utils import FnArgs

LABEL_KEY = 'Potability'
FEATURE_KEY = [
    'ph',
    'Hardness',
    'Solids',
    'Chloramines',
    'Sulfate',
    'Conductivity',
    'Organic_carbon',
    'Trihalomethanes',
    'Turbidity'
]

def transformed_name(key):
    return key + "_xf"

def gzip_reader_fn(filenames):
    return tf.data.TFRecordDataset(filenames, compression_type='GZIP')

def get_serve_tf_examples_fn(model, tf_transform_output):
    model.tft_layer = tf_transform_output.transform_features_layer()

    @tf.function
    def serve_tf_examples_fn(serialized_tf_examples):
        feature_spec = tf_transform_output.raw_feature_spec()
        feature_spec.pop(LABEL_KEY)
        parsed_features = tf.io.parse_example(
            serialized_tf_examples, feature_spec
        )

        transformed_features = model.tft_layer(parsed_features)
        outputs = model(transformed_features)
        return {"outputs": outputs}

    return serve_tf_examples_fn

def input_fn(file_pattern, tf_transform_output, batch_size=64, num_epochs=None) -> tf.data.Dataset:
    transform_feature_spec = (
        tf_transform_output.transformed_feature_spec().copy()
    )

    dataset = tf.data.experimental.make_batched_features_dataset(
        file_pattern=file_pattern,
        batch_size=batch_size,
        features=transform_feature_spec,
        reader=gzip_reader_fn,
        label_key=transformed_name(LABEL_KEY),
        num_epochs=num_epochs
    )

    dataset = dataset.repeat()

    return dataset

def get_model(show_summary=True):
    input_features = []
    for key in FEATURE_KEY:
        input_features.append(
            tf.keras.Input(shape=(1,), name=transformed_name(key))
        )

    merged_input = layers.concatenate(input_features)

    # Dense layer 1
    x = layers.Dense(256, activation="relu")(merged_input)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.3)(x)

    # Dense layer 2
    x = layers.Dense(128, activation="relu")(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.3)(x)

    # Dense layer 3
    x = layers.Dense(64, activation="relu")(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.3)(x)

    # Dense layer 4
    x = layers.Dense(32, activation="relu")(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.3)(x)

    # Output layer for binary classification
    outputs = layers.Dense(1, activation="sigmoid")(x)

    model = Model(inputs=input_features, outputs=outputs)

    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
        loss='binary_crossentropy',
        metrics=['accuracy']
    )

    return model

def run_fn(fn_args: FnArgs):
    tf_transform_output = tft.TFTransformOutput(fn_args.transform_output)

    train_dataset = input_fn(fn_args.train_files, tf_transform_output, batch_size=32, num_epochs=3)
    eval_dataset = input_fn(fn_args.eval_files, tf_transform_output, batch_size=32, num_epochs=1)

    model = get_model()

    log_dir = os.path.join(os.path.dirname(fn_args.serving_model_dir), "logs")
    tensorboard_callback = tf.keras.callbacks.TensorBoard(
        log_dir=log_dir, update_freq="batch"
    )

    checkpoint_callback = ModelCheckpoint(
        filepath=os.path.join(fn_args.serving_model_dir, 'best_model.keras'),
        save_best_only=True,
        monitor='val_loss',
        mode='min'
    )

    early_stopping_callback = EarlyStopping(
        monitor='val_loss',
        patience=5,
        mode='min',
        restore_best_weights=True
    )

    model.fit(
        train_dataset,
        steps_per_epoch=fn_args.train_steps,
        validation_data=eval_dataset,
        validation_steps=fn_args.eval_steps,
        callbacks=[tensorboard_callback, checkpoint_callback, early_stopping_callback],
        epochs=300
    )

    signatures = {
        "serving_default": get_serve_tf_examples_fn(
            model, tf_transform_output
        ).get_concrete_function(
            tf.TensorSpec(shape=[None], dtype=tf.string, name="examples")
        ),
    }

    model.save(
        fn_args.serving_model_dir, save_format="tf", signatures=signatures
    )
Overwriting water_trainer.py
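Note that run_fn above builds a fixed 256/128/64/32 architecture and never reads fn_args.hyperparameters, so the Tuner's best trial does not actually influence training. If the tuned values should drive the model, run_fn could rebuild them into a KerasTuner HyperParameters object; a hedged sketch (model_builder is reused from water_tuner.py, and the rest of run_fn would continue exactly as in the module above):

import keras_tuner as kt
from water_tuner import model_builder  # reuse the tuned architecture builder

def run_fn(fn_args):
    # fn_args.hyperparameters carries the best_hyperparameters artifact as a config dict.
    if fn_args.hyperparameters:
        hparams = kt.HyperParameters.from_config(fn_args.hyperparameters)
        model = model_builder(hparams)
    else:
        model = get_model()
    # ...training, callbacks, and model.save() continue as in water_trainer.py above.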
In [ ]:
trainer = Trainer(
    module_file = os.path.abspath(TRAINER_MODULE_FILE),
    examples = transform.outputs['transformed_examples'],
    transform_graph = transform.outputs['transform_graph'],
    schema = schema_gen.outputs['schema'],
    hyperparameters=tuner.outputs['best_hyperparameters'],
    train_args=trainer_pb2.TrainArgs(splits=['train'], num_steps=1000),
    eval_args=trainer_pb2.EvalArgs(splits=['eval'], num_steps=1000),
)

interactive_context.run(trainer)
WARNING:absl:Examples artifact does not have payload_format custom property. Falling back to FORMAT_TF_EXAMPLE
WARNING:absl:Examples artifact does not have payload_format custom property. Falling back to FORMAT_TF_EXAMPLE
WARNING:absl:Examples artifact does not have payload_format custom property. Falling back to FORMAT_TF_EXAMPLE
Epoch 1/300
1000/1000 [==============================] - 13s 10ms/step - loss: 0.7781 - accuracy: 0.5511 - val_loss: 0.6857 - val_accuracy: 0.5953
Epoch 2/300
1000/1000 [==============================] - 12s 12ms/step - loss: 0.6878 - accuracy: 0.6028 - val_loss: 0.6651 - val_accuracy: 0.6372
Epoch 3/300
1000/1000 [==============================] - 11s 11ms/step - loss: 0.6458 - accuracy: 0.6398 - val_loss: 0.6479 - val_accuracy: 0.6604
Epoch 4/300
1000/1000 [==============================] - 10s 10ms/step - loss: 0.6219 - accuracy: 0.6633 - val_loss: 0.6367 - val_accuracy: 0.6511
Epoch 5/300
1000/1000 [==============================] - 11s 11ms/step - loss: 0.6047 - accuracy: 0.6755 - val_loss: 0.6410 - val_accuracy: 0.6465
Epoch 6/300
1000/1000 [==============================] - 11s 11ms/step - loss: 0.5917 - accuracy: 0.6882 - val_loss: 0.6393 - val_accuracy: 0.6512
Epoch 7/300
1000/1000 [==============================] - 12s 12ms/step - loss: 0.5812 - accuracy: 0.6972 - val_loss: 0.6341 - val_accuracy: 0.6419
Epoch 8/300
1000/1000 [==============================] - 10s 10ms/step - loss: 0.5709 - accuracy: 0.7048 - val_loss: 0.6382 - val_accuracy: 0.6510
Epoch 9/300
1000/1000 [==============================] - 10s 10ms/step - loss: 0.5647 - accuracy: 0.7108 - val_loss: 0.6375 - val_accuracy: 0.6696
Epoch 10/300
1000/1000 [==============================] - 11s 11ms/step - loss: 0.5516 - accuracy: 0.7209 - val_loss: 0.6373 - val_accuracy: 0.6465
Epoch 11/300
1000/1000 [==============================] - 12s 12ms/step - loss: 0.5451 - accuracy: 0.7258 - val_loss: 0.6383 - val_accuracy: 0.6327
Epoch 12/300
1000/1000 [==============================] - 10s 10ms/step - loss: 0.5390 - accuracy: 0.7282 - val_loss: 0.6387 - val_accuracy: 0.6373
Out[ ]:
ExecutionResult at 0x7fd0048b02e0
.execution_id 30
.component
Trainer at 0x7fd00d10b0a0
.inputs
['examples']
Channel of type 'Examples' (1 artifact) at 0x7fd025a7bac0
.type_name Examples
._artifacts
[0]
Artifact of type 'Examples' (uri: pipelines/mkavaldo-pipeline/Transform/transformed_examples/5) at 0x7fd025a7a110
.type <class 'tfx.types.standard_artifacts.Examples'>
.uri pipelines/mkavaldo-pipeline/Transform/transformed_examples/5
.span 0
.split_names ["train", "eval"]
.version 0
['transform_graph']
Channel of type 'TransformGraph' (1 artifact) at 0x7fd025a7ba60
.type_name TransformGraph
._artifacts
[0]
Artifact of type 'TransformGraph' (uri: pipelines/mkavaldo-pipeline/Transform/transform_graph/5) at 0x7fd025a7b280
.type <class 'tfx.types.standard_artifacts.TransformGraph'>
.uri pipelines/mkavaldo-pipeline/Transform/transform_graph/5
['schema']
Channel of type 'Schema' (1 artifact) at 0x7fd028d3bb50
.type_name Schema
._artifacts
[0]
Artifact of type 'Schema' (uri: pipelines/mkavaldo-pipeline/SchemaGen/schema/3) at 0x7fd028d3bd00
.type <class 'tfx.types.standard_artifacts.Schema'>
.uri pipelines/mkavaldo-pipeline/SchemaGen/schema/3
['hyperparameters']
Channel of type 'HyperParameters' (1 artifact) at 0x7fd0032fb1c0
.type_name HyperParameters
._artifacts
[0]
Artifact of type 'HyperParameters' (uri: pipelines/mkavaldo-pipeline/Tuner/best_hyperparameters/27) at 0x7fd0032fa650
.type <class 'tfx.types.standard_artifacts.HyperParameters'>
.uri pipelines/mkavaldo-pipeline/Tuner/best_hyperparameters/27
.outputs
['model']
Channel of type 'Model' (1 artifact) at 0x7fd0048b39a0
.type_name Model
._artifacts
[0]
Artifact of type 'Model' (uri: pipelines/mkavaldo-pipeline/Trainer/model/30) at 0x7fd0048b0700
.type <class 'tfx.types.standard_artifacts.Model'>
.uri pipelines/mkavaldo-pipeline/Trainer/model/30
['model_run']
Channel of type 'ModelRun' (1 artifact) at 0x7fd0048b3a00
.type_name ModelRun
._artifacts
[0]
Artifact of type 'ModelRun' (uri: pipelines/mkavaldo-pipeline/Trainer/model_run/30) at 0x7fd0048b20b0
.type <class 'tfx.types.standard_artifacts.ModelRun'>
.uri pipelines/mkavaldo-pipeline/Trainer/model_run/30
.exec_properties
['train_args'] { "num_steps": 1000, "splits": [ "train" ] }
['eval_args'] { "num_steps": 1000, "splits": [ "eval" ] }
['module_file'] None
['run_fn'] None
['trainer_fn'] None
['custom_config'] null
['module_path'] water_trainer@pipelines/mkavaldo-pipeline/_wheels/tfx_user_code_Trainer-0.0+1199d551e4ddbbe74d9afd76c4f66e07678fcf32714b198de8b60c8b4402e596-py3-none-any.whl
.component.inputs
['examples']
Channel of type 'Examples' (1 artifact) at 0x7fd025a7bac0
.type_name Examples
._artifacts
[0]
Artifact of type 'Examples' (uri: pipelines/mkavaldo-pipeline/Transform/transformed_examples/5) at 0x7fd025a7a110
.type <class 'tfx.types.standard_artifacts.Examples'>
.uri pipelines/mkavaldo-pipeline/Transform/transformed_examples/5
.span 0
.split_names ["train", "eval"]
.version 0
['transform_graph']
Channel of type 'TransformGraph' (1 artifact) at 0x7fd025a7ba60
.type_name TransformGraph
._artifacts
[0]
Artifact of type 'TransformGraph' (uri: pipelines/mkavaldo-pipeline/Transform/transform_graph/5) at 0x7fd025a7b280
.type <class 'tfx.types.standard_artifacts.TransformGraph'>
.uri pipelines/mkavaldo-pipeline/Transform/transform_graph/5
['schema']
Channel of type 'Schema' (1 artifact) at 0x7fd028d3bb50
.type_name Schema
._artifacts
[0]
Artifact of type 'Schema' (uri: pipelines/mkavaldo-pipeline/SchemaGen/schema/3) at 0x7fd028d3bd00
.type <class 'tfx.types.standard_artifacts.Schema'>
.uri pipelines/mkavaldo-pipeline/SchemaGen/schema/3
['hyperparameters']
Channel of type 'HyperParameters' (1 artifact) at 0x7fd0032fb1c0
.type_name HyperParameters
._artifacts
[0]
Artifact of type 'HyperParameters' (uri: pipelines/mkavaldo-pipeline/Tuner/best_hyperparameters/27) at 0x7fd0032fa650
.type <class 'tfx.types.standard_artifacts.HyperParameters'>
.uri pipelines/mkavaldo-pipeline/Tuner/best_hyperparameters/27
.component.outputs
['model']
Channel of type 'Model' (1 artifact) at 0x7fd0048b39a0
.type_name Model
._artifacts
[0]
Artifact of type 'Model' (uri: pipelines/mkavaldo-pipeline/Trainer/model/30) at 0x7fd0048b0700
.type <class 'tfx.types.standard_artifacts.Model'>
.uri pipelines/mkavaldo-pipeline/Trainer/model/30
['model_run']
Channel of type 'ModelRun' (1 artifact) at 0x7fd0048b3a00
.type_name ModelRun
._artifacts
[0]
Artifact of type 'ModelRun' (uri: pipelines/mkavaldo-pipeline/Trainer/model_run/30) at 0x7fd0048b20b0
.type <class 'tfx.types.standard_artifacts.ModelRun'>
.uri pipelines/mkavaldo-pipeline/Trainer/model_run/30
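Because run_fn attaches a serving_default signature that parses serialized tf.Example protos, the exported SavedModel can be checked directly; a sketch, assuming the Format-Serving sub-directory that the Trainer uses inside its model artifact:

import tensorflow as tf

# Assumed layout inside the Trainer model artifact shown above.
loaded = tf.saved_model.load('pipelines/mkavaldo-pipeline/Trainer/model/30/Format-Serving')
print(list(loaded.signatures.keys()))  # expected: ['serving_default']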

Resolver¶

In [ ]:
from tfx.types.standard_artifacts import Model, ModelBlessing
In [ ]:
model_resolver = Resolver(
    strategy_class = LatestBlessedModelStrategy,
    model = Channel(type = Model),
    model_blessing = Channel(type = ModelBlessing)
).with_id('Latest_blessed_model_resolver')

interactive_context.run(model_resolver)
Out[ ]:
ExecutionResult at 0x7fd00b3bb9a0
.execution_id 31
.component <tfx.dsl.components.common.resolver.Resolver object at 0x7fd001d38f70>
.component.inputs
['model'] ResolvedChannel(artifact_type=Model, LatestBlessedModelStrategy(Dict(model=Input(), model_blessing=Input()))["model"])
['model_blessing'] ResolvedChannel(artifact_type=ModelBlessing, LatestBlessedModelStrategy(Dict(model=Input(), model_blessing=Input()))["model_blessing"])
.component.outputs
['model']
Channel of type 'Model' (0 artifacts) at 0x7fd001d39b10
.type_name Model
._artifacts []
['model_blessing']
Channel of type 'ModelBlessing' (0 artifacts) at 0x7fd001d39990
.type_name ModelBlessing
._artifacts []

Evaluator¶

In [ ]:
metrics = [
    tfma.metrics.ExampleCount(name='example_count'),
    tf.keras.metrics.BinaryCrossentropy(name='binary_crossentropy'),
    tf.keras.metrics.BinaryAccuracy(name='accuracy'),
    tf.keras.metrics.Precision(name='precision', thresholds=[0.5]),
    tf.keras.metrics.Recall(name='recall', thresholds=[0.5]),
    tfma.metrics.ConfusionMatrixPlot(name='confusion_matrix_plot')  # Make sure this renders correctly
]

metrics_specs = tfma.metrics.specs_from_metrics(metrics)

eval_config = tfma.EvalConfig(
    model_specs=[tfma.ModelSpec(label_key='Potability')],
    slicing_specs=[tfma.SlicingSpec()],
    metrics_specs=metrics_specs
)
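None of the metrics above carries a threshold, so the Evaluator only computes metrics and has no criterion for validating the candidate model against the baseline. A hedged sketch of how a blessing threshold on accuracy could be expressed in the same eval_config:

# Hypothetical variant: bless the model only if accuracy reaches 0.6
# and does not regress against the latest blessed baseline.
eval_config_with_threshold = tfma.EvalConfig(
    model_specs=[tfma.ModelSpec(label_key='Potability')],
    slicing_specs=[tfma.SlicingSpec()],
    metrics_specs=[
        tfma.MetricsSpec(metrics=[
            tfma.MetricConfig(
                class_name='BinaryAccuracy',
                threshold=tfma.MetricThreshold(
                    value_threshold=tfma.GenericValueThreshold(lower_bound={'value': 0.6}),
                    change_threshold=tfma.GenericChangeThreshold(
                        direction=tfma.MetricDirection.HIGHER_IS_BETTER,
                        absolute={'value': -1e-10}
                    )
                )
            )
        ])
    ]
)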
In [ ]:
evaluator = Evaluator(
    examples=example_gen.outputs['examples'],
    model=trainer.outputs['model'],
    baseline_model=model_resolver.outputs['model'],
    eval_config=eval_config
)

interactive_context.run(evaluator)
/usr/local/lib/python3.10/dist-packages/tensorflow_model_analysis/metrics/binary_confusion_matrices.py:155: RuntimeWarning: invalid value encountered in divide
  false_omission_rate = fn / predicated_negatives
Out[ ]:
ExecutionResult at 0x7fd0110e2b60
.execution_id 32
.component
Evaluator at 0x7fd001d67a00
.inputs
['examples']
Channel of type 'Examples' (1 artifact) at 0x7fd028d3b490
.type_name Examples
._artifacts
[0]
Artifact of type 'Examples' (uri: pipelines/mkavaldo-pipeline/CsvExampleGen/examples/1) at 0x7fd028d3b160
.type <class 'tfx.types.standard_artifacts.Examples'>
.uri pipelines/mkavaldo-pipeline/CsvExampleGen/examples/1
.span 0
.split_names ["train", "eval"]
.version 0
['model']
Channel of type 'Model' (1 artifact) at 0x7fd0048b39a0
.type_name Model
._artifacts
[0]
Artifact of type 'Model' (uri: pipelines/mkavaldo-pipeline/Trainer/model/30) at 0x7fd0048b0700
.type <class 'tfx.types.standard_artifacts.Model'>
.uri pipelines/mkavaldo-pipeline/Trainer/model/30
['baseline_model']
Channel of type 'Model' (0 artifacts) at 0x7fd001d39b10
.type_name Model
._artifacts []
.outputs
['evaluation']
Channel of type 'ModelEvaluation' (1 artifact) at 0x7fd001d657b0
.type_name ModelEvaluation
._artifacts
[0]
Artifact of type 'ModelEvaluation' (uri: pipelines/mkavaldo-pipeline/Evaluator/evaluation/32) at 0x7fd001d67b50
.type <class 'tfx.types.standard_artifacts.ModelEvaluation'>
.uri pipelines/mkavaldo-pipeline/Evaluator/evaluation/32
['blessing']
Channel of type 'ModelBlessing' (1 artifact) at 0x7fd001d64f10
.type_name ModelBlessing
._artifacts
[0]
Artifact of type 'ModelBlessing' (uri: pipelines/mkavaldo-pipeline/Evaluator/blessing/32) at 0x7fd001d66fb0
.type <class 'tfx.types.standard_artifacts.ModelBlessing'>
.uri pipelines/mkavaldo-pipeline/Evaluator/blessing/32
.exec_properties
['eval_config'] { "metrics_specs": [ { "example_weights": { "unweighted": true }, "metrics": [ { "class_name": "ExampleCount", "config": "{\"name\": \"example_count\"}" } ] }, { "example_weights": { "weighted": true }, "metrics": [ { "class_name": "WeightedExampleCount", "config": "{\"name\": \"weighted_example_count\"}" } ] }, { "metrics": [ { "class_name": "ExampleCount", "config": "{\"name\": \"example_count\"}" }, { "class_name": "BinaryCrossentropy", "config": "{\"dtype\": \"float32\", \"from_logits\": false, \"label_smoothing\": 0, \"name\": \"binary_crossentropy\"}", "module": "keras.src.metrics.probabilistic_metrics" }, { "class_name": "BinaryAccuracy", "config": "{\"name\": \"accuracy\", \"threshold\": 0.5}" }, { "class_name": "Precision", "config": "{\"class_id\": null, \"name\": \"precision\", \"thresholds\": [0.5], \"top_k\": null}" }, { "class_name": "Recall", "config": "{\"class_id\": null, \"name\": \"recall\", \"thresholds\": [0.5], \"top_k\": null}" }, { "class_name": "ConfusionMatrixPlot", "config": "{\"name\": \"confusion_matrix_plot\", \"num_thresholds\": 1000}" } ] } ], "model_specs": [ { "label_key": "Potability" } ], "slicing_specs": [ {} ] }
['feature_slicing_spec'] None
['fairness_indicator_thresholds'] null
['example_splits'] null
['module_file'] None
['module_path'] None
.component.inputs
['examples']
Channel of type 'Examples' (1 artifact) at 0x7fd028d3b490
.type_name Examples
._artifacts
[0]
Artifact of type 'Examples' (uri: pipelines/mkavaldo-pipeline/CsvExampleGen/examples/1) at 0x7fd028d3b160
.type <class 'tfx.types.standard_artifacts.Examples'>
.uri pipelines/mkavaldo-pipeline/CsvExampleGen/examples/1
.span 0
.split_names ["train", "eval"]
.version 0
['model']
Channel of type 'Model' (1 artifact) at 0x7fd0048b39a0
.type_name Model
._artifacts
[0]
Artifact of type 'Model' (uri: pipelines/mkavaldo-pipeline/Trainer/model/30) at 0x7fd0048b0700
.type <class 'tfx.types.standard_artifacts.Model'>
.uri pipelines/mkavaldo-pipeline/Trainer/model/30
['baseline_model']
Channel of type 'Model' (0 artifacts) at 0x7fd001d39b10
.type_name Model
._artifacts []
.component.outputs
['evaluation']
Channel of type 'ModelEvaluation' (1 artifact) at 0x7fd001d657b0
.type_name ModelEvaluation
._artifacts
[0]
Artifact of type 'ModelEvaluation' (uri: pipelines/mkavaldo-pipeline/Evaluator/evaluation/32) at 0x7fd001d67b50
.type <class 'tfx.types.standard_artifacts.ModelEvaluation'>
.uri pipelines/mkavaldo-pipeline/Evaluator/evaluation/32
['blessing']
Channel of type 'ModelBlessing' (1 artifact) at 0x7fd001d64f10
.type_name ModelBlessing
._artifacts
[0]
Artifact of type 'ModelBlessing' (uri: pipelines/mkavaldo-pipeline/Evaluator/blessing/32) at 0x7fd001d66fb0
.type <class 'tfx.types.standard_artifacts.ModelBlessing'>
.uri pipelines/mkavaldo-pipeline/Evaluator/blessing/32
In [ ]:
eval_result = evaluator.outputs['evaluation'].get()[0].uri
tfma_result = tfma.load_eval_result(eval_result)
tfma.view.render_slicing_metrics(tfma_result)
tfma.addons.fairness.view.widget_view.render_fairness_indicator(
    tfma_result
)
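Besides the rendered widgets, the overall (unsliced) metric values can be pulled out of the loaded result programmatically; a small sketch:

# Print the metrics computed for the overall slice (the empty slice key).
for slice_key, metric_values in tfma_result.slicing_metrics:
    if not slice_key:
        print(metric_values)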

Pusher¶

In [ ]:
pusher = Pusher(
    model = trainer.outputs['model'],
    push_destination = pusher_pb2.PushDestination(
        filesystem = pusher_pb2.PushDestination.Filesystem(
            base_directory = "serving_model_dir/water-prediction-model"
        )
    )
)

interactive_context.run(pusher)
WARNING:absl:Pusher is going to push the model without validation. Consider using Evaluator or InfraValidator in your pipeline.
Out[ ]:
ExecutionResult at 0x7fd0077bb700
.execution_id 33
.component
Pusher at 0x7fd0058a3be0
.inputs
['model']
Channel of type 'Model' (1 artifact) at 0x7fd0048b39a0
.type_name Model
._artifacts
[0]
Artifact of type 'Model' (uri: pipelines/mkavaldo-pipeline/Trainer/model/30) at 0x7fd0048b0700
.type <class 'tfx.types.standard_artifacts.Model'>
.uri pipelines/mkavaldo-pipeline/Trainer/model/30
.outputs
['pushed_model']
Channel of type 'PushedModel' (1 artifact) at 0x7fd0084d4d00
.type_name PushedModel
._artifacts
[0]
Artifact of type 'PushedModel' (uri: pipelines/mkavaldo-pipeline/Pusher/pushed_model/33) at 0x7fd0032ebf70
.type <class 'tfx.types.standard_artifacts.PushedModel'>
.uri pipelines/mkavaldo-pipeline/Pusher/pushed_model/33
.exec_properties
['push_destination'] { "filesystem": { "base_directory": "serving_model_dir/water-prediction-model" } }
['custom_config'] null
.component.inputs
['model']
Channel of type 'Model' (1 artifact) at 0x7fd0048b39a0
.type_name Model
._artifacts
[0]
Artifact of type 'Model' (uri: pipelines/mkavaldo-pipeline/Trainer/model/30) at 0x7fd0048b0700
.type <class 'tfx.types.standard_artifacts.Model'>
.uri pipelines/mkavaldo-pipeline/Trainer/model/30
.component.outputs
['pushed_model']
Channel of type 'PushedModel' (1 artifact) at 0x7fd0084d4d00
.type_name PushedModel
._artifacts
[0]
Artifact of type 'PushedModel' (uri: pipelines/mkavaldo-pipeline/Pusher/pushed_model/33) at 0x7fd0032ebf70
.type <class 'tfx.types.standard_artifacts.PushedModel'>
.uri pipelines/mkavaldo-pipeline/Pusher/pushed_model/33
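The Pusher above is not wired to the Evaluator, which is why the "push the model without validation" warning is emitted. A sketch of the conditional variant, gating the push on the Evaluator blessing:

# Hypothetical variant: push only when the Evaluator blesses the model.
pusher = Pusher(
    model=trainer.outputs['model'],
    model_blessing=evaluator.outputs['blessing'],
    push_destination=pusher_pb2.PushDestination(
        filesystem=pusher_pb2.PushDestination.Filesystem(
            base_directory='serving_model_dir/water-prediction-model'
        )
    )
)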

Download¶

In [ ]:
import shutil

shutil.make_archive('serving_model', 'zip', '/content/serving_model_dir')
shutil.make_archive('pipeline', 'zip', '/content/pipelines')
Out[ ]:
'/content/pipeline.zip'
In [ ]:
import shutil

drive_path = '/content/drive/My Drive/pipeline.zip'
shutil.move('/content/pipeline.zip', drive_path)
Out[ ]:
'/content/drive/My Drive/pipeline.zip'
In [ ]:
!pip freeze > requirements.txt

Testing¶

Import Library¶

In [ ]:
import pandas as pd
import requests
import json
import base64
import tensorflow as tf
from pprint import PrettyPrinter

Connection Test¶

In [ ]:
pp = PrettyPrinter()
pp.pprint(requests.get("http://localhost:8080/v1/models/water-prediction-model").json())
{'model_version_status': [{'state': 'AVAILABLE',
                           'status': {'error_code': 'OK', 'error_message': ''},
                           'version': '1725114479'}]}
In [ ]:
def prepare_json(inputs: dict) -> str:
    features = {k: tf.train.Feature(float_list=tf.train.FloatList(value=[float(v)])) 
                for k, v in inputs.items()}
    example = tf.train.Example(features=tf.train.Features(feature=features)).SerializeToString()
    encoded_example = base64.b64encode(example).decode()
    return json.dumps({"signature_name": "serving_default", "instances": [{"examples": {"b64": encoded_example}}]})

def prediction(index: int, data: pd.DataFrame, endpoint: str):
    # Get the row at the given index
    inputs = data.iloc[index].to_dict()
    
    # Prepare the data as a JSON request payload
    json_data = prepare_json(inputs)
    
    # Send a POST request to the model endpoint
    response = requests.post(endpoint, data=json_data)
    
    # Parse the prediction from the response
    prediction = response.json().get("predictions")
    if prediction:
        result = "✅ Air aman diminum." if prediction[0][0] >= 0.5 else "💀 Air berbahaya untuk diminum."
    else:
        result = "Error: Prediksi tidak diketahui."
    
    print("Hasil:")
    print("------------------------------")
    print("-", result)
    print("------------------------------")
In [ ]:
data = pd.read_csv("data/water_potability.csv")
endpoint = "http://localhost:8080/v1/models/water-prediction-model:predict"

Prediction¶

In [ ]:
# Data at index 2
prediction(2, data, endpoint)
Result:
------------------------------
- 💀 The water is unsafe to drink.
------------------------------
In [ ]:
# Data at index 2006
prediction(2006, data, endpoint)
Result:
------------------------------
- ✅ The water is safe to drink.
------------------------------