decode(indices, index_to_class)

Decode indices to labels.

Parameters:
  • indices (Iterable[Any]) –

    Iterable (list, array, etc.) with indices.

  • index_to_class (Dict) –

    mapping between indices and labels.

Returns:
  • List –

    list of labels.

madewithml/predict.py
def decode(indices: Iterable[Any], index_to_class: Dict) -> List:
    """Decode indices to labels.

    Args:
        indices (Iterable[Any]): Iterable (list, array, etc.) with indices.
        index_to_class (Dict): mapping between indices and labels.

    Returns:
        List: list of labels.
    """
    return [index_to_class[index] for index in indices]
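
A minimal usage sketch (the mapping below is illustrative, not the project's actual classes):

index_to_class = {0: "computer-vision", 1: "mlops", 2: "natural-language-processing", 3: "other"}
decode(indices=[2, 0, 3], index_to_class=index_to_class)
# ['natural-language-processing', 'computer-vision', 'other']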

format_prob(prob, index_to_class)

Format probabilities to a dictionary mapping class label to probability.

Parameters:
  • prob (Iterable) –

    probabilities.

  • index_to_class (Dict) –

    mapping between indices and labels.

Returns:
  • Dict –

    Dictionary mapping class label to probability.

madewithml/predict.py
def format_prob(prob: Iterable, index_to_class: Dict) -> Dict:
    """Format probabilities to a dictionary mapping class label to probability.

    Args:
        prob (Iterable): probabilities.
        index_to_class (Dict): mapping between indices and labels.

    Returns:
        Dict: Dictionary mapping class label to probability.
    """
    d = {}
    for i, item in enumerate(prob):
        d[index_to_class[i]] = item
    return d
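
For example, with a hypothetical two-class mapping:

index_to_class = {0: "mlops", 1: "other"}
format_prob(prob=[0.8, 0.2], index_to_class=index_to_class)
# {'mlops': 0.8, 'other': 0.2}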

get_best_checkpoint(run_id)

Get the best checkpoint from a specific run.

Parameters:
  • run_id (str) –

    ID of the run to get the best checkpoint from.

Returns:
  • TorchCheckpoint –

    Best checkpoint from the run.

madewithml/predict.py
def get_best_checkpoint(run_id: str) -> TorchCheckpoint:  # pragma: no cover, mlflow logic
    """Get the best checkpoint from a specific run.

    Args:
        run_id (str): ID of the run to get the best checkpoint from.

    Returns:
        TorchCheckpoint: Best checkpoint from the run.
    """
    artifact_dir = urlparse(mlflow.get_run(run_id).info.artifact_uri).path  # get path from mlflow
    results = Result.from_path(artifact_dir)
    return results.best_checkpoints[0][0]
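
Usage sketch; the run id below is a placeholder (in practice it comes from get_best_run_id or the MLflow UI):

run_id = "0123456789abcdef0123456789abcdef"  # placeholder MLflow run id
best_checkpoint = get_best_checkpoint(run_id=run_id)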

get_best_run_id(experiment_name='', metric='', mode='')

Get the best run_id from an MLflow experiment.

Parameters:
  • experiment_name (str) –

    name of the experiment.

  • metric (str) –

    metric to filter by.

  • mode (str) –

    direction of metric (ASC/DESC).

Returns:
  • str –

    best run id from experiment.

madewithml/predict.py
@app.command()
def get_best_run_id(experiment_name: str = "", metric: str = "", mode: str = "") -> str:  # pragma: no cover, mlflow logic
    """Get the best run_id from an MLflow experiment.

    Args:
        experiment_name (str): name of the experiment.
        metric (str): metric to filter by.
        mode (str): direction of metric (ASC/DESC).

    Returns:
        str: best run id from experiment.
    """
    sorted_runs = mlflow.search_runs(
        experiment_names=[experiment_name],
        order_by=[f"metrics.{metric} {mode}"],
    )
    run_id = sorted_runs.iloc[0].run_id
    print(run_id)
    return run_id
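
Usage sketch; the experiment name and metric are assumptions for illustration (use ASC for a metric to minimize, DESC for one to maximize):

run_id = get_best_run_id(experiment_name="llm", metric="val_loss", mode="ASC")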

predict(run_id=None, title=None, description=None)

Predict the tag for a project given its title and description.

Parameters:
  • run_id (str) –

    id of the specific run to load from. Defaults to None.

  • title (str) –

    project title. Defaults to None.

  • description (str) –

    project description. Defaults to None.

Returns:
  • List –

    prediction results for the input data.

madewithml/predict.py
@app.command()
def predict(
    run_id: Annotated[str, typer.Option(help="id of the specific run to load from")] = None,
    title: Annotated[str, typer.Option(help="project title")] = None,
    description: Annotated[str, typer.Option(help="project description")] = None,
) -> List:  # pragma: no cover, tested with inference workload
    """Predict the tag for a project given it's title and description.

    Args:
        run_id (str, optional): id of the specific run to load from. Defaults to None.
        title (str, optional): project title. Defaults to None.
        description (str, optional): project description. Defaults to None.

    Returns:
        List: prediction results for the input data.
    """
    # Load components
    best_checkpoint = get_best_checkpoint(run_id=run_id)
    predictor = TorchPredictor.from_checkpoint(best_checkpoint)

    # Predict
    sample_df = pd.DataFrame([{"title": title, "description": description, "tag": "other"}])
    results = predict_with_proba(df=sample_df, predictor=predictor)
    logger.info(json.dumps(results, cls=NumpyEncoder, indent=2))
    return results
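
An end-to-end sketch; the experiment name, metric, and sample inputs are illustrative assumptions:

run_id = get_best_run_id(experiment_name="llm", metric="val_loss", mode="ASC")
results = predict(
    run_id=run_id,
    title="Transfer learning with transformers",
    description="Using transformers for transfer learning on text classification tasks.",
)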

predict_with_proba(df, predictor)

Predict tags (with probabilities) for input data from a dataframe.

Parameters:
  • df (pd.DataFrame) –

    dataframe with input features.

  • predictor (ray.train.torch.torch_predictor.TorchPredictor) –

    loaded predictor from a checkpoint.

Returns:
  • List –

    list of predictions, one per input row, each with the predicted tag and class probabilities.

madewithml/predict.py
def predict_with_proba(
    df: pd.DataFrame,
    predictor: ray.train.torch.torch_predictor.TorchPredictor,
) -> List:  # pragma: no cover, tested with inference workload
    """Predict tags (with probabilities) for input data from a dataframe.

    Args:
        df (pd.DataFrame): dataframe with input features.
        predictor (ray.train.torch.torch_predictor.TorchPredictor): loaded predictor from a checkpoint.

    Returns:
        List: list of predictions, one per input row, each with the predicted tag and class probabilities.
    """
    import numpy as np

    preprocessor = predictor.get_preprocessor()
    z = predictor.predict(data=df)["predictions"]
    y_prob = torch.tensor(np.stack(z)).softmax(dim=1).numpy()  # stack per-row logits and convert to probabilities
    results = []
    for i, prob in enumerate(y_prob):
        tag = decode([z[i].argmax()], preprocessor.index_to_class)[0]  # most probable class
        results.append({"prediction": tag, "probabilities": format_prob(prob, preprocessor.index_to_class)})
    return results
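
A sketch of the returned structure, assuming a predictor loaded as in predict above; the tag names and probability values are illustrative:

sample_df = pd.DataFrame([{"title": "Transfer learning with transformers", "description": "Using transformers for text classification.", "tag": "other"}])
predict_with_proba(df=sample_df, predictor=predictor)
# [{'prediction': 'natural-language-processing',
#   'probabilities': {'computer-vision': 0.01, 'mlops': 0.02,
#                     'natural-language-processing': 0.96, 'other': 0.01}}]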