decode(indices, index_to_class)

Decode indices to labels.

Parameters:
  • indices (Iterable[Any]) –

    Iterable (list, array, etc.) with indices.

  • index_to_class (Dict) –

    mapping between indices and labels.

Returns:
  • List –

    list of labels.

madewithml/predict.py
def decode(indices: Iterable[Any], index_to_class: Dict) -> List:
    """Decode indices to labels.

    Args:
        indices (Iterable[Any]): Iterable (list, array, etc.) with indices.
        index_to_class (Dict): mapping between indices and labels.

    Returns:
        List: list of labels.
    """
    return [index_to_class[index] for index in indices]
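
A minimal usage sketch (the mapping below is illustrative, not the project's actual classes):

index_to_class = {0: "computer-vision", 1: "mlops", 2: "natural-language-processing", 3: "other"}
decode(indices=[2, 0, 3], index_to_class=index_to_class)
# ['natural-language-processing', 'computer-vision', 'other']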

format_prob(prob, index_to_class)

Format probabilities to a dictionary mapping class label to probability.

Parameters:
  • prob (Iterable) –

    probabilities.

  • index_to_class (Dict) –

    mapping between indices and labels.

Returns:
  • Dict –

    Dictionary mapping class label to probability.

madewithml/predict.py
def format_prob(prob: Iterable, index_to_class: Dict) -> Dict:
    """Format probabilities to a dictionary mapping class label to probability.

    Args:
        prob (Iterable): probabilities.
        index_to_class (Dict): mapping between indices and labels.

    Returns:
        Dict: Dictionary mapping class label to probability.
    """
    d = {}
    for i, item in enumerate(prob):
        d[index_to_class[i]] = item
    return d
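
For example, with a hypothetical two-class mapping:

index_to_class = {0: "mlops", 1: "other"}
format_prob(prob=[0.8, 0.2], index_to_class=index_to_class)
# {'mlops': 0.8, 'other': 0.2}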

get_best_checkpoint(run_id)

Get the best checkpoint from a specific run.

Parameters:
  • run_id (str) –

    ID of the run to get the best checkpoint from.

Returns:
  • TorchCheckpoint –

    Best checkpoint from the run.

madewithml/predict.py
def get_best_checkpoint(run_id: str) -> TorchCheckpoint:  # pragma: no cover, mlflow logic
    """Get the best checkpoint from a specific run.

    Args:
        run_id (str): ID of the run to get the best checkpoint from.

    Returns:
        TorchCheckpoint: Best checkpoint from the run.
    """
    artifact_dir = urlparse(mlflow.get_run(run_id).info.artifact_uri).path  # get path from mlflow
    results = Result.from_path(artifact_dir)
    return results.best_checkpoints[0][0]
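
Usage sketch; the run id below is a placeholder (in practice it comes from get_best_run_id or the MLflow UI):

run_id = "0123456789abcdef0123456789abcdef"  # placeholder MLflow run id
best_checkpoint = get_best_checkpoint(run_id=run_id)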

get_best_run_id(experiment_name='', metric='', mode='')

Get the best run_id from an MLflow experiment.

Parameters:
  • experiment_name (str) –

    name of the experiment.

  • metric (str) –

    metric to filter by.

  • mode (str) –

    direction of metric (ASC/DESC).

Returns:
  • str –

    best run id from experiment.

madewithml/predict.py
@app.command()
def get_best_run_id(experiment_name: str = "", metric: str = "", mode: str = "") -> str:  # pragma: no cover, mlflow logic
    """Get the best run_id from an MLflow experiment.

    Args:
        experiment_name (str): name of the experiment.
        metric (str): metric to filter by.
        mode (str): direction of metric (ASC/DESC).

    Returns:
        str: best run id from experiment.
    """
    sorted_runs = mlflow.search_runs(
        experiment_names=[experiment_name],
        order_by=[f"metrics.{metric} {mode}"],
    )
    run_id = sorted_runs.iloc[0].run_id
    print(run_id)
    return run_id
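
Usage sketch; the experiment name and metric are assumptions for illustration (use ASC for a metric to minimize, DESC for one to maximize):

run_id = get_best_run_id(experiment_name="llm", metric="val_loss", mode="ASC")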

predict(run_id=None, title=None, description=None)

Predict the tag for a project given its title and description.

Parameters:
  • run_id (str) –

    id of the specific run to load from. Defaults to None.

  • title (str) –

    project title. Defaults to None.

  • description (str) –

    project description. Defaults to None.

Returns:
  • List –

    prediction results for the input data.

madewithml/predict.py
@app.command()
def predict(
    run_id: Annotated[str, typer.Option(help="id of the specific run to load from")] = None,
    title: Annotated[str, typer.Option(help="project title")] = None,
    description: Annotated[str, typer.Option(help="project description")] = None,
) -> List:  # pragma: no cover, tested with inference workload
    """Predict the tag for a project given it's title and description.

    Args:
        run_id (str, optional): id of the specific run to load from. Defaults to None.
        title (str, optional): project title. Defaults to None.
        description (str, optional): project description. Defaults to None.

    Returns:
        List: prediction results for the input data.
    """
    # Load components
    best_checkpoint = get_best_checkpoint(run_id=run_id)
    predictor = TorchPredictor.from_checkpoint(best_checkpoint)

    # Predict
    sample_df = pd.DataFrame([{"title": title, "description": description, "tag": "other"}])
    results = predict_with_proba(df=sample_df, predictor=predictor)
    logger.info(json.dumps(results, cls=NumpyEncoder, indent=2))
    return results
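
An end-to-end sketch; the experiment name, metric, and sample inputs are illustrative assumptions:

run_id = get_best_run_id(experiment_name="llm", metric="val_loss", mode="ASC")
results = predict(
    run_id=run_id,
    title="Transfer learning with transformers",
    description="Using transformers for transfer learning on text classification tasks.",
)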

predict_with_proba(df, predictor)

Predict tags (with probabilities) for input data from a dataframe.

Parameters:
  • df (pd.DataFrame) –

    dataframe with input features.

  • predictor (ray.train.torch.torch_predictor.TorchPredictor) –

    loaded predictor from a checkpoint.

Returns:
  • List –

    list of predictions, one per input row, each with the predicted tag and class probabilities.

madewithml/predict.py
def predict_with_proba(
    df: pd.DataFrame,
    predictor: ray.train.torch.torch_predictor.TorchPredictor,
) -> List:  # pragma: no cover, tested with inference workload
    """Predict tags (with probabilities) for input data from a dataframe.

    Args:
        df (pd.DataFrame): dataframe with input features.
        predictor (ray.train.torch.torch_predictor.TorchPredictor): loaded predictor from a checkpoint.

    Returns:
        List: list of predictions, one per input row, each with the predicted tag and class probabilities.
    """
    import numpy as np

    preprocessor = predictor.get_preprocessor()
    z = predictor.predict(data=df)["predictions"]
    y_prob = torch.tensor(np.stack(z)).softmax(dim=1).numpy()  # stack per-row logits and convert to probabilities
    results = []
    for i, prob in enumerate(y_prob):
        tag = decode([z[i].argmax()], preprocessor.index_to_class)[0]  # most probable class
        results.append({"prediction": tag, "probabilities": format_prob(prob, preprocessor.index_to_class)})
    return results
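
A sketch of the returned structure, assuming a predictor loaded as in predict above; the tag names and probability values are illustrative:

sample_df = pd.DataFrame([{"title": "Transfer learning with transformers", "description": "Using transformers for text classification.", "tag": "other"}])
predict_with_proba(df=sample_df, predictor=predictor)
# [{'prediction': 'natural-language-processing',
#   'probabilities': {'computer-vision': 0.01, 'mlops': 0.02,
#                     'natural-language-processing': 0.96, 'other': 0.01}}]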