Module `pdpipe.wrappers`

Wrapper-kind pdpipe pipeline stages.

Expand source code

"""Wrapper-kind pdpipe pipeline stages."""


from pdpipe.core import PdPipelineStage


class FitOnly(PdPipelineStage):
    """A wrapper that applies a stage to input data only when fitting.

    In other words, the input data is not transformed if the stage has
    already been fitted once.

    Parameters
    ----------
    stage : PdPipelineStage
        The pipeline stage to operate on input data only when fitting.
    **kwargs
        Additional keyword arguments, forwarded as-is to the
        PdPipelineStage constructor. A 'desc' entry given here replaces
        the automatically generated description.

    Example
    -------
        >>> import pandas as pd; import pdpipe as pdp;
        >>> df = pd.DataFrame([[8,'a'],[5,'b']], [1,2], ['num', 'char'])
        >>> stage = pdp.FitOnly(pdp.ColDrop('num'))
        >>> stage(df)
          char
        1    a
        2    b
        >>> df2 = pd.DataFrame([[8,'a'],[5,'b']], [1,2], ['num', 'char'])
        >>> stage(df2)
           num char
        1    8    a
        2    5    b
    """

    # Template for this wrapper's description; filled with the wrapped
    # stage's own description.
    _FITONLY_DESC = "Applying, only on fit, the stage: {}"

    def __init__(self, stage, **kwargs):
        self._stage = stage
        desc = FitOnly._FITONLY_DESC.format(stage.description())
        super_kwargs = {
            'desc': desc,
        }
        # Caller-supplied keyword arguments win over the defaults above.
        super_kwargs.update(**kwargs)
        super().__init__(**super_kwargs)

    def _prec(self, df):
        """Return True if this stage can be applied to the given dataframe.

        Once this wrapper is flagged as fitted, any input is accepted,
        since the input is then returned untouched. Otherwise the decision
        is delegated to the wrapped stage's own precondition.
        """
        if self.is_fitted:
            return True
        return self._stage._prec(df)

    def _fit_transform(self, df, verbose):
        """Fit-transform the wrapped stage on df and return its result."""
        res_df = self._stage.fit_transform(df, verbose=verbose)
        # Flag as fitted only after the wrapped stage has succeeded, so that
        # an exception raised while fitting does not leave this wrapper
        # marked as fitted.
        self.is_fitted = True
        return res_df

    def _transform(self, df, verbose):
        """Return df unchanged, reporting the skip when verbose is truthy."""
        if verbose:
            print(
                f"Skipping, because not in fit, "
                f"the stage: {self._stage.description()}"
            )
        return df

Classes

class FitOnly (stage, **kwargs)

A wrapper that applies a stage to input data only when fitting.

In other words, the input data is not transformed if the stage has already been fitted once.

Parameters

stage (PdPipelineStage): The pipeline stage to operate on input data only when fitting.

Example

>>> import pandas as pd; import pdpipe as pdp;
>>> df = pd.DataFrame([[8,'a'],[5,'b']], [1,2], ['num', 'char'])
>>> stage = pdp.FitOnly(pdp.ColDrop('num'))
>>> stage(df)
  char
1    a
2    b
>>> df2 = pd.DataFrame([[8,'a'],[5,'b']], [1,2], ['num', 'char'])
>>> stage(df2)
   num char
1    8    a
2    5    b

Expand source code

class FitOnly(PdPipelineStage):
    """A wrapper that applies a stage to input data only when fitting.

    In other words, the input data is not transformed if the stage has
    already been fitted once.

    Parameters
    ----------
    stage : PdPipelineStage
        The pipeline stage to operate on input data only when fitting.
    **kwargs
        Additional keyword arguments, forwarded as-is to the
        PdPipelineStage constructor. A 'desc' entry given here replaces
        the automatically generated description.

    Example
    -------
        >>> import pandas as pd; import pdpipe as pdp;
        >>> df = pd.DataFrame([[8,'a'],[5,'b']], [1,2], ['num', 'char'])
        >>> stage = pdp.FitOnly(pdp.ColDrop('num'))
        >>> stage(df)
          char
        1    a
        2    b
        >>> df2 = pd.DataFrame([[8,'a'],[5,'b']], [1,2], ['num', 'char'])
        >>> stage(df2)
           num char
        1    8    a
        2    5    b
    """

    # Template for this wrapper's description; filled with the wrapped
    # stage's own description.
    _FITONLY_DESC = "Applying, only on fit, the stage: {}"

    def __init__(self, stage, **kwargs):
        self._stage = stage
        desc = FitOnly._FITONLY_DESC.format(stage.description())
        super_kwargs = {
            'desc': desc,
        }
        # Caller-supplied keyword arguments win over the defaults above.
        super_kwargs.update(**kwargs)
        super().__init__(**super_kwargs)

    def _prec(self, df):
        """Return True if this stage can be applied to the given dataframe.

        Once this wrapper is flagged as fitted, any input is accepted,
        since the input is then returned untouched. Otherwise the decision
        is delegated to the wrapped stage's own precondition.
        """
        if self.is_fitted:
            return True
        return self._stage._prec(df)

    def _fit_transform(self, df, verbose):
        """Fit-transform the wrapped stage on df and return its result."""
        res_df = self._stage.fit_transform(df, verbose=verbose)
        # Flag as fitted only after the wrapped stage has succeeded, so that
        # an exception raised while fitting does not leave this wrapper
        # marked as fitted.
        self.is_fitted = True
        return res_df

    def _transform(self, df, verbose):
        """Return df unchanged, reporting the skip when verbose is truthy."""
        if verbose:
            print(
                f"Skipping, because not in fit, "
                f"the stage: {self._stage.description()}"
            )
        return df

Ancestors

PdPipelineStage
abc.ABC

Inherited members

PdPipelineStage:
- AdHocStage
- AggByCols
- ApplyByCols
- ApplyToRows
- Bin
- ColByFrameFunc
- ColDrop
- ColRename
- ColReorder
- ColumnDtypeEnforcer
- ColumnTransformer
- ColumnsBasedPipelineStage
- ConditionValidator
- DropDuplicates
- DropNa
- DropRareTokens
- DropTokensByLength
- DropTokensByList
- Encode
- FitOnly
- FreqDrop
- Log
- MapColVals
- OneHotEncode
- PdPipeline
- RegexReplace
- RemoveStopwords
- RowDrop
- Scale
- Schematize
- SetIndex
- SnowballStem
- TfidfVectorizeTokenLists
- TokenizeText
- UntokenizeText
- ValDrop
- ValKeep
- apply
- description
- fit
- fit_transform
- transform