Source code for raphtory.nullmodels

"""
Generate randomised reference models for a temporal graph edgelist
"""
  4
  5import pandas as pd
  6
  7
def shuffle_column(
    graph_df: pd.DataFrame, col_number=None, col_name=None, inplace=False
):
    """
    Returns an edgelist with a given column shuffled. Exactly one of col_number or col_name should be specified.

    The values of the selected column are randomly permuted across the rows.
    Every other column, the row order and the DataFrame's index are left
    unchanged.

    Args:
        graph_df (pd.DataFrame): The input DataFrame representing the timestamped edgelist.
        col_number (int, optional): The column number to shuffle. Default is None.
        col_name (str, optional): The column name to shuffle. Default is None.
        inplace (bool, optional): If True, shuffles the column in-place. Otherwise, creates a copy of the DataFrame. Default is False.

    Returns:
        pd.DataFrame: The DataFrame with the specified column shuffled. This is
        graph_df itself when inplace is True, otherwise a copy.

    Raises:
        AssertionError: If neither col_number nor col_name is provided.
        AssertionError: If both col_number and col_name are provided.

    """
    # Raised explicitly instead of via `assert` so the validation is not
    # stripped when Python runs with -O; the exception type is unchanged.
    if col_number is None and col_name is None:
        raise AssertionError("No column number or name provided.")
    if col_number is not None and col_name is not None:
        raise AssertionError("Cannot have both a column number and a column name.")

    df = graph_df if inplace else graph_df.copy()

    # Resolve a positional column number to its label so that both selection
    # modes share a single code path.
    label = df.columns[col_number] if col_number is not None else col_name

    shuffled = df[label].sample(n=len(df))
    # Column assignment aligns on index labels. Give the permuted values the
    # frame's own index so they are written back position by position; a
    # fresh 0..n-1 index would produce NaNs or misplaced values whenever the
    # frame does not carry a default RangeIndex.
    shuffled.index = df.index
    df[label] = shuffled
    return df
51 52
def shuffle_multiple_columns(
    graph_df: pd.DataFrame,
    col_numbers: list = None,
    col_names: list = None,
    inplace=False,
):
    """
    Returns an edgelist with given columns shuffled. Exactly one of col_numbers or col_names should be specified.

    Each listed column is shuffled by its own call to shuffle_column, so the
    columns are permuted independently of one another. An empty list returns
    the DataFrame (or its copy) with no column shuffled.

    Args:
        graph_df (pd.DataFrame): The input DataFrame representing the graph.
        col_numbers (list, optional): The list of column numbers to shuffle. Default is None.
        col_names (list, optional): The list of column names to shuffle. Default is None.
        inplace (bool, optional): If True, shuffles the columns in-place. Otherwise, creates a copy of the DataFrame. Default is False.

    Returns:
        pd.DataFrame: The DataFrame with the specified columns shuffled. This
        is graph_df itself when inplace is True, otherwise a copy.

    Raises:
        AssertionError: If neither col_numbers nor col_names are provided.
        AssertionError: If both col_numbers and col_names are provided.

    """
    # Raised explicitly instead of via `assert` so the validation is not
    # stripped when Python runs with -O; the exception type is unchanged.
    if col_numbers is None and col_names is None:
        raise AssertionError("No column numbers or names provided.")
    if col_numbers is not None and col_names is not None:
        raise AssertionError("Cannot have both column numbers and column names.")

    # Copy at most once, up front. Re-copying graph_df for every column would
    # discard the earlier shuffles and leave only the last column permuted.
    df = graph_df if inplace else graph_df.copy()

    if col_numbers is not None:
        for number in col_numbers:
            shuffle_column(df, col_number=number, inplace=True)
    else:
        for name in col_names:
            shuffle_column(df, col_name=name, inplace=True)
    return df
90 91
def permuted_timestamps_model(
    graph_df: pd.DataFrame,
    time_col: int = None,
    time_name: str = None,
    inplace=False,
    sorted=False,
):
    """
    Returns a DataFrame with the time column shuffled.

    The time column is identified by exactly one of time_col or time_name and
    is shuffled via shuffle_column, which raises AssertionError when neither
    or both are given.

    Args:
        graph_df (pd.DataFrame): The input DataFrame representing the graph.
        time_col (int, optional): The column number of the time column to shuffle. Default is None.
        time_name (str, optional): The column name of the time column to shuffle. Default is None.
        inplace (bool, optional): If True, shuffles the time column in-place. Otherwise, creates a copy of the DataFrame. Default is False.
        sorted (bool, optional): If True, sorts the DataFrame by the shuffled time column. Default is False.

    Returns:
        pd.DataFrame or None: The shuffled DataFrame with the time column, or None if inplace=True.

    """
    # NOTE: the `sorted` parameter shadows the builtin inside this function;
    # the name is part of the public signature and is kept for compatibility.
    shuffled_df = shuffle_column(graph_df, time_col, time_name, inplace)

    if sorted:
        # Test against None rather than truthiness, matching shuffle_column,
        # so that a falsy but valid column label (e.g. 0 or "") is still
        # used as the sort key.
        if time_name is not None:
            sort_key = time_name
        else:
            sort_key = shuffled_df.columns[time_col]
        shuffled_df.sort_values(by=sort_key, inplace=True)

    # With inplace=True the caller's DataFrame has been modified directly.
    return None if inplace else shuffled_df