Source code for raphtory.nullmodels
1"""
2Generate randomised reference models for a temporal graph edgelist
3"""
4
5import pandas as pd
6
7
[docs]
def shuffle_column(
    graph_df: pd.DataFrame, col_number=None, col_name=None, inplace=False
):
    """
    Returns an edgelist with a given column shuffled. Exactly one of col_number or col_name should be specified.

    The values of the selected column are randomly permuted across the rows.
    All other columns, and the DataFrame's index labels, are left as they are.

    Args:
        graph_df (pd.DataFrame): The input DataFrame representing the timestamped edgelist.
        col_number (int, optional): The column number to shuffle. Default is None.
        col_name (str, optional): The column name to shuffle. Default is None.
        inplace (bool, optional): If True, shuffles the column in-place. Otherwise, creates a copy of the DataFrame. Default is False.

    Returns:
        pd.DataFrame: The DataFrame with the specified column shuffled. This is
        graph_df itself when inplace is True, otherwise a new copy.

    Raises:
        AssertionError: If neither col_number nor col_name is provided.
        AssertionError: If both col_number and col_name are provided.

    """
    # Raised explicitly (rather than via `assert`) so the checks still run
    # when Python is started with -O, while keeping the documented exception type.
    if col_number is None and col_name is None:
        raise AssertionError("No column number or name provided.")
    if col_number is not None and col_name is not None:
        raise AssertionError("Cannot have both a column number and a column name.")

    df = graph_df if inplace else graph_df.copy()

    # Resolve a positional reference to its label so both cases share one path.
    target = col_name if col_name is not None else df.columns[col_number]

    # Sampling every row without replacement yields a random permutation.
    shuffled = df[target].sample(n=len(df))

    # Column assignment aligns on index labels. Give the permuted values the
    # frame's own index so they are written back positionally; resetting to a
    # 0..n-1 index would produce NaNs for any frame with a non-default index.
    shuffled.index = df.index
    df[target] = shuffled
    return df
51
52
[docs]
def shuffle_multiple_columns(
    graph_df: pd.DataFrame,
    col_numbers: list = None,
    col_names: list = None,
    inplace=False,
):
    """
    Returns an edgelist with given columns shuffled. Exactly one of col_numbers or col_names should be specified.

    Each listed column is shuffled by its own call to shuffle_column, so the
    columns are permuted separately from one another. An empty list returns the
    DataFrame (or its copy) unshuffled.

    Args:
        graph_df (pd.DataFrame): The input DataFrame representing the graph.
        col_numbers (list, optional): The list of column numbers to shuffle. Default is None.
        col_names (list, optional): The list of column names to shuffle. Default is None.
        inplace (bool, optional): If True, shuffles the columns in-place. Otherwise, creates a copy of the DataFrame. Default is False.

    Returns:
        pd.DataFrame: The DataFrame with the specified columns shuffled. This is
        graph_df itself when inplace is True, otherwise a new copy.

    Raises:
        AssertionError: If neither col_numbers nor col_names are provided.
        AssertionError: If both col_numbers and col_names are provided.

    """
    # Raised explicitly (rather than via `assert`) so the checks still run
    # when Python is started with -O, while keeping the documented exception type.
    if col_numbers is None and col_names is None:
        raise AssertionError("No column numbers or names provided.")
    if col_numbers is not None and col_names is not None:
        raise AssertionError("Cannot have both column numbers and column names.")

    # Copy at most once, then shuffle that single frame in place. Copying
    # graph_df afresh for every column would discard all but the last shuffle.
    df = graph_df if inplace else graph_df.copy()

    if col_numbers is not None:
        for n in col_numbers:
            df = shuffle_column(df, col_number=n, inplace=True)
    else:
        for name in col_names:
            df = shuffle_column(df, col_name=name, inplace=True)
    return df
90
91
[docs]
def permuted_timestamps_model(
    graph_df: pd.DataFrame,
    time_col: int = None,
    time_name: str = None,
    inplace=False,
    sorted=False,
):
    """
    Returns a DataFrame with the time column shuffled.

    The time column is identified either by position (time_col) or by label
    (time_name); the selection is validated by shuffle_column.

    Args:
        graph_df (pd.DataFrame): The input DataFrame representing the graph.
        time_col (int, optional): The column number of the time column to shuffle. Default is None.
        time_name (str, optional): The column name of the time column to shuffle. Default is None.
        inplace (bool, optional): If True, shuffles the time column in-place. Otherwise, creates a copy of the DataFrame. Default is False.
        sorted (bool, optional): If True, sorts the DataFrame by the shuffled time column. Default is False.

    Returns:
        pd.DataFrame or None: The shuffled DataFrame with the time column, or None if inplace=True.

    """
    # NOTE: the `sorted` parameter shadows the builtin of the same name inside
    # this function; it is kept because it is part of the public signature.
    shuffled_df = shuffle_column(graph_df, time_col, time_name, inplace)

    if sorted:
        # Use the same `is not None` test as shuffle_column so that falsy but
        # valid labels (e.g. 0 or "") are still treated as a column name.
        sort_key = (
            time_name if time_name is not None else shuffled_df.columns[time_col]
        )
        shuffled_df.sort_values(by=sort_key, inplace=True)

    # When operating in place the caller already holds the mutated frame.
    if inplace:
        return None
    return shuffled_df