Một giải pháp khả thi:
import pandas as pd
from itertools import chain
l1 = [10, 20, 30, 60, 80, 40, 30, 70]
l2 = [20, 30, 40, 80, 70, 50, 90, 100]
d = dict()
for i, j in zip(l1, l2):
if i == j:
continue
d.setdefault(i, []).append(j)
for k in d:
d[k].extend(chain.from_iterable(d.get(v, []) for v in d[k]))
df = pd.DataFrame({'df1': list(d.keys()), 'df2': [', '.join(str(v) for v in d[k]) for k in d]})
print(df)
Bản in:
df1 df2
0 10 20, 30, 40, 90, 50
1 20 30, 40, 90, 50
2 30 40, 90, 50
3 60 80, 70, 100
4 80 70, 100
5 40 50
6 70 100
EDIT: Giải pháp khác dựa trên dữ liệu đầu vào mới. Bây giờ tôi đang kiểm tra các vòng tròn có thể có trong đường dẫn:
import pandas as pd
data = '''
0 10 20
1 10 30
2 10 80
3 10 90
4 10 120
5 10 140
6 10 170
7 20 180
8 30 40
9 30 165
10 30 175
11 40 20
12 40 50
13 50 60
14 60 70
15 70 180
16 80 180
17 90 100
18 100 110
19 110 180
20 120 130
21 130 180
22 140 150
23 150 160
24 160 165
25 165 180
26 165 200
27 170 175
28 175 180
29 175 200
30 180 190
31 190 200
32 200 210
33 210 220
34 220 230
35 230 240
36 240 -
'''
df1, df2 = [], []
for line in data.splitlines()[:-1]: # <--- get rid of last `-` character
line = line.strip().split()
if not line:
continue
df1.append(int(line[1]))
df2.append(int(line[2]))
from pprint import pprint
d = dict()
for i, j in zip(df1, df2):
if i == j:
continue
d.setdefault(i, []).append(j)
for k in d:
seen = set()
for v in d[k]:
for val in d.get(v, []):
if val not in seen:
seen.add(val)
d[k].append(val)
df = pd.DataFrame({'df1': list(d.keys()), 'df2': [', '.join(str(v) for v in d[k]) for k in d]})
print(df)
Bản in:
df1 df2
0 10 20, 30, 80, 90, 120, 140, 170, 180, 40, 165, 1...
1 20 180, 190, 200, 210, 220, 230, 240
2 30 40, 165, 175, 20, 50, 180, 200, 190, 210, 220,...
3 40 20, 50, 180, 190, 200, 210, 220, 230, 240, 60, 70
4 50 60, 70, 180, 190, 200, 210, 220, 230, 240
5 60 70, 180, 190, 200, 210, 220, 230, 240
6 70 180, 190, 200, 210, 220, 230, 240
7 80 180, 190, 200, 210, 220, 230, 240
8 90 100, 110, 180, 190, 200, 210, 220, 230, 240
9 100 110, 180, 190, 200, 210, 220, 230, 240
10 110 180, 190, 200, 210, 220, 230, 240
11 120 130, 180, 190, 200, 210, 220, 230, 240
12 130 180, 190, 200, 210, 220, 230, 240
13 140 150, 160, 165, 180, 200, 190, 210, 220, 230, 240
14 150 160, 165, 180, 200, 190, 210, 220, 230, 240
15 160 165, 180, 200, 190, 210, 220, 230, 240
16 165 180, 200, 190, 210, 200, 220, 230, 240
17 170 175, 180, 200, 190, 210, 220, 230, 240
18 175 180, 200, 190, 210, 200, 220, 230, 240
19 180 190, 200, 210, 220, 230, 240
20 190 200, 210, 220, 230, 240
21 200 210, 220, 230, 240
22 210 220, 230, 240
23 220 230, 240
24 230 240
Hoặc pprint(d, width=250)
:
{10: [20, 30, 80, 90, 120, 140, 170, 180, 40, 165, 175, 100, 130, 150, 190, 20, 50, 200, 110, 160, 60, 210, 70, 220, 230, 240],
20: [180, 190, 200, 210, 220, 230, 240],
30: [40, 165, 175, 20, 50, 180, 200, 190, 210, 220, 230, 240, 60, 70],
40: [20, 50, 180, 190, 200, 210, 220, 230, 240, 60, 70],
50: [60, 70, 180, 190, 200, 210, 220, 230, 240],
60: [70, 180, 190, 200, 210, 220, 230, 240],
70: [180, 190, 200, 210, 220, 230, 240],
80: [180, 190, 200, 210, 220, 230, 240],
90: [100, 110, 180, 190, 200, 210, 220, 230, 240],
100: [110, 180, 190, 200, 210, 220, 230, 240],
110: [180, 190, 200, 210, 220, 230, 240],
120: [130, 180, 190, 200, 210, 220, 230, 240],
130: [180, 190, 200, 210, 220, 230, 240],
140: [150, 160, 165, 180, 200, 190, 210, 220, 230, 240],
150: [160, 165, 180, 200, 190, 210, 220, 230, 240],
160: [165, 180, 200, 190, 210, 220, 230, 240],
165: [180, 200, 190, 210, 200, 220, 230, 240],
170: [175, 180, 200, 190, 210, 220, 230, 240],
175: [180, 200, 190, 210, 200, 220, 230, 240],
180: [190, 200, 210, 220, 230, 240],
190: [200, 210, 220, 230, 240],
200: [210, 220, 230, 240],
210: [220, 230, 240],
220: [230, 240],
230: [240]}
EDIT 2: Nếu df
là khung dữ liệu đầu vào của bạn với các cột "df1" và "df2":
from pprint import pprint
d = dict()
for i, j in zip(df.df1, df.df2):
if i == j:
continue
if j == '-': # <-- this will remove the `-` character in df2
continue
d.setdefault(i, []).append(j)
for k in d:
seen = set()
for v in d[k]:
for val in d.get(v, []):
if val not in seen:
seen.add(val)
d[k].append(val)
df = pd.DataFrame({'df1': list(d.keys()), 'df2': [', '.join(str(v) for v in d[k]) for k in d]})
print(df)