| |
|
|
| |
| import plotly.express as px |
| from plotly.graph_objs import Figure, FigureWidget |
| import datasets |
| import pandas as pd |
| import huggingface_hub |
| import plotly.graph_objs as go |
| import numpy as np |
| from PIL import Image |
|
|
# Registry of the figures built by this script, keyed by a short slug.
FIGURES: dict[str, Figure] = {}

# Treemap of the NLP datasets table: a constant root node, then "task", then
# "dataset", with each tile sized by the "size" column.
df = pd.read_csv("nlp_datas.csv")
fig = px.treemap(
    df,
    values="size",
    path=[px.Constant("nlp-datasets"), "task", "dataset"],
).update_layout(
    # Transparent paper background and zero margins.
    margin={"t": 0, "l": 0, "r": 0, "b": 0},
    paper_bgcolor="rgba(0,0,0,0)",
)
FIGURES["nlp"] = fig

fig  # notebook-style display of the figure; has no effect when run as a script
| |
# Treemap of the LLM datasets table: a constant "LLM" root with one tile per
# "dataset", sized by the "size" column.
df = pd.read_csv("llm.csv")
fig = px.treemap(
    df,
    values="size",
    path=[px.Constant("LLM"), "dataset"],
).update_layout(
    # Transparent paper background and zero margins.
    margin={"t": 0, "l": 0, "r": 0, "b": 0},
    paper_bgcolor="rgba(0,0,0,0)",
)
FIGURES["gpt"] = fig

fig  # notebook-style display of the figure; has no effect when run as a script
| |
|
|
# Line chart of "time" against "sequence length", one line per model.
df = pd.read_csv("./seq-time.csv", index_col=0)

# The index labels are strings: every "k" is rewritten to "*1024" and the
# resulting expression is evaluated to get a number.
# NOTE(review): eval() executes whatever text the CSV index contains, so this
# is only safe while the file is trusted -- consider an explicit parser.
df.index = df.index.map(lambda label: eval(label.replace("k", "*1024")))

# NOTE(review): the divisor 7 is not explained anywhere in this file --
# confirm what it normalises.
df["platformers"] = df["platformers"].div(7)

# Discard the last column, then go from wide (one column per model) to long
# (one row per sequence-length/model pair).
df = df.drop(columns=[df.columns[-1]])
df = df.reset_index(names="sequence length")
df = df.melt(id_vars="sequence length", value_name="time", var_name="model")

fig = px.line(df, color="model", x="sequence length", y="time")
FIGURES["seq-time"] = fig

# Transparent backgrounds with white legend and axis colours.
fig.update_layout(
    margin={"t": 0, "l": 0, "r": 0, "b": 0},
    paper_bgcolor="rgba(0,0,0,0)",
    plot_bgcolor="rgba(0,0,0,0)",
    legend_font={"color": "white"},
).update_xaxes(color="white").update_yaxes(color="white")

fig  # notebook-style display of the figure; has no effect when run as a script
| |
|
|
# Grouped bar chart of "tflops" against "sequence length", one bar per model.
df = pd.read_csv("seq-tflops.csv", index_col=0)

# Wide (one column per model) to long (one row per length/model pair).
df = df.reset_index(names="sequence length")
df = df.melt(id_vars="sequence length", value_name="tflops", var_name="model")

fig = px.bar(
    df,
    barmode="group",
    color="model",
    x="sequence length",
    y="tflops",
)
FIGURES["seq-tflops"] = fig

# Transparent backgrounds with white legend and axis colours.
fig.update_layout(
    margin={"t": 0, "l": 0, "r": 0, "b": 0},
    paper_bgcolor="rgba(0,0,0,0)",
    plot_bgcolor="rgba(0,0,0,0)",
    legend_font={"color": "white"},
).update_xaxes(color="white").update_yaxes(color="white")

fig  # notebook-style display of the figure; has no effect when run as a script
| |
|
|
|
|
# Table of the first 100 training rows of the "SUSTech/webvid" dataset, with
# the "duration" column removed.
df = (
    datasets.load_dataset("SUSTech/webvid", split="train[:100]")
    .to_pandas()
    .drop(columns=["duration"])
)

fig = go.Figure(
    data=[
        go.Table(
            # One header cell per column label.
            header={
                "values": list(df.columns),
                "fill_color": "paleturquoise",
                "align": "left",
            },
            # go.Table takes its cells column by column.
            cells={
                "values": [df[column] for column in df.columns],
                "fill_color": "lavender",
                "align": "left",
            },
        )
    ]
)

# Transparent paper background and zero margins.
fig.update_layout(
    margin={"t": 0, "l": 0, "r": 0, "b": 0},
    paper_bgcolor="rgba(0,0,0,0)",
)

FIGURES["webvid"] = fig
| |
|
|
# Bar chart of the "token" value recorded for each task label in `data`.
data = {
    "402-page transcripts from Apollo 11’s mission to the moon": 326914,
    "44-minute silent Buster Keaton movie": 696417,
    "more than 100,000 lines of code": 816767,
    "Generate 1min video": 1000000,
}

# Series (index = task label, values = token count) -> two-column frame with
# columns "task" and "token".
df = pd.Series(data, name="token").to_frame().reset_index(names="task")

# px.bar builds the figure itself, so no placeholder go.Figure() is created
# beforehand (the previous one was discarded immediately).
fig = px.bar(
    df,
    y="token",
    x="task",
    text_auto=".2s",  # d3 SI-prefix format for the value label on each bar
)
FIGURES["token-bar"] = fig

# Value labels: white, horizontal, placed outside the bars and not clipped
# by the axis area.
fig.update_traces(
    textfont_size=12,
    textangle=0,
    textposition="outside",
    cliponaxis=False,
    textfont_color="white",
)

# Transparent backgrounds, zero margins, white legend text.
fig.update_layout(
    paper_bgcolor="rgba(0,0,0,0)",
    margin=dict(t=0, l=0, r=0, b=0),
    plot_bgcolor="rgba(0,0,0,0)",
    legend_font=dict(color="white"),
)

# Both axes: white, without axis line, zero line or grid; the x axis title
# is blanked.
fig.update_xaxes(
    color="white",
    zeroline=False,
    showline=False,
    showgrid=False,
    title="",
)
fig.update_yaxes(
    showline=False,
    showgrid=False,
    zeroline=False,
    color="white",
)

fig  # notebook-style display of the figure; has no effect when run as a script
|
|
|
|
| |
def generate_loss(steps, initial_loss, decay_rate, noise_factor):
    """Return a noisy, exponentially decaying curve sampled at ``steps``.

    The noise-free value is ``initial_loss * exp(-decay_rate * steps)``.
    Gaussian noise drawn from NumPy's global random state is scaled by
    ``noise_factor`` and by the noise-free value, then added to it.

    Args:
        steps: NumPy array of sample positions; its shape sets the output
            shape.
        initial_loss: Noise-free value at ``steps == 0``.
        decay_rate: Exponential decay coefficient.
        noise_factor: Relative noise scale; ``0`` gives the noise-free curve.

    Returns:
        Array with the same shape as ``steps``.
    """
    clean = initial_loss * np.exp(-decay_rate * steps)
    jitter = np.random.randn(*steps.shape)
    return clean + noise_factor * clean * jitter
|
|
|
|
def splitpoints(total, split):
    """Yield ``split`` consecutive slices that together cover ``total`` items.

    The first ``split - 1`` slices each span ``total // split`` items. The
    last slice is open-ended (``stop=None``), so it also takes whatever
    remainder the integer division leaves.

    Args:
        total: Number of items to partition.
        split: Number of slices to produce; must be at least 1.

    Yields:
        ``slice`` objects in ascending order.

    Raises:
        ValueError: If ``split`` is smaller than 1 (raised on first iteration,
            since this is a generator).
    """
    if split < 1:
        raise ValueError("split must be a positive integer")
    step = total // split
    for i in range(split - 1):
        yield slice(i * step, (i + 1) * step)
    # Derive the tail start from `split`, not from the loop variable: with
    # split == 1 the loop body never runs and `i` would be unbound.
    yield slice((split - 1) * step, None)
|
|
|
|
# One entry per segment of the "cloud-switch" loss curve: trace name, line
# colour and an icon path.
# NOTE(review): the icon paths do not line up with the provider in each name
# (aws -> gc.png, aliyun -> aws-white.png, ucloud -> aliyun.png), and "icon"
# is not read anywhere in the visible code -- confirm before relying on it.
meta = [
    dict(name="2xDGX on aws", color="red", icon="../figures/gc.png"),
    dict(name="16xDGX on aliyun", color="orange", icon="../figures/aws-white.png"),
    dict(name="128xDGX on ucloud", color="blue", icon="../figures/aliyun.png"),
]
|
|
|
|
# A single noisy, exponentially decaying loss curve, drawn as one coloured
# line segment per entry in `meta`, with three logo images laid over it.
steps = np.linspace(0, 1, 1000)
loss = generate_loss(steps, initial_loss=1, decay_rate=5, noise_factor=0.1)

fig = go.Figure()
FIGURES["cloud-switch"] = fig

# Pair each meta entry with the slice of the curve it should draw.
for entry, idx in zip(meta, splitpoints(1000, len(meta))):
    segment = go.Scatter(
        mode="lines",
        name=entry["name"],
        line={"color": entry["color"]},
        x=steps[idx],
        y=loss[idx],
    )
    fig.add_trace(segment)

# Logo overlays in paper coordinates: (image path, x, y, size); the same
# size is used for both sizex and sizey.
for logo_path, logo_x, logo_y, logo_size in (
    ("../figures/logo/ucloud.png", 0.8, 0.2, 0.2),
    ("../figures/aws-white.png", 0.17, 0.7, 0.15),
    ("../figures/aliyun.png", 0.43, 0.3, 0.15),
):
    fig.add_layout_image(
        source=Image.open(logo_path),
        x=logo_x,
        y=logo_y,
        sizex=logo_size,
        sizey=logo_size,
        xref="paper",
        yref="paper",
        layer="above",
        opacity=1.0,
    )

# No legend, transparent backgrounds.
fig.update_layout(
    plot_bgcolor="rgba(255,255,255,0)",
    paper_bgcolor="rgba(0,0,0,0)",
    showlegend=False,
)
# x axis: no tick labels, axis line, zero line or grid.
fig.update_xaxes(
    automargin=True,
    showgrid=False,
    zeroline=False,
    showline=False,
    showticklabels=False,
)
# y axis: white "Loss" title over a faint dashed grid, without tick labels.
fig.update_yaxes(
    title="Loss",
    color="white",
    gridcolor="rgba(255,255,255,0.3)",
    griddash="4px",
    showline=False,
    zeroline=False,
    showticklabels=False,
)

fig  # notebook-style display of the figure; has no effect when run as a script
|
|
|
|
| |
def plot_gantt(df):
    """Build a Gantt-style timeline figure from a task table.

    Args:
        df: Frame with "Start", "End" and "Task" columns. Each row becomes
            one bar, placed on the y axis by "Task" and coloured by "Task".

    Returns:
        The styled Plotly figure (transparent backgrounds, no legend,
        hidden x tick labels, large white y tick labels).
    """
    chart = px.timeline(df, color="Task", y="Task", x_start="Start", x_end="End")

    chart.update_layout(
        xaxis_tickformat="%H:%M",
        showlegend=False,
        paper_bgcolor="rgba(0,0,0,0)",
        plot_bgcolor="rgba(255,255,255,0)",
    )
    # x axis: no tick labels, axis line, zero line or grid.
    chart.update_xaxes(
        automargin=True,
        showgrid=False,
        zeroline=False,
        showline=False,
        showticklabels=False,
    )
    # y axis: untitled, white 20pt tick labels over a faint dashed grid.
    chart.update_yaxes(
        title="",
        color="white",
        tickfont={"size": 20},
        gridcolor="rgba(0,0,0,0.3)",
        griddash="4px",
        showline=False,
        zeroline=False,
    )
    return chart
|
|
|
|
| |
# Synthetic task timeline: `num_rows` one-hour tasks starting every four
# hours, each randomly labelled "Read" (probability `download_prop`) or
# "Transform".
num_rows = 1000
download_prop = 0.65

df = pd.DataFrame(
    {"Start": pd.date_range("1-jan-2021", periods=num_rows, freq="4h")}
)
df["End"] = df["Start"] + pd.Timedelta(hours=1)
df["Task"] = np.random.choice(
    ["Read", "Transform"], num_rows, p=(download_prop, 1 - download_prop)
)

# Pin the first and last labels so the sequence always opens with "Read" and
# closes with "Transform".
df.at[0, "Task"] = "Read"
df.at[len(df) - 1, "Task"] = "Transform"

# Merge each run of consecutive identical tasks into one row: the run id goes
# up by one every time the label differs from the previous row.
df = df.groupby((df["Task"] != df["Task"].shift()).cumsum()).agg(
    {"Start": "min", "End": "max", "Task": "first"}
)

# Baseline copy that the profile figures below start from.
timeline = df.copy()
| |
|
|
# "profile-naive": split the whole time span into 9 equal intervals and turn
# every third one (grid points 2-3, 5-6, 8-9) into a "Train" window.
df = timeline.copy()
ddi = pd.date_range(start=df["Start"].iloc[0], end=df["End"].iloc[-1], periods=10)
for start, end in zip(ddi[2:-1:3], ddi[3::3]):
    # Tasks that start inside the window are relabelled as "Train" ...
    starts_in_window = df["Start"].between(start, end)
    df.loc[starts_in_window, "Task"] = "Train"
    # ... and one extra "Train" row spanning the whole window is appended.
    df.loc[len(df) + 1] = pd.Series(
        [start, end, "Train"], index=["Start", "End", "Task"]
    )

FIGURES["profile-naive"] = plot_gantt(df)
FIGURES["profile-naive"]  # notebook-style display; no effect in a script
| |
|
|
# "profile-old": same construction as a windowed profile, on a finer grid.
# The span is cut at 10 * (prop + 1) points and each "Train" window runs from
# grid point 1 to grid point `prop` of every group of prop + 1 points.
df = timeline.copy()
prop = 10
ddi = pd.date_range(
    start=df["Start"].iloc[0], end=df["End"].iloc[-1], periods=10 * (prop + 1)
)
for start, end in zip(ddi[1 : -1 : prop + 1], ddi[prop :: prop + 1]):
    # Tasks that start inside the window are relabelled as "Train" ...
    starts_in_window = df["Start"].between(start, end)
    df.loc[starts_in_window, "Task"] = "Train"
    # ... and one extra "Train" row spanning the whole window is appended.
    df.loc[len(df) + 1] = pd.Series(
        [start, end, "Train"], index=["Start", "End", "Task"]
    )

FIGURES["profile-old"] = plot_gantt(df)
FIGURES["profile-old"]  # notebook-style display; no effect in a script
| |
|
|
# "profile-stream": keep every Read/Transform row untouched and append one
# "Train" row running from the first task's start to the last task's start.
# NOTE(review): the bar ends at the last row's "Start", not its "End" --
# confirm that is intended.
df = timeline.copy()
df.loc[len(df) + 1] = pd.Series(
    [df["Start"].iloc[0], df["Start"].iloc[-1], "Train"],
    index=["Start", "End", "Task"],
)

FIGURES["profile-stream"] = plot_gantt(df)
FIGURES["profile-stream"]  # notebook-style display; no effect in a script
|
|
| |
|
|
# Write every registered figure to ../components/<slug>.qmd as an HTML
# fragment (full_html=False) that loads plotly.js from the CDN, printing each
# slug as it goes.
for slug, figure in FIGURES.items():
    print(slug)
    target = f"../components/{slug}.qmd"
    figure.write_html(target, include_plotlyjs="cdn", full_html=False)
|
|
| |
| |
| |
| import qrcode |
| from qrcode.image.styledpil import StyledPilImage |
| from qrcode.image.styles.moduledrawers.pil import RoundedModuleDrawer |
| from qrcode.image.styles.colormasks import RadialGradiantColorMask |
|
|
# QR code for the WeChat contact URL, drawn white on a transparent background
# and saved as a PNG.
qr = qrcode.QRCode(
    error_correction=qrcode.constants.ERROR_CORRECT_L,
)
qr.add_data("https://u.wechat.com/MAmdMGMYjGFC4-2ESxZ1oyw")

img_2 = qr.make_image(back_color="transparent", fill_color="white")
img_2.save("../figures/qr/jing.png")
| |
|
|
|
|
# QR code for a mailto: link, drawn white on a transparent background and
# saved as a PNG.
qr = qrcode.QRCode(
    error_correction=qrcode.constants.ERROR_CORRECT_L,
)
qr.add_data("mailto:data@sustech.edu.cn?subject=Hello&body=")

img_2 = qr.make_image(back_color="transparent", fill_color="white")
img_2.save("../figures/qr/mail-data.png")
|
|
|
|
|
|