Examples¶
dataguy_by_element.py¶
1# Licensed to the Apache Software Foundation (ASF) under one
2# or more contributor license agreements. See the NOTICE file
3# distributed with this work for additional information
4# regarding copyright ownership. The ASF licenses this file
5# to you under the Apache License, Version 2.0 (the
6# "License"); you may not use this file except in compliance
7# with the License. You may obtain a copy of the License at
8
9# http://www.apache.org/licenses/LICENSE-2.0
10
11# Unless required by applicable law or agreed to in writing,
12# software distributed under the License is distributed on an
13# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14# KIND, either express or implied. See the License for the
15# specific language governing permissions and limitations
16# under the License.
17
18"""
19Clicks hard-coded menu items by element and selector.
20"""
21from typing import List
22
23import webtraversallibrary as wtl
24
25from .util import parse_cli_args
26
27
@wtl.single_tab
def policy(_, view: wtl.View) -> List[wtl.Action]:
    """Click the first menu item and then the second, within a single step.

    The menu links are looked up in two ways: by filtering the snapshot
    elements on position and tag, and by a CSS selector.
    """
    # Hard-coded geometry: anchors close to the top-left corner of the page
    sidebar_links = [
        element
        for element in view.snapshot.elements
        if element.location.x < 10 and element.location.y < 200 and element.metadata["tag"] == "a"
    ]
    by_element = view.actions.by_element(sidebar_links[0])

    # Selector-based lookup - this one matches all the menu items
    by_selector = view.actions.by_selector(wtl.Selector(css=".sidenav div a"))

    # First menu item, then (before the next snapshot) the second
    return [by_element[0], by_selector[1]]
40
41
if __name__ == "__main__":
    cli_args = parse_cli_args()

    # Workflow settings all come from the command line
    workflow = wtl.Workflow(
        config=wtl.Config(cli_args.config),
        policy=policy,
        url=cli_args.url,
        output=cli_args.output,
    )

    # Register the active-element filter with Click as its action
    workflow.classifiers.add(wtl.ActiveElementFilter(action=wtl.actions.Click))

    workflow.run()
    workflow.quit()
dementor.py¶
1# Licensed to the Apache Software Foundation (ASF) under one
2# or more contributor license agreements. See the NOTICE file
3# distributed with this work for additional information
4# regarding copyright ownership. The ASF licenses this file
5# to you under the Apache License, Version 2.0 (the
6# "License"); you may not use this file except in compliance
7# with the License. You may obtain a copy of the License at
8
9# http://www.apache.org/licenses/LICENSE-2.0
10
11# Unless required by applicable law or agreed to in writing,
12# software distributed under the License is distributed on an
13# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14# KIND, either express or implied. See the License for the
15# specific language governing permissions and limitations
16# under the License.
17
18"""
19Remove an element, one at a time, until the page is empty.
20"""
21
22import random
23from typing import List, Union
24
25import webtraversallibrary as wtl
26
27from .util import parse_cli_args
28
29
@wtl.single_tab
def policy(workflow: wtl.Workflow, view: wtl.View) -> Union[wtl.Action, List[wtl.Action]]:
    """Remove one randomly chosen element per step; at loop index 7, revert to step 3."""
    if workflow.loop_idx != 7:
        # Randomly pick one of the deleting actions, then pause, clear and wait for the user
        removal = random.choice(view.actions.by_type(wtl.actions.Remove))
        pause = wtl.actions.Wait(0.25)
        cleanup = wtl.actions.Clear(viewport=False)
        confirm = wtl.actions.WaitForUser()
        return [removal, pause, cleanup, confirm]

    # After seven deletions, start over from step 3
    return wtl.actions.Revert(3)
43
44
if __name__ == "__main__":
    cli_args = parse_cli_args()

    wf = wtl.Workflow(
        config=wtl.Config(cli_args.config),
        policy=policy,
        url=cli_args.url,
        output=cli_args.output,
    )

    # The callback hands back its first argument unchanged, so _all_ elements are labelled removable
    wf.classifiers.add(
        wtl.ElementClassifier(
            name="dementor", enabled=True, highlight=False, action=wtl.actions.Remove, callback=lambda e, _: e
        )
    )

    wf.run()
    wf.quit()
dynamic_windows.py¶
1# Licensed to the Apache Software Foundation (ASF) under one
2# or more contributor license agreements. See the NOTICE file
3# distributed with this work for additional information
4# regarding copyright ownership. The ASF licenses this file
5# to you under the Apache License, Version 2.0 (the
6# "License"); you may not use this file except in compliance
7# with the License. You may obtain a copy of the License at
8
9# http://www.apache.org/licenses/LICENSE-2.0
10
11# Unless required by applicable law or agreed to in writing,
12# software distributed under the License is distributed on an
13# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14# KIND, either express or implied. See the License for the
15# specific language governing permissions and limitations
16# under the License.
17
18"""
19Dynamically handles multiple tabs and windows.
20Creates new windows and/or tabs every iteration.
21"""
22
23from random import choice
24from typing import Dict
25
26import webtraversallibrary as wtl
27
28from .util import parse_cli_args
29
30
def policy(workflow: wtl.Workflow, view: wtl.View) -> Dict[wtl.View, wtl.Action]:
    """Open one new tab per iteration and click a random element in a random tab.

    ``view`` is used as a mapping here: its ``values()`` are the individual views.
    """
    step: int = workflow.loop_idx + 1
    target_window_idx: int = step // 3

    # Every third tab goes into a freshly created window, otherwise the latest window is reused
    needs_new_window = target_window_idx >= len(workflow.windows)
    window = workflow.create_window(str(target_window_idx)) if needs_new_window else workflow.windows[-1]

    # Create a tab pointing at the Wikipedia page for the current number
    window.create_tab(str(step), url=f"https://en.wikipedia.org/wiki/{step}")

    # Click a random clickable element in a random view/tab
    chosen_view = choice(list(view.values()))
    return {chosen_view: choice(chosen_view.actions.by_type(wtl.actions.Click))}
47
48
if __name__ == "__main__":
    cli_args = parse_cli_args()

    # The start URL is fixed; only config and output come from the command line
    wf = wtl.Workflow(
        config=wtl.Config(cli_args.config),
        policy=policy,
        url="https://en.wikipedia.org/wiki/0",
        output=cli_args.output,
    )

    # Register the active-element filter with Click as its action
    wf.classifiers.add(wtl.ActiveElementFilter(action=wtl.actions.Click))

    wf.run()
    wf.quit()
fill_form.py¶
1# Licensed to the Apache Software Foundation (ASF) under one
2# or more contributor license agreements. See the NOTICE file
3# distributed with this work for additional information
4# regarding copyright ownership. The ASF licenses this file
5# to you under the Apache License, Version 2.0 (the
6# "License"); you may not use this file except in compliance
7# with the License. You may obtain a copy of the License at
8
9# http://www.apache.org/licenses/LICENSE-2.0
10
11# Unless required by applicable law or agreed to in writing,
12# software distributed under the License is distributed on an
13# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14# KIND, either express or implied. See the License for the
15# specific language governing permissions and limitations
16# under the License.
17
18"""
19Randomly fill all random text input fields on a page with a preset list of names.
20"""
21
22from random import choice
23from typing import List
24
25import webtraversallibrary as wtl
26from webtraversallibrary.actions import Clear, FillText
27
28from .util import parse_cli_args
29
30
@wtl.single_tab
def goal(_, view: wtl.View) -> bool:
    """Return True once every element that text could be filled into is non-empty."""
    # Elements targeted by the available FillText actions
    targets: List[wtl.PageElement] = [action.target for action in view.actions.by_type(FillText)]
    # Current text content of each of those elements
    texts: List[str] = [element.metadata["text"] for element in targets]
    # Stop running if they have all been filled (vacuously True when there are no targets)
    return all(text != "" for text in texts)
39
40
# Preset names from which the text for a field is drawn
content = [
    "Robin Carpenter",
    "Kenny Turner",
    "Ernestine Ferguson",
    "Marcelo Allen",
    "Marilyn Rich",
    "Rupert Strong",
]
42
43
@wtl.single_tab
def policy(_, view: wtl.View) -> List[wtl.Action]:
    """Fill one randomly chosen, still-empty text field with a random preset name."""
    # Keep only the FillText actions whose target element has no text yet
    pending: List[wtl.Action] = []
    for candidate in view.actions.by_type(FillText):
        if not candidate.target.metadata["text"]:
            pending.append(candidate)

    # Draw the action first and the text second
    chosen: wtl.Action = choice(pending)
    name: str = choice(content)

    # Clear, then execute the fill with the chosen text
    return [Clear(), chosen(name)]
55
56
def text_field_classifier_func(elements: wtl.Elements, _) -> List[wtl.PageElement]:
    """Select the ``input`` elements whose ``type`` is text, email or password."""
    # For now, only input fields with one of these type values count as text fields.
    accepted_types = ("text", "email", "password")
    matches = []
    for element in elements:
        if element.metadata["tag"] != "input":
            continue
        if element.metadata["type"] in accepted_types:
            matches.append(element)
    return matches
60
61
if __name__ == "__main__":
    cli_args = parse_cli_args()

    # The sign-up page is hard-coded; config and output come from the command line
    workflow = wtl.Workflow(
        config=wtl.Config(cli_args.config),
        policy=policy,
        goal=goal,
        url="https://www.getharvest.com/signup",
        output=cli_args.output,
    )

    # We just need a text field classifier, no need to consider what's active (all of them should be).
    workflow.classifiers.add(
        wtl.ElementClassifier(
            name="textfield",
            action=FillText,
            callback=text_field_classifier_func,
            highlight=True,
        )
    )

    workflow.run()
    workflow.quit()

    print("Workflow successful?", workflow.success)

    # The last entry of the history is the view where goal() returned True
    final_view: wtl.View = workflow.history[-1]

    # Text of every FillText target in that final view
    final_texts: List[str] = [
        fill_action.target.metadata["text"] for fill_action in final_view.actions.by_type(FillText)
    ]

    print("Names entered: ", ", ".join(final_texts))
gui.py¶
1# Licensed to the Apache Software Foundation (ASF) under one
2# or more contributor license agreements. See the NOTICE file
3# distributed with this work for additional information
4# regarding copyright ownership. The ASF licenses this file
5# to you under the Apache License, Version 2.0 (the
6# "License"); you may not use this file except in compliance
7# with the License. You may obtain a copy of the License at
8
9# http://www.apache.org/licenses/LICENSE-2.0
10
11# Unless required by applicable law or agreed to in writing,
12# software distributed under the License is distributed on an
13# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14# KIND, either express or implied. See the License for the
15# specific language governing permissions and limitations
16# under the License.
17
18"""
19Basic example of using WTL together with a graphical frontend.
20"""
21
22# pylint: disable=global-statement
23
24import tkinter as tk
25from pathlib import Path
26from threading import Lock, Thread
27from time import sleep
28from tkinter import font
29
30import webtraversallibrary as wtl
31
32from .util import parse_cli_args
33
34# === THREAD SYNC ===
35# Global variables used for communicating.
36# Uses a single lock for everything.
37# 0=Waiting for new view, 1=Update GUI image, 2=Waiting for new action
38
# Single lock guarding all of the variables below
data_lock = Lock()
# Latest view published by the policy / action chosen in the GUI
current_view = current_action = None
# 0=Waiting for new view, 1=Update GUI image, 2=Waiting for new action
state = 0
43
44
45# === WTL THREAD ===
46# Running WTL in a separate thread (so GUI can run on the main thread).
47# Very simple state machine that saves the current view and busy waits
48# until an action has been set from the GUI.
49
50
@wtl.single_tab
def policy(_, view: wtl.View) -> wtl.Action:
    """Publish the view for the GUI, then poll until the GUI has set an action."""
    global state, current_view

    # Hand over the new view and move to state 1 (GUI should update its image)
    with data_lock:
        current_view, state = view, 1

    # Busy wait: state goes back to 0 once a GUI callback has stored an action
    while True:
        with data_lock:
            action_ready = state == 0
            chosen = current_action

        if action_ready:
            return chosen

        sleep(0.5)
65
66
def wtl_thread(cli_args):
    """Build the workflow from the parsed CLI arguments, run it and quit."""
    workflow = wtl.Workflow(
        config=wtl.Config(cli_args.config),
        policy=policy,
        url=cli_args.url,
        output=cli_args.output,
    )

    # Active-element filter with Click as its action, highlighting enabled
    click_filter = wtl.ActiveElementFilter(action=wtl.actions.Click, highlight=True)
    workflow.classifiers.add(click_filter)

    workflow.run()
    workflow.quit()
74
75
76# === GUI THREAD (MAIN) ===
77#
78
# show_actives: replaced by a tk.IntVar inside gui_thread()
# current_filename: name of the screenshot most recently drawn on the canvas
show_actives, current_filename = None, ""
81
82
def gui_thread():
    """
    Sets up the window with design and all callbacks, then runs the Tk main loop.

    Exchanges data with the policy through the module-level ``state``, ``current_view``
    and ``current_action`` variables, which are accessed while holding ``data_lock``.
    """
    global show_actives

    window = tk.Tk()
    window.title("WTL browser")
    window.geometry("1920x1080")

    top_frame = tk.Frame()

    canvas = tk.Canvas(top_frame, width=1280, height=1080)
    canvas.pack(padx=10, pady=10, side=tk.LEFT)
    # Rebound in upd_view() to the PhotoImage currently drawn on the canvas
    img = None

    show_actives = tk.IntVar()
    ch = tk.Checkbutton(top_frame, text="Show active elements", variable=show_actives)
    ch.pack(side=tk.LEFT)

    small_font = font.Font(size=16)
    listbox = tk.Listbox(top_frame, width=375, height=200, font=small_font)
    listbox.pack(padx=5, pady=5, side=tk.LEFT)

    label_frame = tk.Frame(window, width=1400, height=100, bg="white")
    label_frame.pack_propagate(0)

    desc_label = tk.Label(label_frame, text="Hello", justify=tk.LEFT, wraplength=1300, bg="white")
    desc_label.pack()
    label_frame.pack()
    top_frame.pack()

    def get_element(mouse):  # pylint: disable=inconsistent-return-statements
        """
        Look for the element at current coords with smallest bounds.
        Returns None when there is no view yet or no element matches.
        """
        # The image is drawn at (5, 5) on the canvas, so shift the mouse coords back
        point = wtl.Point(mouse.x - 5, mouse.y - 5)

        with data_lock:
            if not current_view:
                return

            # Elements with an area of 999999 or more are never picked
            smallest_element, smallest_area = None, 999999
            for e in current_view.snapshot.elements:
                if point in e.bounds and e.bounds.area < smallest_area:
                    smallest_area, smallest_element = e.bounds.area, e

            return smallest_element

    def hover(mouse):
        """
        Update the top label when hovering over an element.
        """
        smallest_element = get_element(mouse)

        with data_lock:
            if smallest_element:
                # Show all metadata except the (potentially long) text
                output = [f"{k}={str(v)}" for k, v in smallest_element.metadata.items() if k != "text"]
                desc_label.config(text=", ".join(output))
            else:
                desc_label.config(text="{}")

    def double_clicked(mouse):
        """
        Set action of the clicked element.
        Does not check if it's active or not.
        """
        global state, current_action

        smallest_element = get_element(mouse)

        with data_lock:
            if smallest_element:
                state = 0
                current_action = wtl.actions.Click(wtl.Selector(f'[wtl-uid="{smallest_element.wtl_uid}"]'))

    def selected(_):
        """
        Sets action by clicking an element in the listbox.
        Does nothing when the event arrives without any selected entry.
        """
        global state, current_action

        with data_lock:
            selection = listbox.curselection()
            if not selection:
                # The select event can fire with nothing selected (e.g. when the
                # selection is cleared); listbox.get() would fail on an empty index.
                return

            # Entries look like "<wtl_uid> (<text>)"; keep only the uid part
            data = str(listbox.get(selection[0])).split(" (", maxsplit=1)[0]
            state = 0
            current_action = wtl.actions.Click(wtl.Selector(f'[wtl-uid="{data}"]'))

    # Bind functions to the GUI objects
    canvas.bind("<Motion>", hover)
    canvas.bind("<Double-Button-1>", double_clicked)
    listbox.bind("<<ListboxSelect>>", selected)

    def upd_view():
        """
        Checks state and updates the GUI with screenshot and list of actions.
        Reschedules itself every 250 ms.
        """
        global state, current_filename
        nonlocal img

        with data_lock:
            if state == 1:
                # New view: force an image reload and rebuild the list of click actions
                current_filename = None
                state = 2
                listbox.delete(0, tk.END)
                for item in current_view.actions.by_type(wtl.actions.Click):
                    wtl_uid = str(item.target.wtl_uid)
                    text = item.target.metadata["text"]
                    listbox.insert(tk.END, wtl_uid + f" ({text})")

            if state == 2:
                # Redraw only when the requested screenshot differs from the one shown
                filename = "first" if show_actives.get() == 0 else "is_active"
                if filename != current_filename:
                    current_filename = filename
                    current_view.snapshot.screenshots[filename].save(Path("."))
                    img = tk.PhotoImage(file=f"{filename}.png")
                    canvas.create_image(5, 5, anchor=tk.NW, image=img)

        window.after(250, upd_view)

    window.after(1000, upd_view)
    window.mainloop()
207
208
209# === === === === ===
210# Entry point: Setup WTL thread and run GUI on this (main) thread.
211
if __name__ == "__main__":
    _cli_args = parse_cli_args()

    # WTL runs on a separate thread so the GUI can run on this (main) thread
    _wtl_thread = Thread(target=wtl_thread, args=(_cli_args,))
    _wtl_thread.start()

    gui_thread()

    # Reached after gui_thread() returns; wait for the WTL thread to finish
    _wtl_thread.join()
hard_coded.py¶
1# Licensed to the Apache Software Foundation (ASF) under one
2# or more contributor license agreements. See the NOTICE file
3# distributed with this work for additional information
4# regarding copyright ownership. The ASF licenses this file
5# to you under the Apache License, Version 2.0 (the
6# "License"); you may not use this file except in compliance
7# with the License. You may obtain a copy of the License at
8
9# http://www.apache.org/licenses/LICENSE-2.0
10
11# Unless required by applicable law or agreed to in writing,
12# software distributed under the License is distributed on an
13# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14# KIND, either express or implied. See the License for the
15# specific language governing permissions and limitations
16# under the License.
17
18"""
19Simple example showing a hard-coded generator of actions.
20"""
21
22import webtraversallibrary as wtl
23from webtraversallibrary.actions import Clear, Click, Highlight
24
25from .util import parse_cli_args, start_server
26
27
@wtl.single_tab_coroutine
def policy():
    """Yield a fixed script: highlight five heading links in turn, then click the first."""
    # The first yield produces no action
    yield

    # Highlight some titles, clearing before each highlight
    for heading_no in range(1, 6):
        heading_link = wtl.Selector(f"h2:nth-of-type({heading_no}) > a")
        yield [Clear(), Highlight(target=heading_link)]

    # Then click a menu item.
    # Once the generator is exhausted, workflow will interpret StopIteration as cancelling the tabs.
    yield Click(wtl.Selector("h2:nth-of-type(1) > a"))
37
38
if __name__ == "__main__":
    cli_args = parse_cli_args()

    # The target is "/blog" under whatever address start_server() returns
    workflow = wtl.Workflow(
        config=wtl.Config(cli_args.config),
        policy=policy,
        url=start_server() + "/blog",
        output=cli_args.output,
    )

    workflow.run()
    workflow.quit()
href_scorer.py¶
1# Licensed to the Apache Software Foundation (ASF) under one
2# or more contributor license agreements. See the NOTICE file
3# distributed with this work for additional information
4# regarding copyright ownership. The ASF licenses this file
5# to you under the Apache License, Version 2.0 (the
6# "License"); you may not use this file except in compliance
7# with the License. You may obtain a copy of the License at
8
9# http://www.apache.org/licenses/LICENSE-2.0
10
11# Unless required by applicable law or agreed to in writing,
12# software distributed under the License is distributed on an
13# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14# KIND, either express or implied. See the License for the
15# specific language governing permissions and limitations
16# under the License.
17
18"""
19Ranks all links on a page by the length of the HREF attribute.
20Does nothing with them.
21"""
22
23import webtraversallibrary as wtl
24from webtraversallibrary.actions import Wait
25
26from .util import parse_cli_args
27
28
@wtl.single_tab
def policy(_, __):
    """Only wait, so that the classifier isn't run over and over again."""
    idle = Wait(10)
    return idle
33
34
def url_length_classifier_func(elements, _):
    """Pair every element that has a non-empty ``href`` with the length of that ``href``."""
    scored = []
    for element in elements:
        href = element.metadata["href"]
        # Elements with an empty/falsy href are left out entirely
        if href:
            scored.append((element, len(href)))
    return scored
39
40
if __name__ == "__main__":
    cli_args = parse_cli_args()

    workflow = wtl.Workflow(
        config=wtl.Config(cli_args.config),
        policy=policy,
        url=cli_args.url,
        output=cli_args.output,
    )

    # Scores come from url_length_classifier_func; highlighted in blue with linear scaling
    workflow.classifiers.add(
        wtl.ElementClassifier(
            name="url_length",
            highlight=True,
            mode=wtl.ScalingMode.LINEAR,
            highlight_color=wtl.Color(0, 0, 255),
            callback=url_length_classifier_func,
        )
    )

    workflow.run()
    workflow.quit()
interactive.py¶
1# Licensed to the Apache Software Foundation (ASF) under one
2# or more contributor license agreements. See the NOTICE file
3# distributed with this work for additional information
4# regarding copyright ownership. The ASF licenses this file
5# to you under the Apache License, Version 2.0 (the
6# "License"); you may not use this file except in compliance
7# with the License. You may obtain a copy of the License at
8
9# http://www.apache.org/licenses/LICENSE-2.0
10
11# Unless required by applicable law or agreed to in writing,
12# software distributed under the License is distributed on an
13# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14# KIND, either express or implied. See the License for the
15# specific language governing permissions and limitations
16# under the License.
17
18"""
19Simple example of an interactive "game", as an old-style text RPG, for web traversal.
20"""
21
22import logging
23import random
24from typing import List
25
26import webtraversallibrary as wtl
27
28from .util import parse_cli_args
29
# Scene openers; the page title is printed right after one of these, in quotes
initials = [
    "You see a house. Above the door hangs a sign: ",
    "You are in an open field. You find a note that says: ",
    "You are inside an old cottage. On the wall you see something written: ",
    "You are standing in the wtl office. It is loud. One of the screens say: ",
    "Rain is pouring through the broken roof. The rain patters a sound. You make out: ",
]

# Shown by "look" when there is an empty text field
textfield_desc = [
    "A person appears and asks you for a name. He hands you a paper and pen to write with.",
    "An empty notepad really wants something to be written.",
    "You see a dusty whiteboard with a pen that still works.",
    "A piece of paper is asking you what is on your mind. You have a pen in your hand.",
    "A parchment and quill lie before you.",
]

# Shown by "look" when there are clickable elements but no empty text field
vague_desc = [
    "You can see people in the distance.",
    "Marketing people are ringing a bell.",
    "Everything around you looks really clean.",
    "There are multiple paths forward.",
    "You see shadows lurking about far away. They look friendly.",
]

# Prefixes for a quoted piece of heading or paragraph text
content_desc = [
    "A bleached old parchment says: ",
    "A pretty little note by your feet says: ",
    "You find an old metal bracelet with an inscription: ",
    "You are standing next to an old radio. It repeats over and over again: ",
    "A whisper is carried by the wind. It says: ",
]
64
65
@wtl.single_tab_coroutine
def policy():
    """
    Text-adventure style policy driven by commands typed on stdin.

    Receives ``(_, view)`` pairs through ``yield`` and yields the action built from the
    user's command. Commands: help, navigate/jump <url>, look, click <text or selector>,
    move, write <text>. A command that cannot be turned into an action (unknown word,
    missing argument, nothing to act on) prints "I do not understand." and prompts again.
    """
    print("\n === === === \n")
    _, view = yield
    initial = random.choice(initials)
    spoken = False

    while True:
        # Describe the page once per view, not once per prompt
        if not spoken:
            title = view.snapshot.page_metadata["title"]
            print(f'{initial}"{title}"')
            spoken = True

        # Only the command word is lower-cased: URLs, selectors and typed text
        # are case-sensitive and must reach the action unchanged.
        words = input("\n> ").strip().split(" ")
        command, args = words[0].lower(), words[1:]
        action = None

        if command == "help":
            print("\nAvailable commands:\nhelp: shows this message")
            print("navigate")
            print("jump")
            print("look")
            print("click")
            print("move")
            print("write")
            continue

        # Needs a URL argument; without one, fall through to "I do not understand."
        if command in ("navigate", "jump") and args:
            action = wtl.actions.Navigate(args[0])

        if command == "look":
            buttons = view.actions.by_type(wtl.actions.Click)
            textfields = [v for v in view.actions.by_type(wtl.actions.FillText) if v.target.metadata["text"] == ""]
            texts = (
                view.snapshot.elements.by_selector(wtl.Selector("h1"))
                + view.snapshot.elements.by_selector(wtl.Selector("h2"))
                + view.snapshot.elements.by_selector(wtl.Selector("h3"))
                + view.snapshot.elements.by_selector(wtl.Selector("p"))
            )

            if textfields:
                print(random.choice(textfield_desc))

            elif buttons:
                print(random.choice(vague_desc))

            if texts:
                print(random.choice(content_desc) + '"' + random.choice(texts).metadata["text"] + '"')

            continue

        if command == "click":
            text = " ".join(args)
            # An empty text would be contained in every element's text, so require one
            if text:
                # Try, in order: value attribute, contained text, raw CSS selector
                elements = view.snapshot.elements.by_selector(wtl.Selector(f'[value~="{text}"]'))
                if not elements:
                    elements = [e for e in view.snapshot.elements if text in e.metadata["text"]]
                if not elements:
                    elements = view.snapshot.elements.by_selector(wtl.Selector(text))
                if elements:
                    action = wtl.actions.Click(random.choice(elements))

        if command == "move":
            clickables = view.actions.by_type(wtl.actions.Click)
            # random.choice() raises on an empty sequence, so only move when possible
            if clickables:
                action = random.choice(clickables)
                initial = random.choice(initials)

        if command == "write":
            textfields = [v for v in view.actions.by_type(wtl.actions.FillText) if v.target.metadata["text"] == ""]
            # Only possible while an empty text field is left
            if textfields:
                action = random.choice(textfields)(" ".join(args))

        if not action:
            print("I do not understand.")
            continue

        # Hand the action over and wait for the resulting view
        spoken = False
        _, view = yield action
139
140
def text_field_classifier_func(elements: wtl.Elements, _) -> List[wtl.PageElement]:
    """Return the ``input`` elements whose ``type`` is text, email or password."""

    def is_text_field(element) -> bool:
        meta = element.metadata
        return meta["tag"] == "input" and meta["type"] in ("text", "email", "password")

    return [element for element in elements if is_text_field(element)]
143
144
if __name__ == "__main__":
    cli_args = parse_cli_args()

    workflow = wtl.Workflow(
        config=wtl.Config(cli_args.config),
        policy=policy,
        url=cli_args.url,
        output=cli_args.output,
    )

    # Click actions come from the active-element filter, FillText actions from the text field classifier
    workflow.classifiers.add(wtl.ActiveElementFilter(action=wtl.actions.Click))
    workflow.classifiers.add(
        wtl.ElementClassifier(
            name="textfield",
            action=wtl.actions.FillText,
            callback=text_field_classifier_func,
            highlight=True,
        )
    )

    # Raise the "wtl" logger threshold to CRITICAL for the interactive session
    logging.getLogger("wtl").setLevel(logging.CRITICAL)

    workflow.run()
    workflow.quit()
largest_image.py¶
1# Licensed to the Apache Software Foundation (ASF) under one
2# or more contributor license agreements. See the NOTICE file
3# distributed with this work for additional information
4# regarding copyright ownership. The ASF licenses this file
5# to you under the Apache License, Version 2.0 (the
6# "License"); you may not use this file except in compliance
7# with the License. You may obtain a copy of the License at
8
9# http://www.apache.org/licenses/LICENSE-2.0
10
11# Unless required by applicable law or agreed to in writing,
12# software distributed under the License is distributed on an
13# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14# KIND, either express or implied. See the License for the
15# specific language governing permissions and limitations
16# under the License.
17
18"""
19Ranks all images on a page by their geometric size.
20Clicks on the largest, then checks if the URL changed, then stops.
21"""
22from typing import Optional
23
24import webtraversallibrary as wtl
25from webtraversallibrary.actions import Click
26from webtraversallibrary.goals import N_STEPS
27
28from .util import parse_cli_args
29
# Goal passed to the workflow below, built from N_STEPS with an argument of 2
goal = N_STEPS(2)
31
32
@wtl.single_tab
def policy(workflow: wtl.Workflow, view: wtl.View) -> Optional[wtl.Action]:
    """Click the largest image while the history has one entry; otherwise report if the URL changed."""
    if len(workflow.history) != 1:
        # Compare the current URL against the one of the first view in the history
        current_url = view.snapshot.page_metadata["url"]
        start_url = workflow.history[0].snapshot.page_metadata["url"]
        print("\n", current_url != start_url, "\n")
        return None

    # Order the elements scored as "image" by bounding-box area, largest first
    candidates = list(view.snapshot.elements.by_score("image"))
    candidates.sort(key=lambda element: element.bounds.area, reverse=True)
    return Click(candidates[0])
43
44
def image_classifier_func(elements, _):
    """Return the elements whose tag is ``img``, in their original order."""
    images = []
    for candidate in elements:
        if candidate.metadata["tag"] == "img":
            images.append(candidate)
    return images
47
48
if __name__ == "__main__":
    cli_args = parse_cli_args()

    wf = wtl.Workflow(
        config=wtl.Config(cli_args.config),
        policy=policy,
        goal=goal,
        url=cli_args.url,
        output=cli_args.output,
    )

    # The "image" name registered here is what policy() passes to by_score()
    wf.classifiers.add(
        wtl.ElementClassifier(
            name="image",
            highlight=True,
            callback=image_classifier_func,
        )
    )

    wf.run()
    wf.quit()
monkeypatch.py¶
1# Licensed to the Apache Software Foundation (ASF) under one
2# or more contributor license agreements. See the NOTICE file
3# distributed with this work for additional information
4# regarding copyright ownership. The ASF licenses this file
5# to you under the Apache License, Version 2.0 (the
6# "License"); you may not use this file except in compliance
7# with the License. You may obtain a copy of the License at
8
9# http://www.apache.org/licenses/LICENSE-2.0
10
11# Unless required by applicable law or agreed to in writing,
12# software distributed under the License is distributed on an
13# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14# KIND, either express or implied. See the License for the
15# specific language governing permissions and limitations
16# under the License.
17
18"""
19Click any active element, but all links lead to Rome, literally, using workflow.monkeypatches.
20"""
21
22from random import choice
23
24import webtraversallibrary as wtl
25from webtraversallibrary.actions import Click
26
27from .util import parse_cli_args
28
29
@wtl.single_tab
def policy(_, view: wtl.View) -> wtl.Action:
    """Return a random Click action among those selected by the "is_active" score."""
    click_actions = view.actions.by_type(Click)
    active_clicks = click_actions.by_score("is_active")
    return choice(active_clicks)
34
35
if __name__ == "__main__":
    cli_args = parse_cli_args()

    workflow = wtl.Workflow(
        config=wtl.Config(cli_args.config),
        policy=policy,
        url=cli_args.url,
        output=cli_args.output,
    )

    # Default monkeypatch: all links lead to Rome
    workflow.monkeypatches.set_default("https://en.wikipedia.org/wiki/Rome")

    # Register the active-element filter with Click as its action
    workflow.classifiers.add(wtl.ActiveElementFilter(action=Click))

    workflow.run()
    workflow.quit()
multiples.py¶
1# Licensed to the Apache Software Foundation (ASF) under one
2# or more contributor license agreements. See the NOTICE file
3# distributed with this work for additional information
4# regarding copyright ownership. The ASF licenses this file
5# to you under the Apache License, Version 2.0 (the
6# "License"); you may not use this file except in compliance
7# with the License. You may obtain a copy of the License at
8
9# http://www.apache.org/licenses/LICENSE-2.0
10
11# Unless required by applicable law or agreed to in writing,
12# software distributed under the License is distributed on an
13# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14# KIND, either express or implied. See the License for the
15# specific language governing permissions and limitations
16# under the License.
17
18"""
19Creates a total of three tabs in two windows, and clicks randomly on all of them.
20"""
21
22from random import choice
23from typing import Dict
24
25import webtraversallibrary as wtl
26
27from .util import parse_cli_args
28
29
def policy(_, view: wtl.View) -> Dict[wtl.View, wtl.Action]:
    """Map every view in ``view.values()`` to one randomly chosen Click action."""
    chosen = {}
    for tab_view in view.values():
        chosen[tab_view] = choice(tab_view.actions.by_type(wtl.actions.Click))
    return chosen
32
33
if __name__ == "__main__":
    cli_args = parse_cli_args()

    # Two windows ("first", "second") holding three tabs ("A", "B", "C") in total
    workflow = wtl.Workflow(
        config=wtl.Config(cli_args.config),
        policy=policy,
        url={
            "first": {
                "A": "www.uppsalahandkraft.se",
                "B": "https://www.uppsalamodemassa.se",
            },
            "second": {
                "C": "shop.biskopsgarden.com",
            },
        },
        output=cli_args.output,
    )

    # Register the active-element filter with Click as its action
    workflow.classifiers.add(wtl.ActiveElementFilter(action=wtl.actions.Click))

    workflow.run()
    workflow.quit()
random_dataguy_memory.py¶
1# Licensed to the Apache Software Foundation (ASF) under one
2# or more contributor license agreements. See the NOTICE file
3# distributed with this work for additional information
4# regarding copyright ownership. The ASF licenses this file
5# to you under the Apache License, Version 2.0 (the
6# "License"); you may not use this file except in compliance
7# with the License. You may obtain a copy of the License at
8
9# http://www.apache.org/licenses/LICENSE-2.0
10
11# Unless required by applicable law or agreed to in writing,
12# software distributed under the License is distributed on an
13# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14# KIND, either express or implied. See the License for the
15# specific language governing permissions and limitations
16# under the License.
17
18"""
19Defines a subset of all active elements (menu items) and clicks randomly on those exactly once.
20When they have all been clicked, abort the workflow.
21"""
22
23from random import choice
24from typing import List
25
26import webtraversallibrary as wtl
27from webtraversallibrary.actions import Abort, Click
28
29from .util import parse_cli_args
30
31
@wtl.single_tab
def policy(workflow: wtl.Workflow, view: wtl.View) -> wtl.Action:
    """Click each "menu" element exactly once, in random order, then abort.

    The elements clicked so far are remembered in ``view.metadata["previous"]``;
    an element counts as already clicked when its text matches a remembered one.
    """
    if "previous" not in view.metadata:
        view.metadata["previous"] = []
    else:
        workflow.js.annotate(
            wtl.Point(100, 100), wtl.Color(0, 0, 0), 30, "This is an annotation", wtl.Color(128, 128, 128, 128)
        )

        if workflow.config.debug.screenshots:
            # Create screenshot of previous actions with an emphasis on the latest
            scr = view.snapshot.new_screenshot("history", of="full")
            for prev in view.metadata["previous"]:
                scr.highlight(prev.bounds, color=wtl.Color(255, 0, 0, 100))
            scr.highlight(
                view.metadata["previous_action"][0].target.bounds, text="Latest action", color=wtl.Color(0, 0, 255, 100)
            )
            scr.save(workflow.output_path)

            # Save screenshot of the current live view
            workflow.scraper.capture_screenshot("live").save(workflow.output_path)

    # Get all elements tagged as "menu"
    menu_elements = view.snapshot.elements.by_score("menu")

    # Filter out those we have already clicked on; the clicked texts are
    # collected once up front rather than once per candidate element
    clicked_texts = {prev.metadata["text"] for prev in view.metadata["previous"]}
    menu_elements = [e for e in menu_elements if e.metadata["text"] not in clicked_texts]

    if menu_elements:
        # If there are any left, click one of them and remember the element
        element = choice(menu_elements)
        action = Click(element)
        view.metadata["previous"].append(element)
    else:
        # Otherwise, stop everything
        action = Abort()

    # Report and return
    print("Here are the buttons I've clicked: ", [e.metadata["text"] for e in view.metadata["previous"]])
    print("Last time I did", view.metadata["previous_action"][0])
    return action
75
76
def menu_classifier_func(elements: wtl.Elements, _) -> List[wtl.PageElement]:
    """
    Returns the elements regarded as menu items.

    An element qualifies when its location has x < 10 and y < 200 and its tag is "a".
    The second argument is ignored.
    """
    # The condition here is completely hard-coded for the given page.
    selected = []
    for candidate in elements:
        if not (candidate.location.x < 10 and candidate.location.y < 200):
            continue
        if candidate.metadata["tag"] == "a":
            selected.append(candidate)
    return selected
80
81
if __name__ == "__main__":
    # Script entry point: build the workflow, register the classifiers used by the policy and run it.
    cli_args = parse_cli_args()

    wf = wtl.Workflow(config=wtl.Config(cli_args.config), policy=policy, url=cli_args.url, output=cli_args.output)

    wf.classifiers.add(wtl.ActiveElementFilter(action=Click))

    # The "menu" classifier provides the elements the policy fetches with by_score("menu")
    wf.classifiers.add(
        wtl.ElementClassifier(
            name="menu",
            action=Click,
            subset="is_active",  # Consider only active elements
            highlight=True,
            callback=menu_classifier_func,
        )
    )

    # Make sure the workflow is shut down even when the run raises
    try:
        wf.run()
    finally:
        wf.quit()
random_dataguy.py¶
1# Licensed to the Apache Software Foundation (ASF) under one
2# or more contributor license agreements. See the NOTICE file
3# distributed with this work for additional information
4# regarding copyright ownership. The ASF licenses this file
5# to you under the Apache License, Version 2.0 (the
6# "License"); you may not use this file except in compliance
7# with the License. You may obtain a copy of the License at
8
9# http://www.apache.org/licenses/LICENSE-2.0
10
11# Unless required by applicable law or agreed to in writing,
12# software distributed under the License is distributed on an
13# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14# KIND, either express or implied. See the License for the
15# specific language governing permissions and limitations
16# under the License.
17
18"""
19Defines a subset of all active elements (menu items) and clicks randomly on those.
20"""
21
22from random import choice
23from typing import List
24
25import webtraversallibrary as wtl
26from webtraversallibrary.actions import Click
27
28from .util import parse_cli_args
29
30
@wtl.single_tab
def policy(_, view: wtl.View) -> wtl.Action:
    """Returns a random Click action among those scored as "menu"."""
    return choice(view.actions.by_type(Click).by_score("menu"))
35
36
def menu_classifier_func(elements: wtl.Elements, _) -> List[wtl.PageElement]:
    """
    Picks out the menu items: elements located at x < 10 and y < 200 whose tag is "a".

    The second argument is ignored.
    """

    def is_menu_item(elem) -> bool:
        # The condition here is completely hard-coded for the given page.
        return elem.location.x < 10 and elem.location.y < 200 and elem.metadata["tag"] == "a"

    return list(filter(is_menu_item, elements))
40
41
if __name__ == "__main__":
    # Script entry point: build the workflow, register the classifiers used by the policy and run it.
    cli_args = parse_cli_args()

    workflow = wtl.Workflow(config=wtl.Config(cli_args.config), policy=policy, url=cli_args.url, output=cli_args.output)

    workflow.classifiers.add(wtl.ActiveElementFilter(action=Click))

    # The "menu" classifier provides the score the policy filters on with by_score("menu")
    workflow.classifiers.add(
        wtl.ElementClassifier(
            name="menu",
            action=Click,
            subset="is_active",  # Consider only active elements
            highlight=True,
            callback=menu_classifier_func,
        )
    )

    # Make sure the workflow is shut down even when the run raises
    try:
        workflow.run()
    finally:
        workflow.quit()
random_traversal.py¶
1# Licensed to the Apache Software Foundation (ASF) under one
2# or more contributor license agreements. See the NOTICE file
3# distributed with this work for additional information
4# regarding copyright ownership. The ASF licenses this file
5# to you under the Apache License, Version 2.0 (the
6# "License"); you may not use this file except in compliance
7# with the License. You may obtain a copy of the License at
8
9# http://www.apache.org/licenses/LICENSE-2.0
10
11# Unless required by applicable law or agreed to in writing,
12# software distributed under the License is distributed on an
13# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14# KIND, either express or implied. See the License for the
15# specific language governing permissions and limitations
16# under the License.
17
18"""
19Click on any random clickable element on a page.
20Also demonstrates the use of postload_callbacks.
21"""
22
23from functools import partial
24from random import choice, random
25
26import webtraversallibrary as wtl
27from webtraversallibrary.actions import Click, Refresh
28
29from .util import parse_cli_args
30
31
@wtl.single_tab
def policy(workflow: wtl.Workflow, view: wtl.View) -> wtl.Action:
    """
    Returns a random Click action, or the unique Refresh action about 5% of the time.

    Also asserts that ``workflow.duplicate_loop_idx`` (set by the postload callback)
    equals ``workflow.loop_idx``.
    """
    assert workflow.duplicate_loop_idx == workflow.loop_idx

    # With some small probability, refresh instead of clicking.
    if random() < 0.95:
        return choice(view.actions.by_type(Click))
    return view.actions.by_type(Refresh).unique()
38
39
def set_duplicate_loop_idx(workflow: wtl.Workflow):
    """Copies the workflow's current ``loop_idx`` into its ``duplicate_loop_idx`` attribute."""
    current_idx = workflow.loop_idx
    workflow.duplicate_loop_idx = current_idx
42
43
if __name__ == "__main__":
    # Script entry point: build the workflow, register the classifier and the postload callback, then run.
    cli_args = parse_cli_args()

    wf = wtl.Workflow(config=wtl.Config(cli_args.config), policy=policy, url=cli_args.url, output=cli_args.output)

    wf.classifiers.add(wtl.ActiveElementFilter(action=Click))

    # The callback stores loop_idx on the workflow; the policy asserts that the two values agree
    wf.postload_callbacks.append(partial(set_duplicate_loop_idx, wf))

    # Make sure the workflow is shut down even when the run raises
    try:
        wf.run()
    finally:
        wf.quit()
size_scorer.py¶
1# Licensed to the Apache Software Foundation (ASF) under one
2# or more contributor license agreements. See the NOTICE file
3# distributed with this work for additional information
4# regarding copyright ownership. The ASF licenses this file
5# to you under the Apache License, Version 2.0 (the
6# "License"); you may not use this file except in compliance
7# with the License. You may obtain a copy of the License at
8
9# http://www.apache.org/licenses/LICENSE-2.0
10
11# Unless required by applicable law or agreed to in writing,
12# software distributed under the License is distributed on an
13# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14# KIND, either express or implied. See the License for the
15# specific language governing permissions and limitations
16# under the License.
17
18"""
19Classifies active elements with a "size score" and then clicks some element with average size.
20"""
21
22from random import choice
23from typing import Dict
24
25import webtraversallibrary as wtl
26from webtraversallibrary.actions import Click
27
28from .util import parse_cli_args
29
30
@wtl.single_tab
def policy(_, view: wtl.View) -> wtl.Action:
    """Returns a random Click action among those scored as "size__average"."""
    average_sized = view.actions.by_type(Click).by_score("size__average")
    return choice(average_sized)
34
35
def size_classifier_func(elements: wtl.Elements, _) -> Dict[str, list]:
    """
    Scores elements by their area relative to the largest element.

    Returns a dict with two lists of ``(element, score)`` tuples:

    - "big": elements whose normalized area is above 0.75, scored by that normalized area.
    - "average": elements whose normalized area is in (0.25, 0.75], scored by the distance
      of the normalized area from 0.5.

    Both lists are empty when there are no elements or when no element has a positive area
    (the normalization would otherwise fail). The second argument is ignored.
    """
    # Computes a normalized size.
    # Note that this is not the simplest way of clicking the largest clickable element.
    result = {"big": [], "average": []}

    # Read every area once; it is needed both for the maximum and for the score
    measured = [(element, element.bounds.area) for element in elements]
    largest_area = max((area for _element, area in measured), default=0)
    if largest_area <= 0:
        return result

    for element, area in measured:
        score = area / largest_area
        if score > 0.75:
            result["big"].append((element, score))
        elif score > 0.25:
            result["average"].append((element, abs(0.5 - score)))

    return result
49
50
if __name__ == "__main__":
    # Script entry point: build the workflow, register the classifiers and run inside the context manager.
    cli_args = parse_cli_args()
    workflow_config = wtl.Config(cli_args.config)

    workflow = wtl.Workflow(config=workflow_config, policy=policy, url=cli_args.url, output=cli_args.output)

    active_filter = wtl.ActiveElementFilter()
    workflow.classifiers.add(active_filter)

    # The "size" classifier is what the policy's "size__average" score refers to
    size_classifier = wtl.ElementClassifier(
        name="size",
        subset="is_active",
        highlight=0.5,
        action=Click,
        callback=size_classifier_func,
    )
    workflow.classifiers.add(size_classifier)

    with workflow:
        workflow.run()
tictactoe.py¶
1# Licensed to the Apache Software Foundation (ASF) under one
2# or more contributor license agreements. See the NOTICE file
3# distributed with this work for additional information
4# regarding copyright ownership. The ASF licenses this file
5# to you under the Apache License, Version 2.0 (the
6# "License"); you may not use this file except in compliance
7# with the License. You may obtain a copy of the License at
8
9# http://www.apache.org/licenses/LICENSE-2.0
10
11# Unless required by applicable law or agreed to in writing,
12# software distributed under the License is distributed on an
13# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14# KIND, either express or implied. See the License for the
15# specific language governing permissions and limitations
16# under the License.
17
18"""
19Plays TicTacToe online by using a simple AI implementation.
20"""
21from typing import List
22
23import webtraversallibrary as wtl
24from webtraversallibrary.actions import Click
25
26from .util import parse_cli_args
27
28
def checkWin(board, player) -> bool:
    """
    Tells whether `player` owns a complete row, column or diagonal.

    `board` is indexed 0-8, row by row.
    """
    winning_lines = []
    for i in range(3):
        winning_lines.append((i * 3, i * 3 + 1, i * 3 + 2))  # row i
        winning_lines.append((i, i + 3, i + 6))  # column i
    winning_lines.append((0, 4, 8))
    winning_lines.append((2, 4, 6))

    return any(all(board[pos] == player for pos in line) for line in winning_lines)
40
41
def getAIMove(board, nextMove, aiPlayer):
    """
    Exhaustively searches the game tree and returns a ``(index, score)`` tuple.

    `nextMove` is the player ("X" or "O") who places the next mark and `aiPlayer` is the
    player the score is computed for. The score is 10 when `aiPlayer` has won, -10 when
    the opponent has won and 0 for a full board; in these cases the index is -1.
    An entirely free board returns ``(4, 0)`` without searching.
    """
    opponent = "O" if aiPlayer == "X" else "X"
    if checkWin(board, aiPlayer):
        return (-1, 10)
    if checkWin(board, opponent):
        return (-1, -10)

    free = [idx for idx, cell in enumerate(board) if cell == " "]
    if not free:
        return (-1, 0)
    if len(free) == len(board):
        return (4, 0)

    following = "X" if nextMove == "O" else "O"
    moves = []
    for idx in free:
        candidate = board[:]
        candidate[idx] = nextMove
        moves.append((idx, getAIMove(candidate, following, aiPlayer)[1]))

    # The first best-scoring move wins a tie: highest score on the AI's turn, lowest otherwise
    pick = max if nextMove == aiPlayer else min
    return pick(moves, key=lambda m: m[1])
63
64
def printBoard(board):
    """Prints the first nine cells of `board` as three rows, showing free cells (" ") as "_"."""
    print("\n")
    for idx in range(9):
        cell = board[idx]
        print("_" if cell == " " else cell, end=" ")
        # Break the line after the first and the second row
        if idx == 2 or idx == 5:
            print("")
    print("\n")
75
76
@wtl.single_tab
def policy(_, view: wtl.View) -> List[wtl.Action]:
    """
    Clicks the start button if one is present, otherwise plays the move chosen by getAIMove.

    The board is rebuilt from the "tile" actions' targets and is printed before the move is returned.
    """
    start_actions = view.actions.by_score("start")
    if start_actions:
        return [start_actions[0]]

    tiles = [tile_action.target for tile_action in view.actions.by_score("tile")]

    board = []
    for tile in tiles:
        # NOTE(review): presumably the sixth character of the class is the mark ("x"/"o") - confirm against the page
        board.append(tile.metadata["class"][5].upper() if tile.metadata["class"] else " ")

    move = getAIMove(board, "X", "X")

    printBoard(board)

    clear_action = wtl.actions.Clear(viewport=False)
    return [clear_action, wtl.actions.Click(tiles[move[0]])]
90
91
def _start_btn(elements, _):
    """Returns the elements with id "sync-task-cover" whose display value contains "block"."""
    matches = []
    for element in elements:
        if element.metadata["id"] != "sync-task-cover":
            continue
        if "block" in element.metadata["display"]:
            matches.append(element)
    return matches
94
95
def _tile_div(elements, _):
    """Returns the "span" elements whose id starts with "ttt" and whose parent tag is a "div"."""

    def is_tile(element) -> bool:
        meta = element.metadata
        return meta["tag"] == "span" and meta["id"].startswith("ttt") and element.tag.parent.name == "div"

    return [element for element in elements if is_tile(element)]
102
103
if __name__ == "__main__":
    # Script entry point: the URL is fixed to the MiniWoB++ tic-tac-toe page, so --url is not used here.
    cli_args = parse_cli_args()

    workflow = wtl.Workflow(
        config=wtl.Config(cli_args.config),
        policy=policy,
        url="https://stanfordnlp.github.io/miniwob-plusplus/html/miniwob/tic-tac-toe.html",
        output=cli_args.output,
    )

    # "start" and "tile" are the names the policy looks up with by_score
    workflow.classifiers.add(wtl.ActiveElementFilter())
    workflow.classifiers.add(wtl.ActiveElementFilter(name="start", callback=_start_btn, action=Click))
    workflow.classifiers.add(wtl.ActiveElementFilter(name="tile", callback=_tile_div, action=Click))

    # Make sure the workflow is shut down even when the run raises
    try:
        workflow.run()
    finally:
        workflow.quit()
util.py¶
1# Licensed to the Apache Software Foundation (ASF) under one
2# or more contributor license agreements. See the NOTICE file
3# distributed with this work for additional information
4# regarding copyright ownership. The ASF licenses this file
5# to you under the Apache License, Version 2.0 (the
6# "License"); you may not use this file except in compliance
7# with the License. You may obtain a copy of the License at
8
9# http://www.apache.org/licenses/LICENSE-2.0
10
11# Unless required by applicable law or agreed to in writing,
12# software distributed under the License is distributed on an
13# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14# KIND, either express or implied. See the License for the
15# specific language governing permissions and limitations
16# under the License.
17
18"""
19Not an example, just helper functions for the other examples.
20"""
21import os
22import subprocess
23from argparse import ArgumentParser
24from pathlib import Path
25
26import webtraversallibrary as wtl
27
28
def start_server() -> str:
    """
    Launches ``python3 -m flask run`` for tests/site/flask_app.py in a background process.

    The process output is discarded and the process is not waited for.
    Returns the URL "http://localhost:5000".
    """
    flask_env = dict(os.environ, FLASK_APP="tests/site/flask_app.py")
    command = ["python3", "-m", "flask", "run"]
    subprocess.Popen(command, env=flask_env, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    return "http://localhost:5000"
34
35
def parse_cli_args() -> ArgumentParser:
    """
    Parses CLI flags relevant for examples.

    Returns the parsed arguments (the result of ``parse_args()``, despite the annotation) with
    "default" put first in ``config``. When ``--url`` is left at its default, the local test
    server is started and its URL is used.
    """
    parser = ArgumentParser()
    run_group = parser.add_argument_group("Run parameters")

    run_group.add_argument("--url", default="DEFAULT", type=str, help="URL to run the workflow on.")
    run_group.add_argument(
        "--output",
        default=None,
        type=Path,
        help="Where to save the result locally. If save, remember to also add save flag for config.",
    )
    run_group.add_argument(
        "--windows",
        default=[wtl.Workflow.SINGLE_TAB],
        type=str,
        nargs="*",
        help="Tab names (comma-separated). Use space separation for multiple windows.",
    )
    run_group.add_argument(
        "--config",
        default=[],
        type=str,
        nargs="*",
        required=False,
        help='Names of config files in config/, such as "iphone_x_mobile", or key=value pairs.',
    )

    parsed = parser.parse_args()

    # The "default" config always comes first
    parsed.config.insert(0, "default")

    if parsed.url == "DEFAULT":
        # No URL given: serve the local test site and use that
        parsed.url = start_server()

    return parsed
view_classifier.py¶
1# Licensed to the Apache Software Foundation (ASF) under one
2# or more contributor license agreements. See the NOTICE file
3# distributed with this work for additional information
4# regarding copyright ownership. The ASF licenses this file
5# to you under the Apache License, Version 2.0 (the
6# "License"); you may not use this file except in compliance
7# with the License. You may obtain a copy of the License at
8
9# http://www.apache.org/licenses/LICENSE-2.0
10
11# Unless required by applicable law or agreed to in writing,
12# software distributed under the License is distributed on an
13# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14# KIND, either express or implied. See the License for the
15# specific language governing permissions and limitations
16# under the License.
17
18"""
19Clicks randomly on a page until _dataguy_ is not part of the URL anymore, using a ViewClassifier.
20"""
21
22from random import choice
23from typing import List, Set
24
25import webtraversallibrary as wtl
26from webtraversallibrary.actions import Annotate, Clear, Click
27
28from .util import parse_cli_args
29
30
@wtl.single_tab
def goal(_, view):
    """Returns True once "dataguy" is no longer among the view's tags."""
    # Stop when dataguy is not part of the URL anymore
    still_dataguy = "dataguy" in view.tags
    return not still_dataguy
35
36
@wtl.single_tab
def policy(_, view: wtl.View) -> List[wtl.Action]:
    """Returns a Clear action, a "Still dataguy" annotation and a random Click action, in that order."""
    clear = Clear()
    still_here = Annotate(
        location=wtl.Point(30, 30),
        color=wtl.Color(0, 0, 0),
        size=20,
        text="Still dataguy",
        background=wtl.Color(128, 50, 128),
    )
    # Do any random click
    random_click = choice(view.actions.by_type(Click))
    return [clear, still_here, random_click]
51
52
def dataguy_classifier_func(view: wtl.View) -> Set[str]:
    """Returns {"dataguy"} when the page URL contains "dataguy", otherwise {"other"}."""
    # This will assign "dataguy" to a view if the URL contains that, otherwise "other"
    # It can be retrieved with view.tags
    url = view.snapshot.page_metadata["url"]
    if "dataguy" in url:
        return {"dataguy"}
    return {"other"}
57
58
if __name__ == "__main__":
    # Script entry point: build the workflow, register the classifiers and run it.
    cli_args = parse_cli_args()

    # NOTE(review): `goal` is defined in this file but is not passed to the Workflow here -
    # presumably it should be; confirm against the Workflow constructor.
    workflow = wtl.Workflow(config=wtl.Config(cli_args.config), policy=policy, url=cli_args.url, output=cli_args.output)

    workflow.classifiers.add(wtl.ActiveElementFilter(action=Click))

    # The syntax for a ViewClassifier is similar, but simpler
    workflow.classifiers.add(wtl.ViewClassifier(name="dataguy", callback=dataguy_classifier_func))

    # Make sure the workflow is shut down even when the run raises
    try:
        workflow.run()
    finally:
        workflow.quit()
wiki_game.py¶
1# Licensed to the Apache Software Foundation (ASF) under one
2# or more contributor license agreements. See the NOTICE file
3# distributed with this work for additional information
4# regarding copyright ownership. The ASF licenses this file
5# to you under the Apache License, Version 2.0 (the
6# "License"); you may not use this file except in compliance
7# with the License. You may obtain a copy of the License at
8
9# http://www.apache.org/licenses/LICENSE-2.0
10
11# Unless required by applicable law or agreed to in writing,
12# software distributed under the License is distributed on an
13# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14# KIND, either express or implied. See the License for the
15# specific language governing permissions and limitations
16# under the License.
17
18"""
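19Starting from a random Wikipedia article (page A), follows a random link to page B, then goes back to
20page A and tries to find page B again through the site search, using the start of B's first paragraph.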
21"""
22
23from random import choice
24
25import webtraversallibrary as wtl
26from webtraversallibrary.actions import Abort, Click
27
28from .util import parse_cli_args
29
30
@wtl.single_tab_coroutine
def policy():
    """
    Coroutine policy that tries to find a linked page again through the site search.

    Steps: click a random link inside the body content of page A, read the first paragraph of
    the resulting page B, navigate back to page A, search for the first 50 characters of that
    paragraph and open the search results one by one until the URL equals page B's URL or the
    results are exhausted.
    """
    workflow, view = yield

    # Store Page A's URL
    page_a_url = workflow.current_window.driver.current_url
    workflow, view = yield Click(
        choice(view.snapshot.elements.by_subtree(wtl.Selector("div[id='bodyContent']")).by_selector(wtl.Selector("a")))
    )

    # Store Page B's URL
    page_b_url = workflow.current_window.driver.current_url

    description = ""
    # Stores first paragraph from page B's body
    try:
        description = view.snapshot.elements.by_selector(wtl.Selector("div p:nth-of-type(1)")).unique().metadata["text"]
        # An empty first paragraph is handled like a missing one: fall back to the second paragraph
        if not description:
            raise IndexError()
    except IndexError:
        description = view.snapshot.elements.by_selector(wtl.Selector("div p:nth-of-type(2)")).unique().metadata["text"]

    # Limit the description to 50 characters to improve search
    description_subset = str(description[:50])

    # Navigate back to page A
    workflow, view = yield wtl.actions.Navigate(page_a_url)

    link_to_click = view.snapshot.elements.by_selector(wtl.Selector("input[type='submit']")).unique()

    # In the search bar in page A, fill text with description_subset and
    # click search to get search results for the descriptions

    workflow, view = yield [
        wtl.actions.FillText(wtl.Selector("input[type='search']"), description_subset),
        Click(link_to_click),
    ]

    # Store search result's URL
    search_url = workflow.current_window.driver.current_url

    search_results = view.snapshot.elements.by_selector(wtl.Selector("div[class=mw-search-result-heading] a"))

    i = 0

    # Go to first link in the search result
    try:
        workflow, view = yield Click(search_results[i])
    except IndexError:
        print("Empty search results!!")
        yield Abort()

    # Check if landing URL equals PAGE B URL, if yes, break, else iterate and go to next link in the search result
    # until the URLs match

    while True:
        if workflow.current_window.driver.current_url == page_b_url:
            print("Woohoo!!!")
            break

        try:
            workflow, view = yield [wtl.actions.Navigate(search_url), Click(search_results[i + 1])]
            i += 1
        except IndexError:
            print("Search result exhausted!!")
            break

    yield None
99
100
if __name__ == "__main__":
    # Script entry point: the URL is fixed to a random Wikipedia article, so --url is not used here.
    cli_args = parse_cli_args()

    wf = wtl.Workflow(
        config=wtl.Config(cli_args.config),
        policy=policy,
        url="https://en.wikipedia.org/wiki/Special:Random",
        output=cli_args.output,
    )

    wf.classifiers.add(wtl.ActiveElementFilter(action=Click))

    wf.classifiers.add(wtl.ElementClassifier(name="textfield", action=wtl.actions.FillText, highlight=True))

    # Make sure the workflow is shut down even when the run raises
    try:
        wf.run()
    finally:
        wf.quit()
without_workflow.py¶
1# Licensed to the Apache Software Foundation (ASF) under one
2# or more contributor license agreements. See the NOTICE file
3# distributed with this work for additional information
4# regarding copyright ownership. The ASF licenses this file
5# to you under the Apache License, Version 2.0 (the
6# "License"); you may not use this file except in compliance
7# with the License. You may obtain a copy of the License at
8
9# http://www.apache.org/licenses/LICENSE-2.0
10
11# Unless required by applicable law or agreed to in writing,
12# software distributed under the License is distributed on an
13# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14# KIND, either express or implied. See the License for the
15# specific language governing permissions and limitations
16# under the License.
17
18"""
19Goes to a random wikipage, finds 'n' most common words in the article, where 'n' being the number of
20most common words that the user wants.
21"""
22
23import collections
24from collections import defaultdict
25
26import webtraversallibrary as wtl
27
28from .util import parse_cli_args
29
# Set up a single browser window from the CLI configuration, without using a Workflow
cli_args = parse_cli_args()
config = wtl.Config(cli_args.config)
window = wtl.Window(config)

# Navigates to a Wiki article chosen at random
window.scraper.navigate("https://en.wikipedia.org/wiki/Special:Random")

# Takes a snapshot of the current page
snapshot = window.scraper.scrape_current_page()

# Fetches all the elements with links in the current page
links = snapshot.elements.by_selector(wtl.Selector("a"))  # pylint: disable=no-member
print("Number of links in the article ", len(links))

# Gets the current URL of the page
search_url = window.driver.current_url
print("Link to the wiki article : ", search_url)

# Collect the text of all paragraphs, each one preceded by a space
paragraphs = snapshot.elements.by_selector(wtl.Selector("div p"))  # pylint: disable=no-member
article = "".join(" " + p.metadata["text"] for p in paragraphs)

# Fetch stopwords from a local file containing an array of stopwords
with open("examples/stopwords.txt", encoding="utf8") as f:
    stopwords = f.read()

# Find n most common words in the article, where n being the number of common words required by the user
# NOTE(review): `stopwords` is the whole file text, so `not in` is a substring test - confirm this is intended
word_counter = collections.Counter(word for word in article.lower().split() if word not in stopwords)

n_print = int(input("How many most common words to print: "))
print(f"\nOK. The {n_print} most common words are as follows\n")
for word, count in word_counter.most_common(n_print):
    print(word, ": ", count)

# Close the browser
window.quit()
zindex.py¶
1# Licensed to the Apache Software Foundation (ASF) under one
2# or more contributor license agreements. See the NOTICE file
3# distributed with this work for additional information
4# regarding copyright ownership. The ASF licenses this file
5# to you under the Apache License, Version 2.0 (the
6# "License"); you may not use this file except in compliance
7# with the License. You may obtain a copy of the License at
8
9# http://www.apache.org/licenses/LICENSE-2.0
10
11# Unless required by applicable law or agreed to in writing,
12# software distributed under the License is distributed on an
13# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14# KIND, either express or implied. See the License for the
15# specific language governing permissions and limitations
16# under the License.
17
18"""
19Compute the zIndex of all active elements and highlights the top 1% elements.
20"""
21
22import logging
23from typing import List, Tuple
24
25import webtraversallibrary as wtl
26from webtraversallibrary.logging_utils import setup_logging
27
28from .util import parse_cli_args
29
30
@wtl.single_tab
def policy(_, __) -> wtl.Action:
    """Ignores both arguments and always returns a WaitForUser action."""
    wait_action = wtl.actions.WaitForUser()
    return wait_action
34
35
# https://stackoverflow.com/questions/1388007/getting-the-z-index-of-a-div-in-javascript
# JavaScript snippet that is run once per element by _compute_z_index.
# It defines window.getZIndex, which returns the computed 'z-index' of an element, moves on to
# the parent element while that value is not a number and returns 0 when there is no element left.
# The first script argument is used as a CSS selector; when no element matches it, the script
# returns nothing.
Z_INDEX_JS = """
window.getZIndex = function (e) {
 if (e === null) {
 return 0;
 }
 let z = window.document.defaultView.getComputedStyle(e).getPropertyValue('z-index');
 if (isNaN(z)) {
 return window.getZIndex(e.parentElement);
 }
 return z;
};
console.log("Hello!");
let element = document.querySelector(arguments[0]);
if (element !== null) {
 return window.getZIndex(element);
}
"""
54
55
def _compute_z_index(elements: wtl.Elements, workflow: wtl.Workflow) -> List[Tuple[wtl.PageElement, float]]:
    """
    Pairs every element with its z-index, obtained by running Z_INDEX_JS with the element's CSS selector.

    A falsy script result is counted as 0; the value is converted with ``int``.
    """
    # This may be slow for many elements. If you need more performance, consider a JS script
    # which computes all elements' z-values combined and returns the map directly.

    def z_index_of(element) -> int:
        raw_value = workflow.js.execute_script(Z_INDEX_JS, element.selector.css) or 0
        return int(raw_value)

    return [(element, z_index_of(element)) for element in elements]
64
65
if __name__ == "__main__":
    # Script entry point: build the workflow, register the classifiers, enable debug logging and run.
    cli_args = parse_cli_args()

    wf = wtl.Workflow(config=wtl.Config(cli_args.config), policy=policy, url=cli_args.url, output=cli_args.output)

    wf.classifiers.add(wtl.ActiveElementFilter(action=wtl.actions.Click))

    wf.classifiers.add(
        wtl.ElementClassifier(
            name="zIndex",
            subset="is_active",
            enabled=True,
            highlight=0.99,
            mode=wtl.ScalingMode.LINEAR,
            callback=_compute_z_index,
        )
    )

    setup_logging(logging_level=logging.DEBUG)

    # Make sure the workflow is shut down even when the run raises
    try:
        wf.run()
    finally:
        wf.quit()