Examples

dataguy_by_element.py

 1# Licensed to the Apache Software Foundation (ASF) under one
 2# or more contributor license agreements.  See the NOTICE file
 3# distributed with this work for additional information
 4# regarding copyright ownership.  The ASF licenses this file
 5# to you under the Apache License, Version 2.0 (the
 6# "License"); you may not use this file except in compliance
 7# with the License.  You may obtain a copy of the License at
 8
 9#   http://www.apache.org/licenses/LICENSE-2.0
10
11# Unless required by applicable law or agreed to in writing,
12# software distributed under the License is distributed on an
13# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14# KIND, either express or implied.  See the License for the
15# specific language governing permissions and limitations
16# under the License.
17
18"""
19Clicks hard-coded menu items by element and selector.
20"""
21from typing import List
22
23import webtraversallibrary as wtl
24
25from .util import parse_cli_args
26
27
@wtl.single_tab
def policy(_, view: wtl.View) -> List[wtl.Action]:
    """
    Returns two hard-coded menu actions, looked up in two different ways.

    The first action is found via a concrete page element, the second via a
    CSS selector that is intended to match the same menu items.
    """

    def is_menu_link(element) -> bool:
        # Menu items are taken to be anchors located near the top-left corner of the page
        return element.location.x < 10 and element.location.y < 200 and element.metadata["tag"] == "a"

    # Lookup by element: actions attached to the first matching menu anchor
    menu_links = list(filter(is_menu_link, view.snapshot.elements))
    by_element = view.actions.by_element(menu_links[0])

    # Lookup by selector: actions whose target matches the side navigation links
    by_selector = view.actions.by_selector(wtl.Selector(css=".sidenav div a"))

    # First action from the element lookup, then (before the next snapshot) the second from the selector lookup
    return [by_element[0], by_selector[1]]
40
41
if __name__ == "__main__":
    # Script entry point: build the workflow from the command line arguments and run it
    cli_args = parse_cli_args()
    config = wtl.Config(cli_args.config)

    workflow = wtl.Workflow(config=config, policy=policy, url=cli_args.url, output=cli_args.output)

    # Register a classifier that associates Click actions with active elements
    click_filter = wtl.ActiveElementFilter(action=wtl.actions.Click)
    workflow.classifiers.add(click_filter)

    workflow.run()
    workflow.quit()

dementor.py

 1# Licensed to the Apache Software Foundation (ASF) under one
 2# or more contributor license agreements.  See the NOTICE file
 3# distributed with this work for additional information
 4# regarding copyright ownership.  The ASF licenses this file
 5# to you under the Apache License, Version 2.0 (the
 6# "License"); you may not use this file except in compliance
 7# with the License.  You may obtain a copy of the License at
 8
 9#   http://www.apache.org/licenses/LICENSE-2.0
10
11# Unless required by applicable law or agreed to in writing,
12# software distributed under the License is distributed on an
13# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14# KIND, either express or implied.  See the License for the
15# specific language governing permissions and limitations
16# under the License.
17
18"""
19Remove an element, one at a time, until the page is empty.
20"""
21
22import random
23from typing import List, Union
24
25import webtraversallibrary as wtl
26
27from .util import parse_cli_args
28
29
@wtl.single_tab
def policy(workflow: wtl.Workflow, view: wtl.View) -> Union[wtl.Action, List[wtl.Action]]:
    """
    Removes one random element per iteration and reverts when the loop index reaches 7.

    Returns a single Revert action when ``workflow.loop_idx`` equals 7, otherwise
    a sequence: a random Remove action, a short wait, a Clear and a WaitForUser.
    """
    if workflow.loop_idx != 7:
        # Pick any of the available Remove actions at random
        removal = random.choice(view.actions.by_type(wtl.actions.Remove))
        pause = wtl.actions.Wait(0.25)
        clear = wtl.actions.Clear(viewport=False)
        confirm = wtl.actions.WaitForUser()
        return [removal, pause, clear, confirm]

    # Loop index 7 reached: start over from step 3
    return wtl.actions.Revert(3)
43
44
if __name__ == "__main__":
    # Script entry point: build the workflow from the command line arguments and run it
    cli_args = parse_cli_args()
    config = wtl.Config(cli_args.config)

    wf = wtl.Workflow(config=config, policy=policy, url=cli_args.url, output=cli_args.output)

    # The callback returns every element unchanged, so _all_ elements are labelled removable
    dementor = wtl.ElementClassifier(
        name="dementor",
        enabled=True,
        highlight=False,
        action=wtl.actions.Remove,
        callback=lambda e, _: e,
    )
    wf.classifiers.add(dementor)

    wf.run()
    wf.quit()

dynamic_windows.py

 1# Licensed to the Apache Software Foundation (ASF) under one
 2# or more contributor license agreements.  See the NOTICE file
 3# distributed with this work for additional information
 4# regarding copyright ownership.  The ASF licenses this file
 5# to you under the Apache License, Version 2.0 (the
 6# "License"); you may not use this file except in compliance
 7# with the License.  You may obtain a copy of the License at
 8
 9#   http://www.apache.org/licenses/LICENSE-2.0
10
11# Unless required by applicable law or agreed to in writing,
12# software distributed under the License is distributed on an
13# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14# KIND, either express or implied.  See the License for the
15# specific language governing permissions and limitations
16# under the License.
17
18"""
19Dynamically handles multiple tabs and windows.
20Creates new windows and/or tabs every iteration.
21"""
22
23from random import choice
24from typing import Dict
25
26import webtraversallibrary as wtl
27
28from .util import parse_cli_args
29
30
def policy(workflow: wtl.Workflow, view: wtl.View) -> Dict[wtl.View, wtl.Action]:
    """
    Opens a new tab every iteration (and a new window every third one), then clicks at random.

    :param workflow: The running workflow; its ``loop_idx`` and ``windows`` decide where the tab goes.
    :param view: Used as a mapping here (``view.values()`` is iterated), so in this
        multi-tab policy it presumably holds one view per open tab - TODO confirm against WTL docs.
    :return: A single-entry dict mapping the randomly chosen view to a random Click action on it.
    """
    tab_number = workflow.loop_idx + 1
    window_number = tab_number // 3

    # Every third tab, create a new window, otherwise use the latest window
    if window_number >= len(workflow.windows):
        window = workflow.create_window(str(window_number))
    else:
        window = workflow.windows[-1]

    # Create a tab pointing to a new Wikipedia number article
    window.create_tab(str(tab_number), url=f"https://en.wikipedia.org/wiki/{tab_number}")

    # Click a random clickable element in a random view/tab
    chosen_view = choice(list(view.values()))
    return {chosen_view: choice(chosen_view.actions.by_type(wtl.actions.Click))}
47
48
if __name__ == "__main__":
    # Script entry point: always starts at the Wikipedia article for 0
    cli_args = parse_cli_args()
    config = wtl.Config(cli_args.config)

    wf = wtl.Workflow(config=config, policy=policy, url="https://en.wikipedia.org/wiki/0", output=cli_args.output)

    # Register a classifier that associates Click actions with active elements
    click_filter = wtl.ActiveElementFilter(action=wtl.actions.Click)
    wf.classifiers.add(click_filter)

    wf.run()
    wf.quit()

fill_form.py

 1# Licensed to the Apache Software Foundation (ASF) under one
 2# or more contributor license agreements.  See the NOTICE file
 3# distributed with this work for additional information
 4# regarding copyright ownership.  The ASF licenses this file
 5# to you under the Apache License, Version 2.0 (the
 6# "License"); you may not use this file except in compliance
 7# with the License.  You may obtain a copy of the License at
 8
 9#   http://www.apache.org/licenses/LICENSE-2.0
10
11# Unless required by applicable law or agreed to in writing,
12# software distributed under the License is distributed on an
13# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14# KIND, either express or implied.  See the License for the
15# specific language governing permissions and limitations
16# under the License.
17
18"""
19Fills all empty text input fields on a page, in random order, with names picked from a preset list.
20"""
21
22from random import choice
23from typing import List
24
25import webtraversallibrary as wtl
26from webtraversallibrary.actions import Clear, FillText
27
28from .util import parse_cli_args
29
30
@wtl.single_tab
def goal(_, view: wtl.View) -> bool:
    """
    Reports whether every fillable text field on the page has some content.

    :param view: The current view; its FillText actions identify the text fields.
    :return: True when no FillText target has an empty ``"text"`` metadata entry
        (also True when there are no such targets at all).
    """
    # Find all elements we could fill in text on
    targets: List[wtl.PageElement] = [action.target for action in view.actions.by_type(FillText)]
    # Get all their collective contents
    texts: List[str] = [element.metadata["text"] for element in targets]
    # Stop running if they have all been filled
    return all(text != "" for text in texts)
39
40
# Preset names from which the policy picks the text to enter
content = [
    "Robin Carpenter",
    "Kenny Turner",
    "Ernestine Ferguson",
    "Marcelo Allen",
    "Marilyn Rich",
    "Rupert Strong",
]
42
43
@wtl.single_tab
def policy(_, view: wtl.View) -> List[wtl.Action]:
    """
    Fills one randomly chosen, still empty text field with a randomly chosen name.

    Returns a Clear action followed by the chosen FillText action called with the name.
    """
    # Keep only the FillText actions whose target element has no text yet
    empty_field_actions: List[wtl.Action] = []
    for candidate in view.actions.by_type(FillText):
        if not candidate.target.metadata["text"]:
            empty_field_actions.append(candidate)

    # Pick the field first, then the name to enter
    chosen_action: wtl.Action = choice(empty_field_actions)
    chosen_name: str = choice(content)

    return [Clear(), chosen_action(chosen_name)]
55
56
def text_field_classifier_func(elements: wtl.Elements, _) -> List[wtl.PageElement]:
    """
    Selects the elements that count as text fields.

    An element qualifies when its tag is ``input`` and its ``type`` attribute
    is one of ``text``, ``email`` or ``password``.
    """
    accepted_types = ("text", "email", "password")
    text_fields = []
    for element in elements:
        metadata = element.metadata
        if metadata["tag"] != "input":
            continue
        if metadata["type"] in accepted_types:
            text_fields.append(element)
    return text_fields
60
61
if __name__ == "__main__":
    # Script entry point: run the form filling workflow against a fixed signup page
    cli_args = parse_cli_args()

    workflow = wtl.Workflow(
        config=wtl.Config(cli_args.config),
        policy=policy,
        goal=goal,
        url="https://www.getharvest.com/signup",
        output=cli_args.output,
    )

    # Only a text field classifier is needed; whether fields are active is not considered
    # (all of them should be).
    textfield_classifier = wtl.ElementClassifier(
        name="textfield", action=FillText, callback=text_field_classifier_func, highlight=True
    )
    workflow.classifiers.add(textfield_classifier)

    workflow.run()
    workflow.quit()

    print("Workflow successful?", workflow.success)

    # The last view in the history, i.e. the one where goal() returned True
    final_view: wtl.View = workflow.history[-1]

    # Collect the text of every fillable field in that view
    final_texts: List[str] = [
        fill_action.target.metadata["text"] for fill_action in final_view.actions.by_type(FillText)
    ]

    print("Names entered: ", ", ".join(final_texts))

gui.py

  1# Licensed to the Apache Software Foundation (ASF) under one
  2# or more contributor license agreements.  See the NOTICE file
  3# distributed with this work for additional information
  4# regarding copyright ownership.  The ASF licenses this file
  5# to you under the Apache License, Version 2.0 (the
  6# "License"); you may not use this file except in compliance
  7# with the License.  You may obtain a copy of the License at
  8
  9#   http://www.apache.org/licenses/LICENSE-2.0
 10
 11# Unless required by applicable law or agreed to in writing,
 12# software distributed under the License is distributed on an
 13# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 14# KIND, either express or implied.  See the License for the
 15# specific language governing permissions and limitations
 16# under the License.
 17
 18"""
 19Basic example of using WTL together with a graphical frontend.
 20"""
 21
 22# pylint: disable=global-statement
 23
 24import tkinter as tk
 25from pathlib import Path
 26from threading import Lock, Thread
 27from time import sleep
 28from tkinter import font
 29
 30import webtraversallibrary as wtl
 31
 32from .util import parse_cli_args
 33
 34# === THREAD SYNC ===
 35# Global variables used for communicating.
 36# Uses a single lock for everything.
 37# 0=Waiting for new view, 1=Update GUI image, 2=Waiting for new action
 38
# Single lock guarding every read and write of the shared variables below
data_lock = Lock()
# Latest view received by the policy on the WTL thread; read by the GUI callbacks
current_view = None
# Action chosen in the GUI; returned by the policy once state is back to 0
current_action = None
# Current value of the state machine (0, 1 or 2, see the legend above)
state = 0
 43
 44
 45# === WTL THREAD ===
 46# Running WTL in a separate thread (so GUI can run on the main thread).
 47# Very simple state machine that saves the current view and busy waits
 48# until an action has been set from the GUI.
 49
 50
 51@wtl.single_tab
 52def policy(_, view: wtl.View) -> wtl.Action:
 53    global state, current_view
 54
 55    with data_lock:
 56        current_view = view
 57        state = 1
 58
 59    while True:
 60        with data_lock:
 61            if state == 0:
 62                return current_action
 63
 64        sleep(0.5)
 65
 66
 67def wtl_thread(cli_args):
 68    workflow = wtl.Workflow(config=wtl.Config(cli_args.config), policy=policy, url=cli_args.url, output=cli_args.output)
 69
 70    workflow.classifiers.add(wtl.ActiveElementFilter(action=wtl.actions.Click, highlight=True))
 71
 72    workflow.run()
 73    workflow.quit()
 74
 75
 76# === GUI THREAD (MAIN) ===
 77#
 78
# Tk IntVar bound to the "Show active elements" checkbox; assigned inside gui_thread()
show_actives = None
# Name of the screenshot currently drawn on the canvas; reset when a new view arrives
current_filename = ""
 81
 82
 83def gui_thread():
 84    """
 85    Sets up the window with design and all callbacks.
 86    """
 87    global show_actives
 88
 89    window = tk.Tk()
 90    window.title("WTL browser")
 91    window.geometry("1920x1080")
 92
 93    top_frame = tk.Frame()
 94
 95    canvas = tk.Canvas(top_frame, width=1280, height=1080)
 96    canvas.pack(padx=10, pady=10, side=tk.LEFT)
 97    img = None
 98
 99    show_actives = tk.IntVar()
100    ch = tk.Checkbutton(top_frame, text="Show active elements", variable=show_actives)
101    ch.pack(side=tk.LEFT)
102
103    small_font = font.Font(size=16)
104    listbox = tk.Listbox(top_frame, width=375, height=200, font=small_font)
105    listbox.pack(padx=5, pady=5, side=tk.LEFT)
106
107    label_frame = tk.Frame(window, width=1400, height=100, bg="white")
108    label_frame.pack_propagate(0)
109
110    desc_label = tk.Label(label_frame, text="Hello", justify=tk.LEFT, wraplength=1300, bg="white")
111    desc_label.pack()
112    label_frame.pack()
113    top_frame.pack()
114
115    def get_element(mouse):  # pylint: disable=inconsistent-return-statements
116        """
117        Look for the element at current coords with smallest bounds
118        """
119        point = wtl.Point(mouse.x - 5, mouse.y - 5)
120
121        with data_lock:
122            if not current_view:
123                return
124
125            smallest_element, smallest_area = None, 999999
126            for e in current_view.snapshot.elements:
127                if point in e.bounds and e.bounds.area < smallest_area:
128                    smallest_area, smallest_element = e.bounds.area, e
129
130        return smallest_element
131
132    def hover(mouse):
133        """
134        Update the top label when hovering over an element.
135        """
136        nonlocal desc_label
137
138        smallest_element = get_element(mouse)
139
140        with data_lock:
141            if smallest_element:
142                output = [f"{k}={str(v)}" for k, v in smallest_element.metadata.items() if k != "text"]
143                desc_label.config(text=", ".join(output))
144            else:
145                desc_label.config(text=str("{}"))
146
147    def double_clicked(mouse):
148        """
149        Set action of the clicked element.
150        Does not check if it's active or not.
151        """
152        global state, current_action
153
154        smallest_element = get_element(mouse)
155
156        with data_lock:
157            if smallest_element:
158                state = 0
159                current_action = wtl.actions.Click(wtl.Selector(f'[wtl-uid="{smallest_element.wtl_uid}"]'))
160
161    def selected(_):
162        """
163        Sets action by clicking an element in the listbox.
164        """
165        global state, current_action
166        nonlocal listbox
167
168        with data_lock:
169            data = str(listbox.get(listbox.curselection())).split(" (", maxsplit=1)[0]
170            state = 0
171            current_action = wtl.actions.Click(wtl.Selector(f'[wtl-uid="{data}"]'))
172
173    # Bind functions to the GUI objects
174    canvas.bind("<Motion>", hover)
175    canvas.bind("<Double-Button-1>", double_clicked)
176    listbox.bind("<<ListboxSelect>>", selected)
177
178    def upd_view():
179        """
180        Checks state and updates the GUI with screenshot and list of actions.
181        """
182        global state, current_filename
183        nonlocal img, listbox
184
185        with data_lock:
186            if state == 1:
187                current_filename = None
188                state = 2
189                listbox.delete(0, tk.END)
190                for item in current_view.actions.by_type(wtl.actions.Click):
191                    wtl_uid = str(item.target.wtl_uid)
192                    text = item.target.metadata["text"]
193                    listbox.insert(tk.END, wtl_uid + f" ({text})")
194
195            if state == 2:
196                filename = "first" if show_actives.get() == 0 else "is_active"
197                if filename != current_filename:
198                    current_filename = filename
199                    current_view.snapshot.screenshots[filename].save(Path("."))
200                    img = tk.PhotoImage(file=f"{filename}.png")
201                    canvas.create_image(5, 5, anchor=tk.NW, image=img)
202
203        window.after(250, upd_view)
204
205    window.after(1000, upd_view)
206    window.mainloop()
207
208
209# === === === === ===
210# Entry point: Setup WTL thread and run GUI on this (main) thread.
211
if __name__ == "__main__":
    _cli_args = parse_cli_args()
    # WTL runs on a worker thread so that the GUI can own the main thread
    _wtl_thread = Thread(target=wtl_thread, args=(_cli_args,))
    _wtl_thread.start()
    # Runs the Tk main loop on this thread
    gui_thread()
    # Wait for the workflow thread to finish before exiting
    _wtl_thread.join()

hard_coded.py

 1# Licensed to the Apache Software Foundation (ASF) under one
 2# or more contributor license agreements.  See the NOTICE file
 3# distributed with this work for additional information
 4# regarding copyright ownership.  The ASF licenses this file
 5# to you under the Apache License, Version 2.0 (the
 6# "License"); you may not use this file except in compliance
 7# with the License.  You may obtain a copy of the License at
 8
 9#   http://www.apache.org/licenses/LICENSE-2.0
10
11# Unless required by applicable law or agreed to in writing,
12# software distributed under the License is distributed on an
13# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14# KIND, either express or implied.  See the License for the
15# specific language governing permissions and limitations
16# under the License.
17
18"""
19Simple example showing a hard-coded generator of actions.
20"""
21
22import webtraversallibrary as wtl
23from webtraversallibrary.actions import Clear, Click, Highlight
24
25from .util import parse_cli_args, start_server
26
27
@wtl.single_tab_coroutine
def policy():
    """
    Hard-coded sequence: highlight the first five title links, then click the first one.

    Written as a generator; once it is exhausted, workflow will interpret
    StopIteration as cancelling the tabs.
    """
    # Initial bare yield before any action is produced
    yield

    title_index = 1
    while title_index <= 5:
        title_link = wtl.Selector(f"h2:nth-of-type({title_index}) > a")
        yield [Clear(), Highlight(target=title_link)]
        title_index += 1

    first_title_link = wtl.Selector("h2:nth-of-type(1) > a")
    yield Click(first_title_link)
37
38
if __name__ == "__main__":
    # Script entry point: the URL is the "/blog" page of the server returned by start_server()
    cli_args = parse_cli_args()
    config = wtl.Config(cli_args.config)
    blog_url = start_server() + "/blog"

    workflow = wtl.Workflow(config=config, policy=policy, url=blog_url, output=cli_args.output)

    workflow.run()
    workflow.quit()

href_scorer.py

 1# Licensed to the Apache Software Foundation (ASF) under one
 2# or more contributor license agreements.  See the NOTICE file
 3# distributed with this work for additional information
 4# regarding copyright ownership.  The ASF licenses this file
 5# to you under the Apache License, Version 2.0 (the
 6# "License"); you may not use this file except in compliance
 7# with the License.  You may obtain a copy of the License at
 8
 9#   http://www.apache.org/licenses/LICENSE-2.0
10
11# Unless required by applicable law or agreed to in writing,
12# software distributed under the License is distributed on an
13# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14# KIND, either express or implied.  See the License for the
15# specific language governing permissions and limitations
16# under the License.
17
18"""
19Ranks all links on a page by the length of the HREF attribute.
20Does nothing with them.
21"""
22
23import webtraversallibrary as wtl
24from webtraversallibrary.actions import Wait
25
26from .util import parse_cli_args
27
28
@wtl.single_tab
def policy(_, __):
    """
    Always waits, ignoring both arguments.

    The wait keeps the classifier from being run over and over again.
    """
    idle = Wait(10)
    return idle
33
34
def url_length_classifier_func(elements, _):
    """
    Scores every element that has a non-empty href by the length of that href.

    Returns a list of ``(element, score)`` tuples in the order the elements were given;
    elements whose ``"href"`` metadata is empty or None are left out.
    """
    scored = []
    for element in elements:
        href = element.metadata["href"]
        if href:
            scored.append((element, len(href)))
    return scored
39
40
if __name__ == "__main__":
    # Script entry point: build the workflow from the command line arguments and run it
    cli_args = parse_cli_args()
    config = wtl.Config(cli_args.config)

    workflow = wtl.Workflow(config=config, policy=policy, url=cli_args.url, output=cli_args.output)

    # Highlight links in blue, scaled linearly by the score given by the callback
    url_length_classifier = wtl.ElementClassifier(
        name="url_length",
        highlight=True,
        mode=wtl.ScalingMode.LINEAR,
        highlight_color=wtl.Color(0, 0, 255),
        callback=url_length_classifier_func,
    )
    workflow.classifiers.add(url_length_classifier)

    workflow.run()
    workflow.quit()

interactive.py

  1# Licensed to the Apache Software Foundation (ASF) under one
  2# or more contributor license agreements.  See the NOTICE file
  3# distributed with this work for additional information
  4# regarding copyright ownership.  The ASF licenses this file
  5# to you under the Apache License, Version 2.0 (the
  6# "License"); you may not use this file except in compliance
  7# with the License.  You may obtain a copy of the License at
  8
  9#   http://www.apache.org/licenses/LICENSE-2.0
 10
 11# Unless required by applicable law or agreed to in writing,
 12# software distributed under the License is distributed on an
 13# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 14# KIND, either express or implied.  See the License for the
 15# specific language governing permissions and limitations
 16# under the License.
 17
 18"""
 19Simple example of an interactive "game", as an old-style text RPG, for web traversal.
 20"""
 21
 22import logging
 23import random
 24from typing import List
 25
 26import webtraversallibrary as wtl
 27
 28from .util import parse_cli_args
 29
# Openers printed in front of the page title whenever a new page is announced
initials = [
    "You see a house. Above the door hangs a sign: ",
    "You are in an open field. You find a note that says: ",
    "You are inside an old cottage. On the wall you see something written: ",
    "You are standing in the wtl office. It is loud. One of the screens say: ",
    "Rain is pouring through the broken roof. The rain patters a sound. You make out: ",
]


# Printed by "look" when the page has at least one empty text field
textfield_desc = [
    "A person appears and asks you for a name. He hands you a paper and pen to write with.",
    "An empty notepad really wants something to be written.",
    "You see a dusty whiteboard with a pen that still works.",
    "A piece of paper is asking you what is on your mind. You have a pen in your hand.",
    "A parchment and quill lie before you.",
]


# Printed by "look" when there are no empty text fields but there are clickable elements
vague_desc = [
    "You can see people in the distance.",
    "Marketing people are ringing a bell.",
    "Everything around you looks really clean.",
    "There are multiple paths forward.",
    "You see shadows lurking about far away. They look friendly.",
]


# Openers printed by "look" in front of the text of a random heading or paragraph
content_desc = [
    "A bleached old parchment says: ",
    "A pretty little note by your feet says: ",
    "You find an old metal bracelet with an inscription: ",
    "You are standing next to an old radio. It repeats over and over again: ",
    "A whisper is carried by the wind. It says: ",
]
 64
 65
 66@wtl.single_tab_coroutine
 67def policy():
 68    print("\n === === === \n")
 69    _, view = yield
 70    initial = random.choice(initials)
 71    spoken = False
 72
 73    while True:
 74        if not spoken:
 75            title = view.snapshot.page_metadata["title"]
 76            print(f'{initial}"{title}"')
 77            spoken = True
 78        cmd = input("\n> ").strip().lower().split(" ")
 79        action = None
 80
 81        if cmd[0] == "help":
 82            print("\nAvailable commands:\nhelp: shows this message")
 83            print("navigate")
 84            print("jump")
 85            print("look")
 86            print("click")
 87            print("move")
 88            print("right")
 89            continue
 90
 91        if cmd[0] == "navigate" or cmd[0] == "jump":
 92            action = wtl.actions.Navigate(cmd[1])
 93
 94        if cmd[0] == "look":
 95            buttons = view.actions.by_type(wtl.actions.Click)
 96            textfields = [v for v in view.actions.by_type(wtl.actions.FillText) if v.target.metadata["text"] == ""]
 97            texts = (
 98                view.snapshot.elements.by_selector(wtl.Selector("h1"))
 99                + view.snapshot.elements.by_selector(wtl.Selector("h2"))
100                + view.snapshot.elements.by_selector(wtl.Selector("h3"))
101                + view.snapshot.elements.by_selector(wtl.Selector("p"))
102            )
103
104            if textfields:
105                print(random.choice(textfield_desc))
106
107            elif buttons:
108                print(random.choice(vague_desc))
109
110            if texts:
111                print(random.choice(content_desc) + '"' + random.choice(texts).metadata["text"] + '"')
112
113            continue
114
115        if cmd[0] == "click":
116            text = " ".join(cmd[1:])
117            elements = view.snapshot.elements.by_selector(wtl.Selector(f'[value~="{text}"]'))
118            if not elements:
119                elements = [e for e in view.snapshot.elements if text in e.metadata["text"]]
120            if not elements:
121                elements = view.snapshot.elements.by_selector(wtl.Selector(text))
122            if elements:
123                action = wtl.actions.Click(random.choice(elements))
124
125        if cmd[0] == "move":
126            action = random.choice(view.actions.by_type(wtl.actions.Click))
127            initial = random.choice(initials)
128
129        if cmd[0] == "write":
130            textfields = [v for v in view.actions.by_type(wtl.actions.FillText) if v.target.metadata["text"] == ""]
131            action = random.choice(textfields)(" ".join(cmd[1:]))
132
133        if not action:
134            print("I do not understand.")
135            continue
136
137        spoken = False
138        _, view = yield action
139
140
def text_field_classifier_func(elements: wtl.Elements, _) -> List[wtl.PageElement]:
    """
    Returns the ``input`` elements whose type is ``text``, ``email`` or ``password``.
    """

    def is_text_field(element) -> bool:
        # The type is only looked at for input tags
        return element.metadata["tag"] == "input" and element.metadata["type"] in ("text", "email", "password")

    return list(filter(is_text_field, elements))
143
144
if __name__ == "__main__":
    # Script entry point: build the workflow from the command line arguments and run it
    cli_args = parse_cli_args()
    config = wtl.Config(cli_args.config)

    workflow = wtl.Workflow(config=config, policy=policy, url=cli_args.url, output=cli_args.output)

    # Clickable elements first, then the text field classifier
    click_filter = wtl.ActiveElementFilter(action=wtl.actions.Click)
    workflow.classifiers.add(click_filter)

    textfield_classifier = wtl.ElementClassifier(
        name="textfield", action=wtl.actions.FillText, callback=text_field_classifier_func, highlight=True
    )
    workflow.classifiers.add(textfield_classifier)

    # Only let critical messages from the "wtl" logger through, keeping the console for the game
    wtl_logger = logging.getLogger("wtl")
    wtl_logger.setLevel(logging.CRITICAL)

    workflow.run()
    workflow.quit()

largest_image.py

 1# Licensed to the Apache Software Foundation (ASF) under one
 2# or more contributor license agreements.  See the NOTICE file
 3# distributed with this work for additional information
 4# regarding copyright ownership.  The ASF licenses this file
 5# to you under the Apache License, Version 2.0 (the
 6# "License"); you may not use this file except in compliance
 7# with the License.  You may obtain a copy of the License at
 8
 9#   http://www.apache.org/licenses/LICENSE-2.0
10
11# Unless required by applicable law or agreed to in writing,
12# software distributed under the License is distributed on an
13# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14# KIND, either express or implied.  See the License for the
15# specific language governing permissions and limitations
16# under the License.
17
18"""
19Ranks all images on a page by their geometric size.
20Clicks on the largest, then checks if the URL changed, then stops.
21"""
22from typing import Optional
23
24import webtraversallibrary as wtl
25from webtraversallibrary.actions import Click
26from webtraversallibrary.goals import N_STEPS
27
28from .util import parse_cli_args
29
# Goal passed to the workflow below; presumably ends the run after two steps - confirm against N_STEPS docs
goal = N_STEPS(2)
31
32
@wtl.single_tab
def policy(workflow: wtl.Workflow, view: wtl.View) -> Optional[wtl.Action]:
    """
    Clicks the largest image on the first step, then reports whether the URL changed.

    :param workflow: The running workflow; the length of its history tells the steps apart.
    :param view: The current view.
    :return: A Click on the image with the largest bounds area on the first step,
        or None when the page has no images or on any later step.
    """
    if len(workflow.history) == 1:
        # max() returns the first element with the largest area, like taking the head of a
        # descending stable sort, and the default avoids an error on pages without images
        largest_image = max(
            view.snapshot.elements.by_score("image"), key=lambda element: element.bounds.area, default=None
        )
        if largest_image is None:
            return None
        return Click(largest_image)

    # Prints True if the URL differs from the one in the first view of the history
    print("\n", view.snapshot.page_metadata["url"] != workflow.history[0].snapshot.page_metadata["url"], "\n")
    return None
43
44
def image_classifier_func(elements, _):
    """
    Returns the elements whose tag is ``img``, in their original order.
    """
    images = []
    for candidate in elements:
        if candidate.metadata["tag"] == "img":
            images.append(candidate)
    return images
47
48
if __name__ == "__main__":
    # Script entry point: build the workflow from the command line arguments and run it
    cli_args = parse_cli_args()
    config = wtl.Config(cli_args.config)

    wf = wtl.Workflow(config=config, policy=policy, goal=goal, url=cli_args.url, output=cli_args.output)

    # The policy looks the images up by the score name "image"
    image_classifier = wtl.ElementClassifier(name="image", highlight=True, callback=image_classifier_func)
    wf.classifiers.add(image_classifier)

    wf.run()
    wf.quit()

monkeypatch.py

 1# Licensed to the Apache Software Foundation (ASF) under one
 2# or more contributor license agreements.  See the NOTICE file
 3# distributed with this work for additional information
 4# regarding copyright ownership.  The ASF licenses this file
 5# to you under the Apache License, Version 2.0 (the
 6# "License"); you may not use this file except in compliance
 7# with the License.  You may obtain a copy of the License at
 8
 9#   http://www.apache.org/licenses/LICENSE-2.0
10
11# Unless required by applicable law or agreed to in writing,
12# software distributed under the License is distributed on an
13# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14# KIND, either express or implied.  See the License for the
15# specific language governing permissions and limitations
16# under the License.
17
18"""
19Click any active element, but all links lead to Rome, literally, using workflow.monkeypatches.
20"""
21
22from random import choice
23
24import webtraversallibrary as wtl
25from webtraversallibrary.actions import Click
26
27from .util import parse_cli_args
28
29
@wtl.single_tab
def policy(_, view: wtl.View) -> wtl.Action:
    """
    Returns a random Click action among those selected by the "is_active" score.
    """
    click_actions = view.actions.by_type(Click)
    active_clicks = click_actions.by_score("is_active")
    return choice(active_clicks)
34
35
if __name__ == "__main__":
    # Script entry point: build the workflow from the command line arguments and run it
    cli_args = parse_cli_args()
    config = wtl.Config(cli_args.config)

    workflow = wtl.Workflow(config=config, policy=policy, url=cli_args.url, output=cli_args.output)

    # Default monkeypatch target: all links lead to Rome
    rome_url = "https://en.wikipedia.org/wiki/Rome"
    workflow.monkeypatches.set_default(rome_url)

    click_filter = wtl.ActiveElementFilter(action=Click)
    workflow.classifiers.add(click_filter)

    workflow.run()
    workflow.quit()

multiples.py

 1# Licensed to the Apache Software Foundation (ASF) under one
 2# or more contributor license agreements.  See the NOTICE file
 3# distributed with this work for additional information
 4# regarding copyright ownership.  The ASF licenses this file
 5# to you under the Apache License, Version 2.0 (the
 6# "License"); you may not use this file except in compliance
 7# with the License.  You may obtain a copy of the License at
 8
 9#   http://www.apache.org/licenses/LICENSE-2.0
10
11# Unless required by applicable law or agreed to in writing,
12# software distributed under the License is distributed on an
13# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14# KIND, either express or implied.  See the License for the
15# specific language governing permissions and limitations
16# under the License.
17
18"""
19Creates a total of three tabs in two windows, and clicks randomly on all of them.
20"""
21
22from random import choice
23from typing import Dict
24
25import webtraversallibrary as wtl
26
27from .util import parse_cli_args
28
29
def policy(_, view: wtl.View) -> Dict[wtl.View, wtl.Action]:
    """
    Picks one random Click action for every view in ``view.values()``.

    Returns a dict mapping each of those views to its chosen action.
    """
    chosen_actions = {}
    for tab_view in view.values():
        chosen_actions[tab_view] = choice(tab_view.actions.by_type(wtl.actions.Click))
    return chosen_actions
32
33
if __name__ == "__main__":
    # Script entry point: three tabs (A, B, C) spread over two windows (first, second)
    cli_args = parse_cli_args()
    config = wtl.Config(cli_args.config)

    start_urls = {
        "first": {"A": "www.uppsalahandkraft.se", "B": "https://www.uppsalamodemassa.se"},
        "second": {"C": "shop.biskopsgarden.com"},
    }

    workflow = wtl.Workflow(config=config, policy=policy, url=start_urls, output=cli_args.output)

    click_filter = wtl.ActiveElementFilter(action=wtl.actions.Click)
    workflow.classifiers.add(click_filter)

    workflow.run()
    workflow.quit()

random_dataguy_memory.py

  1# Licensed to the Apache Software Foundation (ASF) under one
  2# or more contributor license agreements.  See the NOTICE file
  3# distributed with this work for additional information
  4# regarding copyright ownership.  The ASF licenses this file
  5# to you under the Apache License, Version 2.0 (the
  6# "License"); you may not use this file except in compliance
  7# with the License.  You may obtain a copy of the License at
  8
  9#   http://www.apache.org/licenses/LICENSE-2.0
 10
 11# Unless required by applicable law or agreed to in writing,
 12# software distributed under the License is distributed on an
 13# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 14# KIND, either express or implied.  See the License for the
 15# specific language governing permissions and limitations
 16# under the License.
 17
 18"""
 19Defines a subset of all active elements (menu items) and clicks randomly on those exactly once.
 20When they have all been clicked, abort the workflow.
 21"""
 22
 23from random import choice
 24from typing import List
 25
 26import webtraversallibrary as wtl
 27from webtraversallibrary.actions import Abort, Click
 28
 29from .util import parse_cli_args
 30
 31
 32@wtl.single_tab
 33def policy(workflow: wtl.Workflow, view: wtl.View) -> wtl.Action:
 34    if "previous" not in view.metadata:
 35        view.metadata["previous"] = []
 36    else:
 37        workflow.js.annotate(
 38            wtl.Point(100, 100), wtl.Color(0, 0, 0), 30, "This is an annotation", wtl.Color(128, 128, 128, 128)
 39        )
 40
 41        if workflow.config.debug.screenshots:
 42            # Create screenshot of previous actions with an emphasis on the latest
 43            scr = view.snapshot.new_screenshot("history", of="full")
 44            for prev in view.metadata["previous"]:
 45                scr.highlight(prev.bounds, color=wtl.Color(255, 0, 0, 100))
 46            scr.highlight(
 47                view.metadata["previous_action"][0].target.bounds, text="Latest action", color=wtl.Color(0, 0, 255, 100)
 48            )
 49            scr.save(workflow.output_path)
 50
 51            # Save screenshot of the current live view
 52            workflow.scraper.capture_screenshot("live").save(workflow.output_path)
 53
 54    # Get all elements tagged as "menu"
 55    menu_elements = view.snapshot.elements.by_score("menu")
 56
 57    # Filter out those we have already clicked on
 58    menu_elements = [
 59        e for e in menu_elements if e.metadata["text"] not in [e.metadata["text"] for e in view.metadata["previous"]]
 60    ]
 61
 62    if menu_elements:
 63        # If there are any left, click that and remember its text
 64        element = choice(menu_elements)
 65        action = Click(element)
 66        view.metadata["previous"].append(element)
 67    else:
 68        # Otherwise, stop everything
 69        action = Abort()
 70
 71    # Return
 72    print("Here are the buttons I've clicked: ", [e.metadata["text"] for e in view.metadata["previous"]])
 73    print("Last time I did", view.metadata["previous_action"][0])
 74    return action
 75
 76
 77def menu_classifier_func(elements: wtl.Elements, _) -> List[wtl.PageElement]:
 78    # The condition here is completely hard-coded for the given page.
 79    return [elem for elem in elements if elem.location.x < 10 and elem.location.y < 200 and elem.metadata["tag"] == "a"]
 80
 81
 82if __name__ == "__main__":
 83    cli_args = parse_cli_args()
 84
 85    wf = wtl.Workflow(config=wtl.Config(cli_args.config), policy=policy, url=cli_args.url, output=cli_args.output)
 86
 87    wf.classifiers.add(wtl.ActiveElementFilter(action=Click))
 88
 89    wf.classifiers.add(
 90        wtl.ElementClassifier(
 91            name="menu",
 92            action=Click,
 93            subset="is_active",  # Consider only active elements
 94            highlight=True,
 95            callback=menu_classifier_func,
 96        )
 97    )
 98
 99    wf.run()
100    wf.quit()

random_dataguy.py

 1# Licensed to the Apache Software Foundation (ASF) under one
 2# or more contributor license agreements.  See the NOTICE file
 3# distributed with this work for additional information
 4# regarding copyright ownership.  The ASF licenses this file
 5# to you under the Apache License, Version 2.0 (the
 6# "License"); you may not use this file except in compliance
 7# with the License.  You may obtain a copy of the License at
 8
 9#   http://www.apache.org/licenses/LICENSE-2.0
10
11# Unless required by applicable law or agreed to in writing,
12# software distributed under the License is distributed on an
13# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14# KIND, either express or implied.  See the License for the
15# specific language governing permissions and limitations
16# under the License.
17
18"""
19Defines a subset of all active elements (menu items) and clicks randomly on those.
20"""
21
22from random import choice
23from typing import List
24
25import webtraversallibrary as wtl
26from webtraversallibrary.actions import Click
27
28from .util import parse_cli_args
29
30
@wtl.single_tab
def policy(_, view: wtl.View) -> wtl.Action:
    """Return one Click action, picked at random among those scored as "menu"."""
    click_actions = view.actions.by_type(Click)
    return choice(click_actions.by_score("menu"))
35
36
def menu_classifier_func(elements: wtl.Elements, _) -> List[wtl.PageElement]:
    """
    Select the menu items among ``elements``: "a" tags positioned at x < 10 and y < 200.
    These bounds are hard-coded for the given page.
    """
    menu_items = []
    for elem in elements:
        if not elem.location.x < 10:
            continue
        if not elem.location.y < 200:
            continue
        if elem.metadata["tag"] == "a":
            menu_items.append(elem)
    return menu_items
40
41
if __name__ == "__main__":
    cli_args = parse_cli_args()

    workflow = wtl.Workflow(config=wtl.Config(cli_args.config), policy=policy, url=cli_args.url, output=cli_args.output)

    workflow.classifiers.add(wtl.ActiveElementFilter(action=Click))

    workflow.classifiers.add(
        wtl.ElementClassifier(
            name="menu",
            action=Click,
            subset="is_active",  # Consider only active elements
            highlight=True,
            callback=menu_classifier_func,
        )
    )

    # Always close the browser, even when the policy or the run itself raises
    try:
        workflow.run()
    finally:
        workflow.quit()

random_traversal.py

 1# Licensed to the Apache Software Foundation (ASF) under one
 2# or more contributor license agreements.  See the NOTICE file
 3# distributed with this work for additional information
 4# regarding copyright ownership.  The ASF licenses this file
 5# to you under the Apache License, Version 2.0 (the
 6# "License"); you may not use this file except in compliance
 7# with the License.  You may obtain a copy of the License at
 8
 9#   http://www.apache.org/licenses/LICENSE-2.0
10
11# Unless required by applicable law or agreed to in writing,
12# software distributed under the License is distributed on an
13# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14# KIND, either express or implied.  See the License for the
15# specific language governing permissions and limitations
16# under the License.
17
18"""
19Click on any random clickable element on a page.
20Also demonstrates the use of postload_callbacks.
21"""
22
23from functools import partial
24from random import choice, random
25
26import webtraversallibrary as wtl
27from webtraversallibrary.actions import Click, Refresh
28
29from .util import parse_cli_args
30
31
@wtl.single_tab
def policy(workflow: wtl.Workflow, view: wtl.View) -> wtl.Action:
    """
    Click a random clickable element; with a small probability (5%) refresh the page instead.
    Also asserts that ``workflow.duplicate_loop_idx`` matches ``workflow.loop_idx``.
    """
    assert workflow.duplicate_loop_idx == workflow.loop_idx

    # With some small probability, refresh instead of clicking.
    if random() < 0.95:
        return choice(view.actions.by_type(Click))
    return view.actions.by_type(Refresh).unique()
38
39
def set_duplicate_loop_idx(workflow: wtl.Workflow):
    """Copy the workflow's current ``loop_idx`` into its ``duplicate_loop_idx`` attribute."""
    current_idx = workflow.loop_idx
    workflow.duplicate_loop_idx = current_idx
42
43
if __name__ == "__main__":
    cli_args = parse_cli_args()

    wf = wtl.Workflow(config=wtl.Config(cli_args.config), policy=policy, url=cli_args.url, output=cli_args.output)

    wf.classifiers.add(wtl.ActiveElementFilter(action=Click))

    # Registered as a post-load callback; the policy asserts that it has kept the two indices in sync
    wf.postload_callbacks.append(partial(set_duplicate_loop_idx, wf))

    # Always close the browser, even when the policy or the run itself raises
    try:
        wf.run()
    finally:
        wf.quit()

size_scorer.py

 1# Licensed to the Apache Software Foundation (ASF) under one
 2# or more contributor license agreements.  See the NOTICE file
 3# distributed with this work for additional information
 4# regarding copyright ownership.  The ASF licenses this file
 5# to you under the Apache License, Version 2.0 (the
 6# "License"); you may not use this file except in compliance
 7# with the License.  You may obtain a copy of the License at
 8
 9#   http://www.apache.org/licenses/LICENSE-2.0
10
11# Unless required by applicable law or agreed to in writing,
12# software distributed under the License is distributed on an
13# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14# KIND, either express or implied.  See the License for the
15# specific language governing permissions and limitations
16# under the License.
17
18"""
19Classifies active elements with a "size score" and then clicks some element with average size.
20"""
21
22from random import choice
23from typing import Dict
24
25import webtraversallibrary as wtl
26from webtraversallibrary.actions import Click
27
28from .util import parse_cli_args
29
30
@wtl.single_tab
def policy(_, view: wtl.View) -> wtl.Action:
    """Return one Click action, picked at random among those scored as "size__average"."""
    average_sized = view.actions.by_type(Click).by_score("size__average")
    return choice(average_sized)
34
35
def size_classifier_func(elements: wtl.Elements, _) -> Dict[str, list]:
    """
    Score elements by their area relative to the largest element.

    Returns a dict with two lists of ``(element, score)`` tuples:
      - "big": elements with a relative area above 0.75, scored by that relative area.
      - "average": elements with a relative area in (0.25, 0.75], scored by the distance to 0.5.
    Both lists are empty when there are no elements or no element has a positive area.

    Note that this is not the simplest way of clicking the largest clickable element.
    """
    scores: Dict[str, list] = {"big": [], "average": []}

    # Guard against an empty collection (max() would raise) and all-zero areas (division by zero)
    largest_area = max((e.bounds.area for e in elements), default=0)
    if largest_area <= 0:
        return scores

    for element in elements:
        # Normalized size, computed once per element
        relative = element.bounds.area / largest_area
        if relative > 0.75:
            scores["big"].append((element, relative))
        elif relative > 0.25:
            scores["average"].append((element, abs(0.5 - relative)))

    return scores
49
50
if __name__ == "__main__":
    cli_args = parse_cli_args()
    config = wtl.Config(cli_args.config)

    workflow = wtl.Workflow(config=config, policy=policy, url=cli_args.url, output=cli_args.output)

    workflow.classifiers.add(wtl.ActiveElementFilter())

    # Size scores are computed on active elements only and attached to Click actions
    size_classifier = wtl.ElementClassifier(
        name="size", subset="is_active", highlight=0.5, action=Click, callback=size_classifier_func
    )
    workflow.classifiers.add(size_classifier)

    # The workflow is used as a context manager here instead of calling quit() explicitly
    with workflow:
        workflow.run()

tictactoe.py

  1# Licensed to the Apache Software Foundation (ASF) under one
  2# or more contributor license agreements.  See the NOTICE file
  3# distributed with this work for additional information
  4# regarding copyright ownership.  The ASF licenses this file
  5# to you under the Apache License, Version 2.0 (the
  6# "License"); you may not use this file except in compliance
  7# with the License.  You may obtain a copy of the License at
  8
  9#   http://www.apache.org/licenses/LICENSE-2.0
 10
 11# Unless required by applicable law or agreed to in writing,
 12# software distributed under the License is distributed on an
 13# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 14# KIND, either express or implied.  See the License for the
 15# specific language governing permissions and limitations
 16# under the License.
 17
 18"""
 19Plays TicTacToe online by using a simple AI implementation.
 20"""
 21from typing import List
 22
 23import webtraversallibrary as wtl
 24from webtraversallibrary.actions import Click
 25
 26from .util import parse_cli_args
 27
 28
 29def checkWin(board, player) -> bool:
 30    for i in range(0, 3):
 31        if board[i * 3] == player and board[i * 3 + 1] == player and board[i * 3 + 2] == player:
 32            return True
 33        if board[i] == player and board[i + 3] == player and board[i + 6] == player:
 34            return True
 35    if board[0] == player and board[4] == player and board[8] == player:
 36        return True
 37    if board[2] == player and board[4] == player and board[6] == player:
 38        return True
 39    return False
 40
 41
 42def getAIMove(board, nextMove, aiPlayer):
 43    if checkWin(board, aiPlayer):
 44        return (-1, 10)
 45    if checkWin(board, "O" if aiPlayer == "X" else "X"):
 46        return (-1, -10)
 47
 48    free = [i for i, b in enumerate(board) if b == " "]
 49    if not free:
 50        return (-1, 0)
 51    if len(free) == len(board):
 52        return (4, 0)
 53
 54    moves = []
 55    for i in free:
 56        nextBoard = board[:]
 57        nextBoard[i] = nextMove
 58        score = getAIMove(nextBoard, ("X" if nextMove == "O" else "O"), aiPlayer)[1]
 59        moves.append((i, score))
 60
 61    moves.sort(key=lambda m: m[1], reverse=nextMove == aiPlayer)
 62    return moves[0]
 63
 64
 65def printBoard(board):
 66    print("\n")
 67    for i in range(9):
 68        if not board[i] == " ":
 69            print(board[i], end="   ")
 70        else:
 71            print("_", end="   ")
 72        if i in (2, 5):
 73            print("")
 74    print("\n")
 75
 76
 77@wtl.single_tab
 78def policy(_, view: wtl.View) -> List[wtl.Action]:
 79    start = view.actions.by_score("start")
 80    if start:
 81        return [start[0]]
 82
 83    tiles = [t.target for t in view.actions.by_score("tile")]
 84    board = [t.metadata["class"][5].upper() if t.metadata["class"] else " " for t in tiles]
 85    move = getAIMove(board, "X", "X")
 86
 87    printBoard(board)
 88
 89    return [wtl.actions.Clear(viewport=False), wtl.actions.Click(tiles[move[0]])]
 90
 91
 92def _start_btn(elements, _):
 93    return [e for e in elements if e.metadata["id"] == "sync-task-cover" and "block" in e.metadata["display"]]
 94
 95
 96def _tile_div(elements, _):
 97    return [
 98        e
 99        for e in elements
100        if e.metadata["tag"] == "span" and e.metadata["id"].startswith("ttt") and e.tag.parent.name == "div"
101    ]
102
103
if __name__ == "__main__":
    cli_args = parse_cli_args()

    # The game URL is fixed; cli_args.url is not used by this example
    workflow = wtl.Workflow(
        config=wtl.Config(cli_args.config),
        policy=policy,
        url="https://stanfordnlp.github.io/miniwob-plusplus/html/miniwob/tic-tac-toe.html",
        output=cli_args.output,
    )

    workflow.classifiers.add(wtl.ActiveElementFilter())
    workflow.classifiers.add(wtl.ActiveElementFilter(name="start", callback=_start_btn, action=Click))
    workflow.classifiers.add(wtl.ActiveElementFilter(name="tile", callback=_tile_div, action=Click))

    # Always close the browser, even when the policy or the run itself raises
    try:
        workflow.run()
    finally:
        workflow.quit()

util.py

 1# Licensed to the Apache Software Foundation (ASF) under one
 2# or more contributor license agreements.  See the NOTICE file
 3# distributed with this work for additional information
 4# regarding copyright ownership.  The ASF licenses this file
 5# to you under the Apache License, Version 2.0 (the
 6# "License"); you may not use this file except in compliance
 7# with the License.  You may obtain a copy of the License at
 8
 9#   http://www.apache.org/licenses/LICENSE-2.0
10
11# Unless required by applicable law or agreed to in writing,
12# software distributed under the License is distributed on an
13# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14# KIND, either express or implied.  See the License for the
15# specific language governing permissions and limitations
16# under the License.
17
18"""
19Not an example, just helper functions for the other examples.
20"""
import os
import subprocess
from argparse import ArgumentParser, Namespace
from pathlib import Path

import webtraversallibrary as wtl
27
28
def start_server() -> str:
    """
    Launch ``python3 -m flask run`` for tests/site/flask_app.py as a background process.
    Its output is discarded. Returns the URL "http://localhost:5000" without waiting for the server.
    """
    flask_env = dict(os.environ, FLASK_APP="tests/site/flask_app.py")
    subprocess.Popen(
        ["python3", "-m", "flask", "run"], env=flask_env, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
    )
    return "http://localhost:5000"
34
35
def parse_cli_args() -> Namespace:
    """
    Parses CLI flags relevant for examples.

    Returns the parsed namespace with the attributes ``url``, ``output``, ``windows`` and ``config``.
    "default" is always prepended to ``config``. When ``--url`` is not given, the local test
    server is started through ``start_server()`` and its URL is used instead.
    """
    parser = ArgumentParser()

    group = parser.add_argument_group("Run parameters")
    group.add_argument("--url", type=str, default="DEFAULT", help="URL to run the workflow on.")
    group.add_argument(
        "--output",
        type=Path,
        help="Where to save the result locally. If save, remember to also add save flag for config.",
        default=None,
    )
    group.add_argument(
        "--windows",
        type=str,
        nargs="*",
        default=[wtl.Workflow.SINGLE_TAB],
        help="Tab names (comma-separated). Use space separation for multiple windows.",
    )
    group.add_argument(
        "--config",
        type=str,
        nargs="*",
        default=[],
        required=False,
        help="Names of config files in config/, such as " '"iphone_x_mobile", or key=value pairs.',
    )

    cli_args = parser.parse_args()

    # The "default" config always comes first, followed by whatever was given on the command line
    cli_args.config.insert(0, "default")

    # "DEFAULT" is the sentinel for "no URL given": fall back to the local test server
    if cli_args.url == "DEFAULT":
        cli_args.url = start_server()

    return cli_args

view_classifier.py

 1# Licensed to the Apache Software Foundation (ASF) under one
 2# or more contributor license agreements.  See the NOTICE file
 3# distributed with this work for additional information
 4# regarding copyright ownership.  The ASF licenses this file
 5# to you under the Apache License, Version 2.0 (the
 6# "License"); you may not use this file except in compliance
 7# with the License.  You may obtain a copy of the License at
 8
 9#   http://www.apache.org/licenses/LICENSE-2.0
10
11# Unless required by applicable law or agreed to in writing,
12# software distributed under the License is distributed on an
13# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14# KIND, either express or implied.  See the License for the
15# specific language governing permissions and limitations
16# under the License.
17
18"""
19Clicks randomly on a page until _dataguy_ is not part of the URL anymore, using a ViewClassifier.
20"""
21
22from random import choice
23from typing import List, Set
24
25import webtraversallibrary as wtl
26from webtraversallibrary.actions import Annotate, Clear, Click
27
28from .util import parse_cli_args
29
30
@wtl.single_tab
def goal(_, view):
    """Return True once "dataguy" is no longer among the view's tags."""
    # Stop when dataguy is not part of the URL anymore
    still_dataguy = "dataguy" in view.tags
    return not still_dataguy
35
36
@wtl.single_tab
def policy(_, view: wtl.View) -> List[wtl.Action]:
    """Clear, annotate the page with "Still dataguy", then perform a random click."""
    clear = Clear()
    banner = Annotate(
        location=wtl.Point(30, 30),
        color=wtl.Color(0, 0, 0),
        size=20,
        text="Still dataguy",
        background=wtl.Color(128, 50, 128),
    )
    # Do any random click
    random_click = choice(view.actions.by_type(Click))
    return [clear, banner, random_click]
51
52
def dataguy_classifier_func(view: wtl.View) -> Set[str]:
    """
    Tag a view as "dataguy" if its page URL contains that word, otherwise as "other".
    The tag can be retrieved with view.tags.
    """
    url = view.snapshot.page_metadata["url"]
    if "dataguy" in url:
        return {"dataguy"}
    return {"other"}
57
58
if __name__ == "__main__":
    cli_args = parse_cli_args()

    # Pass the goal so that the run actually stops once "dataguy" is no longer part of the URL
    workflow = wtl.Workflow(
        config=wtl.Config(cli_args.config),
        policy=policy,
        goal=goal,
        url=cli_args.url,
        output=cli_args.output,
    )

    workflow.classifiers.add(wtl.ActiveElementFilter(action=Click))

    # The syntax for a ViewClassifier is similar, but simpler
    workflow.classifiers.add(wtl.ViewClassifier(name="dataguy", callback=dataguy_classifier_func))

    # Always close the browser, even when the policy or the run itself raises
    try:
        workflow.run()
    finally:
        workflow.quit()

wiki_game.py

  1# Licensed to the Apache Software Foundation (ASF) under one
  2# or more contributor license agreements.  See the NOTICE file
  3# distributed with this work for additional information
  4# regarding copyright ownership.  The ASF licenses this file
  5# to you under the Apache License, Version 2.0 (the
  6# "License"); you may not use this file except in compliance
  7# with the License.  You may obtain a copy of the License at
  8
  9#   http://www.apache.org/licenses/LICENSE-2.0
 10
 11# Unless required by applicable law or agreed to in writing,
 12# software distributed under the License is distributed on an
 13# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 14# KIND, either express or implied.  See the License for the
 15# specific language governing permissions and limitations
 16# under the License.
 17
 18"""
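Follows a random link from a random Wikipedia article (page A) to another article (page B), then searches
for the beginning of page B's first paragraph and walks through the search results until page B is found again.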
 21"""
 22
 23from random import choice
 24
 25import webtraversallibrary as wtl
 26from webtraversallibrary.actions import Abort, Click
 27
 28from .util import parse_cli_args
 29
 30
 31@wtl.single_tab_coroutine
 32def policy():
 33    workflow, view = yield
 34
 35    # Store Page A's URL
 36    page_a_url = workflow.current_window.driver.current_url
 37    workflow, view = yield Click(
 38        choice(view.snapshot.elements.by_subtree(wtl.Selector("div[id='bodyContent']")).by_selector(wtl.Selector("a")))
 39    )
 40
 41    # Store Page B's URL
 42    page_b_url = workflow.current_window.driver.current_url
 43
 44    description = ""
 45    # Stores first paragraph from page B's body
 46    try:
 47        description = view.snapshot.elements.by_selector(wtl.Selector("div p:nth-of-type(1)")).unique().metadata["text"]
 48        if description.empty:
 49            raise IndexError()
 50    except IndexError:
 51        description = view.snapshot.elements.by_selector(wtl.Selector("div p:nth-of-type(2)")).unique().metadata["text"]
 52
 53    # Limit the description to 50 characters to improve search
 54    description_subset = str(description[0:49])
 55
 56    # Navigate back to page A
 57    workflow, view = yield wtl.actions.Navigate(page_a_url)
 58
 59    link_to_click = view.snapshot.elements.by_selector(wtl.Selector("input[type='submit']")).unique()
 60
 61    # In the search bar in page A, fill text with description_subset and
 62    # click search to get search results for the descriptions
 63
 64    workflow, view = yield [
 65        wtl.actions.FillText(wtl.Selector("input[type='search']"), str(description_subset)),
 66        Click(link_to_click),
 67    ]
 68
 69    # Store search result's URL
 70    search_url = workflow.current_window.driver.current_url
 71
 72    search_results = view.snapshot.elements.by_selector(wtl.Selector("div[class=mw-search-result-heading] a"))
 73
 74    i = 0
 75
 76    # Go to first link in the search result
 77    try:
 78        workflow, view = yield Click(search_results[i])
 79    except IndexError:
 80        print("Empty search results!!")
 81        yield Abort()
 82
 83    # Check if landing URL equals PAGE B URL, if yes, break, else iterate and go to next link in the search result
 84    # untill the URL's match
 85
 86    while True:
 87        if workflow.current_window.driver.current_url == page_b_url:
 88            print("Woohoo!!!")
 89            break
 90
 91        try:
 92            workflow, view = yield [wtl.actions.Navigate(search_url), Click(search_results[i + 1])]
 93            i += 1
 94        except IndexError:
 95            print("Search result exhausted!!")
 96            break
 97
 98        yield None
 99
100
if __name__ == "__main__":
    cli_args = parse_cli_args()

    # The start URL is fixed to a random Wikipedia article; cli_args.url is not used by this example
    wf = wtl.Workflow(
        config=wtl.Config(cli_args.config),
        policy=policy,
        url="https://en.wikipedia.org/wiki/Special:Random",
        output=cli_args.output,
    )

    wf.classifiers.add(wtl.ActiveElementFilter(action=Click))

    wf.classifiers.add(wtl.ElementClassifier(name="textfield", action=wtl.actions.FillText, highlight=True))

    # Always close the browser, even when the policy or the run itself raises
    try:
        wf.run()
    finally:
        wf.quit()

without_workflow.py

 1# Licensed to the Apache Software Foundation (ASF) under one
 2# or more contributor license agreements.  See the NOTICE file
 3# distributed with this work for additional information
 4# regarding copyright ownership.  The ASF licenses this file
 5# to you under the Apache License, Version 2.0 (the
 6# "License"); you may not use this file except in compliance
 7# with the License.  You may obtain a copy of the License at
 8
 9#   http://www.apache.org/licenses/LICENSE-2.0
10
11# Unless required by applicable law or agreed to in writing,
12# software distributed under the License is distributed on an
13# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14# KIND, either express or implied.  See the License for the
15# specific language governing permissions and limitations
16# under the License.
17
18"""
19Goes to a random wikipage, finds 'n' most common words in the article, where 'n' being the number of
20most common words that the user wants.
21"""
22
23import collections
24from collections import defaultdict
25
26import webtraversallibrary as wtl
27
28from .util import parse_cli_args
29
cli_args = parse_cli_args()
config = wtl.Config(cli_args.config)

window = wtl.Window(config)

# Whatever happens below (network error, missing stopwords file, invalid user input),
# the browser is closed at the end.
try:
    # Navigates to a Wiki article chosen at random
    window.scraper.navigate("https://en.wikipedia.org/wiki/Special:Random")

    # Takes a snapshot of the current page
    snapshot = window.scraper.scrape_current_page()

    # Fetches all the elements with links in the current page
    links = snapshot.elements.by_selector(wtl.Selector("a"))  # pylint: disable=no-member
    print("Number of links in the article ", len(links))

    # Gets the current URL of the page
    search_url = window.driver.current_url

    print("Link to the wiki article : ", search_url)

    paragraphs = snapshot.elements.by_selector(wtl.Selector("div p"))  # pylint: disable=no-member

    # Fetch stopwords from a local file containing an array of stopwords.
    # They are split into a set of individual words so that the check below is an exact word match;
    # testing against the raw file text would drop every word that is a substring of it.
    # NOTE(review): assumes whitespace/comma separated, optionally quoted words - confirm the file format
    with open("examples/stopwords.txt", encoding="utf8") as f:
        stopwords = {token.strip("[]\"', ") for token in f.read().lower().replace(",", " ").split()}

    article = " ".join(p.metadata["text"] for p in paragraphs)

    # Find n most common words in the article, where n being the number of common words required by the user

    wordcount = defaultdict(int)
    for word in article.lower().split():
        if word not in stopwords:
            wordcount[word] += 1

    n_print = int(input("How many most common words to print: "))
    print(f"\nOK. The {n_print} most common words are as follows\n")
    word_counter = collections.Counter(wordcount)
    for word, count in word_counter.most_common(n_print):
        print(word, ": ", count)
finally:
    # Close the browser
    window.quit()

zindex.py

 1# Licensed to the Apache Software Foundation (ASF) under one
 2# or more contributor license agreements.  See the NOTICE file
 3# distributed with this work for additional information
 4# regarding copyright ownership.  The ASF licenses this file
 5# to you under the Apache License, Version 2.0 (the
 6# "License"); you may not use this file except in compliance
 7# with the License.  You may obtain a copy of the License at
 8
 9#   http://www.apache.org/licenses/LICENSE-2.0
10
11# Unless required by applicable law or agreed to in writing,
12# software distributed under the License is distributed on an
13# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14# KIND, either express or implied.  See the License for the
15# specific language governing permissions and limitations
16# under the License.
17
18"""
19Compute the zIndex of all active elements and highlights the top 1% elements.
20"""
21
22import logging
23from typing import List, Tuple
24
25import webtraversallibrary as wtl
26from webtraversallibrary.logging_utils import setup_logging
27
28from .util import parse_cli_args
29
30
@wtl.single_tab
def policy(_, __) -> wtl.Action:
    """Ignore both arguments and always return a WaitForUser action."""
    wait_action = wtl.actions.WaitForUser()
    return wait_action
34
35
# JavaScript snippet that returns the z-index of the element matching the CSS selector in arguments[0].
# It defines window.getZIndex, which reads the computed "z-index" of an element and, while that value
# is not numeric, walks up to the parent element; it yields 0 once there is no parent left.
# Nothing is returned when the selector matches no element.
# https://stackoverflow.com/questions/1388007/getting-the-z-index-of-a-div-in-javascript
Z_INDEX_JS = """
window.getZIndex = function (e) {
  if (e === null) {
    return 0;
  }
  let z = window.document.defaultView.getComputedStyle(e).getPropertyValue('z-index');
  if (isNaN(z)) {
    return window.getZIndex(e.parentElement);
  }
  return z;
};
console.log("Hello!");
let element = document.querySelector(arguments[0]);
if (element !== null) {
  return window.getZIndex(element);
}
"""
54
55
def _compute_z_index(elements: wtl.Elements, workflow: wtl.Workflow) -> List[Tuple[wtl.PageElement, float]]:
    """
    Pair every element with its z-index, obtained by running Z_INDEX_JS on the element's CSS selector.
    A missing or empty script result counts as 0.
    """
    # This may be slow for many elements. If you need more performance, consider a JS script
    # which computes all elements' z-values combined and returns the map directly.

    def _z_index_of(element) -> int:
        raw = workflow.js.execute_script(Z_INDEX_JS, element.selector.css)
        return int(raw or 0)

    return [(element, _z_index_of(element)) for element in elements]
64
65
if __name__ == "__main__":
    cli_args = parse_cli_args()

    wf = wtl.Workflow(config=wtl.Config(cli_args.config), policy=policy, url=cli_args.url, output=cli_args.output)

    wf.classifiers.add(wtl.ActiveElementFilter(action=wtl.actions.Click))

    wf.classifiers.add(
        wtl.ElementClassifier(
            name="zIndex",
            subset="is_active",
            enabled=True,
            highlight=0.99,
            mode=wtl.ScalingMode.LINEAR,
            callback=_compute_z_index,
        )
    )

    setup_logging(logging_level=logging.DEBUG)

    # Always close the browser, even when the policy or the run itself raises
    try:
        wf.run()
    finally:
        wf.quit()