Skip to content

Commit

Permalink
Add AST tools/visualizers and polymorphism examples
Browse files Browse the repository at this point in the history
  • Loading branch information
christian-byrne committed Oct 8, 2024
1 parent ab41718 commit cb95198
Show file tree
Hide file tree
Showing 15 changed files with 2,981 additions and 0 deletions.
186 changes: 186 additions & 0 deletions ast_visualizers/color_code_tokens.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
from PIL import Image, ImageDraw, ImageFont
import nltk
import os

# Directory the rendered image is written to; it must already exist.
SAVE_PATH = "/home/c_byrne/school/courses/csc372-programming_languages/assignments/small-assignments"
# File name of the rendered image inside SAVE_PATH.
FILENAME = "color_coded_tokens.png"
# Passed as include_examples when building legend entries.
DISPLAY_EXAMPLES = True
# Substring replacements applied to each legend description
# (maps the stock tagset wording to the wording wanted in the image).
REPLACE_TOKEN_TYPE_NAMES = {
    "DT: determiner": "AT: article",
    "IN: preposition or conjunction, subordinating": "IN: preposition",
}

# Fail fast, before any download or drawing work, when the output directory
# is missing. FileNotFoundError is the exception that describes a missing
# path (FileExistsError means the opposite: the path is already there).
if not os.path.exists(SAVE_PATH):
    raise FileNotFoundError(f"Path {SAVE_PATH} does not exist")

# Download the NLTK datasets needed for tokenizing and tagging.
# NOTE(review): recent NLTK releases may look for differently named
# resources (e.g. "punkt_tab") — confirm against the installed version.
for resource in ("punkt", "averaged_perceptron_tagger"):
    nltk.download(resource)

# Sentence that is rendered in the image.
sentence = "the girl has a gift for the boy in the car"

# Split the sentence into tokens, then pair every token with its POS tag.
tokens = nltk.word_tokenize(sentence)
pos_tags = nltk.pos_tag(tokens)
# Define color map for POS tags.
# Keys are POS tag names, values are color names passed as `fill=` when drawing.
# NOTE(review): every lookup in this file slices the tag to its first two
# characters (`tag[:2]`) before calling `color_map.get`, so the keys longer
# than two characters ("POS", "SYM", "PDT", "WP$", "NNP", "NNPS", "NNS",
# "VBG", "VBD") are never matched as written; such tags fall back to their
# two-letter prefix entry or to the black default.
color_map = {
    "NN": "blue",  # Noun
    "VB": "green",  # Verb
    "JJ": "orange",  # Adjective
    "RB": "purple",  # Adverb
    "DT": "red",  # Determiner
    "IN": "brown",  # Preposition
    "CC": "pink",  # Coordinating conjunction
    "PR": "gray",  # Pronoun
    "MD": "olive",  # Modal
    "TO": "cyan",  # To
    "UH": "gold",  # Interjection
    "FW": "magenta",  # Foreign word
    "CD": "teal",  # Cardinal number
    "WP": "navy",  # Wh-pronoun
    "WR": "lime",  # Wh-adverb
    "EX": "maroon",  # Existential there
    "RP": "indigo",  # Particle
    "POS": "khaki",  # Possessive ending
    "SYM": "sienna",  # Symbol
    "LS": "orchid",  # List item marker
    "PDT": "salmon",  # Predeterminer
    "WP$": "lavender",  # Possessive wh-pronoun
    "NNP": "skyblue",  # Proper noun
    "NNPS": "lightcoral",  # Proper noun plural
    "NNS": "lightgreen",  # Noun plural
    "VBG": "lightyellow",  # Verb gerund
    "VBD": "lightcyan",  # Verb past tense
}

BASE_FONTSIZE = 64

# White canvas, twice as wide as it is tall.
image_width = 2048
image_height = image_width // 2
image = Image.new("RGB", (image_width, image_height), (255, 255, 255))
draw = ImageDraw.Draw(image)

# Font used for the sentence; "arial.ttf" has to be resolvable by Pillow.
font = ImageFont.truetype("arial.ttf", BASE_FONTSIZE)

# Margins are derived from the canvas size; the sentence starts at the left
# margin, one vertical margin above the middle of the canvas.
OFFSET_X = image_width // 13
OFFSET_Y = image_height // 6
x = OFFSET_X
y = image_height // 2 - OFFSET_Y

# Every distinct tag that occurs in the sentence.
seen_token_types = {tag for _, tag in pos_tags}

# Write the words left to right, each in the color of its tag.
for word, tag in pos_tags:
    # Colors are looked up by the first two characters of the tag;
    # tags without an entry are drawn in black.
    tag_base = tag[:2]
    color = color_map.get(tag_base, (0, 0, 0))

    draw.text((x, y), word, fill=color, font=font)

    # Advance by the rendered width of the word plus a 10 px gap.
    left, _, right, _ = draw.textbbox((x, y), word, font=font)
    text_width = right - left
    x += text_width + 10

# Extra NLTK datasets for the legend.
# "tagsets" presumably provides the "help/tagsets/<name>.pickle" resource
# that get_tag_description loads — confirm against the installed NLTK.
# NOTE(review): `wordnet` is imported below, but no live code in the visible
# file uses it — confirm this download is still needed.
nltk.download("wordnet")
nltk.download("tagsets")

from nltk.corpus import wordnet
from nltk.data import load
import re
from textwrap import wrap


def format_descriptions(tags, tagdict, tag_base, include_examples=True, example_count=5):
    """Build the legend text for one or more POS tags.

    Previously the loop overwrote its result on every iteration, so only a
    single tag's description was ever returned. Every requested tag now
    contributes one line; for a single tag the output is unchanged.

    Args:
        tags: Iterable of tag names to describe; each must be a key of
            ``tagdict``.
        tagdict: Mapping of tag name to an entry whose first item is the
            description and whose second item is a space-separated string
            of example words.
        tag_base: Tag prefix shown in parentheses after the tag whenever it
            differs from the tag itself.
        include_examples: When true, append the leading examples to each
            description.
        example_count: Maximum number of examples listed per tag.

    Returns:
        The descriptions, one per tag in input order, joined with newlines,
        each passed through ``apply_replacements``. An empty string when
        ``tags`` is empty.

    Raises:
        KeyError: If a tag is not present in ``tagdict``.
    """
    lines = []
    for tag in tags:
        entry = tagdict[tag]
        tag_display = tag if tag == tag_base else f"{tag} ({tag_base})"
        descrip = f"{tag_display}: {entry[0]}"
        if include_examples:
            examples = entry[1].split(" ")[:example_count]
            descrip += f" ({', '.join(examples)}, ...)"
        lines.append(apply_replacements(descrip))
    return "\n".join(lines)


def apply_replacements(description: str) -> str:
    """Return *description* with every REPLACE_TOKEN_TYPE_NAMES key replaced by its value.

    Replacements are applied one after another in the mapping's iteration
    order, each on the result of the previous one.
    """
    result = description
    for verbose_name in REPLACE_TOKEN_TYPE_NAMES:
        result = result.replace(verbose_name, REPLACE_TOKEN_TYPE_NAMES[verbose_name])
    return result


def get_tag_description(
    tagpattern, tag_base, tagset="upenn_tagset", include_examples=True, example_count=5
):
    """Look up legend text for the tag(s) selected by *tagpattern*.

    Args:
        tagpattern: Selects the tags to describe. A falsy value selects every
            tag in the tagset; an exact tag name selects that tag; anything
            else is compiled as a regular expression and matched against the
            start of each tag name.
        tag_base: Forwarded unchanged to ``format_descriptions``.
        tagset: Name of the tagset; the table is loaded from
            ``help/tagsets/<tagset>.pickle`` via ``nltk.data.load``.
        include_examples: Forwarded to ``format_descriptions``.
        example_count: Forwarded to ``format_descriptions``.

    Returns:
        The result of ``format_descriptions`` for the selected tags, or
        ``None`` (after printing a message) when a regular expression
        matches no tag.

    Raises:
        ValueError: If loading the tagset yields ``None``.
    """
    tagdict = load("help/tagsets/" + tagset + ".pickle")
    if tagdict is None:
        raise ValueError(f"Unknown tagset: {tagset}")

    # Work out which tags to describe, then format them in one place.
    if not tagpattern:
        selected = sorted(tagdict)
    elif tagpattern in tagdict:
        selected = [tagpattern]
    else:
        matcher = re.compile(tagpattern)
        selected = [name for name in sorted(tagdict) if matcher.match(name)]
        if not selected:
            print("No matching tags found.")
            return None

    return format_descriptions(
        selected,
        tagdict,
        tag_base,
        include_examples=include_examples,
        example_count=example_count,
    )


# Draw the legend: one entry per distinct tag, starting just below the
# vertical middle of the canvas.
legend_x = OFFSET_X // 4
legend_y = image_height // 2 + OFFSET_Y // 6

# The legend uses the same typeface at a quarter of the sentence font size.
font = ImageFont.truetype("arial.ttf", BASE_FONTSIZE // 4)

# Iterate in sorted order: the iteration order of a set of strings can differ
# between interpreter runs, which made the legend layout (and therefore the
# saved image) non-deterministic.
for tag in sorted(seen_token_types):
    # Same color rule as for the words: first two characters of the tag,
    # black when there is no entry.
    tag_base = tag[:2]
    color = color_map.get(tag_base, (0, 0, 0))

    display_text = get_tag_description(tag, tag_base, include_examples=DISPLAY_EXAMPLES)
    if not display_text:
        # get_tag_description yields None when nothing matches the tag;
        # there is nothing to draw, so do not consume a legend row.
        continue
    draw.text((legend_x, legend_y), display_text, fill=color, font=font)
    legend_y += OFFSET_Y // 4

# Display the finished image.
image.show()

# Write the image to SAVE_PATH/FILENAME, deleting any previous copy first.
output_path = os.path.join(SAVE_PATH, FILENAME)
if os.path.exists(output_path):
    os.remove(output_path)
image.save(output_path)
Binary file added ast_visualizers/sentence_visualization.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
26 changes: 26 additions & 0 deletions polymorphism/Foo.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
/**
 * Small example class for the polymorphism demos: holds two ints and
 * exposes their sum.
 */
public class Foo {
    private int x;
    private int y;

    /** Stores the two values that {@link #add()} sums. */
    public Foo(int x, int y) {
        this.x = x;
        this.y = y;
    }

    /** Returns the sum of the two stored values. */
    public int add() {
        return this.x + this.y;
    }

    /**
     * Calls equals, hashCode and toString on a Foo. Foo declares none of
     * these methods itself; the calls work because every class in Java
     * inherits them from the Object class.
     */
    public static void main(String[] args) {
        Foo first = new Foo(1, 1);
        Foo second = new Foo(2, 2);

        boolean same = first.equals(second);
        System.out.println(same);

        int hash = first.hashCode();
        System.out.println(hash);

        String text = first.toString();
        System.out.println(text);
    }
}
13 changes: 13 additions & 0 deletions polymorphism/FooChild.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
/** Subclass of Foo that declares no members of its own besides a constructor. */
public class FooChild extends Foo {
    /** Passes both values on to the Foo constructor. */
    public FooChild(int x, int y) {
        super(x, y);
    }

    /**
     * Calls add() on a FooChild. The call works because FooChild inherits
     * the "add" method from its parent class, Foo.
     */
    public static void main(String[] args) {
        FooChild child = new FooChild(3, 4);
        int sum = child.add();
        System.out.println(sum);
    }
}
22 changes: 22 additions & 0 deletions polymorphism/FooTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
/** Compares Foo objects (and objects of Foo subclasses) by the result of add(). */
public class FooTest {
    /** Returns true when both arguments produce the same add() result. */
    public static boolean equals(Foo x, Foo y) {
        int left = x.add();
        int right = y.add();
        return left == right;
    }

    /** Runs the comparison on mixes of Foo and FooChild arguments. */
    public static void main(String[] args) {
        Foo parent = new Foo(2, 2);
        FooChild child = new FooChild(7, 7);

        // Foo against Foo.
        System.out.println(FooTest.equals(parent, parent));          // 4 vs 4
        System.out.println(FooTest.equals(parent, new Foo(3, 3)));   // 4 vs 6
        // FooChild passed where a Foo is expected.
        System.out.println(FooTest.equals(child, new Foo(11, 3)));   // 14 vs 14
        System.out.println(FooTest.equals(parent, child));           // 4 vs 14
        System.out.println(FooTest.equals(child, parent));           // 14 vs 4
    }
}

/* This is where we see subtype polymorphism.
 * The equals method is declared to take arguments of
 * type Foo, but the tests in main above show that it
 * works on objects of type Foo and on objects of type FooChild,
 * because every FooChild is also a Foo.
 */

34 changes: 34 additions & 0 deletions polymorphism/Polymorphism1.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
/**
 * Method-overloading demo: several pow definitions that differ only in
 * their parameter types. Each one prints a label on every invocation
 * (recursive ones included) so the output shows which overload was chosen.
 */
public class Polymorphism1 {
    /** int base, int exponent; an exponent of 0 or less yields 1. */
    public static int pow(int a, int b) {
        System.out.println("first");
        return (b <= 0) ? 1 : a * Polymorphism1.pow(a, b - 1);
    }

    /** double base, int exponent; an exponent of 0 or less yields 1.0. */
    public static double pow(double a, int b) {
        System.out.println("second");
        return (b <= 0) ? 1.0 : a * Polymorphism1.pow(a, b - 1);
    }

    /* Kept commented out on purpose: same parameter types as the second
       definition, differing only in return type.

    public static int pow (double a, int b) {System.out.println("third");
        if (b <= 0)
            return 1;
        return ((int) a)*Polymorphism1.pow(a, b-1);
    }*/

    /** int base, double exponent; an exponent below 1 yields 1. */
    public static double pow(int a, double b) {
        System.out.println("fourth");
        return (b < 1) ? 1 : a * Polymorphism1.pow(a, b - 1);
    }

    /** Calls pow once per argument-type combination and prints each result. */
    public static void main(String[] args) {
        int intResult = Polymorphism1.pow(2, 3);
        System.out.println(intResult);

        double doubleBaseResult = Polymorphism1.pow(2.0, 3);
        System.out.println(doubleBaseResult);

        double doubleExponentResult = Polymorphism1.pow(2, 3.0);
        System.out.println(doubleExponentResult);
    }
}

/* In Java, method overloading is allowed. This means that a programmer can write two methods that have the same name but take different parameters. Notice that the first definition above takes two integers, the second takes a double and an int, and the fourth takes an int and a double. Java knows which definition to use based on the types of the arguments that get passed in when the method is called. For method overloading to work in Java, it is necessary that the parameters are different in number, type, and/or order. The third definition is commented out because this is not legal due to the fact that the parameters match those of the second definition in type, number, and order. The return types of the two methods are different, but that is not enough for Java to determine which method should be used. Some languages do allow overloading of methods that only differ in return type, but most do not. This is because it is much easier for a compiler to determine which method to use based on the types of the arguments than on the type that is returned. */

103 changes: 103 additions & 0 deletions polymorphism/Polymorphism2.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
public class Polymorphism2 {
public static void main(String[] args) {
//promotion
double x = 2*8.2;
/* In the statement above, * works on the two numbers because of
* a type of implicit type conversion called promotion. In this kind
* of coercion, a value of a smaller type is automatically converted to a larger
* type (in this case int to double), which then allows the multiplication
* operator to be applied. The result will be a double as well. Note that
* not all languages will do this. SML, for example, will not.
*/

//function call--polymorphic
System.out.println(Polymorphism2.f((byte) 2));
System.out.println(Polymorphism2.f((short) 2));
System.out.println(Polymorphism2.f('a'));
System.out.println(Polymorphism2.f(3));
System.out.println(Polymorphism2.f(4L));
System.out.println(Polymorphism2.f(5.6F));

/* All of the statements above are legal in Java because in each
* case, the value that is passed in is of a type that is equal or
* smaller than a double, which is the type of f's parameter.
* This is more implicit type coercion, which is a form of polymorphism.
* Note that we are dealing with primitive data here, not objects.
* But what is happening is that the method is expecting a double,
* and all of these values can be automatically converted to a double
* because it would not result in loss of data.
*/

//what happens here?
int y = (int)2.0;
/* In this case, we are trying to put a double into a space reserved
* for an integer. This will not be done automatically in Java because
* doubles are larger than integers, so there would be potential data loss.
* Here, the programmer must specify that they want the conversion to take
* place with explicit casting.
*/

//function call--polymorphic
System.out.println(Polymorphism2.g((byte) 1));
System.out.println(Polymorphism2.g((short) 2));
System.out.println(Polymorphism2.g('a'));
//System.out.println(Polymorphism2.g(3.0));
//System.out.println(Polymorphism2.g(4L));
//System.out.println(Polymorphism2.g(5.6F));

/* The parameter expected by method g above is an integer.
* So the first three statements work because bytes, shorts,
* and chars are all small enough to fit into an integer spot.
* This is why the implicit type coercion can happen.
* But in the last three cases, the statements would produce errors
* because doubles, longs, and floats are too big to fit into
* an integer spot. Therefore, the implicit type conversion cannot
* be done. Explicit casting would be necessary to make these work.
*/


Polymorphism2.h('a', 5);
/* Here, there are two options for h that
* would work with these arguments because chars can be
* converted to integers. However, it will use the
* method that is closest to the arguments passed in. In other
* words, Java will not do the implicit conversion unless it
* is necessary. */

//Polymorphism2.h('a','b');
/* This line is commented out because it will not compile,
* which may seem odd since chars can be converted to integers
* so any one of the h definitions could work with these arguments.
* We know that Java will do as little conversion as necessary, so
* it will not choose h1 over the other two. The problem is that
* h2 and h3 require the exact same amount of conversion, so
* Java does not know what to do--so it just doesn't allow it.
*/
}

public static double f (double x) {System.out.println("f1");
return x;
}

public static int g (int x) {
return x;
}

//Where things get tricky...

/*public static int f(int x) {System.out.println("f2");
return x*x;
}*/

public static void h(int x, int y) {
System.out.println("h1");
}

public static void h(char x, int y) {
System.out.println("h2");
}

public static void h(int x, char y) {
System.out.println("h3");
}
}
Loading

0 comments on commit cb95198

Please sign in to comment.