% You can not select more than 25 topics
% Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
% 2672 lines
% 70 KiB
% 2672 lines
% 70 KiB
\documentclass[11pt,a4paper]{article} |
|
%%\documentclass[8pt,a4paper,twocolumn]{article} |
|
\usepackage[a4paper,left=1cm,right=1cm,top=1.0cm,bottom=2.5cm]{geometry} |
|
\usepackage[turkish]{babel} |
|
\usepackage{times} |
|
\usepackage{graphicx} |
|
\usepackage{natbib} |
|
\usepackage{algorithm} |
|
\usepackage{algorithmic} |
|
\usepackage[utf8]{inputenc} |
|
\usepackage{nomencl} |
|
\usepackage{commath} |
|
\usepackage{url} |
|
\usepackage{subfig} |
|
\usepackage[colorinlistoftodos]{todonotes} |
|
\usepackage{epstopdf} |
|
\usepackage{amsmath} |
|
\usepackage{mathtools} |
|
\usepackage{paralist} |
|
\usepackage{hyperref} |
|
\usepackage{pythonhighlight} |
|
\usepackage{listings} |
|
|
|
|
|
\definecolor{codegreen}{rgb}{0,0.6,0} |
|
\definecolor{codegray}{rgb}{0.5,0.5,0.5} |
|
\definecolor{codepurple}{rgb}{0.58,0,0.82} |
|
\definecolor{backcolour}{rgb}{0.95,0.95,0.92} |
|
|
|
\lstdefinestyle{mystyle}{ |
|
backgroundcolor=\color{backcolour}, |
|
commentstyle=\color{codegreen}, |
|
keywordstyle=\color{magenta}, |
|
numberstyle=\tiny\color{codegray}, |
|
stringstyle=\color{codepurple}, |
|
basicstyle=\footnotesize, |
|
breakatwhitespace=false, |
|
breaklines=true, |
|
captionpos=b, |
|
keepspaces=true, |
|
numbers=left, |
|
numbersep=5pt, |
|
showspaces=false, |
|
showstringspaces=false, |
|
showtabs=false, |
|
tabsize=2 |
|
} |
|
|
|
|
|
|
|
|
|
\def\ExtendVersion{1} |
|
\newcommand{\credit}[1]{Thanks to: #1} |
|
|
|
|
|
\input{taypack.tex} |
|
%turning on/off comments |
|
\usepackage{comment} |
|
\includecomment{comment} %show comments |
|
%\excludecomment{comment} %do not show comments |
|
|
|
\lstset{style=mystyle} |
|
\begin{document} |
|
\lstset{language=Python} |
|
|
|
% first the title is needed |
|
\title{\centering{Python: From Basics to the Extreme}} |
|
|
|
% the name(s) of the author(s) follow(s) next |
|
%\author{Keke\c{c}} |
|
%\author{İbrâhim Taygun Keke\c{c}} |
|
%\author{İbrâhim Taygun Keke\c{c}} |
|
%\author{UL} |
|
|
|
\maketitle |
|
|
|
%\begin{abstract} |
|
%\end{abstract} |
|
|
|
% Counter for the numbered "Madde" items; it is reset at every \section.
\newcounter{Madde}[section] |
|
% Environment Madde[<title>]: steps the counter, starts a new paragraph and
% prints a bold "Md <number>. <title>" heading before the body text.
\newenvironment{Madde}[1][]{\refstepcounter{Madde}\par\medskip |
|
\textbf{Md ~\theMadde. #1} \rmfamily}{\medskip} |
|
|
|
% \citemd{<number>}: prints a bold, parenthesised reference of the form (M. <number>).
\newcommand{\citemd}[1]{(\textbf{M. {#1}})} |
|
|
|
\tableofcontents |
|
|
|
\section{Introduction} |
|
This reference book covers the most frequently used concepts of the Python language. |
|
The reader is assumed to be familiar with the abstract concepts of variables, loops, functions and such. |
|
At each section, a working code example is provided. |
|
|
|
\textbf{Correct usage of the material.} |
|
\begin{itemize} |
|
\item analyze the code example |
|
\item copy/paste and run the example |
|
\item check whether the output of the code makes sense. |
|
\end{itemize} |
|
|
|
\section{Basics} |
|
|
|
\subsection{Variables and Data Types} |
|
You can declare integer and string variables. |
|
\begin{python} |
|
x = 5 |
|
y = "John" |
|
print(x) |
|
#5 |
|
print(y) |
|
#John |
|
\end{python} |
|
|
|
Declare string, integer or float. |
|
\begin{python} |
|
x = str(3) # x will be '3' |
|
y = int(3) # y will be 3 |
|
z = float(3) # z will be 3.0 |
|
print(type(x)) |
|
#<class 'str'> |
|
print(type(y)) |
|
#<class 'int'> |
|
\end{python} |
|
|
|
Different variable types exist. But for now, focus on string, integer, float, list, tuple, dictionary, and sets. |
|
\begin{python} |
|
Text Type: str |
|
Numeric Types: int, float, complex |
|
Sequence Types: list, tuple, range |
|
Mapping Type: dict |
|
Set Types: set, frozenset |
|
Boolean Type: bool |
|
Binary Types: bytes, bytearray, memoryview |
|
None Type: NoneType |
|
\end{python} |
|
|
|
|
|
List is a collection which is ordered and changeable. Allows duplicate members. |
|
|
|
Tuple is a collection which is ordered and unchangeable. Allows duplicate members. |
|
|
|
Set is a collection which is unordered and unindexed. Its items cannot be modified in place, but items can be added or removed. No duplicate members. |
|
|
|
Dictionary is a collection of key--value pairs which is ordered (by insertion, since Python 3.7) and changeable. No duplicate keys. |
|
|
|
\subsection{List} |
|
List is a python data type. Lists can store multiple variables in a single variable. |
|
\begin{python} |
|
thislist = ["apple", "banana", "cherry"] |
|
print(thislist) |
|
#['apple', 'banana', 'cherry'] |
|
\end{python} |
|
|
|
\subsubsection{access list items} |
|
\begin{python} |
|
thislist = ["apple", "banana", "cherry"] |
|
print(thislist[1]) |
|
#banana |
|
\end{python} |
|
|
|
\subsubsection{change list item value} |
|
\begin{python} |
|
thislist = ["apple", "banana", "cherry"] |
|
thislist[1] = "blackcurrant" |
|
print(thislist) |
|
#['apple', 'blackcurrant', 'cherry'] |
|
\end{python} |
|
|
|
\subsubsection{change a range of list item values} |
|
\begin{python} |
|
thislist = ["apple", "banana", "cherry", "orange", "kiwi", "mango"] |
|
thislist[1:3] = ["blackcurrant", "watermelon"] |
|
print(thislist) |
|
#['apple', 'blackcurrant', 'watermelon', 'orange', 'kiwi', 'mango'] |
|
\end{python} |
|
|
|
\subsubsection{add list items} |
|
\begin{python} |
|
thislist = ["apple", "banana", "cherry"] |
|
thislist.append("orange") |
|
print(thislist) |
|
#['apple', 'banana', 'cherry', 'orange'] |
|
\end{python} |
|
|
|
\subsubsection{remove list items} |
|
\begin{python} |
|
thislist = ["apple", "banana", "cherry"] |
|
thislist.remove("banana") |
|
print(thislist) |
|
#['apple', 'cherry'] |
|
\end{python} |
|
|
|
\paragraph{remove duplicates from the list} |
|
|
|
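If an element has not been seen before, it is added to the \texttt{seen} set and kept in the result. Since \texttt{seen.add(x)} returns \texttt{None}, the expression \texttt{not seen.add(x)} is always true and only serves to record the element. |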
|
|
|
\begin{python} |
|
a = [1,2,3,2,1,5,6,5,5,5] |
|
seen = set() |
|
uniq = [x for x in a if x not in seen and not seen.add(x)] |
|
print(uniq) |
|
#[1, 2, 3, 5, 6] |
|
\end{python} |
|
|
|
\subsubsection{looping a list} |
|
\begin{python} |
|
thislist = ["apple", "banana", "cherry"] |
|
for x in thislist: |
|
print(x) |
|
#apple |
|
#banana |
|
#cherry |
|
\end{python} |
|
|
|
\subsubsection{list slicing} |
|
You can access multiple elements with the slicing operation. |
|
\begin{python} |
|
thislist = ["apple", "banana", "cherry", "watermelon", "grape", "kiwi"] |
|
print(thislist[1:4] ) |
|
#['banana', 'cherry', 'watermelon'] |
|
|
|
## Here, we access each 2nd element from 1st to 6th element of the list. |
|
print(thislist[1:6:2]) |
|
#['banana', 'watermelon', 'kiwi'] |
|
|
|
## Reverse slicing: access elements from end to beginning of the list. |
|
print(thislist[6:1:-1]) |
|
#['kiwi', 'grape', 'watermelon', 'cherry'] |
|
|
|
\end{python} |
|
|
|
\subsubsection{list comprehension} |
|
This powerful concept can single-line the traditional loops. |
|
\begin{python} |
|
fruits = ["apple", "banana", "cherry", "kiwi", "mango"] |
|
newlist = [] |
|
|
|
for x in fruits: |
|
if "a" in x: |
|
newlist.append(x) |
|
|
|
print(newlist) |
|
# ['apple', 'banana', 'mango'] |
|
|
|
fruits = ["apple", "banana", "cherry", "kiwi", "mango"] |
|
newlist = [x for x in fruits if "a" in x] |
|
|
|
print(newlist) |
|
# ['apple', 'banana', 'mango'] |
|
\end{python} |
|
|
|
\subsubsection{list comprehension: condition + operation} |
|
\begin{python} |
|
fruits = ["apple", "banana", "cherry", "kiwi", "mango"] |
|
newlist = [x.upper() for x in fruits] |
|
print(newlist) |
|
#['APPLE', 'BANANA', 'CHERRY', 'KIWI', 'MANGO'] |
|
\end{python} |
|
|
|
\subsubsection{sorting a list} |
|
\begin{python} |
|
thislist = ["orange", "mango", "kiwi", "pineapple", "banana"] |
|
thislist.sort() |
|
print(thislist) |
|
#['banana', 'kiwi', 'mango', 'orange', 'pineapple'] |
|
\end{python} |
|
|
|
\subsubsection{copying a list} |
|
\begin{python} |
|
thislist = ["apple", "banana", "cherry"] |
|
mylist = thislist.copy() |
|
print(mylist) |
|
#['apple', 'banana', 'cherry'] |
|
\end{python} |
|
|
|
\subsubsection{join two lists} |
|
\begin{python} |
|
list1 = ["a", "b", "c"] |
|
list2 = [1, 2, 3] |
|
|
|
list3 = list1 + list2 |
|
print(list3) |
|
#['a', 'b', 'c', 1, 2, 3] |
|
\end{python} |
|
|
|
|
|
\subsubsection{looping through multiple Lists} |
|
\begin{python} |
|
a = ['a1', 'a2', 'a3'] |
|
b = ['b1', 'b2'] |
|
|
|
for x, y in zip(a, b): |
|
print(x, y) |
|
#a1 b1 |
|
#a2 b2 |
|
\end{python} |
|
|
|
|
|
\subsubsection{other list methods} |
|
\begin{python} |
|
Method Description |
|
append() Adds an element at the end of the list |
|
clear() Removes all the elements from the list |
|
copy() Returns a copy of the list |
|
count() Returns the number of elements with the specified value |
|
extend() Add the elements of a list (or any iterable), to the end of the current list |
|
index() Returns the index of the first element with the specified value |
|
insert() Adds an element at the specified position |
|
pop() Removes the element at the specified position |
|
remove() Removes the item with the specified value |
|
reverse() Reverses the order of the list |
|
sort() Sorts the list |
|
\end{python} |
|
|
|
\subsection{Dictionary} |
|
Unlike lists, each entry in a dictionary has a key and a value. These key--value pairs constitute the dictionary content. |
|
\begin{python} |
|
thisdict = { |
|
"brand": "Ford", |
|
"model": "Mustang", |
|
"year": 1964 |
|
} |
|
print(thisdict) |
|
#{'brand': 'Ford', 'model': 'Mustang', 'year': 1964} |
|
\end{python} |
|
|
|
\subsubsection{access dictionary values} |
|
\begin{python} |
|
thisdict = { |
|
"brand": "Ford", |
|
"model": "Mustang", |
|
"year": 1964 |
|
} |
|
print(thisdict["brand"]) |
|
#Ford |
|
\end{python} |
|
|
|
\subsubsection{access dictionary keys or values} |
|
\begin{python} |
|
car = { |
|
"brand": "Ford", |
|
"model": "Mustang", |
|
"year": 1964 |
|
} |
|
|
|
x = car.keys() |
|
print(x) |
|
#dict_keys(['brand', 'model', 'year']) |
|
v = car.values() |
|
print(v) |
|
#dict_values(['Ford', 'Mustang', 1964]) |
|
\end{python} |
|
|
|
\subsubsection{inverse dictionary lookup} |
|
\begin{python} |
|
d = {"a":0, "b":1, "c":2} |
|
dict(zip(d.values(), d.keys())) |
|
#{0: 'a', 1: 'b', 2: 'c'} |
|
\end{python} |
|
|
|
\subsubsection{change dictionary items} |
|
\begin{python} |
|
car = { |
|
"brand": "Ford", |
|
"model": "Mustang", |
|
"year": 1964 |
|
} |
|
x = car.items() |
|
|
|
print(x) #before the change |
|
#dict_items([('brand', 'Ford'), ('model', 'Mustang'), ('year', 1964)]) |
|
|
|
car["year"] = 2020 |
|
print(x) #after the change |
|
#dict_items([('brand', 'Ford'), ('model', 'Mustang'), ('year', 2020)]) |
|
\end{python} |
|
|
|
\subsubsection{add dictionary items} |
|
\begin{python} |
|
thisdict = { |
|
"brand": "Ford", |
|
"model": "Mustang", |
|
"year": 1964 |
|
} |
|
thisdict["color"] = "red" |
|
print(thisdict) |
|
#{'brand': 'Ford', 'model': 'Mustang', 'year': 1964, 'color': 'red'} |
|
\end{python} |
|
|
|
\subsubsection{remove dictionary item} |
|
\begin{python} |
|
thisdict = { |
|
"brand": "Ford", |
|
"model": "Mustang", |
|
"year": 1964 |
|
} |
|
del thisdict["model"] |
|
print(thisdict) |
|
#{'brand': 'Ford', 'year': 1964} |
|
\end{python} |
|
|
|
\subsubsection{looping a dictionary} |
|
\begin{python} |
|
thisdict={'brand': 'Ford', 'year': 1964} |
|
for x, y in thisdict.items(): |
|
print(x, y) |
|
#brand Ford |
|
#year 1964 |
|
\end{python} |
|
|
|
\subsubsection{copy a dictionary} |
|
\begin{python} |
|
thisdict = { |
|
"brand": "Ford", |
|
"model": "Mustang", |
|
"year": 1964 |
|
} |
|
mydict = thisdict.copy() |
|
print(mydict) |
|
#{'brand': 'Ford', 'model': 'Mustang', 'year': 1964} |
|
\end{python} |
|
|
|
|
|
\subsubsection{intersections of two dictionaries} |
|
\begin{python} |
|
# Two dictionaries that share exactly one key ('python').
some_dict = {'zope': 'zzz', 'python': 'rocks'}
another_dict = {'python': 'rocks', 'perl': 'interesting'}

# Collect the keys of some_dict that also appear in another_dict.
common_keys = [k for k in some_dict if k in another_dict]
print("Intersects:", common_keys)
#Intersects: ['python']
|
\end{python} |
|
|
|
\subsubsection{other dictionary operations} |
|
\begin{python} |
|
clear() Removes all the elements from the dictionary |
|
copy() Returns a copy of the dictionary |
|
fromkeys() Returns a dictionary with the specified keys and value |
|
get() Returns the value of the specified key |
|
items() Returns a list containing a tuple for each key value pair |
|
keys() Returns a list containing the dictionary's keys |
|
pop() Removes the element with the specified key |
|
popitem() Removes the last inserted key-value pair |
|
setdefault() Returns the value of the specified key. If the key does not exist: insert the key, with the specified value |
|
update() Updates the dictionary with the specified key-value pairs |
|
values() Returns a list of all the values in the dictionary |
|
\end{python} |
|
|
|
\subsection{Sets} |
|
Python sets are like lists: they are used to store multiple items in a single variable. Set elements are unordered. An item cannot be altered after it has been added, but we can add elements to and remove elements from the set. Unlike lists, sets can't have two identical elements. |
|
|
|
\begin{python} |
|
thisset = {"apple", "banana", "cherry"} |
|
print(thisset) |
|
#{'apple', 'cherry', 'banana'} |
|
|
|
##length |
|
print(len(thisset)) |
|
#3 |
|
\end{python} |
|
|
|
Sets can have different objects as elements. |
|
\begin{python} |
|
set1 = {"abc", 34, True, 40, "male"} |
|
\end{python} |
|
|
|
\subsubsection{access elements} |
|
\begin{python} |
|
thisset = {"apple", "banana", "cherry"} |
|
for x in thisset: |
|
print(x) |
|
#apple |
|
#cherry |
|
#banana |
|
\end{python} |
|
|
|
\subsubsection{check element existence} |
|
\begin{python} |
|
thisset = {"apple", "banana", "cherry"} |
|
print("banana" in thisset) |
|
#True |
|
\end{python} |
|
|
|
\subsubsection{add elements} |
|
\begin{python} |
|
thisset = {"apple", "banana", "cherry"} |
|
thisset.add("orange") |
|
print(thisset) |
|
#{'orange', 'apple', 'cherry', 'banana'} |
|
\end{python} |
|
|
|
\subsubsection{remove elements} |
|
We can remove the set elements with remove() function. |
|
\begin{python} |
|
thisset = {"apple", "banana", "cherry"} |
|
thisset.remove("banana") |
|
print(thisset) |
|
#{'apple', 'cherry'} |
|
\end{python} |
|
|
|
Alternatively, we can remove an arbitrary element using the pop() function; because sets are unordered, you cannot know in advance which element is removed. |
|
\begin{python} |
|
thisset = {"apple", "banana", "cherry"} |
|
|
|
x = thisset.pop() |
|
print(x) |
|
#apple |
|
print(thisset) |
|
#{'cherry', 'banana'} |
|
\end{python} |
|
|
|
\subsubsection{looping sets} |
|
\begin{python} |
|
thisset = {"apple", "banana", "cherry"} |
|
|
|
for x in thisset: |
|
print(x) |
|
#apple |
|
#cherry |
|
#banana |
|
\end{python} |
|
|
|
\subsubsection{joining/combining/ (union) of sets} |
|
\begin{python} |
|
set1 = {"a", "b" , "c"} |
|
set2 = {1, 2, 3} |
|
set3 = set1.union(set2) |
|
print(set3) |
|
#{'a', 'b', 1, 2, 3, 'c'} |
|
\end{python} |
|
|
|
\subsubsection{merge (intersection) of sets} |
|
\begin{python} |
|
x = {"apple", "banana", "cherry"} |
|
y = {"google", "microsoft", "apple"} |
|
z = x.intersection(y) |
|
print(z) |
|
#{'apple'} |
|
\end{python} |
|
|
|
\subsection{Tuples} |
|
Tuples are used to store multiple items in a single variable. |
|
Elements of tuples are ordered and unchangeable. |
|
\begin{python} |
|
thistuple = ("apple", "banana", "cherry") |
|
print(thistuple) |
|
#('apple', 'banana', 'cherry') |
|
|
|
print(len(thistuple)) |
|
#3 |
|
|
|
## a tuple with different objects |
|
tuple1 = ("abc", 34, True, 40, "male") |
|
\end{python} |
|
|
|
\subsubsection{accessing elements} |
|
\begin{python} |
|
thistuple = ("apple", "banana", "cherry") |
|
print(thistuple[1]) |
|
#banana |
|
|
|
##negative indexing |
|
print(thistuple[-1]) |
|
#cherry |
|
|
|
##slicing |
|
thistuple = ("apple", "banana", "cherry", "orange", "kiwi", "melon", "mango") |
|
print(thistuple[2:5]) |
|
#('cherry', 'orange', 'kiwi') |
|
|
|
print(thistuple[:4]) |
|
#('apple', 'banana', 'cherry', 'orange') |
|
\end{python} |
|
|
|
\subsubsection{check element existence} |
|
\begin{python} |
|
thistuple = ("apple", "banana", "cherry") |
|
if "apple" in thistuple: |
|
print("Yes, 'apple' is in the fruits tuple") |
|
#Yes, 'apple' is in the fruits tuple |
|
\end{python} |
|
|
|
|
|
\subsubsection{unpack elements} |
|
\begin{python} |
|
fruits = ("apple", "banana", "cherry", "strawberry", "raspberry") |
|
|
|
(green, yellow, *red) = fruits |
|
|
|
print(green) |
|
#apple |
|
print(yellow) |
|
#banana |
|
print(red) |
|
#['cherry', 'strawberry', 'raspberry'] |
|
\end{python} |
|
|
|
|
|
\subsubsection{looping elements} |
|
\begin{python} |
|
thistuple = ("apple", "banana", "cherry") |
|
for x in thistuple: |
|
print(x) |
|
#apple |
|
#banana |
|
#cherry |
|
\end{python} |
|
|
|
|
|
\subsubsection{joining two tuples} |
|
\begin{python} |
|
tuple1 = ("a", "b" , "c") |
|
tuple2 = (1, 2, 3) |
|
|
|
tuple3 = tuple1 + tuple2 |
|
print(tuple3) |
|
#('a', 'b', 'c', 1, 2, 3) |
|
\end{python} |
|
|
|
\subsubsection{multiply tuples} |
|
\begin{python} |
|
fruits = ("apple", "banana", "cherry") |
|
mytuple = fruits * 2 |
|
print(mytuple) |
|
#('apple', 'banana', 'cherry', 'apple', 'banana', 'cherry') |
|
\end{python} |
|
|
|
\subsection{If conditional} |
|
If conditional checks for cases in your program. |
|
\begin{python} |
|
if 5 > 2: |
|
print("Five is greater than two!") |
|
#Five is greater than two! |
|
\end{python} |
|
|
|
\subsubsection{Elif conditional} |
|
\begin{python} |
|
a = 33 |
|
b = 33 |
|
if b > a: |
|
print("b is greater than a") |
|
elif a == b: |
|
print("a and b are equal") |
|
#a and b are equal |
|
\end{python} |
|
|
|
\subsubsection{If-Elif-Else conditional} |
|
\begin{python} |
|
a = 200 |
|
b = 33 |
|
if b > a: |
|
print("b is greater than a") |
|
elif a == b: |
|
print("a and b are equal") |
|
else: |
|
print("a is greater than b") |
|
#a is greater than b |
|
\end{python} |
|
|
|
\subsubsection{Short If} |
|
\begin{python} |
|
if a > b: print("a is greater than b") |
|
#a is greater than b |
|
\end{python} |
|
|
|
\subsubsection{Short If-Else} |
|
\begin{python} |
|
a = 2 |
|
b = 330 |
|
print("A") if a > b else print("B") |
|
# B |
|
\end{python} |
|
|
|
\subsubsection{And or conditional} |
|
\begin{python} |
|
a = 200 |
|
b = 33 |
|
c = 500 |
|
if a > b and c > a: |
|
print("Both conditions are True") |
|
# Both conditions are True |
|
|
|
if a > b or a > c: |
|
print("At least one of the conditions is True") |
|
#At least one of the conditions is True |
|
\end{python} |
|
|
|
\subsubsection{Nested if} |
|
With nested if's you can create branches in your program. |
|
\begin{python} |
|
x = 41 |
|
if x > 10: |
|
print("Above ten,") |
|
if x > 20: |
|
print("and also above 20!") |
|
else: |
|
print("but not above 20.") |
|
#Above ten, |
|
#and also above 20! |
|
\end{python} |
|
|
|
\subsubsection{Pass} |
|
An if statement cannot have an empty body. If you need an empty body, use the pass statement to avoid getting an error. |
|
\begin{python} |
|
a = 33 |
|
b = 200 |
|
if b > a: |
|
pass |
|
\end{python} |
|
|
|
\subsection{While Loop} |
|
\begin{python} |
|
i = 1 |
|
while i < 4: |
|
print(i) |
|
i += 1 |
|
#1 |
|
#2 |
|
#3 |
|
\end{python} |
|
|
|
\subsubsection{Break statement} |
|
You can exit the loops immediately with break statement. |
|
\begin{python} |
|
i = 1 |
|
while i < 6: |
|
print(i) |
|
if i == 3: |
|
break |
|
i += 1 |
|
#1 |
|
#2 |
|
#3 |
|
\end{python} |
|
|
|
\subsubsection{Continue statement} |
|
With continue statement we can stop the current iteration, and continue with the next. |
|
\begin{python} |
|
i = 0 |
|
while i < 6: |
|
i += 1 |
|
if i == 3: |
|
continue |
|
print(i) |
|
#1 |
|
#2 |
|
#4 |
|
#5 |
|
#6 |
|
\end{python} |
|
|
|
\subsubsection{While-Else statement} |
|
\begin{python} |
|
i = 1 |
|
while i < 6: |
|
print(i) |
|
i += 1 |
|
else: |
|
print("i is no longer less than 6") |
|
#1 |
|
#2 |
|
#3 |
|
#4 |
|
#5 |
|
#i is no longer less than 6 |
|
\end{python} |
|
|
|
\subsection{For Loop} |
|
\begin{python} |
|
fruits = ["apple", "banana", "cherry"] |
|
for x in fruits: |
|
print(x) |
|
#apple |
|
#banana |
|
#cherry |
|
\end{python} |
|
|
|
|
|
\subsubsection{Looping integers} |
|
\begin{python} |
|
for x in range(4): |
|
print(x) |
|
#0 |
|
#1 |
|
#2 |
|
#3 |
|
\end{python} |
|
|
|
\subsubsection{Looping a string variable} |
|
\begin{python} |
|
for x in "car": |
|
print(x) |
|
#c |
|
#a |
|
#r |
|
\end{python} |
|
|
|
\subsubsection{breaking the loop} |
|
\begin{python} |
|
fruits = ["apple", "banana", "cherry"] |
|
for x in fruits: |
|
print(x) |
|
if x == "banana": |
|
break |
|
#apple |
|
#banana |
|
\end{python} |
|
|
|
\subsubsection{for loop nested} |
|
\begin{python} |
|
adj = ["red", "big", "tasty"] |
|
fruits = ["apple", "banana", "cherry"] |
|
|
|
for x in adj: |
|
for y in fruits: |
|
print(x, y) |
|
#red apple |
|
#red banana |
|
#red cherry |
|
#big apple |
|
#big banana |
|
#big cherry |
|
#tasty apple |
|
#tasty banana |
|
#tasty cherry |
|
\end{python} |
|
|
|
\subsubsection{for loop pass statement} |
|
\begin{python} |
|
for x in [0, 1, 2]: |
|
pass |
|
\end{python} |
|
|
|
\subsection{Function} |
|
In python, you can declare functions. Functions are code pieces that you can execute multiple times easily. |
|
|
|
\subsubsection{defining a function} |
|
\begin{python} |
|
# define the function |
|
def my_function(): |
|
print("Hello from a function") |
|
\end{python} |
|
|
|
\subsubsection{calling a function} |
|
\begin{python} |
|
def my_function(): |
|
print("Hello from a function") |
|
|
|
my_function() |
|
#Hello from a function |
|
\end{python} |
|
|
|
\subsubsection{giving function an argument} |
|
|
|
\begin{python} |
|
def my_function(fname): |
|
print(fname + " Refsnes") |
|
|
|
my_function("Emil") |
|
#Emil Refsnes |
|
my_function("Tobias") |
|
#Tobias Refsnes |
|
my_function("Linus") |
|
#Linus Refsnes |
|
\end{python} |
|
|
|
|
|
\subsubsection{giving function multiple arguments} |
|
\begin{python} |
|
def my_function(fname, lname): |
|
print(fname + " " + lname) |
|
|
|
my_function("Emil", "Refsnes") |
|
#Emil Refsnes |
|
\end{python} |
|
|
|
\subsubsection{giving function with default parameters} |
|
\begin{python} |
|
def my_function(country = "Norway"): |
|
print("I am from " + country) |
|
|
|
my_function("Sweden") |
|
#I am from Sweden |
|
my_function("India") |
|
#I am from India |
|
my_function() |
|
#I am from Norway |
|
my_function("Brazil") |
|
#I am from Brazil |
|
\end{python} |
|
|
|
\subsubsection{giving function unknown number of parameters} |
|
\begin{python} |
|
def my_function(*kids): |
|
print("The youngest child is " + kids[2]) |
|
|
|
my_function("Emil", "Tobias", "Linus") |
|
#The youngest child is Linus |
|
\end{python} |
|
|
|
\subsubsection{function returning values} |
|
\begin{python} |
|
def my_function(x): |
|
return 5 * x |
|
|
|
print(my_function(3)) |
|
#15 |
|
print(my_function(5)) |
|
#25 |
|
print(my_function(9)) |
|
#45 |
|
|
|
\end{python} |
|
|
|
\subsubsection{function returning multiple values} |
|
\begin{python} |
|
def my_function(x): |
|
return 5 * x, x * x |
|
|
|
print(my_function(3)) |
|
#(15, 9) |
|
print(my_function(10)) |
|
#(50, 100) |
|
print(type(my_function(10))) |
|
#<class 'tuple'> |
|
\end{python} |
|
|
|
\subsubsection{shortcut function: lambda} |
|
Lambda functions are anonymous one-line functions. Sometimes you need to define such small functions inside other code (for example, as an argument to another function), and lambda makes this convenient. |
|
\begin{python} |
|
x = lambda a : a + 10 |
|
print(x(5)) |
|
#15 |
|
|
|
x = lambda a, b : a * b |
|
print(x(5, 6)) |
|
#30 |
|
|
|
x = lambda a, b, c : a + b + c |
|
print(x(5, 6, 2)) |
|
#13 |
|
\end{python} |
|
|
|
\subsection{Modules} |
|
Python modules are libraries (sets of functions and variables) written by you or by others. Using them helps you to re-use existing code. |
|
|
|
\subsubsection{writing a module} |
|
Save the following to a file named mymodule.py . |
|
\begin{python} |
|
def greeting(name): |
|
print("Hello, " + name) |
|
|
|
person1 = { |
|
"name": "John", |
|
"age": 36, |
|
"country": "Norway" |
|
} |
|
|
|
\end{python} |
|
|
|
\subsubsection{use functions of a module} |
|
\begin{python} |
|
import mymodule |
|
mymodule.greeting("Jonathan") |
|
#Hello, Jonathan |
|
\end{python} |
|
|
|
\subsubsection{custom naming a module} |
|
\begin{python} |
|
import mymodule as mx |
|
a = mx.person1["age"] |
|
print(a) |
|
#36 |
|
\end{python} |
|
|
|
\subsubsection{partial import of a module} |
|
Sometimes, you just need a function, or an object from a module package. In these cases, you only import that part of the module. No need to import whole module. This is a better practice. |
|
\begin{python} |
|
from mymodule import person1 |
|
print (person1["age"]) |
|
#36 |
|
\end{python} |
|
|
|
\subsubsection{listing functions of the module} |
|
You can list all the names (functions, classes, variables) defined in a module with the dir() function. |
|
\begin{python} |
|
import platform |
|
x = dir(platform) |
|
# too long to write the output... check your program. |
|
\end{python} |
|
|
|
\subsection{Classes} |
|
%Python supports object oriented programming (OOP). In default, what we are doing is functional programming. That is, |
|
%the program is a composition of functions. In OOP, similar to the real world, we have objects. These objects are represented by classes in the program. |
|
|
|
%A class has attributes, and methods (in other word class functions). We create instances from that class using class constructor. |
|
\begin{python} |
|
class Person:
    """A person with a name and an age."""

    def __init__(self, name, age):
        # Store the constructor arguments as instance attributes.
        self.name = name
        self.age = age

    def myfunc(self):
        """Print a greeting that includes this person's name."""
        print("Hello my name is " + self.name)


p1 = Person("John", 36)
p1.myfunc()
#Hello my name is John
|
\end{python} |
|
|
|
\subsection{Uncategorized} |
|
|
|
\subsubsection{add padding to opencv image} |
|
\begin{python} |
|
|
|
# Requires `import cv2`; img_dilated is the input image (defined elsewhere).
# Signature: cv2.copyMakeBorder(src, top, bottom, left, right, borderType[, dst[, value]])
# The border colour must be passed by keyword, because the 7th positional
# slot is `dst`, not `value`.
padSize = 500  # border width added on every side
ib = cv2.copyMakeBorder(img_dilated, padSize, padSize, padSize, padSize,
                        cv2.BORDER_CONSTANT, value=(0, 0, 0))
|
|
|
\end{python} |
|
\subsubsection{find neighboring pairs in list} |
|
\begin{python} |
|
A = [1, 2, 3, 4] |
|
B = [(i,j) for i,j in zip(A, A[1:])] |
|
#[(1, 2), (2, 3), (3, 4)] |
|
\end{python} |
|
|
|
\subsubsection{find most frequent elements in list} |
|
\begin{python} |
|
from collections import Counter |
|
mylist = [1,1,1,1,3,4,5,5,5,6,6,7,8,9] |
|
counter = Counter(mylist) |
|
most_common = counter.most_common(2) |
|
print(most_common) |
|
#[(1, 4), (5, 3)] # 4 ones, 3 fives seen. |
|
\end{python} |
|
|
|
\subsubsection{cartesian product of two lists} |
|
\begin{python} |
|
import itertools |
|
a = [1,2,3] |
|
b = [4,5,6] |
|
for i in itertools.product( a, b ): |
|
print(i) |
|
\end{python} |
|
|
|
\subsubsection{pad integer to have zeros} |
|
\begin{python} |
|
def NDigited(x, n=3):
    """Return x as a string left-padded with zeros to at least n characters.

    x may be an integer or a string; it is converted with str() first.
    If x already has n or more characters it is returned unpadded.
    """
    s = str(x)
    # A negative repeat count yields an empty string, so long inputs pass through.
    return (n - len(s)) * '0' + s
|
\end{python} |
|
|
|
\paragraph{optimized version} |
|
\credit{jnmbk} |
|
\begin{python} |
|
def NDigited(x, n=3):
    """Return x as a string left-padded with zeros to at least n characters.

    x may be an integer or a string; str.zfill does the padding.
    """
    return str(x).zfill(n)
|
\end{python} |
|
|
|
\subsubsection{matplotlib display strings on y-axis} |
|
\begin{python} |
|
yticks(np.arange(5), ('String1', 'String2', 'String3', '4', '5')) |
|
\end{python} |
|
|
|
\subsubsection{broadcast image or matrix channels} |
|
This operation is usually needed to go from a single-channel (2D) image to a multi-channel (3D) image. |
|
\begin{python} |
|
nChannels = 3 |
|
m3d = np.repeat( m.reshape(m.shape[0], m.shape[1], 1), nChannels, axis=2) |
|
\end{python} |
|
|
|
Another identical way to do it. |
|
\begin{python} |
|
nChannels = 3 |
|
m3d = np.tile(m[:, :, None], [1, 1, nChannels]) |
|
\end{python} |
|
|
|
\subsubsection{1d interpolation of x-y values} |
|
Given a set of 2D points, we fit a curve to these points. |
|
\begin{python} |
|
import numpy as np
from scipy.interpolate import interp1d

# Sample points of y = x^2 on the interval [0, 5].
xdata = [0, 1, 2, 3, 4, 5]
ydata = [0, 1, 4, 9, 16, 25]
# Build a piecewise-quadratic interpolant through the samples.
f2 = interp1d(xdata, ydata, kind='quadratic')
# Evaluate only inside the sampled range: by default interp1d raises
# ValueError for points outside [min(xdata), max(xdata)].
xnew = np.linspace(0, 5, 1000)
ynew = f2(xnew)
|
\end{python} |
|
|
|
\subsection{useful numpy functions} |
|
\begin{python} |
|
import numpy as np

## remove empty (length-1) dimensions
x = np.array([[[0], [1], [2]]])
print(x.shape)
#(1, 3, 1)
x = np.squeeze(x)  # drops every axis of length 1
print(x.shape)
#(3,)
|
|
|
## stack list of matrices depth-wise (along the third axis) |
|
x = np.dstack( possibleCurves ) |
|
|
|
## randomly choose 5 values from the interval 0-100 |
|
randIdxs = np.random.choice(100 , 5, replace=False) |
|
|
|
## reshape 1D data for one feature problems |
|
X = x.reshape(-1,1) |
|
|
|
\end{python} |
|
|
|
\section{String Manipulation, Searching, Sorting}\label{sect:strings} |
|
|
|
\subsection{substring search} |
|
|
|
\begin{python} |
|
word = 'cart for supermarket'

## substring search: find the index of the first occurrence
result = word.find('supermarket')
print("Substring 'supermarket' found at index:", result)
#Substring 'supermarket' found at index: 9

## substring search restricted to word[4:12], i.e. ' for sup'
print(word.find('su', 4, 12))
#9
|
\end{python} |
|
|
|
\subsubsection{string between two substrings} |
|
\begin{python} |
|
import re |
|
s = 'asdf=5;iwantthis123jasd' |
|
result = re.search('asdf=5;(.*)123jasd', s) |
|
print(result.group(1)) |
|
#iwantthis |
|
\end{python} |
|
|
|
\subsubsection{Create index for strings} |
|
\begin{python} |
|
a = ['a', 'b', 'c'] |
|
b = dict(map(lambda t: (t[1], t[0]), enumerate(a))) |
|
#{'a':0, 'b':1, 'c':2} |
|
\end{python} |
|
|
|
\subsection{string concatenation} |
|
\begin{python} |
|
s1 = "myStrFirst" |
|
s2 = "secondString" |
|
s3 = s1 + " " + s2 |
|
print(s3) |
|
#myStrFirst secondString |
|
\end{python} |
|
|
|
\subsection{string splitting} |
|
\begin{python} |
|
## simple string splitting |
|
txt = "apple#banana#cherry#orange" |
|
x = txt.split("#") |
|
print(x) |
|
#['apple', 'banana', 'cherry', 'orange'] |
|
|
|
## setting the maxsplit parameter to 1, will return a list with 2 elements! |
|
txt = "apple#banana#cherry#orange" |
|
x = txt.split("#", 1) |
|
#['apple', 'banana#cherry#orange'] |
|
\end{python} |
|
|
|
\subsection{stripping string} |
|
Remove leading and trailing spaces and specific characters at the beginning and at the end of a string. |
|
\begin{python} |
|
txt = " banana sss " |
|
x = txt.strip() |
|
print("of all fruits", x, "is my favorite") |
|
#of all fruits banana sss is my favorite |
|
|
|
txt = ",,,,,rrttgg.....banana....rrr" |
|
x = txt.strip(",.grt") |
|
print(x) |
|
#banana |
|
\end{python} |
|
|
|
|
|
|
|
\subsection{combining list of strings} |
|
\begin{python} |
|
text = ['Python', 'is', 'a', 'fun', 'programming', 'language'] |
|
print(' '.join(text)) |
|
# Python is a fun programming language |
|
\end{python} |
|
|
|
|
|
|
|
\section{Input Output Operations}\label{sect:io} |
|
|
|
\subsection{create a file} |
|
\begin{python} |
|
f = open("demofile3.txt", "w") |
|
f.write("I have added content!") |
|
f.close() |
|
\end{python} |
|
|
|
\subsection{write to a file: fast shortcut} |
|
In this version, you don't have to remember to close the file. |
|
\begin{python} |
|
# The with-statement closes the file automatically, even if write() raises.
with open("demofile3.txt", "w") as fp:
    fp.write("I have added content!")
|
\end{python} |
|
|
|
\subsection{create directory} |
|
The following program checks for a directory, and creates it if not present. |
|
\begin{python} |
|
import os |
|
directory = "newDirectory" |
|
parent_dir = "/home/User/Documents" |
|
path = os.path.join(parent_dir, directory) |
|
os.makedirs(path,exist_ok = True) |
|
\end{python} |
|
|
|
\subsection{remove file} |
|
\begin{python} |
|
import os |
|
fileName = 'myFile.txt' # File name |
|
location = "/home/User/Documents" |
|
path = os.path.join(location, fileName) |
|
|
|
if os.path.exists(path): |
|
os.remove(path) |
|
else: |
|
print("The file does not exist") |
|
\end{python} |
|
|
|
\subsection{save and load pickle file} |
|
Pickle is Python's built-in binary serialization format. It can store almost any Python object. |
|
\begin{python} |
|
|
|
import pickle |
|
|
|
a = {'hello': 'world'} |
|
|
|
with open('filename.pkl', 'wb') as handle: |
|
pickle.dump(a, handle, protocol=pickle.HIGHEST_PROTOCOL) |
|
|
|
with open('filename.pkl', 'rb') as handle: |
|
b = pickle.load(handle) |
|
|
|
print(a == b) |
|
\end{python} |
|
|
|
\begin{python} |
|
import pickle

PICKLE_SUFFIX = '.pkl'


def _with_pickle_suffix(name):
    """Return ``name`` ending in '.pkl'; the suffix is appended only when missing."""
    return name if name.endswith(PICKLE_SUFFIX) else name + PICKLE_SUFFIX


def load_obj(name):
    """Load and return the object pickled in ``name`` (given with or without '.pkl').

    Only load files you trust: unpickling can execute arbitrary code.
    """
    with open(_with_pickle_suffix(name), 'rb') as f:
        return pickle.load(f)


def write_obj(name, data):
    """Pickle ``data`` into ``name`` (given with or without '.pkl').

    Both helpers resolve the file name the same way, so
    write_obj('x', obj) followed by load_obj('x') is a round trip.
    """
    with open(_with_pickle_suffix(name), 'wb') as handle:
        pickle.dump(data, handle, protocol=pickle.HIGHEST_PROTOCOL)
|
\end{python} |
|
|
|
\subsection{joining paths} |
|
\begin{python} |
|
import os |
|
path = "/home" |
|
## Join various path components |
|
print(os.path.join(path, "User/Desktop", "file.txt")) |
|
#/home/User/Desktop/file.txt |
|
\end{python} |
|
|
|
\subsection{check file existence} |
|
\begin{python} |
|
import os

path = 'D:/Pycharm/USER/testFile.txt'

# isfile() is True only when the path exists and is a regular file.
isFile = os.path.isfile(path)
print(isFile)
#False
|
\end{python} |
|
|
|
\subsection{list files in a directory} |
|
\begin{python} |
|
import os |
|
path = "/" |
|
dir_list = os.listdir(path) |
|
|
|
print(dir_list) |
|
# too many output, please run the code yourself. |
|
\end{python} |
|
|
|
\subsection{iterate (traverse) files in a folder} |
|
\begin{python} |
|
import os

# topdown=True yields a directory before its sub-directories.
for (root,dirs,files) in os.walk('Test', topdown=True):
    print (root)    # path of the directory being visited
    print (dirs)    # names of its sub-directories
    print (files)   # names of the files directly inside it
# too much output, please run the code yourself.
|
\end{python} |
|
|
|
\subsection{sort files by date} |
|
\begin{python} |
|
import os |
|
search_dir = "/mydir/" |
|
files = os.listdir(search_dir) |
|
files = [os.path.join(search_dir, f) for f in files] |
|
files.sort(key=lambda x: os.path.getmtime(x)) |
|
\end{python} |
|
|
|
\subsection{write to CSV file} |
|
\begin{python} |
|
import csv

# newline='' lets the csv module control line endings itself;
# without it every row is followed by an empty line on Windows.
with open('employee_file.csv', mode='w', newline='') as employee_file:
    employee_writer = csv.writer(employee_file, delimiter=',', quotechar='"')
    employee_writer.writerow(['name', 'department', 'birthday month'])
    employee_writer.writerow(['John Smith', 'Accounting', 'November'])
    employee_writer.writerow(['Erica Meyers', 'IT', 'March'])
    employee_writer.writerow(['Monica Barker', 'HR', 'December'])
## open the employee_file.csv and you will see:
## name,department,birthday month
## John Smith,Accounting,November
## Erica Meyers,IT,March
## Monica Barker,HR,December
|
\end{python} |
|
|
|
\subsection{read CSV file example} |
|
\begin{python} |
|
import csv

# Create the sample file first so that the example is self-contained.
with open('employee_file.csv', mode='w', newline='') as employee_file:
    employee_writer = csv.writer(employee_file, delimiter=',', quotechar='"')
    employee_writer.writerow(['name', 'department', 'birthday month'])
    employee_writer.writerow(['John Smith', 'Accounting', 'November'])
    employee_writer.writerow(['Erica Meyers', 'IT', 'March'])
    employee_writer.writerow(['Monica Barker', 'HR', 'December'])

# Read back the same file that was written above.
with open('employee_file.csv', newline='') as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=',')
    line_count = 0
    for row in csv_reader:
        if line_count == 0:
            # the first row holds the column names
            print(f'Column names are {", ".join(row)}')
        else:
            print(f'\t{row[0]} works in the {row[1]} department, and was born in {row[2]}.')
        line_count += 1
    print(f'Processed {line_count} lines.')
|
|
|
\end{python} |
|
|
|
\subsubsection{read CSV into list} |
|
\begin{python} |
|
import csv

# Create the sample file first so that the example is self-contained.
with open('employee_file.csv', mode='w', newline='') as employee_file:
    employee_writer = csv.writer(employee_file, delimiter=',', quotechar='"')
    employee_writer.writerow(['name', 'department', 'birthday month'])
    employee_writer.writerow(['John Smith', 'Accounting', 'November'])
    employee_writer.writerow(['Erica Meyers', 'IT', 'March'])
    employee_writer.writerow(['Monica Barker', 'HR', 'December'])

##
def readCSVIntoList(fileName, discardHeader=False):
    """Return the rows of a comma separated file as a list of lists of strings.

    fileName      -- path of the CSV file to read
    discardHeader -- when True, the first row (column names) is left out
    """
    with open(fileName, newline='') as csv_file:
        rows = list(csv.reader(csv_file, delimiter=','))
    # Slicing also works for an empty file, where rows[0] would raise IndexError.
    return rows[1:] if discardHeader else rows


rows = readCSVIntoList('employee_file.csv',discardHeader=True)
print(rows)
#[['John Smith', 'Accounting', 'November'], ['Erica Meyers', 'IT', 'March'], ['Monica Barker', 'HR', 'December']]
|
\end{python} |
|
|
|
\subsection{Adding Command Line Arguments} |
|
The following program expects command line arguments. If not provided, uses default arguments. |
|
\begin{python} |
|
## run with python sourcefile.py --keyword mykeyword --page 1
## or python sourcefile.py
import argparse

parser = argparse.ArgumentParser(description="Just an example",
                                 formatter_class=argparse.ArgumentDefaultsHelpFormatter)
# The defaults are declared on the options themselves: argparse fills in whichever
# option is omitted, and ArgumentDefaultsHelpFormatter shows them in --help.
parser.add_argument("-k", "--keyword", type=str, default="googleit", help="query keywords")
parser.add_argument("-p", "--page", type=str, default="0", help="query page")
args = parser.parse_args()
config = vars(args)   # the same values as a plain dictionary
#print(config)

searchKeyword = args.keyword
searchPage = args.page   # the attribute is named after the long option: --page

print("SearchKeyword is " + searchKeyword + " Search Page is " + searchPage )
|
\end{python} |
|
|
|
\section{Time and Date} |
|
\subsection{Get current date } |
|
\begin{python} |
|
##Get the current date in DD-MM-YYYY-HR-MM-SS format: |
|
from datetime import datetime |
|
now = datetime.now() |
|
curDate = now.strftime("%d-%m-%Y-%H-%M-%S") |
|
print(curDate) |
|
# 18-06-2022-10-40-59 |
|
\end{python} |
|
|
|
\subsection{Convert Unix time to datetime} |
|
\begin{python} |
|
from datetime import datetime, timezone

x = 1656100252345   # Unix time in milliseconds

# fromtimestamp() expects seconds, hence the division by 1000.
# An explicit UTC zone replaces utcfromtimestamp(), deprecated since Python 3.12.
d = datetime.fromtimestamp(x / 1000, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')
print(d)
#2022-06-24 19:50:52
|
\end{python} |
|
|
|
\section{Algorithms} |
|
\subsection{Remove elements} |
|
|
|
\subsection{Find middle coordinates of a coordinate array} |
|
\begin{python} |
|
linesProc2 = [10, 20, 40, 70]   # sample coordinates

# Pair every coordinate with its successor and take the midpoint of each pair.
midpoints = [ (a + b) / 2 for a, b in zip(linesProc2, linesProc2[1:]) ]
print(midpoints)
#[15.0, 30.0, 55.0]
|
\end{python} |
|
|
|
\section{Regex} |
|
\subsection{Nongreedy regex search} |
|
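By default, regular-expression quantifiers are greedy: they match the longest possible sequence. Append \texttt{?} to a quantifier (for example \texttt{.+?}) to make it non-greedy, so that it matches the shortest possible sequence: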
|
\begin{python} |
|
import re

text = "From: test: test"

# Greedy: ".+" takes as much as it can, so the match ends at the LAST colon.
greedy = re.search(r"^F.+:", text).group()
print(greedy)
#From: test:

# Non-greedy: ".+?" takes as little as it can, so the match ends at the FIRST colon.
nongreedy = re.search(r"^F.+?:", text).group()
print(nongreedy)
#From:
|
\end{python} |
|
|
|
\section{Network}\label{sect:network} |
|
|
|
\subsection{Single threaded to multi threaded} |
|
Python programs are by default single threaded. This source is a multi-threaded example: |
|
\begin{python} |
|
### ---------------------- ### |
|
### The following program is single threaded, it takes approximately six seconds. |
|
### ---------------------- ### |
|
|
|
from time import sleep, perf_counter |
|
|
|
def task(): |
|
print('Starting a task...') |
|
sleep(3) |
|
print('done') |
|
|
|
|
|
start_time = perf_counter() |
|
|
|
task() |
|
task() |
|
|
|
end_time = perf_counter() |
|
|
|
print(f'It took {end_time- start_time: 0.2f} second(s) to complete.') |
|
# result depends on your cpu. please run the code! |
|
|
|
### ---------------------- ### |
|
### The following program is multi-threaded and it takes approximately 3 seconds. |
|
### ---------------------- ### |
|
from time import sleep, perf_counter |
|
from threading import Thread |
|
|
|
def task(): |
|
print('Starting a task...') |
|
sleep(3) |
|
print('done') |
|
|
|
|
|
start_time = perf_counter() |
|
|
|
# create two new threads |
|
t1 = Thread(target=task) |
|
t2 = Thread(target=task) |
|
|
|
# start the threads |
|
t1.start() |
|
t2.start() |
|
|
|
# wait for the threads to complete |
|
t1.join() |
|
t2.join() |
|
|
|
end_time = perf_counter() |
|
|
|
print(f'It took {end_time- start_time: 0.2f} second(s) to complete.') |
|
# result depends on your cpu. please run the code! |
|
\end{python} |
|
|
|
\subsection{Multi-thread with argument} |
|
\begin{python} |
|
from time import sleep, perf_counter |
|
from threading import Thread |
|
def task(id): |
|
print(f'Starting the task {id}...') |
|
sleep(1) |
|
print(f'The task {id} completed') |
|
|
|
start_time = perf_counter() |
|
## create and start 10 threads |
|
threads = [] |
|
for n in range(1, 11): |
|
t = Thread(target=task, args=(n,)) |
|
threads.append(t) |
|
t.start() |
|
|
|
## wait for the threads to complete |
|
for t in threads: |
|
t.join() |
|
|
|
end_time = perf_counter() |
|
print(f'It took {end_time- start_time: 0.2f} second(s) to complete.') |
|
# Please run the program on your computer to see the output! |
|
\end{python} |
|
|
|
|
|
\section{Web}\label{sect:web} |
|
|
|
\subsection{Scrape HTML with Beautiful Soup} |
|
This example code scrapes an HTML page and searches for HTML div tags inside it. |
|
\begin{python} |
|
import requests
from bs4 import BeautifulSoup

URL = "https://edition.cnn.com/"
page = requests.get(URL)

soup = BeautifulSoup(page.content, "html.parser")

## print the HTML content
print(soup.prettify())

## find first div element in the page
myDiv = soup.find("div")

## find all div elements in the page (find_all is the current name of findAll)
myDivs = soup.find_all("div")

## find all divs with class equal to the following string.
job_elements = soup.find_all("div", attrs={"class":"card-content"} )

## get the link target (href attribute) of the first "a" element.
first_link = soup.find('a')
if first_link is not None:      # find() returns None when nothing matches
    print(first_link.get('href'))
|
|
|
\end{python} |
|
|
|
\subsection{Make request until success} |
|
\begin{python} |
|
import time

import requests
from bs4 import BeautifulSoup


def retryResponseGetSoup(url, delay=5, max_retries=None):
    """Request ``url`` until it succeeds and return the page as BeautifulSoup.

    An attempt fails when the request raises, when the status code is not 200,
    or when the page text is exactly 'Baglanti hatasi.' (Turkish for
    "Connection error.").

    url         -- address to fetch, including the scheme (https://...)
    delay       -- seconds to wait between two attempts
    max_retries -- number of retries before giving up; None retries forever
    Raises RuntimeError when max_retries is exhausted.
    """
    retries = 0
    while True:
        try:
            response = requests.get(url, timeout=30)
            bs = BeautifulSoup(response.content, "html.parser")
            if response.status_code == 200 and bs.text != 'Baglanti hatasi.':
                return bs
        except requests.RequestException as err:
            # a dropped connection is a failed attempt, not a reason to stop
            print(f'request failed: {err}')
        if max_retries is not None and retries >= max_retries:
            raise RuntimeError(f'no successful response from {url}')
        retries += 1
        print('retrying...')
        time.sleep(delay)


url = "https://www.google.com"   # requests needs the scheme, not just the host name
s = retryResponseGetSoup(url)
|
|
|
\end{python} |
|
|
|
\subsection{Selenium} |
|
|
|
\subsubsection{Access attribute of an element} |
|
\begin{python} |
|
from selenium.webdriver.common.by import By

### selenium python read an attribute of each matching element
### (assumes `driver` is an already started selenium webdriver)
elements = driver.find_elements(By.XPATH, "//div[contains(@class, 'navigationPane')]/a")
for e in elements:
    # "outerHTML" is the complete HTML of the element, including its own tag
    print(e.get_attribute("outerHTML"))
|
\end{python} |
|
|
|
|
|
\subsubsection{Scroll to element} |
|
\begin{python} |
|
### selenium python scroll to element's location
### (assumes `driver` is a running webdriver and `element` an already located element)
desired_y = element.location['y']
# vertical centre of the visible window, measured from the top of the page
current_y = (driver.execute_script('return window.innerHeight') / 2) + driver.execute_script('return window.pageYOffset')
# scrollBy() is relative: scroll by the distance between the two positions
scroll_y_by = desired_y - current_y
driver.execute_script("window.scrollBy(0, arguments[0]);", scroll_y_by)
|
\end{python} |
|
|
|
\subsubsection{Access pure HTML of the element} |
|
\begin{python} |
|
tableElems = driver.find_element(By.XPATH, "//table[contains(@class, 'morphologyTable')]//tbody") |
|
tableElems.get_attribute("outerHTML") |
|
\end{python} |
|
|
|
\subsubsection{Save cropped screenshot} |
|
\begin{python} |
|
driver.save_screenshot('shot.png') |
|
im = Image.open('shot.png') |
|
im = im.crop((int(x1-5),int(0), int(x1+w1+5), int(y2-y1+h2))) |
|
im.save('shot.png') |
|
\end{python} |
|
|
|
\subsection{Download files} |
|
\begin{python} |
|
import requests

image_url = "https://www.python.org/static/community_logos/python-logo-master-v3-TM.png"

r = requests.get(image_url, timeout=30)
# Stop on an HTTP error instead of saving the error page as a PNG file.
r.raise_for_status()

# 'wb': the body of the response is binary data
with open("python_logo.png",'wb') as f:
    f.write(r.content)
|
|
|
\end{python} |
|
|
|
|
|
\section{Pandas} |
|
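Pandas is an open-source Python library that provides the \texttt{DataFrame} and \texttt{Series} data structures for working with tabular, labelled data. It is useful because loading, cleaning, filtering, aggregating and exporting a data set usually takes only a few lines of code.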
|
|
|
\subsection{read CSV } |
|
We can use the Pandas library to read CSV files easily. The content is loaded into a \texttt{DataFrame}, the table type of the Pandas library.
|
\begin{python} |
|
import csv

import pandas as pd

# Create the sample file first so that the example is self-contained.
with open('employee_file.csv', mode='w', newline='') as employee_file:
    employee_writer = csv.writer(employee_file, delimiter=',', quotechar='"')
    employee_writer.writerow(['name', 'department', 'birthday month'])
    employee_writer.writerow(['John Smith', 'Accounting', 'November'])
    employee_writer.writerow(['Erica Meyers', 'IT', 'March'])
    employee_writer.writerow(['Monica Barker', 'HR', 'December'])

# The first line of the file becomes the column names of the DataFrame.
data= pd.read_csv("employee_file.csv")
print(data)
#            name  department birthday month
#0     John Smith  Accounting       November
#1   Erica Meyers          IT          March
#2  Monica Barker          HR       December
|
\end{python} |
|
|
|
\subsection{analyze and clean data } |
|
\begin{python} |
|
import csv

import pandas as pd

# Sample data: the last employee has no department and no birthday month.
with open('employee_file.csv', mode='w', newline='') as employee_file:
    employee_writer = csv.writer(employee_file, delimiter=',', quotechar='"')
    employee_writer.writerow(['name', 'department', 'birthday month','salary','gender'])
    employee_writer.writerow(['John Smith', 'Accounting', 'November', '100', 'm'])
    employee_writer.writerow(['Erica Meyers', 'IT', 'March', '200', 'f'])
    employee_writer.writerow(['Monica Barker', '', '', '400', 'f'])

df= pd.read_csv("employee_file.csv")

## print head of the data
print(df.head(10))
## print tail of the data
print(df.tail())
## info about the data (info() prints by itself and returns None)
df.info()

## get column data types
print(df.dtypes)

## drop rows that contain at least one empty cell
new_df = df.dropna()
print(new_df.to_string())

## drop unnecessary columns (drop() returns a new frame, so keep the result)
no_gender_df = df.drop(columns=['gender'])

## replace empty places (fillna() returns a new frame unless inplace=True)
filled_df = df.fillna("unknown")

## only replace specific columns
df["salary"] = df["salary"].fillna(130)

## substitute column mean to the empty places
x = df["salary"].mean()
df["salary"] = df["salary"].fillna(x)

## substitute column median to the empty places
x = df["salary"].median()
df["salary"] = df["salary"].fillna(x)

## substitute column mode to the empty places
x = df["salary"].mode()[0]
df["salary"] = df["salary"].fillna(x)

## remove rows with an empty 'department' cell
df.dropna(subset=['department'], inplace = True)
|
|
|
|
|
\end{python} |
|
|
|
\subsection{basic functionality: access, sampling, filtering} |
|
\begin{python} |
|
import pandas as pd

df = pd.read_csv("../pokemon_data.txt", delimiter="\t")
## NOTE: 'CA', 'CB', 'CC', 'Start', 'End' and 'salary' below are placeholder
## column names; replace them with columns of your own data set.

## Read headers
df.columns

## list the frequency of each Generation field
df['Generation'].value_counts()

## give how many uniques are in the dataset
df["Generation"].nunique()

## Read each column
df["Speed"]
df["Speed"][0:5]
df.Speed                  # attribute access: only for names without spaces
df[ ["Speed", "HP"] ]

## Read each row
df.iloc[1]
df.iloc[1:4]
[row for index, row in df.iterrows()]

## Read a specific location (R,C)
df.iloc[2,1]

## Select rows (the comparison goes OUTSIDE the column brackets)
df.loc[ df["Type 1"] == "Fire" ]

## Sorting data
df.sort_values("Speed")
df.sort_values("Speed", ascending=False)
df.sort_values(["HP", "Speed"], ascending=[True, False])   # one flag per column

## Making changes to the data
df['Total'] = df['Total'] - 5
df['Total'] = df[ ['CA','CB','CC'] ].mean(axis=1)   # row-wise mean of three columns


def f(x,y):
    return x+y


## Iterating over one column
result = [x for x in df['End']]

## Iterating over two columns, use `zip`
result = [f(x, y) for x, y in zip(df['Start'], df['End'])]

## Iterating over multiple columns - same data type
result = [f(row[0], row[1]) for row in df[['Start', 'End']].to_numpy()]

## Iterating over multiple columns - differing data type
result = [f(row[0], row[1]) for row in zip(df['Start'], df['End'])]

## explicit row-by-row loops: worst and very slow solutions. Avoid.
for row in df.itertuples():
    print(row)

##Removing columns (the keyword is spelled `inplace`)
df.drop( columns=['Total'], inplace=True)

##Keeping only the rows that satisfy a condition
df.query("salary > 20")

## Summing a dataframe (one sum per row, numeric columns only)
df.sum(axis=1, numeric_only=True)

## Drop repeating entries
df.drop_duplicates(inplace = True)

## save results to csv
df.to_csv("myCsvFile.csv", index=False)
|
|
|
\end{python} |
|
|
|
\subsection{advanced: multi column access, contains, groupby} |
|
\begin{python} |
|
#####################
### Advanced
import pandas as pd

df = pd.read_csv("../pokemon_data.txt", delimiter="\t")

df.loc[ df["Type 1"] == "Grass" ]

## Sample using multiple condition (each condition needs its own parentheses)
new_df = df.loc[ (df["Type 1"] == "Grass") & (df["Type 2"] == "Poison") ]

## After filtering, index stays. You have to reset index then.
new_df.reset_index()                      # keeps the old index as a column
new_df = new_df.reset_index(drop=True)    # removes old idx

## Using contains
df.loc[ df["Name"].str.contains("Mega") ]
df.loc[ ~df["Name"].str.contains("Mega") ] # take other set
df.loc[ df["Name"].str.contains("Fire|Grass", regex=True) ]
df.loc[ df["Name"].str.contains("pi[a-z]*", regex=True) ]

#### Conditional Changes
## Change Type 1 column having entry "Fire" to "Flamer"
df.loc[ df["Type 1"] == "Fire", "Type 1" ] = "Flamer"

## Change two columns at the same time.
df.loc[ df["Total"] > 500, ["Generation", "Legendary"] ] = ["Test 1", "Test2"]

## Aggregate data using groupby
## (numeric_only=True: text columns such as Name have no mean)
df.groupby( ["Type 1"] ).mean(numeric_only=True)
df.groupby("Type 1")['HP'].sum()
df.groupby( ["Type 1"] ).mean(numeric_only=True).sort_values("Defense", ascending=False)
df.groupby( ["Type 1"] ).count()

### Working with large data: read the file in pieces of 5000 rows
for chunk in pd.read_csv("modified.csv", chunksize=5000):
    print(chunk)
|
|
|
\end{python} |
|
|
|
\subsection{calculate column cumulatives} |
|
\begin{python} |
|
import pandas as pd |
|
df = pd.DataFrame(data=[[1, 2, 7, 10], [10, 22, 1, 30], |
|
[30, 42, 2, 10], [100,142, 22,1]], |
|
columns=['Start','End','Value1','Value2']) |
|
|
|
df2 = df[['Value1', 'Value2']].cumsum() |
|
df2.rename(columns={'Value1': 'Cumulative Value1', 'Value2': 'Cumulative Value2'}, |
|
inplace=True) |
|
print(df2) |
|
\end{python} |
|
|
|
\subsection{operations on two data frames} |
|
\begin{python} |
|
import numpy as np |
|
import pandas as pd |
|
|
|
df = pd.DataFrame(data=[[1, 2, 7, 10], [10, 22, 1, 30], |
|
[30, 42, 2, 10], [100,142, 22,1]], |
|
columns=['Value1','Value2','Value3','Value4']) |
|
|
|
df2 = pd.DataFrame(data=[[10, 20, 30, 40], [5, 1, 6, 32], |
|
[143, 152, 2, 10], [np.nan, 162, 12, 11]], |
|
columns=['Value1','Value2','Value3','Value4']) |
|
|
|
## add dataframes |
|
df + df2 |
|
|
|
## replaces missing values with 0 while adding |
|
df.add(df2, fill_value=0) |
|
|
|
## check whether df > df2. Result is a boolean filled data frame. |
|
## eq, ne, lt, gt, le, and ge are the functions here. |
|
## their usage is the same. |
|
df.gt(df2) |
|
\end{python} |
|
|
|
\subsubsection{Concat join rows} |
|
\begin{python} |
|
import pandas as pd

df = pd.DataFrame(data=[[10, 20, 30], [11, 21, 31]], columns=['Key1','Key2', 'Key3'] )
df2 = pd.DataFrame(data=[[5, 6, 7], [5, 8, 12]], columns=['Key1','Key2', 'Key3'] )

# axis=0 stacks the rows of df2 below the rows of df
df3 = pd.concat([df, df2], axis=0)
# reset_index() returns a new frame; without it the index would be 0, 1, 0, 1
df3 = df3.reset_index(drop=True)
print(df3)
#   Key1  Key2  Key3
#0    10    20    30
#1    11    21    31
#2     5     6     7
#3     5     8    12
|
\end{python} |
|
|
|
\subsubsection{Concat join rows with different columns} |
|
\begin{python} |
|
import pandas as pd |
|
df = pd.DataFrame(data=[[10, 20, 30], [11, 21, 31]], columns=['Key1','Key2', 'Key3'] ) |
|
df2 = pd.DataFrame(data=[[5, "Lazy"], [5, "Hardworking"]],columns=['Key4','Key5'] ) |
|
pd.concat([df, df2], axis=0) |
|
# Key1 Key2 Key3 Key4 Key5 |
|
#0 10.0 20.0 30.0 NaN NaN |
|
#1 11.0 21.0 31.0 NaN NaN |
|
#0 NaN NaN NaN 5.0 Lazy |
|
#1 NaN NaN NaN 5.0 Hardworking |
|
\end{python} |
|
|
|
\subsubsection{Concat join columns} |
|
\begin{python} |
|
import pandas as pd |
|
df = pd.DataFrame(data=[[10, 20, 30], [11, 21, 31]], columns=['Key1','Key2', 'Key3'] ) |
|
df2 = pd.DataFrame(data=[[5, "Lazy"], [5, "Hardworking"]], columns=['Key4','Key5'] ) |
|
|
|
pd.concat([df, df2], axis=1) |
|
# Key1 Key2 Key3 Key4 Key5 |
|
#0 10 20 30 5 Lazy |
|
#1 11 21 31 5 Hardworking |
|
\end{python} |
|
|
|
\subsection{applying a function to dataFrame rows or columns} |
|
\begin{python} |
|
import numpy as np
import pandas as pd

df = pd.DataFrame(data=[["Kevin", 2, 6.], ["Frank", 22, 8.],
                        ["Sarah", 4, 5.], ["Galvin", 3, 10.]],
                  columns=['Name','Years','Ability'])
print(df)
#     Name  Years  Ability
#0   Kevin      2      6.0
#1   Frank     22      8.0
#2   Sarah      4      5.0
#3  Galvin      3     10.0

## axis=0: the function receives one COLUMN at a time -> one sum per column
df1 = df[["Years", "Ability"]].apply(np.sum, axis=0)
print(df1)
#Years      31.0
#Ability    29.0

## axis=1: the function receives one ROW at a time -> one sum per row
df2 = df[["Years", "Ability"]].apply(np.sum, axis=1)
print(df2)
#0     8.0
#1    30.0
#2     9.0
#3    13.0
|
\end{python} |
|
|
|
\subsection{plot values with dates on x axis} |
|
\begin{python} |
|
import pandas as pd |
|
import matplotlib.pyplot as plt |
|
df = pd.DataFrame(data=[["10-06-2022", 5], ["09-06-2022", 3], ["11-06-2022", 20], |
|
["13-06-2022", 12],["12-06-2022", 15], ["14-06-2022", 7]], columns=['Date','Sales']) |
|
|
|
df["time"] = pd.to_datetime(df['Date'], format='%d-%m-%Y') |
|
##df["time"] = pd.to_datetime(df['Date'], format='%Y-%m-%d %H:%M:%S.%f') |
|
|
|
df.set_index(['time'],inplace=True) |
|
df.plot() |
|
\end{python} |
|
|
|
\subsection{Example code 1:} |
|
\begin{python} |
|
import pandas as pd |
|
import seaborn as sns |
|
import matplotlib.pyplot as plt |
|
|
|
df = pd.read_csv("examples/4weeks_date.csv") |
|
|
|
##see the columns |
|
df.columns |
|
|
|
##strip the column names to remove extra whitespaces |
|
df = df.rename(columns=lambda x: x.strip()) |
|
|
|
##check dataset |
|
df.describe() |
|
df.info() |
|
print("initial length of dataset %d " % len(df) ) |
|
|
|
##drop rows when ENTRIES or EXITS is zero |
|
df_clean = df.query("ENTRIES > 0") |
|
df_clean = df_clean.query("EXITS > 0") |
|
print("length after cleaning 1 %d " % len(df_clean) ) |
|
|
|
##drop entries bigger than 5M |
|
df_clean = df_clean.query("ENTRIES < 5000000") |
|
df_clean = df_clean.query("EXITS < 5000000") |
|
df_clean.reset_index(drop = True, inplace = True) |
|
print("length after cleaning 2 %d " % len(df_clean) ) |
|
|
|
##compute total activity : ENTRIES + EXITS |
|
df_clean["TA"] = df_clean["ENTRIES"] + df_clean["EXITS"] |
|
|
|
##combine date and time columns. Then convert to pdDate |
|
df_clean["DT"] = df_clean["DATE"] + " " + df_clean["TIME"] |
|
df_clean["DATETIME"] = pd.to_datetime(df_clean['DT'], format='%m/%d/%Y %H:%M:%S') |
|
|
|
##select a station and sum same day activities |
|
df_clean["LINENAME"].value_counts() |
|
usedLineName = "1237ACENQRS" |
|
myLineDF = df_clean[ df_clean["LINENAME"] == usedLineName] |
|
myLineDF.reset_index(drop = True, inplace = True) |
|
|
|
##sort rows with DATE + TIME |
|
myLineDF = myLineDF.sort_values( ["DATE","TIME"] ) |
|
myLineDF.reset_index(drop = True, inplace = True) |
|
|
|
##add rows with identical DATETIME
## Only the numeric columns are summed. DATETIME is kept as the index so that
## the x axis of the plots below shows real dates instead of row numbers.
myLineDFGrouped = myLineDF.groupby("DATETIME")[["ENTRIES", "EXITS", "TA"]].sum()

## Create figure and plot space
fig, ax = plt.subplots(figsize=(10, 10))
ax.plot(myLineDFGrouped.index , myLineDFGrouped["TA"], color='purple')
ax.set(xlabel="Date", ylabel="Total activity", title="Total activity for line: %s" % usedLineName )
plt.show()

## Alternatively use default Plotter
myLineDFGrouped.plot()
|
\end{python} |
|
|
|
\section{Matplotlib Visualization} |
|
\subsection{Basics} |
|
|
|
\subsubsection{Basic plotting} |
|
\begin{python} |
|
from matplotlib import pyplot as plt |
|
plt.figure(figsize = (20,9)) |
|
plt.plot() |
|
\end{python} |
|
|
|
|
|
|
|
|
|
\section{Seaborn Visualization} |
|
Seaborn is a visualization library on top of Matplotlib. Seaborn is more comfortable in handling Pandas data frames. |
|
|
|
\subsection{Plotting a histogram}
|
\begin{python} |
|
import matplotlib.pyplot as plt
import seaborn as sns

## plot only density
## (distplot is deprecated; kdeplot draws the density curve alone)
sns.kdeplot([0, 1, 2, 3, 4, 5])
plt.show()

## plot density + histogram
## (stat="density" scales the bars like the density curve drawn by kde=True)
sns.histplot([0, 1, 2, 3, 3, 3, 3, 4, 5, 7], kde=True, stat="density")
plt.show()
|
\end{python} |
|
|
|
\subsection{Lineplot} |
|
\begin{python} |
|
import matplotlib.pyplot as plt |
|
import seaborn as sns |
|
|
|
## loading dataset |
|
data = sns.load_dataset("iris") |
|
# [150 rows x 5 columns] |
|
|
|
### draw lineplot |
|
sns.lineplot(x="sepal_length", y="sepal_width", data=data) |
|
|
|
## setting the x limit of the plot |
|
plt.xlim(5) |
|
|
|
\end{python} |
|
|
|
\subsection{Scatterplot} |
|
\begin{python} |
|
# importing packages |
|
import seaborn as sns |
|
import matplotlib.pyplot as plt |
|
|
|
# loading dataset |
|
data = sns.load_dataset("iris") |
|
|
|
sns.scatterplot(x='sepal_length', y='sepal_width', data=data) |
|
plt.show() |
|
\end{python} |
|
|
|
\section{OpenCV} |
|
|
|
\subsection{Basics} |
|
|
|
\subsubsection{Convert PIL Image to OpenCV image}
|
\begin{python} |
|
from PIL import Image
import cv2
import numpy as np
import matplotlib.pyplot as plt

pillowImage = Image.open("kiz-kulesi.jpg")

# PIL image -> float array scaled to [0, 1], which matplotlib can display
rgb_image_float= np.asarray(pillowImage,dtype=float)/255.0
plt.imshow(rgb_image_float)

# PIL image -> OpenCV image: 8-bit array with the channels in BGR order
opencv_image = cv2.cvtColor(np.asarray(pillowImage.convert("RGB")), cv2.COLOR_RGB2BGR)
|
\end{python} |
|
|
|
|
|
\subsubsection{Draw circle on Image} |
|
\begin{python} |
|
from PIL import Image, ImageDraw |
|
image = Image.new('RGBA', (200, 200)) |
|
draw = ImageDraw.Draw(image) |
|
draw.ellipse((20, 20, 180, 180), fill = 'blue', outline ='blue') |
|
draw.point((100, 100), 'red') |
|
image.save('test.png') |
|
\end{python} |
|
|
|
\subsubsection{Flip, resize, rotate, crop images} |
|
\begin{python} |
|
import cv2 |
|
import scipy.ndimage |
|
import numpy as np |
|
import matplotlib.pyplot as plt |
|
original_image = cv2.imread("kiz-kulesi.jpg", cv2.IMREAD_GRAYSCALE) |
|
|
|
flipud_image=np.flipud(original_image)   # mirror top <-> bottom
fliplr_image=np.fliplr(original_image)   # mirror left <-> right
rotated_image=scipy.ndimage.rotate(original_image,45)
# scipy.misc.imresize no longer exists in SciPy; cv2.resize with a scale
# factor of 0.5 and bilinear interpolation does the same job.
resized_image=cv2.resize(original_image, None, fx=0.5, fy=0.5, interpolation=cv2.INTER_LINEAR)
rows,cols=original_image.shape
# keep the middle third of the rows and the middle half of the columns
croped_image = original_image[int(rows / 3): -int(rows / 3), int(cols / 4): - int(cols / 4)]
|
fig1, axes_array = plt.subplots(2, 3) |
|
fig1.set_size_inches(9,6) |
|
image_plot = axes_array[0][0].imshow(original_image ,cmap=plt.cm.gray) |
|
axes_array[0][0].set(title='Original') |
|
image_plot = axes_array[0][1].imshow(flipud_image,cmap=plt.cm.gray) |
|
axes_array[0][1].set(title='Flipped up-down') |
|
image_plot = axes_array[0][2].imshow(fliplr_image,cmap=plt.cm.gray) |
|
axes_array[0][2].set(title='Flipped left-right') |
|
image_plot = axes_array[1][0].imshow(rotated_image,cmap=plt.cm.gray) |
|
axes_array[1][0].set(title='Rotated') |
|
image_plot = axes_array[1][1].imshow(resized_image,cmap=plt.cm.gray) |
|
axes_array[1][1].set(title='Resized') |
|
image_plot = axes_array[1][2].imshow(croped_image,cmap=plt.cm.gray) |
|
axes_array[1][2].set(title='Cropped') |
|
plt.show() |
|
\end{python} |
|
|
|
\subsubsection{Operating on HSV colorspace} |
|
\begin{python} |
|
import matplotlib
import matplotlib.colors
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image


def demo_rgb_to_hsv(original_image,reduce_intensity_factor=0.5):
    """Show an image next to a copy whose HSV value channel is scaled.

    original_image          -- image convertible to an 8-bit array (e.g. a PIL image)
    reduce_intensity_factor -- multiplier applied to channel 2 (V) of the HSV image
    """
    original_rgb_float= np.asarray(original_image,dtype=float)/255.0
    original_rgb_float = original_rgb_float[:,:,:3]   # keep the first three channels only
    hsv_image=matplotlib.colors.rgb_to_hsv(original_rgb_float)
    hsv_image_processed=hsv_image.copy()
    hsv_image_processed[:,: ,2]=hsv_image[:,: ,2]*reduce_intensity_factor
    rgb_image_processed=matplotlib.colors.hsv_to_rgb(hsv_image_processed)
    fig1, axes_array = plt.subplots(1, 2)
    fig1.set_size_inches(8,4)
    image_plot = axes_array[0].imshow(original_rgb_float) # Show the RGB image
    axes_array[0].axis('off')
    axes_array[0].set(title='RGB Image')
    image_plot = axes_array[1].imshow(rgb_image_processed) # Show the processed image
    axes_array[1].axis('off')
    axes_array[1].set(title='Intensity Reduced Image')
    plt.show()


rgb_image_int = Image.open("kiz-kulesi.jpg")
demo_rgb_to_hsv(rgb_image_int)
|
\end{python} |
|
|
|
\subsubsection{1d Gaussian Kernel} |
|
\begin{python} |
|
import numpy as np
import matplotlib.pyplot as plt


def display_1d_gaussian(mean=0.0,sigma=0.5):
    """Plot the Gaussian density with the given mean and sigma for x in [-10, 10]."""
    x=np.linspace(-10,10,1000)
    # density: 1 / sqrt(2*pi*sigma^2) * exp(-(x - mean)^2 / (2*sigma^2))
    y= (1/np.sqrt(2*np.pi*sigma**2))*np.exp(-((x-mean)**2)/(2*sigma**2))
    fig, axes1 = plt.subplots(1, 1)
    fig.set_size_inches(6,3)
    axes1.set(xlabel="X",ylabel="Y",title='Gaussian Curve',ylim=(0,1))
    plt.grid(True)
    axes1.plot(x,y,color='gray')
    plt.fill_between(x,y,0,color='#c0f0c0')   # shade the area under the curve
    plt.show()
|
|
|
\end{python} |
|
|
|
\subsubsection{2d Gaussian Kernel Image} |
|
\begin{python} |
|
import scipy.stats |
|
import numpy as np |
|
import matplotlib.pyplot as plt |
|
from mpl_toolkits.mplot3d import Axes3D |
|
from matplotlib.ticker import LinearLocator, FormatStrFormatter |
|
def display_gaussian_kernel(sigma=1.0): |
|
X = np.linspace(-5, 5, 400) |
|
Y = np.linspace(-5, 5, 400) |
|
X, Y = np.meshgrid(X, Y) |
|
R = np.sqrt(X**2 + Y**2) |
|
Z = np.sin(R) |
|
mu = np.array([0.0, 0.0]) |
|
covariance = np.diag(np.array([sigma, sigma])**2) |
|
XY = np.column_stack([X.flat, Y.flat]) |
|
z = scipy.stats.multivariate_normal.pdf(XY, mean=mu, cov=covariance) |
|
Z = z.reshape(X.shape) |
|
|
|
# Plot the surface. |
|
fig = plt.figure() |
|
fig.set_size_inches(8,4) |
|
ax1 = fig.add_subplot(121) |
|
ax1.imshow(Z) |
|
ax2 = fig.add_subplot(122, projection='3d') |
|
|
|
surf = ax2.plot_surface(X, Y, Z, cmap=plt.cm.coolwarm, linewidth=0, antialiased=False) |
|
# Customize the z axis. |
|
ax2.set_zlim(0, .2) |
|
ax2.zaxis.set_major_locator(LinearLocator(10)) |
|
ax2.zaxis.set_major_formatter(FormatStrFormatter('%.02f')) |
|
|
|
# Add a color bar which maps values to colors. |
|
fig.colorbar(surf, shrink=0.5, aspect=5) |
|
plt.show() |
|
|
|
display_gaussian_kernel() |
|
|
|
\end{python} |
|
|
|
\subsubsection{find horizontal lines} |
|
\begin{python} |
|
import cv2
import matplotlib.pyplot as plt

# Load image, convert to grayscale, Otsu's threshold
image = cv2.imread('kiz-kulesi.jpg')
result = image.copy()
gray = cv2.cvtColor(image,cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

# Detect horizontal lines
# A 40x1 kernel: opening with it keeps only shapes at least 40 px wide.
horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (40,1))
detect_horizontal = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, horizontal_kernel, iterations=2)
cnts = cv2.findContours(detect_horizontal, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# findContours returns 2 or 3 values depending on the OpenCV version
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    cv2.drawContours(result, [c], -1, (36,255,12), 2)

# OpenCV stores colours as BGR, matplotlib expects RGB
plt.figure(figsize = (20,9)); plt.imshow(cv2.cvtColor(result, cv2.COLOR_BGR2RGB))
|
\end{python} |
|
|
|
\subsubsection{Drawing text on image} |
|
\begin{python} |
|
import cv2
import matplotlib.pyplot as plt

# path
path = r'kiz-kulesi.jpg'

# Reading an image in default mode
image = cv2.imread(path)

# Window name in which image is displayed
window_name = 'Image'

# font
font = cv2.FONT_HERSHEY_SIMPLEX
# position
org = (40, 40)
# fontScale
fontScale = 1
# Green color in BGR
color = (0, 255, 0)
# Line thickness of 2 px
thickness = 2

image = cv2.putText(image, 'Hello', org, font, fontScale, color, thickness, cv2.LINE_AA)

# Displaying the image
# OpenCV stores colours as BGR, matplotlib expects RGB
img_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
plt.figure(figsize = (20,9)); plt.imshow(img_rgb)
|
\end{python} |
|
|
|
\subsection{Template Matching} |
|
\begin{python} |
|
from matplotlib import pyplot as plt |
|
import numpy as np |
|
import cv2 |
|
import imutils |
|
|
|
def multiscaleTemplateMatching(imFileToLoad, templateFileToLoad):
    """Locate a template in an image by edge-based matching over several scales.

    The template is converted to a Canny edge map once. The image is then
    shrunk to 20 widths between 100% and 20% of its original width (largest
    first); each resized image is edge-detected and matched against the
    template with cv2.TM_CCOEFF. The scale with the highest match score wins.

    Parameters:
        imFileToLoad: path of the image to search in.
        templateFileToLoad: path of the template image to look for.

    Returns:
        [startX, startY, endX, endY]: the best-matching box, scaled back to
        the coordinates of the original (unresized) image.

    Raises:
        FileNotFoundError: if either file cannot be read by cv2.imread.
        ValueError: if the image is smaller than the template at every scale,
            so no match could be computed.
    """
    template = cv2.imread(templateFileToLoad)
    if template is None:
        raise FileNotFoundError("cannot read template image: %r" % (templateFileToLoad,))
    template = cv2.cvtColor(template, cv2.COLOR_BGR2GRAY)
    template = cv2.Canny(template, 50, 200)
    (tH, tW) = template.shape[:2]

    image = cv2.imread(imFileToLoad)
    if image is None:
        raise FileNotFoundError("cannot read image: %r" % (imFileToLoad,))
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Bookkeeping for the best match so far: (score, location, resize ratio).
    found = None

    # Loop over the scales of the image, from full size down to 20%.
    for scale in np.linspace(0.2, 1.0, 20)[::-1]:
        # Resize the image and remember the ratio needed to map back.
        resized = imutils.resize(gray, width=int(gray.shape[1] * scale))
        r = gray.shape[1] / float(resized.shape[1])

        # Scales only get smaller from here, so stop once the template no longer fits.
        if resized.shape[0] < tH or resized.shape[1] < tW:
            break

        # Match the template against the edge map of the resized image.
        edged = cv2.Canny(resized, 50, 200)
        result = cv2.matchTemplate(edged, template, cv2.TM_CCOEFF)
        (_, maxVal, _, maxLoc) = cv2.minMaxLoc(result)

        # Keep the highest correlation value seen so far.
        if found is None or maxVal > found[0]:
            found = (maxVal, maxLoc, r)

    if found is None:
        raise ValueError("image is smaller than the template at every scale; nothing to match")

    # Map the best location back to original-image coordinates.
    (_, maxLoc, r) = found
    (startX, startY) = (int(maxLoc[0] * r), int(maxLoc[1] * r))
    (endX, endY) = (int((maxLoc[0] + tW) * r), int((maxLoc[1] + tH) * r))

    return [startX, startY, endX, endY]
|
|
|
# NOTE(review): `shot` (path of the image to search) and `templateFileToLoad`
# (path of the template) are not defined in this listing; they must be set
# before running it.
img_bgr = cv2.imread(shot)
startX, startY, endX, endY = multiscaleTemplateMatching(shot, templateFileToLoad)

# Draw the detected bounding box (red in OpenCV's BGR order).
cv2.rectangle(img_bgr, (startX, startY), (endX, endY), (0, 0, 255), 2)

# matplotlib expects RGB, so convert from BGR before displaying.
img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
plt.figure(figsize=(20, 9))
plt.imshow(img_rgb)
|
\end{python} |
|
|
|
\subsection{Overlapping bounding box removal: non-maximum suppression}
|
\begin{python} |
|
def NMS(boxes, overlapThresh=0.4):
    """Remove bounding boxes that overlap too much with another remaining box.

    Boxes are visited in input order. For each box, its intersection with
    every other box that has not been removed yet is computed and divided by
    the area of that other box. If any of these ratios exceeds
    `overlapThresh`, the visited box is dropped.

    Parameters:
        boxes: 2-D NumPy array with one row per box, laid out as
            (x1, y1, x2, y2) = top-left and bottom-right corners.
        overlapThresh: overlap ratio above which a box is suppressed.

    Returns:
        The surviving rows of `boxes` as an integer array, or an empty list
        when `boxes` is empty.
    """
    # Return an empty list if no boxes are given.
    if len(boxes) == 0:
        return []

    x1 = boxes[:, 0]  # x coordinate of the top-left corner
    y1 = boxes[:, 1]  # y coordinate of the top-left corner
    x2 = boxes[:, 2]  # x coordinate of the bottom-right corner
    y2 = boxes[:, 3]  # y coordinate of the bottom-right corner

    # Box areas; +1 because both the first and the last pixel count.
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)

    # Indices of the boxes still kept; redundant ones are removed one by one.
    indices = np.arange(len(x1))

    for i, box in enumerate(boxes):
        # All boxes still kept, except the current one.
        temp_indices = indices[indices != i]

        # Corners of the intersection with each of those boxes.
        xx1 = np.maximum(box[0], boxes[temp_indices, 0])
        yy1 = np.maximum(box[1], boxes[temp_indices, 1])
        xx2 = np.minimum(box[2], boxes[temp_indices, 2])
        yy2 = np.minimum(box[3], boxes[temp_indices, 3])

        # Width and height of the intersection (0 when the boxes are disjoint).
        w = np.maximum(0, xx2 - xx1 + 1)
        h = np.maximum(0, yy2 - yy1 + 1)

        # Ratio of the intersection to the area of the other box.
        overlap = (w * h) / areas[temp_indices]

        # Compare element-wise first, then reduce. `np.any(overlap) > thresh`
        # would compare a boolean with the threshold and suppress the box on
        # any non-zero overlap.
        if np.any(overlap > overlapThresh):
            indices = indices[indices != i]

    # Return only the boxes at the remaining indices.
    return boxes[indices].astype(int)
|
\end{python} |
|
|
|
\subsection{Single-Scale Multiple Template Matching}
|
\begin{python} |
|
import cv2 |
|
def singleScaleMultipleTemplateMatching(imageFileName, templateFileName, threshold=0.75):
    """Find every occurrence of a template in an image at a single scale.

    Both images are converted to grayscale and matched with
    cv2.TM_CCOEFF_NORMED. Every location whose score is at least `threshold`
    becomes a candidate box the size of the template; overlapping candidates
    are then reduced with NMS().

    Parameters:
        imageFileName: path of the image to search in.
        templateFileName: path of the template image.
        threshold: minimum normalised match score for a location to count
            as a candidate (default 0.75).

    Returns:
        (pick, img_rgb): `pick` holds the surviving boxes as
        (startX, startY, endX, endY) rows; `img_rgb` is a copy of the loaded
        image with those boxes drawn on it. The copy is not colour-converted,
        so it keeps the channel order produced by cv2.imread.

    Raises:
        FileNotFoundError: if either file cannot be read by cv2.imread.
    """
    print("[INFO] loading images...")
    image = cv2.imread(imageFileName)
    if image is None:
        raise FileNotFoundError("cannot read image: %r" % (imageFileName,))
    template = cv2.imread(templateFileName)
    if template is None:
        raise FileNotFoundError("cannot read template image: %r" % (templateFileName,))
    img_rgb = image.copy()
    (tH, tW) = template.shape[:2]

    # Convert both the image and the template to grayscale.
    imageGray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    templateGray = cv2.cvtColor(template, cv2.COLOR_BGR2GRAY)

    # Perform template matching.
    print("[INFO] performing template matching...")
    result = cv2.matchTemplate(imageGray, templateGray, cv2.TM_CCOEFF_NORMED)

    # Top-left corners of all locations that score at least `threshold`.
    (yCoords, xCoords) = np.where(result >= threshold)
    print("[INFO] {} matched locations *before* NMS".format(len(yCoords)))

    # One template-sized rectangle per candidate location.
    rects = [(x, y, x + tW, y + tH) for (x, y) in zip(xCoords, yCoords)]

    # Apply non-maxima suppression to the rectangles.
    pick = NMS(np.array(rects))
    print("[INFO] {} matched locations *after* NMS".format(len(pick)))

    # Draw the final bounding boxes; plain ints keep cv2.rectangle happy
    # regardless of the NumPy integer type coming out of NMS.
    for (startX, startY, endX, endY) in pick:
        cv2.rectangle(img_rgb, (int(startX), int(startY)), (int(endX), int(endY)),
                      (0, 255, 0), 2)

    return pick, img_rgb
|
|
|
# Run the matcher; the returned image keeps OpenCV's BGR channel order.
pick, img_bgr = singleScaleMultipleTemplateMatching("cropped2.png", "template.png")

# matplotlib expects RGB, so convert before displaying.
img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
plt.figure(figsize=(20, 9))
plt.imshow(img_rgb)
|
\end{python} |
|
|
|
\subsection{Finding and Plotting Contours} |
|
\begin{python} |
|
def findAndPlotContours(fileName, blob_area_thresh=20):
    """Extract external contours from an image and draw them on a white canvas.

    The image is adaptively thresholded (mean method, block size 101, C=3)
    and inverted, then its external contours are extracted. Contours whose
    area is not greater than `blob_area_thresh` are discarded.

    Parameters:
        fileName: path of the image to process.
        blob_area_thresh: contours must have an area strictly greater than
            this value to be kept.

    Returns:
        (result, c): `result` is an all-white image of the same size as the
        input with the kept contours drawn in (0, 0, 255); `c` is the last
        kept contour, or None when no contour passed the area filter.
        NOTE(review): only the last contour is returned, as in the original;
        the original comment ("return contours") suggests the whole list may
        have been intended -- confirm before changing the interface.

    Raises:
        FileNotFoundError: if the file cannot be read by cv2.imread.
    """
    img = cv2.imread(fileName, cv2.IMREAD_COLOR)
    if img is None:
        raise FileNotFoundError("cannot read image: %r" % (fileName,))
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 101, 3)

    ### The following morphology open and close can optionally be applied.
    #kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5,5))
    #blob = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)
    #kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (9,9))
    #blob = cv2.morphologyEx(blob, cv2.MORPH_CLOSE, kernel)
    blob = thresh

    # Invert the binary image.
    blob = (255 - blob)

    # Get contours; findContours returns 2 values in some OpenCV versions and
    # 3 in others, so pick the contour list in either case.
    cnts = cv2.findContours(blob, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cnts = cnts[0] if len(cnts) == 2 else cnts[1]

    # Keep only the contours with an area above the threshold.
    cnts = [cnt for cnt in cnts if cv2.contourArea(cnt) > blob_area_thresh]
    #big_contour = max(cnts, key=cv2.contourArea)

    # White canvas of the same shape as the input image.
    result = img.copy()
    result[:] = 255

    # Draw the kept contours, remembering the last one. `c` stays None when
    # nothing was kept (the original raised UnboundLocalError here).
    c = None
    for c in cnts:
        cv2.drawContours(result, [c], -1, (0, 0, 255), 1)

    return result, c
|
|
|
# Trace the contours of the sample image, using an area threshold of 20.
result, c = findAndPlotContours("kiz-kulesi.jpg", blob_area_thresh=20)
|
\end{python} |
|
|
|
\subsection{Circle Detection} |
|
\begin{python} |
|
import matplotlib.pyplot as plt
import cv2

# Load the image (BGR channel order) and build the grayscale input for the detector.
img = cv2.imread("cropped2.png", cv2.IMREAD_COLOR)
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

detected_circles = cv2.HoughCircles(gray, cv2.HOUGH_GRADIENT, 1, 20,
                                    param1=50, param2=30, minRadius=1, maxRadius=400)

# HoughCircles returns None when no circle is found, so guard before indexing.
if detected_circles is not None:
    for pt in detected_circles[0, :]:
        # The detector returns floating-point values; the drawing functions
        # need integer pixel coordinates.
        a, b, r = int(round(pt[0])), int(round(pt[1])), int(round(pt[2]))
        # Draw the circumference of the circle.
        cv2.circle(img, (a, b), r, (0, 255, 0), 2)
        # Draw a small circle (of radius 1) to show the center.
        cv2.circle(img, (a, b), 1, (0, 0, 255), 3)

# matplotlib expects RGB, so convert from BGR before displaying.
plt.figure(figsize=(20, 9))
plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
|
\end{python} |
|
|
|
\subsection{Connected Components Analysis} |
|
\begin{python} |
|
import matplotlib.pyplot as plt
import numpy as np
import cv2

# Load the image and binarise it with an inverted Otsu threshold.
img = cv2.imread("cropped2.png", cv2.IMREAD_COLOR)
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
threshold = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]

# Label the components using 4-connectivity.
analysis = cv2.connectedComponentsWithStats(threshold, 4, cv2.CV_32S)
(totalLabels, label_ids, values, centroid) = analysis

#plt.figure(figsize = (20,9))
#plt.imshow(threshold)

# Select the labels whose area lies strictly between 110 and 900.
# Label 0 is skipped, hence the slice from 1 and the +1 to restore label ids.
areas = values[1:, cv2.CC_STAT_AREA]
keep_labels = np.flatnonzero((areas > 110) & (areas < 900)) + 1

# `label_ids` has one component id per pixel; mark the pixels of the
# selected components white (255) and everything else black (0).
output = np.isin(label_ids, keep_labels).astype("uint8") * 255

plt.figure(figsize=(20, 9))
plt.imshow(output)
|
|
|
\end{python} |
|
|
|
\subsection{Fit ellipses to objects} |
|
\begin{python} |
|
import cv2
import matplotlib.pyplot as plt

img = cv2.imread("cropped2.png", cv2.IMREAD_COLOR)
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Neighbourhood size for the adaptive threshold; must be odd and > 1.
# NOTE(review): the original passed an undefined name `s` here; 11 is a
# starting value to be tuned for the image at hand.
block_size = 11
gray = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                             cv2.THRESH_BINARY_INV, block_size, 7.0)

# findContours returns 2 values in some OpenCV versions and 3 in others;
# pick the contour list in either case.
cnts = cv2.findContours(gray, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]

ellipses = []
for cnt in cnts:
    # fitEllipse needs at least 5 points.
    if len(cnt) >= 5:
        ellipse = cv2.fitEllipse(cnt)
        print(ellipse)
        ellipses.append(ellipse)

        centCoord = (int(ellipse[0][0]), int(ellipse[0][1]))
        # fitEllipse reports full axis lengths, while cv2.ellipse expects
        # half-axis lengths, so halve them before drawing.
        axisLen = (int(ellipse[1][0] / 2), int(ellipse[1][1] / 2))
        angle = ellipse[2]

        ## angle filtering
        #offSet = np.min( np.fabs( [angle, angle-90, angle-180, angle-270, angle-360]) )
        #if offSet < 5:

        img = cv2.ellipse(img, centCoord, axisLen, angle, 0, 360, (0, 0, 255))

# cv2.drawContours(img,cnts,-1,(150,10,255),2)

# matplotlib expects RGB, so convert from BGR before displaying.
plt.figure(figsize=(20, 9))
plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
|
\end{python} |
|
|
|
\section{Numpy} |
|
|
|
\subsection{Fitting} |
|
|
|
\subsubsection{Curve fitting} |
|
\begin{python} |
|
import pandas as pd |
|
import numpy as np |
|
import matplotlib.pyplot as plt |
|
from sklearn.preprocessing import PolynomialFeatures |
|
from sklearn.linear_model import LinearRegression |
|
from sklearn.metrics import r2_score |
|
from sklearn.metrics import mean_squared_error |
|
|
|
def generateQuadraticData():
    """Generate 100 noisy samples of the parabola y = x^2 + 0.1.

    x is drawn uniformly from [-0.5, 0.5); each y value gets an additive
    uniform perturbation from [0, 0.1).

    Returns:
        (x, y): two 1-D arrays of length 100.
    """
    n_samples = 100
    xs = np.random.rand(n_samples) - 0.5
    noise = 0.1 * np.random.rand(n_samples)

    # Coefficients of quad * x^2 + lin * x + offset.
    quad, lin, offset = 1, 0.0, 0.1

    ys = quad * (xs * xs) + xs * lin + offset + noise
    return xs, ys
|
|
|
x, y = generateQuadraticData()
X = x.reshape(-1, 1)  # for one feature problems

### Compare linear regression on polynomial features of different degree.
regr = LinearRegression()
quadratic = PolynomialFeatures(degree=2)
cubic = PolynomialFeatures(degree=3)
X_quad = quadratic.fit_transform(X)
X_cubic = cubic.fit_transform(X)

# Evenly spaced grid over the data range, used only to draw the fitted curves.
X_fit = np.arange(X.min(), X.max(), 0.05)[:, np.newaxis]

# Degree 1: fit on the raw feature.
regr = regr.fit(X, y)
y_lin_fit = regr.predict(X_fit)
linear_r2 = r2_score(y, regr.predict(X))

# Degree 2: the transformer was fitted on the training data above, so the
# grid is only transformed, not used to refit it.
regr = regr.fit(X_quad, y)
y_quad_fit = regr.predict(quadratic.transform(X_fit))
quadratic_r2 = r2_score(y, regr.predict(X_quad))

# Degree 3.
regr = regr.fit(X_cubic, y)
y_cubic_fit = regr.predict(cubic.transform(X_fit))
cubic_r2 = r2_score(y, regr.predict(X_cubic))

# Plot the training points and the three fitted curves with their R^2 scores.
plt.scatter(X, y, label='training points', color='lightgray')

plt.plot(X_fit, y_lin_fit,
         label='linear (d=1), $R^2=%.2f$' % linear_r2,
         color='blue',
         lw=2,
         linestyle=':')

plt.plot(X_fit, y_quad_fit,
         label='quadratic (d=2), $R^2=%.2f$' % quadratic_r2,
         color='red',
         lw=2,
         linestyle='-')

plt.plot(X_fit, y_cubic_fit,
         label='cubic (d=3), $R^2=%.2f$' % cubic_r2,
         color='green',
         lw=2,
         linestyle='--')

plt.legend(loc='upper right')
plt.tight_layout()
plt.show()
|
\end{python} |
|
|
|
\subsubsection{Ransac curve fitting} |
|
\begin{python} |
|
import pandas as pd |
|
import numpy as np |
|
import matplotlib.pyplot as plt |
|
from sklearn.preprocessing import PolynomialFeatures |
|
from sklearn.linear_model import LinearRegression |
|
from sklearn.metrics import r2_score |
|
from sklearn.metrics import mean_squared_error |
|
from sklearn import linear_model, datasets |
|
|
|
def generateQuadraticData(noiseFraction=0.2):
    """Generate samples of y = x^2 + 0.1 plus uniformly scattered outliers.

    100 exact points are produced with x uniform on [-0.5, 0.5). Then
    int(100 * noiseFraction) outlier points, with both coordinates uniform
    on [0, 1), are appended after them.

    Parameters:
        noiseFraction: number of outliers, as a fraction of the 100 data points.

    Returns:
        (x, y): two 1-D arrays holding the data points followed by the outliers.
    """
    n_clean = 100
    n_outliers = int(n_clean * noiseFraction)

    # Exact points on quad * x^2 + lin * x + offset.
    quad, lin, offset = 1, 0.0, 0.1
    clean_x = np.random.rand(n_clean) - 0.5
    clean_y = quad * (clean_x * clean_x) + clean_x * lin + offset

    # Outliers: column 0 holds the x values, column 1 the y values.
    outliers = np.random.rand(n_outliers, 2)

    x = np.concatenate((clean_x, outliers[:, 0]), axis=0)
    y = np.concatenate((clean_y, outliers[:, 1]), axis=0)
    return x, y
|
|
|
noiseFraction = 0.7
x, y = generateQuadraticData(noiseFraction)

## 1D data reshape.
X = x.reshape(-1, 1)  # for one feature problems
quadratic = PolynomialFeatures(degree=2)
X_quad = quadratic.fit_transform(X)

# Evaluation interval: twice the data range on each side, in steps of 0.05.
X_fit = np.arange(2 * np.min(X), 2 * np.max(X), 0.05)[:, np.newaxis]

# Fit a RANSAC regressor on the quadratic features.
regr = linear_model.RANSACRegressor()
regr = regr.fit(X_quad, y)

# The transformer was fitted on the training data above, so the evaluation
# grid is only transformed, not used to refit it.
y_quad_fit = regr.predict(quadratic.transform(X_fit))

# R^2 is computed over all points, outliers included.
quadratic_r2 = r2_score(y, regr.predict(X_quad))

# Points RANSAC treated as inliers, and the complement.
inlier_mask = regr.inlier_mask_
outlier_mask = np.logical_not(inlier_mask)

plt.figure(figsize=(10, 9))
plt.plot(X_fit, y_quad_fit, label='quadratic (d=2), $R^2=%.2f$' % quadratic_r2,
         color='blue', lw=1, linestyle='-')
plt.scatter(X[inlier_mask], y[inlier_mask], color="green", marker=".", label="Inliers")
plt.scatter(X[outlier_mask], y[outlier_mask], color="red", marker=".", label="Outliers")
plt.legend(loc='upper right')
plt.tight_layout()
plt.show()
|
\end{python} |
|
|
|
\section{Scikit-Learn} |
|
Scikit-learn supports many machine learning models.
|
|
|
\subsection{Linear Regression} |
|
\begin{python} |
|
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets, linear_model
from sklearn.metrics import mean_squared_error, r2_score

# Fetch the diabetes data as a (features, targets) pair and keep only
# feature column 2, shaped as a single-column 2-D array.
features, targets = datasets.load_diabetes(return_X_y=True)
features = features[:, np.newaxis, 2]

# Hold out the final 20 samples for testing; train on everything before them.
holdout = 20
X_train, X_test = features[:-holdout], features[-holdout:]
y_train, y_test = targets[:-holdout], targets[-holdout:]

# Fit an ordinary linear regression on the training part and predict the
# held-out part.
model = linear_model.LinearRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Report the fitted coefficients, the mean squared error and the coefficient
# of determination (1 is a perfect prediction).
print("Coefficients: \n", model.coef_)
print("Mean squared error: %.2f" % mean_squared_error(y_test, y_pred))
print("Coefficient of determination: %.2f" % r2_score(y_test, y_pred))

# Plot the held-out samples and the fitted line, with axis ticks hidden.
plt.scatter(X_test, y_test, color="black")
plt.plot(X_test, y_pred, color="blue", linewidth=3)
plt.xticks(())
plt.yticks(())
plt.show()
|
|
|
\end{python} |
|
|
|
%\section*{Appendix} |
|
%\bibliographystyle{plain} |
|
%\bibliography{references} |
|
|
|
\end{document}
|
|
|