Add pttPush

dev/addPush
Zovjsra 4 years ago
parent d3438bd04c
commit 8014aef8d1

@ -3,6 +3,7 @@ import threading
import random
import string
import dataHandlerPTT as ptt
import dataHandlerPTTPush as pttPush
import generalText as gen
from queue import Queue
from datetime import datetime
@ -58,6 +59,37 @@ def index():
return redirect('/ptt')
@app.route('/ptt_push')
def ptt_push():
    """Render the ``ptt_push.html`` template with its page title."""
    pageTitle = '推文 Sententree'
    return render_template('ptt_push.html', title=pageTitle)
@app.route('/ptt_push/init', methods=['POST'])
def pttPushInit():
    """Return the initial graph payload for the push page.

    Takes the first author yielded by ``pttPush.getMostFrequentAuthor`` and
    looks up that author's pushes with the module's default stop words.
    The JSON response carries the author, the stop words and the TSV / JSON
    strings produced by ``pttPush.findAuthorPush``.
    """
    topAuthor = next(pttPush.getMostFrequentAuthor())
    defaultWords = pttPush.defaultStopWords
    tsvText, jsonText = pttPush.findAuthorPush(
        author=[topAuthor], stopwords=defaultWords)
    return jsonify(Result={
        'author': topAuthor,
        'stopwords': defaultWords,
        'tsv': tsvText,
        'json': jsonText,
    })
@app.route('/ptt_push/addRequest', methods=['POST'])
def pttPushAddRequest():
    """Rebuild the graph data for the filters posted by the client.

    The JSON body must carry ``author`` and ``aid`` (space-separated
    strings), ``keyword`` and ``stopwords``; a missing key raises KeyError.
    """
    payload = request.json
    authors = payload['author'].split(' ')
    aids = payload['aid'].split(' ')
    keyword = payload['keyword']
    found = pttPush.findAuthorPush(
        author=authors, aid=aids, keyword=keyword,
        stopwords=payload['stopwords'])
    return jsonify(Result={
        'keyword': payload['keyword'],
        'tsv': found[0],
        'json': found[1],
    })
@app.route('/ptt')
def pttSententree():
    """Render the ``ptt.html`` template with its page title."""
    pageTitle = "PTT Sententree"
    return render_template('ptt.html', title=pageTitle)
@ -79,18 +111,6 @@ def updateContent():
})
@app.route('/askProgressHandler', methods=['POST'])
def askProgressHandler():
    """Start a ``ptt.progressListener`` thread and return its random id.

    The id is 15 random ASCII letters; it is handed to the listener together
    with the posted ``key`` and the shared ``eventQueue``.
    """
    key = request.json['key']
    alphabet = string.ascii_uppercase + string.ascii_lowercase
    randId = ''.join(random.choices(alphabet, k=15))
    listener = threading.Thread(
        target=ptt.progressListener, args=(key, eventQueue, randId))
    listener.start()
    return jsonify(Result={'id': randId})
@app.route('/addRequest', methods=['POST'])
def addRequest():
content = request.json
@ -126,4 +146,4 @@ def initPage():
if __name__ == "__main__":
app.run(debug=False, port=4980, host='0.0.0.0', threaded=True)
app.run(debug=True, port=4998, host='0.0.0.0', threaded=True)

@ -0,0 +1,89 @@
from PTTData import PTTData
from pprint import pprint
from datetime import datetime
from progressbar import ProgressBar
import json
import csv
import io
# PTTData handle constructed for the 'Gossiping' board name.
data = PTTData('Gossiping')

# Cache used by getMostFrequentAuthor: the date of the last aggregation and
# the author it found.
lastUpdate: datetime = None
mostFrequentAuthor: str = None

# Default stop words: one entry per line of the resource file, stripped of
# surrounding whitespace.
defaultStopWords = []
with open('resource/stopWords.txt', mode='r', encoding='UTF-8') as file:
    defaultStopWords.extend(line.strip() for line in file)
def getMostFrequentAuthor(title: str = None):
    """Yield the author that owns the most documents in ``data.pushCllc``.

    This is a generator so that a caller can take a cached answer
    immediately with ``next()``:

    * If the cache was refreshed today, the cached author is yielded and the
      generator finishes without touching the database.
    * If a cached author exists but is stale, it is yielded first; the
      aggregation only runs when the caller keeps iterating, after which the
      refreshed author is yielded. A caller that only calls ``next()`` once
      therefore never triggers the refresh.
    * With no cached author the aggregation runs before the first yield.

    Args:
        title: Currently unused; kept for interface compatibility.

    Yields:
        The cached and/or freshly computed author id.

    Raises:
        ValueError: If the aggregation returns no groups. The cache is left
            untouched in that case.
    """
    global mostFrequentAuthor, lastUpdate
    if mostFrequentAuthor is not None:
        isFresh = lastUpdate == datetime.today().date()
        yield mostFrequentAuthor
        if isFresh:
            # Already refreshed today: do not repeat the aggregation when the
            # caller exhausts the generator.
            return
    authorList = data.pushCllc.aggregate(pipeline=[{'$group': {
        '_id': '$author',
        'count': {
            '$sum': 1
        }
    }}])
    # Compute first, then publish both cache fields together, so a failure in
    # max() cannot leave lastUpdate marking an empty cache as fresh.
    topAuthor = max(authorList, key=lambda x: x['count'])['_id']
    mostFrequentAuthor = topAuthor
    lastUpdate = datetime.today().date()
    yield mostFrequentAuthor
def possegPushes(ids: list, stopwords: list, keyword: str):
    """Build the space-joined word string for each segmented push document.

    Documents are read from ``data.pushPossegCllc`` where ``ID`` is in
    ``ids``. Each item of a document's ``content`` is indexed as
    ``item[0]`` (compared against the excluded tags) and ``item[1]`` (the
    word that is kept).

    Args:
        ids: Values matched against the ``ID`` field.
        stopwords: Words to drop from every document.
        keyword: When non-empty, only documents whose remaining words
            contain it are returned.

    Returns:
        A list of ``{'posString': str, 'ID': ...}`` dicts.
    """
    excludedTags = ('eng', 'x', 'm')
    try:
        # Build the lookup once; set membership is O(1) per word.
        stopLookup = frozenset(stopwords)
    except TypeError:
        # Unhashable entries (e.g. malformed client input): keep list lookup.
        stopLookup = stopwords
    result = []
    for p in data.pushPossegCllc.find({'ID': {'$in': ids}}):
        words = [item[1] for item in p['content']
                 if item[0] not in excludedTags and item[1] not in stopLookup]
        if keyword == '' or keyword in words:
            result.append({
                'posString': ' '.join(words),
                'ID': p['ID']
            })
    return result
def findAuthorPush(author: list = None, aid: list = None, keyword: str = '', stopwords: list = None):
    """Collect pushes by author and/or post aid and format them for the page.

    Args:
        author: Author ids to match; ``None`` or ``['']`` disables the filter.
        aid: Post aids to match against ``postAid``; ``None`` or ``['']``
            disables the filter.
        keyword: Forwarded to ``possegPushes``.
        stopwords: Forwarded to ``possegPushes``; defaults to no stop words.

    Returns:
        A ``(tsv, json)`` tuple of strings. ``tsv`` is a tab-separated table
        with header ``id / text / count`` and one row per segmented push
        (count is the constant 3000). ``json`` is a JSON array of the matched
        documents (``title``, ``author``, ``pushes``), each with a ``part``
        field when a segmented string was found for it.

    Note:
        When both filters are disabled the query document is empty, so
        ``find`` is called without any restriction.
    """
    # Avoid a shared mutable default argument.
    if stopwords is None:
        stopwords = []
    terms = {}
    if author is not None and author != ['']:
        terms['author'] = {'$in': author}
    if aid is not None and aid != ['']:
        terms['postAid'] = {'$in': aid}

    pushId = []
    pushContent = {}
    for p in data.pushCllc.find(terms):
        pushId.append(p['_id'])
        pushContent[str(p['_id'])] = {
            'title': p['title'],
            'author': p['author'],
            'pushes': p['pushes']
        }

    possegResult = [['id', 'text', 'count']]
    for index, n in enumerate(possegPushes(pushId, stopwords, keyword)):
        entry = pushContent.get(str(n['ID']))
        if entry is not None:
            # Lets the client map a TSV row text back to its document.
            entry['part'] = str(n['posString'])
        possegResult.append([index, n['posString'], 3000])

    jsonString = json.dumps(
        list(pushContent.values()), indent=4, ensure_ascii=False)
    with io.StringIO() as f:
        csv.writer(f, delimiter='\t').writerows(possegResult)
        csvString = f.getvalue()
    return (csvString, jsonString)
if __name__ == "__main__":
    # Manual run: pretty-print the lookup result for one hard-coded author.
    samplePushes = findAuthorPush(['gwenwoo'])
    pprint(samplePushes)

@ -309,6 +309,17 @@ input[type="date" i] {
animation: blinker 1s linear infinite;
}
/* Box styling for elements carrying the searchBox class. */
.searchBox {
    margin-left: 5px;
    margin-right: 5px;
    padding: 7px;
    align-content: center;
    border-width: 1px;
    border-style: solid;
    border-color: lightslategray;
    border-radius: 3px;
}
@keyframes blinker {
50% {
color: red;

@ -310,7 +310,7 @@ function buildSentetree(tsvString) {
titles = node.data.topEntries.map(function(x) {
return wordTitleList[x.rawText]
})
//console.log(titles)
console.log(titles)
infoStr = ''
for (index in titles) {
if (index == 0) {

@ -0,0 +1,329 @@
// Page bootstrap. `init` is a function declaration and therefore hoisted, so
// calling it before the declarations below is valid.
init()
// Path handed to d3.tsv by buildSentetree when it is called without a TSV string.
var tsvPath = ''
// NOTE(review): not referenced anywhere else in this script.
var titlePath = ''
var defaultStartDate
var defaultEndDate
var totalPosts
// Written by setDate.
var startDate
var endDate
// Parsed JSON list from the server; read by the graph node handlers.
var wordPushList
// Suffix of the progress event names in closeEventListner.
// NOTE(review): never assigned in this script.
var randId
var globKeyword = ''
// Current stop-word list; replaced by the init response and edited in the stop-word editor.
var stopwords = []
// Requests the initial graph data and registers the page-level UI handlers.
function init() {
    // Stores the server response in the page state and draws the first graph.
    function onInitLoaded(data) {
        console.log(data)
        tsvString = data.Result.tsv
        wordPushList = JSON.parse(data.Result.json)
        stopwords = data.Result.stopwords
        console.log(wordPushList)
        $('#idBox').val(data.Result.author)
        buildSentetree(tsvString)
    }

    // Enter runs a search instead of the key's default action.
    function searchOnEnter(event) {
        if (event.keyCode == 13) {
            event.preventDefault()
            sendRequest()
        }
    }

    $.ajax({
        type: 'POST',
        url: 'ptt_push/init',
        dataType: 'json',
        success: onInitLoaded
    })

    $(document).ready(function() {
        $(window).on('keydown', searchOnEnter)
    })

    // Keep the tooltip box at the pointer position.
    $(window).on('mousemove', function(e) {
        $('#nodeTitle').css({ left: e.pageX, top: e.pageY })
    })

    // The list only scrolls while the pointer is over it.
    $('#titleListContainer')
        .on('mouseenter', function() {
            $(this).css('overflow', 'auto')
        })
        .on('mouseleave', function() {
            $(this).css('overflow', 'hidden')
        })

    // A click on the overlay itself (not on its children) closes it.
    $('#titleListLayer').on('click', function(e) {
        if ($('#titleListLayer').is(e.target)) {
            hideTitles()
        }
    })
    $('#stopWordEditorLayer').on('click', function(e) {
        if ($('#stopWordEditorLayer').is(e.target)) {
            hideStopWordEditor()
        }
    })
}
// Empties both the rendered stop-word list and the in-memory array.
function clearStopWord() {
    var listElement = $('#sweContainer')
    stopwords = []
    listElement.html('')
}
// Adds every space-separated word typed into #newStopWord to `stopwords` and
// to the rendered list, scrolls the list to each word, then clears the input.
// Words already present are not added again.
function addStopWord() {
    var container = document.getElementById('sweContainer')
    var candidates = $('#newStopWord').val().split(' ')

    candidates.forEach(function(candidate) {
        if (candidate === '') {
            // Consecutive spaces produce empty pieces; ignore them.
            return
        }
        if (!stopwords.includes(candidate)) {
            stopwords.push(candidate)
            var removeButton = $('<span>')
                .attr('class', 'w3-button w3-hover-red w3-transparent w3-display-right')
                .click(function() {
                    // Row position in the list equals the index in `stopwords`.
                    var index = $(this).parent().index()
                    stopwords.splice(index, 1)
                    $('#sweContainer li').eq(index).remove()
                })
                .append('&times;')
            $('#sweContainer').append(
                $('<li>').attr('class', 'w3-display-container')
                    // text() so typed markup is shown literally, not parsed as HTML.
                    .append($('<span>').text(candidate))
                    .append(removeButton)
            )
        }
        var row = container.children[stopwords.indexOf(candidate)]
        if (row) {
            container.scrollTop = row.offsetTop
        }
    })

    $('#newStopWord').val('')
}
// Opens the stop-word editor: Enter now adds a stop word instead of searching,
// and the list is re-rendered from the current `stopwords` array.
function showStopwordEditor() {
    // Replace the page-level Enter handler while the editor is open.
    $(window).unbind('keydown')
    $(window).keydown(function(event) {
        if (event.keyCode == 13) {
            addStopWord()
        }
    })

    var container = $('#sweContainer').empty()
    stopwords.forEach(function(word) {
        var removeButton = $('<span>')
            .attr('class', 'w3-button w3-hover-red w3-transparent w3-display-right')
            .click(function() {
                // Row position in the list equals the index in `stopwords`.
                var index = $(this).parent().index()
                stopwords.splice(index, 1)
                $('#sweContainer li').eq(index).remove()
            })
            .append('&times;')
        container.append(
            $('<li>').attr('class', 'w3-display-container')
                // text() so words containing markup characters render literally.
                .append($('<span>').text(word))
                .append(removeButton)
        )
    })

    $('#stopWordEditorLayer').removeClass('hidden')
}
// Closes the stop-word editor and makes Enter trigger a search again.
function hideStopWordEditor() {
    var searchOnEnter = function(event) {
        if (event.keyCode == 13) {
            event.preventDefault()
            sendRequest()
        }
    }
    $(window).off('keydown').on('keydown', searchOnEnter)
    $('#stopWordEditorLayer').addClass('hidden')
}
// Offers the current stop words as a plain-text download, one word per line.
function downloadStopWord() {
    // Local variable: the original assignment leaked an implicit global.
    var stopWordString = stopwords.join('\n')
    download(stopWordString, 'stopwords.txt', 'text/plain')
}
// Toggles the info window overlay, resets the progress display and detaches
// the progress event handlers.
function hidePopup() {
    $('#infoWindowLayer').toggleClass('hidden')
    $('#progressInfo').html('')
    $('#progBarInner').css({ width: '0%' })
    closeEventListner()
}
// Writes the given dates into the #startDate / #endDate inputs and mirrors
// them in the page-level startDate / endDate variables.
function setDate(_startDate, _endDate) {
    var startInput = document.getElementById('startDate')
    startInput.value = _startDate
    var endInput = document.getElementById("endDate")
    endInput.value = _endDate
    startDate = _startDate
    endDate = _endDate
}
// Progress event handler: shows the reported comment and bar width.
// `event.data` is a JSON string with `comment` and `progress` fields.
function getProgressing(event) {
    // Local constant: the original assignment leaked an implicit global `data`.
    const progressData = JSON.parse(event.data)
    $('#progressInfo').html(progressData.comment)
    $('#progBarInner').css('width', progressData.progress + '%')
}
// Completion event handler: redraws the graph from the event payload and
// closes the progress popup. `event.data` is a JSON string.
function getProgressFinished(event) {
    // Local constant: the original assignment leaked an implicit global `data`.
    const finishedData = JSON.parse(event.data)
    changeGraph(finishedData)
    hidePopup()
}
// Detaches the per-request progress handlers from `progListener`.
// `progListener` is not declared in this script, so the call is skipped when
// no such object exists instead of throwing a ReferenceError.
function closeEventListner() {
    if (typeof progListener === 'undefined' || !progListener) {
        return
    }
    progListener.removeEventListener('progressing' + randId, getProgressing)
    progListener.removeEventListener('progressFinished' + randId, getProgressFinished)
}
// Posts the current search form to `ptt_push/addRequest` and redraws the
// graph from the response. At least one of author id / post aid is required;
// without either, the user is alerted and no request is sent.
function sendRequest() {
    var author = $('#idBox').val().trim()
    var aid = $('#titleBox').val().trim()
    if (author === '' && aid === '') {
        window.alert('請至少填寫一個鄉民id或是文章aid')
        // Stop here: an unfiltered request would match every stored push.
        return
    }

    var content = JSON.stringify({
        author: author,
        aid: aid,
        keyword: $('#keywordBox').val(),
        stopwords: stopwords,
        pos: {
            noun: $('#noun').is(':checked'),
            verb: $('#verb').is(':checked'),
            adj: $('#adj').is(':checked'),
            adv: $('#adv').is(':checked'),
            pron: $('#pron').is(':checked'),
            aux: $('#aux').is(':checked'),
            other: $('#other').is(':checked')
        }
    })

    $.ajax({
        type: 'POST',
        url: 'ptt_push/addRequest',
        data: content,
        contentType: 'application/json',
        success: function(data) {
            // The node handlers look pushes up in wordPushList, so refresh it
            // before the graph is rebuilt.
            wordPushList = JSON.parse(data.Result.json)
            changeGraph(data.Result)
        }
    })
}
// Replaces the current graph with one built from `data.tsv`.
// Only `data.tsv` is used; the previously parsed-but-unused `data.json` could
// throw on payloads without that field and abort the redraw.
function changeGraph(data) {
    destroyCurrentGraph()
    // buildSentetree renders into '#vis', so recreate that container first.
    d3.select('#graph').append('div').attr('id', 'vis')
    buildSentetree(data.tsv)
}
// Removes every element matching '#vis' from the document.
function destroyCurrentGraph() {
    var currentGraph = d3.selectAll('#vis')
    currentGraph.remove()
}
// Hides the title list overlay.
function hideTitles() {
    var overlay = $('#titleListLayer')
    overlay.addClass('hidden')
}
// Builds the SentenTree graph inside '#vis'.
//
// tsvString: TSV text with `id`, `text` and `count` columns. When omitted, the
//            table is loaded from `tsvPath` with d3.tsv instead.
//
// Node handlers look pushes up in the page-level `wordPushList`, matching a
// graph entry's `rawText` against each item's `part` field.
function buildSentetree(tsvString) {
    console.log("Build.")
    // Display markers indexed by `push.type - 1`.
    const PUSH_MARKERS = ['推', '噓', '→']
    var model
    var tree

    if (typeof tsvString === 'undefined') {
        d3.tsv(tsvPath, buildTree)
    } else {
        // buildTree never reads its error argument; pass null instead of the
        // undeclared identifier `_`.
        buildTree(null, d3.tsvParse(tsvString))
    }

    // Returns the wordPushList items whose `part` equals one of `texts`,
    // grouped in the order of `texts`.
    function findPushesByText(texts) {
        const found = []
        texts.forEach(function(text) {
            wordPushList.forEach(function(entry) {
                if (entry.part === text) {
                    found.push(entry)
                }
            })
        })
        return found
    }

    // nodeClick: copies the word into the keyword box and lists every push
    // whose segmented text contains the node.
    function showNodePushes(node) {
        const entity = node.data.entity
        $('#keywordBox').val(entity)
        $('#titleListLayer').removeClass('hidden')

        // Distinct texts, first occurrence first.
        const seqTexts = Array.from(new Set(node.data.seq.DBs.map(function(n) {
            return n.rawText
        })))
        const matches = findPushesByText(seqTexts)

        // text() throughout: titles, authors and contents are scraped user
        // text and must not be parsed as HTML.
        $('#titleListKeyword').text(entity)
        $('#titleListKeywordInfo').html('')
        const container = $('#titleListContainer').empty()
        matches.forEach(function(entry) {
            const link = $('<a>').append($('<h4>').text(entry.title))
            entry.pushes.forEach(function(push) {
                const line = PUSH_MARKERS[push.type - 1] + ' ' + push.author + ': ' + push.content
                link.append(
                    $('<span>').attr('style', 'margin: 0px 10px').text(line).append('<br>')
                )
            })
            container.append($('<li>').attr('class', 'w3-panel').append(link))
        })
    }

    // nodeMouseenter: shows up to five matching pushes in the tooltip, each
    // with its title (consecutive duplicates skipped) and the first push
    // content that contains the word.
    function showNodeTooltip(node) {
        const entity = node.data.entity
        const titles = []
        node.data.topEntries.forEach(function(top) {
            findPushesByText([top.rawText]).forEach(function(entry) {
                if (titles.indexOf(entry) < 0 && titles.length < 5) {
                    titles.push(entry)
                }
            })
        })

        const tooltip = $('#nodeTitleContent').empty()
        titles.forEach(function(entry, idx) {
            if (idx === 0 || entry.title != titles[idx - 1].title) {
                tooltip.append(document.createTextNode(entry.title)).append('<br>')
            }
            const hit = entry.pushes.find(function(push) {
                return typeof push.content === 'string' && push.content.includes(entity)
            })
            // No push may contain the token verbatim; skip instead of
            // dereferencing undefined.
            if (hit) {
                tooltip.append(document.createTextNode(hit.content)).append('<br>')
            }
        })
        $('#nodeTitle').removeClass('hidden')
        tree.highlightNeighbors(node)
    }

    function buildTree(error, rawdata) {
        // d3 parses every column as a string; `count` must be numeric.
        const data = rawdata.map(d => Object.assign({}, d, { count: +d.count }))
        model = new SentenTree.SentenTreeBuilder()
            .tokenize(SentenTree.tokenizer.tokenizeBySpace)
            .transformToken(token => (/score(d|s)?/.test(token) ? 'score' : token))
            .buildModel(data, {
                maxSupportRatio: 0.8,
                minSupportRatio: 0.001
            })
        tree = new SentenTree.SentenTreeVis('#vis', {
            fontSize: [15, 40],
            gapBetweenGraph: 10
        })
        tree.data(model.getRenderedGraphs(5))
            .on('nodeClick', showNodePushes)
            .on('nodeMouseenter', showNodeTooltip)
            .on('nodeMouseleave', node => {
                $('#nodeTitle').addClass('hidden')
                tree.clearHighlightNeighbors()
            })
            .on('layoutStart', layout => {
                console.log(layout)
            })
            .on('linkMouseenter', link => {
                console.log(link)
            })

        // Rescale the graph (at most 2x) whenever the rendered root resizes.
        new ResizeSensor(jQuery('#d3kitRoot'), function() {
            var scale = Math.min(2, ($('#graph').outerWidth()) / ($('#d3kitRoot').outerWidth() + 60))
            $('#vis').css({
                transform: "scale(" + scale + ")",
                'transform-origin': 'top left'
            })
        })
    }
}

@ -27,6 +27,7 @@
</div>
<div class='w3-bar w3-teal'>
<button class="w3-button w3-teal" type="button" onclick="location.href='/ptt'">PTT Sententree</button>
<button class="w3-button" type="button" onclick="location.href='/ptt_push'">推文Sententree</button>
<button class="w3-button w3-teal" type="button" onclick="location.href='/generalTxt'" style="color: darkseagreen;">泛用文字視覺化工具</button>
</div>
<div id='heading'>

@ -11,6 +11,7 @@
<body>
<div class="w3-bar w3-teal">
<button class="w3-button" type="button" onclick="location.href='/ptt'" style="color: darkseagreen;">PTT Sententree</button>
<button class="w3-button" type="button" onclick="location.href='/ptt_push'">推文Sententree</button>
<button class="w3-button w3-teal" type="button" onclick="location.href='/generalTxt'">泛用文字視覺化工具</button>
</div>

@ -0,0 +1,90 @@
<!doctype html>
<html>
<head>
<title>{{ title }}</title>
<link href="/static/css/w3.css" type="text/css" rel="stylesheet">
<link href="/static/css/main.css" type="text/css" rel="stylesheet">
<link href="https://fonts.googleapis.com/css2?family=Noto+Sans+TC:wght@100;300;400;500;700;900&display=swap" rel="stylesheet">
</head>
<body>
<div class="w3-bar w3-teal">
<button class="w3-button" type="button" onclick="location.href='/ptt'">PTT
Sententree</button>
<button class="w3-button" type="button" onclick="location.href='/ptt_push'" style="color: darkseagreen;">推文Sententree</button>
<button class="w3-button w3-teal" type="button" onclick="location.href='/generalTxt'">泛用文字視覺化工具</button>
</div>
<div id="nodeTitle" class="nodeTitle hidden">
<div id="nodeTitleContent"></div>
</div>
<!-- Stop-word editor overlay. The page script removes the "hidden" class in showStopwordEditor() and adds it back in hideStopWordEditor(). -->
<!-- NOTE(review): several buttons below share id="confirm"; ids are expected to be unique within a document. Confirm no stylesheet rule depends on it before renaming. -->
<div id="stopWordEditorLayer" class="info hidden">
<div id="stopWordEditor">
<h4 id="sweTitle" style="margin:10px; display: inline;">編輯停用詞</h4>
<!-- List rows are created by the page script, one per stop word. -->
<ul id="sweContainer" class="w3-ul w3-hoverable" style="margin-bottom: 10px;"></ul>
<div>
<input class="w3-input w3-border" style="width: 85%; display: inline;" type="text" id="newStopWord" placeholder="新增停用詞(以空白隔開)">
<button class="general-button w3-right" type="button" id="confirm" style="background-color: #379; margin-left: 8px;" onclick="addStopWord()">新增</button>
</div>
<!-- Actions: confirm (close and search again), export, clear all, back. -->
<div id="sweButtons" style="margin: 20px 0px;">
<button class="general-button" type="button" id="confirm" style="background-color: #379; margin: 0px 10px" onclick="hideStopWordEditor(); sendRequest()">確認</button>
<button class="general-button" type="button" id="confirm" style="background-color: #379; margin: 0px 10px" onclick="downloadStopWord()">匯出停用詞</button>
<button class="general-button" type="button" id="confirm" style="background-color: #379; margin: 0px 10px" onclick="clearStopWord()">全部清除</button>
<button class="general-button w3-right" type="button" id="confirm" style="background-color: #379; margin: 0px 20px" onclick="hideStopWordEditor()">返回</button>
</div>
</div>
</div>
<!-- Overlay listing the pushes behind a clicked graph node. The page script fills #titleListKeyword and #titleListContainer in its nodeClick handler; hideTitles() hides the overlay again. -->
<div id="titleListLayer" class="info hidden">
<div id="titleList">
<h2 id="titleListKeyword" style="margin:10px; display: inline;"></h2>
<span id="titleListKeywordInfo"></span>
<ul id="titleListContainer" class="w3-ul w3-hoverable"></ul>
<div id="backButton" style="margin: 20px 0px;">
<!-- NOTE(review): id="confirm" is also used by the stop-word editor buttons. -->
<button class="general-button" type="button" id="confirm" style="background-color: #379; margin: 0px 20px" onclick="hideTitles()">返回</button>
</div>
</div>
</div>
<!-- Page heading, usage notes and the search form. -->
<div id="heading">
<h2>{{title}}</h2>
<p>SentenTree <a href="https://github.com/twitter/SentenTree">https://github.com/twitter/SentenTree</a></p>
<!-- NOTE(review): the three paragraphs below share id="comment"; ids are expected to be unique within a document. -->
<p id="comment">同時使用關鍵詞和詞性搜尋的時候,必須選擇所設關鍵詞本身的詞性,否則會搜尋不到結果。</p>
<p id="comment">點選圖上的單詞可以查看單詞的資訊,觀看原始文章,也會快速切換關鍵字。</p>
<p id="comment">若搜尋到的文章超過50篇圖表僅會顯示推文數最多的前50篇文章。</p>
<!-- Search inputs read by sendRequest(): #idBox (author ids), #titleBox (post aids), #keywordBox (keyword). -->
<div id="searchingTarget">
<form name="form" enctype="multipart/form-data">
<input id="idBox" class='searchBox' type="text" name="message" placeholder="輸入鄉民ID">
<input id="titleBox" class='searchBox' type="text" name="message" placeholder="輸入文章aid">
<input id="keywordBox" class='searchBox' type="text" name="message" placeholder="輸入關鍵詞">
<button class="general-button" type="button" id="sendButton" onclick="sendRequest()">搜尋關鍵字</button>
<button class="general-button" type="button" id="resetButton" onclick="document.getElementById('keywordBox').value=''">清除關鍵字</button>
<button class="general-button" type="button" id="editSWButton" onclick="showStopwordEditor()">編輯停用詞</button>
</form>
</div>
<!-- Part-of-speech checkboxes; sendRequest() sends their checked state in the request's "pos" object. -->
<div id="advancedArea">
<form name="advanced" enctype="multipart/form-data">
<span>選擇詞性</span>
<input type="checkbox" id="noun" checked="checked">名詞
<input type="checkbox" id="verb" checked="checked">動詞
<input type="checkbox" id="adj" checked="checked">形容詞
<input type="checkbox" id="adv" checked="checked">副詞
<input type="checkbox" id="pron" checked="checked">代詞
<input type="checkbox" id="aux" checked="checked">助詞
<input type="checkbox" id="other" checked="checked">其他詞性
</form>
</div>
</div>
<div id="graphInfo"></div>
<div id="graph">
<div id="vis"></div>
</div>
</body>
<!-- Script includes. pttPush.js is loaded last; presumably it is the page script that relies on jQuery, d3, SentenTree, ResizeSensor and download.js being loaded before it (verify against the file). -->
<!-- NOTE(review): these tags sit after the closing body tag; script elements are expected inside head or body. -->
<script src="/static/node_modules/jquery/dist/jquery.js"></script>
<script src="/static/node_modules/jquery-ui-dist/jquery-ui.js"></script>
<script src="/static/node_modules/css-element-queries/src/ResizeSensor.js"></script>
<script src="/static/node_modules/sententree/dist/sententree-standalone.js"></script>
<script src="/static/node_modules/d3/build/d3.js"></script>
<script src="/static/js/download.js"></script>
<script src="/static/js/pttPush.js"></script>
</html>
Loading…
Cancel
Save