Add pttPush

dev/addPush
Zovjsra 4 years ago
parent d3438bd04c
commit 8014aef8d1

@ -3,6 +3,7 @@ import threading
import random import random
import string import string
import dataHandlerPTT as ptt import dataHandlerPTT as ptt
import dataHandlerPTTPush as pttPush
import generalText as gen import generalText as gen
from queue import Queue from queue import Queue
from datetime import datetime from datetime import datetime
@ -58,6 +59,37 @@ def index():
return redirect('/ptt') return redirect('/ptt')
@app.route('/ptt_push')
def ptt_push():
    """Serve the push-comment SentenTree page (template ``ptt_push.html``)."""
    page_title = '推文 Sententree'
    return render_template('ptt_push.html', title=page_title)
@app.route('/ptt_push/init', methods=['POST'])
def pttPushInit():
    """Return the initial payload for the push-comment page.

    The response is ``{"Result": {...}}`` holding the author that was
    looked up, the default stop words, and the TSV / JSON strings produced
    by ``pttPush.findAuthorPush`` for that author.
    """
    stop_words = pttPush.defaultStopWords
    # getMostFrequentAuthor is a generator; only its first value is used.
    author = next(pttPush.getMostFrequentAuthor())
    tsv_text, json_text = pttPush.findAuthorPush(
        author=[author], stopwords=stop_words)
    result: dict = {
        'author': author,
        'stopwords': stop_words,
        'tsv': tsv_text,
        'json': json_text,
    }
    return jsonify(Result=result)
@app.route('/ptt_push/addRequest', methods=['POST'])
def pttPushAddRequest():
    """Run a push search described by the JSON request body.

    Reads ``author`` and ``aid`` (space-separated strings), ``keyword`` and
    ``stopwords`` from the body and returns ``{"Result": {...}}`` with the
    keyword echoed back plus the TSV / JSON strings from
    ``pttPush.findAuthorPush``.
    """
    payload = request.json
    authors = payload['author'].split(' ')
    aids = payload['aid'].split(' ')
    keyword = payload['keyword']
    tsv_text, json_text = pttPush.findAuthorPush(
        author=authors,
        aid=aids,
        keyword=keyword,
        stopwords=payload['stopwords'],
    )
    return jsonify(Result={
        'keyword': keyword,
        'tsv': tsv_text,
        'json': json_text,
    })
@app.route('/ptt') @app.route('/ptt')
def pttSententree(): def pttSententree():
return render_template('ptt.html', title="PTT Sententree") return render_template('ptt.html', title="PTT Sententree")
@ -79,18 +111,6 @@ def updateContent():
}) })
@app.route('/askProgressHandler', methods=['POST'])
def askProgressHandler():
    """Start a progress-listener thread and return its random id.

    The id is 15 ASCII letters; it is passed to ``ptt.progressListener``
    together with the request's ``key`` and the shared ``eventQueue``.
    """
    key = request.json['key']
    alphabet = string.ascii_uppercase + string.ascii_lowercase
    randId = ''.join(random.choices(alphabet, k=15))
    listener = threading.Thread(
        target=ptt.progressListener,
        args=(key, eventQueue, randId),
    )
    listener.start()
    return jsonify(Result={'id': randId})
@app.route('/addRequest', methods=['POST']) @app.route('/addRequest', methods=['POST'])
def addRequest(): def addRequest():
content = request.json content = request.json
@ -126,4 +146,4 @@ def initPage():
if __name__ == "__main__": if __name__ == "__main__":
app.run(debug=False, port=4980, host='0.0.0.0', threaded=True) app.run(debug=True, port=4998, host='0.0.0.0', threaded=True)

@ -0,0 +1,89 @@
from PTTData import PTTData
from pprint import pprint
from datetime import datetime
from progressbar import ProgressBar
import json
import csv
import io
# Data handle for the 'Gossiping' board, shared by every function below.
data = PTTData('Gossiping')

# Cache used by getMostFrequentAuthor(): the date of the last aggregation
# and the author it found.
lastUpdate: datetime = None
mostFrequentAuthor: str = None

# Stop words offered to clients by default: one entry per line of the
# resource file, with surrounding whitespace removed.
defaultStopWords = []
with open('resource/stopWords.txt', 'r', encoding='UTF-8') as file:
    defaultStopWords.extend(line.strip() for line in file)
def getMostFrequentAuthor(title: str = None):
    """Yield the author with the most documents in ``data.pushCllc``.

    This is a generator.  When a cached author exists (or the cache was
    refreshed today) it is yielded first; resuming the generator then
    re-runs the aggregation, updates the module-level cache and yields the
    fresh author.  ``title`` is accepted but not used.

    NOTE(review): a caller that takes only the first value with ``next()``
    never reaches the refresh step once a cached author exists -- confirm
    that this is intended.
    """
    global mostFrequentAuthor, lastUpdate

    refreshed_today = lastUpdate == datetime.today().date()
    if refreshed_today or mostFrequentAuthor is not None:
        yield mostFrequentAuthor

    # Count documents per author, then keep the author with the largest count.
    group_by_author = {'$group': {'_id': '$author', 'count': {'$sum': 1}}}
    counts = data.pushCllc.aggregate(pipeline=[group_by_author])
    lastUpdate = datetime.today().date()
    busiest = max(counts, key=lambda row: row['count'])
    mostFrequentAuthor = busiest['_id']
    yield mostFrequentAuthor
def possegPushes(ids: list, stopwords: list, keyword: str):
    """Build the filtered, space-joined word string for each segmented push.

    Args:
        ids: values matched against the ``ID`` field of ``data.pushPossegCllc``.
        stopwords: words to leave out of the result.
        keyword: when non-empty, only documents whose remaining words
            contain exactly this word are returned.

    Returns:
        A list of ``{'posString': str, 'ID': ...}`` dicts, one per kept
        document.
    """
    # Entries of ``content`` are indexed as [0] and [1]; presumably
    # (part-of-speech flag, word) pairs -- TODO confirm against the importer.
    excluded_flags = ('eng', 'x', 'm')

    # Hash the stop words once so each token costs an O(1) lookup instead
    # of a list scan.  Fall back to the original container if it cannot be
    # hashed, so unusual inputs behave exactly as before.
    try:
        stop_lookup = frozenset(stopwords)
    except TypeError:
        stop_lookup = stopwords

    result = []
    for doc in data.pushPossegCllc.find({'ID': {'$in': ids}}):
        words = [
            pair[1] for pair in doc['content']
            if pair[0] not in excluded_flags and pair[1] not in stop_lookup
        ]
        if keyword == '' or keyword in words:
            result.append({
                'posString': ' '.join(words),
                'ID': doc['ID'],
            })
    return result
def findAuthorPush(author: list = None, aid: list = None, keyword: str = '', stopwords: list = None):
    """Collect pushes by author and/or post aid and format them for the UI.

    Args:
        author: author ids to match; ``None`` or ``['']`` means no filter.
        aid: post aids to match; ``None`` or ``['']`` means no filter.
        keyword: forwarded to ``possegPushes`` to keep only pushes that
            contain this word.
        stopwords: words removed from the segmented text; defaults to none.

    Returns:
        ``(tsv, json)``: a tab-separated table with the header
        ``id / text / count`` (one row per segmented push) and a JSON array
        of the matched documents (``title``, ``author``, ``pushes`` and,
        when segmentation was found, ``part``).
    """
    if stopwords is None:
        stopwords = []

    terms = {}
    if author is not None and author != ['']:
        terms['author'] = {'$in': author}
    if aid is not None and aid != ['']:
        terms['postAid'] = {'$in': aid}
    # NOTE(review): with neither filter set, ``terms`` is empty and the
    # query matches the whole collection -- confirm this is acceptable.

    pushId = []
    pushContent = {}
    for p in data.pushCllc.find(terms):
        pushId.append(p['_id'])
        pushContent[str(p['_id'])] = {
            'title': p['title'],
            'author': p['author'],
            'pushes': p['pushes'],
        }

    possegList = possegPushes(pushId, stopwords, keyword)
    possegResult = [['id', 'text', 'count']]
    for index, n in enumerate(possegList):
        key = str(n['ID'])
        if key in pushContent:
            # Lets the client pair a TSV row with its source document.
            pushContent[key]['part'] = str(n['posString'])
        # Every row carries the same fixed count value.
        possegResult.append([index, n['posString'], 3000])

    jsonString = json.dumps(
        list(pushContent.values()), indent=4, ensure_ascii=False)
    with io.StringIO() as f:
        csv.writer(f, delimiter='\t').writerows(possegResult)
        csvString = f.getvalue()
    return (csvString, jsonString)
if __name__ == "__main__":
    # Manual smoke test: pretty-print the TSV/JSON pair for one sample author.
    sample = findAuthorPush(['gwenwoo'])
    pprint(sample)

@ -309,6 +309,17 @@ input[type="date" i] {
animation: blinker 1s linear infinite; animation: blinker 1s linear infinite;
} }
/* Text inputs that carry the searchBox class. */
.searchBox {
    margin-left: 5px;
    margin-right: 5px;
    padding: 7px;
    border: 1px solid lightslategray;
    border-radius: 3px;
    align-content: center;
}
@keyframes blinker { @keyframes blinker {
50% { 50% {
color: red; color: red;

@ -308,9 +308,9 @@ function buildSentetree(tsvString) {
.on('nodeMouseenter', node => { .on('nodeMouseenter', node => {
console.log(node) console.log(node)
titles = node.data.topEntries.map(function(x) { titles = node.data.topEntries.map(function(x) {
return wordTitleList[x.rawText] return wordTitleList[x.rawText]
}) })
//console.log(titles) console.log(titles)
infoStr = '' infoStr = ''
for (index in titles) { for (index in titles) {
if (index == 0) { if (index == 0) {

@ -0,0 +1,329 @@
// Start the page. `init` is a function declaration, so it is hoisted and can
// be called here; the `var` initialisers below run after init() returns.
init()

// Path handed to d3.tsv() when buildSentetree() is called without a TSV string.
var tsvPath = ''
var titlePath = ''
var defaultStartDate
var defaultEndDate
var totalPosts
// Written by setDate().
var startDate
var endDate
// Array parsed from the server's `json` field; searched by `part` in buildSentetree().
var wordPushList
// Suffix of the progress event names used in closeEventListner().
var randId
var globKeyword = ''
// Current stop-word list: replaced by the init response, edited in the
// stop-word editor and sent with every search request.
var stopwords = []
// Load the initial graph from the server and register the page-wide UI handlers.
function init() {
    // POST ptt_push/init returns the default author, stop words and graph data.
    $.ajax({
        type: 'POST',
        url: 'ptt_push/init',
        dataType: 'json',
        success: function(data) {
            console.log(data)
            // NOTE(review): tsvString has no declaration, so this creates an implicit global.
            tsvString = data.Result.tsv
            // Result.json is itself a JSON-encoded string, hence the explicit parse.
            wordPushList = JSON.parse(data.Result.json)
            stopwords = data.Result.stopwords
            console.log(wordPushList)
            // Show the author the server chose in the id search box.
            $('#idBox').val(data.Result.author)
            buildSentetree(tsvString)
        }
    })
    $(document).ready(function() {
        // Enter anywhere on the page submits the search (keyCode 13 = Enter).
        $(window).keydown(function(event) {
            if (event.keyCode == 13) {
                event.preventDefault()
                sendRequest()
            }
        });
    });
    // Keep the #nodeTitle tooltip positioned at the mouse pointer.
    $(window).on('mousemove', function(e) {
        $('#nodeTitle').css({
            left: e.pageX,
            top: e.pageY
        })
    })
    // The title list only scrolls while the pointer is over it.
    $('#titleListContainer').hover(
        function() { // Run on hover/mouseenter
            $(this).css('overflow', 'auto')
        },
        function() { // Run on mouseleave
            $(this).css('overflow', 'hidden')
        }
    )
    // Clicking the overlay itself (not its children) closes the title list.
    $('#titleListLayer').click(function(e) {
        if ($('#titleListLayer').is(e.target)) {
            hideTitles()
        }
    })
    // Same for the stop-word editor overlay.
    $('#stopWordEditorLayer').click(function(e) {
        if ($('#stopWordEditorLayer').is(e.target)) {
            hideStopWordEditor()
        }
    })
}
// Remove every stop word, both from the editor list and from memory.
function clearStopWord() {
    const listElement = $('#sweContainer')
    listElement.html('')
    stopwords = []
}
// Add the space-separated words typed into #newStopWord to `stopwords`
// and to the editor list, then scroll to the last word handled.
function addStopWord() {
    const input = $('#newStopWord')
    const container = document.getElementById('sweContainer')
    const candidates = input.val().split(' ')

    for (const word of candidates) {
        if (word === '') {
            continue
        }
        if (!stopwords.includes(word)) {
            stopwords.push(word)
            // The remove button deletes the word at this row's position; rows
            // and `stopwords` are kept in the same order.
            const removeButton = $('<span>')
                .attr('class', 'w3-button w3-hover-red w3-transparent w3-display-right')
                .click(function() {
                    const index = $(this).parent().index()
                    stopwords.splice(index, 1)
                    $('#sweContainer li').eq(index).remove()
                })
                .append('&times;')
            // .text() so that a word containing '<' or '&' is shown literally
            // instead of being parsed as markup.
            $('#sweContainer').append(
                $('<li>').attr('class', 'w3-display-container')
                    .append($('<span>').text(word))
                    .append(removeButton)
            )
        }
        // Scroll to the word's row (new or already present), if it is rendered.
        const row = container.children[stopwords.indexOf(word)]
        if (row) {
            container.scrollTop = row.offsetTop
        }
    }
    input.val('')
}
// Open the stop-word editor: Enter now adds a stop word instead of
// submitting a search, and the list is rebuilt from `stopwords`.
function showStopwordEditor() {
    $(window).unbind('keydown')
    $(window).keydown(function(event) {
        if (event.keyCode == 13) {
            addStopWord()
        }
    })

    const container = $('#sweContainer').empty()
    for (const word of stopwords) {
        // The remove button deletes the word at this row's position; rows
        // and `stopwords` are kept in the same order.
        const removeButton = $('<span>')
            .attr('class', 'w3-button w3-hover-red w3-transparent w3-display-right')
            .click(function() {
                const index = $(this).parent().index()
                stopwords.splice(index, 1)
                $('#sweContainer li').eq(index).remove()
            })
            .append('&times;')
        // .text() so that a word containing '<' or '&' is shown literally
        // instead of being parsed as markup.
        container.append(
            $('<li>').attr('class', 'w3-display-container')
                .append($('<span>').text(word))
                .append(removeButton)
        )
    }
    $('#stopWordEditorLayer').removeClass('hidden')
}
// Close the stop-word editor and make Enter submit a search again.
function hideStopWordEditor() {
    const submitOnEnter = function(event) {
        if (event.keyCode == 13) {
            event.preventDefault()
            sendRequest()
        }
    }
    $(window).unbind('keydown').keydown(submitOnEnter)
    $('#stopWordEditorLayer').addClass('hidden')
}
// Offer the current stop words as a text file, one word per line.
function downloadStopWord() {
    // Local constant: the original assignment leaked an implicit global.
    const stopWordString = stopwords.join('\n')
    download(stopWordString, 'stopwords.txt', 'text/plain')
}
// Toggle the progress popup, reset its text and bar, and stop listening
// for progress events.
function hidePopup() {
    const emptyBar = '0%'
    $('#infoWindowLayer').toggleClass('hidden')
    $('#progressInfo').html('')
    $('#progBarInner').css('width', emptyBar)
    closeEventListner()
}
// Write a date range into the #startDate / #endDate inputs and remember
// it in the module-level startDate / endDate variables.
function setDate(_startDate, _endDate) {
    const startInput = document.getElementById('startDate')
    const endInput = document.getElementById('endDate')
    startInput.value = _startDate
    endInput.value = _endDate
    startDate = _startDate
    endDate = _endDate
}
// Progress event handler: show the event's comment and move the progress bar.
// event.data is a JSON string with `comment` and `progress` fields.
function getProgressing(event) {
    // Local constant: the original assignment leaked an implicit global `data`.
    const payload = JSON.parse(event.data)
    $('#progressInfo').html(payload.comment)
    $('#progBarInner').css('width', payload.progress + '%')
}
// Completion event handler: redraw the graph from the event payload and
// close the progress popup.
function getProgressFinished(event) {
    // Local constant: the original assignment leaked an implicit global `data`.
    const payload = JSON.parse(event.data)
    changeGraph(payload)
    hidePopup()
}
// Detach the per-request progress handlers from `progListener`.
function closeEventListner() {
    // `progListener` is not created anywhere in this script; without this
    // guard the call throws a ReferenceError when no listener exists.
    if (typeof progListener === 'undefined' || !progListener) {
        return
    }
    progListener.removeEventListener('progressing' + randId, getProgressing)
    progListener.removeEventListener('progressFinished' + randId, getProgressFinished)
}
// Send the search form to ptt_push/addRequest and redraw the graph with
// the response. At least one of author id / post aid must be filled in.
function sendRequest() {
    const author = $('#idBox').val()
    const aid = $('#titleBox').val()
    if (author == '' && aid == '') {
        // Stop here: previously the alert was shown but the empty request
        // was still sent. The message was also cut off mid-sentence.
        window.alert('請至少填寫一個鄉民id或是文章aid')
        return
    }

    const content = JSON.stringify({
        author: author,
        aid: aid,
        keyword: $('#keywordBox').val(),
        stopwords: stopwords,
        pos: {
            noun: $('#noun').is(':checked'),
            verb: $('#verb').is(':checked'),
            adj: $('#adj').is(':checked'),
            adv: $('#adv').is(':checked'),
            pron: $('#pron').is(':checked'),
            aux: $('#aux').is(':checked'),
            other: $('#other').is(':checked')
        }
    })
    console.log(content)

    $.ajax({
        type: 'POST',
        url: 'ptt_push/addRequest',
        data: content,
        contentType: 'application/json',
        success: function(data) {
            console.log(data)
            // Result.json is a JSON-encoded string; the parsed array is what
            // the graph's click/hover handlers search.
            wordPushList = JSON.parse(data.Result.json)
            console.log(wordPushList)
            changeGraph(data.Result)
        }
    })
}
// Replace the current graph with one built from `data.tsv`.
function changeGraph(data) {
    console.log(data)
    // The old #vis node is removed and a fresh one created so the new tree
    // renders into an empty container. (The unused re-parse of data.json
    // was dropped.)
    destroyCurrentGraph()
    d3.select('#graph').append('div').attr('id', 'vis')
    buildSentetree(data.tsv)
}
// Remove the rendered graph container(s) with id "vis" from the page.
function destroyCurrentGraph() {
    const currentVis = d3.selectAll('#vis')
    currentVis.remove()
}
// Hide the title-list overlay.
function hideTitles() {
    const overlay = $('#titleListLayer')
    overlay.addClass('hidden')
}
// Build and render the SentenTree graph into #vis.
//   tsvString - TSV text with id / text / count columns; when omitted the
//               data is loaded from `tsvPath` instead.
// Click and hover handlers look up the original pushes in the global
// `wordPushList` by comparing each entry's `part` with a node's raw text.
function buildSentetree(tsvString) {
    console.log("Build.")
    let model
    let tree

    // Coerce null/undefined to '' so jQuery's .text(value) always acts as a setter.
    const asText = value => (value == null ? '' : String(value))
    // Escape server-provided text before it is concatenated into an HTML string.
    const escapeHtml = value => $('<div>').text(asText(value)).html()
    // All wordPushList entries whose segmented text equals `rawText`.
    const entriesFor = rawText => wordPushList.filter(entry => entry.part == rawText)

    if (typeof tsvString === 'undefined') {
        d3.tsv(tsvPath, buildTree)
    } else {
        // No load error in this path; the original passed an undeclared `_` here.
        buildTree(null, d3.tsvParse(tsvString))
    }

    function buildTree(error, rawdata) {
        if (error || !rawdata) {
            console.error(error)
            return
        }
        // The count column arrives as text; convert it to a number.
        const data = rawdata.map(d => Object.assign({}, d, { count: +d.count }))
        model = new SentenTree.SentenTreeBuilder()
            .tokenize(SentenTree.tokenizer.tokenizeBySpace)
            .transformToken(token => (/score(d|s)?/.test(token) ? 'score' : token))
            .buildModel(data, {
                maxSupportRatio: 0.8,
                minSupportRatio: 0.001
            })
        tree = new SentenTree.SentenTreeVis('#vis', {
            fontSize: [15, 40],
            gapBetweenGraph: 10
        })
        tree.data(model.getRenderedGraphs(5))
            .on('nodeClick', node => {
                // Clicking a word makes it the keyword and lists its source pushes.
                $('#keywordBox').val(node.data.entity)
                $('#titleListLayer').removeClass('hidden')

                // Distinct raw texts, in first-seen order.
                const seqList = Array.from(new Set(node.data.seq.DBs.map(n => n.rawText)))
                let titleList = []
                for (const s of seqList) {
                    titleList = titleList.concat(entriesFor(s))
                }
                console.log(titleList)

                $('#titleListKeyword').text(asText(node.data.entity))
                $('#titleListKeywordInfo').html('')
                $('#titleListContainer').empty()
                for (const item of titleList) {
                    // Titles and comment bodies come from the server: insert
                    // them as text, never as markup.
                    const link = $('<a>').append($('<h4>').text(asText(item.title)))
                    for (const p of item.pushes) {
                        link.append(
                            $('<span>')
                                .attr('style', 'margin: 0px 10px')
                                .text((['推', '噓', '→'])[p.type - 1] + ' ' + p.author + ': ' + p.content)
                                .append('<br>')
                        )
                    }
                    $('#titleListContainer').append(
                        $('<li>').attr('class', 'w3-panel').append(link)
                    )
                }
            })
            .on('nodeMouseenter', node => {
                // Tooltip: up to five distinct entries behind the node's top sentences.
                const titles = []
                node.data.topEntries.forEach(function(x) {
                    for (const r of entriesFor(x.rawText)) {
                        if (titles.indexOf(r) < 0 && titles.length < 5) {
                            titles.push(r)
                        }
                    }
                })
                console.log(titles)

                let infoStr = ''
                titles.forEach(function(entry, index) {
                    // Print a title once per run of consecutive equal titles.
                    if (index === 0 || entry.title != titles[index - 1].title) {
                        infoStr += escapeHtml(entry.title) + '<br>'
                    }
                    // First push that contains the word; skipped when none
                    // does (this case used to throw a TypeError).
                    const match = entry.pushes.find(function(x) {
                        return x.content.includes(node.data.entity)
                    })
                    if (match) {
                        infoStr += escapeHtml(match.content) + '<br>'
                    }
                })
                $('#nodeTitleContent').html(infoStr)
                $('#nodeTitle').removeClass('hidden')
                tree.highlightNeighbors(node)
            })
            .on('nodeMouseleave', node => {
                $('#nodeTitle').addClass('hidden')
                tree.clearHighlightNeighbors()
            })
            .on('layoutStart', layout => {
                console.log(layout)
            })
            .on('linkMouseenter', link => {
                console.log(link)
            })

        // Scale the rendering (at most 2x) so it fits the width of #graph
        // whenever the rendered root changes size.
        new ResizeSensor(jQuery('#d3kitRoot'), function() {
            const scale = Math.min(2, ($('#graph').outerWidth()) / ($('#d3kitRoot').outerWidth() + 60))
            $('#vis').css({
                transform: "scale(" + scale + ")",
                'transform-origin': 'top left'
            })
        })
    }
}

@ -27,6 +27,7 @@
</div> </div>
<div class='w3-bar w3-teal'> <div class='w3-bar w3-teal'>
<button class="w3-button w3-teal" type="button" onclick="location.href='/ptt'">PTT Sententree</button> <button class="w3-button w3-teal" type="button" onclick="location.href='/ptt'">PTT Sententree</button>
<button class="w3-button" type="button" onclick="location.href='/ptt_push'">推文Sententree</button>
<button class="w3-button w3-teal" type="button" onclick="location.href='/generalTxt'" style="color: darkseagreen;">泛用文字視覺化工具</button> <button class="w3-button w3-teal" type="button" onclick="location.href='/generalTxt'" style="color: darkseagreen;">泛用文字視覺化工具</button>
</div> </div>
<div id='heading'> <div id='heading'>

@ -11,6 +11,7 @@
<body> <body>
<div class="w3-bar w3-teal"> <div class="w3-bar w3-teal">
<button class="w3-button" type="button" onclick="location.href='/ptt'" style="color: darkseagreen;">PTT Sententree</button> <button class="w3-button" type="button" onclick="location.href='/ptt'" style="color: darkseagreen;">PTT Sententree</button>
<button class="w3-button" type="button" onclick="location.href='/ptt_push'">推文Sententree</button>
<button class="w3-button w3-teal" type="button" onclick="location.href='/generalTxt'">泛用文字視覺化工具</button> <button class="w3-button w3-teal" type="button" onclick="location.href='/generalTxt'">泛用文字視覺化工具</button>
</div> </div>

@ -0,0 +1,90 @@
<!doctype html>
<html>
<head>
    <!-- Declare the encoding explicitly: the page contains CJK text. -->
    <meta charset="utf-8">
    <title>{{ title }}</title>
    <link href="/static/css/w3.css" type="text/css" rel="stylesheet">
    <link href="/static/css/main.css" type="text/css" rel="stylesheet">
    <link href="https://fonts.googleapis.com/css2?family=Noto+Sans+TC:wght@100;300;400;500;700;900&display=swap" rel="stylesheet">
</head>
<body>
<!-- Top navigation bar. The button for this page (/ptt_push) is the one tinted darkseagreen. -->
<div class="w3-bar w3-teal">
    <button class="w3-button" type="button" onclick="location.href='/ptt'">PTT
        Sententree</button>
    <button class="w3-button" type="button" onclick="location.href='/ptt_push'" style="color: darkseagreen;">推文Sententree</button>
    <button class="w3-button w3-teal" type="button" onclick="location.href='/generalTxt'">泛用文字視覺化工具</button>
</div>
<!-- Hover tooltip: pttPush.js moves #nodeTitle to the mouse position and fills #nodeTitleContent when a graph node is hovered. -->
<div id="nodeTitle" class="nodeTitle hidden">
    <div id="nodeTitleContent"></div>
</div>
<!-- Stop-word editor overlay (hidden by default). #sweContainer is filled by showStopwordEditor()/addStopWord() in pttPush.js. -->
<!-- NOTE(review): several buttons below share id="confirm"; ids should be unique within a document. -->
<div id="stopWordEditorLayer" class="info hidden">
    <div id="stopWordEditor">
        <h4 id="sweTitle" style="margin:10px; display: inline;">編輯停用詞</h4>
        <ul id="sweContainer" class="w3-ul w3-hoverable" style="margin-bottom: 10px;"></ul>
        <!-- New stop words: typed space-separated, added with the button or the Enter key. -->
        <div>
            <input class="w3-input w3-border" style="width: 85%; display: inline;" type="text" id="newStopWord" placeholder="新增停用詞(以空白隔開)">
            <button class="general-button w3-right" type="button" id="confirm" style="background-color: #379; margin-left: 8px;" onclick="addStopWord()">新增</button>
        </div>
        <!-- Confirm (close and re-run the search), export, clear all, back. -->
        <div id="sweButtons" style="margin: 20px 0px;">
            <button class="general-button" type="button" id="confirm" style="background-color: #379; margin: 0px 10px" onclick="hideStopWordEditor(); sendRequest()">確認</button>
            <button class="general-button" type="button" id="confirm" style="background-color: #379; margin: 0px 10px" onclick="downloadStopWord()">匯出停用詞</button>
            <button class="general-button" type="button" id="confirm" style="background-color: #379; margin: 0px 10px" onclick="clearStopWord()">全部清除</button>
            <button class="general-button w3-right" type="button" id="confirm" style="background-color: #379; margin: 0px 20px" onclick="hideStopWordEditor()">返回</button>
        </div>
    </div>
</div>
<!-- Overlay listing the pushes behind a clicked graph node (hidden by default); filled by the nodeClick handler in pttPush.js. -->
<div id="titleListLayer" class="info hidden">
    <div id="titleList">
        <!-- The clicked word. -->
        <h2 id="titleListKeyword" style="margin:10px; display: inline;"></h2>
        <span id="titleListKeywordInfo"></span>
        <ul id="titleListContainer" class="w3-ul w3-hoverable"></ul>
        <div id="backButton" style="margin: 20px 0px;">
            <button class="general-button" type="button" id="confirm" style="background-color: #379; margin: 0px 20px" onclick="hideTitles()">返回</button>
        </div>
    </div>
</div>
<!-- Page heading, usage notes and the search form. -->
<!-- NOTE(review): the three notes below share id="comment"; ids should be unique within a document. -->
<div id="heading">
    <h2>{{title}}</h2>
    <p>SentenTree <a href="https://github.com/twitter/SentenTree">https://github.com/twitter/SentenTree</a></p>
    <p id="comment">同時使用關鍵詞和詞性搜尋的時候,必須選擇所設關鍵詞本身的詞性,否則會搜尋不到結果。</p>
    <p id="comment">點選圖上的單詞可以查看單詞的資訊,觀看原始文章,也會快速切換關鍵字。</p>
    <p id="comment">若搜尋到的文章超過50篇圖表僅會顯示推文數最多的前50篇文章。</p>
    <!-- Search inputs read by sendRequest(): #idBox (author), #titleBox (aid), #keywordBox (keyword). -->
    <div id="searchingTarget">
        <form name="form" enctype="multipart/form-data">
            <input id="idBox" class='searchBox' type="text" name="message" placeholder="輸入鄉民ID">
            <input id="titleBox" class='searchBox' type="text" name="message" placeholder="輸入文章aid">
            <input id="keywordBox" class='searchBox' type="text" name="message" placeholder="輸入關鍵詞">
            <button class="general-button" type="button" id="sendButton" onclick="sendRequest()">搜尋關鍵字</button>
            <button class="general-button" type="button" id="resetButton" onclick="document.getElementById('keywordBox').value=''">清除關鍵字</button>
            <button class="general-button" type="button" id="editSWButton" onclick="showStopwordEditor()">編輯停用詞</button>
        </form>
    </div>
    <!-- Part-of-speech checkboxes; sendRequest() sends their state in the request's `pos` object. -->
    <div id="advancedArea">
        <form name="advanced" enctype="multipart/form-data">
            <span>選擇詞性</span>
            <input type="checkbox" id="noun" checked="checked">名詞
            <input type="checkbox" id="verb" checked="checked">動詞
            <input type="checkbox" id="adj" checked="checked">形容詞
            <input type="checkbox" id="adv" checked="checked">副詞
            <input type="checkbox" id="pron" checked="checked">代詞
            <input type="checkbox" id="aux" checked="checked">助詞
            <input type="checkbox" id="other" checked="checked">其他詞性
        </form>
    </div>
</div>
<div id="graphInfo"></div>
<!-- Graph area: SentenTree renders into #vis; changeGraph() in pttPush.js removes and re-creates #vis inside #graph on each search. -->
<div id="graph">
    <div id="vis"></div>
</div>
</body>
<script src="/static/node_modules/jquery/dist/jquery.js"></script>
<script src="/static/node_modules/jquery-ui-dist/jquery-ui.js"></script>
<script src="/static/node_modules/css-element-queries/src/ResizeSensor.js"></script>
<script src="/static/node_modules/sententree/dist/sententree-standalone.js"></script>
<script src="/static/node_modules/d3/build/d3.js"></script>
<script src="/static/js/download.js"></script>
<script src="/static/js/pttPush.js"></script>
</html>
Loading…
Cancel
Save