diff --git a/app.py b/app.py
index 0471be9..2285074 100755
--- a/app.py
+++ b/app.py
@@ -3,6 +3,7 @@ import threading
import random
import string
import dataHandlerPTT as ptt
+import dataHandlerPTTPush as pttPush
import generalText as gen
from queue import Queue
from datetime import datetime
@@ -58,6 +59,37 @@ def index():
return redirect('/ptt')
+@app.route('/ptt_push')
+def ptt_push():
+ return render_template('ptt_push.html', title='推文 Sententree')
+
+
+@app.route('/ptt_push/init', methods=['POST'])
+def pttPushInit():
+ author = next(pttPush.getMostFrequentAuthor())
+ pushes = pttPush.findAuthorPush(
+ author=[author], stopwords=pttPush.defaultStopWords)
+ result: dict = {
+ 'author': author,
+ 'stopwords': pttPush.defaultStopWords,
+ 'tsv': pushes[0],
+ 'json': pushes[1]
+ }
+ return jsonify(Result=result)
+
+
+@app.route('/ptt_push/addRequest', methods=['POST'])
+def pttPushAddRequest():
+ pushes = pttPush.findAuthorPush(author=request.json['author'].split(
+ ' '), aid=request.json['aid'].split(' '), keyword=request.json['keyword'], stopwords=request.json['stopwords'])
+ result = {
+ 'keyword': request.json['keyword'],
+ 'tsv': pushes[0],
+ 'json': pushes[1]
+ }
+ return jsonify(Result=result)
+
+
@app.route('/ptt')
def pttSententree():
return render_template('ptt.html', title="PTT Sententree")
@@ -79,18 +111,6 @@ def updateContent():
})
-@app.route('/askProgressHandler', methods=['POST'])
-def askProgressHandler():
- key = request.json['key']
- randId = ''.join(random.choices(
- string.ascii_uppercase + string.ascii_lowercase, k=15))
- threading.Thread(target=ptt.progressListener,
- args=(key, eventQueue, randId,)).start()
- return jsonify(Result={
- 'id': randId
- })
-
-
@app.route('/addRequest', methods=['POST'])
def addRequest():
content = request.json
@@ -126,4 +146,4 @@ def initPage():
if __name__ == "__main__":
- app.run(debug=False, port=4980, host='0.0.0.0', threaded=True)
+ app.run(debug=True, port=4998, host='0.0.0.0', threaded=True)
diff --git a/dataHandlerPTTPush.py b/dataHandlerPTTPush.py
new file mode 100644
index 0000000..faa19c2
--- /dev/null
+++ b/dataHandlerPTTPush.py
@@ -0,0 +1,89 @@
+from PTTData import PTTData
+from pprint import pprint
+from datetime import datetime
+from progressbar import ProgressBar
+import json
+import csv
+import io
+
+data = PTTData('Gossiping')
+lastUpdate: datetime = None
+mostFrequentAuthor: str = None
+
+defaultStopWords = []
+with open('resource/stopWords.txt', 'r', encoding='UTF-8') as file:
+ for word in file.readlines():
+ word = word.strip()
+ defaultStopWords.append(word)
+
+
+def getMostFrequentAuthor(title: str = None):
+ global mostFrequentAuthor, lastUpdate
+ if (lastUpdate == datetime.today().date()):
+ yield mostFrequentAuthor
+ elif (mostFrequentAuthor != None):
+ yield mostFrequentAuthor
+ authorList = data.pushCllc.aggregate(pipeline=[{'$group': {
+ '_id': '$author',
+ 'count': {
+ '$sum': 1
+ }
+ }}])
+ lastUpdate = datetime.today().date()
+ mostFrequentAuthor = max(authorList, key=lambda x: x['count'])['_id']
+ yield mostFrequentAuthor
+ return
+
+
+def possegPushes(ids: list, stopwords: list, keyword: str):
+ possegs = data.pushPossegCllc.find({'ID': {'$in': ids}})
+ result = []
+ for index, p in enumerate(possegs):
+ words = [i[1] for i in p['content'] if i[0] not in [
+ 'eng', 'x', 'm'] and i[1] not in stopwords]
+ if(keyword == '' or keyword in words):
+ result.append({
+ 'posString': ' '.join(words),
+ 'ID': p['ID']
+ })
+ return result
+
+
+def findAuthorPush(author: list = None, aid: list = None, keyword: str = '', stopwords: list = []):
+ terms = {}
+ if (author != [''] and author != None):
+ terms['author'] = {
+ '$in': author
+ }
+ if (aid != [''] and aid != None):
+ terms['postAid'] = {
+ '$in': aid
+ }
+ print(terms)
+ pushes = data.pushCllc.find(terms)
+ pushId = []
+ pushContent = {}
+ for p in pushes:
+ pushId.append(p['_id'])
+ pushContent[str(p['_id'])] = {
+ 'title': p['title'],
+ 'author': p['author'],
+ 'pushes': p['pushes']
+ }
+ possegList = possegPushes(pushId, stopwords, keyword)
+ possegResult = [['id', 'text', 'count']]
+ for index, n in enumerate(possegList):
+ if(str(n['ID']) in pushContent.keys()):
+ pushContent[str(n['ID'])]['part'] = str(n['posString'])
+ possegResult.append([index, n['posString'], 3000])
+ jsonString = json.dumps(
+ [i for i in pushContent.values()], indent=4, ensure_ascii=False)
+ with io.StringIO() as f:
+ writer = csv.writer(f, delimiter='\t')
+ writer.writerows(possegResult)
+ csvString = f.getvalue()
+ return (csvString, jsonString)
+
+
+if __name__ == "__main__":
+ pprint(findAuthorPush(['gwenwoo']))
diff --git a/static/css/main.css b/static/css/main.css
index 1d9a6c9..ebde5f4 100755
--- a/static/css/main.css
+++ b/static/css/main.css
@@ -309,6 +309,17 @@ input[type="date" i] {
animation: blinker 1s linear infinite;
}
+/* Padded box with a thin, rounded, slate-gray border and side margins. */
+.searchBox {
+    margin-left: 5px;
+    margin-right: 5px;
+    padding: 7px;
+    border-width: 1px;
+    border-style: solid;
+    border-color: lightslategray;
+    border-radius: 3px;
+    align-content: center;
+}
+
@keyframes blinker {
50% {
color: red;
diff --git a/static/js/ptt.js b/static/js/ptt.js
index fef3876..e911c58 100755
--- a/static/js/ptt.js
+++ b/static/js/ptt.js
@@ -308,9 +308,9 @@ function buildSentetree(tsvString) {
.on('nodeMouseenter', node => {
console.log(node)
titles = node.data.topEntries.map(function(x) {
- return wordTitleList[x.rawText]
- })
- //console.log(titles)
+ return wordTitleList[x.rawText]
+ })
+ console.log(titles)
infoStr = ''
for (index in titles) {
if (index == 0) {
diff --git a/static/js/pttPush.js b/static/js/pttPush.js
new file mode 100644
index 0000000..fd3046c
--- /dev/null
+++ b/static/js/pttPush.js
@@ -0,0 +1,329 @@
+init()
+var tsvPath = ''
+var titlePath = ''
+var defaultStartDate
+var defaultEndDate
+var totalPosts
+var startDate
+var endDate
+var wordPushList
+var randId
+var globKeyword = ''
+var stopwords = []
+
+function init() {
+ $.ajax({
+ type: 'POST',
+ url: 'ptt_push/init',
+ dataType: 'json',
+ success: function(data) {
+ console.log(data)
+ tsvString = data.Result.tsv
+ wordPushList = JSON.parse(data.Result.json)
+ stopwords = data.Result.stopwords
+ console.log(wordPushList)
+ $('#idBox').val(data.Result.author)
+ buildSentetree(tsvString)
+ }
+ })
+ $(document).ready(function() {
+ $(window).keydown(function(event) {
+ if (event.keyCode == 13) {
+ event.preventDefault()
+ sendRequest()
+ }
+ });
+ });
+ $(window).on('mousemove', function(e) {
+ $('#nodeTitle').css({
+ left: e.pageX,
+ top: e.pageY
+ })
+ })
+ $('#titleListContainer').hover(
+ function() { // Run on hover/mouseenter
+ $(this).css('overflow', 'auto')
+ },
+ function() { // Run on mouseleave
+ $(this).css('overflow', 'hidden')
+ }
+ )
+ $('#titleListLayer').click(function(e) {
+ if ($('#titleListLayer').is(e.target)) {
+ hideTitles()
+ }
+ })
+ $('#stopWordEditorLayer').click(function(e) {
+ if ($('#stopWordEditorLayer').is(e.target)) {
+ hideStopWordEditor()
+ }
+ })
+}
+
+function clearStopWord() {
+ stopwords = []
+ $('#sweContainer').html('')
+}
+
+function addStopWord() { /*
+ * Add the space-separated words typed into #newStopWord to `stopwords`.
+ *
+ * Each non-empty word that is not already in `stopwords` is pushed onto the
+ * array and a new element is appended to #sweContainer; the element's button
+ * removes the entry at the index of the button's parent, both from
+ * `stopwords` and from the '#sweContainer li' list. After each non-empty word
+ * the container is scrolled to the child at that word's index, and the input
+ * is cleared at the end.
+ *
+ * NOTE(review): the element-creating selector strings below are empty or
+ * broken across lines in this view -- presumably markup that was stripped;
+ * verify against the real file.
+ * NOTE(review): newswRaw, newswList and newsw are assigned without a
+ * declaration, so they become globals.
+ */
+ newswRaw = $('#newStopWord').val()
+ newswList = newswRaw.split(' ')
+ for (newsw of newswList) {
+ if (newsw != '') {
+ if (stopwords.includes(newsw)) {
+
+ } else {
+ stopwords.push(newsw)
+ $('#sweContainer').append($('
').attr('class', 'w3-display-container').append($('').append(newsw)).append($('').attr('class', 'w3-button w3-hover-red w3-transparent w3-display-right').click(function(e) {
+ var index = $(this).parent().index() // position of the clicked entry among its siblings
+ console.log(stopwords[index])
+ stopwords.splice(index, 1) // relies on DOM order matching the array order
+ console.log(stopwords)
+ $('#sweContainer li').eq(index).remove()
+ }).append("×")))
+ console.log(document.getElementById('sweContainer').children[stopwords.indexOf(newsw)])
+ }
+ document.getElementById("sweContainer").scrollTop = document.getElementById('sweContainer').children[stopwords.indexOf(newsw)].offsetTop // scroll to this word's entry
+ }
+ }
+ $('#newStopWord').val('')
+}
+
+function showStopwordEditor() { /*
+ * Open the stop-word editor overlay.
+ *
+ * Replaces the window keydown handlers so that Enter calls addStopWord(),
+ * rebuilds #sweContainer with one element per entry of `stopwords` (each with
+ * a button that removes the entry at its parent's index), then un-hides
+ * #stopWordEditorLayer.
+ *
+ * NOTE(review): the element-creating selector strings below are empty in
+ * this view -- presumably markup that was stripped; verify against the real
+ * file.
+ * NOTE(review): the loop variable `word` is not declared, so it is a global.
+ */
+ console.log(stopwords)
+ $(window).unbind('keydown') // drops every keydown handler bound on window
+ $(window).keydown(function(event) {
+ if (event.keyCode == 13) {
+ addStopWord()
+ }
+ })
+ $('#sweContainer').empty()
+ for (word of stopwords) {
+ $('#sweContainer').append($('').attr('class', 'w3-display-container').append($('').append(word)).append($('').attr('class', 'w3-button w3-hover-red w3-transparent w3-display-right').click(function(e) {
+ var index = $(this).parent().index() // position of the clicked entry among its siblings
+ console.log(stopwords[index])
+ stopwords.splice(index, 1)
+ console.log(stopwords)
+ $('#sweContainer li').eq(index).remove()
+ }).append("×")))
+ }
+ $('#stopWordEditorLayer').removeClass('hidden')
+}
+
+function hideStopWordEditor() {
+ $(window).unbind('keydown')
+ $(window).keydown(function(event) {
+ if (event.keyCode == 13) {
+ event.preventDefault()
+ sendRequest()
+ }
+ })
+ $('#stopWordEditorLayer').addClass('hidden')
+}
+
+function downloadStopWord() {
+ stopWordString = stopwords.join('\n')
+ download(stopWordString, 'stopwords.txt', 'text/plain')
+}
+
+
+function hidePopup() {
+ $('#infoWindowLayer').toggleClass('hidden')
+ $('#progressInfo').html('')
+ $('#progBarInner').css('width', 0 + '%')
+ closeEventListner()
+}
+
+function setDate(_startDate, _endDate) {
+ document.getElementById('startDate').value = _startDate
+ document.getElementById("endDate").value = _endDate
+ startDate = _startDate
+ endDate = _endDate
+}
+
+function getProgressing(event) {
+ data = JSON.parse(event.data)
+ $('#progressInfo').html(data.comment)
+ $('#progBarInner').css('width', data.progress + '%')
+}
+
+function getProgressFinished(event) {
+ data = JSON.parse(event.data)
+ changeGraph(data)
+ hidePopup()
+}
+
+function closeEventListner() {
+ progListener.removeEventListener('progressing' + randId, getProgressing)
+ progListener.removeEventListener('progressFinished' + randId, getProgressFinished)
+}
+
+function sendRequest() {
+ if ($('#idBox').val() == '' && $('#titleBox').val() == '') {
+ window.alert('請至少填寫一個鄉民id或是')
+ }
+ content = JSON.stringify({
+ author: $('#idBox').val(),
+ aid: $('#titleBox').val(),
+ keyword: $('#keywordBox').val(),
+ stopwords: stopwords,
+ pos: {
+ noun: $('#noun').is(':checked'),
+ verb: $('#verb').is(':checked'),
+ adj: $('#adj').is(':checked'),
+ adv: $('#adv').is(':checked'),
+ pron: $('#pron').is(':checked'),
+ aux: $('#aux').is(':checked'),
+ other: $('#other').is(':checked')
+ }
+ })
+ console.log(content)
+ $.ajax({
+ type: 'POST',
+ url: 'ptt_push/addRequest',
+ data: content,
+ contentType: 'application/json',
+ success: function(data) {
+ console.log(data)
+ tsvString = data.Result.tsv
+ wordPushList = JSON.parse(data.Result.json)
+ console.log(wordPushList)
+ changeGraph(data.Result)
+ }
+ })
+}
+
+function changeGraph(data) {
+ console.log(data)
+ let tsvString = data.tsv
+ let json = JSON.parse(data.json)
+ destroyCurrentGraph()
+ d3.select('#graph').append('div').attr('id', 'vis')
+ buildSentetree(tsvString)
+}
+
+function destroyCurrentGraph() {
+ d3.selectAll('#vis').remove()
+}
+
+function hideTitles() {
+ $('#titleListLayer').addClass('hidden')
+}
+
+function buildSentetree(tsvString) { /*
+ * Build the SentenTree visualisation inside '#vis'.
+ *
+ * With no argument the rows are loaded from `tsvPath` via d3.tsv; otherwise
+ * `tsvString` is parsed with d3.tsvParse. The nested buildTree() converts each
+ * row's `count` to a number, builds the model (tokenised by spaces), renders
+ * five graphs and wires the node handlers:
+ *   - nodeClick: copies the node's entity into #keywordBox and lists, in
+ *     #titleListContainer, the `wordPushList` entries whose `part` equals one
+ *     of the node's distinct raw texts;
+ *   - nodeMouseenter: collects up to five matching entries, fills the tooltip
+ *     and highlights the node's neighbours;
+ *   - nodeMouseleave: hides the tooltip and clears the highlight.
+ * A ResizeSensor on '#d3kitRoot' rescales '#vis' to the width of '#graph'.
+ *
+ * NOTE(review): several string literals below are empty or broken across
+ * lines in this view -- presumably markup that was stripped; verify against
+ * the real file.
+ * NOTE(review): seqList, titleList, titleTemp, info, infoStr, pos and the
+ * loop variables s, i, p, r, index are assigned without a declaration.
+ */
+ console.log("Build.")
+ var model;
+ var tree;
+ var data;
+ if (typeof tsvString === 'undefined') {
+ d3.tsv(tsvPath, buildTree)
+ } else {
+ data = d3.tsvParse(tsvString)
+ buildTree(_, data) // NOTE(review): `_` is not defined in this file -- presumably a library global; confirm, otherwise this throws a ReferenceError
+ }
+
+ function buildTree(error, rawdata) {
+ const data = rawdata.map(d => Object.assign({}, d, { count: +d.count })); // unary + turns the count text into a number
+ model = new SentenTree.SentenTreeBuilder()
+ .tokenize(SentenTree.tokenizer.tokenizeBySpace)
+ .transformToken(token => (/score(d|s)?/.test(token) ? 'score' : token))
+ .buildModel(data, {
+ maxSupportRatio: 0.8,
+ minSupportRatio: 0.001
+ });
+ tree = new SentenTree.SentenTreeVis('#vis', {
+ fontSize: [15, 40],
+ gapBetweenGraph: 10
+ });
+ tree.data(model.getRenderedGraphs(5))
+ .on('nodeClick', node => {
+ $("#keywordBox").val(node.data.entity)
+ $('#titleListLayer').removeClass('hidden')
+ seqList = node.data.seq.DBs.map(function(n) {
+ return n.rawText
+ })
+ seqList = seqList.filter(function(v, i) { // keep the first occurrence of each raw text
+ return seqList.indexOf(v) == i
+ })
+ titleList = []
+ console.log(seqList)
+ for (s of seqList) {
+ titleTemp = wordPushList.filter(function(n) {
+ return n.part == s
+ })
+ titleList = titleList.concat(titleTemp)
+ }
+ console.log(titleList)
+ info = wordPushList[node.data.entity] // NOTE(review): `info` is not read again in this function
+ $('#titleListKeyword').html(node.data.entity)
+ $('#titleListKeywordInfo').html('')
+
+ $('#titleListContainer').empty()
+ for (i of titleList) {
+ let link = $('').append(
+ $('').html(i.title)
+ )
+ for (p of i.pushes) {
+ link.append(
+ $('').attr('style', 'margin: 0px 10px').html((['推', '噓', '→'])[p.type - 1] + ' ' + p.author + ': ' + p.content + '
')
+ )
+ }
+ $('#titleListContainer').append(
+ $('').attr('class', 'w3-panel').append(
+ link
+ )
+ )
+ }
+ })
+ .on('nodeMouseenter', node => {
+ console.log(node)
+ let titles = []
+ node.data.topEntries.forEach(function(x) {
+ console.log(x)
+ let result = wordPushList.filter(function(y) {
+ return y.part == x.rawText
+ })
+ for (r of result) {
+ if (titles.indexOf(r) < 0 && titles.length < 5) { // at most five distinct entries
+ titles.push(r)
+ }
+ }
+ })
+ console.log(titles)
+ infoStr = ''
+ for (index in titles) {
+ if (index == 0) {
+ infoStr += titles[index].title + '
'
+ } else {
+ if (titles[index].title != titles[index - 1].title) { // repeat a title only when it changes
+ infoStr += titles[index].title + '
'
+ }
+ }
+ pos = titles[index].part.indexOf(node.data.entity) // NOTE(review): `pos` is not read again in this function
+ infoStr += titles[index].pushes.filter(function(x) { // NOTE(review): [0] below is undefined when no push content includes the entity
+ return x.content.includes(node.data.entity)
+ })[0].content + '
'
+ }
+ $(nodeTitleContent).html(infoStr) // NOTE(review): bare identifier, presumably the element with id nodeTitleContent -- confirm
+ $('#nodeTitle').removeClass('hidden')
+ tree.highlightNeighbors(node)
+ })
+ .on('nodeMouseleave', node => {
+ $('#nodeTitle').addClass('hidden')
+ tree.clearHighlightNeighbors()
+ }).on('layoutStart', layout => {
+ console.log(layout)
+ }).on('linkMouseenter', link => {
+ console.log(link)
+ })
+ new ResizeSensor(jQuery('#d3kitRoot'), function() {
+ var scale, origin;
+ scale = Math.min(2, ($('#graph').outerWidth()) / ($('#d3kitRoot').outerWidth() + 60)) // capped at 2x
+
+ $('#vis').css({
+ transform: "scale(" + scale + ")",
+ 'transform-origin': 'top left'
+ });
+ })
+ }
+}
\ No newline at end of file
diff --git a/templates/generalTxt.html b/templates/generalTxt.html
index b2d84ae..a990d6f 100644
--- a/templates/generalTxt.html
+++ b/templates/generalTxt.html
@@ -27,6 +27,7 @@
+
diff --git a/templates/ptt.html b/templates/ptt.html
index 6dd3fa3..e41df79 100755
--- a/templates/ptt.html
+++ b/templates/ptt.html
@@ -11,6 +11,7 @@
+
diff --git a/templates/ptt_push.html b/templates/ptt_push.html
new file mode 100644
index 0000000..e7c12eb
--- /dev/null
+++ b/templates/ptt_push.html
@@ -0,0 +1,90 @@
+
+
+
+
+
{{ title }}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
編輯停用詞
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file