修改停用詞的處理方法

dev/addPush
Zovjsra 4 years ago
parent 8014aef8d1
commit ed2e045958

@ -124,7 +124,7 @@ def filterPOS(content, aid):
stopwords = content['stopwords'] stopwords = content['stopwords']
else: else:
stopwords = defaultStopWords stopwords = defaultStopWords
stopped = [i for i in cutted if i not in stopwords] stopped = [i for i in cutted] # 不在server端刪除停用詞
return stopped return stopped
@ -217,7 +217,7 @@ def findResult(content):
filtered.append(i) filtered.append(i)
titles['info']['posts'] = len(filtered) titles['info']['posts'] = len(filtered)
filtered = [i for i in sorted( filtered = [i for i in sorted(
filtered, key=lambda x: x['pushes'], reverse=True)[:50]] filtered, key=lambda x: x['pushes'], reverse=True)[:(30 if (content['keyword'] == "") else 100)]]
print('到第一步為止生成花費', int(time()-timeStart), '') print('到第一步為止生成花費', int(time()-timeStart), '')
counter = 0 counter = 0
total = len(filtered) total = len(filtered)
@ -299,7 +299,7 @@ def getDefault(startDate, endDate):
filtered.append(i) filtered.append(i)
titles['info']['posts'] = len(filtered) titles['info']['posts'] = len(filtered)
filtered = [i for i in sorted( filtered = [i for i in sorted(
filtered, key=lambda x: x['pushes'], reverse=True)[:50]] filtered, key=lambda x: x['pushes'], reverse=True)[:30]]
counter = 0 counter = 0
total = len(postContents) total = len(postContents)
content = { content = {

@ -40,7 +40,7 @@ def possegPushes(ids: list, stopwords: list, keyword: str):
result = [] result = []
for index, p in enumerate(possegs): for index, p in enumerate(possegs):
words = [i[1] for i in p['content'] if i[0] not in [ words = [i[1] for i in p['content'] if i[0] not in [
'eng', 'x', 'm'] and i[1] not in stopwords] 'eng', 'x', 'm']]
if(keyword == '' or keyword in words): if(keyword == '' or keyword in words):
result.append({ result.append({
'posString': ' '.join(words), 'posString': ' '.join(words),

@ -19,3 +19,52 @@
新聞 新聞
標題 標題
內文 內文
可以
沒有
就是
自己
大家
我們
知道
網址
備註
連結
所以

@ -10,6 +10,7 @@ var wordTitleList
var randId var randId
var globKeyword = '' var globKeyword = ''
var stopwords = [] var stopwords = []
var tsvString
function init() { function init() {
$.ajax({ $.ajax({
@ -35,7 +36,7 @@ function init() {
$('#graphInfo').empty() $('#graphInfo').empty()
$('#graphInfo').attr('style', 'margin: 10px;').append('總文章數:' + json.info.posts + ',' + keywordCountString) $('#graphInfo').attr('style', 'margin: 10px;').append('總文章數:' + json.info.posts + ',' + keywordCountString)
totalPosts = json.info.posts totalPosts = json.info.posts
buildSentetree(tsvString) buildSentetree()
} }
}) })
$(document).ready(function() { $(document).ready(function() {
@ -102,7 +103,6 @@ function addStopWord() {
} }
function showStopwordEditor() { function showStopwordEditor() {
console.log(stopwords)
$(window).unbind('keydown') $(window).unbind('keydown')
$(window).keydown(function(event) { $(window).keydown(function(event) {
if (event.keyCode == 13) { if (event.keyCode == 13) {
@ -175,7 +175,7 @@ function sendRequest() {
startDate: $('#startDate').val(), startDate: $('#startDate').val(),
endDate: $('#endDate').val(), endDate: $('#endDate').val(),
keyword: $('#keywordBox').val(), keyword: $('#keywordBox').val(),
stopwords: stopwords, stopwords: [],
pos: { pos: {
noun: $('#noun').is(':checked'), noun: $('#noun').is(':checked'),
verb: $('#verb').is(':checked'), verb: $('#verb').is(':checked'),
@ -216,19 +216,21 @@ function changeGraph(data) {
$('#graphInfo').attr('style', 'margin: 10px;').append('總文章數:' + json.info.posts + keywordCountString) $('#graphInfo').attr('style', 'margin: 10px;').append('總文章數:' + json.info.posts + keywordCountString)
totalPosts = json.info.posts totalPosts = json.info.posts
destroyCurrentGraph() destroyCurrentGraph()
d3.select('#graph').append('div').attr('id', 'vis') buildSentetree()
buildSentetree(tsvString)
} }
function destroyCurrentGraph() { function destroyCurrentGraph() {
d3.selectAll('#vis').remove() d3.selectAll('#vis').remove()
d3.select('#graph').append('div').attr('id', 'vis')
} }
function hideTitles() { function hideTitles() {
$('#titleListLayer').addClass('hidden') $('#titleListLayer').addClass('hidden')
$('#setToKeyword').unbind()
$("#addToStopwords").unbind()
} }
function buildSentetree(tsvString) { function buildSentetree() {
console.log("Build.") console.log("Build.")
var model; var model;
var tree; var tree;
@ -253,10 +255,9 @@ function buildSentetree(tsvString) {
fontSize: [15, 40], fontSize: [15, 40],
gapBetweenGraph: 10 gapBetweenGraph: 10
}); });
console.log(tree)
tree.data(model.getRenderedGraphs(2)) tree.data(model.getRenderedGraphs(2))
.on('nodeClick', node => { .on('nodeClick', node => {
$("#keywordBox").val(node.data.entity)
$('#titleListLayer').removeClass('hidden')
seqList = node.data.seq.DBs.map(function(n) { seqList = node.data.seq.DBs.map(function(n) {
return n.rawText return n.rawText
}) })
@ -273,6 +274,27 @@ function buildSentetree(tsvString) {
info = wordTitleList[node.data.entity] info = wordTitleList[node.data.entity]
$('#titleListKeyword').html(node.data.entity) $('#titleListKeyword').html(node.data.entity)
$('#titleListKeywordInfo').html('') $('#titleListKeywordInfo').html('')
// Configure the stopword-toggle and "set as keyword" buttons for the node
// that was just clicked, then reveal the title-list panel.
const clickedEntity = node.data.entity
// Drop click handlers bound for a previously clicked node so that repeated
// node clicks cannot stack several handlers on the same button.
$('#addToStopwords').unbind('click')
$('#setToKeyword').unbind('click')
if (stopwords.indexOf(clickedEntity) < 0) {
    // Not a stopword yet: the button adds it and redraws the graph client-side.
    $("#addToStopwords").html('設為停用詞').css('background-color', '#379').click(() => {
        stopwords.push(clickedEntity)
        destroyCurrentGraph()
        buildSentetree()
        hideTitles()
    })
} else {
    // Already a stopword: the button removes exactly this word and redraws.
    $("#addToStopwords").html('從停用詞移除').css('background-color', '#933').click(() => {
        // Array.prototype.pop() ignores its argument and always removes the
        // LAST element, so locate this word and splice it out instead.
        const stopwordIndex = stopwords.indexOf(clickedEntity)
        if (stopwordIndex >= 0) {
            stopwords.splice(stopwordIndex, 1)
        }
        destroyCurrentGraph()
        buildSentetree()
        hideTitles()
    })
}
// Re-run the search using the clicked word as the keyword.
$('#setToKeyword').click(() => {
    $('#keywordBox').val(clickedEntity)
    sendRequest()
    hideTitles()
})
$('#titleListLayer').removeClass('hidden')
$.ajax({ $.ajax({
type: 'POST', type: 'POST',
url: '/ptt/keywordFrequency', url: '/ptt/keywordFrequency',

File diff suppressed because it is too large Load Diff

@ -19143,7 +19143,7 @@
words.forEach(function(w) { words.forEach(function(w) {
var value = fdist[w]; var value = fdist[w];
if (value < maxSupport && value > maxc) { if (value < maxSupport && value > maxc && (isNotRoot || stopwords.indexOf(itemset[w]) < 0)) {
maxw = +w; maxw = +w;
maxc = value; maxc = value;
} }
@ -19185,7 +19185,7 @@
} }
} }
return { word: word, pos: pos, count: count, s0: s0, s1: s1 }; return { word: word, pos: pos, count: stopwords.indexOf(itemset[word]) < 0 ? count : minSupport, s0: s0, s1: s1 };
} }
function expandSeqTree(rootSeq, graphs, expandCnt, minSupport, maxSupport, terms, itemset) { function expandSeqTree(rootSeq, graphs, expandCnt, minSupport, maxSupport, terms, itemset) {
@ -19344,6 +19344,7 @@
var graphs = []; var graphs = [];
var visibleGroups = expandSeqTree(this.rootSeq, graphs, DEFAULT_NODE_COUNT, minSupport, maxSupport, this.terms, itemset); var visibleGroups = expandSeqTree(this.rootSeq, graphs, DEFAULT_NODE_COUNT, minSupport, maxSupport, this.terms, itemset);
this.graphs = graphs.filter(function(g) { this.graphs = graphs.filter(function(g) {
return g.nodes.length > 2; return g.nodes.length > 2;
}).slice(0, 10); }).slice(0, 10);
@ -19376,9 +19377,11 @@
key: 'getRenderedGraphs', key: 'getRenderedGraphs',
value: function getRenderedGraphs(limit) { value: function getRenderedGraphs(limit) {
var graphs = arguments.length === 1 ? this.graphs.slice(0, limit) : this.graphs; var graphs = arguments.length === 1 ? this.graphs.slice(0, limit) : this.graphs;
console.log("slice")
var renderedGraphs = graphs.map(function(g) { var renderedGraphs = graphs.map(function(g) {
return g.toRenderedGraph(); return g.toRenderedGraph();
}); });
console.log("toRenderedGraph")
var globalFreqRange = [(0, _lodash.min)(renderedGraphs.map(function(g) { var globalFreqRange = [(0, _lodash.min)(renderedGraphs.map(function(g) {
return g.freqRange[0]; return g.freqRange[0];
})), (0, _lodash.max)(renderedGraphs.map(function(g) { })), (0, _lodash.max)(renderedGraphs.map(function(g) {
@ -37424,7 +37427,6 @@
}).filter(function(entry) { }).filter(function(entry) {
return entry.tokens.length > 0; return entry.tokens.length > 0;
}); });
return new _TokenizedDataset2.default(tokenizedEntries); return new _TokenizedDataset2.default(tokenizedEntries);
} }
}, { }, {
@ -37855,13 +37857,18 @@
heap.push(n); heap.push(n);
}); });
let counter = 1;
while (heap.size() > 0) { while (heap.size() > 0) {
console.log(`in while ${counter++}`)
var parent = heap.pop(); var parent = heap.pop();
console.log(heap)
if (parent.merged) { if (parent.merged) {
continue; continue;
} }
var groups = []; var groups = [];
console.log(parent.data.id)
if (parent.leftLinks.length > 1) { if (parent.leftLinks.length > 1) {
var lNodes = parent.leftLinks.map(function(l) { var lNodes = parent.leftLinks.map(function(l) {
return l.source; return l.source;
@ -37875,6 +37882,7 @@
}); });
groups = groups.concat(this.groupMergeableNodes(rNodes)); groups = groups.concat(this.groupMergeableNodes(rNodes));
} }
console.log(groups)
if (groups.length > 0) { if (groups.length > 0) {
var newNodes = groups.map(function(group) { var newNodes = groups.map(function(group) {
@ -38102,6 +38110,7 @@
var RenderedGraph = function() { var RenderedGraph = function() {
function RenderedGraph(rawGraph) { function RenderedGraph(rawGraph) {
console.log(arguments)
var _ref = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {}, var _ref = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {},
_ref$bundle = _ref.bundle, _ref$bundle = _ref.bundle,
bundle = _ref$bundle === undefined ? true : _ref$bundle, bundle = _ref$bundle === undefined ? true : _ref$bundle,
@ -38134,8 +38143,10 @@
this.assignNodeIds(nodes); this.assignNodeIds(nodes);
console.log(bundle)
if (bundle) { if (bundle) {
var bundled = new _GraphBundler2.default(nodes, links).bundle(); var bundled = new _GraphBundler2.default(nodes, links).bundle();
console.log(bundled)
this.nodes = bundled.nodes; this.nodes = bundled.nodes;
this.links = bundled.links; this.links = bundled.links;
this.assignNodeIds(this.nodes); this.assignNodeIds(this.nodes);

@ -83,7 +83,7 @@
"visualization" "visualization"
], ],
"license": "Apache-2.0", "license": "Apache-2.0",
"main": "dist/SentenTree.min.js", "main": "dist/SentenTree.js",
"name": "sententree", "name": "sententree",
"repository": { "repository": {
"type": "git", "type": "git",

@ -132,9 +132,9 @@ export default class RenderedGraph {
const constraints = this.baseConstraints const constraints = this.baseConstraints
.concat(this.links.map(l => l.toConstraint())); .concat(this.links.map(l => l.toConstraint()));
return this.options.highFrequencyOnTop return this.options.highFrequencyOnTop ?
? constraints.concat(flatMap(this.nodes, n => n.computeOrderConstraints())) constraints.concat(flatMap(this.nodes, n => n.computeOrderConstraints())) :
: constraints; constraints;
} }
toGroupConstraint() { toGroupConstraint() {

@ -35,9 +35,9 @@ function growSeq(seq, terms, minSupport, maxSupport, itemset) {
let maxc = 0; let maxc = 0;
const isNotRoot = len > 0; const isNotRoot = len > 0;
const words = isNotRoot const words = isNotRoot ?
? Object.keys(fdist) Object.keys(fdist) :
: Object.keys(fdist).filter(w => !itemset[w].startsWith('#')); Object.keys(fdist).filter(w => !itemset[w].startsWith('#'));
words.forEach(w => { words.forEach(w => {
const value = fdist[w]; const value = fdist[w];
@ -62,7 +62,7 @@ function growSeq(seq, terms, minSupport, maxSupport, itemset) {
s0 = { size: 0, DBs: [] }; s0 = { size: 0, DBs: [] };
s1 = { size: 0, DBs: [] }; s1 = { size: 0, DBs: [] };
const words = seq.words; const words = seq.words;
for (let ti = 0; ti < seq.DBs.length; ti ++) { for (let ti = 0; ti < seq.DBs.length; ti++) {
const t = seq.DBs[ti]; const t = seq.DBs[ti];
const l = pos === 0 ? 0 : t.seqIndices[pos - 1] + 1; const l = pos === 0 ? 0 : t.seqIndices[pos - 1] + 1;
const r = pos === words.length ? t.tokens.length : t.seqIndices[pos]; const r = pos === words.length ? t.tokens.length : t.seqIndices[pos];
@ -261,9 +261,9 @@ export default class SentenTreeModel {
} }
getRenderedGraphs(limit) { getRenderedGraphs(limit) {
const graphs = arguments.length === 1 const graphs = arguments.length === 1 ?
? this.graphs.slice(0, limit) this.graphs.slice(0, limit) :
: this.graphs; this.graphs;
const renderedGraphs = graphs.map(g => g.toRenderedGraph()); const renderedGraphs = graphs.map(g => g.toRenderedGraph());
const globalFreqRange = [ const globalFreqRange = [
min(renderedGraphs.map(g => g.freqRange[0])), min(renderedGraphs.map(g => g.freqRange[0])),

@ -41,6 +41,8 @@
<ul id="titleListContainer" class="w3-ul w3-hoverable"></ul> <ul id="titleListContainer" class="w3-ul w3-hoverable"></ul>
<div id="backButton" style="margin: 20px 0px;"> <div id="backButton" style="margin: 20px 0px;">
<button class="general-button" type="button" id="confirm" style="background-color: #379; margin: 0px 20px" onclick="hideTitles()">返回</button> <button class="general-button" type="button" id="confirm" style="background-color: #379; margin: 0px 20px" onclick="hideTitles()">返回</button>
<button class="general-button" type="button" id="setToKeyword" style="background-color: #379; margin: 0px 20px">設為關鍵詞</button>
<button class="general-button" type='button' id='addToStopwords' style='background-color: #379;margin: 0px 40px;position: absolute;right: 0px;'></button>
</div> </div>
</div> </div>
</div> </div>

Loading…
Cancel
Save